feat(voice): add spoken summary mode for conversation replies
This commit is contained in:
@@ -11,6 +11,7 @@ import type {
|
||||
SpeechSynthesisResponse,
|
||||
SpeechTranscriptionResponse,
|
||||
ServerMeta,
|
||||
VoiceModeStateResponse,
|
||||
WorkspaceCreateRequest,
|
||||
WorkspaceDescriptor,
|
||||
WorkspaceFileResponse,
|
||||
@@ -348,6 +349,12 @@ export const serverApi = {
|
||||
{ method: "POST" },
|
||||
)
|
||||
},
|
||||
updateVoiceMode(instanceId: string, enabled: boolean): Promise<VoiceModeStateResponse> {
|
||||
return request<VoiceModeStateResponse>(`/workspaces/${encodeURIComponent(instanceId)}/plugin/voice-mode`, {
|
||||
method: "POST",
|
||||
body: JSON.stringify({ enabled }),
|
||||
})
|
||||
},
|
||||
fetchBackgroundProcessOutput(
|
||||
instanceId: string,
|
||||
processId: string,
|
||||
|
||||
@@ -30,6 +30,7 @@ interface PlaybackHandle {
|
||||
|
||||
const log = getLogger("actions")
|
||||
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())
|
||||
const LEADING_SPOKEN_BLOCK_REGEX = /^\s*```spoken[ \t]*\r?\n([\s\S]*?)\r?\n```(?:\r?\n|$)/i
|
||||
|
||||
const queuedKeys = new Set<string>()
|
||||
const spokenKeysBySession = new Map<string, Set<string>>()
|
||||
@@ -107,6 +108,9 @@ export function canUseConversationMode(): boolean {
|
||||
}
|
||||
|
||||
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
|
||||
const previous = isConversationModeEnabled(instanceId)
|
||||
if (previous === enabled) return
|
||||
|
||||
setConversationModeInstances((prev) => {
|
||||
const next = new Map(prev)
|
||||
if (enabled) {
|
||||
@@ -120,6 +124,23 @@ export function setConversationModeEnabled(instanceId: string, enabled: boolean)
|
||||
if (!enabled) {
|
||||
clearConversationPlaybackForInstance(instanceId)
|
||||
}
|
||||
|
||||
void serverApi.updateVoiceMode(instanceId, enabled).catch((error) => {
|
||||
log.error("Failed to update conversation mode", error)
|
||||
setConversationModeInstances((prev) => {
|
||||
const next = new Map(prev)
|
||||
if (previous) {
|
||||
next.set(instanceId, true)
|
||||
} else {
|
||||
next.delete(instanceId)
|
||||
}
|
||||
return next
|
||||
})
|
||||
|
||||
if (!previous) {
|
||||
clearConversationPlaybackForInstance(instanceId)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function toggleConversationMode(instanceId: string): void {
|
||||
@@ -188,7 +209,7 @@ export function handleConversationAssistantPartUpdated(instanceId: string, part:
|
||||
if (!isConversationModeEnabled(instanceId)) return
|
||||
if (!isSpeakableSession(instanceId, sessionId)) return
|
||||
|
||||
const text = resolveTextPartContent(part).trim()
|
||||
const text = extractLeadingSpokenBlock(resolveTextPartContent(part))
|
||||
if (!text) return
|
||||
|
||||
const key = getEntryKey(instanceId, sessionId, messageId, partId)
|
||||
@@ -505,3 +526,9 @@ function createObjectUrlFromBase64(audioBase64: string, mimeType: string): strin
|
||||
}
|
||||
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
|
||||
}
|
||||
|
||||
function extractLeadingSpokenBlock(text: string): string {
|
||||
const match = text.match(LEADING_SPOKEN_BLOCK_REGEX)
|
||||
if (!match?.[1]) return ""
|
||||
return match[1].trim()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user