feat(voice): add spoken summary mode for conversation replies

This commit is contained in:
Shantur
2026-03-31 00:20:26 +01:00
parent d1a27ac31b
commit 045d8da8b2
6 changed files with 103 additions and 6 deletions

View File

@@ -11,6 +11,7 @@ import type {
SpeechSynthesisResponse,
SpeechTranscriptionResponse,
ServerMeta,
VoiceModeStateResponse,
WorkspaceCreateRequest,
WorkspaceDescriptor,
WorkspaceFileResponse,
@@ -348,6 +349,12 @@ export const serverApi = {
{ method: "POST" },
)
},
/**
 * Sends the desired voice-mode state for a workspace instance to the server.
 *
 * Issues a POST to the instance's `plugin/voice-mode` endpoint with a JSON
 * body of the form `{ enabled }`.
 *
 * @param instanceId Workspace instance identifier; URI-encoded before being placed in the path.
 * @param enabled Whether voice mode should be on for that instance.
 * @returns The promise produced by `request`, typed as `VoiceModeStateResponse`.
 */
updateVoiceMode(instanceId: string, enabled: boolean): Promise<VoiceModeStateResponse> {
  const endpoint = `/workspaces/${encodeURIComponent(instanceId)}/plugin/voice-mode`
  const payload = JSON.stringify({ enabled })
  return request<VoiceModeStateResponse>(endpoint, { method: "POST", body: payload })
},
fetchBackgroundProcessOutput(
instanceId: string,
processId: string,

View File

@@ -30,6 +30,7 @@ interface PlaybackHandle {
// Logger obtained under the "actions" name.
const log = getLogger("actions")
// Signal holding a Map of string keys to boolean conversation-mode flags.
// NOTE(review): keys look like workspace instance IDs — confirm against the setters.
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())
// Matches a fenced code block tagged `spoken` that opens the text:
//   - optional leading whitespace, then "```spoken" (case-insensitive),
//     optional spaces/tabs, and a line break;
//   - capture group 1: the block body, matched lazily up to the first
//     line break that is followed by a closing "```";
//   - the closing fence must be followed by a line break or the end of input.
// There is no `g` flag, so the pattern carries no `lastIndex` state between uses.
const LEADING_SPOKEN_BLOCK_REGEX = /^\s*```spoken[ \t]*\r?\n([\s\S]*?)\r?\n```(?:\r?\n|$)/i
// Set of string keys. NOTE(review): presumably entry keys of parts already
// queued for playback — confirm against the enqueue logic.
const queuedKeys = new Set<string>()
// Map from a string key to a set of string keys. NOTE(review): presumably
// session -> keys already spoken in that session — confirm against callers.
const spokenKeysBySession = new Map<string, Set<string>>()
@@ -107,6 +108,9 @@ export function canUseConversationMode(): boolean {
}
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
const previous = isConversationModeEnabled(instanceId)
if (previous === enabled) return
setConversationModeInstances((prev) => {
const next = new Map(prev)
if (enabled) {
@@ -120,6 +124,23 @@ export function setConversationModeEnabled(instanceId: string, enabled: boolean)
if (!enabled) {
clearConversationPlaybackForInstance(instanceId)
}
void serverApi.updateVoiceMode(instanceId, enabled).catch((error) => {
log.error("Failed to update conversation mode", error)
setConversationModeInstances((prev) => {
const next = new Map(prev)
if (previous) {
next.set(instanceId, true)
} else {
next.delete(instanceId)
}
return next
})
if (!previous) {
clearConversationPlaybackForInstance(instanceId)
}
})
}
export function toggleConversationMode(instanceId: string): void {
@@ -188,7 +209,7 @@ export function handleConversationAssistantPartUpdated(instanceId: string, part:
if (!isConversationModeEnabled(instanceId)) return
if (!isSpeakableSession(instanceId, sessionId)) return
const text = resolveTextPartContent(part).trim()
const text = extractLeadingSpokenBlock(resolveTextPartContent(part))
if (!text) return
const key = getEntryKey(instanceId, sessionId, messageId, partId)
@@ -505,3 +526,9 @@ function createObjectUrlFromBase64(audioBase64: string, mimeType: string): strin
}
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
}
/**
 * Pulls the body out of a `spoken` fenced block that opens the given text.
 *
 * @param text Raw text to inspect.
 * @returns The trimmed contents of the first capture group of
 *   `LEADING_SPOKEN_BLOCK_REGEX`, or an empty string when the pattern does
 *   not match or the captured body is empty.
 */
function extractLeadingSpokenBlock(text: string): string {
  // The pattern is non-global, so `exec` yields the same result as `String.prototype.match`.
  const spokenBody = LEADING_SPOKEN_BLOCK_REGEX.exec(text)?.[1]
  return spokenBody ? spokenBody.trim() : ""
}