feat(ui): add assistant conversation playback mode
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, Show } from "solid-js"
|
||||
import { ArrowBigUp, ArrowBigDown, Loader2, Mic, X } from "lucide-solid"
|
||||
import { ArrowBigUp, ArrowBigDown, Loader2, Mic, Volume2, X } from "lucide-solid"
|
||||
import ExpandButton from "./expand-button"
|
||||
import { clearAttachments, removeAttachment } from "../stores/attachments"
|
||||
import { resolvePastedPlaceholders } from "../lib/prompt-placeholders"
|
||||
@@ -19,6 +19,7 @@ import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
|
||||
import { usePromptPicker } from "./prompt-input/usePromptPicker"
|
||||
import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
|
||||
import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
|
||||
import { canUseConversationMode, isConversationModeEnabled, toggleConversationMode } from "../stores/conversation-speech"
|
||||
const log = getLogger("actions")
|
||||
const LazyUnifiedPicker = lazy(() => import("./unified-picker"))
|
||||
|
||||
@@ -476,6 +477,13 @@ export default function PromptInput(props: PromptInputProps) {
|
||||
const showVoiceInput = () =>
|
||||
preferences().showPromptVoiceInput &&
|
||||
(voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
|
||||
const conversationModeEnabled = () => isConversationModeEnabled(props.instanceId)
|
||||
const showConversationToggle = () => showVoiceInput() || conversationModeEnabled()
|
||||
const canToggleConversationMode = () => canUseConversationMode()
|
||||
const conversationModeButtonTitle = () =>
|
||||
conversationModeEnabled()
|
||||
? t("promptInput.conversationMode.disable.title")
|
||||
: t("promptInput.conversationMode.enable.title")
|
||||
|
||||
const instance = () => getActiveInstance()
|
||||
|
||||
@@ -601,6 +609,19 @@ export default function PromptInput(props: PromptInputProps) {
|
||||
</Show>
|
||||
</button>
|
||||
</Show>
|
||||
<Show when={showConversationToggle()}>
|
||||
<button
|
||||
type="button"
|
||||
class={`prompt-voice-button prompt-nav-voice-button prompt-conversation-button ${conversationModeEnabled() ? "is-active" : ""}`}
|
||||
onClick={() => toggleConversationMode(props.instanceId)}
|
||||
disabled={!conversationModeEnabled() && !canToggleConversationMode()}
|
||||
aria-pressed={conversationModeEnabled()}
|
||||
aria-label={conversationModeButtonTitle()}
|
||||
title={conversationModeButtonTitle()}
|
||||
>
|
||||
<Volume2 class="h-4 w-4" aria-hidden="true" />
|
||||
</button>
|
||||
</Show>
|
||||
<button
|
||||
type="button"
|
||||
class="prompt-clear-button"
|
||||
|
||||
@@ -16,6 +16,7 @@ import { getLogger } from "../../lib/logger"
|
||||
import { requestData } from "../../lib/opencode-api"
|
||||
import { useI18n } from "../../lib/i18n"
|
||||
import type { PromptInputApi, PromptInsertMode } from "../prompt-input/types"
|
||||
import { clearConversationPlaybackForSession } from "../../stores/conversation-speech"
|
||||
|
||||
const log = getLogger("session")
|
||||
|
||||
@@ -88,6 +89,10 @@ export const SessionView: Component<SessionViewProps> = (props) => {
|
||||
on(
|
||||
() => props.isActive,
|
||||
(isActive) => {
|
||||
if (!isActive) {
|
||||
clearConversationPlaybackForSession(props.instanceId, props.sessionId)
|
||||
return
|
||||
}
|
||||
if (!isActive) return
|
||||
|
||||
// On phones, focusing the prompt on session switch is disruptive (it raises the OSK).
|
||||
|
||||
@@ -147,6 +147,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "Send message",
|
||||
"promptInput.send.errorFallback": "Failed to send message",
|
||||
"promptInput.send.errorTitle": "Send failed",
|
||||
"promptInput.conversationMode.enable.title": "Enable conversation mode",
|
||||
"promptInput.conversationMode.disable.title": "Disable conversation mode",
|
||||
"promptInput.conversationMode.error.title": "Conversation playback failed",
|
||||
"promptInput.conversationMode.error.message": "Unable to continue speaking assistant replies.",
|
||||
"promptInput.voiceInput.start.title": "Start voice input",
|
||||
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
|
||||
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
|
||||
|
||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "Enviar mensaje",
|
||||
"promptInput.send.errorFallback": "No se pudo enviar el mensaje",
|
||||
"promptInput.send.errorTitle": "Error al enviar",
|
||||
"promptInput.conversationMode.enable.title": "Activar modo conversación",
|
||||
"promptInput.conversationMode.disable.title": "Desactivar modo conversación",
|
||||
"promptInput.conversationMode.error.title": "Falló la reproducción de la conversación",
|
||||
"promptInput.conversationMode.error.message": "No se pudieron seguir reproduciendo las respuestas del asistente.",
|
||||
"promptInput.voiceInput.start.title": "Iniciar entrada de voz",
|
||||
"promptInput.voiceInput.stop.title": "Detener grabación y transcribir",
|
||||
"promptInput.voiceInput.transcribing.title": "Transcribiendo audio",
|
||||
|
||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "Envoyer le message",
|
||||
"promptInput.send.errorFallback": "Impossible d'envoyer le message",
|
||||
"promptInput.send.errorTitle": "Échec de l'envoi",
|
||||
"promptInput.conversationMode.enable.title": "Activer le mode conversation",
|
||||
"promptInput.conversationMode.disable.title": "Désactiver le mode conversation",
|
||||
"promptInput.conversationMode.error.title": "La lecture de la conversation a échoué",
|
||||
"promptInput.conversationMode.error.message": "Impossible de continuer à lire les réponses de l'assistant.",
|
||||
"promptInput.voiceInput.start.title": "Démarrer la saisie vocale",
|
||||
"promptInput.voiceInput.stop.title": "Arrêter l'enregistrement et transcrire",
|
||||
"promptInput.voiceInput.transcribing.title": "Transcription de l'audio",
|
||||
|
||||
@@ -147,6 +147,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "שלח הודעה",
|
||||
"promptInput.send.errorFallback": "שליחת ההודעה נכשלה",
|
||||
"promptInput.send.errorTitle": "השליחה נכשלה",
|
||||
"promptInput.conversationMode.enable.title": "הפעל מצב שיחה",
|
||||
"promptInput.conversationMode.disable.title": "כבה מצב שיחה",
|
||||
"promptInput.conversationMode.error.title": "ניגון השיחה נכשל",
|
||||
"promptInput.conversationMode.error.message": "לא ניתן היה להמשיך להקריא את תגובות העוזר.",
|
||||
"promptInput.voiceInput.start.title": "התחל קלט קולי",
|
||||
"promptInput.voiceInput.stop.title": "עצור הקלטה ותמלל",
|
||||
"promptInput.voiceInput.transcribing.title": "מתמלל אודיו",
|
||||
|
||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "メッセージを送信",
|
||||
"promptInput.send.errorFallback": "メッセージの送信に失敗しました",
|
||||
"promptInput.send.errorTitle": "送信に失敗",
|
||||
"promptInput.conversationMode.enable.title": "会話モードを有効化",
|
||||
"promptInput.conversationMode.disable.title": "会話モードを無効化",
|
||||
"promptInput.conversationMode.error.title": "会話の読み上げに失敗しました",
|
||||
"promptInput.conversationMode.error.message": "アシスタントの返信の読み上げを続行できませんでした。",
|
||||
"promptInput.voiceInput.start.title": "音声入力を開始",
|
||||
"promptInput.voiceInput.stop.title": "録音を停止して文字起こし",
|
||||
"promptInput.voiceInput.transcribing.title": "音声を文字起こし中",
|
||||
|
||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "Отправить сообщение",
|
||||
"promptInput.send.errorFallback": "Не удалось отправить сообщение",
|
||||
"promptInput.send.errorTitle": "Не удалось отправить",
|
||||
"promptInput.conversationMode.enable.title": "Включить режим разговора",
|
||||
"promptInput.conversationMode.disable.title": "Выключить режим разговора",
|
||||
"promptInput.conversationMode.error.title": "Сбой озвучивания разговора",
|
||||
"promptInput.conversationMode.error.message": "Не удалось продолжить озвучивание ответов ассистента.",
|
||||
"promptInput.voiceInput.start.title": "Начать голосовой ввод",
|
||||
"promptInput.voiceInput.stop.title": "Остановить запись и расшифровать",
|
||||
"promptInput.voiceInput.transcribing.title": "Идёт расшифровка аудио",
|
||||
|
||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
||||
"promptInput.send.ariaLabel": "发送消息",
|
||||
"promptInput.send.errorFallback": "发送消息失败",
|
||||
"promptInput.send.errorTitle": "发送失败",
|
||||
"promptInput.conversationMode.enable.title": "开启对话模式",
|
||||
"promptInput.conversationMode.disable.title": "关闭对话模式",
|
||||
"promptInput.conversationMode.error.title": "对话播报失败",
|
||||
"promptInput.conversationMode.error.message": "无法继续播报助手回复。",
|
||||
"promptInput.voiceInput.start.title": "开始语音输入",
|
||||
"promptInput.voiceInput.stop.title": "停止录音并转写",
|
||||
"promptInput.voiceInput.transcribing.title": "正在转写音频",
|
||||
|
||||
507
packages/ui/src/stores/conversation-speech.ts
Normal file
507
packages/ui/src/stores/conversation-speech.ts
Normal file
@@ -0,0 +1,507 @@
|
||||
import { createSignal } from "solid-js"
|
||||
import { tGlobal } from "../lib/i18n"
|
||||
import { showToastNotification } from "../lib/notifications"
|
||||
import { serverApi } from "../lib/api-client"
|
||||
import { getLogger } from "../lib/logger"
|
||||
import { formatToMimeType, getSpeechPlaybackSupport } from "../lib/speech-playback-support"
|
||||
import { serverSettings } from "./preferences"
|
||||
import { loadSpeechCapabilities, speechCapabilities } from "./speech"
|
||||
import { getActiveSession, sessions } from "./session-state"
|
||||
import type { ClientPart, MessageInfo } from "../types/message"
|
||||
import { messageStoreBus } from "./message-v2/bus"
|
||||
import { activeInstanceId } from "./instances"
|
||||
|
||||
/** The two playback strategies this module distinguishes. */
type SpeechPlaybackMode = "streaming" | "buffered"

/** Audio formats that are forwarded to the speech synthesis API calls in this module. */
type SpeechTtsFormat = "mp3" | "wav" | "opus" | "aac"

/** One assistant text part waiting to be (or currently being) spoken. */
type ConversationQueueEntry = {
  /** De-duplication key built from instance, session, message and part ids. */
  key: string
  instanceId: string
  sessionId: string
  messageId: string
  partId: string
  /** Trimmed text that will be sent for synthesis. */
  text: string
}

/** Control surface for a playback that has already been started. */
type PlaybackHandle = {
  /** Ends the playback early. */
  stop: () => void
  /** Settles when the playback ends, is stopped, or fails. */
  done: Promise<void>
}

/** The entry that is audible right now together with the handle that controls it. */
type ActivePlayback = {
  entry: ConversationQueueEntry
  handle: PlaybackHandle
}

const log = getLogger("actions")

// Instances that currently have conversation mode switched on (absent means off).
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())

// Keys of entries that are sitting in `queue`.
const queuedKeys = new Set<string>()

// Keys that were already handed to playback, grouped by "instanceId:sessionId".
const spokenKeysBySession = new Map<string, Set<string>>()

// Pending entries in the order they will be spoken.
let queue: ConversationQueueEntry[] = []

// Playback in progress, or null when nothing is playing.
let currentPlayback: ActivePlayback | null = null

// Promise of the drain loop while it is running; null when idle.
let queueRunner: Promise<void> | null = null

// Ensures only one error toast is shown until the queue has been emptied again.
let playbackErrorShown = false
|
||||
|
||||
/**
 * Builds the de-duplication key for a single message part.
 *
 * @returns The four identifiers joined with ":" in the order instance, session, message, part.
 */
function getEntryKey(instanceId: string, sessionId: string, messageId: string, partId: string): string {
  const segments = [instanceId, sessionId, messageId, partId]
  return segments.join(":")
}
|
||||
|
||||
/**
 * Returns the set of already-spoken entry keys for one session, creating and
 * registering an empty set the first time the session is seen.
 */
function getSpokenKeySet(instanceId: string, sessionId: string): Set<string> {
  const sessionKey = [instanceId, sessionId].join(":")

  let spokenKeys = spokenKeysBySession.get(sessionKey)
  if (!spokenKeys) {
    spokenKeys = new Set<string>()
    spokenKeysBySession.set(sessionKey, spokenKeys)
  }
  return spokenKeys
}
|
||||
|
||||
/**
 * Extracts the plain text carried by a message part.
 *
 * A string `text` is returned as-is. When `text` is an object, its string
 * `text` and `value` fields and every string (or string `text` / `value`
 * field of an object) inside its `content` array are collected in that order
 * and joined with newlines.
 *
 * @returns The extracted text, or "" for non-text parts and unrecognised shapes.
 */
function resolveTextPartContent(part: ClientPart): string {
  if (part.type !== "text") return ""

  if (typeof part.text === "string") return part.text
  if (!part.text || typeof part.text !== "object") return ""

  const collected: string[] = []

  // `text` is always considered before `value`, matching the order they are emitted in.
  const collectTextAndValue = (source: { text?: unknown; value?: unknown }): void => {
    if (typeof source.text === "string") collected.push(source.text)
    if (typeof source.value === "string") collected.push(source.value)
  }

  const container = part.text as { text?: unknown; value?: unknown; content?: unknown[] }
  collectTextAndValue(container)

  if (Array.isArray(container.content)) {
    for (const item of container.content) {
      if (typeof item === "string") {
        collected.push(item)
        continue
      }
      if (item && typeof item === "object") {
        collectTextAndValue(item as { text?: unknown; value?: unknown })
      }
    }
  }

  return collected.join("\n")
}
|
||||
|
||||
/** Reports whether conversation mode is currently switched on for the given instance. */
export function isConversationModeEnabled(instanceId: string): boolean {
  const flag = conversationModeInstances().get(instanceId)
  return flag === true
}
|
||||
|
||||
/**
 * Reports whether conversation mode can be turned on right now.
 *
 * Requires speech capabilities that are available, configured and TTS-capable,
 * and a playback mode / TTS format combination that
 * `getSpeechPlaybackSupport` reports as available.
 */
export function canUseConversationMode(): boolean {
  const capabilities = speechCapabilities()
  if (!(capabilities?.available && capabilities.configured && capabilities.supportsTts)) {
    return false
  }

  const { playbackMode, ttsFormat } = serverSettings().speech
  const support = getSpeechPlaybackSupport({ playbackMode, ttsFormat, capabilities })
  return support.available
}
|
||||
|
||||
/**
 * Switches conversation mode on or off for one instance.
 *
 * Turning it off also drops the instance's queued entries and stops its
 * current playback.
 */
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
  setConversationModeInstances((current) => {
    // Copy so the signal receives a new Map reference.
    const updated = new Map(current)
    if (!enabled) {
      updated.delete(instanceId)
      return updated
    }
    updated.set(instanceId, true)
    return updated
  })

  if (enabled) return
  clearConversationPlaybackForInstance(instanceId)
}
|
||||
|
||||
/** Flips conversation mode for the given instance to the opposite of its current state. */
export function toggleConversationMode(instanceId: string): void {
  const currentlyEnabled = isConversationModeEnabled(instanceId)
  setConversationModeEnabled(instanceId, !currentlyEnabled)
}
|
||||
|
||||
/**
 * Drops every queued entry that belongs to the given session and stops the
 * current playback if it belongs to that session.
 */
export function clearConversationPlaybackForSession(instanceId: string, sessionId: string): void {
  const targetKey = `${instanceId}:${sessionId}`
  const belongsToTarget = (entry: ConversationQueueEntry): boolean =>
    `${entry.instanceId}:${entry.sessionId}` === targetKey

  const remaining: ConversationQueueEntry[] = []
  for (const entry of queue) {
    if (belongsToTarget(entry)) {
      // Forget the key so the part may be queued again later.
      queuedKeys.delete(entry.key)
    } else {
      remaining.push(entry)
    }
  }
  queue = remaining

  const active = currentPlayback
  if (active && belongsToTarget(active.entry)) {
    active.handle.stop()
    currentPlayback = null
  }
}
|
||||
|
||||
/**
 * Drops every queued entry that belongs to the given instance and stops the
 * current playback if it belongs to that instance.
 */
export function clearConversationPlaybackForInstance(instanceId: string): void {
  const remaining: ConversationQueueEntry[] = []
  for (const entry of queue) {
    if (entry.instanceId !== instanceId) {
      remaining.push(entry)
      continue
    }
    // Forget the key so the part may be queued again later.
    queuedKeys.delete(entry.key)
  }
  queue = remaining

  const active = currentPlayback
  if (active?.entry.instanceId === instanceId) {
    active.handle.stop()
    currentPlayback = null
  }
}
|
||||
|
||||
/**
 * Decides whether replies of a session may be spoken.
 *
 * The instance must be the active instance, the session must be that
 * instance's active session, and the session must not have a `parentId`.
 */
function isSpeakableSession(instanceId: string, sessionId: string): boolean {
  const isActiveInstance = activeInstanceId() === instanceId
  if (!isActiveInstance) return false

  const activeSession = getActiveSession(instanceId)
  if (!(activeSession && activeSession.id === sessionId)) return false

  // Prefer the record from the sessions store; fall back to the active session object.
  const storedSession = sessions().get(instanceId)?.get(sessionId)
  const session = storedSession ?? activeSession
  return !session?.parentId
}
|
||||
|
||||
/**
 * Queues an updated assistant text part for speech playback.
 *
 * The part is ignored unless it is a text part with resolvable session,
 * message and part ids, belongs to an assistant message, conversation mode is
 * enabled for the instance, the session is speakable, the trimmed text is
 * non-empty, and the part has not already been spoken, queued or started.
 *
 * NOTE(review): because of the de-duplication by part key, only the text
 * present on the first non-empty update of a part is ever spoken; later
 * updates of the same part are dropped. Confirm that callers deliver complete
 * text, or that this is acceptable for incrementally updated parts.
 *
 * @param instanceId Instance the part belongs to.
 * @param part The updated part; its own ids take precedence over `messageInfo`.
 * @param messageInfo Optional message metadata used as a fallback for ids and role.
 */
export function handleConversationAssistantPartUpdated(instanceId: string, part: ClientPart, messageInfo?: MessageInfo): void {
  if (part.type !== "text") return

  const asString = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined

  const sessionId = asString(part.sessionID) ?? messageInfo?.sessionID
  const messageId = asString(part.messageID) ?? messageInfo?.id
  const partId = asString(part.id)
  if (!sessionId || !messageId || !partId) return

  // The message store is only consulted when the event itself carries no role.
  const role = messageInfo?.role ?? messageStoreBus.getOrCreate(instanceId).getMessage(messageId)?.role
  if (role !== "assistant") return

  if (!isConversationModeEnabled(instanceId)) return
  if (!isSpeakableSession(instanceId, sessionId)) return

  const text = resolveTextPartContent(part).trim()
  if (!text) return

  const key = getEntryKey(instanceId, sessionId, messageId, partId)
  const spokenKeys = getSpokenKeySet(instanceId, sessionId)

  if (spokenKeys.has(key)) return
  if (queuedKeys.has(key)) return
  if (currentPlayback?.entry.key === key) return

  const entry: ConversationQueueEntry = { key, instanceId, sessionId, messageId, partId, text }
  queuedKeys.add(key)
  queue.push(entry)
  void runConversationQueue()
}
|
||||
|
||||
/**
 * Drains the conversation queue, speaking one entry at a time.
 *
 * Only one drain loop runs at once; a call made while a loop is active simply
 * waits for that loop to finish. Entries whose instance no longer has
 * conversation mode enabled, or whose session is no longer speakable, are
 * skipped. On a playback failure the instance's remaining entries are
 * dropped, at most one error toast is shown until the queue is empty again,
 * and the loop stops.
 */
async function runConversationQueue(): Promise<void> {
  if (queueRunner) {
    await queueRunner
    return
  }

  const isStillWanted = (entry: ConversationQueueEntry): boolean =>
    isConversationModeEnabled(entry.instanceId) && isSpeakableSession(entry.instanceId, entry.sessionId)

  queueRunner = (async () => {
    while (queue.length > 0) {
      const entry = queue.shift()
      if (!entry) break
      queuedKeys.delete(entry.key)

      if (!isStillWanted(entry)) {
        continue
      }

      // Mark as spoken up front so repeated updates of the same part are not re-queued.
      const spokenKeys = getSpokenKeySet(entry.instanceId, entry.sessionId)
      spokenKeys.add(entry.key)

      try {
        const handle = await createPlaybackHandle(entry.text)

        // Creating the handle is asynchronous and `currentPlayback` is not set
        // while it is pending, so a disable / session switch that happened in
        // the meantime could not stop it. Re-validate before committing.
        if (!isStillWanted(entry)) {
          handle.stop()
          // The entry is being discarded; a late failure of it is irrelevant.
          await handle.done.catch(() => undefined)
          continue
        }

        currentPlayback = { entry, handle }
        await handle.done
      } catch (error) {
        // Allow the part to be retried on a later update.
        spokenKeys.delete(entry.key)
        clearConversationPlaybackForInstance(entry.instanceId)
        if (!playbackErrorShown) {
          playbackErrorShown = true
          showToastNotification({
            title: tGlobal("promptInput.conversationMode.error.title"),
            message:
              error instanceof Error && error.message
                ? error.message
                : tGlobal("promptInput.conversationMode.error.message"),
            variant: "error",
          })
        }
        log.error("Conversation playback failed", error)
        break
      } finally {
        // Only clear the slot if it still refers to this entry.
        if (currentPlayback?.entry.key === entry.key) {
          currentPlayback = null
        }
      }
    }
  })()

  try {
    await queueRunner
  } finally {
    queueRunner = null
    if (queue.length === 0) {
      playbackErrorShown = false
    }
  }
}
|
||||
|
||||
/**
 * Starts speaking `text` using the playback mode and TTS format from the
 * server speech settings.
 *
 * @returns A handle for the started playback.
 * @throws Error with a localized message when speech is unavailable or the
 *   configured playback mode / format combination is not supported.
 */
async function createPlaybackHandle(text: string): Promise<PlaybackHandle> {
  // Prefer freshly loaded capabilities; fall back to the current store value.
  const loadedCapabilities = await loadSpeechCapabilities()
  const capabilities = loadedCapabilities ?? speechCapabilities()
  const settings = serverSettings().speech

  if (!(capabilities?.available && capabilities.configured && capabilities.supportsTts)) {
    throw new Error(tGlobal("messageItem.actions.speak.error.unavailable"))
  }

  const support = getSpeechPlaybackSupport({
    playbackMode: settings.playbackMode,
    ttsFormat: settings.ttsFormat,
    capabilities,
  })

  if (!support.available) {
    switch (support.reason) {
      case "provider-streaming-unavailable":
        throw new Error(tGlobal("settings.speech.compatibility.streamingUnavailable"))
      case "browser-streaming-unavailable":
        throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
      default:
        throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
    }
  }

  if (settings.playbackMode === "streaming") {
    return createStreamingPlaybackHandle(text, settings.ttsFormat)
  }
  return createBufferedPlaybackHandle(text, settings.ttsFormat)
}
|
||||
|
||||
/**
 * Synthesizes `text` in one request and plays the resulting audio.
 *
 * The returned handle's `done` resolves when playback ends or `stop()` is
 * called, and rejects when the audio element reports an error. The audio
 * element and its object URL are released in all of these cases, and also
 * when playback cannot be started at all.
 *
 * @throws Whatever `serverApi.synthesizeSpeech` or `audio.play()` rejects with.
 */
async function createBufferedPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
  const response = await serverApi.synthesizeSpeech({ text, format })
  const objectUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType)
  const audio = new Audio(objectUrl)

  let settled = false
  let resolveDone!: () => void
  let rejectDone!: (error: unknown) => void

  // Detaches the audio element from its source and frees the object URL.
  const cleanup = () => {
    audio.pause()
    audio.src = ""
    audio.load()
    URL.revokeObjectURL(objectUrl)
  }

  const done = new Promise<void>((resolve, reject) => {
    resolveDone = () => {
      if (settled) return
      settled = true
      cleanup()
      resolve()
    }
    rejectDone = (error) => {
      if (settled) return
      settled = true
      cleanup()
      reject(error)
    }
  })
  // When start-up fails the error is reported by throwing below and nobody
  // ever receives `done`; this keeps such a rejection from being unhandled.
  // Consumers that await `done` still observe the rejection.
  done.catch(() => undefined)

  audio.addEventListener("ended", () => resolveDone(), { once: true })
  audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
    once: true,
  })

  try {
    await audio.play()
  } catch (error) {
    // Release the audio element and object URL before surfacing the failure.
    rejectDone(error)
    throw error
  }

  return {
    stop: () => resolveDone(),
    done,
  }
}
|
||||
|
||||
/**
 * Synthesizes `text` as a stream and plays it through a `MediaSource` while
 * it is still downloading.
 *
 * The promise resolves once `audio.play()` has succeeded after the first
 * chunk was appended. The returned handle's `done` resolves when playback
 * ends or `stop()` is called, and rejects on an audio element error or when
 * the stream fails after playback has started. Every exit path aborts the
 * request and releases the audio element and object URL.
 *
 * @throws Error when `MediaSource` is missing, the response has no body, the
 *   MIME type is unsupported, the stream fails or ends before any audio was
 *   appended, or `audio.play()` rejects.
 */
async function createStreamingPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
  if (typeof MediaSource === "undefined") {
    throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
  }

  const abortController = new AbortController()
  const response = await serverApi.synthesizeSpeechStream({ text, format }, abortController.signal)
  const mimeType = response.headers.get("content-type") || formatToMimeType(format)
  const stream = response.body
  if (!stream) {
    throw new Error(tGlobal("messageItem.actions.speak.error.generate"))
  }

  if (!MediaSource.isTypeSupported(mimeType)) {
    // Nothing will consume the body; abort so the request does not stay open.
    abortController.abort()
    throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
  }

  const mediaSource = new MediaSource()
  const objectUrl = URL.createObjectURL(mediaSource)
  const audio = new Audio(objectUrl)

  let settled = false
  let startedPlayback = false
  let resolveDone!: () => void
  let rejectDone!: (error: unknown) => void

  // Cancels the download and detaches/frees the audio resources.
  const cleanup = () => {
    abortController.abort()
    audio.pause()
    audio.src = ""
    audio.load()
    URL.revokeObjectURL(objectUrl)
  }

  const done = new Promise<void>((resolve, reject) => {
    resolveDone = () => {
      if (settled) return
      settled = true
      cleanup()
      resolve()
    }
    rejectDone = (error) => {
      if (settled) return
      settled = true
      cleanup()
      reject(error)
    }
  })

  audio.addEventListener("ended", () => resolveDone(), { once: true })
  audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
    once: true,
  })

  try {
    await new Promise<void>((resolve, reject) => {
      // A failure recorded on `done` during start-up (e.g. an audio element
      // error) fails start-up with that same error. This also marks `done`'s
      // rejection as handled; later consumers of `done` still observe it.
      done.catch(reject)

      mediaSource.addEventListener(
        "sourceopen",
        () => {
          void streamToMediaSource({
            mediaSource,
            stream,
            mimeType,
            onPlayable: async () => {
              if (startedPlayback) return
              startedPlayback = true
              try {
                await audio.play()
                resolve()
              } catch (error) {
                reject(error)
              }
            },
            onError: (error) => {
              // Before playback starts this fails start-up; afterwards only
              // `done` can still report it. Both calls are no-ops once settled
              // (e.g. the abort triggered by `stop()`).
              reject(error)
              rejectDone(error)
            },
          }).then(() => {
            // The stream finished without a single playable chunk.
            if (!startedPlayback) {
              reject(new Error(tGlobal("messageItem.actions.speak.error.generate")))
            }
          })
        },
        { once: true },
      )
    })
  } catch (error) {
    // Start-up failed: abort the request and release the audio resources.
    rejectDone(error)
    throw error
  }

  return {
    stop: () => resolveDone(),
    done,
  }
}
|
||||
|
||||
/**
 * Pipes a byte stream into a new `SourceBuffer` of the given media source.
 *
 * Chunks are appended strictly one after another. `onPlayable` is awaited once,
 * right after the first chunk has been appended. When the stream is exhausted
 * and the media source is still open, `endOfStream()` is called.
 *
 * This function never rejects: any failure is reported through `onError`,
 * after the stream reader has been cancelled so the source stops producing
 * data nobody will consume.
 */
async function streamToMediaSource(options: {
  mediaSource: MediaSource
  stream: ReadableStream<Uint8Array>
  mimeType: string
  onPlayable: () => Promise<void>
  onError: (error: unknown) => void
}) {
  let reader: ReadableStreamDefaultReader<Uint8Array> | undefined

  try {
    const sourceBuffer = options.mediaSource.addSourceBuffer(options.mimeType)
    reader = options.stream.getReader()
    let playbackStarted = false

    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      if (!value || value.byteLength === 0) continue

      // A SourceBuffer accepts only one append at a time.
      if (sourceBuffer.updating) {
        await waitForUpdateEnd(sourceBuffer)
      }
      await appendChunk(sourceBuffer, value)

      if (!playbackStarted) {
        playbackStarted = true
        await options.onPlayable()
      }
    }

    if (sourceBuffer.updating) {
      await waitForUpdateEnd(sourceBuffer)
    }

    if (options.mediaSource.readyState === "open") {
      options.mediaSource.endOfStream()
    }
  } catch (error) {
    if (reader) {
      // Best effort: cancelling an already errored or aborted stream may itself reject.
      await reader.cancel().catch(() => undefined)
    }
    options.onError(error)
  }
}
|
||||
|
||||
/**
 * Appends one chunk to a `SourceBuffer` and waits for the append to finish.
 *
 * Resolves on the buffer's next `updateend` event and rejects on its `error`
 * event, or with the thrown error when `appendBuffer` throws synchronously.
 * The temporary listeners are removed in every case.
 */
function appendChunk(sourceBuffer: SourceBuffer, chunk: Uint8Array): Promise<void> {
  return new Promise((resolve, reject) => {
    const detach = () => {
      sourceBuffer.removeEventListener("updateend", handleUpdateEnd)
      sourceBuffer.removeEventListener("error", handleError)
    }
    const handleUpdateEnd = () => {
      detach()
      resolve()
    }
    const handleError = () => {
      detach()
      reject(new Error(tGlobal("messageItem.actions.speak.error.generate")))
    }

    sourceBuffer.addEventListener("updateend", handleUpdateEnd, { once: true })
    sourceBuffer.addEventListener("error", handleError, { once: true })

    try {
      // Copy the view so exactly the chunk's bytes are appended, even when it
      // is a window onto a larger underlying buffer.
      sourceBuffer.appendBuffer(new Uint8Array(chunk).buffer)
    } catch (error) {
      // No event will follow a synchronous failure; do not leave listeners behind.
      detach()
      reject(error)
    }
  })
}
|
||||
|
||||
/** Returns a promise that resolves on the buffer's next `updateend` event. */
function waitForUpdateEnd(sourceBuffer: SourceBuffer): Promise<void> {
  return new Promise<void>((resolve) => {
    const onUpdateEnd = (): void => {
      resolve()
    }
    sourceBuffer.addEventListener("updateend", onUpdateEnd, { once: true })
  })
}
|
||||
|
||||
/**
 * Decodes base64 audio data into a Blob and returns an object URL for it.
 *
 * @param audioBase64 Base64-encoded audio bytes.
 * @param mimeType MIME type for the Blob; "audio/mpeg" is used when it is empty.
 * @returns An object URL; the caller is responsible for revoking it.
 */
function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string {
  // atob yields one character per byte, so each char code is the byte value.
  const decoded = atob(audioBase64)
  const bytes = Uint8Array.from(decoded, (char) => char.charCodeAt(0))
  const blob = new Blob([bytes], { type: mimeType || "audio/mpeg" })
  return URL.createObjectURL(blob)
}
|
||||
@@ -10,6 +10,7 @@ import { messageStoreBus } from "./message-v2/bus"
|
||||
import { removeMessagePartV2, removeMessageV2 } from "./message-v2/bridge"
|
||||
import { getLogger } from "../lib/logger"
|
||||
import { requestData } from "../lib/opencode-api"
|
||||
import { clearConversationPlaybackForSession } from "./conversation-speech"
|
||||
|
||||
const log = getLogger("actions")
|
||||
|
||||
@@ -165,6 +166,8 @@ async function sendMessage(
|
||||
const store = messageStoreBus.getOrCreate(instanceId)
|
||||
const createdAt = Date.now()
|
||||
|
||||
clearConversationPlaybackForSession(instanceId, sessionId)
|
||||
|
||||
store.upsertMessage({
|
||||
id: messageId,
|
||||
sessionId,
|
||||
|
||||
@@ -63,6 +63,7 @@ import {
|
||||
} from "./message-v2/bridge"
|
||||
import { messageStoreBus } from "./message-v2/bus"
|
||||
import type { InstanceMessageStore } from "./message-v2/instance-store"
|
||||
import { handleConversationAssistantPartUpdated } from "./conversation-speech"
|
||||
|
||||
const log = getLogger("sse")
|
||||
const pendingSessionFetches = new Map<string, Promise<void>>()
|
||||
@@ -330,8 +331,9 @@ function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | Mes
|
||||
if (messageInfo) {
|
||||
upsertMessageInfoV2(instanceId, messageInfo, { status: "streaming" })
|
||||
}
|
||||
|
||||
|
||||
applyPartUpdateV2(instanceId, { ...part, sessionID: sessionId, messageID: messageId })
|
||||
handleConversationAssistantPartUpdated(instanceId, { ...part, sessionID: sessionId, messageID: messageId }, messageInfo)
|
||||
|
||||
if (part.type === "tool" && part.tool === "question") {
|
||||
// Questions can arrive before their tool part exists; re-link now.
|
||||
|
||||
@@ -236,6 +236,16 @@
|
||||
@apply opacity-50 cursor-not-allowed;
|
||||
}
|
||||
|
||||
.prompt-conversation-button.is-active {
|
||||
background-color: color-mix(in oklab, var(--accent-primary) 76%, var(--surface-secondary));
|
||||
color: var(--text-inverted);
|
||||
}
|
||||
|
||||
.prompt-conversation-button.is-active:hover:not(:disabled) {
|
||||
background-color: color-mix(in oklab, var(--accent-primary) 88%, var(--surface-secondary));
|
||||
color: var(--text-inverted);
|
||||
}
|
||||
|
||||
.prompt-voice-timer {
|
||||
font-size: 0.68rem;
|
||||
font-variant-numeric: tabular-nums;
|
||||
|
||||
Reference in New Issue
Block a user