feat(ui): add assistant conversation playback mode
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, Show } from "solid-js"
|
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, Show } from "solid-js"
|
||||||
import { ArrowBigUp, ArrowBigDown, Loader2, Mic, X } from "lucide-solid"
|
import { ArrowBigUp, ArrowBigDown, Loader2, Mic, Volume2, X } from "lucide-solid"
|
||||||
import ExpandButton from "./expand-button"
|
import ExpandButton from "./expand-button"
|
||||||
import { clearAttachments, removeAttachment } from "../stores/attachments"
|
import { clearAttachments, removeAttachment } from "../stores/attachments"
|
||||||
import { resolvePastedPlaceholders } from "../lib/prompt-placeholders"
|
import { resolvePastedPlaceholders } from "../lib/prompt-placeholders"
|
||||||
@@ -19,6 +19,7 @@ import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
|
|||||||
import { usePromptPicker } from "./prompt-input/usePromptPicker"
|
import { usePromptPicker } from "./prompt-input/usePromptPicker"
|
||||||
import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
|
import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
|
||||||
import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
|
import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
|
||||||
|
import { canUseConversationMode, isConversationModeEnabled, toggleConversationMode } from "../stores/conversation-speech"
|
||||||
const log = getLogger("actions")
|
const log = getLogger("actions")
|
||||||
const LazyUnifiedPicker = lazy(() => import("./unified-picker"))
|
const LazyUnifiedPicker = lazy(() => import("./unified-picker"))
|
||||||
|
|
||||||
@@ -476,6 +477,13 @@ export default function PromptInput(props: PromptInputProps) {
|
|||||||
const showVoiceInput = () =>
|
const showVoiceInput = () =>
|
||||||
preferences().showPromptVoiceInput &&
|
preferences().showPromptVoiceInput &&
|
||||||
(voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
|
(voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
|
||||||
|
const conversationModeEnabled = () => isConversationModeEnabled(props.instanceId)
|
||||||
|
const showConversationToggle = () => showVoiceInput() || conversationModeEnabled()
|
||||||
|
const canToggleConversationMode = () => canUseConversationMode()
|
||||||
|
const conversationModeButtonTitle = () =>
|
||||||
|
conversationModeEnabled()
|
||||||
|
? t("promptInput.conversationMode.disable.title")
|
||||||
|
: t("promptInput.conversationMode.enable.title")
|
||||||
|
|
||||||
const instance = () => getActiveInstance()
|
const instance = () => getActiveInstance()
|
||||||
|
|
||||||
@@ -601,6 +609,19 @@ export default function PromptInput(props: PromptInputProps) {
|
|||||||
</Show>
|
</Show>
|
||||||
</button>
|
</button>
|
||||||
</Show>
|
</Show>
|
||||||
|
<Show when={showConversationToggle()}>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class={`prompt-voice-button prompt-nav-voice-button prompt-conversation-button ${conversationModeEnabled() ? "is-active" : ""}`}
|
||||||
|
onClick={() => toggleConversationMode(props.instanceId)}
|
||||||
|
disabled={!conversationModeEnabled() && !canToggleConversationMode()}
|
||||||
|
aria-pressed={conversationModeEnabled()}
|
||||||
|
aria-label={conversationModeButtonTitle()}
|
||||||
|
title={conversationModeButtonTitle()}
|
||||||
|
>
|
||||||
|
<Volume2 class="h-4 w-4" aria-hidden="true" />
|
||||||
|
</button>
|
||||||
|
</Show>
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
class="prompt-clear-button"
|
class="prompt-clear-button"
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import { getLogger } from "../../lib/logger"
|
|||||||
import { requestData } from "../../lib/opencode-api"
|
import { requestData } from "../../lib/opencode-api"
|
||||||
import { useI18n } from "../../lib/i18n"
|
import { useI18n } from "../../lib/i18n"
|
||||||
import type { PromptInputApi, PromptInsertMode } from "../prompt-input/types"
|
import type { PromptInputApi, PromptInsertMode } from "../prompt-input/types"
|
||||||
|
import { clearConversationPlaybackForSession } from "../../stores/conversation-speech"
|
||||||
|
|
||||||
const log = getLogger("session")
|
const log = getLogger("session")
|
||||||
|
|
||||||
@@ -88,6 +89,10 @@ export const SessionView: Component<SessionViewProps> = (props) => {
|
|||||||
on(
|
on(
|
||||||
() => props.isActive,
|
() => props.isActive,
|
||||||
(isActive) => {
|
(isActive) => {
|
||||||
|
if (!isActive) {
|
||||||
|
clearConversationPlaybackForSession(props.instanceId, props.sessionId)
|
||||||
|
return
|
||||||
|
}
|
||||||
if (!isActive) return
|
if (!isActive) return
|
||||||
|
|
||||||
// On phones, focusing the prompt on session switch is disruptive (it raises the OSK).
|
// On phones, focusing the prompt on session switch is disruptive (it raises the OSK).
|
||||||
|
|||||||
@@ -147,6 +147,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "Send message",
|
"promptInput.send.ariaLabel": "Send message",
|
||||||
"promptInput.send.errorFallback": "Failed to send message",
|
"promptInput.send.errorFallback": "Failed to send message",
|
||||||
"promptInput.send.errorTitle": "Send failed",
|
"promptInput.send.errorTitle": "Send failed",
|
||||||
|
"promptInput.conversationMode.enable.title": "Enable conversation mode",
|
||||||
|
"promptInput.conversationMode.disable.title": "Disable conversation mode",
|
||||||
|
"promptInput.conversationMode.error.title": "Conversation playback failed",
|
||||||
|
"promptInput.conversationMode.error.message": "Unable to continue speaking assistant replies.",
|
||||||
"promptInput.voiceInput.start.title": "Start voice input",
|
"promptInput.voiceInput.start.title": "Start voice input",
|
||||||
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
|
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
|
||||||
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
|
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "Enviar mensaje",
|
"promptInput.send.ariaLabel": "Enviar mensaje",
|
||||||
"promptInput.send.errorFallback": "No se pudo enviar el mensaje",
|
"promptInput.send.errorFallback": "No se pudo enviar el mensaje",
|
||||||
"promptInput.send.errorTitle": "Error al enviar",
|
"promptInput.send.errorTitle": "Error al enviar",
|
||||||
|
"promptInput.conversationMode.enable.title": "Activar modo conversacion",
|
||||||
|
"promptInput.conversationMode.disable.title": "Desactivar modo conversacion",
|
||||||
|
"promptInput.conversationMode.error.title": "Fallo la reproduccion de la conversacion",
|
||||||
|
"promptInput.conversationMode.error.message": "No se pudieron seguir reproduciendo las respuestas del asistente.",
|
||||||
"promptInput.voiceInput.start.title": "Iniciar entrada de voz",
|
"promptInput.voiceInput.start.title": "Iniciar entrada de voz",
|
||||||
"promptInput.voiceInput.stop.title": "Detener grabación y transcribir",
|
"promptInput.voiceInput.stop.title": "Detener grabación y transcribir",
|
||||||
"promptInput.voiceInput.transcribing.title": "Transcribiendo audio",
|
"promptInput.voiceInput.transcribing.title": "Transcribiendo audio",
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "Envoyer le message",
|
"promptInput.send.ariaLabel": "Envoyer le message",
|
||||||
"promptInput.send.errorFallback": "Impossible d'envoyer le message",
|
"promptInput.send.errorFallback": "Impossible d'envoyer le message",
|
||||||
"promptInput.send.errorTitle": "Échec de l'envoi",
|
"promptInput.send.errorTitle": "Échec de l'envoi",
|
||||||
|
"promptInput.conversationMode.enable.title": "Activer le mode conversation",
|
||||||
|
"promptInput.conversationMode.disable.title": "Desactiver le mode conversation",
|
||||||
|
"promptInput.conversationMode.error.title": "La lecture de la conversation a echoue",
|
||||||
|
"promptInput.conversationMode.error.message": "Impossible de continuer a lire les reponses de l'assistant.",
|
||||||
"promptInput.voiceInput.start.title": "Démarrer la saisie vocale",
|
"promptInput.voiceInput.start.title": "Démarrer la saisie vocale",
|
||||||
"promptInput.voiceInput.stop.title": "Arrêter l'enregistrement et transcrire",
|
"promptInput.voiceInput.stop.title": "Arrêter l'enregistrement et transcrire",
|
||||||
"promptInput.voiceInput.transcribing.title": "Transcription de l'audio",
|
"promptInput.voiceInput.transcribing.title": "Transcription de l'audio",
|
||||||
|
|||||||
@@ -147,6 +147,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "שלח הודעה",
|
"promptInput.send.ariaLabel": "שלח הודעה",
|
||||||
"promptInput.send.errorFallback": "שליחת ההודעה נכשלה",
|
"promptInput.send.errorFallback": "שליחת ההודעה נכשלה",
|
||||||
"promptInput.send.errorTitle": "השליחה נכשלה",
|
"promptInput.send.errorTitle": "השליחה נכשלה",
|
||||||
|
"promptInput.conversationMode.enable.title": "הפעל מצב שיחה",
|
||||||
|
"promptInput.conversationMode.disable.title": "כבה מצב שיחה",
|
||||||
|
"promptInput.conversationMode.error.title": "ניגון השיחה נכשל",
|
||||||
|
"promptInput.conversationMode.error.message": "לא ניתן היה להמשיך להקריא את תגובות העוזר.",
|
||||||
"promptInput.voiceInput.start.title": "התחל קלט קולי",
|
"promptInput.voiceInput.start.title": "התחל קלט קולי",
|
||||||
"promptInput.voiceInput.stop.title": "עצור הקלטה ותמלל",
|
"promptInput.voiceInput.stop.title": "עצור הקלטה ותמלל",
|
||||||
"promptInput.voiceInput.transcribing.title": "מתמלל אודיו",
|
"promptInput.voiceInput.transcribing.title": "מתמלל אודיו",
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "メッセージを送信",
|
"promptInput.send.ariaLabel": "メッセージを送信",
|
||||||
"promptInput.send.errorFallback": "メッセージの送信に失敗しました",
|
"promptInput.send.errorFallback": "メッセージの送信に失敗しました",
|
||||||
"promptInput.send.errorTitle": "送信に失敗",
|
"promptInput.send.errorTitle": "送信に失敗",
|
||||||
|
"promptInput.conversationMode.enable.title": "会話モードを有効化",
|
||||||
|
"promptInput.conversationMode.disable.title": "会話モードを無効化",
|
||||||
|
"promptInput.conversationMode.error.title": "会話の読み上げに失敗しました",
|
||||||
|
"promptInput.conversationMode.error.message": "アシスタントの返信の読み上げを続行できませんでした。",
|
||||||
"promptInput.voiceInput.start.title": "音声入力を開始",
|
"promptInput.voiceInput.start.title": "音声入力を開始",
|
||||||
"promptInput.voiceInput.stop.title": "録音を停止して文字起こし",
|
"promptInput.voiceInput.stop.title": "録音を停止して文字起こし",
|
||||||
"promptInput.voiceInput.transcribing.title": "音声を文字起こし中",
|
"promptInput.voiceInput.transcribing.title": "音声を文字起こし中",
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "Отправить сообщение",
|
"promptInput.send.ariaLabel": "Отправить сообщение",
|
||||||
"promptInput.send.errorFallback": "Не удалось отправить сообщение",
|
"promptInput.send.errorFallback": "Не удалось отправить сообщение",
|
||||||
"promptInput.send.errorTitle": "Не удалось отправить",
|
"promptInput.send.errorTitle": "Не удалось отправить",
|
||||||
|
"promptInput.conversationMode.enable.title": "Включить режим разговора",
|
||||||
|
"promptInput.conversationMode.disable.title": "Выключить режим разговора",
|
||||||
|
"promptInput.conversationMode.error.title": "Сбой озвучивания разговора",
|
||||||
|
"promptInput.conversationMode.error.message": "Не удалось продолжить озвучивание ответов ассистента.",
|
||||||
"promptInput.voiceInput.start.title": "Начать голосовой ввод",
|
"promptInput.voiceInput.start.title": "Начать голосовой ввод",
|
||||||
"promptInput.voiceInput.stop.title": "Остановить запись и расшифровать",
|
"promptInput.voiceInput.stop.title": "Остановить запись и расшифровать",
|
||||||
"promptInput.voiceInput.transcribing.title": "Идёт расшифровка аудио",
|
"promptInput.voiceInput.transcribing.title": "Идёт расшифровка аудио",
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ export const messagingMessages = {
|
|||||||
"promptInput.send.ariaLabel": "发送消息",
|
"promptInput.send.ariaLabel": "发送消息",
|
||||||
"promptInput.send.errorFallback": "发送消息失败",
|
"promptInput.send.errorFallback": "发送消息失败",
|
||||||
"promptInput.send.errorTitle": "发送失败",
|
"promptInput.send.errorTitle": "发送失败",
|
||||||
|
"promptInput.conversationMode.enable.title": "开启对话模式",
|
||||||
|
"promptInput.conversationMode.disable.title": "关闭对话模式",
|
||||||
|
"promptInput.conversationMode.error.title": "对话播报失败",
|
||||||
|
"promptInput.conversationMode.error.message": "无法继续播报助手回复。",
|
||||||
"promptInput.voiceInput.start.title": "开始语音输入",
|
"promptInput.voiceInput.start.title": "开始语音输入",
|
||||||
"promptInput.voiceInput.stop.title": "停止录音并转写",
|
"promptInput.voiceInput.stop.title": "停止录音并转写",
|
||||||
"promptInput.voiceInput.transcribing.title": "正在转写音频",
|
"promptInput.voiceInput.transcribing.title": "正在转写音频",
|
||||||
|
|||||||
507
packages/ui/src/stores/conversation-speech.ts
Normal file
507
packages/ui/src/stores/conversation-speech.ts
Normal file
@@ -0,0 +1,507 @@
|
|||||||
|
import { createSignal } from "solid-js"
|
||||||
|
import { tGlobal } from "../lib/i18n"
|
||||||
|
import { showToastNotification } from "../lib/notifications"
|
||||||
|
import { serverApi } from "../lib/api-client"
|
||||||
|
import { getLogger } from "../lib/logger"
|
||||||
|
import { formatToMimeType, getSpeechPlaybackSupport } from "../lib/speech-playback-support"
|
||||||
|
import { serverSettings } from "./preferences"
|
||||||
|
import { loadSpeechCapabilities, speechCapabilities } from "./speech"
|
||||||
|
import { getActiveSession, sessions } from "./session-state"
|
||||||
|
import type { ClientPart, MessageInfo } from "../types/message"
|
||||||
|
import { messageStoreBus } from "./message-v2/bus"
|
||||||
|
import { activeInstanceId } from "./instances"
|
||||||
|
|
||||||
|
/** Playback strategies distinguished by this module. */
type SpeechPlaybackMode = "streaming" | "buffered"

/** Audio formats that can be requested from the speech synthesis endpoints. */
type SpeechTtsFormat = "mp3" | "wav" | "opus" | "aac"

/** A single assistant text part waiting to be spoken. */
interface ConversationQueueEntry {
  /** De-duplication key built from the instance, session, message and part ids. */
  key: string
  instanceId: string
  sessionId: string
  messageId: string
  partId: string
  /** Trimmed text that is sent to speech synthesis. */
  text: string
}

/** Control surface for one in-flight playback. */
interface PlaybackHandle {
  /** Ends playback early. */
  stop: () => void
  /** Settles once playback has finished, been stopped, or failed. */
  done: Promise<void>
}
|
||||||
|
|
||||||
|
const log = getLogger("actions")

/** Reactive map holding `true` for every instance whose conversation mode is switched on. */
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())

/** Keys of the entries currently waiting in `queue`. */
const queuedKeys = new Set<string>()

/** Entry keys already handed to playback, grouped by `instanceId:sessionId`. */
const spokenKeysBySession = new Map<string, Set<string>>()

/** Entries waiting to be spoken, oldest first. */
let queue: ConversationQueueEntry[] = []

/** The entry that is audible right now together with the handle that controls it. */
type ActivePlayback = {
  entry: ConversationQueueEntry
  handle: PlaybackHandle
}

let currentPlayback: ActivePlayback | null = null

/** Promise of the loop that is draining `queue`, or `null` while no loop is running. */
let queueRunner: Promise<void> | null = null

/** Set once an error toast has been shown so repeated failures do not stack toasts. */
let playbackErrorShown = false
|
||||||
|
|
||||||
|
/**
 * Builds the de-duplication key for a message part by joining the four ids with `:`.
 */
function getEntryKey(instanceId: string, sessionId: string, messageId: string, partId: string): string {
  return [instanceId, sessionId, messageId, partId].join(":")
}
|
||||||
|
|
||||||
|
/**
 * Returns the set of already-spoken entry keys for a session, creating and
 * registering an empty set the first time the session is seen.
 */
function getSpokenKeySet(instanceId: string, sessionId: string): Set<string> {
  const sessionKey = `${instanceId}:${sessionId}`
  let spoken = spokenKeysBySession.get(sessionKey)
  if (!spoken) {
    spoken = new Set<string>()
    spokenKeysBySession.set(sessionKey, spoken)
  }
  return spoken
}
|
||||||
|
|
||||||
|
/**
 * Extracts the plain text carried by a text part.
 *
 * A string `text` is returned as is. An object `text` contributes its string
 * `text` and `value` fields followed by every string found in its `content`
 * array (either bare strings or the `text` / `value` fields of object
 * segments); the pieces are joined with newlines. Anything else yields "".
 */
function resolveTextPartContent(part: ClientPart): string {
  if (part.type !== "text") return ""

  const raw: unknown = part.text
  if (typeof raw === "string") return raw
  if (!raw || typeof raw !== "object") return ""

  const pieces: string[] = []
  const collect = (candidate: unknown): void => {
    if (typeof candidate === "string") pieces.push(candidate)
  }

  const { text, value, content } = raw as { text?: unknown; value?: unknown; content?: unknown }
  collect(text)
  collect(value)

  if (Array.isArray(content)) {
    for (const item of content) {
      if (typeof item === "string") {
        pieces.push(item)
        continue
      }
      if (item && typeof item === "object") {
        const nested = item as { text?: unknown; value?: unknown }
        collect(nested.text)
        collect(nested.value)
      }
    }
  }

  return pieces.join("\n")
}
|
||||||
|
|
||||||
|
/**
 * Reports whether conversation mode is currently switched on for `instanceId`.
 * Reads the reactive instance map, so it can be used inside tracking scopes.
 */
export function isConversationModeEnabled(instanceId: string): boolean {
  const flag = conversationModeInstances().get(instanceId)
  return flag === true
}
|
||||||
|
|
||||||
|
/**
 * Reports whether spoken replies can be produced right now: the speech
 * capabilities must be available, configured and support TTS, and the
 * configured playback mode / format must be playable according to
 * `getSpeechPlaybackSupport`.
 */
export function canUseConversationMode(): boolean {
  const capabilities = speechCapabilities()
  if (!capabilities?.available) return false
  if (!capabilities.configured) return false
  if (!capabilities.supportsTts) return false

  const { playbackMode, ttsFormat } = serverSettings().speech
  const support = getSpeechPlaybackSupport({ playbackMode, ttsFormat, capabilities })
  return support.available
}
|
||||||
|
|
||||||
|
/**
 * Switches conversation mode on or off for an instance.
 *
 * The instance map is replaced with a fresh copy on every call. Turning the
 * mode off additionally drops queued entries and stops playback for that
 * instance.
 */
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
  setConversationModeInstances((previous) => {
    const updated = new Map(previous)
    if (!enabled) {
      updated.delete(instanceId)
    } else {
      updated.set(instanceId, true)
    }
    return updated
  })

  if (enabled) return
  clearConversationPlaybackForInstance(instanceId)
}
|
||||||
|
|
||||||
|
/** Flips conversation mode for `instanceId` to the opposite of its current state. */
export function toggleConversationMode(instanceId: string): void {
  const currentlyEnabled = isConversationModeEnabled(instanceId)
  setConversationModeEnabled(instanceId, !currentlyEnabled)
}
|
||||||
|
|
||||||
|
/**
 * Drops every queued entry that belongs to the given session and, if the entry
 * being played belongs to it as well, stops that playback.
 */
export function clearConversationPlaybackForSession(instanceId: string, sessionId: string): void {
  const targetKey = `${instanceId}:${sessionId}`
  const sessionKeyOf = (entry: ConversationQueueEntry): string => `${entry.instanceId}:${entry.sessionId}`

  const kept: ConversationQueueEntry[] = []
  for (const entry of queue) {
    if (sessionKeyOf(entry) === targetKey) {
      queuedKeys.delete(entry.key)
    } else {
      kept.push(entry)
    }
  }
  queue = kept

  const active = currentPlayback
  if (active && sessionKeyOf(active.entry) === targetKey) {
    active.handle.stop()
    currentPlayback = null
  }
}
|
||||||
|
|
||||||
|
/**
 * Drops every queued entry that belongs to the given instance and, if the entry
 * being played belongs to it as well, stops that playback.
 */
export function clearConversationPlaybackForInstance(instanceId: string): void {
  const kept: ConversationQueueEntry[] = []
  for (const entry of queue) {
    if (entry.instanceId === instanceId) {
      queuedKeys.delete(entry.key)
    } else {
      kept.push(entry)
    }
  }
  queue = kept

  const active = currentPlayback
  if (active?.entry.instanceId === instanceId) {
    active.handle.stop()
    currentPlayback = null
  }
}
|
||||||
|
|
||||||
|
/**
 * Decides whether replies of a session may be spoken: the instance must be the
 * active instance, the session must be that instance's active session, and the
 * session must not have a `parentId`.
 */
function isSpeakableSession(instanceId: string, sessionId: string): boolean {
  const instanceIsActive = activeInstanceId() === instanceId
  if (!instanceIsActive) return false

  const activeSession = getActiveSession(instanceId)
  if (activeSession?.id !== sessionId) return false

  // Prefer the record from the sessions store; fall back to the active session object.
  const storedSession = sessions().get(instanceId)?.get(sessionId)
  const session = storedSession ?? activeSession
  return !session?.parentId
}
|
||||||
|
|
||||||
|
/**
 * Queues an assistant text part for speech when conversation mode applies.
 *
 * The part is ignored unless it is a text part with resolvable session,
 * message and part ids, belongs to an assistant message, conversation mode is
 * enabled for the instance, the session is speakable, and the trimmed text is
 * non-empty. Parts whose key was already spoken, is queued, or is playing are
 * skipped.
 *
 * NOTE(review): a part is queued the first time it arrives with non-empty text
 * and later updates for the same key are ignored. Confirm callers deliver the
 * complete text, otherwise only the first fragment would be spoken.
 *
 * @param instanceId  Instance the update belongs to.
 * @param part        The updated message part.
 * @param messageInfo Optional message metadata used as a fallback for ids and role.
 */
export function handleConversationAssistantPartUpdated(instanceId: string, part: ClientPart, messageInfo?: MessageInfo): void {
  if (part.type !== "text") return

  const stringOrUndefined = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined

  const sessionId = stringOrUndefined(part.sessionID) ?? messageInfo?.sessionID
  const messageId = stringOrUndefined(part.messageID) ?? messageInfo?.id
  const partId = stringOrUndefined(part.id)
  if (!(sessionId && messageId && partId)) return

  // The store is only consulted when the event itself does not carry the role.
  const role = messageInfo?.role ?? messageStoreBus.getOrCreate(instanceId).getMessage(messageId)?.role ?? null
  if (role !== "assistant") return

  const mayBeSpoken = isConversationModeEnabled(instanceId) && isSpeakableSession(instanceId, sessionId)
  if (!mayBeSpoken) return

  const text = resolveTextPartContent(part).trim()
  if (text.length === 0) return

  const key = getEntryKey(instanceId, sessionId, messageId, partId)
  const alreadySpoken = getSpokenKeySet(instanceId, sessionId).has(key)
  const alreadyHandled = alreadySpoken || queuedKeys.has(key) || currentPlayback?.entry.key === key
  if (alreadyHandled) return

  queuedKeys.add(key)
  queue.push({ key, instanceId, sessionId, messageId, partId, text })
  void runConversationQueue()
}
|
||||||
|
|
||||||
|
/**
 * Drains the playback queue, speaking one entry at a time.
 *
 * Only one drain loop runs at any moment: a call made while a loop is active
 * simply waits for that loop. Entries are skipped when conversation mode has
 * been disabled for their instance or their session is no longer speakable.
 *
 * Synthesis is asynchronous, and while it is pending the entry is neither in
 * `queue` nor registered as `currentPlayback`, so the clear helpers cannot
 * cancel it. The same eligibility checks are therefore repeated once the
 * handle exists, and the playback is stopped immediately if it is no longer
 * wanted.
 *
 * NOTE(review): a clear issued for a session that stays active and enabled
 * while synthesis is pending is still not observable here — confirm whether a
 * cancellation token is needed for that case.
 *
 * On failure the entry is un-marked as spoken, the instance's queue is cleared,
 * an error toast is shown (once until the queue empties) and the loop ends.
 */
async function runConversationQueue(): Promise<void> {
  if (queueRunner) {
    await queueRunner
    return
  }

  const isStillWanted = (entry: ConversationQueueEntry): boolean =>
    isConversationModeEnabled(entry.instanceId) && isSpeakableSession(entry.instanceId, entry.sessionId)

  queueRunner = (async () => {
    while (queue.length > 0) {
      const entry = queue.shift()
      if (!entry) break
      queuedKeys.delete(entry.key)

      if (!isStillWanted(entry)) {
        continue
      }

      // Mark as spoken up front so duplicate updates arriving during synthesis are ignored.
      const spokenKeys = getSpokenKeySet(entry.instanceId, entry.sessionId)
      spokenKeys.add(entry.key)

      try {
        const handle = await createPlaybackHandle(entry.text)

        // The user may have switched session or turned the mode off while we were synthesizing.
        if (!isStillWanted(entry)) {
          spokenKeys.delete(entry.key)
          // Nobody awaits `done` on this path; make sure a rejection cannot surface as unhandled.
          handle.done.catch(() => undefined)
          handle.stop()
          continue
        }

        currentPlayback = { entry, handle }
        await handle.done
      } catch (error) {
        spokenKeys.delete(entry.key)
        clearConversationPlaybackForInstance(entry.instanceId)
        if (!playbackErrorShown) {
          playbackErrorShown = true
          showToastNotification({
            title: tGlobal("promptInput.conversationMode.error.title"),
            message:
              error instanceof Error && error.message
                ? error.message
                : tGlobal("promptInput.conversationMode.error.message"),
            variant: "error",
          })
        }
        log.error("Conversation playback failed", error)
        break
      } finally {
        if (currentPlayback?.entry.key === entry.key) {
          currentPlayback = null
        }
      }
    }
  })()

  try {
    await queueRunner
  } finally {
    queueRunner = null
    // Allow a new toast only after the backlog that produced the error is gone.
    if (queue.length === 0) {
      playbackErrorShown = false
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Starts speaking `text` using the playback mode and format from the server
 * speech settings.
 *
 * @throws Error with a localized message when TTS is unavailable or the
 *         configured mode/format cannot be played.
 */
async function createPlaybackHandle(text: string): Promise<PlaybackHandle> {
  const loaded = await loadSpeechCapabilities()
  const capabilities = loaded ?? speechCapabilities()
  const { playbackMode, ttsFormat } = serverSettings().speech

  if (!capabilities?.available || !capabilities.configured || !capabilities.supportsTts) {
    throw new Error(tGlobal("messageItem.actions.speak.error.unavailable"))
  }

  const support = getSpeechPlaybackSupport({ playbackMode, ttsFormat, capabilities })
  if (!support.available) {
    switch (support.reason) {
      case "provider-streaming-unavailable":
        throw new Error(tGlobal("settings.speech.compatibility.streamingUnavailable"))
      case "browser-streaming-unavailable":
        throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
      default:
        throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
    }
  }

  if (playbackMode === "streaming") {
    return createStreamingPlaybackHandle(text, ttsFormat)
  }
  return createBufferedPlaybackHandle(text, ttsFormat)
}
|
||||||
|
|
||||||
|
/**
 * Synthesizes `text` in a single request and plays the resulting clip through
 * an `Audio` element backed by a blob URL.
 *
 * The returned `done` promise resolves when playback ends or `stop()` is
 * called and rejects when the audio element reports an error. Whichever
 * happens first tears the element down and revokes the blob URL exactly once.
 *
 * @param text   Text to speak.
 * @param format Audio format requested from the server.
 * @throws when synthesis fails or the browser refuses to start playback. In
 *         the latter case the element and blob URL are released before the
 *         error propagates.
 */
async function createBufferedPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
  const response = await serverApi.synthesizeSpeech({ text, format })
  const objectUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType)
  const audio = new Audio(objectUrl)

  let settled = false
  let resolveDone!: () => void
  let rejectDone!: (error: unknown) => void

  const cleanup = () => {
    audio.pause()
    audio.src = ""
    audio.load()
    URL.revokeObjectURL(objectUrl)
  }

  const done = new Promise<void>((resolve, reject) => {
    resolveDone = () => {
      if (settled) return
      settled = true
      cleanup()
      resolve()
    }
    rejectDone = (error) => {
      if (settled) return
      settled = true
      cleanup()
      reject(error)
    }
  })
  // `done` is dropped when startup fails below; a no-op handler keeps such a rejection from being
  // reported as unhandled. Anyone awaiting `done` still observes the rejection.
  done.catch(() => undefined)

  audio.addEventListener("ended", () => resolveDone(), { once: true })
  audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
    once: true,
  })

  try {
    await audio.play()
  } catch (error) {
    // play() rejects e.g. when autoplay is blocked; release the element and blob URL before bailing out.
    rejectDone(error)
    throw error
  }

  return {
    stop: () => resolveDone(),
    done,
  }
}
|
||||||
|
|
||||||
|
/**
 * Streams synthesized speech for `text` into a `MediaSource` and starts
 * playback as soon as the first chunk has been appended.
 *
 * The returned `done` promise resolves when playback ends or `stop()` is
 * called. It rejects when the audio element reports an error or when the
 * stream fails, including failures that happen after playback has started,
 * so a caller awaiting `done` is never left waiting for an `ended` event that
 * cannot arrive. Settling aborts the request, tears down the audio element and
 * revokes the object URL exactly once.
 *
 * @param text   Text to speak.
 * @param format Audio format requested from the server; also the MIME fallback
 *               when the response carries no content-type.
 * @throws when streaming playback is unsupported, the response has no body,
 *         the MIME type cannot be played, or playback cannot be started. The
 *         request is aborted and resources are released before the error
 *         propagates.
 */
async function createStreamingPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
  if (typeof MediaSource === "undefined") {
    throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
  }

  const abortController = new AbortController()
  const response = await serverApi.synthesizeSpeechStream({ text, format }, abortController.signal)
  const mimeType = response.headers.get("content-type") || formatToMimeType(format)
  const stream = response.body

  if (!stream) {
    abortController.abort()
    throw new Error(tGlobal("messageItem.actions.speak.error.generate"))
  }

  if (!MediaSource.isTypeSupported(mimeType)) {
    // Nothing will consume the body; stop the transfer instead of leaving it open.
    abortController.abort()
    throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
  }

  const mediaSource = new MediaSource()
  const objectUrl = URL.createObjectURL(mediaSource)
  const audio = new Audio(objectUrl)

  let settled = false
  let startedPlayback = false
  let resolveDone!: () => void
  let rejectDone!: (error: unknown) => void

  const cleanup = () => {
    abortController.abort()
    audio.pause()
    audio.src = ""
    audio.load()
    URL.revokeObjectURL(objectUrl)
  }

  const done = new Promise<void>((resolve, reject) => {
    resolveDone = () => {
      if (settled) return
      settled = true
      cleanup()
      resolve()
    }
    rejectDone = (error) => {
      if (settled) return
      settled = true
      cleanup()
      reject(error)
    }
  })
  // `done` is dropped when startup fails below; a no-op handler keeps such a rejection from being
  // reported as unhandled. Anyone awaiting `done` still observes the rejection.
  done.catch(() => undefined)

  audio.addEventListener("ended", () => resolveDone(), { once: true })
  audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
    once: true,
  })

  try {
    await new Promise<void>((resolve, reject) => {
      mediaSource.addEventListener(
        "sourceopen",
        () => {
          void streamToMediaSource({
            mediaSource,
            stream,
            mimeType,
            onPlayable: async () => {
              if (startedPlayback) return
              startedPlayback = true
              try {
                await audio.play()
                resolve()
              } catch (error) {
                reject(error)
              }
            },
            onError: (error) => {
              // Before playback starts this fails the startup promise; afterwards only `done`
              // can carry the failure. Both calls are no-ops once their promise has settled
              // (e.g. the abort triggered by stop()).
              reject(error)
              rejectDone(error)
            },
          })
        },
        { once: true },
      )
    })
  } catch (error) {
    // Startup failed: abort the request and release the element and object URL.
    rejectDone(error)
    throw error
  }

  return {
    stop: () => resolveDone(),
    done,
  }
}
|
||||||
|
|
||||||
|
/**
 * Pumps a byte stream into a new `SourceBuffer` of `mediaSource`.
 *
 * Chunks are appended strictly one after another. `onPlayable` is awaited once,
 * right after the first chunk has been appended. When the stream is exhausted
 * and all appends have completed, the media source is ended if it is still
 * open.
 *
 * This function never rejects: any failure is reported through `onError`. On
 * failure the stream is also cancelled so the response body is not left locked
 * and downloading data nobody will play.
 */
async function streamToMediaSource(options: {
  mediaSource: MediaSource
  stream: ReadableStream<Uint8Array>
  mimeType: string
  onPlayable: () => Promise<void>
  onError: (error: unknown) => void
}) {
  let reader: ReadableStreamDefaultReader<Uint8Array> | undefined

  try {
    const sourceBuffer = options.mediaSource.addSourceBuffer(options.mimeType)
    reader = options.stream.getReader()
    const pendingChunks: Uint8Array[] = []
    let processing = false
    let playbackStarted = false

    const flushPending = async (): Promise<void> => {
      if (processing || sourceBuffer.updating || pendingChunks.length === 0) return
      const chunk = pendingChunks.shift()
      if (!chunk) return
      processing = true
      await appendChunk(sourceBuffer, chunk)
      if (!playbackStarted) {
        playbackStarted = true
        await options.onPlayable()
      }
      processing = false
      await flushPending()
    }

    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      if (value && value.byteLength > 0) {
        pendingChunks.push(value)
        await flushPending()
      }
    }

    // Drain anything that could not be appended while the buffer was busy.
    while (pendingChunks.length > 0 || sourceBuffer.updating) {
      if (pendingChunks.length > 0) {
        await flushPending()
      } else {
        await waitForUpdateEnd(sourceBuffer)
      }
    }

    if (options.mediaSource.readyState === "open") {
      options.mediaSource.endOfStream()
    }
  } catch (error) {
    // Best effort: cancelling an already errored or aborted stream rejects, which is fine to ignore.
    const cancellation = reader ? reader.cancel(error) : options.stream.cancel(error)
    cancellation.catch(() => undefined)
    options.onError(error)
  }
}
|
||||||
|
|
||||||
|
/**
 * Appends one chunk to `sourceBuffer` and settles when the append completes.
 *
 * Resolves on the buffer's `updateend` event and rejects on its `error` event.
 * If `appendBuffer` throws synchronously, the promise rejects with the thrown
 * error. In every case both listeners are detached again.
 */
function appendChunk(sourceBuffer: SourceBuffer, chunk: Uint8Array): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const handleUpdateEnd = () => {
      detach()
      resolve()
    }
    const handleError = () => {
      detach()
      reject(new Error(tGlobal("messageItem.actions.speak.error.generate")))
    }
    const detach = () => {
      sourceBuffer.removeEventListener("updateend", handleUpdateEnd)
      sourceBuffer.removeEventListener("error", handleError)
    }

    sourceBuffer.addEventListener("updateend", handleUpdateEnd, { once: true })
    sourceBuffer.addEventListener("error", handleError, { once: true })

    try {
      // Copy first so exactly the chunk's bytes are appended, even when `chunk` is a view
      // into a larger backing buffer.
      sourceBuffer.appendBuffer(new Uint8Array(chunk).buffer)
    } catch (error) {
      // No event will follow a synchronous throw, so the listeners must be removed here.
      detach()
      reject(error)
    }
  })
}
|
||||||
|
|
||||||
|
/** Resolves the next time `sourceBuffer` dispatches an `updateend` event. */
function waitForUpdateEnd(sourceBuffer: SourceBuffer): Promise<void> {
  return new Promise<void>((resolve) => {
    const onUpdateEnd = (): void => {
      resolve()
    }
    sourceBuffer.addEventListener("updateend", onUpdateEnd, { once: true })
  })
}
|
||||||
|
|
||||||
|
/**
 * Decodes base64 audio into a `Blob` and returns an object URL for it.
 * An empty `mimeType` falls back to "audio/mpeg". The caller is responsible
 * for revoking the URL.
 */
function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string {
  const decoded = atob(audioBase64)
  // atob yields one character per byte, so each char code is the byte value.
  const bytes = Uint8Array.from(decoded, (char) => char.charCodeAt(0))
  const blob = new Blob([bytes], { type: mimeType || "audio/mpeg" })
  return URL.createObjectURL(blob)
}
|
||||||
@@ -10,6 +10,7 @@ import { messageStoreBus } from "./message-v2/bus"
|
|||||||
import { removeMessagePartV2, removeMessageV2 } from "./message-v2/bridge"
|
import { removeMessagePartV2, removeMessageV2 } from "./message-v2/bridge"
|
||||||
import { getLogger } from "../lib/logger"
|
import { getLogger } from "../lib/logger"
|
||||||
import { requestData } from "../lib/opencode-api"
|
import { requestData } from "../lib/opencode-api"
|
||||||
|
import { clearConversationPlaybackForSession } from "./conversation-speech"
|
||||||
|
|
||||||
const log = getLogger("actions")
|
const log = getLogger("actions")
|
||||||
|
|
||||||
@@ -165,6 +166,8 @@ async function sendMessage(
|
|||||||
const store = messageStoreBus.getOrCreate(instanceId)
|
const store = messageStoreBus.getOrCreate(instanceId)
|
||||||
const createdAt = Date.now()
|
const createdAt = Date.now()
|
||||||
|
|
||||||
|
clearConversationPlaybackForSession(instanceId, sessionId)
|
||||||
|
|
||||||
store.upsertMessage({
|
store.upsertMessage({
|
||||||
id: messageId,
|
id: messageId,
|
||||||
sessionId,
|
sessionId,
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ import {
|
|||||||
} from "./message-v2/bridge"
|
} from "./message-v2/bridge"
|
||||||
import { messageStoreBus } from "./message-v2/bus"
|
import { messageStoreBus } from "./message-v2/bus"
|
||||||
import type { InstanceMessageStore } from "./message-v2/instance-store"
|
import type { InstanceMessageStore } from "./message-v2/instance-store"
|
||||||
|
import { handleConversationAssistantPartUpdated } from "./conversation-speech"
|
||||||
|
|
||||||
const log = getLogger("sse")
|
const log = getLogger("sse")
|
||||||
const pendingSessionFetches = new Map<string, Promise<void>>()
|
const pendingSessionFetches = new Map<string, Promise<void>>()
|
||||||
@@ -330,8 +331,9 @@ function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | Mes
|
|||||||
if (messageInfo) {
|
if (messageInfo) {
|
||||||
upsertMessageInfoV2(instanceId, messageInfo, { status: "streaming" })
|
upsertMessageInfoV2(instanceId, messageInfo, { status: "streaming" })
|
||||||
}
|
}
|
||||||
|
|
||||||
applyPartUpdateV2(instanceId, { ...part, sessionID: sessionId, messageID: messageId })
|
applyPartUpdateV2(instanceId, { ...part, sessionID: sessionId, messageID: messageId })
|
||||||
|
handleConversationAssistantPartUpdated(instanceId, { ...part, sessionID: sessionId, messageID: messageId }, messageInfo)
|
||||||
|
|
||||||
if (part.type === "tool" && part.tool === "question") {
|
if (part.type === "tool" && part.tool === "question") {
|
||||||
// Questions can arrive before their tool part exists; re-link now.
|
// Questions can arrive before their tool part exists; re-link now.
|
||||||
|
|||||||
@@ -236,6 +236,16 @@
|
|||||||
@apply opacity-50 cursor-not-allowed;
|
@apply opacity-50 cursor-not-allowed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.prompt-conversation-button.is-active {
|
||||||
|
background-color: color-mix(in oklab, var(--accent-primary) 76%, var(--surface-secondary));
|
||||||
|
color: var(--text-inverted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.prompt-conversation-button.is-active:hover:not(:disabled) {
|
||||||
|
background-color: color-mix(in oklab, var(--accent-primary) 88%, var(--surface-secondary));
|
||||||
|
color: var(--text-inverted);
|
||||||
|
}
|
||||||
|
|
||||||
.prompt-voice-timer {
|
.prompt-voice-timer {
|
||||||
font-size: 0.68rem;
|
font-size: 0.68rem;
|
||||||
font-variant-numeric: tabular-nums;
|
font-variant-numeric: tabular-nums;
|
||||||
|
|||||||
Reference in New Issue
Block a user