From d447b0582187583252f5c6a5d3703b903698a66c Mon Sep 17 00:00:00 2001 From: Shantur Rathore Date: Thu, 26 Mar 2026 18:29:45 +0000 Subject: [PATCH] feat(ui): add message text-to-speech controls --- packages/ui/src/components/message-block.tsx | 23 ++ packages/ui/src/components/message-item.tsx | 29 +++ .../src/components/speech-action-button.tsx | 31 +++ packages/ui/src/components/tool-call.tsx | 28 +++ packages/ui/src/components/tool-call/utils.ts | 34 +++ packages/ui/src/lib/hooks/use-speech.ts | 203 ++++++++++++++++++ .../ui/src/lib/i18n/messages/en/messaging.ts | 7 + .../ui/src/lib/i18n/messages/es/messaging.ts | 7 + .../ui/src/lib/i18n/messages/fr/messaging.ts | 7 + .../ui/src/lib/i18n/messages/he/messaging.ts | 7 + .../ui/src/lib/i18n/messages/ja/messaging.ts | 7 + .../ui/src/lib/i18n/messages/ru/messaging.ts | 7 + .../lib/i18n/messages/zh-Hans/messaging.ts | 7 + 13 files changed, 397 insertions(+) create mode 100644 packages/ui/src/components/speech-action-button.tsx create mode 100644 packages/ui/src/lib/hooks/use-speech.ts diff --git a/packages/ui/src/components/message-block.tsx b/packages/ui/src/components/message-block.tsx index 4157f342..0020333d 100644 --- a/packages/ui/src/components/message-block.tsx +++ b/packages/ui/src/components/message-block.tsx @@ -14,6 +14,8 @@ import { showAlertDialog } from "../stores/alerts" import { deleteMessage } from "../stores/session-actions" import { useI18n } from "../lib/i18n" import type { DeleteHoverState } from "../types/delete-hover" +import { useSpeech } from "../lib/hooks/use-speech" +import SpeechActionButton from "./speech-action-button" function DeleteUpToIcon() { return ( @@ -1384,6 +1386,13 @@ function ReasoningCard(props: ReasoningCardProps) { const viewHideLabel = () => expanded() ? t("messageBlock.reasoning.indicator.hide") : t("messageBlock.reasoning.indicator.view") + const speech = useSpeech({ + id: () => `${props.instanceId}:${props.sessionId}:${props.messageId}:${(props.part as any)?.id ?? 
"reasoning"}`, + text: reasoningText, + }) + + const canSpeakReasoning = () => reasoningText().trim().length > 0 && speech.canUseSpeech() + createEffect(() => { if (!expanded()) return reasoningText() @@ -1462,6 +1471,20 @@ function ReasoningCard(props: ReasoningCardProps) {
+ + { + event.preventDefault() + event.stopPropagation() + void speech.toggle() + }} + title={speech.buttonTitle()} + isLoading={speech.isLoading()} + isPlaying={speech.isPlaying()} + /> + + + + void speech.toggle()} + title={speech.buttonTitle()} + isLoading={speech.isLoading()} + isPlaying={speech.isPlaying()} + /> + + + + void speech.toggle()} + title={speech.buttonTitle()} + isLoading={speech.isLoading()} + isPlaying={speech.isPlaying()} + /> + + + ) +} diff --git a/packages/ui/src/components/tool-call.tsx b/packages/ui/src/components/tool-call.tsx index 918a3e51..572d178c 100644 --- a/packages/ui/src/components/tool-call.tsx +++ b/packages/ui/src/components/tool-call.tsx @@ -29,6 +29,7 @@ import type { ToolScrollHelpers, } from "./tool-call/types" import { + buildToolSpeechText, ensureMarkdownContent, getRelativePath, getToolIcon, @@ -41,6 +42,8 @@ import { } from "./tool-call/utils" import { resolveTitleForTool } from "./tool-call/tool-title" import { getLogger } from "../lib/logger" +import { useSpeech } from "../lib/hooks/use-speech" +import SpeechActionButton from "./speech-action-button" const log = getLogger("session") @@ -960,6 +963,21 @@ export default function ToolCall(props: ToolCallProps) { return renderToolTitle() }) + const speechText = createMemo(() => + buildToolSpeechText({ + title: headerText(), + state: toolState(), + t, + }), + ) + + const speech = useSpeech({ + id: () => `${props.instanceId}:${props.sessionId}:${props.messageId ?? 
"message"}:${toolCallIdentifier()}`, + text: speechText, + }) + + const canSpeakToolCall = () => speechText().trim().length > 0 && speech.canUseSpeech() + const handleCopyHeader = async (event: MouseEvent) => { event.preventDefault() event.stopPropagation() @@ -1023,6 +1041,16 @@ export default function ToolCall(props: ToolCallProps) { + + void speech.toggle()} + title={speech.buttonTitle()} + isLoading={speech.isLoading()} + isPlaying={speech.isPlaying()} + /> + + diff --git a/packages/ui/src/components/tool-call/utils.ts b/packages/ui/src/components/tool-call/utils.ts index d1ea1b6f..766d8d99 100644 --- a/packages/ui/src/components/tool-call/utils.ts +++ b/packages/ui/src/components/tool-call/utils.ts @@ -231,3 +231,37 @@ export function getDefaultToolAction(toolName: string) { return tGlobal("toolCall.renderer.action.working") } } + +export function buildToolSpeechText(options: { + title: string + state?: ToolState + t: (key: string, params?: Record) => string +}): string { + const sections: string[] = [] + + if (options.title.trim()) { + sections.push(options.title.trim()) + } + + const { input, output } = readToolStatePayload(options.state) + const formattedInput = formatUnknown(input) + const formattedOutput = formatUnknown(output) + + if (formattedInput?.text?.trim()) { + sections.push(`${options.t("toolCall.io.input")}:\n${formattedInput.text.trim()}`) + } + + if (formattedOutput?.text?.trim()) { + sections.push(`${options.t("toolCall.io.output")}:\n${formattedOutput.text.trim()}`) + } + + if (options.state?.status === "error" && options.state.error?.trim()) { + sections.push(`${options.t("toolCall.error.label")} ${options.state.error.trim()}`) + } + + if (sections.length === 1 && options.state?.status === "pending") { + sections.push(options.t("toolCall.pending.waitingToRun")) + } + + return sections.join("\n\n").trim() +} diff --git a/packages/ui/src/lib/hooks/use-speech.ts b/packages/ui/src/lib/hooks/use-speech.ts new file mode 100644 index 
00000000..c4335d93 --- /dev/null +++ b/packages/ui/src/lib/hooks/use-speech.ts @@ -0,0 +1,203 @@ +import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js" +import { showAlertDialog } from "../../stores/alerts" +import { serverApi } from "../api-client" +import { useI18n } from "../i18n" +import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech" + +type SpeechPlaybackState = "idle" | "loading" | "playing" + +interface UseSpeechOptions { + id: Accessor + text: Accessor +} + +interface ActivePlaybackEntry { + ownerId: string + stop: () => void +} + +const stateResetters = new Map void>() + +let activePlayback: ActivePlaybackEntry | null = null + +function resetOwnerState(ownerId: string) { + stateResetters.get(ownerId)?.() +} + +function stopActivePlayback(ownerId?: string) { + if (!activePlayback) return + if (ownerId && activePlayback.ownerId !== ownerId) return + const current = activePlayback + activePlayback = null + current.stop() +} + +function setActivePlayback(ownerId: string, stop: () => void) { + if (activePlayback?.ownerId === ownerId) { + activePlayback = { ownerId, stop } + return + } + + stopActivePlayback() + activePlayback = { ownerId, stop } +} + +export function useSpeech(options: UseSpeechOptions) { + const { t } = useI18n() + const [state, setState] = createSignal("idle") + + let requestVersion = 0 + let audio: HTMLAudioElement | null = null + let objectUrl: string | null = null + + createEffect(() => { + void loadSpeechCapabilities() + }) + + const cleanupAudio = () => { + if (audio) { + audio.pause() + audio.currentTime = 0 + audio.src = "" + audio.load() + audio = null + } + + if (objectUrl) { + URL.revokeObjectURL(objectUrl) + objectUrl = null + } + } + + const resetState = () => { + requestVersion += 1 + cleanupAudio() + setState("idle") + } + + stateResetters.set(options.id(), resetState) + + onCleanup(() => { + stateResetters.delete(options.id()) + stopActivePlayback(options.id()) + resetState() 
+ }) + + const isSupported = () => typeof window !== "undefined" && typeof window.Audio !== "undefined" + + const canUseSpeech = () => { + const capabilities = speechCapabilities() + return Boolean(isSupported() && capabilities?.available && capabilities?.configured && capabilities?.supportsTts) + } + + const stop = () => { + if (activePlayback?.ownerId === options.id()) { + activePlayback = null + } + resetState() + } + + const start = async () => { + const ownerId = options.id() + const text = options.text().trim() + if (!text || state() === "loading" || state() === "playing") return + + if (!isSupported()) { + showAlertDialog(t("messageItem.actions.speak.error.unsupported"), { + title: t("messageItem.actions.speak.error.title"), + variant: "error", + }) + return + } + + const capabilities = (await loadSpeechCapabilities()) ?? speechCapabilities() + if (!capabilities?.available || !capabilities?.configured || !capabilities?.supportsTts) { + showAlertDialog(t("messageItem.actions.speak.error.unavailable"), { + title: t("messageItem.actions.speak.error.title"), + variant: "error", + }) + return + } + + requestVersion += 1 + const currentRequest = requestVersion + stopActivePlayback() + cleanupAudio() + setState("loading") + + try { + const response = await serverApi.synthesizeSpeech({ + text, + format: "mp3", + }) + + if (currentRequest !== requestVersion) { + return + } + + const nextUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType) + const nextAudio = new Audio(nextUrl) + objectUrl = nextUrl + audio = nextAudio + + const finish = () => { + if (activePlayback?.ownerId === ownerId) { + activePlayback = null + } + resetOwnerState(ownerId) + } + + nextAudio.addEventListener("ended", finish, { once: true }) + nextAudio.addEventListener("error", finish, { once: true }) + + setActivePlayback(ownerId, () => { + cleanupAudio() + setState("idle") + }) + + setState("playing") + await nextAudio.play() + } catch (error) { + if (currentRequest !== 
requestVersion) { + return + } + resetState() + showAlertDialog(t("messageItem.actions.speak.error.generate"), { + title: t("messageItem.actions.speak.error.title"), + detail: error instanceof Error ? error.message : String(error), + variant: "error", + }) + } + } + + const toggle = async () => { + if (state() === "idle") { + await start() + return + } + stop() + } + + return { + state, + canUseSpeech, + isLoading: () => state() === "loading", + isPlaying: () => state() === "playing", + toggle, + stop, + buttonTitle: () => { + if (state() === "loading") return t("messageItem.actions.generatingSpeech") + if (state() === "playing") return t("messageItem.actions.stopSpeech") + return t("messageItem.actions.speak") + }, + } +} + +function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string { + const binary = atob(audioBase64) + const bytes = new Uint8Array(binary.length) + for (let index = 0; index < binary.length; index += 1) { + bytes[index] = binary.charCodeAt(index) + } + return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" })) +} diff --git a/packages/ui/src/lib/i18n/messages/en/messaging.ts b/packages/ui/src/lib/i18n/messages/en/messaging.ts index 7b8a574a..27701293 100644 --- a/packages/ui/src/lib/i18n/messages/en/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/en/messaging.ts @@ -75,6 +75,13 @@ export const messagingMessages = { "messageItem.actions.copy": "Copy", "messageItem.actions.copyTitle": "Copy message", "messageItem.actions.copied": "Copied!", + "messageItem.actions.speak": "Speak message", + "messageItem.actions.generatingSpeech": "Generating speech", + "messageItem.actions.stopSpeech": "Stop playback", + "messageItem.actions.speak.error.title": "Speech playback failed", + "messageItem.actions.speak.error.unsupported": "Speech playback is not supported in this browser.", + "messageItem.actions.speak.error.unavailable": "Speech playback is unavailable until speech settings are configured.", + 
"messageItem.actions.speak.error.generate": "Unable to generate speech for this message.", "messageItem.actions.deleteMessage": "Delete message (doesn't undo changes)", "messageItem.actions.deleteMessagesUpTo": "Delete messages up to here (doesn't undo changes)", "messageItem.actions.deletingMessage": "Deleting...", diff --git a/packages/ui/src/lib/i18n/messages/es/messaging.ts b/packages/ui/src/lib/i18n/messages/es/messaging.ts index 23ad076e..c5411592 100644 --- a/packages/ui/src/lib/i18n/messages/es/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/es/messaging.ts @@ -77,6 +77,13 @@ export const messagingMessages = { "messageItem.actions.copy": "Copiar", "messageItem.actions.copyTitle": "Copiar mensaje", "messageItem.actions.copied": "¡Copiado!", + "messageItem.actions.speak": "Reproducir mensaje", + "messageItem.actions.generatingSpeech": "Generando audio", + "messageItem.actions.stopSpeech": "Detener reproducción", + "messageItem.actions.speak.error.title": "La reproducción de voz falló", + "messageItem.actions.speak.error.unsupported": "La reproducción de voz no es compatible con este navegador.", + "messageItem.actions.speak.error.unavailable": "La reproducción de voz no estará disponible hasta que la configuración de voz esté lista.", + "messageItem.actions.speak.error.generate": "No se pudo generar audio para este mensaje.", "messageItem.actions.deleteMessage": "Eliminar mensaje (no deshace cambios)", "messageItem.actions.deleteMessagesUpTo": "Eliminar mensajes hasta aqui (no deshace cambios)", "messageItem.actions.deletingMessage": "Eliminando...", diff --git a/packages/ui/src/lib/i18n/messages/fr/messaging.ts b/packages/ui/src/lib/i18n/messages/fr/messaging.ts index 81a387db..9cebf563 100644 --- a/packages/ui/src/lib/i18n/messages/fr/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/fr/messaging.ts @@ -77,6 +77,13 @@ export const messagingMessages = { "messageItem.actions.copy": "Copier", "messageItem.actions.copyTitle": "Copier le message", 
"messageItem.actions.copied": "Copié !", + "messageItem.actions.speak": "Lire le message", + "messageItem.actions.generatingSpeech": "Génération de l'audio", + "messageItem.actions.stopSpeech": "Arrêter la lecture", + "messageItem.actions.speak.error.title": "La lecture vocale a échoué", + "messageItem.actions.speak.error.unsupported": "La lecture vocale n'est pas prise en charge dans ce navigateur.", + "messageItem.actions.speak.error.unavailable": "La lecture vocale n'est pas disponible tant que les paramètres vocaux ne sont pas configurés.", + "messageItem.actions.speak.error.generate": "Impossible de générer l'audio pour ce message.", "messageItem.actions.deleteMessage": "Supprimer le message (sans annuler les changements)", "messageItem.actions.deleteMessagesUpTo": "Supprimer les messages jusqu'ici (sans annuler les changements)", "messageItem.actions.deletingMessage": "Suppression...", diff --git a/packages/ui/src/lib/i18n/messages/he/messaging.ts b/packages/ui/src/lib/i18n/messages/he/messaging.ts index 0c450ff3..1b900be1 100644 --- a/packages/ui/src/lib/i18n/messages/he/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/he/messaging.ts @@ -75,6 +75,13 @@ export const messagingMessages = { "messageItem.actions.copy": "העתק", "messageItem.actions.copyTitle": "העתק הודעה", "messageItem.actions.copied": "הועתק!", + "messageItem.actions.speak": "השמע הודעה", + "messageItem.actions.generatingSpeech": "יוצר אודיו", + "messageItem.actions.stopSpeech": "עצור ניגון", + "messageItem.actions.speak.error.title": "ניגון הקול נכשל", + "messageItem.actions.speak.error.unsupported": "ניגון קול אינו נתמך בדפדפן הזה.", + "messageItem.actions.speak.error.unavailable": "ניגון קול לא זמין עד שהגדרות הקול יוגדרו.", + "messageItem.actions.speak.error.generate": "לא ניתן היה ליצור אודיו עבור ההודעה הזו.", "messageItem.actions.deleteMessage": "מחק הודעה (לא מבטל שינויים)", "messageItem.actions.deleteMessagesUpTo": "מחק הודעות עד כאן (לא מבטל שינויים)", 
"messageItem.actions.deletingMessage": "מוחק...", diff --git a/packages/ui/src/lib/i18n/messages/ja/messaging.ts b/packages/ui/src/lib/i18n/messages/ja/messaging.ts index 4cabba20..ef95ce22 100644 --- a/packages/ui/src/lib/i18n/messages/ja/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/ja/messaging.ts @@ -77,6 +77,13 @@ export const messagingMessages = { "messageItem.actions.copy": "コピー", "messageItem.actions.copyTitle": "メッセージをコピー", "messageItem.actions.copied": "コピーしました!", + "messageItem.actions.speak": "メッセージを読み上げ", + "messageItem.actions.generatingSpeech": "音声を生成中", + "messageItem.actions.stopSpeech": "再生を停止", + "messageItem.actions.speak.error.title": "音声再生に失敗しました", + "messageItem.actions.speak.error.unsupported": "このブラウザでは音声再生に対応していません。", + "messageItem.actions.speak.error.unavailable": "音声設定が完了するまで音声再生は利用できません。", + "messageItem.actions.speak.error.generate": "このメッセージの音声を生成できませんでした。", "messageItem.actions.deleteMessage": "メッセージを削除(変更は元に戻さない)", "messageItem.actions.deleteMessagesUpTo": "ここまでのメッセージを削除(変更は元に戻さない)", "messageItem.actions.deletingMessage": "削除中...", diff --git a/packages/ui/src/lib/i18n/messages/ru/messaging.ts b/packages/ui/src/lib/i18n/messages/ru/messaging.ts index 46006b6b..40547b3f 100644 --- a/packages/ui/src/lib/i18n/messages/ru/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/ru/messaging.ts @@ -77,6 +77,13 @@ export const messagingMessages = { "messageItem.actions.copy": "Копировать", "messageItem.actions.copyTitle": "Копировать сообщение", "messageItem.actions.copied": "Скопировано!", + "messageItem.actions.speak": "Озвучить сообщение", + "messageItem.actions.generatingSpeech": "Генерация аудио", + "messageItem.actions.stopSpeech": "Остановить воспроизведение", + "messageItem.actions.speak.error.title": "Не удалось воспроизвести речь", + "messageItem.actions.speak.error.unsupported": "В этом браузере воспроизведение речи не поддерживается.", + "messageItem.actions.speak.error.unavailable": "Воспроизведение речи недоступно, пока 
не настроены голосовые параметры.", + "messageItem.actions.speak.error.generate": "Не удалось сгенерировать аудио для этого сообщения.", "messageItem.actions.deleteMessage": "Удалить сообщение (без отката изменений)", "messageItem.actions.deleteMessagesUpTo": "Удалить сообщения до этого места (без отката изменений)", "messageItem.actions.deletingMessage": "Удаление...", diff --git a/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts b/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts index 76f0f203..4aa31036 100644 --- a/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts +++ b/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts @@ -77,6 +77,13 @@ export const messagingMessages = { "messageItem.actions.copy": "复制", "messageItem.actions.copyTitle": "复制消息", "messageItem.actions.copied": "已复制!", + "messageItem.actions.speak": "朗读消息", + "messageItem.actions.generatingSpeech": "正在生成语音", + "messageItem.actions.stopSpeech": "停止播放", + "messageItem.actions.speak.error.title": "语音播放失败", + "messageItem.actions.speak.error.unsupported": "此浏览器不支持语音播放。", + "messageItem.actions.speak.error.unavailable": "语音设置完成前,语音播放不可用。", + "messageItem.actions.speak.error.generate": "无法为这条消息生成语音。", "messageItem.actions.deleteMessage": "删除消息(不会撤销更改)", "messageItem.actions.deleteMessagesUpTo": "删除到此处的消息(不会撤销更改)", "messageItem.actions.deletingMessage": "正在删除...",