feat(ui): add message text-to-speech controls

This commit is contained in:
Shantur Rathore
2026-03-26 18:29:45 +00:00
parent 1233121a13
commit d447b05821
13 changed files with 397 additions and 0 deletions

View File

@@ -14,6 +14,8 @@ import { showAlertDialog } from "../stores/alerts"
import { deleteMessage } from "../stores/session-actions"
import { useI18n } from "../lib/i18n"
import type { DeleteHoverState } from "../types/delete-hover"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
function DeleteUpToIcon() {
return (
@@ -1384,6 +1386,13 @@ function ReasoningCard(props: ReasoningCardProps) {
const viewHideLabel = () =>
expanded() ? t("messageBlock.reasoning.indicator.hide") : t("messageBlock.reasoning.indicator.view")
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.messageId}:${(props.part as any)?.id ?? "reasoning"}`,
text: reasoningText,
})
const canSpeakReasoning = () => reasoningText().trim().length > 0 && speech.canUseSpeech()
createEffect(() => {
if (!expanded()) return
reasoningText()
@@ -1462,6 +1471,20 @@ function ReasoningCard(props: ReasoningCardProps) {
</button>
<div class="message-reasoning-actions">
<Show when={canSpeakReasoning()}>
<SpeechActionButton
class="message-action-button"
onClick={(event) => {
event.preventDefault()
event.stopPropagation()
void speech.toggle()
}}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<button
type="button"
class="message-action-button"

View File

@@ -11,6 +11,8 @@ import { showAlertDialog } from "../stores/alerts"
import { deleteMessage } from "../stores/session-actions"
import { isTauriHost } from "../lib/runtime-env"
import type { DeleteHoverState } from "../types/delete-hover"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
function DeleteUpToIcon() {
return (
@@ -294,6 +296,13 @@ export default function MessageItem(props: MessageItemProps) {
.join("\n\n")
}
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.record.id}`,
text: getRawContent,
})
const canSpeakMessage = () => getRawContent().trim().length > 0 && speech.canUseSpeech()
const handleCopy = async () => {
const content = getRawContent()
if (!content) return
@@ -443,6 +452,16 @@ export default function MessageItem(props: MessageItemProps) {
<Copy class="w-3.5 h-3.5" aria-hidden="true" />
</button>
<Show when={canSpeakMessage()}>
<SpeechActionButton
class="message-action-button"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<Show when={props.onFork}>
<button
class="message-action-button"
@@ -503,6 +522,16 @@ export default function MessageItem(props: MessageItemProps) {
<Copy class="w-3.5 h-3.5" aria-hidden="true" />
</button>
<Show when={canSpeakMessage()}>
<SpeechActionButton
class="message-action-button"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<Show when={props.showDeleteMessage}>
<button
class="message-action-button"

View File

@@ -0,0 +1,31 @@
import { Loader2, Square, Volume2 } from "lucide-solid"
import type { JSX } from "solid-js"
interface SpeechActionButtonProps {
class?: string
title: string
isLoading: boolean
isPlaying: boolean
onClick: JSX.EventHandlerUnion<HTMLButtonElement, MouseEvent>
type?: "button" | "submit" | "reset"
}
export default function SpeechActionButton(props: SpeechActionButtonProps) {
return (
<button
type={props.type ?? "button"}
class={props.class}
onClick={props.onClick}
aria-label={props.title}
title={props.title}
>
{props.isLoading ? (
<Loader2 class="w-3.5 h-3.5 animate-spin" aria-hidden="true" />
) : props.isPlaying ? (
<Square class="w-3.5 h-3.5" aria-hidden="true" />
) : (
<Volume2 class="w-3.5 h-3.5" aria-hidden="true" />
)}
</button>
)
}

View File

@@ -29,6 +29,7 @@ import type {
ToolScrollHelpers,
} from "./tool-call/types"
import {
buildToolSpeechText,
ensureMarkdownContent,
getRelativePath,
getToolIcon,
@@ -41,6 +42,8 @@ import {
} from "./tool-call/utils"
import { resolveTitleForTool } from "./tool-call/tool-title"
import { getLogger } from "../lib/logger"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
const log = getLogger("session")
@@ -960,6 +963,21 @@ export default function ToolCall(props: ToolCallProps) {
return renderToolTitle()
})
const speechText = createMemo(() =>
buildToolSpeechText({
title: headerText(),
state: toolState(),
t,
}),
)
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.messageId ?? "message"}:${toolCallIdentifier()}`,
text: speechText,
})
const canSpeakToolCall = () => speechText().trim().length > 0 && speech.canUseSpeech()
const handleCopyHeader = async (event: MouseEvent) => {
event.preventDefault()
event.stopPropagation()
@@ -1023,6 +1041,16 @@ export default function ToolCall(props: ToolCallProps) {
<Copy class="w-3.5 h-3.5" />
</button>
<Show when={canSpeakToolCall()}>
<SpeechActionButton
class="tool-call-header-copy"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<span class="tool-call-header-status" aria-hidden="true">
{statusIcon()}
</span>

View File

@@ -231,3 +231,37 @@ export function getDefaultToolAction(toolName: string) {
return tGlobal("toolCall.renderer.action.working")
}
}
export function buildToolSpeechText(options: {
title: string
state?: ToolState
t: (key: string, params?: Record<string, unknown>) => string
}): string {
const sections: string[] = []
if (options.title.trim()) {
sections.push(options.title.trim())
}
const { input, output } = readToolStatePayload(options.state)
const formattedInput = formatUnknown(input)
const formattedOutput = formatUnknown(output)
if (formattedInput?.text?.trim()) {
sections.push(`${options.t("toolCall.io.input")}:\n${formattedInput.text.trim()}`)
}
if (formattedOutput?.text?.trim()) {
sections.push(`${options.t("toolCall.io.output")}:\n${formattedOutput.text.trim()}`)
}
if (options.state?.status === "error" && options.state.error?.trim()) {
sections.push(`${options.t("toolCall.error.label")} ${options.state.error.trim()}`)
}
if (sections.length === 1 && options.state?.status === "pending") {
sections.push(options.t("toolCall.pending.waitingToRun"))
}
return sections.join("\n\n").trim()
}

View File

@@ -0,0 +1,203 @@
import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
import { showAlertDialog } from "../../stores/alerts"
import { serverApi } from "../api-client"
import { useI18n } from "../i18n"
import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
type SpeechPlaybackState = "idle" | "loading" | "playing"
interface UseSpeechOptions {
id: Accessor<string>
text: Accessor<string>
}
interface ActivePlaybackEntry {
ownerId: string
stop: () => void
}
const stateResetters = new Map<string, () => void>()
let activePlayback: ActivePlaybackEntry | null = null
function resetOwnerState(ownerId: string) {
stateResetters.get(ownerId)?.()
}
function stopActivePlayback(ownerId?: string) {
if (!activePlayback) return
if (ownerId && activePlayback.ownerId !== ownerId) return
const current = activePlayback
activePlayback = null
current.stop()
}
function setActivePlayback(ownerId: string, stop: () => void) {
if (activePlayback?.ownerId === ownerId) {
activePlayback = { ownerId, stop }
return
}
stopActivePlayback()
activePlayback = { ownerId, stop }
}
export function useSpeech(options: UseSpeechOptions) {
const { t } = useI18n()
const [state, setState] = createSignal<SpeechPlaybackState>("idle")
let requestVersion = 0
let audio: HTMLAudioElement | null = null
let objectUrl: string | null = null
createEffect(() => {
void loadSpeechCapabilities()
})
const cleanupAudio = () => {
if (audio) {
audio.pause()
audio.currentTime = 0
audio.src = ""
audio.load()
audio = null
}
if (objectUrl) {
URL.revokeObjectURL(objectUrl)
objectUrl = null
}
}
const resetState = () => {
requestVersion += 1
cleanupAudio()
setState("idle")
}
stateResetters.set(options.id(), resetState)
onCleanup(() => {
stateResetters.delete(options.id())
stopActivePlayback(options.id())
resetState()
})
const isSupported = () => typeof window !== "undefined" && typeof window.Audio !== "undefined"
const canUseSpeech = () => {
const capabilities = speechCapabilities()
return Boolean(isSupported() && capabilities?.available && capabilities?.configured && capabilities?.supportsTts)
}
const stop = () => {
if (activePlayback?.ownerId === options.id()) {
activePlayback = null
}
resetState()
}
const start = async () => {
const ownerId = options.id()
const text = options.text().trim()
if (!text || state() === "loading" || state() === "playing") return
if (!isSupported()) {
showAlertDialog(t("messageItem.actions.speak.error.unsupported"), {
title: t("messageItem.actions.speak.error.title"),
variant: "error",
})
return
}
const capabilities = (await loadSpeechCapabilities()) ?? speechCapabilities()
if (!capabilities?.available || !capabilities?.configured || !capabilities?.supportsTts) {
showAlertDialog(t("messageItem.actions.speak.error.unavailable"), {
title: t("messageItem.actions.speak.error.title"),
variant: "error",
})
return
}
requestVersion += 1
const currentRequest = requestVersion
stopActivePlayback()
cleanupAudio()
setState("loading")
try {
const response = await serverApi.synthesizeSpeech({
text,
format: "mp3",
})
if (currentRequest !== requestVersion) {
return
}
const nextUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType)
const nextAudio = new Audio(nextUrl)
objectUrl = nextUrl
audio = nextAudio
const finish = () => {
if (activePlayback?.ownerId === ownerId) {
activePlayback = null
}
resetOwnerState(ownerId)
}
nextAudio.addEventListener("ended", finish, { once: true })
nextAudio.addEventListener("error", finish, { once: true })
setActivePlayback(ownerId, () => {
cleanupAudio()
setState("idle")
})
setState("playing")
await nextAudio.play()
} catch (error) {
if (currentRequest !== requestVersion) {
return
}
resetState()
showAlertDialog(t("messageItem.actions.speak.error.generate"), {
title: t("messageItem.actions.speak.error.title"),
detail: error instanceof Error ? error.message : String(error),
variant: "error",
})
}
}
const toggle = async () => {
if (state() === "idle") {
await start()
return
}
stop()
}
return {
state,
canUseSpeech,
isLoading: () => state() === "loading",
isPlaying: () => state() === "playing",
toggle,
stop,
buttonTitle: () => {
if (state() === "loading") return t("messageItem.actions.generatingSpeech")
if (state() === "playing") return t("messageItem.actions.stopSpeech")
return t("messageItem.actions.speak")
},
}
}
function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string {
const binary = atob(audioBase64)
const bytes = new Uint8Array(binary.length)
for (let index = 0; index < binary.length; index += 1) {
bytes[index] = binary.charCodeAt(index)
}
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
}

View File

@@ -75,6 +75,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copy",
"messageItem.actions.copyTitle": "Copy message",
"messageItem.actions.copied": "Copied!",
"messageItem.actions.speak": "Speak message",
"messageItem.actions.generatingSpeech": "Generating speech",
"messageItem.actions.stopSpeech": "Stop playback",
"messageItem.actions.speak.error.title": "Speech playback failed",
"messageItem.actions.speak.error.unsupported": "Speech playback is not supported in this browser.",
"messageItem.actions.speak.error.unavailable": "Speech playback is unavailable until speech settings are configured.",
"messageItem.actions.speak.error.generate": "Unable to generate speech for this message.",
"messageItem.actions.deleteMessage": "Delete message (doesn't undo changes)",
"messageItem.actions.deleteMessagesUpTo": "Delete messages up to here (doesn't undo changes)",
"messageItem.actions.deletingMessage": "Deleting...",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copiar",
"messageItem.actions.copyTitle": "Copiar mensaje",
"messageItem.actions.copied": "¡Copiado!",
"messageItem.actions.speak": "Reproducir mensaje",
"messageItem.actions.generatingSpeech": "Generando audio",
"messageItem.actions.stopSpeech": "Detener reproduccion",
"messageItem.actions.speak.error.title": "La reproduccion de voz fallo",
"messageItem.actions.speak.error.unsupported": "La reproduccion de voz no es compatible con este navegador.",
"messageItem.actions.speak.error.unavailable": "La reproduccion de voz no estara disponible hasta que la configuracion de voz este lista.",
"messageItem.actions.speak.error.generate": "No se pudo generar audio para este mensaje.",
"messageItem.actions.deleteMessage": "Eliminar mensaje (no deshace cambios)",
"messageItem.actions.deleteMessagesUpTo": "Eliminar mensajes hasta aqui (no deshace cambios)",
"messageItem.actions.deletingMessage": "Eliminando...",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copier",
"messageItem.actions.copyTitle": "Copier le message",
"messageItem.actions.copied": "Copié !",
"messageItem.actions.speak": "Lire le message",
"messageItem.actions.generatingSpeech": "Generation de l'audio",
"messageItem.actions.stopSpeech": "Arreter la lecture",
"messageItem.actions.speak.error.title": "La lecture vocale a echoue",
"messageItem.actions.speak.error.unsupported": "La lecture vocale n'est pas prise en charge dans ce navigateur.",
"messageItem.actions.speak.error.unavailable": "La lecture vocale n'est pas disponible tant que les parametres vocaux ne sont pas configures.",
"messageItem.actions.speak.error.generate": "Impossible de generer l'audio pour ce message.",
"messageItem.actions.deleteMessage": "Supprimer le message (sans annuler les changements)",
"messageItem.actions.deleteMessagesUpTo": "Supprimer les messages jusqu'ici (sans annuler les changements)",
"messageItem.actions.deletingMessage": "Suppression...",

View File

@@ -75,6 +75,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "העתק",
"messageItem.actions.copyTitle": "העתק הודעה",
"messageItem.actions.copied": "הועתק!",
"messageItem.actions.speak": "השמע הודעה",
"messageItem.actions.generatingSpeech": "יוצר אודיו",
"messageItem.actions.stopSpeech": "עצור ניגון",
"messageItem.actions.speak.error.title": "ניגון הקול נכשל",
"messageItem.actions.speak.error.unsupported": "ניגון קול אינו נתמך בדפדפן הזה.",
"messageItem.actions.speak.error.unavailable": "ניגון קול לא זמין עד שהגדרות הקול יוגדרו.",
"messageItem.actions.speak.error.generate": "לא ניתן היה ליצור אודיו עבור ההודעה הזו.",
"messageItem.actions.deleteMessage": "מחק הודעה (לא מבטל שינויים)",
"messageItem.actions.deleteMessagesUpTo": "מחק הודעות עד כאן (לא מבטל שינויים)",
"messageItem.actions.deletingMessage": "מוחק...",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "コピー",
"messageItem.actions.copyTitle": "メッセージをコピー",
"messageItem.actions.copied": "コピーしました!",
"messageItem.actions.speak": "メッセージを読み上げ",
"messageItem.actions.generatingSpeech": "音声を生成中",
"messageItem.actions.stopSpeech": "再生を停止",
"messageItem.actions.speak.error.title": "音声再生に失敗しました",
"messageItem.actions.speak.error.unsupported": "このブラウザでは音声再生に対応していません。",
"messageItem.actions.speak.error.unavailable": "音声設定が完了するまで音声再生は利用できません。",
"messageItem.actions.speak.error.generate": "このメッセージの音声を生成できませんでした。",
"messageItem.actions.deleteMessage": "メッセージを削除(変更は元に戻さない)",
"messageItem.actions.deleteMessagesUpTo": "ここまでのメッセージを削除(変更は元に戻さない)",
"messageItem.actions.deletingMessage": "削除中...",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Копировать",
"messageItem.actions.copyTitle": "Копировать сообщение",
"messageItem.actions.copied": "Скопировано!",
"messageItem.actions.speak": "Озвучить сообщение",
"messageItem.actions.generatingSpeech": "Генерация аудио",
"messageItem.actions.stopSpeech": "Остановить воспроизведение",
"messageItem.actions.speak.error.title": "Не удалось воспроизвести речь",
"messageItem.actions.speak.error.unsupported": "В этом браузере воспроизведение речи не поддерживается.",
"messageItem.actions.speak.error.unavailable": "Воспроизведение речи недоступно, пока не настроены голосовые параметры.",
"messageItem.actions.speak.error.generate": "Не удалось сгенерировать аудио для этого сообщения.",
"messageItem.actions.deleteMessage": "Удалить сообщение (без отката изменений)",
"messageItem.actions.deleteMessagesUpTo": "Удалить сообщения до этого места (без отката изменений)",
"messageItem.actions.deletingMessage": "Удаление...",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "复制",
"messageItem.actions.copyTitle": "复制消息",
"messageItem.actions.copied": "已复制!",
"messageItem.actions.speak": "朗读消息",
"messageItem.actions.generatingSpeech": "正在生成语音",
"messageItem.actions.stopSpeech": "停止播放",
"messageItem.actions.speak.error.title": "语音播放失败",
"messageItem.actions.speak.error.unsupported": "此浏览器不支持语音播放。",
"messageItem.actions.speak.error.unavailable": "语音设置完成前,语音播放不可用。",
"messageItem.actions.speak.error.generate": "无法为这条消息生成语音。",
"messageItem.actions.deleteMessage": "删除消息(不会撤销更改)",
"messageItem.actions.deleteMessagesUpTo": "删除到此处的消息(不会撤销更改)",
"messageItem.actions.deletingMessage": "正在删除...",