feat(speech): add configurable TTS playback modes

2026-03-26 20:46:49 +00:00
parent 740f37db86
commit d13ecba322
15 changed files with 561 additions and 55 deletions
--- a/packages/ui/src/components/settings/speech-settings-card.tsx
+++ b/packages/ui/src/components/settings/speech-settings-card.tsx
@@ -1,9 +1,10 @@
-import { Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
-import { Mic, Volume2 } from "lucide-solid"
+import { For, Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
+import { Loader2, Mic, Square, Volume2 } from "lucide-solid"
 import { useConfig, type SpeechSettings } from "../../stores/preferences"
 import { useI18n } from "../../lib/i18n"
 import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
 import { getLogger } from "../../lib/logger"
+import { useSpeech } from "../../lib/hooks/use-speech"

 const log = getLogger("actions")

@@ -13,6 +14,8 @@ type DraftFields = {
  sttModel: string
  ttsModel: string
  ttsVoice: string
+  playbackMode: SpeechSettings["playbackMode"]
+  ttsFormat: SpeechSettings["ttsFormat"]
 }

 function createDraftFields(speech: SpeechSettings): DraftFields {
@@ -22,11 +25,21 @@ function createDraftFields(speech: SpeechSettings): DraftFields {
    sttModel: speech.sttModel,
    ttsModel: speech.ttsModel,
    ttsVoice: speech.ttsVoice,
+    playbackMode: speech.playbackMode,
+    ttsFormat: speech.ttsFormat,
  }
 }

 function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
-  return a.apiKey === b.apiKey && a.baseUrl === b.baseUrl && a.sttModel === b.sttModel && a.ttsModel === b.ttsModel && a.ttsVoice === b.ttsVoice
+  return (
+    a.apiKey === b.apiKey &&
+    a.baseUrl === b.baseUrl &&
+    a.sttModel === b.sttModel &&
+    a.ttsModel === b.ttsModel &&
+    a.ttsVoice === b.ttsVoice &&
+    a.playbackMode === b.playbackMode &&
+    a.ttsFormat === b.ttsFormat
+  )
 }

 export const SpeechSettingsCard: Component = () => {
@@ -39,6 +52,15 @@ export const SpeechSettingsCard: Component = () => {
  const [apiKeyTouched, setApiKeyTouched] = createSignal(false)
  const [clearStoredApiKey, setClearStoredApiKey] = createSignal(false)

+  const testSpeech = useSpeech({
+    id: () => "settings-speech-test",
+    text: () => t("settings.speech.testPlayback.sample"),
+    settingsOverride: () => ({
+      playbackMode: drafts().playbackMode,
+      ttsFormat: drafts().ttsFormat,
+    }),
+  })
+
  createEffect(() => {
    const speech = serverSettings().speech
    const nextDrafts = createDraftFields(speech)
@@ -84,7 +106,9 @@ export const SpeechSettingsCard: Component = () => {
      (current.baseUrl || "") !== (speech.baseUrl || "") ||
      current.sttModel !== speech.sttModel ||
      current.ttsModel !== speech.ttsModel ||
-      current.ttsVoice !== speech.ttsVoice
+      current.ttsVoice !== speech.ttsVoice ||
+      current.playbackMode !== speech.playbackMode ||
+      current.ttsFormat !== speech.ttsFormat
    )
  })

@@ -108,6 +132,8 @@ export const SpeechSettingsCard: Component = () => {
        sttModel: current.sttModel.trim() || undefined,
        ttsModel: current.ttsModel.trim() || undefined,
        ttsVoice: current.ttsVoice.trim() || undefined,
+        playbackMode: current.playbackMode,
+        ttsFormat: current.ttsFormat,
      })
      await loadSpeechCapabilities(true)
      setDrafts({
@@ -116,6 +142,8 @@ export const SpeechSettingsCard: Component = () => {
        sttModel: current.sttModel.trim() || serverSettings().speech.sttModel,
        ttsModel: current.ttsModel.trim() || serverSettings().speech.ttsModel,
        ttsVoice: current.ttsVoice.trim() || serverSettings().speech.ttsVoice,
+        playbackMode: current.playbackMode,
+        ttsFormat: current.ttsFormat,
      })
      setApiKeyTouched(false)
      setClearStoredApiKey(false)
@@ -151,6 +179,32 @@ export const SpeechSettingsCard: Component = () => {
            <span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
            <span class="settings-inline-note">{capabilityLabel()}</span>
            <span class="settings-inline-note">{saveStatusLabel()}</span>
+            <button
+              type="button"
+              class="selector-button selector-button-secondary w-auto whitespace-nowrap inline-flex items-center gap-2"
+              onClick={() => void testSpeech.toggle()}
+              disabled={isSaving()}
+              title={testSpeech.buttonTitle()}
+              aria-label={testSpeech.buttonTitle()}
+            >
+              <Show
+                when={testSpeech.isLoading()}
+                fallback={
+                  <Show when={testSpeech.isPlaying()} fallback={<Volume2 class="w-3.5 h-3.5" aria-hidden="true" />}>
+                    <Square class="w-3.5 h-3.5" aria-hidden="true" />
+                  </Show>
+                }
+              >
+                <Loader2 class="w-3.5 h-3.5 animate-spin" aria-hidden="true" />
+              </Show>
+              <span>
+                {testSpeech.isPlaying()
+                  ? t("settings.speech.testPlayback.stop")
+                  : testSpeech.isLoading()
+                    ? t("settings.speech.testPlayback.generating")
+                    : t("settings.speech.testPlayback.action")}
+              </span>
+            </button>
            <button
              type="button"
              class="selector-button selector-button-primary w-auto whitespace-nowrap"
@@ -213,8 +267,31 @@ export const SpeechSettingsCard: Component = () => {
          onInput={(value) => updateDraft("ttsVoice", value)}
          icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
        />
+        <SelectField
+          label={t("settings.speech.playbackMode.title")}
+          caption={t("settings.speech.playbackMode.subtitle")}
+          value={drafts().playbackMode}
+          onInput={(value) => updateDraft("playbackMode", value as DraftFields["playbackMode"])}
+          options={[
+            { value: "streaming", label: t("settings.speech.playbackMode.streaming") },
+            { value: "buffered", label: t("settings.speech.playbackMode.buffered") },
+          ]}
+        />
+        <SelectField
+          label={t("settings.speech.ttsFormat.title")}
+          caption={t("settings.speech.ttsFormat.subtitle")}
+          value={drafts().ttsFormat}
+          onInput={(value) => updateDraft("ttsFormat", value as DraftFields["ttsFormat"])}
+          options={[
+            { value: "mp3", label: "MP3" },
+            { value: "wav", label: "WAV" },
+            { value: "opus", label: "Opus" },
+            { value: "aac", label: "AAC" },
+          ]}
+        />

        <div class="settings-inline-note">{t("settings.speech.help")}</div>
+        <div class="settings-inline-note">{t("settings.speech.testPlayback.note")}</div>
      </div>
    </div>
  )
@@ -249,4 +326,26 @@ const Field: Component<{
  )
 }

+const SelectField: Component<{
+  label: string
+  caption: string
+  value: string
+  onInput: (value: string) => void
+  options: Array<{ value: string; label: string }>
+}> = (props) => {
+  return (
+    <div class="settings-toggle-row settings-toggle-row-compact">
+      <div>
+        <div class="settings-toggle-title">{props.label}</div>
+        <div class="settings-toggle-caption">{props.caption}</div>
+      </div>
+      <div class="min-w-[18rem] max-w-[24rem] w-full">
+        <select value={props.value} onInput={(event) => props.onInput(event.currentTarget.value)} class="selector-input w-full">
+          <For each={props.options}>{(option) => <option value={option.value}>{option.label}</option>}</For>
+        </select>
+      </div>
+    </div>
+  )
+}
+
 export default SpeechSettingsCard