feat(speech): add configurable TTS playback modes

This commit is contained in:
Shantur Rathore
2026-03-26 20:46:49 +00:00
parent 740f37db86
commit d13ecba322
15 changed files with 561 additions and 55 deletions

View File

@@ -1,9 +1,10 @@
import { Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
import { Mic, Volume2 } from "lucide-solid"
import { For, Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
import { Loader2, Mic, Square, Volume2 } from "lucide-solid"
import { useConfig, type SpeechSettings } from "../../stores/preferences"
import { useI18n } from "../../lib/i18n"
import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
import { getLogger } from "../../lib/logger"
import { useSpeech } from "../../lib/hooks/use-speech"
const log = getLogger("actions")
@@ -13,6 +14,8 @@ type DraftFields = {
sttModel: string
ttsModel: string
ttsVoice: string
playbackMode: SpeechSettings["playbackMode"]
ttsFormat: SpeechSettings["ttsFormat"]
}
function createDraftFields(speech: SpeechSettings): DraftFields {
@@ -22,11 +25,21 @@ function createDraftFields(speech: SpeechSettings): DraftFields {
sttModel: speech.sttModel,
ttsModel: speech.ttsModel,
ttsVoice: speech.ttsVoice,
playbackMode: speech.playbackMode,
ttsFormat: speech.ttsFormat,
}
}
function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
return a.apiKey === b.apiKey && a.baseUrl === b.baseUrl && a.sttModel === b.sttModel && a.ttsModel === b.ttsModel && a.ttsVoice === b.ttsVoice
return (
a.apiKey === b.apiKey &&
a.baseUrl === b.baseUrl &&
a.sttModel === b.sttModel &&
a.ttsModel === b.ttsModel &&
a.ttsVoice === b.ttsVoice &&
a.playbackMode === b.playbackMode &&
a.ttsFormat === b.ttsFormat
)
}
export const SpeechSettingsCard: Component = () => {
@@ -39,6 +52,15 @@ export const SpeechSettingsCard: Component = () => {
const [apiKeyTouched, setApiKeyTouched] = createSignal(false)
const [clearStoredApiKey, setClearStoredApiKey] = createSignal(false)
const testSpeech = useSpeech({
id: () => "settings-speech-test",
text: () => t("settings.speech.testPlayback.sample"),
settingsOverride: () => ({
playbackMode: drafts().playbackMode,
ttsFormat: drafts().ttsFormat,
}),
})
createEffect(() => {
const speech = serverSettings().speech
const nextDrafts = createDraftFields(speech)
@@ -84,7 +106,9 @@ export const SpeechSettingsCard: Component = () => {
(current.baseUrl || "") !== (speech.baseUrl || "") ||
current.sttModel !== speech.sttModel ||
current.ttsModel !== speech.ttsModel ||
current.ttsVoice !== speech.ttsVoice
current.ttsVoice !== speech.ttsVoice ||
current.playbackMode !== speech.playbackMode ||
current.ttsFormat !== speech.ttsFormat
)
})
@@ -108,6 +132,8 @@ export const SpeechSettingsCard: Component = () => {
sttModel: current.sttModel.trim() || undefined,
ttsModel: current.ttsModel.trim() || undefined,
ttsVoice: current.ttsVoice.trim() || undefined,
playbackMode: current.playbackMode,
ttsFormat: current.ttsFormat,
})
await loadSpeechCapabilities(true)
setDrafts({
@@ -116,6 +142,8 @@ export const SpeechSettingsCard: Component = () => {
sttModel: current.sttModel.trim() || serverSettings().speech.sttModel,
ttsModel: current.ttsModel.trim() || serverSettings().speech.ttsModel,
ttsVoice: current.ttsVoice.trim() || serverSettings().speech.ttsVoice,
playbackMode: current.playbackMode,
ttsFormat: current.ttsFormat,
})
setApiKeyTouched(false)
setClearStoredApiKey(false)
@@ -151,6 +179,32 @@ export const SpeechSettingsCard: Component = () => {
<span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
<span class="settings-inline-note">{capabilityLabel()}</span>
<span class="settings-inline-note">{saveStatusLabel()}</span>
<button
type="button"
class="selector-button selector-button-secondary w-auto whitespace-nowrap inline-flex items-center gap-2"
onClick={() => void testSpeech.toggle()}
disabled={isSaving()}
title={testSpeech.buttonTitle()}
aria-label={testSpeech.buttonTitle()}
>
<Show
when={testSpeech.isLoading()}
fallback={
<Show when={testSpeech.isPlaying()} fallback={<Volume2 class="w-3.5 h-3.5" aria-hidden="true" />}>
<Square class="w-3.5 h-3.5" aria-hidden="true" />
</Show>
}
>
<Loader2 class="w-3.5 h-3.5 animate-spin" aria-hidden="true" />
</Show>
<span>
{testSpeech.isPlaying()
? t("settings.speech.testPlayback.stop")
: testSpeech.isLoading()
? t("settings.speech.testPlayback.generating")
: t("settings.speech.testPlayback.action")}
</span>
</button>
<button
type="button"
class="selector-button selector-button-primary w-auto whitespace-nowrap"
@@ -213,8 +267,31 @@ export const SpeechSettingsCard: Component = () => {
onInput={(value) => updateDraft("ttsVoice", value)}
icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
/>
<SelectField
label={t("settings.speech.playbackMode.title")}
caption={t("settings.speech.playbackMode.subtitle")}
value={drafts().playbackMode}
onInput={(value) => updateDraft("playbackMode", value as DraftFields["playbackMode"])}
options={[
{ value: "streaming", label: t("settings.speech.playbackMode.streaming") },
{ value: "buffered", label: t("settings.speech.playbackMode.buffered") },
]}
/>
<SelectField
label={t("settings.speech.ttsFormat.title")}
caption={t("settings.speech.ttsFormat.subtitle")}
value={drafts().ttsFormat}
onInput={(value) => updateDraft("ttsFormat", value as DraftFields["ttsFormat"])}
options={[
{ value: "mp3", label: "MP3" },
{ value: "wav", label: "WAV" },
{ value: "opus", label: "Opus" },
{ value: "aac", label: "AAC" },
]}
/>
<div class="settings-inline-note">{t("settings.speech.help")}</div>
<div class="settings-inline-note">{t("settings.speech.testPlayback.note")}</div>
</div>
</div>
)
@@ -249,4 +326,26 @@ const Field: Component<{
)
}
const SelectField: Component<{
label: string
caption: string
value: string
onInput: (value: string) => void
options: Array<{ value: string; label: string }>
}> = (props) => {
return (
<div class="settings-toggle-row settings-toggle-row-compact">
<div>
<div class="settings-toggle-title">{props.label}</div>
<div class="settings-toggle-caption">{props.caption}</div>
</div>
<div class="min-w-[18rem] max-w-[24rem] w-full">
<select value={props.value} onInput={(event) => props.onInput(event.currentTarget.value)} class="selector-input w-full">
<For each={props.options}>{(option) => <option value={option.value}>{option.label}</option>}</For>
</select>
</div>
</div>
)
}
export default SpeechSettingsCard