feat(speech): add realtime prompt dictation support

Add server-backed realtime transcription for prompt voice input, and expose speech settings for enabling realtime mode and selecting models.
This commit is contained in:
Shantur Rathore
2026-03-19 11:32:45 +00:00
parent cc2f6976f6
commit f9b5e2b529
29 changed files with 1572 additions and 263 deletions

View File

@@ -34,6 +34,8 @@ export interface SpeechSettings {
provider: SpeechProviderPreference
apiKey?: string
baseUrl?: string
useRealtime: boolean
realtimeModel: string
sttModel: string
ttsModel: string
ttsVoice: string
@@ -136,6 +138,8 @@ const defaultUiSettings: UiSettings = {
const defaultSpeechSettings: SpeechSettings = {
provider: "openai-compatible",
useRealtime: true,
realtimeModel: "gpt-realtime",
sttModel: "gpt-4o-mini-transcribe",
ttsModel: "gpt-4o-mini-tts",
ttsVoice: "alloy",
@@ -184,6 +188,11 @@ function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): Speech
provider: sanitized.provider === "openai-compatible" ? sanitized.provider : defaultSpeechSettings.provider,
apiKey: typeof sanitized.apiKey === "string" && sanitized.apiKey.trim() ? sanitized.apiKey.trim() : undefined,
baseUrl: typeof sanitized.baseUrl === "string" && sanitized.baseUrl.trim() ? sanitized.baseUrl.trim() : undefined,
useRealtime: sanitized.useRealtime ?? defaultSpeechSettings.useRealtime,
realtimeModel:
typeof sanitized.realtimeModel === "string" && sanitized.realtimeModel.trim()
? sanitized.realtimeModel.trim()
: defaultSpeechSettings.realtimeModel,
sttModel:
typeof sanitized.sttModel === "string" && sanitized.sttModel.trim()
? sanitized.sttModel.trim()