feat(speech): add prompt voice input groundwork

This commit is contained in:
Shantur Rathore
2026-03-12 22:04:57 +00:00
parent d9068ac8c6
commit 0ed19aeefb
34 changed files with 1284 additions and 9 deletions

View File

@@ -7,6 +7,7 @@ import {
updateInstanceConfig as updateInstanceData,
} from "./instance-config"
import { getLogger } from "../lib/logger"
import { loadSpeechCapabilities, resetSpeechCapabilities } from "./speech"
const log = getLogger("actions")
@@ -27,6 +28,16 @@ export type DiffViewMode = "split" | "unified"
export type ExpansionPreference = "expanded" | "collapsed"
export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
export type ListeningMode = "local" | "all"
export type SpeechProviderPreference = "openai-compatible"
export interface SpeechSettings {
provider: SpeechProviderPreference
apiKey?: string
baseUrl?: string
sttModel: string
ttsModel: string
ttsVoice: string
}
export interface UiSettings {
showThinkingBlocks: boolean
@@ -34,6 +45,7 @@ export interface UiSettings {
thinkingBlocksExpansion: ExpansionPreference
showTimelineTools: boolean
promptSubmitOnEnter: boolean
showPromptVoiceInput: boolean
locale?: string
diffViewMode: DiffViewMode
toolOutputExpansion: ExpansionPreference
@@ -75,6 +87,7 @@ interface ServerConfigBucket {
listeningMode?: ListeningMode
environmentVariables?: Record<string, string>
opencodeBinary?: string
speech?: Partial<SpeechSettings>
}
interface UiStateBucket {
@@ -107,6 +120,7 @@ const defaultUiSettings: UiSettings = {
thinkingBlocksExpansion: "expanded",
showTimelineTools: true,
promptSubmitOnEnter: false,
showPromptVoiceInput: true,
diffViewMode: "split",
toolOutputExpansion: "expanded",
diagnosticsExpansion: "expanded",
@@ -120,6 +134,13 @@ const defaultUiSettings: UiSettings = {
notifyOnIdle: true,
}
const defaultSpeechSettings: SpeechSettings = {
provider: "openai-compatible",
sttModel: "gpt-4o-mini-transcribe",
ttsModel: "gpt-4o-mini-tts",
ttsVoice: "alloy",
}
function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
const sanitized = input ?? {}
return {
@@ -129,6 +150,7 @@ function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
thinkingBlocksExpansion: sanitized.thinkingBlocksExpansion ?? defaultUiSettings.thinkingBlocksExpansion,
showTimelineTools: sanitized.showTimelineTools ?? defaultUiSettings.showTimelineTools,
promptSubmitOnEnter: sanitized.promptSubmitOnEnter ?? defaultUiSettings.promptSubmitOnEnter,
showPromptVoiceInput: sanitized.showPromptVoiceInput ?? defaultUiSettings.showPromptVoiceInput,
locale: sanitized.locale ?? defaultUiSettings.locale,
diffViewMode: sanitized.diffViewMode ?? defaultUiSettings.diffViewMode,
toolOutputExpansion: sanitized.toolOutputExpansion ?? defaultUiSettings.toolOutputExpansion,
@@ -156,6 +178,27 @@ function normalizeRecord(value: unknown): Record<string, string> {
return out
}
function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): SpeechSettings {
const sanitized = input ?? {}
return {
provider: sanitized.provider === "openai-compatible" ? sanitized.provider : defaultSpeechSettings.provider,
apiKey: typeof sanitized.apiKey === "string" && sanitized.apiKey.trim() ? sanitized.apiKey.trim() : undefined,
baseUrl: typeof sanitized.baseUrl === "string" && sanitized.baseUrl.trim() ? sanitized.baseUrl.trim() : undefined,
sttModel:
typeof sanitized.sttModel === "string" && sanitized.sttModel.trim()
? sanitized.sttModel.trim()
: defaultSpeechSettings.sttModel,
ttsModel:
typeof sanitized.ttsModel === "string" && sanitized.ttsModel.trim()
? sanitized.ttsModel.trim()
: defaultSpeechSettings.ttsModel,
ttsVoice:
typeof sanitized.ttsVoice === "string" && sanitized.ttsVoice.trim()
? sanitized.ttsVoice.trim()
: defaultSpeechSettings.ttsVoice,
}
}
function cloneArray<T>(value: unknown, mapper: (item: any) => T | null): T[] {
if (!Array.isArray(value)) return []
const out: T[] = []
@@ -206,12 +249,15 @@ function normalizeUiState(input?: UiStateBucket | null): NormalizedUiState {
}
}
function normalizeServerConfig(input?: ServerConfigBucket | null): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> {
function normalizeServerConfig(
input?: ServerConfigBucket | null,
): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> & { speech: SpeechSettings } {
const source = input ?? {}
const listeningMode = source.listeningMode === "all" ? "all" : "local"
const opencodeBinary = typeof source.opencodeBinary === "string" && source.opencodeBinary.trim() ? source.opencodeBinary : "opencode"
const environmentVariables = normalizeRecord(source.environmentVariables)
return { listeningMode, opencodeBinary, environmentVariables }
const speech = normalizeSpeechSettings(source.speech)
return { listeningMode, opencodeBinary, environmentVariables, speech }
}
function getModelKey(model: { providerId: string; modelId: string }): string {
@@ -342,6 +388,16 @@ function updateLastUsedBinary(path: string): void {
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to update binary list", error))
}
async function updateSpeechSettings(updates: Partial<SpeechSettings>): Promise<void> {
const next = normalizeSpeechSettings({ ...serverSettings().speech, ...updates })
try {
await patchConfigOwner("server", { speech: next })
} catch (error) {
log.error("Failed to update speech settings", error)
throw error
}
}
function addOpenCodeBinary(path: string, version?: string): void {
const nextList = buildBinaryList(path, version, opencodeBinaries())
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to add binary", error))
@@ -476,6 +532,10 @@ function togglePromptSubmitOnEnter(): void {
updateUiSettings({ promptSubmitOnEnter: !preferences().promptSubmitOnEnter })
}
function toggleShowPromptVoiceInput(): void {
updateUiSettings({ showPromptVoiceInput: !preferences().showPromptVoiceInput })
}
function toggleAutoCleanupBlankSessions(): void {
const nextValue = !preferences().autoCleanupBlankSessions
log.info("toggle auto cleanup", { value: nextValue })
@@ -521,6 +581,7 @@ interface ConfigContextValue {
addEnvironmentVariable: typeof addEnvironmentVariable
removeEnvironmentVariable: typeof removeEnvironmentVariable
updateLastUsedBinary: typeof updateLastUsedBinary
updateSpeechSettings: typeof updateSpeechSettings
// ui-owned state
recentFolders: typeof recentFolders
@@ -544,6 +605,7 @@ interface ConfigContextValue {
toggleUsageMetrics: typeof toggleUsageMetrics
toggleAutoCleanupBlankSessions: typeof toggleAutoCleanupBlankSessions
togglePromptSubmitOnEnter: typeof togglePromptSubmitOnEnter
toggleShowPromptVoiceInput: typeof toggleShowPromptVoiceInput
setDiffViewMode: typeof setDiffViewMode
setToolOutputExpansion: typeof setToolOutputExpansion
setDiagnosticsExpansion: typeof setDiagnosticsExpansion
@@ -569,6 +631,7 @@ const configContextValue: ConfigContextValue = {
addEnvironmentVariable,
removeEnvironmentVariable,
updateLastUsedBinary,
updateSpeechSettings,
recentFolders,
opencodeBinaries,
uiState,
@@ -588,6 +651,7 @@ const configContextValue: ConfigContextValue = {
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,
@@ -610,6 +674,8 @@ export const ConfigProvider: ParentComponent = (props) => {
const unsubServer = storage.onConfigOwnerChanged("server", (bucket) => {
setServerConfigBucket(bucket as any)
setIsLoaded(true)
resetSpeechCapabilities()
void loadSpeechCapabilities(true)
})
const unsubStateUi = storage.onStateOwnerChanged("ui", (bucket) => {
setUiStateBucket(bucket as any)
@@ -648,6 +714,7 @@ export {
addEnvironmentVariable,
removeEnvironmentVariable,
updateLastUsedBinary,
updateSpeechSettings,
addRecentFolder,
removeRecentFolder,
addOpenCodeBinary,
@@ -664,6 +731,7 @@ export {
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,

View File

@@ -1,6 +1,6 @@
import { createSignal } from "solid-js"
export type SettingsSectionId = "appearance" | "notifications" | "remote" | "opencode"
export type SettingsSectionId = "appearance" | "notifications" | "remote" | "speech" | "opencode"
const [settingsOpen, setSettingsOpen] = createSignal(false)
const [activeSettingsSection, setActiveSettingsSection] = createSignal<SettingsSectionId>("appearance")

View File

@@ -0,0 +1,46 @@
import { createSignal } from "solid-js"
import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
import { serverApi } from "../lib/api-client"
import { getLogger } from "../lib/logger"
const log = getLogger("api")
const [speechCapabilities, setSpeechCapabilities] = createSignal<SpeechCapabilitiesResponse | null>(null)
const [speechCapabilitiesLoading, setSpeechCapabilitiesLoading] = createSignal(false)
const [speechCapabilitiesError, setSpeechCapabilitiesError] = createSignal<string | null>(null)
let speechCapabilitiesPromise: Promise<SpeechCapabilitiesResponse | null> | null = null
async function loadSpeechCapabilities(force = false): Promise<SpeechCapabilitiesResponse | null> {
if (!force && speechCapabilities()) return speechCapabilities()
if (speechCapabilitiesPromise) return speechCapabilitiesPromise
setSpeechCapabilitiesLoading(true)
setSpeechCapabilitiesError(null)
speechCapabilitiesPromise = serverApi
.fetchSpeechCapabilities()
.then((result) => {
setSpeechCapabilities(result)
setSpeechCapabilitiesError(null)
return result
})
.catch((error) => {
log.error("Failed to load speech capabilities", error)
setSpeechCapabilities(null)
setSpeechCapabilitiesError(error instanceof Error ? error.message : String(error))
return null
})
.finally(() => {
setSpeechCapabilitiesLoading(false)
speechCapabilitiesPromise = null
})
return speechCapabilitiesPromise
}
function resetSpeechCapabilities(): void {
setSpeechCapabilities(null)
setSpeechCapabilitiesError(null)
}
export { speechCapabilities, speechCapabilitiesLoading, speechCapabilitiesError, loadSpeechCapabilities, resetSpeechCapabilities }