feat(speech): add prompt voice input groundwork
This commit is contained in:
@@ -7,6 +7,7 @@ import {
|
||||
updateInstanceConfig as updateInstanceData,
|
||||
} from "./instance-config"
|
||||
import { getLogger } from "../lib/logger"
|
||||
import { loadSpeechCapabilities, resetSpeechCapabilities } from "./speech"
|
||||
|
||||
const log = getLogger("actions")
|
||||
|
||||
@@ -27,6 +28,16 @@ export type DiffViewMode = "split" | "unified"
|
||||
export type ExpansionPreference = "expanded" | "collapsed"
|
||||
export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
|
||||
export type ListeningMode = "local" | "all"
|
||||
/** Identifier of the speech backend; "openai-compatible" is the only member of the union. */
export type SpeechProviderPreference = "openai-compatible"

/**
 * Speech configuration. A partial form of this shape is stored under `speech`
 * in the server config bucket.
 */
export interface SpeechSettings {
  /** Which speech backend to use. */
  provider: SpeechProviderPreference
  /** Optional API key; normalizeSpeechSettings drops non-string or blank values. */
  apiKey?: string
  /** Optional base URL; normalizeSpeechSettings drops non-string or blank values. */
  baseUrl?: string
  /** Model name, presumably for speech-to-text (default "gpt-4o-mini-transcribe"). */
  sttModel: string
  /** Model name, presumably for text-to-speech (default "gpt-4o-mini-tts"). */
  ttsModel: string
  /** Voice name, presumably for text-to-speech output (default "alloy"). */
  ttsVoice: string
}
|
||||
|
||||
export interface UiSettings {
|
||||
showThinkingBlocks: boolean
|
||||
@@ -34,6 +45,7 @@ export interface UiSettings {
|
||||
thinkingBlocksExpansion: ExpansionPreference
|
||||
showTimelineTools: boolean
|
||||
promptSubmitOnEnter: boolean
|
||||
showPromptVoiceInput: boolean
|
||||
locale?: string
|
||||
diffViewMode: DiffViewMode
|
||||
toolOutputExpansion: ExpansionPreference
|
||||
@@ -75,6 +87,7 @@ interface ServerConfigBucket {
|
||||
listeningMode?: ListeningMode
|
||||
environmentVariables?: Record<string, string>
|
||||
opencodeBinary?: string
|
||||
speech?: Partial<SpeechSettings>
|
||||
}
|
||||
|
||||
interface UiStateBucket {
|
||||
@@ -107,6 +120,7 @@ const defaultUiSettings: UiSettings = {
|
||||
thinkingBlocksExpansion: "expanded",
|
||||
showTimelineTools: true,
|
||||
promptSubmitOnEnter: false,
|
||||
showPromptVoiceInput: true,
|
||||
diffViewMode: "split",
|
||||
toolOutputExpansion: "expanded",
|
||||
diagnosticsExpansion: "expanded",
|
||||
@@ -120,6 +134,13 @@ const defaultUiSettings: UiSettings = {
|
||||
notifyOnIdle: true,
|
||||
}
|
||||
|
||||
/**
 * Fallback speech settings. normalizeSpeechSettings substitutes these values
 * for any provider/model/voice field that is missing or blank. No default is
 * provided for `apiKey` or `baseUrl`, so they stay undefined unless configured.
 */
const defaultSpeechSettings: SpeechSettings = {
  provider: "openai-compatible",
  sttModel: "gpt-4o-mini-transcribe",
  ttsModel: "gpt-4o-mini-tts",
  ttsVoice: "alloy",
}
|
||||
|
||||
function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
|
||||
const sanitized = input ?? {}
|
||||
return {
|
||||
@@ -129,6 +150,7 @@ function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
|
||||
thinkingBlocksExpansion: sanitized.thinkingBlocksExpansion ?? defaultUiSettings.thinkingBlocksExpansion,
|
||||
showTimelineTools: sanitized.showTimelineTools ?? defaultUiSettings.showTimelineTools,
|
||||
promptSubmitOnEnter: sanitized.promptSubmitOnEnter ?? defaultUiSettings.promptSubmitOnEnter,
|
||||
showPromptVoiceInput: sanitized.showPromptVoiceInput ?? defaultUiSettings.showPromptVoiceInput,
|
||||
locale: sanitized.locale ?? defaultUiSettings.locale,
|
||||
diffViewMode: sanitized.diffViewMode ?? defaultUiSettings.diffViewMode,
|
||||
toolOutputExpansion: sanitized.toolOutputExpansion ?? defaultUiSettings.toolOutputExpansion,
|
||||
@@ -156,6 +178,27 @@ function normalizeRecord(value: unknown): Record<string, string> {
|
||||
return out
|
||||
}
|
||||
|
||||
/**
 * Returns `value` trimmed when it is a string with non-whitespace content,
 * otherwise undefined.
 */
function trimmedSpeechValue(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined
  const trimmed = value.trim()
  return trimmed ? trimmed : undefined
}

/**
 * Builds a complete SpeechSettings object from possibly partial or missing input.
 *
 * - `provider` is kept only when it equals "openai-compatible"; anything else
 *   falls back to the default provider.
 * - `apiKey` and `baseUrl` are trimmed; non-string or blank values become undefined.
 * - `sttModel`, `ttsModel` and `ttsVoice` are trimmed; non-string or blank values
 *   fall back to the corresponding entry of defaultSpeechSettings.
 *
 * @param input Partial settings, or null/undefined to get pure defaults.
 * @returns A fully populated settings object (a new object on every call).
 */
function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): SpeechSettings {
  const sanitized = input ?? {}
  return {
    provider: sanitized.provider === "openai-compatible" ? sanitized.provider : defaultSpeechSettings.provider,
    apiKey: trimmedSpeechValue(sanitized.apiKey),
    baseUrl: trimmedSpeechValue(sanitized.baseUrl),
    sttModel: trimmedSpeechValue(sanitized.sttModel) ?? defaultSpeechSettings.sttModel,
    ttsModel: trimmedSpeechValue(sanitized.ttsModel) ?? defaultSpeechSettings.ttsModel,
    ttsVoice: trimmedSpeechValue(sanitized.ttsVoice) ?? defaultSpeechSettings.ttsVoice,
  }
}
|
||||
|
||||
function cloneArray<T>(value: unknown, mapper: (item: any) => T | null): T[] {
|
||||
if (!Array.isArray(value)) return []
|
||||
const out: T[] = []
|
||||
@@ -206,12 +249,15 @@ function normalizeUiState(input?: UiStateBucket | null): NormalizedUiState {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Normalizes the server-owned config bucket into a fully populated object.
 *
 * - `listeningMode` is "all" only when the input says exactly "all"; otherwise "local".
 * - `opencodeBinary` falls back to "opencode" when missing, non-string or blank.
 *   A non-blank value is returned as stored (it is not trimmed).
 * - `environmentVariables` is passed through normalizeRecord.
 * - `speech` is passed through normalizeSpeechSettings, so it is always complete.
 *
 * @param input Raw bucket, or null/undefined when nothing is stored yet.
 */
function normalizeServerConfig(
  input?: ServerConfigBucket | null,
): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> & { speech: SpeechSettings } {
  const source = input ?? {}
  const listeningMode = source.listeningMode === "all" ? "all" : "local"
  const opencodeBinary = typeof source.opencodeBinary === "string" && source.opencodeBinary.trim() ? source.opencodeBinary : "opencode"
  const environmentVariables = normalizeRecord(source.environmentVariables)
  const speech = normalizeSpeechSettings(source.speech)
  return { listeningMode, opencodeBinary, environmentVariables, speech }
}
|
||||
|
||||
function getModelKey(model: { providerId: string; modelId: string }): string {
|
||||
@@ -342,6 +388,16 @@ function updateLastUsedBinary(path: string): void {
|
||||
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to update binary list", error))
|
||||
}
|
||||
|
||||
/**
 * Applies `updates` on top of the current speech settings and persists the result.
 *
 * The merged object is run through normalizeSpeechSettings before being written
 * with patchConfigOwner("server", …). Because the merge is an object spread, a
 * key present in `updates` with the value `undefined` overrides the current value.
 *
 * @param updates Fields to change.
 * @throws Rethrows whatever patchConfigOwner rejects with, after logging it.
 */
async function updateSpeechSettings(updates: Partial<SpeechSettings>): Promise<void> {
  const next = normalizeSpeechSettings({ ...serverSettings().speech, ...updates })
  try {
    await patchConfigOwner("server", { speech: next })
  } catch (error) {
    // Log for diagnostics but keep the failure visible to the caller.
    log.error("Failed to update speech settings", error)
    throw error
  }
}
|
||||
|
||||
function addOpenCodeBinary(path: string, version?: string): void {
|
||||
const nextList = buildBinaryList(path, version, opencodeBinaries())
|
||||
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to add binary", error))
|
||||
@@ -476,6 +532,10 @@ function togglePromptSubmitOnEnter(): void {
|
||||
updateUiSettings({ promptSubmitOnEnter: !preferences().promptSubmitOnEnter })
|
||||
}
|
||||
|
||||
/** Flips the `showPromptVoiceInput` UI preference via updateUiSettings. */
function toggleShowPromptVoiceInput(): void {
  updateUiSettings({ showPromptVoiceInput: !preferences().showPromptVoiceInput })
}
|
||||
|
||||
function toggleAutoCleanupBlankSessions(): void {
|
||||
const nextValue = !preferences().autoCleanupBlankSessions
|
||||
log.info("toggle auto cleanup", { value: nextValue })
|
||||
@@ -521,6 +581,7 @@ interface ConfigContextValue {
|
||||
addEnvironmentVariable: typeof addEnvironmentVariable
|
||||
removeEnvironmentVariable: typeof removeEnvironmentVariable
|
||||
updateLastUsedBinary: typeof updateLastUsedBinary
|
||||
updateSpeechSettings: typeof updateSpeechSettings
|
||||
|
||||
// ui-owned state
|
||||
recentFolders: typeof recentFolders
|
||||
@@ -544,6 +605,7 @@ interface ConfigContextValue {
|
||||
toggleUsageMetrics: typeof toggleUsageMetrics
|
||||
toggleAutoCleanupBlankSessions: typeof toggleAutoCleanupBlankSessions
|
||||
togglePromptSubmitOnEnter: typeof togglePromptSubmitOnEnter
|
||||
toggleShowPromptVoiceInput: typeof toggleShowPromptVoiceInput
|
||||
setDiffViewMode: typeof setDiffViewMode
|
||||
setToolOutputExpansion: typeof setToolOutputExpansion
|
||||
setDiagnosticsExpansion: typeof setDiagnosticsExpansion
|
||||
@@ -569,6 +631,7 @@ const configContextValue: ConfigContextValue = {
|
||||
addEnvironmentVariable,
|
||||
removeEnvironmentVariable,
|
||||
updateLastUsedBinary,
|
||||
updateSpeechSettings,
|
||||
recentFolders,
|
||||
opencodeBinaries,
|
||||
uiState,
|
||||
@@ -588,6 +651,7 @@ const configContextValue: ConfigContextValue = {
|
||||
toggleUsageMetrics,
|
||||
toggleAutoCleanupBlankSessions,
|
||||
togglePromptSubmitOnEnter,
|
||||
toggleShowPromptVoiceInput,
|
||||
setDiffViewMode,
|
||||
setToolOutputExpansion,
|
||||
setDiagnosticsExpansion,
|
||||
@@ -610,6 +674,8 @@ export const ConfigProvider: ParentComponent = (props) => {
|
||||
const unsubServer = storage.onConfigOwnerChanged("server", (bucket) => {
|
||||
setServerConfigBucket(bucket as any)
|
||||
setIsLoaded(true)
|
||||
resetSpeechCapabilities()
|
||||
void loadSpeechCapabilities(true)
|
||||
})
|
||||
const unsubStateUi = storage.onStateOwnerChanged("ui", (bucket) => {
|
||||
setUiStateBucket(bucket as any)
|
||||
@@ -648,6 +714,7 @@ export {
|
||||
addEnvironmentVariable,
|
||||
removeEnvironmentVariable,
|
||||
updateLastUsedBinary,
|
||||
updateSpeechSettings,
|
||||
addRecentFolder,
|
||||
removeRecentFolder,
|
||||
addOpenCodeBinary,
|
||||
@@ -664,6 +731,7 @@ export {
|
||||
toggleUsageMetrics,
|
||||
toggleAutoCleanupBlankSessions,
|
||||
togglePromptSubmitOnEnter,
|
||||
toggleShowPromptVoiceInput,
|
||||
setDiffViewMode,
|
||||
setToolOutputExpansion,
|
||||
setDiagnosticsExpansion,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { createSignal } from "solid-js"
|
||||
|
||||
/** Identifiers of the sections available in the settings view. */
export type SettingsSectionId = "appearance" | "notifications" | "remote" | "speech" | "opencode"
|
||||
|
||||
const [settingsOpen, setSettingsOpen] = createSignal(false)
|
||||
const [activeSettingsSection, setActiveSettingsSection] = createSignal<SettingsSectionId>("appearance")
|
||||
|
||||
46
packages/ui/src/stores/speech.ts
Normal file
46
packages/ui/src/stores/speech.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { createSignal } from "solid-js"
|
||||
import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
|
||||
import { serverApi } from "../lib/api-client"
|
||||
import { getLogger } from "../lib/logger"
|
||||
|
||||
const log = getLogger("api")
|
||||
|
||||
// Last successfully fetched capabilities, or null when nothing is loaded
// (initial state, after a reset, or after a failed load).
const [speechCapabilities, setSpeechCapabilities] = createSignal<SpeechCapabilitiesResponse | null>(null)
// True while a capabilities request is running.
const [speechCapabilitiesLoading, setSpeechCapabilitiesLoading] = createSignal(false)
// Message of the most recent load failure, or null.
const [speechCapabilitiesError, setSpeechCapabilitiesError] = createSignal<string | null>(null)

// The in-flight request, if any, so that overlapping callers can share it.
let speechCapabilitiesPromise: Promise<SpeechCapabilitiesResponse | null> | null = null
|
||||
|
||||
// Sequence number of the most recently started capabilities request. A request
// whose number no longer matches has been superseded and must not publish state.
let speechCapabilitiesRequestSeq = 0

/**
 * Loads the server's speech capabilities and publishes them to the store signals.
 *
 * Without `force`, a cached value is returned immediately and an in-flight
 * request is shared. With `force`, a new request is always started, even when
 * one is already in flight, so the result reflects the state at the time of
 * the forced call. A superseded request does not touch the signals; its
 * promise settles with the outcome of the request that replaced it.
 *
 * @param force Skip the cache and any in-flight request.
 * @returns The capabilities, or null when the request failed (the failure is
 *          logged and its message stored in `speechCapabilitiesError`).
 */
async function loadSpeechCapabilities(force = false): Promise<SpeechCapabilitiesResponse | null> {
  if (!force) {
    const cached = speechCapabilities()
    if (cached) return cached
    if (speechCapabilitiesPromise) return speechCapabilitiesPromise
  }

  const requestId = ++speechCapabilitiesRequestSeq
  const isCurrent = (): boolean => requestId === speechCapabilitiesRequestSeq

  setSpeechCapabilitiesLoading(true)
  setSpeechCapabilitiesError(null)

  const request: Promise<SpeechCapabilitiesResponse | null> = serverApi
    .fetchSpeechCapabilities()
    .then((result) => {
      // Superseded: hand back the newer request's outcome instead of stale data.
      if (!isCurrent()) return speechCapabilitiesPromise ?? speechCapabilities()
      setSpeechCapabilities(result)
      setSpeechCapabilitiesError(null)
      return result
    })
    .catch((error) => {
      log.error("Failed to load speech capabilities", error)
      if (!isCurrent()) return speechCapabilitiesPromise ?? speechCapabilities()
      setSpeechCapabilities(null)
      setSpeechCapabilitiesError(error instanceof Error ? error.message : String(error))
      return null
    })
    .finally(() => {
      // Only the latest request may clear the shared loading state.
      if (!isCurrent()) return
      setSpeechCapabilitiesLoading(false)
      speechCapabilitiesPromise = null
    })

  speechCapabilitiesPromise = request
  return request
}
|
||||
|
||||
/**
 * Clears the cached capabilities and any stored error message.
 * The loading flag and an in-flight request are left untouched.
 */
function resetSpeechCapabilities(): void {
  setSpeechCapabilities(null)
  setSpeechCapabilitiesError(null)
}
|
||||
|
||||
export { speechCapabilities, speechCapabilitiesLoading, speechCapabilitiesError, loadSpeechCapabilities, resetSpeechCapabilities }
|
||||
Reference in New Issue
Block a user