Compare commits

...

9 Commits

Author SHA1 Message Date
Shantur Rathore
9c5dd6436e fix(i18n): localize speech settings copy 2026-03-25 13:49:39 +00:00
Shantur Rathore
6c27d4d1c4 fix(speech): keep provider secrets server-side 2026-03-25 13:39:36 +00:00
Shantur Rathore
c064cea4cc fix(ui): move prompt mic beside expand control 2026-03-25 11:56:02 +00:00
Shantur Rathore
fd529196fa fix(ui): restore prompt navigation button placement 2026-03-25 11:53:59 +00:00
Shantur Rathore
5f24fd4db7 feat(speech): make prompt input push to talk 2026-03-25 09:24:14 +00:00
Shantur Rathore
3c882e86b3 Revert "feat(speech): add realtime prompt dictation support"
This reverts commit f9b5e2b529.
2026-03-25 09:23:46 +00:00
Shantur Rathore
2354051297 feat(speech): add realtime prompt dictation support
Add server-backed realtime transcription for prompt voice input and expose speech settings to choose realtime mode and models.
2026-03-25 09:23:46 +00:00
Shantur Rathore
5948e25b97 fix(speech): preserve edits while saving settings 2026-03-25 09:23:46 +00:00
Shantur Rathore
f3a1ccd8b8 feat(speech): add prompt voice input groundwork 2026-03-25 09:23:46 +00:00
38 changed files with 1506 additions and 27 deletions

23
package-lock.json generated
View File

@@ -8240,6 +8240,27 @@
"regex-recursion": "^6.0.2"
}
},
"node_modules/openai": {
"version": "6.27.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz",
"integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==",
"license": "Apache-2.0",
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.25 || ^4.0"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/own-keys": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz",
@@ -12019,6 +12040,7 @@
"node_modules/zod": {
"version": "3.25.76",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -12080,6 +12102,7 @@
"fastify": "^4.28.1",
"fuzzysort": "^2.0.4",
"node-forge": "^1.3.3",
"openai": "^6.27.0",
"pino": "^9.4.0",
"undici": "^6.19.8",
"yaml": "^2.4.2",

View File

@@ -4,6 +4,6 @@
"private": true,
"license": "MIT",
"dependencies": {
"@opencode-ai/plugin": "1.2.24"
"@opencode-ai/plugin": "1.2.14"
}
}

View File

@@ -32,6 +32,7 @@
"fastify": "^4.28.1",
"fuzzysort": "^2.0.4",
"node-forge": "^1.3.3",
"openai": "^6.27.0",
"pino": "^9.4.0",
"undici": "^6.19.8",
"yaml": "^2.4.2",

View File

@@ -207,6 +207,36 @@ export interface BinaryValidationResult {
error?: string
}
export interface SpeechSegment {
startMs: number
endMs: number
text: string
}
export interface SpeechCapabilitiesResponse {
available: boolean
configured: boolean
provider: string
supportsStt: boolean
supportsTts: boolean
baseUrl?: string
sttModel: string
ttsModel: string
ttsVoice: string
}
export interface SpeechTranscriptionResponse {
text: string
language?: string
durationMs?: number
segments?: SpeechSegment[]
}
export interface SpeechSynthesisResponse {
audioBase64: string
mimeType: string
}
export type WorkspaceEventType =
| "workspace.created"
| "workspace.started"

View File

@@ -23,6 +23,7 @@ import { AuthManager, BOOTSTRAP_TOKEN_STDOUT_PREFIX, DEFAULT_AUTH_USERNAME } fro
import { resolveHttpsOptions } from "./server/tls"
import { resolveNetworkAddresses } from "./server/network-addresses"
import { startDevReleaseMonitor } from "./releases/dev-release-monitor"
import { SpeechService } from "./speech/service"
const require = createRequire(import.meta.url)
@@ -304,6 +305,7 @@ async function main() {
})
const fileSystemBrowser = new FileSystemBrowser({ rootDir: options.rootDir, unrestricted: options.unrestrictedRoot })
const instanceStore = new InstanceStore(configLocation.instancesDir)
const speechService = new SpeechService(settings, logger.child({ component: "speech" }))
const instanceEventBridge = new InstanceEventBridge({
workspaceManager,
eventBus,
@@ -388,6 +390,7 @@ async function main() {
eventBus,
serverMeta,
instanceStore,
speechService,
authManager,
uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
uiDevServerUrl: uiResolution.uiDevServerUrl,
@@ -408,6 +411,7 @@ async function main() {
eventBus,
serverMeta,
instanceStore,
speechService,
authManager,
uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
uiDevServerUrl: undefined,

View File

@@ -21,12 +21,14 @@ import { registerStorageRoutes } from "./routes/storage"
import { registerPluginRoutes } from "./routes/plugin"
import { registerBackgroundProcessRoutes } from "./routes/background-processes"
import { registerWorktreeRoutes } from "./routes/worktrees"
import { registerSpeechRoutes } from "./routes/speech"
import { ServerMeta } from "../api-types"
import { InstanceStore } from "../storage/instance-store"
import { BackgroundProcessManager } from "../background-processes/manager"
import type { AuthManager } from "../auth/manager"
import { registerAuthRoutes } from "./routes/auth"
import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
import type { SpeechService } from "../speech/service"
interface HttpServerDeps {
bindHost: string
@@ -41,6 +43,7 @@ interface HttpServerDeps {
eventBus: EventBus
serverMeta: ServerMeta
instanceStore: InstanceStore
speechService: SpeechService
authManager: AuthManager
uiStaticDir: string
uiDevServerUrl?: string
@@ -252,6 +255,7 @@ export function createHttpServer(deps: HttpServerDeps) {
eventBus: deps.eventBus,
workspaceManager: deps.workspaceManager,
})
registerSpeechRoutes(app, { speechService: deps.speechService })
registerPluginRoutes(app, { workspaceManager: deps.workspaceManager, eventBus: deps.eventBus, logger: proxyLogger })
registerBackgroundProcessRoutes(app, { backgroundProcessManager })
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })

View File

@@ -3,6 +3,7 @@ import { z } from "zod"
import { probeBinaryVersion } from "../../workspaces/runtime"
import type { SettingsService } from "../../settings/service"
import type { Logger } from "../../logger"
import { sanitizeConfigDoc, sanitizeConfigOwner } from "../../settings/public-config"
interface RouteDeps {
settings: SettingsService
@@ -20,10 +21,10 @@ function validateBinaryPath(binaryPath: string): { valid: boolean; version?: str
export function registerSettingsRoutes(app: FastifyInstance, deps: RouteDeps) {
// Full-document access
app.get("/api/storage/config", async () => deps.settings.getDoc("config"))
app.get("/api/storage/config", async () => sanitizeConfigDoc(deps.settings.getDoc("config")))
app.patch("/api/storage/config", async (request, reply) => {
try {
return deps.settings.mergePatchDoc("config", request.body ?? {})
return sanitizeConfigDoc(deps.settings.mergePatchDoc("config", request.body ?? {}))
} catch (error) {
reply.code(400)
return { error: error instanceof Error ? error.message : "Invalid patch" }
@@ -31,12 +32,15 @@ export function registerSettingsRoutes(app: FastifyInstance, deps: RouteDeps) {
})
app.get<{ Params: { owner: string } }>("/api/storage/config/:owner", async (request) => {
return deps.settings.getOwner("config", request.params.owner)
return sanitizeConfigOwner(request.params.owner, deps.settings.getOwner("config", request.params.owner))
})
app.patch<{ Params: { owner: string } }>("/api/storage/config/:owner", async (request, reply) => {
try {
return deps.settings.mergePatchOwner("config", request.params.owner, request.body ?? {})
return sanitizeConfigOwner(
request.params.owner,
deps.settings.mergePatchOwner("config", request.params.owner, request.body ?? {}),
)
} catch (error) {
reply.code(400)
return { error: error instanceof Error ? error.message : "Invalid patch" }

View File

@@ -0,0 +1,60 @@
import type { FastifyInstance } from "fastify"
import { z } from "zod"
import type { SpeechService } from "../../speech/service"
interface RouteDeps {
speechService: SpeechService
}
const TranscribeBodySchema = z.object({
audioBase64: z.string().min(1, "Audio payload is required"),
mimeType: z.string().min(1, "Audio MIME type is required"),
filename: z.string().optional(),
language: z.string().optional(),
prompt: z.string().optional(),
})
const SynthesizeBodySchema = z.object({
text: z.string().trim().min(1, "Text is required"),
format: z.enum(["mp3", "wav", "opus"]).optional(),
})
function getSpeechErrorStatus(error: unknown): number {
if (error instanceof z.ZodError) {
return 400
}
if (error instanceof Error && /not configured/i.test(error.message)) {
return 503
}
return 502
}
function getSpeechErrorMessage(error: unknown, fallback: string): string {
return error instanceof Error ? error.message : fallback
}
export function registerSpeechRoutes(app: FastifyInstance, deps: RouteDeps) {
app.get("/api/speech/capabilities", async () => deps.speechService.getCapabilities())
app.post("/api/speech/transcribe", async (request, reply) => {
try {
const body = TranscribeBodySchema.parse(request.body ?? {})
return await deps.speechService.transcribe(body)
} catch (error) {
request.log.error({ err: error }, "Failed to transcribe audio")
reply.code(getSpeechErrorStatus(error))
return { error: getSpeechErrorMessage(error, "Failed to transcribe audio") }
}
})
app.post("/api/speech/synthesize", async (request, reply) => {
try {
const body = SynthesizeBodySchema.parse(request.body ?? {})
return await deps.speechService.synthesize(body)
} catch (error) {
request.log.error({ err: error }, "Failed to synthesize audio")
reply.code(getSpeechErrorStatus(error))
return { error: getSpeechErrorMessage(error, "Failed to synthesize audio") }
}
})
}

View File

@@ -0,0 +1,40 @@
import type { SettingsDoc } from "./yaml-doc-store"
function isPlainObject(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value)
}
function sanitizeServerOwner(value: SettingsDoc): SettingsDoc {
const next: SettingsDoc = { ...value }
const speech = isPlainObject(next.speech) ? { ...next.speech } : null
if (!speech) {
return next
}
const rawApiKey = typeof speech.apiKey === "string" ? speech.apiKey.trim() : ""
if (rawApiKey) {
delete speech.apiKey
speech.hasApiKey = true
} else if (!("hasApiKey" in speech)) {
speech.hasApiKey = false
}
next.speech = speech
return next
}
export function sanitizeConfigOwner(owner: string, value: SettingsDoc): SettingsDoc {
if (owner !== "server") {
return value
}
return sanitizeServerOwner(value)
}
export function sanitizeConfigDoc(value: SettingsDoc): SettingsDoc {
const next: SettingsDoc = { ...value }
if (isPlainObject(next.server)) {
next.server = sanitizeServerOwner(next.server)
}
return next
}

View File

@@ -4,6 +4,7 @@ import type { ConfigLocation } from "../config/location"
import { YamlDocStore, type SettingsDoc } from "./yaml-doc-store"
import { migrateSettingsLayout } from "./migrate"
import type { WorkspaceEventPayload } from "../api-types"
import { sanitizeConfigOwner } from "./public-config"
export type DocKind = "config" | "state"
@@ -45,10 +46,11 @@ export class SettingsService {
private publish(kind: DocKind, owner: string, value?: SettingsDoc) {
if (!this.eventBus) return
const type = kind === "config" ? "storage.configChanged" : "storage.stateChanged"
const nextValue = value ?? this.getOwner(kind, owner)
const payload: WorkspaceEventPayload = {
type,
owner,
value: value ?? this.getOwner(kind, owner),
value: kind === "config" ? sanitizeConfigOwner(owner, nextValue) : nextValue,
} as any
this.eventBus.publish(payload)
}

View File

@@ -0,0 +1,148 @@
import OpenAI from "openai"
import { toFile } from "openai/uploads"
import type { SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../../api-types"
import type { Logger } from "../../logger"
import type { NormalizedSpeechSettings, SynthesizeSpeechInput, TranscribeAudioInput } from "../service"
interface OpenAICompatibleSpeechProviderOptions {
settings: NormalizedSpeechSettings
logger: Logger
}
export class OpenAICompatibleSpeechProvider {
constructor(private readonly options: OpenAICompatibleSpeechProviderOptions) {}
getCapabilities() {
const { settings } = this.options
return {
available: true,
configured: Boolean(settings.apiKey),
provider: settings.provider,
supportsStt: true,
supportsTts: true,
baseUrl: settings.baseUrl,
sttModel: settings.sttModel,
ttsModel: settings.ttsModel,
ttsVoice: settings.ttsVoice,
}
}
async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> {
const client = this.createClient()
const startedAt = Date.now()
const extension = extensionForMime(input.mimeType)
const buffer = Buffer.from(input.audioBase64, "base64")
const filename = input.filename?.trim() || `prompt-input.${extension}`
this.options.logger.info(
{
mimeType: input.mimeType,
bytes: buffer.byteLength,
language: input.language,
model: this.options.settings.sttModel,
},
"speech.transcribe",
)
const response = await this.requestTranscription(client, buffer, filename, input)
return {
text: typeof response?.text === "string" ? response.text : "",
language: typeof response?.language === "string" ? response.language : input.language,
durationMs: Number.isFinite(response?.duration) ? Math.round(Number(response.duration) * 1000) : Date.now() - startedAt,
segments: Array.isArray(response?.segments)
? response.segments
.filter((segment: any) => typeof segment?.text === "string")
.map((segment: any) => ({
startMs: Math.max(0, Math.round(Number(segment.start ?? 0) * 1000)),
endMs: Math.max(0, Math.round(Number(segment.end ?? 0) * 1000)),
text: String(segment.text),
}))
: undefined,
}
}
private async requestTranscription(
client: OpenAI,
buffer: Buffer,
filename: string,
input: TranscribeAudioInput,
): Promise<any> {
const baseRequest = {
model: this.options.settings.sttModel,
...(input.language ? { language: input.language } : {}),
...(input.prompt ? { prompt: input.prompt } : {}),
}
try {
const file = await toFile(buffer, filename, { type: input.mimeType })
return (await client.audio.transcriptions.create({
...baseRequest,
file,
response_format: "verbose_json" as any,
} as any)) as any
} catch (error) {
this.options.logger.warn({ err: error }, "speech.transcribe verbose_json failed; retrying default format")
const retryFile = await toFile(buffer, filename, { type: input.mimeType })
return (await client.audio.transcriptions.create({
...baseRequest,
file: retryFile,
} as any)) as any
}
}
async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
const client = this.createClient()
const format = input.format ?? "mp3"
this.options.logger.info(
{
model: this.options.settings.ttsModel,
voice: this.options.settings.ttsVoice,
format,
},
"speech.synthesize",
)
const response = await client.audio.speech.create({
model: this.options.settings.ttsModel,
voice: this.options.settings.ttsVoice as any,
input: input.text,
response_format: format as any,
})
const audioBuffer = Buffer.from(await response.arrayBuffer())
return {
audioBase64: audioBuffer.toString("base64"),
mimeType: mimeTypeForFormat(format),
}
}
private createClient(): OpenAI {
const { settings } = this.options
if (!settings.apiKey) {
throw new Error("Speech provider is not configured. Add an API key in Speech settings.")
}
return new OpenAI({
apiKey: settings.apiKey,
baseURL: settings.baseUrl,
})
}
}
function extensionForMime(mimeType: string): string {
const normalized = mimeType.toLowerCase()
if (normalized.includes("webm")) return "webm"
if (normalized.includes("ogg")) return "ogg"
if (normalized.includes("wav")) return "wav"
if (normalized.includes("mpeg") || normalized.includes("mp3")) return "mp3"
if (normalized.includes("mp4") || normalized.includes("aac")) return "m4a"
return "webm"
}
function mimeTypeForFormat(format: "mp3" | "wav" | "opus"): string {
if (format === "wav") return "audio/wav"
if (format === "opus") return "audio/opus"
return "audio/mpeg"
}

View File

@@ -0,0 +1,91 @@
import { z } from "zod"
import type { Logger } from "../logger"
import type { SettingsService } from "../settings/service"
import type { SpeechCapabilitiesResponse, SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../api-types"
import { OpenAICompatibleSpeechProvider } from "./providers/openai-compatible"
const ServerSpeechSettingsSchema = z.object({
speech: z
.object({
provider: z.string().optional(),
apiKey: z.string().optional(),
baseUrl: z.string().optional(),
sttModel: z.string().optional(),
ttsModel: z.string().optional(),
ttsVoice: z.string().optional(),
})
.optional(),
})
export interface TranscribeAudioInput {
audioBase64: string
mimeType: string
filename?: string
language?: string
prompt?: string
}
export interface SynthesizeSpeechInput {
text: string
format?: "mp3" | "wav" | "opus"
}
export interface SpeechProvider {
getCapabilities(): SpeechCapabilitiesResponse
transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse>
synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse>
}
export interface NormalizedSpeechSettings {
provider: string
apiKey?: string
baseUrl?: string
sttModel: string
ttsModel: string
ttsVoice: string
}
const DEFAULT_PROVIDER = "openai-compatible"
const DEFAULT_STT_MODEL = "gpt-4o-mini-transcribe"
const DEFAULT_TTS_MODEL = "gpt-4o-mini-tts"
const DEFAULT_TTS_VOICE = "alloy"
export class SpeechService {
constructor(
private readonly settings: SettingsService,
private readonly logger: Logger,
) {}
getCapabilities(): SpeechCapabilitiesResponse {
return this.createProvider().getCapabilities()
}
async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> {
return this.createProvider().transcribe(input)
}
async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
return this.createProvider().synthesize(input)
}
private createProvider(): SpeechProvider {
const settings = this.resolveSettings()
return new OpenAICompatibleSpeechProvider({
settings,
logger: this.logger.child({ provider: settings.provider }),
})
}
private resolveSettings(): NormalizedSpeechSettings {
const parsed = ServerSpeechSettingsSchema.parse(this.settings.getOwner("config", "server") ?? {})
const speech = parsed.speech ?? {}
return {
provider: speech.provider?.trim() || DEFAULT_PROVIDER,
apiKey: speech.apiKey?.trim() || process.env.OPENAI_API_KEY,
baseUrl: speech.baseUrl?.trim() || process.env.OPENAI_BASE_URL || undefined,
sttModel: speech.sttModel?.trim() || DEFAULT_STT_MODEL,
ttsModel: speech.ttsModel?.trim() || DEFAULT_TTS_MODEL,
ttsVoice: speech.ttsVoice?.trim() || DEFAULT_TTS_VOICE,
}
}
}

View File

@@ -68,6 +68,7 @@ const App: Component = () => {
toggleAutoCleanupBlankSessions,
toggleUsageMetrics,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,
@@ -353,6 +354,7 @@ const App: Component = () => {
toggleShowTimelineTools,
toggleUsageMetrics,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,

View File

@@ -1,5 +1,5 @@
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, onMount, Show } from "solid-js"
import { ArrowBigUp, ArrowBigDown } from "lucide-solid"
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, Show } from "solid-js"
import { ArrowBigUp, ArrowBigDown, Loader2, Mic } from "lucide-solid"
import ExpandButton from "./expand-button"
import { clearAttachments, removeAttachment } from "../stores/attachments"
import { resolvePastedPlaceholders } from "../lib/prompt-placeholders"
@@ -18,6 +18,7 @@ import { usePromptState } from "./prompt-input/usePromptState"
import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
import { usePromptPicker } from "./prompt-input/usePromptPicker"
import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
const log = getLogger("actions")
const LazyUnifiedPicker = lazy(() => import("./unified-picker"))
@@ -450,9 +451,45 @@ export default function PromptInput(props: PromptInputProps) {
})
const shouldShowOverlay = () => prompt().length === 0
const voiceInput = usePromptVoiceInput({
prompt,
setPrompt,
getTextarea: () => textareaRef ?? null,
enabled: () => preferences().showPromptVoiceInput,
disabled: () => Boolean(props.disabled),
})
const showVoiceInput = () =>
preferences().showPromptVoiceInput &&
(voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
const instance = () => getActiveInstance()
let voiceButtonPressed = false
const beginVoicePress = (event?: PointerEvent | KeyboardEvent) => {
if (voiceButtonPressed || props.disabled || voiceInput.isTranscribing() || !voiceInput.canUseVoiceInput()) return
voiceButtonPressed = true
if (event instanceof PointerEvent) {
const target = event.currentTarget
if (target instanceof HTMLElement) {
try {
target.setPointerCapture(event.pointerId)
} catch {
// no-op
}
}
}
void voiceInput.startRecording()
}
const endVoicePress = () => {
if (!voiceButtonPressed) return
voiceButtonPressed = false
voiceInput.stopRecording()
}
return (
<div class="prompt-input-container">
<div
@@ -506,10 +543,54 @@ export default function PromptInput(props: PromptInputProps) {
autocomplete="off"
/>
<div class="prompt-nav-buttons">
<ExpandButton
expandState={expandState}
onToggleExpand={handleExpandToggle}
/>
<div class="prompt-nav-top-row">
<Show when={showVoiceInput()}>
<button
type="button"
class={`prompt-voice-button prompt-nav-voice-button ${voiceInput.isRecording() ? "is-recording" : ""}`}
onPointerDown={(event) => {
event.preventDefault()
beginVoicePress(event)
}}
onPointerUp={(event) => {
event.preventDefault()
endVoicePress()
}}
onPointerCancel={() => endVoicePress()}
onLostPointerCapture={() => endVoicePress()}
onKeyDown={(event) => {
if (event.repeat) return
if (event.key !== " " && event.key !== "Enter") return
event.preventDefault()
beginVoicePress(event)
}}
onKeyUp={(event) => {
if (event.key !== " " && event.key !== "Enter") return
event.preventDefault()
endVoicePress()
}}
onBlur={() => endVoicePress()}
disabled={!voiceInput.isRecording() && (props.disabled || voiceInput.isTranscribing() || !voiceInput.canUseVoiceInput())}
aria-label={voiceInput.buttonTitle()}
title={voiceInput.buttonTitle()}
>
<Show
when={voiceInput.isRecording()}
fallback={
<Show when={voiceInput.isTranscribing()} fallback={<Mic class="h-4 w-4" aria-hidden="true" />}>
<Loader2 class="h-4 w-4 animate-spin" aria-hidden="true" />
</Show>
}
>
<span class="prompt-voice-timer">{formatVoiceTimer(voiceInput.elapsedMs())}</span>
</Show>
</button>
</Show>
<ExpandButton
expandState={expandState}
onToggleExpand={handleExpandToggle}
/>
</div>
<Show when={hasHistory()}>
<button
type="button"
@@ -631,3 +712,10 @@ export default function PromptInput(props: PromptInputProps) {
</div>
)
}
function formatVoiceTimer(elapsedMs: number): string {
const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1000))
const minutes = Math.floor(totalSeconds / 60)
const seconds = totalSeconds % 60
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`
}

View File

@@ -0,0 +1,244 @@
import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
import { showAlertDialog } from "../../stores/alerts"
import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
import { serverApi } from "../../lib/api-client"
import { useI18n } from "../../lib/i18n"
interface UsePromptVoiceInputOptions {
prompt: Accessor<string>
setPrompt: (value: string) => void
getTextarea: () => HTMLTextAreaElement | null
enabled: Accessor<boolean>
disabled: Accessor<boolean>
}
type VoiceInputState = "idle" | "recording" | "transcribing"
export function usePromptVoiceInput(options: UsePromptVoiceInputOptions) {
const { t } = useI18n()
const [state, setState] = createSignal<VoiceInputState>("idle")
const [elapsedMs, setElapsedMs] = createSignal(0)
let mediaRecorder: MediaRecorder | null = null
let mediaStream: MediaStream | null = null
let timerId: number | undefined
let shouldTranscribe = true
let recordedChunks: Blob[] = []
let recordingStartedAt = 0
createEffect(() => {
void loadSpeechCapabilities()
})
onCleanup(() => {
cleanupMedia(false)
})
const isSupported = () => {
if (typeof window === "undefined") return false
return typeof window.MediaRecorder !== "undefined" && Boolean(navigator.mediaDevices?.getUserMedia)
}
const canUseVoiceInput = () => {
const capabilities = speechCapabilities()
return Boolean(
options.enabled() &&
isSupported() &&
capabilities?.available &&
capabilities?.configured &&
capabilities?.supportsStt,
)
}
async function toggleRecording(): Promise<void> {
if (state() === "recording") {
stopRecording()
return
}
await startRecording()
}
function stopRecording() {
if (!mediaRecorder || state() !== "recording") return
shouldTranscribe = true
mediaRecorder.stop()
setState("transcribing")
stopTimer()
}
function cancelRecording() {
if (!mediaRecorder || state() !== "recording") return
shouldTranscribe = false
mediaRecorder.stop()
cleanupMedia(false)
}
async function startRecording() {
if (!canUseVoiceInput() || options.disabled() || state() === "transcribing" || state() === "recording") return
if (!isSupported()) {
showAlertDialog(t("promptInput.voiceInput.error.unsupported"), {
title: t("promptInput.voiceInput.error.title"),
variant: "error",
})
return
}
try {
recordedChunks = []
shouldTranscribe = true
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
mediaRecorder = createRecorder(mediaStream)
mediaRecorder.addEventListener("dataavailable", (event) => {
if (event.data.size > 0) {
recordedChunks.push(event.data)
}
})
mediaRecorder.addEventListener("stop", () => {
void finalizeRecording()
})
recordingStartedAt = Date.now()
setElapsedMs(0)
setState("recording")
startTimer()
mediaRecorder.start()
} catch (error) {
cleanupMedia(false)
showAlertDialog(t("promptInput.voiceInput.error.permission"), {
title: t("promptInput.voiceInput.error.title"),
detail: error instanceof Error ? error.message : String(error),
variant: "error",
})
}
}
async function finalizeRecording() {
const recorder = mediaRecorder
const stream = mediaStream
mediaRecorder = null
mediaStream = null
if (!shouldTranscribe || recordedChunks.length === 0) {
recordedChunks = []
stopTracks(stream)
setState("idle")
setElapsedMs(0)
return
}
const mimeType = recorder?.mimeType || recordedChunks[0]?.type || "audio/webm"
try {
const audioBlob = new Blob(recordedChunks, { type: mimeType })
const transcription = await serverApi.transcribeAudio({
audioBase64: await blobToBase64(audioBlob),
mimeType,
})
if (transcription.text.trim()) {
insertTranscript(transcription.text.trim())
}
} catch (error) {
showAlertDialog(t("promptInput.voiceInput.error.transcribe"), {
title: t("promptInput.voiceInput.error.title"),
detail: error instanceof Error ? error.message : String(error),
variant: "error",
})
} finally {
recordedChunks = []
stopTracks(stream)
setState("idle")
setElapsedMs(0)
}
}
function insertTranscript(text: string) {
const current = options.prompt()
const textarea = options.getTextarea()
const start = textarea ? textarea.selectionStart : current.length
const end = textarea ? textarea.selectionEnd : current.length
const before = current.slice(0, start)
const after = current.slice(end)
const prefix = before.length > 0 && !/\s$/.test(before) ? " " : ""
const suffix = after.length > 0 && !/^\s/.test(after) ? " " : ""
const nextValue = `${before}${prefix}${text}${suffix}${after}`
const cursor = before.length + prefix.length + text.length
options.setPrompt(nextValue)
if (textarea) {
setTimeout(() => {
textarea.focus()
textarea.setSelectionRange(cursor, cursor)
}, 0)
}
}
function cleanupMedia(resetState = true) {
stopTimer()
if (mediaRecorder && mediaRecorder.state !== "inactive") {
mediaRecorder.stop()
}
mediaRecorder = null
stopTracks(mediaStream)
mediaStream = null
recordedChunks = []
if (resetState) {
setState("idle")
setElapsedMs(0)
}
}
function startTimer() {
stopTimer()
timerId = window.setInterval(() => {
setElapsedMs(Date.now() - recordingStartedAt)
}, 250)
}
function stopTimer() {
if (timerId !== undefined) {
window.clearInterval(timerId)
timerId = undefined
}
}
return {
state,
elapsedMs,
canUseVoiceInput,
startRecording,
stopRecording,
toggleRecording,
cancelRecording,
isRecording: () => state() === "recording",
isTranscribing: () => state() === "transcribing",
buttonTitle: () => {
if (state() === "recording") return t("promptInput.voiceInput.stop.title")
if (state() === "transcribing") return t("promptInput.voiceInput.transcribing.title")
return t("promptInput.voiceInput.start.title")
},
}
}
function createRecorder(stream: MediaStream): MediaRecorder {
const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4", "audio/ogg;codecs=opus"]
const supported = candidates.find((candidate) => typeof MediaRecorder.isTypeSupported !== "function" || MediaRecorder.isTypeSupported(candidate))
return supported ? new MediaRecorder(stream, { mimeType: supported }) : new MediaRecorder(stream)
}
function stopTracks(stream: MediaStream | null) {
stream?.getTracks().forEach((track) => track.stop())
}
async function blobToBase64(blob: Blob): Promise<string> {
const buffer = await blob.arrayBuffer()
const bytes = new Uint8Array(buffer)
let binary = ""
for (const byte of bytes) {
binary += String.fromCharCode(byte)
}
return btoa(binary)
}

View File

@@ -1,5 +1,5 @@
import { Dialog } from "@kobalte/core/dialog"
import { Settings, Bell, MonitorUp, Paintbrush, Terminal, X } from "lucide-solid"
import { Settings, Bell, MonitorUp, Paintbrush, Terminal, Volume2, X } from "lucide-solid"
import { createMemo, For, type Component } from "solid-js"
import { useI18n } from "../lib/i18n"
import {
@@ -13,6 +13,7 @@ import { AppearanceSettingsSection } from "./settings/appearance-settings-sectio
import { NotificationsSettingsSection } from "./settings/notifications-settings-section"
import { OpenCodeSettingsSection } from "./settings/opencode-settings-section"
import { RemoteAccessSettingsSection } from "./settings/remote-access-settings-section"
import { SpeechSettingsSection } from "./settings/speech-settings-section"
export const SettingsScreen: Component = () => {
const { t } = useI18n()
@@ -21,6 +22,7 @@ export const SettingsScreen: Component = () => {
{ id: "appearance" as SettingsSectionId, icon: Paintbrush, label: t("settings.nav.appearance") },
{ id: "notifications" as SettingsSectionId, icon: Bell, label: t("settings.nav.notifications") },
{ id: "remote" as SettingsSectionId, icon: MonitorUp, label: t("settings.nav.remote") },
{ id: "speech" as SettingsSectionId, icon: Volume2, label: t("settings.nav.speech") },
{ id: "opencode" as SettingsSectionId, icon: Terminal, label: t("settings.nav.opencode") },
])
@@ -30,6 +32,8 @@ export const SettingsScreen: Component = () => {
return <NotificationsSettingsSection />
case "remote":
return <RemoteAccessSettingsSection />
case "speech":
return <SpeechSettingsSection />
case "opencode":
return <OpenCodeSettingsSection />
case "appearance":

View File

@@ -24,6 +24,7 @@ export const AppearanceSettingsSection: Component = () => {
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,
@@ -38,10 +39,11 @@ export const AppearanceSettingsSection: Component = () => {
toggleShowThinkingBlocks,
toggleKeyboardShortcutHints,
toggleShowTimelineTools,
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
setDiffViewMode,
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,
setThinkingBlocksExpansion,

View File

@@ -0,0 +1,252 @@
import { Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
import { Mic, Volume2 } from "lucide-solid"
import { useConfig, type SpeechSettings } from "../../stores/preferences"
import { useI18n } from "../../lib/i18n"
import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
import { getLogger } from "../../lib/logger"
const log = getLogger("actions")
type DraftFields = {
apiKey: string
baseUrl: string
sttModel: string
ttsModel: string
ttsVoice: string
}
function createDraftFields(speech: SpeechSettings): DraftFields {
return {
apiKey: "",
baseUrl: speech.baseUrl ?? "",
sttModel: speech.sttModel,
ttsModel: speech.ttsModel,
ttsVoice: speech.ttsVoice,
}
}
function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
return a.apiKey === b.apiKey && a.baseUrl === b.baseUrl && a.sttModel === b.sttModel && a.ttsModel === b.ttsModel && a.ttsVoice === b.ttsVoice
}
export const SpeechSettingsCard: Component = () => {
const { t } = useI18n()
const { serverSettings, updateSpeechSettings } = useConfig()
const initialDrafts = createDraftFields(serverSettings().speech)
const [isSaving, setIsSaving] = createSignal(false)
const [saveStatus, setSaveStatus] = createSignal<"idle" | "saved" | "error">("saved")
const [drafts, setDrafts] = createSignal<DraftFields>(initialDrafts)
const [apiKeyTouched, setApiKeyTouched] = createSignal(false)
const [clearStoredApiKey, setClearStoredApiKey] = createSignal(false)
createEffect(() => {
const speech = serverSettings().speech
const nextDrafts = createDraftFields(speech)
if (!isSaving() && !isDirty()) {
if (!isDraftEqual(drafts(), nextDrafts)) {
setDrafts(nextDrafts)
}
if (apiKeyTouched()) {
setApiKeyTouched(false)
}
if (clearStoredApiKey()) {
setClearStoredApiKey(false)
}
}
})
createEffect(() => {
void loadSpeechCapabilities()
})
const capabilityLabel = () => {
if (speechCapabilitiesLoading()) return t("settings.speech.status.loading")
if (speechCapabilitiesError()) return t("settings.speech.status.error")
return speechCapabilities()?.configured ? t("settings.speech.status.configured") : t("settings.speech.status.missing")
}
const updateDraft = (key: keyof DraftFields, value: string) => {
setSaveStatus("idle")
if (key === "apiKey") {
setApiKeyTouched(true)
setClearStoredApiKey(false)
}
setDrafts((current) => ({ ...current, [key]: value }))
}
const apiKeyDirty = createMemo(() => clearStoredApiKey() || drafts().apiKey.trim().length > 0)
const isDirty = createMemo(() => {
const speech = serverSettings().speech
const current = drafts()
return (
apiKeyDirty() ||
(current.baseUrl || "") !== (speech.baseUrl || "") ||
current.sttModel !== speech.sttModel ||
current.ttsModel !== speech.ttsModel ||
current.ttsVoice !== speech.ttsVoice
)
})
const saveStatusLabel = () => {
if (isSaving()) return t("settings.speech.save.saving")
if (saveStatus() === "saved") return t("settings.speech.save.saved")
if (saveStatus() === "error") return t("settings.speech.save.error")
return t("settings.speech.save.unsaved")
}
async function handleSave() {
if (!isDirty() || isSaving()) return
const current = drafts()
setIsSaving(true)
setSaveStatus("idle")
try {
const trimmedApiKey = current.apiKey.trim()
await updateSpeechSettings({
...(clearStoredApiKey() ? { apiKey: null } : trimmedApiKey ? { apiKey: trimmedApiKey } : {}),
baseUrl: current.baseUrl.trim() || undefined,
sttModel: current.sttModel.trim() || undefined,
ttsModel: current.ttsModel.trim() || undefined,
ttsVoice: current.ttsVoice.trim() || undefined,
})
await loadSpeechCapabilities(true)
setDrafts({
apiKey: "",
baseUrl: current.baseUrl.trim(),
sttModel: current.sttModel.trim() || serverSettings().speech.sttModel,
ttsModel: current.ttsModel.trim() || serverSettings().speech.ttsModel,
ttsVoice: current.ttsVoice.trim() || serverSettings().speech.ttsVoice,
})
setApiKeyTouched(false)
setClearStoredApiKey(false)
setSaveStatus("saved")
} catch (error) {
log.error("Failed to save speech settings", error)
setSaveStatus("error")
} finally {
setIsSaving(false)
}
}
return (
<div class="settings-card">
<div class="settings-card-header">
<div class="settings-card-heading-with-icon">
<Volume2 class="settings-card-heading-icon" />
<div>
<h3 class="settings-card-title">{t("settings.speech.title")}</h3>
<p class="settings-card-subtitle">{t("settings.speech.subtitle")}</p>
</div>
</div>
<span class="settings-scope-badge settings-scope-badge-server">{t("settings.scope.server")}</span>
</div>
<div class="settings-stack">
<div class="settings-toggle-row settings-toggle-row-compact">
<div>
<div class="settings-toggle-title">{t("settings.speech.provider.title")}</div>
<div class="settings-toggle-caption">{t("settings.speech.provider.subtitle")}</div>
</div>
<div class="settings-toolbar-inline">
<span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
<span class="settings-inline-note">{capabilityLabel()}</span>
<span class="settings-inline-note">{saveStatusLabel()}</span>
<button
type="button"
class="selector-button selector-button-primary w-auto whitespace-nowrap"
onClick={() => void handleSave()}
disabled={!isDirty() || isSaving()}
>
{isSaving() ? t("settings.speech.save.saving") : t("settings.speech.save.action")}
</button>
</div>
</div>
<Field
label={t("settings.speech.apiKey.title")}
caption={t("settings.speech.apiKey.subtitle")}
value={drafts().apiKey}
onInput={(value) => updateDraft("apiKey", value)}
type="password"
placeholder={serverSettings().speech.hasApiKey ? t("settings.speech.apiKey.placeholder") : undefined}
/>
<Show when={serverSettings().speech.hasApiKey && !apiKeyTouched() && drafts().apiKey.length === 0}>
<div class="settings-inline-note">
{clearStoredApiKey() ? t("settings.speech.apiKey.clearPending") : t("settings.speech.apiKey.storedNote")}{" "}
<Show when={!clearStoredApiKey()}>
<button
type="button"
class="selector-button selector-button-secondary w-auto whitespace-nowrap"
onClick={() => {
setClearStoredApiKey(true)
setSaveStatus("idle")
}}
>
{t("settings.speech.apiKey.clearAction")}
</button>
</Show>
</div>
</Show>
<Field
label={t("settings.speech.baseUrl.title")}
caption={t("settings.speech.baseUrl.subtitle")}
value={drafts().baseUrl}
onInput={(value) => updateDraft("baseUrl", value)}
placeholder={t("settings.speech.baseUrl.placeholder")}
/>
<Field
label={t("settings.speech.sttModel.title")}
caption={t("settings.speech.sttModel.subtitle")}
value={drafts().sttModel}
onInput={(value) => updateDraft("sttModel", value)}
/>
<Field
label={t("settings.speech.ttsModel.title")}
caption={t("settings.speech.ttsModel.subtitle")}
value={drafts().ttsModel}
onInput={(value) => updateDraft("ttsModel", value)}
/>
<Field
label={t("settings.speech.ttsVoice.title")}
caption={t("settings.speech.ttsVoice.subtitle")}
value={drafts().ttsVoice}
onInput={(value) => updateDraft("ttsVoice", value)}
icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
/>
<div class="settings-inline-note">{t("settings.speech.help")}</div>
</div>
</div>
)
}
const Field: Component<{
label: string
caption: string
value: string
type?: string
placeholder?: string
onInput: (value: string) => void
icon?: any
}> = (props) => {
return (
<div class="settings-toggle-row settings-toggle-row-compact">
<div>
<div class="settings-toggle-title">{props.label}</div>
<div class="settings-toggle-caption">{props.caption}</div>
</div>
<div class="flex items-center gap-2 min-w-[18rem] max-w-[24rem] w-full">
{props.icon}
<input
type={props.type ?? "text"}
value={props.value}
onInput={(event) => props.onInput(event.currentTarget.value)}
class="selector-input w-full"
placeholder={props.placeholder}
/>
</div>
</div>
)
}
export default SpeechSettingsCard

View File

@@ -0,0 +1,10 @@
import type { Component } from "solid-js"
import SpeechSettingsCard from "./speech-settings-card"
export const SpeechSettingsSection: Component = () => {
return (
<div class="settings-section-stack">
<SpeechSettingsCard />
</div>
)
}

View File

@@ -7,6 +7,9 @@ import type {
FileSystemCreateFolderResponse,
FileSystemListResponse,
InstanceData,
SpeechCapabilitiesResponse,
SpeechSynthesisResponse,
SpeechTranscriptionResponse,
ServerMeta,
WorkspaceCreateRequest,
WorkspaceDescriptor,
@@ -235,6 +238,27 @@ export const serverApi = {
body: JSON.stringify({ path }),
})
},
fetchSpeechCapabilities(): Promise<SpeechCapabilitiesResponse> {
return request<SpeechCapabilitiesResponse>("/api/speech/capabilities")
},
transcribeAudio(payload: {
audioBase64: string
mimeType: string
filename?: string
language?: string
prompt?: string
}): Promise<SpeechTranscriptionResponse> {
return request<SpeechTranscriptionResponse>("/api/speech/transcribe", {
method: "POST",
body: JSON.stringify(payload),
})
},
synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" }): Promise<SpeechSynthesisResponse> {
return request<SpeechSynthesisResponse>("/api/speech/synthesize", {
method: "POST",
body: JSON.stringify(payload),
})
},
listFileSystem(path?: string, options?: { includeFiles?: boolean }): Promise<FileSystemListResponse> {
const params = new URLSearchParams()
if (path && path !== ".") {

View File

@@ -34,6 +34,7 @@ export interface UseCommandsOptions {
toggleUsageMetrics: () => void
toggleAutoCleanupBlankSessions: () => void
togglePromptSubmitOnEnter: () => void
toggleShowPromptVoiceInput: () => void
setDiffViewMode: (mode: "split" | "unified") => void
setToolOutputExpansion: (mode: ExpansionPreference) => void
setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -435,6 +436,7 @@ export function useCommands(options: UseCommandsOptions) {
toggleUsageMetrics: options.toggleUsageMetrics,
toggleAutoCleanupBlankSessions: options.toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter: options.togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput: options.toggleShowPromptVoiceInput,
setDiffViewMode: options.setDiffViewMode,
setToolOutputExpansion: options.setToolOutputExpansion,
setDiagnosticsExpansion: options.setDiagnosticsExpansion,

View File

@@ -138,4 +138,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "Send message",
"promptInput.send.errorFallback": "Failed to send message",
"promptInput.send.errorTitle": "Send failed",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "Show or hide token and cost stats for assistant messages.",
"settings.behavior.autoCleanup.title": "Auto-cleanup blank sessions",
"settings.behavior.autoCleanup.subtitle": "Automatically clean up blank sessions when creating new ones.",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "Enter to submit",
"settings.behavior.promptSubmit.subtitle": "Use Enter to submit prompts; Cmd/Ctrl+Enter inserts a new line.",
"settings.speech.title": "Speech",
"settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
"settings.speech.provider.title": "Provider",
"settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "Checking configuration...",
"settings.speech.status.configured": "Configured",
"settings.speech.status.missing": "Missing API key",
"settings.speech.status.error": "Speech service unavailable",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
"settings.speech.apiKey.placeholder": "Enter a new API key",
"settings.speech.apiKey.storedNote": "A saved API key is hidden. Enter a new value to replace it, or leave the field blank to keep it.",
"settings.speech.apiKey.clearAction": "Clear saved key",
"settings.speech.apiKey.clearPending": "The saved API key will be removed when you save.",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "Transcription model",
"settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
"settings.speech.ttsModel.title": "Speech model",
"settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
"settings.speech.ttsVoice.title": "Default voice",
"settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
"settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
"settings.speech.save.action": "Save",
"settings.speech.save.saving": "Saving...",
"settings.speech.save.saved": "Saved",
"settings.speech.save.unsaved": "Unsaved changes",
"settings.speech.save.error": "Save failed",
} as const

View File

@@ -140,4 +140,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "Enviar mensaje",
"promptInput.send.errorFallback": "No se pudo enviar el mensaje",
"promptInput.send.errorTitle": "Error al enviar",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "Muestra u oculta estadisticas de tokens y costo en mensajes del asistente.",
"settings.behavior.autoCleanup.title": "Limpieza automatica de sesiones en blanco",
"settings.behavior.autoCleanup.subtitle": "Limpia automaticamente las sesiones en blanco al crear nuevas.",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "Enter para enviar",
"settings.behavior.promptSubmit.subtitle": "Usa Enter para enviar; Cmd/Ctrl+Enter inserta una nueva linea.",
"settings.speech.title": "Voz",
"settings.speech.subtitle": "Configura ahora el reconocimiento de voz y prepara la base de texto a voz para funciones futuras.",
"settings.speech.provider.title": "Proveedor",
"settings.speech.provider.subtitle": "Las solicitudes de voz usan el adaptador de voz del servidor.",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "Comprobando configuración...",
"settings.speech.status.configured": "Configurado",
"settings.speech.status.missing": "Falta la clave API",
"settings.speech.status.error": "Servicio de voz no disponible",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "Se usa para las solicitudes de voz gestionadas por CodeNomad.",
"settings.speech.apiKey.placeholder": "Introduce una nueva clave API",
"settings.speech.apiKey.storedNote": "Hay una clave API guardada y oculta. Introduce un nuevo valor para reemplazarla o deja el campo vacío para conservarla.",
"settings.speech.apiKey.clearAction": "Borrar clave guardada",
"settings.speech.apiKey.clearPending": "La clave API guardada se eliminará al guardar.",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "Anulación opcional para endpoints de voz compatibles con OpenAI.",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "Modelo de transcripción",
"settings.speech.sttModel.subtitle": "Modelo usado para las solicitudes de voz a texto en el prompt.",
"settings.speech.ttsModel.title": "Modelo de voz",
"settings.speech.ttsModel.subtitle": "Modelo predeterminado de texto a voz reservado para futuras funciones de reproducción.",
"settings.speech.ttsVoice.title": "Voz predeterminada",
"settings.speech.ttsVoice.subtitle": "Voz predeterminada de texto a voz reservada para futuras funciones de reproducción.",
"settings.speech.help": "La entrada de voz del prompt solo aparece cuando la transcripción de voz está configurada y este navegador la admite.",
"settings.speech.save.action": "Guardar",
"settings.speech.save.saving": "Guardando...",
"settings.speech.save.saved": "Guardado",
"settings.speech.save.unsaved": "Cambios sin guardar",
"settings.speech.save.error": "Error al guardar",
} as const

View File

@@ -140,4 +140,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "Envoyer le message",
"promptInput.send.errorFallback": "Impossible d'envoyer le message",
"promptInput.send.errorTitle": "Échec de l'envoi",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "Afficher ou masquer les stats de tokens et de cout pour les messages de l'assistant.",
"settings.behavior.autoCleanup.title": "Nettoyage auto des sessions vides",
"settings.behavior.autoCleanup.subtitle": "Nettoyer automatiquement les sessions vides lors de la creation de nouvelles.",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "Entrer pour envoyer",
"settings.behavior.promptSubmit.subtitle": "Utiliser Entrer pour envoyer; Cmd/Ctrl+Entrer insere une nouvelle ligne.",
"settings.speech.title": "Voix",
"settings.speech.subtitle": "Configurez dès maintenant la reconnaissance vocale et préparez la synthèse vocale pour de futures fonctionnalités.",
"settings.speech.provider.title": "Fournisseur",
"settings.speech.provider.subtitle": "Les requêtes vocales utilisent l'adaptateur vocal côté serveur.",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "Vérification de la configuration...",
"settings.speech.status.configured": "Configuré",
"settings.speech.status.missing": "Clé API manquante",
"settings.speech.status.error": "Service vocal indisponible",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "Utilisée pour les requêtes vocales gérées par CodeNomad.",
"settings.speech.apiKey.placeholder": "Saisissez une nouvelle clé API",
"settings.speech.apiKey.storedNote": "Une clé API enregistrée est masquée. Saisissez une nouvelle valeur pour la remplacer ou laissez le champ vide pour la conserver.",
"settings.speech.apiKey.clearAction": "Effacer la clé enregistrée",
"settings.speech.apiKey.clearPending": "La clé API enregistrée sera supprimée lors de l'enregistrement.",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "Remplacement facultatif des points d'accès vocaux compatibles OpenAI.",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "Modèle de transcription",
"settings.speech.sttModel.subtitle": "Modèle utilisé pour les requêtes vocales vers texte du prompt.",
"settings.speech.ttsModel.title": "Modèle vocal",
"settings.speech.ttsModel.subtitle": "Modèle de synthèse vocale par défaut réservé aux futures fonctions de lecture.",
"settings.speech.ttsVoice.title": "Voix par défaut",
"settings.speech.ttsVoice.subtitle": "Voix de synthèse vocale par défaut réservée aux futures fonctions de lecture.",
"settings.speech.help": "La saisie vocale du prompt n'apparaît que lorsque la transcription vocale est configurée et prise en charge par ce navigateur.",
"settings.speech.save.action": "Enregistrer",
"settings.speech.save.saving": "Enregistrement...",
"settings.speech.save.saved": "Enregistré",
"settings.speech.save.unsaved": "Modifications non enregistrées",
"settings.speech.save.error": "Échec de l'enregistrement",
} as const

View File

@@ -140,4 +140,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "メッセージを送信",
"promptInput.send.errorFallback": "メッセージの送信に失敗しました",
"promptInput.send.errorTitle": "送信に失敗",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "アシスタントのメッセージにトークン数とコストの統計を表示/非表示にします。",
"settings.behavior.autoCleanup.title": "空のセッションを自動クリーンアップ",
"settings.behavior.autoCleanup.subtitle": "新しいセッション作成時に空のセッションを自動的にクリーンアップします。",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "Enterで送信",
"settings.behavior.promptSubmit.subtitle": "Enterで送信し、Cmd/Ctrl+Enterで改行します。",
"settings.speech.title": "音声",
"settings.speech.subtitle": "今すぐ音声入力を設定し、今後の機能のために音声合成の基盤も準備します。",
"settings.speech.provider.title": "プロバイダー",
"settings.speech.provider.subtitle": "音声リクエストはサーバー側の音声アダプターを使用します。",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "設定を確認しています...",
"settings.speech.status.configured": "設定済み",
"settings.speech.status.missing": "APIキーがありません",
"settings.speech.status.error": "音声サービスを利用できません",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "CodeNomadが管理する音声リクエストに使用されます。",
"settings.speech.apiKey.placeholder": "新しいAPIキーを入力",
"settings.speech.apiKey.storedNote": "保存済みのAPIキーは非表示になっています。置き換えるには新しい値を入力し、そのまま使うには空欄のままにしてください。",
"settings.speech.apiKey.clearAction": "保存済みキーを削除",
"settings.speech.apiKey.clearPending": "保存すると、保存済みのAPIキーは削除されます。",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "OpenAI互換の音声エンドポイント用の任意の上書き設定です。",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "文字起こしモデル",
"settings.speech.sttModel.subtitle": "プロンプトの音声入力を文字起こしする際に使用するモデルです。",
"settings.speech.ttsModel.title": "音声モデル",
"settings.speech.ttsModel.subtitle": "将来の再生機能のために予約されている既定の音声合成モデルです。",
"settings.speech.ttsVoice.title": "既定の音声",
"settings.speech.ttsVoice.subtitle": "将来の再生機能のために予約されている既定の音声合成ボイスです。",
"settings.speech.help": "プロンプト音声入力は、音声文字起こしが設定され、このブラウザーでサポートされている場合にのみ表示されます。",
"settings.speech.save.action": "保存",
"settings.speech.save.saving": "保存中...",
"settings.speech.save.saved": "保存済み",
"settings.speech.save.unsaved": "未保存の変更",
"settings.speech.save.error": "保存に失敗しました",
} as const

View File

@@ -140,4 +140,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "Отправить сообщение",
"promptInput.send.errorFallback": "Не удалось отправить сообщение",
"promptInput.send.errorTitle": "Не удалось отправить",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "Показывать или скрывать статистику токенов и стоимости в сообщениях ассистента.",
"settings.behavior.autoCleanup.title": "Автоочистка пустых сессий",
"settings.behavior.autoCleanup.subtitle": "Автоматически очищать пустые сессии при создании новых.",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "Enter для отправки",
"settings.behavior.promptSubmit.subtitle": "Enter отправляет; Cmd/Ctrl+Enter вставляет новую строку.",
"settings.speech.title": "Речь",
"settings.speech.subtitle": "Настройте преобразование речи в текст сейчас и подготовьте основу для синтеза речи в будущих функциях.",
"settings.speech.provider.title": "Провайдер",
"settings.speech.provider.subtitle": "Речевые запросы используют серверный речевой адаптер.",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "Проверка конфигурации...",
"settings.speech.status.configured": "Настроено",
"settings.speech.status.missing": "Отсутствует API-ключ",
"settings.speech.status.error": "Речевой сервис недоступен",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "Используется для речевых запросов, управляемых CodeNomad.",
"settings.speech.apiKey.placeholder": "Введите новый API-ключ",
"settings.speech.apiKey.storedNote": "Сохранённый API-ключ скрыт. Введите новое значение, чтобы заменить его, или оставьте поле пустым, чтобы сохранить текущий ключ.",
"settings.speech.apiKey.clearAction": "Удалить сохранённый ключ",
"settings.speech.apiKey.clearPending": "Сохранённый API-ключ будет удалён после сохранения.",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "Необязательная переопределяющая ссылка для речевых endpoint'ов, совместимых с OpenAI.",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "Модель распознавания",
"settings.speech.sttModel.subtitle": "Модель, используемая для преобразования голосового ввода в тексте запроса.",
"settings.speech.ttsModel.title": "Речевая модель",
"settings.speech.ttsModel.subtitle": "Модель синтеза речи по умолчанию, зарезервированная для будущих функций воспроизведения.",
"settings.speech.ttsVoice.title": "Голос по умолчанию",
"settings.speech.ttsVoice.subtitle": "Голос синтеза речи по умолчанию, зарезервированный для будущих функций воспроизведения.",
"settings.speech.help": "Голосовой ввод в поле запроса появляется только если распознавание речи настроено и поддерживается этим браузером.",
"settings.speech.save.action": "Сохранить",
"settings.speech.save.saving": "Сохранение...",
"settings.speech.save.saved": "Сохранено",
"settings.speech.save.unsaved": "Есть несохранённые изменения",
"settings.speech.save.error": "Не удалось сохранить",
} as const

View File

@@ -140,4 +140,11 @@ export const messagingMessages = {
"promptInput.send.ariaLabel": "发送消息",
"promptInput.send.errorFallback": "发送消息失败",
"promptInput.send.errorTitle": "发送失败",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
} as const

View File

@@ -65,6 +65,7 @@ export const settingsMessages = {
"settings.nav.appearance": "Appearance",
"settings.nav.notifications": "Notifications",
"settings.nav.remote": "Remote Access",
"settings.nav.speech": "Speech",
"settings.nav.opencode": "OpenCode",
"settings.scope.device": "This device",
"settings.scope.server": "Server setting",
@@ -137,6 +138,38 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "显示或隐藏助手消息的令牌与成本统计。",
"settings.behavior.autoCleanup.title": "自动清理空会话",
"settings.behavior.autoCleanup.subtitle": "创建新会话时自动清理空会话。",
"settings.behavior.promptVoiceInput.title": "Prompt voice input",
"settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
"settings.behavior.promptSubmit.title": "回车发送",
"settings.behavior.promptSubmit.subtitle": "使用回车发送Cmd/Ctrl+回车插入新行。",
"settings.speech.title": "语音",
"settings.speech.subtitle": "立即配置语音转文字,并为后续功能预留文字转语音基础。",
"settings.speech.provider.title": "提供商",
"settings.speech.provider.subtitle": "语音请求使用服务器端语音适配器。",
"settings.speech.provider.openaiCompatible": "OpenAI-compatible",
"settings.speech.status.loading": "正在检查配置...",
"settings.speech.status.configured": "已配置",
"settings.speech.status.missing": "缺少 API 密钥",
"settings.speech.status.error": "语音服务不可用",
"settings.speech.apiKey.title": "API key",
"settings.speech.apiKey.subtitle": "用于 CodeNomad 管理的语音请求。",
"settings.speech.apiKey.placeholder": "输入新的 API 密钥",
"settings.speech.apiKey.storedNote": "已保存的 API 密钥会被隐藏。输入新值可替换它,留空则保留当前密钥。",
"settings.speech.apiKey.clearAction": "清除已保存的密钥",
"settings.speech.apiKey.clearPending": "保存后将删除已保存的 API 密钥。",
"settings.speech.baseUrl.title": "Base URL",
"settings.speech.baseUrl.subtitle": "可选,用于覆盖 OpenAI 兼容语音端点的基础地址。",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "转写模型",
"settings.speech.sttModel.subtitle": "用于提示框语音转文字请求的模型。",
"settings.speech.ttsModel.title": "语音模型",
"settings.speech.ttsModel.subtitle": "为未来播放功能预留的默认文字转语音模型。",
"settings.speech.ttsVoice.title": "默认语音",
"settings.speech.ttsVoice.subtitle": "为未来播放功能预留的默认文字转语音音色。",
"settings.speech.help": "只有在语音转写已配置且当前浏览器支持时,提示框语音输入才会显示。",
"settings.speech.save.action": "保存",
"settings.speech.save.saving": "保存中...",
"settings.speech.save.saved": "已保存",
"settings.speech.save.unsaved": "有未保存的更改",
"settings.speech.save.error": "保存失败",
} as const

View File

@@ -42,6 +42,7 @@ export type BehaviorRegistryActions = {
toggleUsageMetrics: () => void
toggleAutoCleanupBlankSessions: () => void
togglePromptSubmitOnEnter: () => void
toggleShowPromptVoiceInput: () => void
setDiffViewMode: (mode: "split" | "unified") => void
setToolOutputExpansion: (mode: ExpansionPreference) => void
setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -248,6 +249,24 @@ export function getBehaviorSettings(actions: BehaviorRegistryActions): BehaviorS
)
},
},
{
kind: "toggle",
id: "behavior.promptVoiceInput",
titleKey: "settings.behavior.promptVoiceInput.title",
subtitleKey: "settings.behavior.promptVoiceInput.subtitle",
get: (p) => Boolean(p.showPromptVoiceInput ?? true),
set: (next) => {
if (updatePreferences) {
updatePreferences({ showPromptVoiceInput: next })
return
}
setBooleanByToggle(
() => Boolean(prefs().showPromptVoiceInput ?? true),
actions.toggleShowPromptVoiceInput,
next,
)
},
},
{
kind: "toggle",
id: "behavior.promptSubmitOnEnter",

View File

@@ -7,6 +7,7 @@ import {
updateInstanceConfig as updateInstanceData,
} from "./instance-config"
import { getLogger } from "../lib/logger"
import { loadSpeechCapabilities, resetSpeechCapabilities } from "./speech"
const log = getLogger("actions")
@@ -27,6 +28,21 @@ export type DiffViewMode = "split" | "unified"
export type ExpansionPreference = "expanded" | "collapsed"
export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
export type ListeningMode = "local" | "all"
export type SpeechProviderPreference = "openai-compatible"
export interface SpeechSettings {
provider: SpeechProviderPreference
apiKey?: string
hasApiKey: boolean
baseUrl?: string
sttModel: string
ttsModel: string
ttsVoice: string
}
export type SpeechSettingsUpdate = Partial<Omit<SpeechSettings, "apiKey">> & {
apiKey?: string | null
}
export interface UiSettings {
showThinkingBlocks: boolean
@@ -34,6 +50,7 @@ export interface UiSettings {
thinkingBlocksExpansion: ExpansionPreference
showTimelineTools: boolean
promptSubmitOnEnter: boolean
showPromptVoiceInput: boolean
locale?: string
diffViewMode: DiffViewMode
toolOutputExpansion: ExpansionPreference
@@ -75,6 +92,7 @@ interface ServerConfigBucket {
listeningMode?: ListeningMode
environmentVariables?: Record<string, string>
opencodeBinary?: string
speech?: Partial<SpeechSettings>
}
interface UiStateBucket {
@@ -107,6 +125,7 @@ const defaultUiSettings: UiSettings = {
thinkingBlocksExpansion: "expanded",
showTimelineTools: true,
promptSubmitOnEnter: false,
showPromptVoiceInput: true,
diffViewMode: "split",
toolOutputExpansion: "expanded",
diagnosticsExpansion: "expanded",
@@ -120,6 +139,14 @@ const defaultUiSettings: UiSettings = {
notifyOnIdle: true,
}
const defaultSpeechSettings: SpeechSettings = {
provider: "openai-compatible",
hasApiKey: false,
sttModel: "gpt-4o-mini-transcribe",
ttsModel: "gpt-4o-mini-tts",
ttsVoice: "alloy",
}
function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
const sanitized = input ?? {}
return {
@@ -129,6 +156,7 @@ function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
thinkingBlocksExpansion: sanitized.thinkingBlocksExpansion ?? defaultUiSettings.thinkingBlocksExpansion,
showTimelineTools: sanitized.showTimelineTools ?? defaultUiSettings.showTimelineTools,
promptSubmitOnEnter: sanitized.promptSubmitOnEnter ?? defaultUiSettings.promptSubmitOnEnter,
showPromptVoiceInput: sanitized.showPromptVoiceInput ?? defaultUiSettings.showPromptVoiceInput,
locale: sanitized.locale ?? defaultUiSettings.locale,
diffViewMode: sanitized.diffViewMode ?? defaultUiSettings.diffViewMode,
toolOutputExpansion: sanitized.toolOutputExpansion ?? defaultUiSettings.toolOutputExpansion,
@@ -156,6 +184,28 @@ function normalizeRecord(value: unknown): Record<string, string> {
return out
}
function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): SpeechSettings {
const sanitized = input ?? {}
return {
provider: sanitized.provider === "openai-compatible" ? sanitized.provider : defaultSpeechSettings.provider,
apiKey: typeof sanitized.apiKey === "string" && sanitized.apiKey.trim() ? sanitized.apiKey.trim() : undefined,
hasApiKey: sanitized.hasApiKey === true || (typeof sanitized.apiKey === "string" && sanitized.apiKey.trim().length > 0),
baseUrl: typeof sanitized.baseUrl === "string" && sanitized.baseUrl.trim() ? sanitized.baseUrl.trim() : undefined,
sttModel:
typeof sanitized.sttModel === "string" && sanitized.sttModel.trim()
? sanitized.sttModel.trim()
: defaultSpeechSettings.sttModel,
ttsModel:
typeof sanitized.ttsModel === "string" && sanitized.ttsModel.trim()
? sanitized.ttsModel.trim()
: defaultSpeechSettings.ttsModel,
ttsVoice:
typeof sanitized.ttsVoice === "string" && sanitized.ttsVoice.trim()
? sanitized.ttsVoice.trim()
: defaultSpeechSettings.ttsVoice,
}
}
function cloneArray<T>(value: unknown, mapper: (item: any) => T | null): T[] {
if (!Array.isArray(value)) return []
const out: T[] = []
@@ -206,12 +256,15 @@ function normalizeUiState(input?: UiStateBucket | null): NormalizedUiState {
}
}
function normalizeServerConfig(input?: ServerConfigBucket | null): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> {
function normalizeServerConfig(
input?: ServerConfigBucket | null,
): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> & { speech: SpeechSettings } {
const source = input ?? {}
const listeningMode = source.listeningMode === "all" ? "all" : "local"
const opencodeBinary = typeof source.opencodeBinary === "string" && source.opencodeBinary.trim() ? source.opencodeBinary : "opencode"
const environmentVariables = normalizeRecord(source.environmentVariables)
return { listeningMode, opencodeBinary, environmentVariables }
const speech = normalizeSpeechSettings(source.speech)
return { listeningMode, opencodeBinary, environmentVariables, speech }
}
function getModelKey(model: { providerId: string; modelId: string }): string {
@@ -342,6 +395,27 @@ function updateLastUsedBinary(path: string): void {
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to update binary list", error))
}
async function updateSpeechSettings(updates: SpeechSettingsUpdate): Promise<void> {
const apiKeyPatch = updates.apiKey
const { apiKey: _apiKey, ...restUpdates } = updates
const next = normalizeSpeechSettings({
...serverSettings().speech,
...restUpdates,
...(apiKeyPatch === null ? {} : { apiKey: apiKeyPatch }),
})
const { hasApiKey: _hasApiKey, ...persistedSpeech } = next
const patch = {
...persistedSpeech,
...(apiKeyPatch === null ? { apiKey: null } : {}),
}
try {
await patchConfigOwner("server", { speech: patch })
} catch (error) {
log.error("Failed to update speech settings", error)
throw error
}
}
function addOpenCodeBinary(path: string, version?: string): void {
const nextList = buildBinaryList(path, version, opencodeBinaries())
void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to add binary", error))
@@ -476,6 +550,10 @@ function togglePromptSubmitOnEnter(): void {
updateUiSettings({ promptSubmitOnEnter: !preferences().promptSubmitOnEnter })
}
function toggleShowPromptVoiceInput(): void {
updateUiSettings({ showPromptVoiceInput: !preferences().showPromptVoiceInput })
}
function toggleAutoCleanupBlankSessions(): void {
const nextValue = !preferences().autoCleanupBlankSessions
log.info("toggle auto cleanup", { value: nextValue })
@@ -521,6 +599,7 @@ interface ConfigContextValue {
addEnvironmentVariable: typeof addEnvironmentVariable
removeEnvironmentVariable: typeof removeEnvironmentVariable
updateLastUsedBinary: typeof updateLastUsedBinary
updateSpeechSettings: typeof updateSpeechSettings
// ui-owned state
recentFolders: typeof recentFolders
@@ -544,6 +623,7 @@ interface ConfigContextValue {
toggleUsageMetrics: typeof toggleUsageMetrics
toggleAutoCleanupBlankSessions: typeof toggleAutoCleanupBlankSessions
togglePromptSubmitOnEnter: typeof togglePromptSubmitOnEnter
toggleShowPromptVoiceInput: typeof toggleShowPromptVoiceInput
setDiffViewMode: typeof setDiffViewMode
setToolOutputExpansion: typeof setToolOutputExpansion
setDiagnosticsExpansion: typeof setDiagnosticsExpansion
@@ -569,6 +649,7 @@ const configContextValue: ConfigContextValue = {
addEnvironmentVariable,
removeEnvironmentVariable,
updateLastUsedBinary,
updateSpeechSettings,
recentFolders,
opencodeBinaries,
uiState,
@@ -588,6 +669,7 @@ const configContextValue: ConfigContextValue = {
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,
@@ -610,6 +692,8 @@ export const ConfigProvider: ParentComponent = (props) => {
const unsubServer = storage.onConfigOwnerChanged("server", (bucket) => {
setServerConfigBucket(bucket as any)
setIsLoaded(true)
resetSpeechCapabilities()
void loadSpeechCapabilities(true)
})
const unsubStateUi = storage.onStateOwnerChanged("ui", (bucket) => {
setUiStateBucket(bucket as any)
@@ -648,6 +732,7 @@ export {
addEnvironmentVariable,
removeEnvironmentVariable,
updateLastUsedBinary,
updateSpeechSettings,
addRecentFolder,
removeRecentFolder,
addOpenCodeBinary,
@@ -664,6 +749,7 @@ export {
toggleUsageMetrics,
toggleAutoCleanupBlankSessions,
togglePromptSubmitOnEnter,
toggleShowPromptVoiceInput,
setDiffViewMode,
setToolOutputExpansion,
setDiagnosticsExpansion,

View File

@@ -1,6 +1,6 @@
import { createSignal } from "solid-js"
export type SettingsSectionId = "appearance" | "notifications" | "remote" | "opencode"
export type SettingsSectionId = "appearance" | "notifications" | "remote" | "speech" | "opencode"
const [settingsOpen, setSettingsOpen] = createSignal(false)
const [activeSettingsSection, setActiveSettingsSection] = createSignal<SettingsSectionId>("appearance")

View File

@@ -0,0 +1,46 @@
import { createSignal } from "solid-js"
import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
import { serverApi } from "../lib/api-client"
import { getLogger } from "../lib/logger"
const log = getLogger("api")
const [speechCapabilities, setSpeechCapabilities] = createSignal<SpeechCapabilitiesResponse | null>(null)
const [speechCapabilitiesLoading, setSpeechCapabilitiesLoading] = createSignal(false)
const [speechCapabilitiesError, setSpeechCapabilitiesError] = createSignal<string | null>(null)
let speechCapabilitiesPromise: Promise<SpeechCapabilitiesResponse | null> | null = null
async function loadSpeechCapabilities(force = false): Promise<SpeechCapabilitiesResponse | null> {
if (!force && speechCapabilities()) return speechCapabilities()
if (speechCapabilitiesPromise) return speechCapabilitiesPromise
setSpeechCapabilitiesLoading(true)
setSpeechCapabilitiesError(null)
speechCapabilitiesPromise = serverApi
.fetchSpeechCapabilities()
.then((result) => {
setSpeechCapabilities(result)
setSpeechCapabilitiesError(null)
return result
})
.catch((error) => {
log.error("Failed to load speech capabilities", error)
setSpeechCapabilities(null)
setSpeechCapabilitiesError(error instanceof Error ? error.message : String(error))
return null
})
.finally(() => {
setSpeechCapabilitiesLoading(false)
speechCapabilitiesPromise = null
})
return speechCapabilitiesPromise
}
function resetSpeechCapabilities(): void {
setSpeechCapabilities(null)
setSpeechCapabilitiesError(null)
}
export { speechCapabilities, speechCapabilitiesLoading, speechCapabilitiesError, loadSpeechCapabilities, resetSpeechCapabilities }

View File

@@ -36,8 +36,8 @@
.prompt-input {
@apply w-full pt-2.5 border text-sm resize-none outline-none transition-colors;
padding-inline-start: 2.5rem;
padding-inline-end: 0.75rem;
padding-inline-start: 0.75rem;
padding-inline-end: 5.5rem;
font-family: inherit;
background-color: var(--surface-base);
color: var(--text-primary);
@@ -83,21 +83,27 @@
color: var(--text-primary);
}
/* Navigation buttons container (expand, prev, next).
Intentionally at inline-start (left in LTR, right in RTL) so buttons never overlap
the scrollbar, which browsers always place at inline-end. */
/* Navigation buttons container (expand, prev, next). */
.prompt-nav-buttons {
position: absolute;
top: 0.25rem;
inset-inline-start: 0.25rem;
inset-inline-end: 0.25rem;
bottom: 0.25rem;
display: flex;
flex-direction: column;
align-items: flex-end;
justify-content: flex-start;
gap: 0.125rem;
z-index: 2;
}
.prompt-nav-top-row {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 0.125rem;
}
.prompt-expand-button,
.prompt-history-button {
@apply w-7 h-7 flex items-center justify-center rounded-md;
@@ -179,6 +185,53 @@
color: var(--button-danger-text, var(--text-inverted, #ffffff));
}
.prompt-voice-button {
@apply h-10 rounded-md border-none cursor-pointer flex items-center justify-center transition-all flex-shrink-0;
min-width: 2.5rem;
background-color: color-mix(in oklab, var(--surface-secondary) 82%, var(--surface-base));
color: var(--text-secondary);
}
.prompt-voice-button:hover:not(:disabled) {
color: var(--text-primary);
background-color: color-mix(in oklab, var(--accent-primary) 12%, var(--surface-secondary));
@apply scale-105;
}
.prompt-voice-button:active:not(:disabled) {
@apply scale-95;
}
.prompt-voice-button.is-recording {
min-width: 3.5rem;
background-color: color-mix(in oklab, var(--button-danger-bg, rgba(239, 68, 68, 0.85)) 88%, white 12%);
color: var(--button-danger-text, var(--text-inverted, #ffffff));
}
.prompt-nav-voice-button {
min-width: 1.75rem;
width: 1.75rem;
height: 1.75rem;
border-radius: 0.375rem;
}
.prompt-nav-voice-button.is-recording {
min-width: 3.5rem;
width: auto;
}
.prompt-voice-button:disabled {
@apply opacity-50 cursor-not-allowed;
}
.prompt-voice-timer {
font-size: 0.68rem;
font-variant-numeric: tabular-nums;
font-weight: 600;
line-height: 1;
color: currentColor;
}
.stop-button:hover:not(:disabled) {
background-color: var(--button-danger-hover-bg, rgba(239, 68, 68, 0.9));
@apply opacity-95 scale-105;
@@ -344,7 +397,7 @@
.prompt-input {
min-height: 0;
padding: 0.5rem 0.75rem;
padding-inline-start: 2.5rem; /* preserve space for nav buttons */
padding-inline-end: 5.5rem;
padding-bottom: 0.75rem;
}