feat(speech): make prompt input push to talk

Revert "feat(speech): add realtime prompt dictation support"
This reverts commit f9b5e2b529.
2026-03-24 22:42:27 +00:00 · 2026-03-24 20:52:04 +00:00 · 2026-03-19 11:32:45 +00:00 · 2026-03-13 08:34:34 +00:00 · 2026-03-12 22:04:57 +00:00
34 changed files with 1331 additions and 9 deletions
--- a/package-lock.json
+++ b/package-lock.json
@@ -8231,6 +8231,27 @@
        "regex-recursion": "^6.0.2"
      }
    },
    "node_modules/openai": {
      "version": "6.27.0",
      "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz",
      "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==",
      "license": "Apache-2.0",
      "bin": {
        "openai": "bin/cli"
      },
      "peerDependencies": {
        "ws": "^8.18.0",
        "zod": "^3.25 || ^4.0"
      },
      "peerDependenciesMeta": {
        "ws": {
          "optional": true
        },
        "zod": {
          "optional": true
        }
      }
    },
    "node_modules/own-keys": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz",
@@ -11988,6 +12009,7 @@
    "node_modules/zod": {
      "version": "3.25.76",
      "license": "MIT",
      "peer": true,
      "funding": {
        "url": "https://github.com/sponsors/colinhacks"
      }
@@ -12049,6 +12071,7 @@
        "fastify": "^4.28.1",
        "fuzzysort": "^2.0.4",
        "node-forge": "^1.3.3",
        "openai": "^6.27.0",
        "pino": "^9.4.0",
        "undici": "^6.19.8",
        "yaml": "^2.4.2",
--- a/packages/server/package.json
+++ b/packages/server/package.json
@@ -32,6 +32,7 @@
    "fastify": "^4.28.1",
    "fuzzysort": "^2.0.4",
    "node-forge": "^1.3.3",
    "openai": "^6.27.0",
    "pino": "^9.4.0",
    "undici": "^6.19.8",
    "yaml": "^2.4.2",
--- a/packages/server/src/api-types.ts
+++ b/packages/server/src/api-types.ts
@@ -207,6 +207,36 @@ export interface BinaryValidationResult {
  error?: string
 }
 /** One timed span of transcribed text within a transcription result. */
 export interface SpeechSegment {
  /** Start time of the span, in milliseconds. */
  startMs: number
  /** End time of the span, in milliseconds. */
  endMs: number
  /** Transcribed text for the span. */
  text: string
 }
 /** Payload returned by `GET /api/speech/capabilities`; describes the active speech provider. */
 export interface SpeechCapabilitiesResponse {
  /** Whether a speech provider is available at all. */
  available: boolean
  /** Whether the provider has credentials (the OpenAI-compatible provider derives this from the presence of an API key). */
  configured: boolean
  /** Provider identifier taken from the resolved speech settings. */
  provider: string
  /** Whether speech-to-text (transcription) is supported. */
  supportsStt: boolean
  /** Whether text-to-speech (synthesis) is supported. */
  supportsTts: boolean
  /** Custom API base URL, when one is configured. */
  baseUrl?: string
  /** Model name used for transcription. */
  sttModel: string
  /** Model name used for synthesis. */
  ttsModel: string
  /** Voice name used for synthesis. */
  ttsVoice: string
 }
 /** Result of a transcription request (`POST /api/speech/transcribe`). */
 export interface SpeechTranscriptionResponse {
  /** Full transcribed text; may be an empty string. */
  text: string
  /** Language reported by the provider, or the language hint the caller supplied. */
  language?: string
  /**
   * Duration in milliseconds.
   * NOTE(review): the OpenAI-compatible provider falls back to the request's wall-clock time
   * when the upstream response carries no duration — confirm which meaning consumers expect.
   */
  durationMs?: number
  /** Timed segments, when the provider returns them. */
  segments?: SpeechSegment[]
 }
 /** Result of a synthesis request (`POST /api/speech/synthesize`). */
 export interface SpeechSynthesisResponse {
  /** Synthesized audio bytes, base64-encoded. */
  audioBase64: string
  /** MIME type describing the encoded audio. */
  mimeType: string
 }
 export type WorkspaceEventType =
  | "workspace.created"
  | "workspace.started"
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -23,6 +23,7 @@ import { AuthManager, BOOTSTRAP_TOKEN_STDOUT_PREFIX, DEFAULT_AUTH_USERNAME } fro
 import { resolveHttpsOptions } from "./server/tls"
 import { resolveNetworkAddresses } from "./server/network-addresses"
 import { startDevReleaseMonitor } from "./releases/dev-release-monitor"
 import { SpeechService } from "./speech/service"
 const require = createRequire(import.meta.url)
@@ -304,6 +305,7 @@ async function main() {
  })
  const fileSystemBrowser = new FileSystemBrowser({ rootDir: options.rootDir, unrestricted: options.unrestrictedRoot })
  const instanceStore = new InstanceStore(configLocation.instancesDir)
  const speechService = new SpeechService(settings, logger.child({ component: "speech" }))
  const instanceEventBridge = new InstanceEventBridge({
    workspaceManager,
    eventBus,
@@ -388,6 +390,7 @@ async function main() {
        eventBus,
        serverMeta,
        instanceStore,
        speechService,
        authManager,
        uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
        uiDevServerUrl: uiResolution.uiDevServerUrl,
@@ -408,6 +411,7 @@ async function main() {
        eventBus,
        serverMeta,
        instanceStore,
        speechService,
        authManager,
        uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
        uiDevServerUrl: undefined,
--- a/packages/server/src/server/http-server.ts
+++ b/packages/server/src/server/http-server.ts
@@ -21,12 +21,14 @@ import { registerStorageRoutes } from "./routes/storage"
 import { registerPluginRoutes } from "./routes/plugin"
 import { registerBackgroundProcessRoutes } from "./routes/background-processes"
 import { registerWorktreeRoutes } from "./routes/worktrees"
 import { registerSpeechRoutes } from "./routes/speech"
 import { ServerMeta } from "../api-types"
 import { InstanceStore } from "../storage/instance-store"
 import { BackgroundProcessManager } from "../background-processes/manager"
 import type { AuthManager } from "../auth/manager"
 import { registerAuthRoutes } from "./routes/auth"
 import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
 import type { SpeechService } from "../speech/service"
 interface HttpServerDeps {
  bindHost: string
@@ -41,6 +43,7 @@ interface HttpServerDeps {
  eventBus: EventBus
  serverMeta: ServerMeta
  instanceStore: InstanceStore
  speechService: SpeechService
  authManager: AuthManager
  uiStaticDir: string
  uiDevServerUrl?: string
@@ -252,6 +255,7 @@ export function createHttpServer(deps: HttpServerDeps) {
    eventBus: deps.eventBus,
    workspaceManager: deps.workspaceManager,
  })
  registerSpeechRoutes(app, { speechService: deps.speechService })
  registerPluginRoutes(app, { workspaceManager: deps.workspaceManager, eventBus: deps.eventBus, logger: proxyLogger })
  registerBackgroundProcessRoutes(app, { backgroundProcessManager })
  registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
--- a/packages/server/src/server/routes/speech.ts
+++ b/packages/server/src/server/routes/speech.ts
@@ -0,0 +1,46 @@
 import type { FastifyInstance } from "fastify"
 import { z } from "zod"
 import type { SpeechService } from "../../speech/service"
 /** Dependencies required to register the speech routes. */
 interface RouteDeps {
  /** Service that answers capability, transcription and synthesis requests. */
  speechService: SpeechService
 }
 /** Validates the JSON body of `POST /api/speech/transcribe`. */
 const TranscribeBodySchema = z.object({
  // Base64-encoded audio; must be non-empty.
  audioBase64: z.string().min(1, "Audio payload is required"),
  // MIME type of the encoded audio; must be non-empty.
  mimeType: z.string().min(1, "Audio MIME type is required"),
  // Optional upload filename forwarded to the speech service.
  filename: z.string().optional(),
  // Optional language hint forwarded to the speech service.
  language: z.string().optional(),
  // Optional prompt text forwarded to the speech service.
  prompt: z.string().optional(),
 })
 /** Validates the JSON body of `POST /api/speech/synthesize`. */
 const SynthesizeBodySchema = z.object({
  // Text to speak; trimmed, and must be non-empty after trimming.
  text: z.string().trim().min(1, "Text is required"),
  // Optional output audio format; restricted to these three values.
  format: z.enum(["mp3", "wav", "opus"]).optional(),
 })
 /**
  * Registers the speech HTTP endpoints on the Fastify app:
  *
  * - `GET  /api/speech/capabilities` returns the provider capabilities.
  * - `POST /api/speech/transcribe` validates the body and transcribes audio.
  * - `POST /api/speech/synthesize` validates the body and synthesizes speech.
  *
  * On failure the POST handlers log the error and respond with `{ error: string }`.
  * Request-validation failures answer 400; any other failure (provider not
  * configured, upstream error, network problem) answers 500 so clients can tell a
  * malformed request apart from a server-side problem.
  *
  * @param app Fastify instance to register the routes on.
  * @param deps Route dependencies; only `speechService` is used.
  */
 export function registerSpeechRoutes(app: FastifyInstance, deps: RouteDeps) {
  app.get("/api/speech/capabilities", async () => deps.speechService.getCapabilities())

  app.post("/api/speech/transcribe", async (request, reply) => {
    try {
      const body = TranscribeBodySchema.parse(request.body ?? {})
      return await deps.speechService.transcribe(body)
    } catch (error) {
      request.log.error({ err: error }, "Failed to transcribe audio")
      reply.code(speechErrorStatus(error))
      return { error: error instanceof Error ? error.message : "Failed to transcribe audio" }
    }
  })

  app.post("/api/speech/synthesize", async (request, reply) => {
    try {
      const body = SynthesizeBodySchema.parse(request.body ?? {})
      return await deps.speechService.synthesize(body)
    } catch (error) {
      request.log.error({ err: error }, "Failed to synthesize audio")
      reply.code(speechErrorStatus(error))
      return { error: error instanceof Error ? error.message : "Failed to synthesize audio" }
    }
  })
 }

 /**
  * Maps a caught error to an HTTP status: 400 for schema-validation errors, 500 otherwise.
  * Validation errors are recognised by their `name` ("ZodError") rather than by `instanceof`,
  * so the check does not depend on which copy of the schema library produced the error.
  */
 function speechErrorStatus(error: unknown): number {
  return error instanceof Error && error.name === "ZodError" ? 400 : 500
 }
--- a/packages/server/src/speech/providers/openai-compatible.ts
+++ b/packages/server/src/speech/providers/openai-compatible.ts
@@ -0,0 +1,148 @@
 import OpenAI from "openai"
 import { toFile } from "openai/uploads"
 import type { SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../../api-types"
 import type { Logger } from "../../logger"
 import type { NormalizedSpeechSettings, SynthesizeSpeechInput, TranscribeAudioInput } from "../service"
 /** Construction options for {@link OpenAICompatibleSpeechProvider}. */
 interface OpenAICompatibleSpeechProviderOptions {
  /** Resolved speech settings (API key, base URL, model and voice names). */
  settings: NormalizedSpeechSettings
  /** Logger used for request-level log entries. */
  logger: Logger
 }
 /**
  * Speech provider that talks to an OpenAI-compatible audio API through the `openai` SDK.
  * A fresh SDK client is created for every transcribe/synthesize call from the settings
  * captured at construction time.
  */
 export class OpenAICompatibleSpeechProvider {
  constructor(private readonly options: OpenAICompatibleSpeechProviderOptions) {}
  /**
   * Reports what this provider can do. `available`, `supportsStt` and `supportsTts` are
   * always true; `configured` is true only when an API key is present.
   */
  getCapabilities() {
    const { settings } = this.options
    return {
      available: true,
      configured: Boolean(settings.apiKey),
      provider: settings.provider,
      supportsStt: true,
      supportsTts: true,
      baseUrl: settings.baseUrl,
      sttModel: settings.sttModel,
      ttsModel: settings.ttsModel,
      ttsVoice: settings.ttsVoice,
    }
  }
  /**
   * Transcribes base64-encoded audio.
   *
   * @param input Audio payload plus optional filename, language hint and prompt.
   * @returns Normalized transcription; fields missing from the upstream response fall back
   *   to defaults (empty text, the caller's language hint, no segments).
   * @throws Error when no API key is configured, or when the upstream request fails.
   */
  async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> {
    const client = this.createClient()
    const startedAt = Date.now()
    const extension = extensionForMime(input.mimeType)
    const buffer = Buffer.from(input.audioBase64, "base64")
    // Use the caller's filename when it is non-blank; otherwise synthesize one from the MIME type.
    const filename = input.filename?.trim() || `prompt-input.${extension}`
    this.options.logger.info(
      {
        mimeType: input.mimeType,
        bytes: buffer.byteLength,
        language: input.language,
        model: this.options.settings.sttModel,
      },
      "speech.transcribe",
    )
    const response = await this.requestTranscription(client, buffer, filename, input)
    return {
      text: typeof response?.text === "string" ? response.text : "",
      language: typeof response?.language === "string" ? response.language : input.language,
      // Upstream duration is scaled by 1000 to milliseconds; without one, the elapsed request time is reported instead.
      durationMs: Number.isFinite(response?.duration) ? Math.round(Number(response.duration) * 1000) : Date.now() - startedAt,
      // Keep only segments that carry text; start/end are scaled by 1000 to milliseconds and clamped at 0.
      segments: Array.isArray(response?.segments)
        ? response.segments
            .filter((segment: any) => typeof segment?.text === "string")
            .map((segment: any) => ({
              startMs: Math.max(0, Math.round(Number(segment.start ?? 0) * 1000)),
              endMs: Math.max(0, Math.round(Number(segment.end ?? 0) * 1000)),
              text: String(segment.text),
            }))
        : undefined,
    }
  }
  /**
   * Sends the transcription request. It first asks for `verbose_json`; if that attempt throws
   * for any reason, it logs a warning and retries once without a response format. An error
   * from the retry propagates to the caller.
   */
  private async requestTranscription(
    client: OpenAI,
    buffer: Buffer,
    filename: string,
    input: TranscribeAudioInput,
  ): Promise<any> {
    // Language and prompt are included only when the caller supplied non-empty values.
    const baseRequest = {
      model: this.options.settings.sttModel,
      ...(input.language ? { language: input.language } : {}),
      ...(input.prompt ? { prompt: input.prompt } : {}),
    }
    try {
      const file = await toFile(buffer, filename, { type: input.mimeType })
      return (await client.audio.transcriptions.create({
        ...baseRequest,
        file,
        response_format: "verbose_json" as any,
      } as any)) as any
    } catch (error) {
      this.options.logger.warn({ err: error }, "speech.transcribe verbose_json failed; retrying default format")
      // A new upload object is built for the retry rather than reusing the first one.
      const retryFile = await toFile(buffer, filename, { type: input.mimeType })
      return (await client.audio.transcriptions.create({
        ...baseRequest,
        file: retryFile,
      } as any)) as any
    }
  }
  /**
   * Synthesizes speech for the given text.
   *
   * @param input Text to speak and an optional output format (defaults to "mp3").
   * @returns The audio as base64 together with the MIME type matching the format.
   * @throws Error when no API key is configured, or when the upstream request fails.
   */
  async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
    const client = this.createClient()
    const format = input.format ?? "mp3"
    this.options.logger.info(
      {
        model: this.options.settings.ttsModel,
        voice: this.options.settings.ttsVoice,
        format,
      },
      "speech.synthesize",
    )
    const response = await client.audio.speech.create({
      model: this.options.settings.ttsModel,
      voice: this.options.settings.ttsVoice as any,
      input: input.text,
      response_format: format as any,
    })
    const audioBuffer = Buffer.from(await response.arrayBuffer())
    return {
      audioBase64: audioBuffer.toString("base64"),
      mimeType: mimeTypeForFormat(format),
    }
  }
  /**
   * Builds an SDK client from the current settings.
   *
   * @throws Error when no API key is configured.
   */
  private createClient(): OpenAI {
    const { settings } = this.options
    if (!settings.apiKey) {
      throw new Error("Speech provider is not configured. Add an API key in Speech settings.")
    }
    return new OpenAI({
      apiKey: settings.apiKey,
      baseURL: settings.baseUrl,
    })
  }
 }
 /**
  * Picks an upload file extension for an audio MIME type.
  *
  * Matching is a case-insensitive substring search, so parameters such as
  * `;codecs=opus` and vendor prefixes such as `x-` are tolerated. Checks run in a
  * fixed order and the first match wins. MP4, M4A and AAC audio all map to `m4a`.
  *
  * @param mimeType MIME type reported for the audio payload.
  * @returns One of `webm`, `ogg`, `wav`, `mp3`, `m4a`, `flac`; `webm` when nothing matches.
  */
 function extensionForMime(mimeType: string): string {
  const normalized = mimeType.toLowerCase()
  if (normalized.includes("webm")) return "webm"
  if (normalized.includes("ogg")) return "ogg"
  if (normalized.includes("wav")) return "wav"
  if (normalized.includes("mpeg") || normalized.includes("mp3")) return "mp3"
  // `audio/m4a` and `audio/x-m4a` carry neither "mp4" nor "aac", so they need their own needle.
  if (normalized.includes("mp4") || normalized.includes("m4a") || normalized.includes("aac")) return "m4a"
  if (normalized.includes("flac")) return "flac"
  // Unknown types keep the historical default.
  return "webm"
 }
 /**
  * Returns the MIME type reported to clients for a synthesis output format.
  * Anything other than "wav" or "opus" is reported as "audio/mpeg".
  */
 function mimeTypeForFormat(format: "mp3" | "wav" | "opus"): string {
  switch (format) {
    case "wav":
      return "audio/wav"
    case "opus":
      return "audio/opus"
    default:
      return "audio/mpeg"
  }
 }
--- a/packages/server/src/speech/service.ts
+++ b/packages/server/src/speech/service.ts
@@ -0,0 +1,91 @@
 import { z } from "zod"
 import type { Logger } from "../logger"
 import type { SettingsService } from "../settings/service"
 import type { SpeechCapabilitiesResponse, SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../api-types"
 import { OpenAICompatibleSpeechProvider } from "./providers/openai-compatible"
 /**
  * Shape of the optional `speech` section read from the server config owner.
  * Every field is optional; missing values are filled in when the settings are resolved.
  */
 const ServerSpeechSettingsSchema = z.object({
  speech: z
    .object({
      provider: z.string().optional(),
      apiKey: z.string().optional(),
      baseUrl: z.string().optional(),
      sttModel: z.string().optional(),
      ttsModel: z.string().optional(),
      ttsVoice: z.string().optional(),
    })
    .optional(),
 })
 /** Input accepted by `transcribe`. */
 export interface TranscribeAudioInput {
  /** Audio bytes, base64-encoded. */
  audioBase64: string
  /** MIME type of the audio. */
  mimeType: string
  /** Optional upload filename. */
  filename?: string
  /** Optional language hint. */
  language?: string
  /** Optional prompt text passed along with the transcription request. */
  prompt?: string
 }
 /** Input accepted by `synthesize`. */
 export interface SynthesizeSpeechInput {
  /** Text to turn into speech. */
  text: string
  /** Optional output audio format. */
  format?: "mp3" | "wav" | "opus"
 }
 /** Contract implemented by speech backends used by {@link SpeechService}. */
 export interface SpeechProvider {
  /** Describes what the provider supports and whether it is configured. */
  getCapabilities(): SpeechCapabilitiesResponse
  /** Converts audio to text. */
  transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse>
  /** Converts text to audio. */
  synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse>
 }
 /** Speech settings after defaults and environment fallbacks have been applied. */
 export interface NormalizedSpeechSettings {
  /** Provider identifier. */
  provider: string
  /** API key; absent when neither the config nor the environment supplies one. */
  apiKey?: string
  /** Custom API base URL; absent when none is set. */
  baseUrl?: string
  /** Transcription model name. */
  sttModel: string
  /** Synthesis model name. */
  ttsModel: string
  /** Synthesis voice name. */
  ttsVoice: string
 }
 // Fallbacks applied when the corresponding setting is missing or blank.
 // Provider identifier.
 const DEFAULT_PROVIDER = "openai-compatible"
 // Transcription model name.
 const DEFAULT_STT_MODEL = "gpt-4o-mini-transcribe"
 // Synthesis model name.
 const DEFAULT_TTS_MODEL = "gpt-4o-mini-tts"
 // Synthesis voice name.
 const DEFAULT_TTS_VOICE = "alloy"
 /**
  * Entry point for speech features. Every call re-reads the server settings and builds a
  * new provider from them, so configuration changes apply to the next request.
  */
 export class SpeechService {
  constructor(
    private readonly settings: SettingsService,
    private readonly logger: Logger,
  ) {}

  /** Capabilities of the provider built from the current settings. */
  getCapabilities(): SpeechCapabilitiesResponse {
    const provider = this.createProvider()
    return provider.getCapabilities()
  }

  /** Transcribes audio with the provider built from the current settings. */
  async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> {
    const provider = this.createProvider()
    return provider.transcribe(input)
  }

  /** Synthesizes speech with the provider built from the current settings. */
  async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
    const provider = this.createProvider()
    return provider.synthesize(input)
  }

  /** Builds a provider whose logger is tagged with the resolved provider name. */
  private createProvider(): SpeechProvider {
    const resolved = this.resolveSettings()
    return new OpenAICompatibleSpeechProvider({
      settings: resolved,
      logger: this.logger.child({ provider: resolved.provider }),
    })
  }

  /**
   * Reads the `speech` section of the server config and fills the gaps: blank or missing
   * values fall back to defaults, and the API key / base URL fall back to the
   * `OPENAI_API_KEY` / `OPENAI_BASE_URL` environment variables.
   */
  private resolveSettings(): NormalizedSpeechSettings {
    const rawConfig = this.settings.getOwner("config", "server") ?? {}
    const parsed = ServerSpeechSettingsSchema.parse(rawConfig)
    const speech = parsed.speech ?? {}
    // Trimmed value, or undefined when the field is absent.
    const cleaned = (value: string | undefined) => value?.trim()
    return {
      provider: cleaned(speech.provider) || DEFAULT_PROVIDER,
      apiKey: cleaned(speech.apiKey) || process.env.OPENAI_API_KEY,
      baseUrl: cleaned(speech.baseUrl) || process.env.OPENAI_BASE_URL || undefined,
      sttModel: cleaned(speech.sttModel) || DEFAULT_STT_MODEL,
      ttsModel: cleaned(speech.ttsModel) || DEFAULT_TTS_MODEL,
      ttsVoice: cleaned(speech.ttsVoice) || DEFAULT_TTS_VOICE,
    }
  }
 }
--- a/packages/ui/src/App.tsx
+++ b/packages/ui/src/App.tsx
@@ -71,6 +71,7 @@ const App: Component = () => {
    toggleAutoCleanupBlankSessions,
    toggleUsageMetrics,
    togglePromptSubmitOnEnter,
    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
@@ -360,6 +361,7 @@ const App: Component = () => {
    toggleShowTimelineTools,
    toggleUsageMetrics,
    togglePromptSubmitOnEnter,
    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
--- a/packages/ui/src/components/prompt-input.tsx
+++ b/packages/ui/src/components/prompt-input.tsx
@@ -1,5 +1,5 @@
 import { createSignal, Show, onMount, onCleanup, createEffect, on } from "solid-js"
-import { ArrowBigUp, ArrowBigDown } from "lucide-solid"
+import { ArrowBigUp, ArrowBigDown, Loader2, Mic } from "lucide-solid"
 import UnifiedPicker from "./unified-picker"
 import ExpandButton from "./expand-button"
 import { clearAttachments, removeAttachment } from "../stores/attachments"
@@ -17,6 +17,7 @@ import { usePromptState } from "./prompt-input/usePromptState"
 import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
 import { usePromptPicker } from "./prompt-input/usePromptPicker"
 import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
 import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
 const log = getLogger("actions")
 export default function PromptInput(props: PromptInputProps) {
@@ -411,9 +412,45 @@ export default function PromptInput(props: PromptInputProps) {
  })
  const shouldShowOverlay = () => prompt().length === 0
  // Voice dictation controller for this prompt; it reads and writes the prompt text through the accessors passed here.
  const voiceInput = usePromptVoiceInput({
    prompt,
    setPrompt,
    getTextarea: () => textareaRef ?? null,
    enabled: () => preferences().showPromptVoiceInput,
    disabled: () => Boolean(props.disabled),
  })
  // Show the mic button only when the preference is on and voice input is usable,
  // or a recording/transcription is still in progress (so the button stays visible until it finishes).
  const showVoiceInput = () =>
    preferences().showPromptVoiceInput &&
    (voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
  const instance = () => getActiveInstance()
  // True between the start and end of a press gesture on the voice button.
  let voiceButtonPressed = false
  // Starts a push-to-talk gesture: records the press, captures the pointer when the
  // gesture came from a pointer event, and asks the voice controller to start recording.
  const beginVoicePress = (event?: PointerEvent | KeyboardEvent) => {
    if (voiceButtonPressed) return
    if (props.disabled) return
    if (voiceInput.isTranscribing()) return
    if (!voiceInput.canUseVoiceInput()) return

    voiceButtonPressed = true

    const pointerEvent = event instanceof PointerEvent ? event : undefined
    const pressedElement = pointerEvent?.currentTarget
    if (pointerEvent && pressedElement instanceof HTMLElement) {
      try {
        // Keep receiving pointer events for this gesture even if the pointer leaves the button.
        pressedElement.setPointerCapture(pointerEvent.pointerId)
      } catch {
        // no-op
      }
    }

    void voiceInput.startRecording()
  }
  // Ends a push-to-talk gesture; does nothing unless a press is currently active.
  const endVoicePress = () => {
    if (voiceButtonPressed) {
      voiceButtonPressed = false
      voiceInput.stopRecording()
    }
  }
  return (
    <div class="prompt-input-container">
      <div
@@ -555,6 +592,48 @@ export default function PromptInput(props: PromptInputProps) {
        </div>
        <div class="prompt-input-actions">
          <Show when={showVoiceInput()}>
            <button
              type="button"
              class={`prompt-voice-button ${voiceInput.isRecording() ? "is-recording" : ""}`}
              onPointerDown={(event) => {
                event.preventDefault()
                beginVoicePress(event)
              }}
              onPointerUp={(event) => {
                event.preventDefault()
                endVoicePress()
              }}
              onPointerCancel={() => endVoicePress()}
              onLostPointerCapture={() => endVoicePress()}
              onKeyDown={(event) => {
                if (event.repeat) return
                if (event.key !== " " && event.key !== "Enter") return
                event.preventDefault()
                beginVoicePress(event)
              }}
              onKeyUp={(event) => {
                if (event.key !== " " && event.key !== "Enter") return
                event.preventDefault()
                endVoicePress()
              }}
              onBlur={() => endVoicePress()}
              disabled={!voiceInput.isRecording() && (props.disabled || voiceInput.isTranscribing() || !voiceInput.canUseVoiceInput())}
              aria-label={voiceInput.buttonTitle()}
              title={voiceInput.buttonTitle()}
            >
              <Show
                when={voiceInput.isRecording()}
                fallback={
                  <Show when={voiceInput.isTranscribing()} fallback={<Mic class="h-4 w-4" aria-hidden="true" />}>
                    <Loader2 class="h-4 w-4 animate-spin" aria-hidden="true" />
                  </Show>
                }
              >
                <span class="prompt-voice-timer">{formatVoiceTimer(voiceInput.elapsedMs())}</span>
              </Show>
            </button>
          </Show>
          <button
            type="button"
            class="stop-button"
@@ -589,3 +668,10 @@ export default function PromptInput(props: PromptInputProps) {
    </div>
  )
 }
 /**
  * Formats an elapsed time as `MM:SS`. Partial seconds are dropped and negative
  * input is shown as `00:00`; minutes are not capped at 59.
  */
 function formatVoiceTimer(elapsedMs: number): string {
  const wholeSeconds = Math.max(0, Math.floor(elapsedMs / 1000))
  const twoDigits = (value: number) => String(value).padStart(2, "0")
  return `${twoDigits(Math.floor(wholeSeconds / 60))}:${twoDigits(wholeSeconds % 60)}`
 }
--- a/packages/ui/src/components/prompt-input/usePromptVoiceInput.ts
+++ b/packages/ui/src/components/prompt-input/usePromptVoiceInput.ts
@@ -0,0 +1,244 @@
 import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
 import { showAlertDialog } from "../../stores/alerts"
 import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
 import { serverApi } from "../../lib/api-client"
 import { useI18n } from "../../lib/i18n"
 /** Options for {@link usePromptVoiceInput}. */
 interface UsePromptVoiceInputOptions {
  /** Accessor for the current prompt text. */
  prompt: Accessor<string>
  /** Replaces the prompt text. */
  setPrompt: (value: string) => void
  /** Returns the prompt textarea, or null when it is not available; used for the caret position. */
  getTextarea: () => HTMLTextAreaElement | null
  /** Whether voice input is enabled. */
  enabled: Accessor<boolean>
  /** Whether the prompt is currently disabled; recording will not start while true. */
  disabled: Accessor<boolean>
 }
 /** Voice input lifecycle: idle, then recording, then transcribing, then back to idle. */
 type VoiceInputState = "idle" | "recording" | "transcribing"
 /**
  * Voice dictation controller for the prompt input.
  *
  * Records microphone audio with `MediaRecorder`, sends it to the server for
  * transcription, and inserts the resulting text at the textarea caret.
  *
  * Lifecycle guarantees:
  * - A stop or cancel that arrives while microphone access is still being requested
  *   aborts the pending start, so a quick press-and-release never leaves a recording running.
  * - Only one start can be in flight at a time.
  * - Tearing down while recording discards the audio instead of transcribing it.
  * - A failure while setting up the recorder always returns the state to "idle".
  *
  * @param options Prompt accessors and enable/disable flags.
  * @returns State accessors (`state`, `elapsedMs`, `isRecording`, `isTranscribing`,
  *   `canUseVoiceInput`, `buttonTitle`) and actions (`startRecording`, `stopRecording`,
  *   `toggleRecording`, `cancelRecording`).
  */
 export function usePromptVoiceInput(options: UsePromptVoiceInputOptions) {
  const { t } = useI18n()
  const [state, setState] = createSignal<VoiceInputState>("idle")
  const [elapsedMs, setElapsedMs] = createSignal(0)
  let mediaRecorder: MediaRecorder | null = null
  let mediaStream: MediaStream | null = null
  let timerId: number | undefined
  let shouldTranscribe = true
  let recordedChunks: Blob[] = []
  let recordingStartedAt = 0
  // True while startRecording is waiting for microphone access.
  let isStarting = false
  // Set when stop/cancel is requested while isStarting; the pending start then aborts.
  let abortPendingStart = false
  // Set once the owner has been cleaned up; no new recording may start afterwards.
  let disposed = false

  createEffect(() => {
    void loadSpeechCapabilities()
  })

  onCleanup(() => {
    disposed = true
    cleanupMedia(false)
  })

  const isSupported = () => {
    if (typeof window === "undefined") return false
    return typeof window.MediaRecorder !== "undefined" && Boolean(navigator.mediaDevices?.getUserMedia)
  }

  const canUseVoiceInput = () => {
    const capabilities = speechCapabilities()
    return Boolean(
      options.enabled() &&
        isSupported() &&
        capabilities?.available &&
        capabilities?.configured &&
        capabilities?.supportsStt,
    )
  }

  async function toggleRecording(): Promise<void> {
    if (state() === "recording") {
      stopRecording()
      return
    }
    await startRecording()
  }

  function stopRecording() {
    if (isStarting) {
      // The recorder does not exist yet; make the pending start bail out instead.
      abortPendingStart = true
      return
    }
    if (!mediaRecorder || state() !== "recording") return
    shouldTranscribe = true
    mediaRecorder.stop()
    setState("transcribing")
    stopTimer()
  }

  function cancelRecording() {
    if (isStarting) {
      abortPendingStart = true
      return
    }
    if (!mediaRecorder || state() !== "recording") return
    shouldTranscribe = false
    mediaRecorder.stop()
    cleanupMedia(false)
  }

  async function startRecording() {
    if (disposed || isStarting) return
    if (!canUseVoiceInput() || options.disabled() || state() === "transcribing" || state() === "recording") return
    // canUseVoiceInput() already requires isSupported(); this is kept as a defensive guard.
    if (!isSupported()) {
      showAlertDialog(t("promptInput.voiceInput.error.unsupported"), {
        title: t("promptInput.voiceInput.error.title"),
        variant: "error",
      })
      return
    }

    isStarting = true
    abortPendingStart = false
    try {
      recordedChunks = []
      shouldTranscribe = true
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      if (abortPendingStart || disposed) {
        // The gesture ended (or the owner went away) while waiting for the microphone.
        stopTracks(stream)
        return
      }
      mediaStream = stream
      const recorder = createRecorder(stream)
      mediaRecorder = recorder
      recorder.addEventListener("dataavailable", (event) => {
        if (event.data.size > 0) {
          recordedChunks.push(event.data)
        }
      })
      recorder.addEventListener("stop", () => {
        void finalizeRecording()
      })
      recordingStartedAt = Date.now()
      setElapsedMs(0)
      setState("recording")
      startTimer()
      recorder.start()
    } catch (error) {
      // Reset state as well: the failure may have happened after entering "recording".
      cleanupMedia()
      showAlertDialog(t("promptInput.voiceInput.error.permission"), {
        title: t("promptInput.voiceInput.error.title"),
        detail: error instanceof Error ? error.message : String(error),
        variant: "error",
      })
    } finally {
      isStarting = false
    }
  }

  // Runs when the recorder stops: transcribes the captured audio (unless it was discarded)
  // and returns the state to idle.
  async function finalizeRecording() {
    const recorder = mediaRecorder
    const stream = mediaStream
    mediaRecorder = null
    mediaStream = null

    if (!shouldTranscribe || recordedChunks.length === 0) {
      recordedChunks = []
      stopTracks(stream)
      setState("idle")
      setElapsedMs(0)
      return
    }

    const mimeType = recorder?.mimeType || recordedChunks[0]?.type || "audio/webm"

    try {
      const audioBlob = new Blob(recordedChunks, { type: mimeType })
      const transcription = await serverApi.transcribeAudio({
        audioBase64: await blobToBase64(audioBlob),
        mimeType,
      })
      if (transcription.text.trim()) {
        insertTranscript(transcription.text.trim())
      }
    } catch (error) {
      showAlertDialog(t("promptInput.voiceInput.error.transcribe"), {
        title: t("promptInput.voiceInput.error.title"),
        detail: error instanceof Error ? error.message : String(error),
        variant: "error",
      })
    } finally {
      recordedChunks = []
      stopTracks(stream)
      setState("idle")
      setElapsedMs(0)
    }
  }

  // Replaces the current selection (or inserts at the caret) with the transcript,
  // adding a space on either side when the neighbouring text does not already have whitespace.
  function insertTranscript(text: string) {
    const current = options.prompt()
    const textarea = options.getTextarea()
    const start = textarea ? textarea.selectionStart : current.length
    const end = textarea ? textarea.selectionEnd : current.length
    const before = current.slice(0, start)
    const after = current.slice(end)
    const prefix = before.length > 0 && !/\s$/.test(before) ? " " : ""
    const suffix = after.length > 0 && !/^\s/.test(after) ? " " : ""
    const nextValue = `${before}${prefix}${text}${suffix}${after}`
    const cursor = before.length + prefix.length + text.length
    options.setPrompt(nextValue)
    if (textarea) {
      // Defer so the caret is placed after the new value has been applied to the textarea.
      setTimeout(() => {
        textarea.focus()
        textarea.setSelectionRange(cursor, cursor)
      }, 0)
    }
  }

  // Stops the timer, recorder and microphone tracks and drops captured audio.
  function cleanupMedia(resetState = true) {
    stopTimer()
    if (mediaRecorder && mediaRecorder.state !== "inactive") {
      // Stopping fires "dataavailable" and "stop"; make sure that audio is discarded, not transcribed.
      shouldTranscribe = false
      mediaRecorder.stop()
    }
    mediaRecorder = null
    stopTracks(mediaStream)
    mediaStream = null
    recordedChunks = []
    if (resetState) {
      setState("idle")
      setElapsedMs(0)
    }
  }

  function startTimer() {
    stopTimer()
    timerId = window.setInterval(() => {
      setElapsedMs(Date.now() - recordingStartedAt)
    }, 250)
  }

  function stopTimer() {
    if (timerId !== undefined) {
      window.clearInterval(timerId)
      timerId = undefined
    }
  }

  return {
    state,
    elapsedMs,
    canUseVoiceInput,
    startRecording,
    stopRecording,
    toggleRecording,
    cancelRecording,
    isRecording: () => state() === "recording",
    isTranscribing: () => state() === "transcribing",
    buttonTitle: () => {
      if (state() === "recording") return t("promptInput.voiceInput.stop.title")
      if (state() === "transcribing") return t("promptInput.voiceInput.transcribing.title")
      return t("promptInput.voiceInput.start.title")
    },
  }
 }
 /**
  * Creates a MediaRecorder for the stream using the first preferred MIME type that
  * `MediaRecorder.isTypeSupported` accepts. When `isTypeSupported` is not available the first
  * preference is used as-is; when no preference is accepted the recorder is created with no
  * explicit MIME type.
  */
 function createRecorder(stream: MediaStream): MediaRecorder {
  const preferredTypes = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4", "audio/ogg;codecs=opus"]
  let chosen: string | undefined
  for (const mimeType of preferredTypes) {
    if (typeof MediaRecorder.isTypeSupported !== "function" || MediaRecorder.isTypeSupported(mimeType)) {
      chosen = mimeType
      break
    }
  }
  if (!chosen) {
    return new MediaRecorder(stream)
  }
  return new MediaRecorder(stream, { mimeType: chosen })
 }
 /** Stops every track of the stream; does nothing when there is no stream. */
 function stopTracks(stream: MediaStream | null) {
  if (!stream) return
  for (const track of stream.getTracks()) {
    track.stop()
  }
 }
 /**
  * Encodes the blob's bytes as a base64 string.
  * Each byte becomes one character of a binary string, which `btoa` then encodes.
  */
 async function blobToBase64(blob: Blob): Promise<string> {
  const bytes = new Uint8Array(await blob.arrayBuffer())
  const chars: string[] = new Array(bytes.length)
  for (let index = 0; index < bytes.length; index += 1) {
    chars[index] = String.fromCharCode(bytes[index])
  }
  return btoa(chars.join(""))
 }
--- a/packages/ui/src/components/settings-screen.tsx
+++ b/packages/ui/src/components/settings-screen.tsx
@@ -1,5 +1,5 @@
 import { Dialog } from "@kobalte/core/dialog"
-import { Settings, Bell, MonitorUp, Paintbrush, Terminal, X } from "lucide-solid"
+import { Settings, Bell, MonitorUp, Paintbrush, Terminal, Volume2, X } from "lucide-solid"
 import { createMemo, For, type Component } from "solid-js"
 import { useI18n } from "../lib/i18n"
 import {
@@ -13,6 +13,7 @@ import { AppearanceSettingsSection } from "./settings/appearance-settings-sectio
 import { NotificationsSettingsSection } from "./settings/notifications-settings-section"
 import { OpenCodeSettingsSection } from "./settings/opencode-settings-section"
 import { RemoteAccessSettingsSection } from "./settings/remote-access-settings-section"
 import { SpeechSettingsSection } from "./settings/speech-settings-section"
 export const SettingsScreen: Component = () => {
  const { t } = useI18n()
@@ -21,6 +22,7 @@ export const SettingsScreen: Component = () => {
    { id: "appearance" as SettingsSectionId, icon: Paintbrush, label: t("settings.nav.appearance") },
    { id: "notifications" as SettingsSectionId, icon: Bell, label: t("settings.nav.notifications") },
    { id: "remote" as SettingsSectionId, icon: MonitorUp, label: t("settings.nav.remote") },
    { id: "speech" as SettingsSectionId, icon: Volume2, label: t("settings.nav.speech") },
    { id: "opencode" as SettingsSectionId, icon: Terminal, label: t("settings.nav.opencode") },
  ])
@@ -30,6 +32,8 @@ export const SettingsScreen: Component = () => {
        return <NotificationsSettingsSection />
      case "remote":
        return <RemoteAccessSettingsSection />
      case "speech":
        return <SpeechSettingsSection />
      case "opencode":
        return <OpenCodeSettingsSection />
      case "appearance":
--- a/packages/ui/src/components/settings/appearance-settings-section.tsx
+++ b/packages/ui/src/components/settings/appearance-settings-section.tsx
@@ -24,6 +24,7 @@ export const AppearanceSettingsSection: Component = () => {
    toggleUsageMetrics,
    toggleAutoCleanupBlankSessions,
    togglePromptSubmitOnEnter,
    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
@@ -38,10 +39,11 @@ export const AppearanceSettingsSection: Component = () => {
      toggleShowThinkingBlocks,
      toggleKeyboardShortcutHints,
      toggleShowTimelineTools,
-      toggleUsageMetrics,
+        toggleUsageMetrics,
-      toggleAutoCleanupBlankSessions,
+        toggleAutoCleanupBlankSessions,
-      togglePromptSubmitOnEnter,
+        togglePromptSubmitOnEnter,
-      setDiffViewMode,
+        toggleShowPromptVoiceInput,
        setDiffViewMode,
      setToolOutputExpansion,
      setDiagnosticsExpansion,
      setThinkingBlocksExpansion,
--- a/packages/ui/src/components/settings/speech-settings-card.tsx
+++ b/packages/ui/src/components/settings/speech-settings-card.tsx
@@ -0,0 +1,217 @@
 import { createEffect, createMemo, createSignal, type Component, type JSX } from "solid-js"
 import { Mic, Volume2 } from "lucide-solid"
 import { useConfig, type SpeechSettings } from "../../stores/preferences"
 import { useI18n } from "../../lib/i18n"
 import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
 import { getLogger } from "../../lib/logger"
 // Module-level logger for this card; handleSave uses it to report failed saves.
 const log = getLogger("actions")
 /**
  * Editable, string-only snapshot of the speech settings shown in the form.
  * Every field is a plain string so it can be bound directly to a text input.
  */
 type DraftFields = {
  // API key text; empty string when no key is set.
  apiKey: string
  // Base URL override text; empty string when no override is set.
  baseUrl: string
  // Speech-to-text model identifier.
  sttModel: string
  // Text-to-speech model identifier.
  ttsModel: string
  // Text-to-speech voice identifier.
  ttsVoice: string
 }
 /**
  * Builds the form draft from stored speech settings.
  * A missing (null/undefined) `apiKey` or `baseUrl` becomes an empty string;
  * the model and voice fields are copied through unchanged.
  */
 function createDraftFields(speech: SpeechSettings): DraftFields {
  const { apiKey, baseUrl, sttModel, ttsModel, ttsVoice } = speech
  const draft: DraftFields = {
    apiKey: apiKey ?? "",
    baseUrl: baseUrl ?? "",
    sttModel,
    ttsModel,
    ttsVoice,
  }
  return draft
 }
 /**
  * Returns true when both drafts hold strictly equal values for every draft field
  * (apiKey, baseUrl, sttModel, ttsModel, ttsVoice).
  */
 function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
  const comparedKeys = ["apiKey", "baseUrl", "sttModel", "ttsModel", "ttsVoice"] as const
  return comparedKeys.every((key) => a[key] === b[key])
 }
 /**
  * Settings card that edits the speech configuration (API key, base URL,
  * transcription model, speech model and voice).
  *
  * The inputs edit a local draft; nothing is persisted until Save is pressed.
  * While the user has not touched the draft, it follows changes to
  * `serverSettings().speech`. Once the user has edited anything, their input is
  * kept until it is saved.
  */
 export const SpeechSettingsCard: Component = () => {
  const { t } = useI18n()
  const { serverSettings, updateSpeechSettings } = useConfig()
  const initialDrafts = createDraftFields(serverSettings().speech)
  const [isSaving, setIsSaving] = createSignal(false)
  const [saveStatus, setSaveStatus] = createSignal<"idle" | "saved" | "error">("saved")
  const [drafts, setDrafts] = createSignal<DraftFields>(initialDrafts)

  // True when the draft differs from what the settings store currently holds.
  const isDirty = createMemo(() => !isDraftEqual(drafts(), createDraftFields(serverSettings().speech)))

  // Keep an untouched draft in sync with the settings store.
  // The effect's previous value is the store snapshot seen on the last run, so
  // "untouched" means "the draft still equals that snapshot". Comparing against
  // the *current* store value instead (i.e. `isDirty`) can never detect a store
  // change, because any such change makes the draft look dirty.
  createEffect((lastServerDrafts: DraftFields) => {
    const serverDrafts = createDraftFields(serverSettings().speech)
    const current = drafts()
    // handleSave owns the draft while a save is in flight.
    if (isSaving()) return lastServerDrafts
    const untouchedByUser = isDraftEqual(current, lastServerDrafts)
    if (untouchedByUser && !isDraftEqual(current, serverDrafts)) {
      setDrafts(serverDrafts)
      setSaveStatus("saved")
    }
    return serverDrafts
  }, initialDrafts)

  // Request the speech capability status when the card is created.
  createEffect(() => {
    void loadSpeechCapabilities()
  })

  /** Label describing whether the speech service is configured. */
  const capabilityLabel = () => {
    if (speechCapabilitiesLoading()) return t("settings.speech.status.loading")
    if (speechCapabilitiesError()) return t("settings.speech.status.error")
    return speechCapabilities()?.configured ? t("settings.speech.status.configured") : t("settings.speech.status.missing")
  }

  /** Stores a new value for one draft field and clears the previous save result. */
  const updateDraft = (key: keyof DraftFields, value: string) => {
    setSaveStatus("idle")
    setDrafts((current) => ({ ...current, [key]: value }))
  }

  /**
   * Label describing the save state. "Unsaved changes" is only reported while
   * the draft really differs from the stored settings, so typing a value and
   * reverting it does not leave a stale "unsaved" note next to a disabled button.
   */
  const saveStatusLabel = () => {
    if (isSaving()) return t("settings.speech.save.saving")
    if (saveStatus() === "error") return t("settings.speech.save.error")
    return isDirty() ? t("settings.speech.save.unsaved") : t("settings.speech.save.saved")
  }

  /**
   * Persists the trimmed draft. Empty fields are sent as `undefined`.
   * After a successful save the draft is replaced by its trimmed form; model and
   * voice fields that were left empty fall back to the values in the store.
   */
  async function handleSave() {
    if (!isDirty() || isSaving()) return
    const current = drafts()
    const trimmed: DraftFields = {
      apiKey: current.apiKey.trim(),
      baseUrl: current.baseUrl.trim(),
      sttModel: current.sttModel.trim(),
      ttsModel: current.ttsModel.trim(),
      ttsVoice: current.ttsVoice.trim(),
    }
    setIsSaving(true)
    setSaveStatus("idle")
    try {
      await updateSpeechSettings({
        apiKey: trimmed.apiKey || undefined,
        baseUrl: trimmed.baseUrl || undefined,
        sttModel: trimmed.sttModel || undefined,
        ttsModel: trimmed.ttsModel || undefined,
        ttsVoice: trimmed.ttsVoice || undefined,
      })
      // The settings are already persisted at this point; a failed capability
      // refresh must not be reported to the user as a failed save.
      try {
        await loadSpeechCapabilities(true)
      } catch (refreshError) {
        log.error("Failed to refresh speech capabilities after saving", refreshError)
      }
      const stored = serverSettings().speech
      setDrafts({
        apiKey: trimmed.apiKey,
        baseUrl: trimmed.baseUrl,
        sttModel: trimmed.sttModel || stored.sttModel,
        ttsModel: trimmed.ttsModel || stored.ttsModel,
        ttsVoice: trimmed.ttsVoice || stored.ttsVoice,
      })
      setSaveStatus("saved")
    } catch (error) {
      log.error("Failed to save speech settings", error)
      setSaveStatus("error")
    } finally {
      setIsSaving(false)
    }
  }

  return (
    <div class="settings-card">
      <div class="settings-card-header">
        <div class="settings-card-heading-with-icon">
          <Volume2 class="settings-card-heading-icon" />
          <div>
            <h3 class="settings-card-title">{t("settings.speech.title")}</h3>
            <p class="settings-card-subtitle">{t("settings.speech.subtitle")}</p>
          </div>
        </div>
        <span class="settings-scope-badge settings-scope-badge-server">{t("settings.scope.server")}</span>
      </div>
      <div class="settings-stack">
        <div class="settings-toggle-row settings-toggle-row-compact">
          <div>
            <div class="settings-toggle-title">{t("settings.speech.provider.title")}</div>
            <div class="settings-toggle-caption">{t("settings.speech.provider.subtitle")}</div>
          </div>
          <div class="settings-toolbar-inline">
            <span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
            <span class="settings-inline-note">{capabilityLabel()}</span>
            <span class="settings-inline-note">{saveStatusLabel()}</span>
            <button
              type="button"
              class="selector-button selector-button-primary w-auto whitespace-nowrap"
              onClick={() => void handleSave()}
              disabled={!isDirty() || isSaving()}
            >
              {isSaving() ? t("settings.speech.save.saving") : t("settings.speech.save.action")}
            </button>
          </div>
        </div>
        <Field
          label={t("settings.speech.apiKey.title")}
          caption={t("settings.speech.apiKey.subtitle")}
          value={drafts().apiKey}
          onInput={(value) => updateDraft("apiKey", value)}
          type="password"
        />
        <Field
          label={t("settings.speech.baseUrl.title")}
          caption={t("settings.speech.baseUrl.subtitle")}
          value={drafts().baseUrl}
          onInput={(value) => updateDraft("baseUrl", value)}
          placeholder={t("settings.speech.baseUrl.placeholder")}
        />
        <Field
          label={t("settings.speech.sttModel.title")}
          caption={t("settings.speech.sttModel.subtitle")}
          value={drafts().sttModel}
          onInput={(value) => updateDraft("sttModel", value)}
        />
        <Field
          label={t("settings.speech.ttsModel.title")}
          caption={t("settings.speech.ttsModel.subtitle")}
          value={drafts().ttsModel}
          onInput={(value) => updateDraft("ttsModel", value)}
        />
        <Field
          label={t("settings.speech.ttsVoice.title")}
          caption={t("settings.speech.ttsVoice.subtitle")}
          value={drafts().ttsVoice}
          onInput={(value) => updateDraft("ttsVoice", value)}
          icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
        />
        <div class="settings-inline-note">{t("settings.speech.help")}</div>
      </div>
    </div>
  )
 }
 /**
  * One labelled settings row: title and caption on the left, a text input
  * (optionally preceded by an icon) on the right.
  *
  * - `type` defaults to "text".
  * - `onInput` receives the input's current value on every input event.
  * - The input carries `label` as its accessible name, because the visible
  *   title is a plain <div> and is not associated with the input.
  */
 const Field: Component<{
  label: string
  caption: string
  value: string
  type?: string
  placeholder?: string
  onInput: (value: string) => void
  icon?: JSX.Element
 }> = (props) => {
  return (
    <div class="settings-toggle-row settings-toggle-row-compact">
      <div>
        <div class="settings-toggle-title">{props.label}</div>
        <div class="settings-toggle-caption">{props.caption}</div>
      </div>
      <div class="flex items-center gap-2 min-w-[18rem] max-w-[24rem] w-full">
        {props.icon}
        <input
          type={props.type ?? "text"}
          value={props.value}
          onInput={(event) => props.onInput(event.currentTarget.value)}
          class="selector-input w-full"
          placeholder={props.placeholder}
          aria-label={props.label}
        />
      </div>
    </div>
  )
 }
 export default SpeechSettingsCard
--- a/packages/ui/src/components/settings/speech-settings-section.tsx
+++ b/packages/ui/src/components/settings/speech-settings-section.tsx
@@ -0,0 +1,10 @@
 import type { Component } from "solid-js"
 import SpeechSettingsCard from "./speech-settings-card"
 /** Settings section that renders the speech settings card inside the section stack wrapper. */
 export const SpeechSettingsSection: Component = () => (
  <div class="settings-section-stack">
    <SpeechSettingsCard />
  </div>
 )
--- a/packages/ui/src/lib/api-client.ts
+++ b/packages/ui/src/lib/api-client.ts
@@ -7,6 +7,9 @@ import type {
  FileSystemCreateFolderResponse,
  FileSystemListResponse,
  InstanceData,
  SpeechCapabilitiesResponse,
  SpeechSynthesisResponse,
  SpeechTranscriptionResponse,
  ServerMeta,
  WorkspaceCreateRequest,
  WorkspaceDescriptor,
@@ -235,6 +238,27 @@ export const serverApi = {
      body: JSON.stringify({ path }),
    })
  },
  /** Requests the speech capability description from `/api/speech/capabilities`. */
  fetchSpeechCapabilities(): Promise<SpeechCapabilitiesResponse> {
    const endpoint = "/api/speech/capabilities"
    return request<SpeechCapabilitiesResponse>(endpoint)
  },
  /**
   * POSTs a base64-encoded audio clip to `/api/speech/transcribe`.
   * The payload is sent as the JSON request body unchanged.
   */
  transcribeAudio(payload: {
    audioBase64: string
    mimeType: string
    filename?: string
    language?: string
    prompt?: string
  }): Promise<SpeechTranscriptionResponse> {
    const body = JSON.stringify(payload)
    return request<SpeechTranscriptionResponse>("/api/speech/transcribe", { method: "POST", body })
  },
  /**
   * POSTs text (and an optional output format) to `/api/speech/synthesize`.
   * The payload is sent as the JSON request body unchanged.
   */
  synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" }): Promise<SpeechSynthesisResponse> {
    const body = JSON.stringify(payload)
    return request<SpeechSynthesisResponse>("/api/speech/synthesize", { method: "POST", body })
  },
  listFileSystem(path?: string, options?: { includeFiles?: boolean }): Promise<FileSystemListResponse> {
    const params = new URLSearchParams()
    if (path && path !== ".") {
--- a/packages/ui/src/lib/hooks/use-commands.ts
+++ b/packages/ui/src/lib/hooks/use-commands.ts
@@ -34,6 +34,7 @@ export interface UseCommandsOptions {
  toggleUsageMetrics: () => void
  toggleAutoCleanupBlankSessions: () => void
  togglePromptSubmitOnEnter: () => void
  toggleShowPromptVoiceInput: () => void
  setDiffViewMode: (mode: "split" | "unified") => void
  setToolOutputExpansion: (mode: ExpansionPreference) => void
  setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -435,6 +436,7 @@ export function useCommands(options: UseCommandsOptions) {
      toggleUsageMetrics: options.toggleUsageMetrics,
      toggleAutoCleanupBlankSessions: options.toggleAutoCleanupBlankSessions,
      togglePromptSubmitOnEnter: options.togglePromptSubmitOnEnter,
      toggleShowPromptVoiceInput: options.toggleShowPromptVoiceInput,
      setDiffViewMode: options.setDiffViewMode,
      setToolOutputExpansion: options.setToolOutputExpansion,
      setDiagnosticsExpansion: options.setDiagnosticsExpansion,
--- a/packages/ui/src/lib/i18n/messages/en/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/en/messaging.ts
@@ -138,4 +138,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Send message",
  "promptInput.send.errorFallback": "Failed to send message",
  "promptInput.send.errorTitle": "Send failed",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/en/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/en/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Show or hide token and cost stats for assistant messages.",
  "settings.behavior.autoCleanup.title": "Auto-cleanup blank sessions",
  "settings.behavior.autoCleanup.subtitle": "Automatically clean up blank sessions when creating new ones.",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter to submit",
  "settings.behavior.promptSubmit.subtitle": "Use Enter to submit prompts; Cmd/Ctrl+Enter inserts a new line.",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/es/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/es/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Enviar mensaje",
  "promptInput.send.errorFallback": "No se pudo enviar el mensaje",
  "promptInput.send.errorTitle": "Error al enviar",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/es/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/es/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Muestra u oculta estadisticas de tokens y costo en mensajes del asistente.",
  "settings.behavior.autoCleanup.title": "Limpieza automatica de sesiones en blanco",
  "settings.behavior.autoCleanup.subtitle": "Limpia automaticamente las sesiones en blanco al crear nuevas.",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter para enviar",
  "settings.behavior.promptSubmit.subtitle": "Usa Enter para enviar; Cmd/Ctrl+Enter inserta una nueva linea.",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/fr/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/fr/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Envoyer le message",
  "promptInput.send.errorFallback": "Impossible d'envoyer le message",
  "promptInput.send.errorTitle": "Échec de l'envoi",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/fr/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/fr/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Afficher ou masquer les stats de tokens et de cout pour les messages de l'assistant.",
  "settings.behavior.autoCleanup.title": "Nettoyage auto des sessions vides",
  "settings.behavior.autoCleanup.subtitle": "Nettoyer automatiquement les sessions vides lors de la creation de nouvelles.",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Entrer pour envoyer",
  "settings.behavior.promptSubmit.subtitle": "Utiliser Entrer pour envoyer; Cmd/Ctrl+Entrer insere une nouvelle ligne.",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ja/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/ja/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "メッセージを送信",
  "promptInput.send.errorFallback": "メッセージの送信に失敗しました",
  "promptInput.send.errorTitle": "送信に失敗",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ja/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/ja/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "アシスタントのメッセージにトークン数とコストの統計を表示/非表示にします。",
  "settings.behavior.autoCleanup.title": "空のセッションを自動クリーンアップ",
  "settings.behavior.autoCleanup.subtitle": "新しいセッション作成時に空のセッションを自動的にクリーンアップします。",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enterで送信",
  "settings.behavior.promptSubmit.subtitle": "Enterで送信し、Cmd/Ctrl+Enterで改行します。",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ru/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/ru/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Отправить сообщение",
  "promptInput.send.errorFallback": "Не удалось отправить сообщение",
  "promptInput.send.errorTitle": "Не удалось отправить",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ru/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/ru/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Показывать или скрывать статистику токенов и стоимости в сообщениях ассистента.",
  "settings.behavior.autoCleanup.title": "Автоочистка пустых сессий",
  "settings.behavior.autoCleanup.subtitle": "Автоматически очищать пустые сессии при создании новых.",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter для отправки",
  "settings.behavior.promptSubmit.subtitle": "Enter отправляет; Cmd/Ctrl+Enter вставляет новую строку.",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "发送消息",
  "promptInput.send.errorFallback": "发送消息失败",
  "promptInput.send.errorTitle": "发送失败",
  "promptInput.voiceInput.start.title": "Start voice input",
  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
  "promptInput.voiceInput.error.title": "Voice input failed",
  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/zh-Hans/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/zh-Hans/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "显示或隐藏助手消息的令牌与成本统计。",
  "settings.behavior.autoCleanup.title": "自动清理空会话",
  "settings.behavior.autoCleanup.subtitle": "创建新会话时自动清理空会话。",
  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "回车发送",
  "settings.behavior.promptSubmit.subtitle": "使用回车发送；Cmd/Ctrl+回车插入新行。",
  "settings.speech.title": "Speech",
  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
  "settings.speech.provider.title": "Provider",
  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
  "settings.speech.status.loading": "Checking configuration...",
  "settings.speech.status.configured": "Configured",
  "settings.speech.status.missing": "Missing API key",
  "settings.speech.status.error": "Speech service unavailable",
  "settings.speech.apiKey.title": "API key",
  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
  "settings.speech.baseUrl.title": "Base URL",
  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
  "settings.speech.sttModel.title": "Transcription model",
  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
  "settings.speech.ttsModel.title": "Speech model",
  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
  "settings.speech.ttsVoice.title": "Default voice",
  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
  "settings.speech.save.action": "Save",
  "settings.speech.save.saving": "Saving...",
  "settings.speech.save.saved": "Saved",
  "settings.speech.save.unsaved": "Unsaved changes",
  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/settings/behavior-registry.ts
+++ b/packages/ui/src/lib/settings/behavior-registry.ts
@@ -42,6 +42,7 @@ export type BehaviorRegistryActions = {
  toggleUsageMetrics: () => void
  toggleAutoCleanupBlankSessions: () => void
  togglePromptSubmitOnEnter: () => void
  toggleShowPromptVoiceInput: () => void
  setDiffViewMode: (mode: "split" | "unified") => void
  setToolOutputExpansion: (mode: ExpansionPreference) => void
  setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -248,6 +249,24 @@ export function getBehaviorSettings(actions: BehaviorRegistryActions): BehaviorS
        )
      },
    },
    {
      kind: "toggle",
      id: "behavior.promptVoiceInput",
      titleKey: "settings.behavior.promptVoiceInput.title",
      subtitleKey: "settings.behavior.promptVoiceInput.subtitle",
      get: (p) => Boolean(p.showPromptVoiceInput ?? true),
      set: (next) => {
        if (updatePreferences) {
          updatePreferences({ showPromptVoiceInput: next })
          return
        }
        setBooleanByToggle(
          () => Boolean(prefs().showPromptVoiceInput ?? true),
          actions.toggleShowPromptVoiceInput,
          next,
        )
      },
    },
    {
      kind: "toggle",
      id: "behavior.promptSubmitOnEnter",
--- a/packages/ui/src/stores/preferences.tsx
+++ b/packages/ui/src/stores/preferences.tsx
@@ -7,6 +7,7 @@ import {
  updateInstanceConfig as updateInstanceData,
 } from "./instance-config"
 import { getLogger } from "../lib/logger"
 import { loadSpeechCapabilities, resetSpeechCapabilities } from "./speech"
 const log = getLogger("actions")
@@ -27,6 +28,16 @@ export type DiffViewMode = "split" | "unified"
 export type ExpansionPreference = "expanded" | "collapsed"
 export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
 export type ListeningMode = "local" | "all"
 /** Speech backend identifier; "openai-compatible" is the only member of this union. */
 export type SpeechProviderPreference = "openai-compatible"
 /**
  * Speech configuration. The server config bucket carries a partial copy under
  * its `speech` key; `normalizeSpeechSettings` turns that into a complete value.
  */
 export interface SpeechSettings {
  // Which speech backend to use.
  provider: SpeechProviderPreference
  // Optional credential; normalization drops blank values.
  apiKey?: string
  // Optional endpoint override; normalization drops blank values.
  baseUrl?: string
  // Speech-to-text model name (presumably used for prompt transcription - confirm against the server adapter).
  sttModel: string
  // Text-to-speech model name.
  ttsModel: string
  // Text-to-speech voice name.
  ttsVoice: string
 }
 export interface UiSettings {
  showThinkingBlocks: boolean
@@ -34,6 +45,7 @@ export interface UiSettings {
  thinkingBlocksExpansion: ExpansionPreference
  showTimelineTools: boolean
  promptSubmitOnEnter: boolean
  showPromptVoiceInput: boolean
  locale?: string
  diffViewMode: DiffViewMode
  toolOutputExpansion: ExpansionPreference
@@ -75,6 +87,7 @@ interface ServerConfigBucket {
  listeningMode?: ListeningMode
  environmentVariables?: Record<string, string>
  opencodeBinary?: string
  speech?: Partial<SpeechSettings>
 }
 interface UiStateBucket {
@@ -107,6 +120,7 @@ const defaultUiSettings: UiSettings = {
  thinkingBlocksExpansion: "expanded",
  showTimelineTools: true,
  promptSubmitOnEnter: false,
  showPromptVoiceInput: true,
  diffViewMode: "split",
  toolOutputExpansion: "expanded",
  diagnosticsExpansion: "expanded",
@@ -120,6 +134,13 @@ const defaultUiSettings: UiSettings = {
  notifyOnIdle: true,
 }
 // Fallback values applied by normalizeSpeechSettings when a field is missing or blank.
 // There is intentionally no default apiKey or baseUrl: both stay undefined unless provided.
 const defaultSpeechSettings: SpeechSettings = {
  provider: "openai-compatible",
  sttModel: "gpt-4o-mini-transcribe",
  ttsModel: "gpt-4o-mini-tts",
  ttsVoice: "alloy",
 }
 function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
  const sanitized = input ?? {}
  return {
@@ -129,6 +150,7 @@ function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
    thinkingBlocksExpansion: sanitized.thinkingBlocksExpansion ?? defaultUiSettings.thinkingBlocksExpansion,
    showTimelineTools: sanitized.showTimelineTools ?? defaultUiSettings.showTimelineTools,
    promptSubmitOnEnter: sanitized.promptSubmitOnEnter ?? defaultUiSettings.promptSubmitOnEnter,
    showPromptVoiceInput: sanitized.showPromptVoiceInput ?? defaultUiSettings.showPromptVoiceInput,
    locale: sanitized.locale ?? defaultUiSettings.locale,
    diffViewMode: sanitized.diffViewMode ?? defaultUiSettings.diffViewMode,
    toolOutputExpansion: sanitized.toolOutputExpansion ?? defaultUiSettings.toolOutputExpansion,
@@ -156,6 +178,27 @@ function normalizeRecord(value: unknown): Record<string, string> {
  return out
 }
 /**
  * Build a complete `SpeechSettings` value from partial, missing, or untrusted input.
  *
  * String fields are trimmed. Blank or non-string `apiKey` / `baseUrl` become
  * `undefined`; blank or non-string model and voice fields fall back to
  * `defaultSpeechSettings`. Any provider other than "openai-compatible" is
  * replaced by the default provider.
  *
  * @param input Partial settings, or `null` / `undefined` for "nothing stored".
  * @returns Fully populated speech settings.
  */
 function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): SpeechSettings {
  // Trimmed text for non-blank strings, undefined for everything else.
  const nonBlank = (value: unknown): string | undefined => {
    if (typeof value !== "string") return undefined
    const text = value.trim()
    return text ? text : undefined
  }
  const raw = input ?? {}
  const provider = raw.provider === "openai-compatible" ? raw.provider : defaultSpeechSettings.provider
  return {
    provider,
    apiKey: nonBlank(raw.apiKey),
    baseUrl: nonBlank(raw.baseUrl),
    sttModel: nonBlank(raw.sttModel) ?? defaultSpeechSettings.sttModel,
    ttsModel: nonBlank(raw.ttsModel) ?? defaultSpeechSettings.ttsModel,
    ttsVoice: nonBlank(raw.ttsVoice) ?? defaultSpeechSettings.ttsVoice,
  }
 }
 function cloneArray<T>(value: unknown, mapper: (item: any) => T | null): T[] {
  if (!Array.isArray(value)) return []
  const out: T[] = []
@@ -206,12 +249,15 @@ function normalizeUiState(input?: UiStateBucket | null): NormalizedUiState {
  }
 }
-function normalizeServerConfig(input?: ServerConfigBucket | null): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> {
+function normalizeServerConfig(
  input?: ServerConfigBucket | null,
 ): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> & { speech: SpeechSettings } {
  const source = input ?? {}
  const listeningMode = source.listeningMode === "all" ? "all" : "local"
  const opencodeBinary = typeof source.opencodeBinary === "string" && source.opencodeBinary.trim() ? source.opencodeBinary : "opencode"
  const environmentVariables = normalizeRecord(source.environmentVariables)
-  return { listeningMode, opencodeBinary, environmentVariables }
+  const speech = normalizeSpeechSettings(source.speech)
  return { listeningMode, opencodeBinary, environmentVariables, speech }
 }
 function getModelKey(model: { providerId: string; modelId: string }): string {
@@ -342,6 +388,16 @@ function updateLastUsedBinary(path: string): void {
  void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to update binary list", error))
 }
 /**
  * Persist speech settings to the "server" config owner.
  *
  * The given fields are layered over the current server speech settings and the
  * result is normalized before being written.
  *
  * @param updates Fields to change; omitted fields keep their current value.
  * @throws Rethrows whatever `patchConfigOwner` rejects with, after logging it.
  */
 async function updateSpeechSettings(updates: Partial<SpeechSettings>): Promise<void> {
  const merged = { ...serverSettings().speech, ...updates }
  const speech = normalizeSpeechSettings(merged)
  try {
    await patchConfigOwner("server", { speech })
  } catch (failure) {
    // Log for diagnostics, then let the caller decide how to surface the failure.
    log.error("Failed to update speech settings", failure)
    throw failure
  }
 }
 function addOpenCodeBinary(path: string, version?: string): void {
  const nextList = buildBinaryList(path, version, opencodeBinaries())
  void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to add binary", error))
@@ -476,6 +532,10 @@ function togglePromptSubmitOnEnter(): void {
  updateUiSettings({ promptSubmitOnEnter: !preferences().promptSubmitOnEnter })
 }
 /** Flip the `showPromptVoiceInput` UI preference to the opposite of its current value. */
 function toggleShowPromptVoiceInput(): void {
  const currentlyShown = preferences().showPromptVoiceInput
  updateUiSettings({ showPromptVoiceInput: !currentlyShown })
 }
 function toggleAutoCleanupBlankSessions(): void {
  const nextValue = !preferences().autoCleanupBlankSessions
  log.info("toggle auto cleanup", { value: nextValue })
@@ -521,6 +581,7 @@ interface ConfigContextValue {
  addEnvironmentVariable: typeof addEnvironmentVariable
  removeEnvironmentVariable: typeof removeEnvironmentVariable
  updateLastUsedBinary: typeof updateLastUsedBinary
  updateSpeechSettings: typeof updateSpeechSettings
  // ui-owned state
  recentFolders: typeof recentFolders
@@ -544,6 +605,7 @@ interface ConfigContextValue {
  toggleUsageMetrics: typeof toggleUsageMetrics
  toggleAutoCleanupBlankSessions: typeof toggleAutoCleanupBlankSessions
  togglePromptSubmitOnEnter: typeof togglePromptSubmitOnEnter
  toggleShowPromptVoiceInput: typeof toggleShowPromptVoiceInput
  setDiffViewMode: typeof setDiffViewMode
  setToolOutputExpansion: typeof setToolOutputExpansion
  setDiagnosticsExpansion: typeof setDiagnosticsExpansion
@@ -569,6 +631,7 @@ const configContextValue: ConfigContextValue = {
  addEnvironmentVariable,
  removeEnvironmentVariable,
  updateLastUsedBinary,
  updateSpeechSettings,
  recentFolders,
  opencodeBinaries,
  uiState,
@@ -588,6 +651,7 @@ const configContextValue: ConfigContextValue = {
  toggleUsageMetrics,
  toggleAutoCleanupBlankSessions,
  togglePromptSubmitOnEnter,
  toggleShowPromptVoiceInput,
  setDiffViewMode,
  setToolOutputExpansion,
  setDiagnosticsExpansion,
@@ -610,6 +674,8 @@ export const ConfigProvider: ParentComponent = (props) => {
    const unsubServer = storage.onConfigOwnerChanged("server", (bucket) => {
      setServerConfigBucket(bucket as any)
      setIsLoaded(true)
      resetSpeechCapabilities()
      void loadSpeechCapabilities(true)
    })
    const unsubStateUi = storage.onStateOwnerChanged("ui", (bucket) => {
      setUiStateBucket(bucket as any)
@@ -648,6 +714,7 @@ export {
  addEnvironmentVariable,
  removeEnvironmentVariable,
  updateLastUsedBinary,
  updateSpeechSettings,
  addRecentFolder,
  removeRecentFolder,
  addOpenCodeBinary,
@@ -664,6 +731,7 @@ export {
  toggleUsageMetrics,
  toggleAutoCleanupBlankSessions,
  togglePromptSubmitOnEnter,
  toggleShowPromptVoiceInput,
  setDiffViewMode,
  setToolOutputExpansion,
  setDiagnosticsExpansion,
--- a/packages/ui/src/stores/settings-screen.ts
+++ b/packages/ui/src/stores/settings-screen.ts
@@ -1,6 +1,6 @@
 import { createSignal } from "solid-js"
-export type SettingsSectionId = "appearance" | "notifications" | "remote" | "opencode"
+export type SettingsSectionId = "appearance" | "notifications" | "remote" | "speech" | "opencode"
 const [settingsOpen, setSettingsOpen] = createSignal(false)
 const [activeSettingsSection, setActiveSettingsSection] = createSignal<SettingsSectionId>("appearance")
--- a/packages/ui/src/stores/speech.ts
+++ b/packages/ui/src/stores/speech.ts
@@ -0,0 +1,46 @@
 import { createSignal } from "solid-js"
 import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
 import { serverApi } from "../lib/api-client"
 import { getLogger } from "../lib/logger"
 const log = getLogger("api")
 const [speechCapabilities, setSpeechCapabilities] = createSignal<SpeechCapabilitiesResponse | null>(null)
 const [speechCapabilitiesLoading, setSpeechCapabilitiesLoading] = createSignal(false)
 const [speechCapabilitiesError, setSpeechCapabilitiesError] = createSignal<string | null>(null)
 // Most recently started capabilities request; shared by concurrent non-forced callers.
 let speechCapabilitiesPromise: Promise<SpeechCapabilitiesResponse | null> | null = null
 // Id of the most recently started request. A request may only publish its outcome
 // (signals, loading flag, promise slot) while its id is still the latest one.
 let speechCapabilitiesRequestId = 0
 /**
  * Load the server's speech capabilities into the module signals.
  *
  * Without `force`, a cached value is returned immediately and an in-flight
  * request is shared instead of starting another one. With `force`, a fresh
  * request is always started, even while an older one is still pending, so a
  * reload triggered by a configuration change cannot resolve with an answer
  * that was requested before the change. Superseded requests still resolve for
  * their own callers but no longer touch the shared signals.
  *
  * @param force Skip the cache and any in-flight request and fetch again.
  * @returns The capabilities, or `null` when the request failed (the failure is
  *   logged and, for the latest request, exposed through `speechCapabilitiesError`).
  */
 async function loadSpeechCapabilities(force = false): Promise<SpeechCapabilitiesResponse | null> {
  if (!force) {
    const cached = speechCapabilities()
    if (cached) return cached
    if (speechCapabilitiesPromise) return speechCapabilitiesPromise
  }
  speechCapabilitiesRequestId += 1
  const requestId = speechCapabilitiesRequestId
  const isLatest = (): boolean => requestId === speechCapabilitiesRequestId
  setSpeechCapabilitiesLoading(true)
  setSpeechCapabilitiesError(null)
  const request: Promise<SpeechCapabilitiesResponse | null> = serverApi
    .fetchSpeechCapabilities()
    .then((result) => {
      if (isLatest()) {
        setSpeechCapabilities(result)
        setSpeechCapabilitiesError(null)
      }
      return result
    })
    .catch((error) => {
      log.error("Failed to load speech capabilities", error)
      if (isLatest()) {
        setSpeechCapabilities(null)
        setSpeechCapabilitiesError(error instanceof Error ? error.message : String(error))
      }
      return null
    })
    .finally(() => {
      // Only the latest request may end the loading state or clear the shared slot;
      // a superseded one would otherwise hide the request that replaced it.
      if (isLatest()) {
        setSpeechCapabilitiesLoading(false)
        speechCapabilitiesPromise = null
      }
    })
  speechCapabilitiesPromise = request
  return request
 }
 /**
  * Clear the cached speech capabilities and any recorded load error.
  *
  * Only the two signals are reset; the loading flag and any in-flight request
  * are left untouched.
  */
 function resetSpeechCapabilities(): void {
  setSpeechCapabilities(null)
  setSpeechCapabilitiesError(null)
 }
 export { speechCapabilities, speechCapabilitiesLoading, speechCapabilitiesError, loadSpeechCapabilities, resetSpeechCapabilities }
--- a/packages/ui/src/styles/messaging/prompt-input.css
+++ b/packages/ui/src/styles/messaging/prompt-input.css
@@ -170,6 +170,41 @@
  color: var(--button-danger-text, var(--text-inverted, #ffffff));
 }
 /* Base appearance of the prompt voice-input button: fixed-height, non-shrinking flex box with centered content. */
 .prompt-voice-button {
  @apply h-10 rounded-md border-none cursor-pointer flex items-center justify-center transition-all flex-shrink-0;
  min-width: 2.5rem;
  background-color: color-mix(in oklab, var(--surface-secondary) 82%, var(--surface-base));
  color: var(--text-secondary);
 }
 /* Hover feedback (enabled buttons only): brighter text, accent-tinted background, slight grow. */
 .prompt-voice-button:hover:not(:disabled) {
  color: var(--text-primary);
  background-color: color-mix(in oklab, var(--accent-primary) 12%, var(--surface-secondary));
  @apply scale-105;
 }
 /* Pressed feedback (enabled buttons only): slight shrink. */
 .prompt-voice-button:active:not(:disabled) {
  @apply scale-95;
 }
 /* Recording state: danger colours and a wider minimum width (presumably to fit the timer text - confirm in the component). */
 .prompt-voice-button.is-recording {
  min-width: 3.5rem;
  background-color: color-mix(in oklab, var(--button-danger-bg, rgba(239, 68, 68, 0.85)) 88%, white 12%);
  color: var(--button-danger-text, var(--text-inverted, #ffffff));
 }
 /* Disabled state: dimmed, with a not-allowed cursor. */
 .prompt-voice-button:disabled {
  @apply opacity-50 cursor-not-allowed;
 }
 /* Timer text: tabular figures keep every digit the same width; colour is inherited via currentColor. */
 .prompt-voice-timer {
  font-size: 0.68rem;
  font-variant-numeric: tabular-nums;
  font-weight: 600;
  line-height: 1;
  color: currentColor;
 }
 .stop-button:hover:not(:disabled) {
  background-color: var(--button-danger-hover-bg, rgba(239, 68, 68, 0.9));
  @apply opacity-95 scale-105;
Author	SHA1	Message	Date
Shantur Rathore	bf07904789	feat(speech): make prompt input push to talk	2026-03-24 22:42:27 +00:00
Shantur Rathore	4e576829b7	Revert "feat(speech): add realtime prompt dictation support" This reverts commit `f9b5e2b529`.	2026-03-24 20:52:04 +00:00
Shantur Rathore	f9b5e2b529	feat(speech): add realtime prompt dictation support Add server-backed realtime transcription for prompt voice input and expose speech settings to choose realtime mode and models.	2026-03-19 11:32:45 +00:00
Shantur Rathore	cc2f6976f6	fix(speech): preserve edits while saving settings	2026-03-13 08:34:34 +00:00
Shantur Rathore	0ed19aeefb	feat(speech): add prompt voice input groundwork	2026-03-12 22:04:57 +00:00