feat(speech): make prompt input push to talk

Revert "feat(speech): add realtime prompt dictation support"
This reverts commit f9b5e2b529.
2026-03-24 22:42:27 +00:00 · 2026-03-24 20:52:04 +00:00 · 2026-03-19 11:32:45 +00:00 · 2026-03-13 08:34:34 +00:00 · 2026-03-12 22:04:57 +00:00
67 changed files with 3007 additions and 1536 deletions
--- a/.github/workflows/pr-build.yml
+++ b/.github/workflows/pr-build.yml
@@ -1,52 +0,0 @@
-name: PR Build Validation # Builds pull requests, gated by the authorize job below.
-
-on:
-  pull_request:
-    types:
-      - opened
-      - synchronize
-      - reopened
-
-permissions:
-  contents: read
-
-concurrency: # One group per PR number; a newer run cancels the run in progress.
-  group: pr-build-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
-
-jobs:
-  authorize: # Publishes "allowed": true when the base branch is dev or the actor is allow-listed.
-    runs-on: ubuntu-latest
-    outputs:
-      allowed: ${{ steps.auth.outputs.allowed }}
-    env:
-      ALLOWED_ACTORS: ${{ vars.ALLOWED_NON_DEV_PR_ACTORS }} # Matched in the script as a comma-separated list.
-      ACTOR: ${{ github.actor }}
-      BASE_REF: ${{ github.event.pull_request.base.ref }}
-    steps:
-      - name: Check PR authorization
-        id: auth
-        shell: bash
-        run: |
-          set -euo pipefail
-          if [ "$BASE_REF" = "dev" ]; then
-            echo "allowed=true" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          normalized=",${ALLOWED_ACTORS},"
-          if [[ "$normalized" == *",${ACTOR},"* ]]; then
-            echo "allowed=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "allowed=false" >> "$GITHUB_OUTPUT"
-            echo "Skipping builds for unauthorized PR targeting $BASE_REF" >&2
-          fi
-
-  build: # Calls the reusable build workflow on the PR head commit.
-    needs: authorize
-    if: ${{ needs.authorize.outputs.allowed == 'true' }} # Skipped unless authorize reported allowed.
-    uses: ./.github/workflows/build-and-upload.yml
-    with:
-      ref: ${{ github.event.pull_request.head.sha }}
-      upload: false
-      set_versions: false
--- a/.github/workflows/restrict-non-dev-prs.yml
+++ b/.github/workflows/restrict-non-dev-prs.yml
@@ -1,54 +0,0 @@
-name: Restrict Non-Dev PRs # Comments on, closes and fails PRs that target a branch other than dev, unless the actor is allow-listed.
-
-on:
-  pull_request_target:
-    types:
-      - opened
-      - reopened
-      - synchronize
-
-permissions:
-  contents: read
-  pull-requests: write # Needed by the gh pr comment / gh pr close steps below.
-
-jobs:
-  restrict-non-dev-prs:
-    if: ${{ github.event.pull_request.base.ref != 'dev' }} # The job is skipped for PRs whose base is dev.
-    runs-on: ubuntu-latest
-    env:
-      ALLOWED_ACTORS: ${{ vars.ALLOWED_NON_DEV_PR_ACTORS }} # Matched in the script as a comma-separated list.
-      ACTOR: ${{ github.actor }}
-      PR_NUMBER: ${{ github.event.pull_request.number }}
-      BASE_REF: ${{ github.event.pull_request.base.ref }}
-    steps:
-      - name: Check allowed actor
-        id: auth
-        shell: bash
-        run: |
-          set -euo pipefail
-          normalized=",${ALLOWED_ACTORS},"
-          if [[ "$normalized" == *",${ACTOR},"* ]]; then
-            echo "authorized=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "authorized=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Comment on unauthorized PR
-        if: ${{ steps.auth.outputs.authorized != 'true' }}
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh pr comment "$PR_NUMBER" --body "Thanks for the contribution. PRs need to target \`dev\` branch. Please retarget this PR to the dev branch"
-
-      - name: Close unauthorized PR
-        if: ${{ steps.auth.outputs.authorized != 'true' }}
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh pr close "$PR_NUMBER"
-
-      - name: Fail unauthorized PR
-        if: ${{ steps.auth.outputs.authorized != 'true' }} # Runs after the comment and close steps so the check also shows as failed.
-        run: |
-          echo "Actor $ACTOR is not allowed to open PRs targeting $BASE_REF" >&2
-          exit 1
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "codenomad-workspace",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "codenomad-workspace",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "license": "MIT",
      "dependencies": {
        "7zip-bin": "^5.2.0",
@@ -3253,9 +3253,9 @@
      }
    },
    "node_modules/@tauri-apps/api": {
-      "version": "2.10.1",
-      "resolved": "https://registry.npmjs.org/@tauri-apps/api/-/api-2.10.1.tgz",
-      "integrity": "sha512-hKL/jWf293UDSUN09rR69hrToyIXBb8CjGaWC7gfinvnQrBVvnLr08FeFi38gxtugAVyVcTa5/FD/Xnkb1siBw==",
+      "version": "2.9.1",
+      "resolved": "https://registry.npmjs.org/@tauri-apps/api/-/api-2.9.1.tgz",
+      "integrity": "sha512-IGlhP6EivjXHepbBic618GOmiWe4URJiIeZFlB7x3czM0yDHHYviH1Xvoiv4FefdkQtn6v7TuwWCRfOGdnVUGw==",
      "license": "Apache-2.0 OR MIT",
      "funding": {
        "type": "opencollective",
@@ -3322,15 +3322,6 @@
        "node": ">= 10"
      }
    },
-    "node_modules/@tauri-apps/plugin-dialog": {
-      "version": "2.6.0",
-      "resolved": "https://registry.npmjs.org/@tauri-apps/plugin-dialog/-/plugin-dialog-2.6.0.tgz",
-      "integrity": "sha512-q4Uq3eY87TdcYzXACiYSPhmpBA76shgmQswGkSVio4C82Sz2W4iehe9TnKYwbq7weHiL88Yw19XZm7v28+Micg==",
-      "license": "MIT OR Apache-2.0",
-      "dependencies": {
-        "@tauri-apps/api": "^2.8.0"
-      }
-    },
    "node_modules/@tauri-apps/plugin-notification": {
      "version": "2.3.3",
      "resolved": "https://registry.npmjs.org/@tauri-apps/plugin-notification/-/plugin-notification-2.3.3.tgz",
@@ -8240,6 +8231,27 @@
        "regex-recursion": "^6.0.2"
      }
    },
+    "node_modules/openai": {
+      "version": "6.27.0",
+      "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz",
+      "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==",
+      "license": "Apache-2.0",
+      "bin": {
+        "openai": "bin/cli"
+      },
+      "peerDependencies": {
+        "ws": "^8.18.0",
+        "zod": "^3.25 || ^4.0"
+      },
+      "peerDependenciesMeta": {
+        "ws": {
+          "optional": true
+        },
+        "zod": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/own-keys": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz",
@@ -10244,6 +10256,14 @@
      "dev": true,
      "license": "ISC"
    },
+    "node_modules/tauri-plugin-keepawake-api": {
+      "version": "0.1.0",
+      "resolved": "https://registry.npmjs.org/tauri-plugin-keepawake-api/-/tauri-plugin-keepawake-api-0.1.0.tgz",
+      "integrity": "sha512-XPUl66zUYiB7kCRxsTdmCoNjFM/++NWCJ4kdTo2NUOgBUa8UVYfayDWnnTzGIQbhT7qNAHs+jgKSjhqSKs/QHA==",
+      "dependencies": {
+        "@tauri-apps/api": ">=2.0.0-beta.6"
+      }
+    },
    "node_modules/temp-dir": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/temp-dir/-/temp-dir-2.0.0.tgz",
@@ -11989,6 +12009,7 @@
    "node_modules/zod": {
      "version": "3.25.76",
      "license": "MIT",
+      "peer": true,
      "funding": {
        "url": "https://github.com/sponsors/colinhacks"
      }
@@ -12003,7 +12024,7 @@
    },
    "packages/electron-app": {
      "name": "@neuralnomads/codenomad-electron-app",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "license": "MIT",
      "dependencies": {
        "@codenomad/ui": "file:../ui",
@@ -12040,7 +12061,7 @@
    },
    "packages/server": {
      "name": "@neuralnomads/codenomad",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "license": "MIT",
      "dependencies": {
        "@fastify/cors": "^8.5.0",
@@ -12050,6 +12071,7 @@
        "fastify": "^4.28.1",
        "fuzzysort": "^2.0.4",
        "node-forge": "^1.3.3",
+        "openai": "^6.27.0",
        "pino": "^9.4.0",
        "undici": "^6.19.8",
        "yaml": "^2.4.2",
@@ -12081,7 +12103,7 @@
    },
    "packages/tauri-app": {
      "name": "@codenomad/tauri-app",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "license": "MIT",
      "devDependencies": {
        "@tauri-apps/cli": "^2.9.4"
@@ -12089,7 +12111,7 @@
    },
    "packages/ui": {
      "name": "@codenomad/ui",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "license": "MIT",
      "dependencies": {
        "@git-diff-view/solid": "^0.0.8",
@@ -12099,8 +12121,6 @@
        "@suid/icons-material": "^0.9.0",
        "@suid/material": "^0.19.0",
        "@suid/system": "^0.14.0",
-        "@tauri-apps/api": "^2.10.1",
-        "@tauri-apps/plugin-dialog": "^2.6.0",
        "@tauri-apps/plugin-notification": "^2.3.3",
        "@tauri-apps/plugin-opener": "^2.5.3",
        "ansi-sequence-parser": "^1.1.3",
@@ -12113,6 +12133,7 @@
        "shiki": "^3.13.0",
        "solid-js": "^1.8.0",
        "solid-toast": "^0.5.0",
+        "tauri-plugin-keepawake-api": "^0.1.0",
        "yaml": "^2.4.2"
      },
      "devDependencies": {
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "codenomad-workspace",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "private": true,
  "description": "CodeNomad monorepo workspace",
  "license": "MIT",
--- a/packages/cloudflare/release-config.json
+++ b/packages/cloudflare/release-config.json
@@ -1,4 +1,4 @@
 {
-  "minServerVersion": "0.12.3",
+  "minServerVersion": "0.11.4",
  "latestServerUrl": "https://github.com/NeuralNomadsAI/CodeNomad/releases/latest"
 }
--- a/packages/electron-app/package.json
+++ b/packages/electron-app/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@neuralnomads/codenomad-electron-app",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "description": "CodeNomad - AI coding assistant",
  "license": "MIT",
  "author": {
--- a/packages/opencode-config/package.json
+++ b/packages/opencode-config/package.json
@@ -4,6 +4,6 @@
  "private": true,
  "license": "MIT",
  "dependencies": {
-    "@opencode-ai/plugin": "1.2.25"
+    "@opencode-ai/plugin": "1.2.14"
  }
 }
--- a/packages/server/package-lock.json
+++ b/packages/server/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@neuralnomads/codenomad",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@neuralnomads/codenomad",
-      "version": "0.12.3",
+      "version": "0.12.2",
      "dependencies": {
        "@fastify/cors": "^8.5.0",
        "@fastify/reply-from": "^9.8.0",
--- a/packages/server/package.json
+++ b/packages/server/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@neuralnomads/codenomad",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "description": "CodeNomad Server",
  "license": "MIT",
  "author": {
@@ -32,6 +32,7 @@
    "fastify": "^4.28.1",
    "fuzzysort": "^2.0.4",
    "node-forge": "^1.3.3",
+    "openai": "^6.27.0",
    "pino": "^9.4.0",
    "undici": "^6.19.8",
    "yaml": "^2.4.2",
--- a/packages/server/src/api-types.ts
+++ b/packages/server/src/api-types.ts
@@ -207,6 +207,36 @@ export interface BinaryValidationResult {
  error?: string
 }

+export interface SpeechSegment { // One timed span of a transcription.
+  startMs: number // Start offset in milliseconds.
+  endMs: number // End offset in milliseconds.
+  text: string // Transcribed text for this span.
+}
+
+export interface SpeechCapabilitiesResponse { // Payload returned by GET /api/speech/capabilities.
+  available: boolean
+  configured: boolean // The OpenAI-compatible provider sets this to whether an API key is present.
+  provider: string
+  supportsStt: boolean // Speech-to-text (transcription).
+  supportsTts: boolean // Text-to-speech (synthesis).
+  baseUrl?: string // Optional API base URL from the resolved settings.
+  sttModel: string
+  ttsModel: string
+  ttsVoice: string
+}
+
+export interface SpeechTranscriptionResponse { // Result of a transcription request.
+  text: string // Full transcript; the OpenAI-compatible provider uses "" when the upstream response has no string text.
+  language?: string
+  durationMs?: number // Milliseconds.
+  segments?: SpeechSegment[] // Omitted when the upstream response carries no segments array.
+}
+
+export interface SpeechSynthesisResponse { // Result of a synthesis request.
+  audioBase64: string // Synthesized audio bytes, base64-encoded.
+  mimeType: string // MIME type for the audio in audioBase64.
+}
+
 export type WorkspaceEventType =
  | "workspace.created"
  | "workspace.started"
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -23,6 +23,7 @@ import { AuthManager, BOOTSTRAP_TOKEN_STDOUT_PREFIX, DEFAULT_AUTH_USERNAME } fro
 import { resolveHttpsOptions } from "./server/tls"
 import { resolveNetworkAddresses } from "./server/network-addresses"
 import { startDevReleaseMonitor } from "./releases/dev-release-monitor"
+import { SpeechService } from "./speech/service"

 const require = createRequire(import.meta.url)

@@ -304,6 +305,7 @@ async function main() {
  })
  const fileSystemBrowser = new FileSystemBrowser({ rootDir: options.rootDir, unrestricted: options.unrestrictedRoot })
  const instanceStore = new InstanceStore(configLocation.instancesDir)
+  const speechService = new SpeechService(settings, logger.child({ component: "speech" }))
  const instanceEventBridge = new InstanceEventBridge({
    workspaceManager,
    eventBus,
@@ -388,6 +390,7 @@ async function main() {
        eventBus,
        serverMeta,
        instanceStore,
+        speechService,
        authManager,
        uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
        uiDevServerUrl: uiResolution.uiDevServerUrl,
@@ -408,6 +411,7 @@ async function main() {
        eventBus,
        serverMeta,
        instanceStore,
+        speechService,
        authManager,
        uiStaticDir: uiResolution.uiStaticDir ?? DEFAULT_UI_STATIC_DIR,
        uiDevServerUrl: undefined,
--- a/packages/server/src/server/http-server.ts
+++ b/packages/server/src/server/http-server.ts
@@ -21,12 +21,14 @@ import { registerStorageRoutes } from "./routes/storage"
 import { registerPluginRoutes } from "./routes/plugin"
 import { registerBackgroundProcessRoutes } from "./routes/background-processes"
 import { registerWorktreeRoutes } from "./routes/worktrees"
+import { registerSpeechRoutes } from "./routes/speech"
 import { ServerMeta } from "../api-types"
 import { InstanceStore } from "../storage/instance-store"
 import { BackgroundProcessManager } from "../background-processes/manager"
 import type { AuthManager } from "../auth/manager"
 import { registerAuthRoutes } from "./routes/auth"
 import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
+import type { SpeechService } from "../speech/service"

 interface HttpServerDeps {
  bindHost: string
@@ -41,6 +43,7 @@ interface HttpServerDeps {
  eventBus: EventBus
  serverMeta: ServerMeta
  instanceStore: InstanceStore
+  speechService: SpeechService
  authManager: AuthManager
  uiStaticDir: string
  uiDevServerUrl?: string
@@ -252,6 +255,7 @@ export function createHttpServer(deps: HttpServerDeps) {
    eventBus: deps.eventBus,
    workspaceManager: deps.workspaceManager,
  })
+  registerSpeechRoutes(app, { speechService: deps.speechService })
  registerPluginRoutes(app, { workspaceManager: deps.workspaceManager, eventBus: deps.eventBus, logger: proxyLogger })
  registerBackgroundProcessRoutes(app, { backgroundProcessManager })
  registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
--- a/packages/server/src/server/routes/speech.ts
+++ b/packages/server/src/server/routes/speech.ts
@@ -0,0 +1,46 @@
+import type { FastifyInstance } from "fastify"
+import { z } from "zod"
+import type { SpeechService } from "../../speech/service"
+
+interface RouteDeps { // Dependencies injected into registerSpeechRoutes.
+  speechService: SpeechService
+}
+
+const TranscribeBodySchema = z.object({ // Request body for POST /api/speech/transcribe.
+  audioBase64: z.string().min(1, "Audio payload is required"), // Only non-emptiness is checked; base64 validity is not.
+  mimeType: z.string().min(1, "Audio MIME type is required"),
+  filename: z.string().optional(),
+  language: z.string().optional(),
+  prompt: z.string().optional(),
+})
+
+const SynthesizeBodySchema = z.object({ // Request body for POST /api/speech/synthesize.
+  text: z.string().trim().min(1, "Text is required"), // trim() precedes min(1), so whitespace-only text is rejected.
+  format: z.enum(["mp3", "wav", "opus"]).optional(),
+})
+
+export function registerSpeechRoutes(app: FastifyInstance, deps: RouteDeps) { // Registers the capabilities, transcribe and synthesize endpoints on app.
+  app.get("/api/speech/capabilities", async () => deps.speechService.getCapabilities()) // NOTE(review): no try/catch here, unlike the POST routes; a throw from getCapabilities() is left to Fastify's error handling.
+
+  app.post("/api/speech/transcribe", async (request, reply) => {
+    try {
+      const body = TranscribeBodySchema.parse(request.body ?? {}) // Throws on invalid input; handled by the catch below.
+      return await deps.speechService.transcribe(body)
+    } catch (error) { // Receives validation errors and speech-service failures alike.
+      request.log.error({ err: error }, "Failed to transcribe audio")
+      reply.code(400) // NOTE(review): service/provider failures are also reported as 400 — confirm this is intended.
+      return { error: error instanceof Error ? error.message : "Failed to transcribe audio" }
+    }
+  })
+
+  app.post("/api/speech/synthesize", async (request, reply) => {
+    try {
+      const body = SynthesizeBodySchema.parse(request.body ?? {}) // Throws on invalid input; handled by the catch below.
+      return await deps.speechService.synthesize(body)
+    } catch (error) { // Receives validation errors and speech-service failures alike.
+      request.log.error({ err: error }, "Failed to synthesize audio")
+      reply.code(400) // NOTE(review): service/provider failures are also reported as 400 — confirm this is intended.
+      return { error: error instanceof Error ? error.message : "Failed to synthesize audio" }
+    }
+  })
+}
--- a/packages/server/src/speech/providers/openai-compatible.ts
+++ b/packages/server/src/speech/providers/openai-compatible.ts
@@ -0,0 +1,148 @@
+import OpenAI from "openai"
+import { toFile } from "openai/uploads"
+import type { SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../../api-types"
+import type { Logger } from "../../logger"
+import type { NormalizedSpeechSettings, SynthesizeSpeechInput, TranscribeAudioInput } from "../service"
+
+interface OpenAICompatibleSpeechProviderOptions { // Constructor options for OpenAICompatibleSpeechProvider.
+  settings: NormalizedSpeechSettings
+  logger: Logger
+}
+
+export class OpenAICompatibleSpeechProvider { // Speech provider that performs transcription and synthesis through the OpenAI SDK client.
+  constructor(private readonly options: OpenAICompatibleSpeechProviderOptions) {}
+
+  getCapabilities() { // Reports static capabilities plus the resolved settings; makes no network call.
+    const { settings } = this.options
+    return {
+      available: true,
+      configured: Boolean(settings.apiKey), // Only whether a key exists is exposed, never the key itself.
+      provider: settings.provider,
+      supportsStt: true,
+      supportsTts: true,
+      baseUrl: settings.baseUrl,
+      sttModel: settings.sttModel,
+      ttsModel: settings.ttsModel,
+      ttsVoice: settings.ttsVoice,
+    }
+  }
+
+  async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> { // Sends base64 audio for transcription and normalises the response.
+    const client = this.createClient() // Throws when no API key is configured.
+    const startedAt = Date.now()
+    const extension = extensionForMime(input.mimeType)
+    const buffer = Buffer.from(input.audioBase64, "base64") // Decodes the base64 payload into raw bytes.
+    const filename = input.filename?.trim() || `prompt-input.${extension}` // A blank or missing filename falls back to a name derived from the MIME type.
+
+    this.options.logger.info(
+      {
+        mimeType: input.mimeType,
+        bytes: buffer.byteLength,
+        language: input.language,
+        model: this.options.settings.sttModel,
+      },
+      "speech.transcribe",
+    )
+
+    const response = await this.requestTranscription(client, buffer, filename, input)
+
+    return {
+      text: typeof response?.text === "string" ? response.text : "",
+      language: typeof response?.language === "string" ? response.language : input.language, // Falls back to the caller-supplied language.
+      durationMs: Number.isFinite(response?.duration) ? Math.round(Number(response.duration) * 1000) : Date.now() - startedAt, // NOTE(review): without a finite duration this is the elapsed time of this call, not the audio length.
+      segments: Array.isArray(response?.segments)
+        ? response.segments
+            .filter((segment: any) => typeof segment?.text === "string") // Drops segments that have no string text.
+            .map((segment: any) => ({
+              startMs: Math.max(0, Math.round(Number(segment.start ?? 0) * 1000)), // Scaled by 1000, rounded, clamped at 0.
+              endMs: Math.max(0, Math.round(Number(segment.end ?? 0) * 1000)), // Scaled by 1000, rounded, clamped at 0.
+              text: String(segment.text),
+            }))
+        : undefined,
+    }
+  }
+
+  private async requestTranscription( // Tries verbose_json first, then retries once without a response_format.
+    client: OpenAI,
+    buffer: Buffer,
+    filename: string,
+    input: TranscribeAudioInput,
+  ): Promise<any> {
+    const baseRequest = {
+      model: this.options.settings.sttModel,
+      ...(input.language ? { language: input.language } : {}), // Empty or missing values are left out of the request.
+      ...(input.prompt ? { prompt: input.prompt } : {}),
+    }
+
+    try {
+      const file = await toFile(buffer, filename, { type: input.mimeType })
+      return (await client.audio.transcriptions.create({
+        ...baseRequest,
+        file,
+        response_format: "verbose_json" as any,
+      } as any)) as any
+    } catch (error) { // NOTE(review): any first-attempt failure triggers the retry, not only a rejected response_format.
+      this.options.logger.warn({ err: error }, "speech.transcribe verbose_json failed; retrying default format")
+      const retryFile = await toFile(buffer, filename, { type: input.mimeType }) // A new file object is built for the second attempt.
+      return (await client.audio.transcriptions.create({
+        ...baseRequest,
+        file: retryFile,
+      } as any)) as any // An error from this second attempt propagates to the caller.
+    }
+  }
+
+  async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> { // Synthesizes input.text and returns the audio base64-encoded.
+    const client = this.createClient() // Throws when no API key is configured.
+    const format = input.format ?? "mp3" // mp3 is the default output format.
+
+    this.options.logger.info(
+      {
+        model: this.options.settings.ttsModel,
+        voice: this.options.settings.ttsVoice,
+        format,
+      },
+      "speech.synthesize",
+    )
+
+    const response = await client.audio.speech.create({
+      model: this.options.settings.ttsModel,
+      voice: this.options.settings.ttsVoice as any,
+      input: input.text,
+      response_format: format as any,
+    })
+
+    const audioBuffer = Buffer.from(await response.arrayBuffer()) // The whole audio response is read into memory before encoding.
+    return {
+      audioBase64: audioBuffer.toString("base64"),
+      mimeType: mimeTypeForFormat(format),
+    }
+  }
+
+  private createClient(): OpenAI { // Builds a new SDK client from the current settings on every call.
+    const { settings } = this.options
+    if (!settings.apiKey) { // Guard: an undefined or empty key is treated as not configured.
+      throw new Error("Speech provider is not configured. Add an API key in Speech settings.")
+    }
+
+    return new OpenAI({
+      apiKey: settings.apiKey,
+      baseURL: settings.baseUrl,
+    })
+  }
+}
+
+function extensionForMime(mimeType: string): string { // Picks a file extension by substring match on the lower-cased MIME type.
+  const normalized = mimeType.toLowerCase()
+  if (normalized.includes("webm")) return "webm" // Checks run top to bottom; the first match wins.
+  if (normalized.includes("ogg")) return "ogg"
+  if (normalized.includes("wav")) return "wav"
+  if (normalized.includes("mpeg") || normalized.includes("mp3")) return "mp3"
+  if (normalized.includes("mp4") || normalized.includes("aac")) return "m4a"
+  return "webm" // Fallback for any type not matched above.
+}
+
+function mimeTypeForFormat(format: "mp3" | "wav" | "opus"): string { // Maps a synthesis output format to its MIME type.
+  if (format === "wav") return "audio/wav"
+  if (format === "opus") return "audio/opus"
+  return "audio/mpeg" // Remaining case of the union: mp3.
+}
--- a/packages/server/src/speech/service.ts
+++ b/packages/server/src/speech/service.ts
@@ -0,0 +1,91 @@
+import { z } from "zod"
+import type { Logger } from "../logger"
+import type { SettingsService } from "../settings/service"
+import type { SpeechCapabilitiesResponse, SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../api-types"
+import { OpenAICompatibleSpeechProvider } from "./providers/openai-compatible"
+
+const ServerSpeechSettingsSchema = z.object({ // Validates the value read via settings.getOwner("config", "server"); only the speech section is declared.
+  speech: z
+    .object({
+      provider: z.string().optional(),
+      apiKey: z.string().optional(),
+      baseUrl: z.string().optional(),
+      sttModel: z.string().optional(),
+      ttsModel: z.string().optional(),
+      ttsVoice: z.string().optional(),
+    })
+    .optional(), // The whole speech section may be absent.
+})
+
+export interface TranscribeAudioInput { // Input accepted by SpeechProvider.transcribe.
+  audioBase64: string // Audio bytes, base64-encoded.
+  mimeType: string // MIME type of the audio.
+  filename?: string
+  language?: string
+  prompt?: string
+}
+
+export interface SynthesizeSpeechInput { // Input accepted by SpeechProvider.synthesize.
+  text: string // Text to synthesize.
+  format?: "mp3" | "wav" | "opus" // Output audio format; the OpenAI-compatible provider defaults to "mp3".
+}
+
+export interface SpeechProvider { // Contract that SpeechService delegates to.
+  getCapabilities(): SpeechCapabilitiesResponse
+  transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse>
+  synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse>
+}
+
+export interface NormalizedSpeechSettings { // Speech settings after defaults and environment fallbacks are applied.
+  provider: string
+  apiKey?: string // May remain undefined when neither the config nor the environment supplies one.
+  baseUrl?: string
+  sttModel: string
+  ttsModel: string
+  ttsVoice: string
+}
+
+const DEFAULT_PROVIDER = "openai-compatible" // Each default applies when the matching setting is missing or blank after trimming.
+const DEFAULT_STT_MODEL = "gpt-4o-mini-transcribe"
+const DEFAULT_TTS_MODEL = "gpt-4o-mini-tts"
+const DEFAULT_TTS_VOICE = "alloy"
+export class SpeechService { // Resolves speech settings and delegates each operation to a provider.
+  constructor(
+    private readonly settings: SettingsService,
+    private readonly logger: Logger,
+  ) {}
+
+  getCapabilities(): SpeechCapabilitiesResponse {
+    return this.createProvider().getCapabilities() // A new provider is built for every call.
+  }
+
+  async transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse> {
+    return this.createProvider().transcribe(input)
+  }
+
+  async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
+    return this.createProvider().synthesize(input)
+  }
+
+  private createProvider(): SpeechProvider { // Re-reads settings each time it is called.
+    const settings = this.resolveSettings()
+    return new OpenAICompatibleSpeechProvider({ // NOTE(review): settings.provider is only passed through; this class is always the one instantiated.
+      settings,
+      logger: this.logger.child({ provider: settings.provider }),
+    })
+  }
+
+  private resolveSettings(): NormalizedSpeechSettings { // Applies trimming, environment fallbacks and defaults to the stored speech config.
+    const parsed = ServerSpeechSettingsSchema.parse(this.settings.getOwner("config", "server") ?? {}) // parse throws when the stored value does not match the schema.
+    const speech = parsed.speech ?? {}
+
+    return {
+      provider: speech.provider?.trim() || DEFAULT_PROVIDER,
+      apiKey: speech.apiKey?.trim() || process.env.OPENAI_API_KEY, // A blank or missing key falls back to the environment variable.
+      baseUrl: speech.baseUrl?.trim() || process.env.OPENAI_BASE_URL || undefined, // An empty environment value becomes undefined.
+      sttModel: speech.sttModel?.trim() || DEFAULT_STT_MODEL,
+      ttsModel: speech.ttsModel?.trim() || DEFAULT_TTS_MODEL,
+      ttsVoice: speech.ttsVoice?.trim() || DEFAULT_TTS_VOICE,
+    }
+  }
+}
--- a/packages/tauri-app/Cargo.lock
+++ b/packages/tauri-app/Cargo.lock
--- a/packages/tauri-app/package.json
+++ b/packages/tauri-app/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@codenomad/tauri-app",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "private": true,
  "license": "MIT",
  "scripts": {
--- a/packages/tauri-app/scripts/prebuild.js
+++ b/packages/tauri-app/scripts/prebuild.js
@@ -20,7 +20,6 @@ const serverDevInstallCommand =
  "npm install --workspace @neuralnomads/codenomad --include-workspace-root=false --install-strategy=nested --fund=false --audit=false"
 const uiDevInstallCommand =
  "npm install --workspace @codenomad/ui --include-workspace-root=false --install-strategy=nested --fund=false --audit=false"
-const serverPrepareUiCommand = "npm run prepare-ui --workspace @neuralnomads/codenomad"

 const envWithRootBin = {
  ...process.env,
@@ -92,15 +91,6 @@ function ensureUiBuild() {
  }
 }

-function syncServerUiBundle() { // Runs serverPrepareUiCommand (the server workspace's prepare-ui npm script) from the workspace root.
-  console.log("[prebuild] syncing server public UI bundle...")
-  execSync(serverPrepareUiCommand, {
-    cwd: workspaceRoot,
-    stdio: "inherit", // The command shares this process's stdio.
-    env: envWithRootBin,
-  })
-}
-
 function ensureServerDevDependencies() {
  if (fs.existsSync(braceExpansionPath)) {
    return
@@ -256,7 +246,6 @@ function copyUiLoadingAssets() {
  ensureServerDependencies()
  ensureServerBuild()
  ensureUiBuild()
-  syncServerUiBundle()
  copyServerArtifacts()
  stripNodeModuleBins()
  copyUiLoadingAssets()
--- a/packages/tauri-app/src-tauri/Cargo.toml
+++ b/packages/tauri-app/src-tauri/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codenomad-tauri"
-version = "0.12.3"
+version = "0.1.0" # NOTE(review): the package.json manifests in this change move to 0.12.2 — confirm 0.1.0 is intended for the crate.
 edition = "2021"
 license = "MIT"

@@ -19,12 +19,9 @@ thiserror = "1"
 anyhow = "1"
 which = "4"
 libc = "0.2"
-keepawake = "0.6"
 tauri-plugin-dialog = "2"
 dirs = "5"
 tauri-plugin-opener = "2"
 url = "2"
+tauri-plugin-keepawake = "0.1.1"
 tauri-plugin-notification = "2"
-
-[target.'cfg(windows)'.dependencies]
-windows-sys = { version = "0.59", features = ["Win32_UI_Shell"] }
--- a/packages/tauri-app/src-tauri/gen/schemas/acl-manifests.json
+++ b/packages/tauri-app/src-tauri/gen/schemas/acl-manifests.json
--- a/packages/tauri-app/src-tauri/gen/schemas/desktop-schema.json
+++ b/packages/tauri-app/src-tauri/gen/schemas/desktop-schema.json
@@ -2378,6 +2378,36 @@
          "const": "dialog:deny-save",
          "markdownDescription": "Denies the save command without any pre-configured scope."
        },
+        {
+          "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-start`\n- `allow-stop`",
+          "type": "string",
+          "const": "keepawake:default",
+          "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-start`\n- `allow-stop`"
+        },
+        {
+          "description": "Enables the start command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:allow-start",
+          "markdownDescription": "Enables the start command without any pre-configured scope."
+        },
+        {
+          "description": "Enables the stop command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:allow-stop",
+          "markdownDescription": "Enables the stop command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the start command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:deny-start",
+          "markdownDescription": "Denies the start command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the stop command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:deny-stop",
+          "markdownDescription": "Denies the stop command without any pre-configured scope."
+        },
        {
          "description": "This permission set configures which\nnotification features are by default exposed.\n\n#### Granted Permissions\n\nIt allows all notification related features.\n\n\n#### This default permission set includes:\n\n- `allow-is-permission-granted`\n- `allow-request-permission`\n- `allow-notify`\n- `allow-register-action-types`\n- `allow-register-listener`\n- `allow-cancel`\n- `allow-get-pending`\n- `allow-remove-active`\n- `allow-get-active`\n- `allow-check-permissions`\n- `allow-show`\n- `allow-batch`\n- `allow-list-channels`\n- `allow-delete-channel`\n- `allow-create-channel`\n- `allow-permission-state`",
          "type": "string",
--- a/packages/tauri-app/src-tauri/gen/schemas/macOS-schema.json
+++ b/packages/tauri-app/src-tauri/gen/schemas/macOS-schema.json
@@ -2378,6 +2378,36 @@
          "const": "dialog:deny-save",
          "markdownDescription": "Denies the save command without any pre-configured scope."
        },
+        {
+          "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-start`\n- `allow-stop`",
+          "type": "string",
+          "const": "keepawake:default",
+          "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-start`\n- `allow-stop`"
+        },
+        {
+          "description": "Enables the start command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:allow-start",
+          "markdownDescription": "Enables the start command without any pre-configured scope."
+        },
+        {
+          "description": "Enables the stop command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:allow-stop",
+          "markdownDescription": "Enables the stop command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the start command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:deny-start",
+          "markdownDescription": "Denies the start command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the stop command without any pre-configured scope.",
+          "type": "string",
+          "const": "keepawake:deny-stop",
+          "markdownDescription": "Denies the stop command without any pre-configured scope."
+        },
        {
          "description": "This permission set configures which\nnotification features are by default exposed.\n\n#### Granted Permissions\n\nIt allows all notification related features.\n\n\n#### This default permission set includes:\n\n- `allow-is-permission-granted`\n- `allow-request-permission`\n- `allow-notify`\n- `allow-register-action-types`\n- `allow-register-listener`\n- `allow-cancel`\n- `allow-get-pending`\n- `allow-remove-active`\n- `allow-get-active`\n- `allow-check-permissions`\n- `allow-show`\n- `allow-batch`\n- `allow-list-channels`\n- `allow-delete-channel`\n- `allow-create-channel`\n- `allow-permission-state`",
          "type": "string",
--- a/packages/tauri-app/src-tauri/src/cli_manager.rs
+++ b/packages/tauri-app/src-tauri/src/cli_manager.rs
@@ -9,8 +9,6 @@ use std::ffi::OsStr;
 use std::fs;
 use std::io::{BufRead, BufReader, Read, Write};
 use std::net::TcpStream;
-#[cfg(unix)]
-use std::os::unix::process::CommandExt;
 use std::path::PathBuf;
 use std::process::{Child, Command, Stdio};
 use std::sync::atomic::{AtomicBool, Ordering};
@@ -19,24 +17,10 @@ use std::thread;
 use std::time::{Duration, Instant};
 use tauri::{webview::cookie::Cookie, AppHandle, Emitter, Manager, Url};

-#[cfg(windows)]
-use std::os::windows::process::CommandExt;
-
-#[cfg(windows)]
-const CREATE_NO_WINDOW: u32 = 0x08000000;
-
 fn log_line(message: &str) {
    println!("[tauri-cli] {message}");
 }

-#[cfg(windows)]
-fn configure_spawn(command: &mut Command) {
-    command.creation_flags(CREATE_NO_WINDOW);
-}
-
-#[cfg(not(windows))]
-fn configure_spawn(_command: &mut Command) {}
-
 fn workspace_root() -> Option<PathBuf> {
    std::env::current_dir().ok().and_then(|mut dir| {
        for _ in 0..3 {
@@ -52,46 +36,6 @@ const SESSION_COOKIE_NAME: &str = "codenomad_session";

 const CLI_STOP_GRACE_SECS: u64 = 30;

-#[cfg(unix)]
-fn configure_posix_process_group(command: &mut Command) {
-    // Ensure the CLI runs in its own process group so we can terminate wrapper
-    // processes (login shell/tsx) without leaving the server orphaned.
-    unsafe {
-        command.pre_exec(|| {
-            if libc::setpgid(0, 0) != 0 {
-                return Err(std::io::Error::last_os_error());
-            }
-            Ok(())
-        });
-    }
-}
-
-#[cfg(windows)]
-fn kill_process_tree_windows(pid: u32, force: bool) -> bool {
-    let mut args = vec!["/PID".to_string(), pid.to_string(), "/T".to_string()];
-    if force {
-        args.push("/F".to_string());
-    }
-
-    let mut command = Command::new("taskkill");
-    command.args(&args);
-    configure_spawn(&mut command);
-
-    match command.output() {
-        Ok(output) => {
-            if output.status.success() {
-                return true;
-            }
-
-            // If the PID is already gone, treat it as success.
-            let stdout = String::from_utf8_lossy(&output.stdout).to_lowercase();
-            let stderr = String::from_utf8_lossy(&output.stderr).to_lowercase();
-            let combined = format!("{stdout}\n{stderr}");
-            combined.contains("not found") || combined.contains("no running instance")
-        }
-        Err(_) => false,
-    }
-}
 fn navigate_main(app: &AppHandle, url: &str) {
    if let Some(win) = app.webview_windows().get("main") {
        let mut display = url.to_string();
@@ -404,19 +348,11 @@ impl CliProcessManager {
            log_line(&format!("stopping CLI pid={}", child.id()));
            #[cfg(unix)]
            unsafe {
-                let pid = child.id() as i32;
-                // Prefer signaling the process group to avoid orphaning children
-                // when the CLI was launched via a wrapper shell.
-                let group_res = libc::kill(-pid, libc::SIGTERM);
-                if group_res != 0 {
-                    let _ = libc::kill(pid, libc::SIGTERM);
-                }
+                libc::kill(child.id() as i32, libc::SIGTERM);
            }
            #[cfg(windows)]
            {
-                if !kill_process_tree_windows(child.id(), false) {
-                    let _ = child.kill();
-                }
+                let _ = child.kill();
            }

            let start = Instant::now();
@@ -432,17 +368,11 @@ impl CliProcessManager {
                            ));
                            #[cfg(unix)]
                            unsafe {
-                                let pid = child.id() as i32;
-                                let group_res = libc::kill(-pid, libc::SIGKILL);
-                                if group_res != 0 {
-                                    let _ = libc::kill(pid, libc::SIGKILL);
-                                }
+                                libc::kill(child.id() as i32, libc::SIGKILL);
                            }
                            #[cfg(windows)]
                            {
-                                if !kill_process_tree_windows(child.id(), true) {
-                                    let _ = child.kill();
-                                }
+                                let _ = child.kill();
                            }
                            break;
                        }
@@ -520,12 +450,9 @@ impl CliProcessManager {
                    .env("ELECTRON_RUN_AS_NODE", "1")
                    .stdout(Stdio::piped())
                    .stderr(Stdio::piped());
-                configure_spawn(&mut c);
                if let Some(ref cwd) = cwd {
                    c.current_dir(cwd);
                }
-                #[cfg(unix)]
-                configure_posix_process_group(&mut c);
                c.spawn()?
            }
            ShellCommandType::Direct(cmd) => {
@@ -535,12 +462,9 @@ impl CliProcessManager {
                    .env("ELECTRON_RUN_AS_NODE", "1")
                    .stdout(Stdio::piped())
                    .stderr(Stdio::piped());
-                configure_spawn(&mut c);
                if let Some(ref cwd) = cwd {
                    c.current_dir(cwd);
                }
-                #[cfg(unix)]
-                configure_posix_process_group(&mut c);
                c.spawn()?
            }
        };
@@ -613,24 +537,7 @@ impl CliProcessManager {
            locked.error = Some("CLI did not start in time".to_string());
            log_line("timeout waiting for CLI readiness");
            if let Some(child) = child_holder_clone.lock().as_mut() {
-                #[cfg(unix)]
-                unsafe {
-                    let pid = child.id() as i32;
-                    let group_res = libc::kill(-pid, libc::SIGKILL);
-                    if group_res != 0 {
-                        let _ = libc::kill(pid, libc::SIGKILL);
-                    }
-                }
-                #[cfg(windows)]
-                {
-                    if !kill_process_tree_windows(child.id(), true) {
-                        let _ = child.kill();
-                    }
-                }
-                #[cfg(not(any(unix, windows)))]
-                {
-                    let _ = child.kill();
-                }
+                let _ = child.kill();
            }
            let _ = app_clone.emit("cli:error", json!({"message": "CLI did not start in time"}));
            Self::emit_status(&app_clone, &locked);
--- a/packages/tauri-app/src-tauri/src/main.rs
+++ b/packages/tauri-app/src-tauri/src/main.rs
@@ -3,11 +3,8 @@
 mod cli_manager;

 use cli_manager::{CliProcessManager, CliStatus};
-use keepawake::KeepAwake;
-use serde::Deserialize;
 use serde_json::json;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::Mutex;
 use tauri::menu::{MenuBuilder, MenuItem, SubmenuBuilder};
 use tauri::plugin::{Builder as PluginBuilder, TauriPlugin};
 use tauri::webview::Webview;
@@ -15,31 +12,11 @@ use tauri::{AppHandle, Emitter, Manager, Runtime, Wry};
 use tauri_plugin_opener::OpenerExt;
 use url::Url;

-#[cfg(windows)]
-use std::ffi::OsStr;
-#[cfg(windows)]
-use std::iter;
-#[cfg(windows)]
-use std::os::windows::ffi::OsStrExt;
-#[cfg(windows)]
-use windows_sys::Win32::UI::Shell::SetCurrentProcessExplicitAppUserModelID;
-
 static QUIT_REQUESTED: AtomicBool = AtomicBool::new(false);

-#[cfg(windows)]
-const WINDOWS_APP_USER_MODEL_ID: &str = "ai.neuralnomads.codenomad.client";
-
+#[derive(Clone)]
 pub struct AppState {
    pub manager: CliProcessManager,
-    pub wake_lock: Mutex<Option<KeepAwake>>,
-}
-
-#[derive(Debug, Default, Deserialize)]
-#[serde(default, rename_all = "camelCase")]
-struct WakeLockConfig {
-    display: bool,
-    idle: bool,
-    sleep: bool,
 }

 #[tauri::command]
@@ -58,39 +35,6 @@ fn cli_restart(app: AppHandle, state: tauri::State<AppState>) -> Result<CliStatu
    Ok(state.manager.status())
 }

-#[tauri::command]
-fn wake_lock_start(
-    state: tauri::State<AppState>,
-    config: Option<WakeLockConfig>,
-) -> Result<(), String> {
-    let config = config.unwrap_or(WakeLockConfig {
-        display: true,
-        idle: false,
-        sleep: false,
-    });
-
-    let mut builder = keepawake::Builder::default();
-    builder
-        .display(config.display)
-        .idle(config.idle)
-        .sleep(config.sleep)
-        .reason("CodeNomad active session")
-        .app_name("CodeNomad")
-        .app_reverse_domain("ai.neuralnomads.codenomad.client");
-
-    let wake_lock = builder.create().map_err(|err| err.to_string())?;
-    let mut state_lock = state.wake_lock.lock().map_err(|err| err.to_string())?;
-    *state_lock = Some(wake_lock);
-    Ok(())
-}
-
-#[tauri::command]
-fn wake_lock_stop(state: tauri::State<AppState>) -> Result<(), String> {
-    let mut state_lock = state.wake_lock.lock().map_err(|err| err.to_string())?;
-    state_lock.take();
-    Ok(())
-}
-
 fn is_dev_mode() -> bool {
    cfg!(debug_assertions) || std::env::var("TAURI_DEV").is_ok()
 }
@@ -157,22 +101,6 @@ fn emit_folder_drop_event(
    }
 }

-#[cfg(windows)]
-fn set_windows_app_user_model_id() {
-    let app_id: Vec<u16> = OsStr::new(WINDOWS_APP_USER_MODEL_ID)
-        .encode_wide()
-        .chain(iter::once(0))
-        .collect();
-
-    let result = unsafe { SetCurrentProcessExplicitAppUserModelID(app_id.as_ptr()) };
-    if result < 0 {
-        eprintln!("[tauri] failed to set AppUserModelID: {result}");
-    }
-}
-
-#[cfg(not(windows))]
-fn set_windows_app_user_model_id() {}
-
 fn main() {
    let navigation_guard: TauriPlugin<Wry, ()> = PluginBuilder::new("external-link-guard")
        .on_navigation(|webview, url| intercept_navigation(webview, url))
@@ -181,14 +109,13 @@ fn main() {
    tauri::Builder::default()
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_opener::init())
+        .plugin(tauri_plugin_keepawake::init())
        .plugin(tauri_plugin_notification::init())
        .plugin(navigation_guard)
        .manage(AppState {
            manager: CliProcessManager::new(),
-            wake_lock: Mutex::new(None),
        })
        .setup(|app| {
-            set_windows_app_user_model_id();
            build_menu(&app.handle())?;
            let dev_mode = is_dev_mode();
            let app_handle = app.handle().clone();
@@ -200,12 +127,7 @@ fn main() {
            });
            Ok(())
        })
-        .invoke_handler(tauri::generate_handler![
-            cli_get_status,
-            cli_restart,
-            wake_lock_start,
-            wake_lock_stop
-        ])
+        .invoke_handler(tauri::generate_handler![cli_get_status, cli_restart])
        .on_menu_event(|app_handle, event| {
            match event.id().0.as_str() {
                // File menu
--- a/packages/tauri-app/src-tauri/tauri.conf.json
+++ b/packages/tauri-app/src-tauri/tauri.conf.json
@@ -1,8 +1,8 @@
 {
  "$schema": "https://schema.tauri.app/config/2",
  "productName": "CodeNomad",
-  "version": "0.12.3",
-  "identifier": "ai.neuralnomads.codenomad.client",
+  "version": "0.1.0",
+  "identifier": "ai.opencode.client",
  "build": {
    "beforeDevCommand": "npm run dev:bootstrap",
    "beforeBuildCommand": "npm run bundle:server",
--- a/packages/ui/package.json
+++ b/packages/ui/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@codenomad/ui",
-  "version": "0.12.3",
+  "version": "0.12.2",
  "private": true,
  "license": "MIT",
  "type": "module",
@@ -18,10 +18,8 @@
    "@suid/icons-material": "^0.9.0",
    "@suid/material": "^0.19.0",
    "@suid/system": "^0.14.0",
-    "@tauri-apps/api": "^2.10.1",
-    "@tauri-apps/plugin-dialog": "^2.6.0",
-    "@tauri-apps/plugin-notification": "^2.3.3",
    "@tauri-apps/plugin-opener": "^2.5.3",
+    "@tauri-apps/plugin-notification": "^2.3.3",
    "ansi-sequence-parser": "^1.1.3",
    "debug": "^4.4.3",
    "github-markdown-css": "^5.8.1",
@@ -32,6 +30,7 @@
    "shiki": "^3.13.0",
    "solid-js": "^1.8.0",
    "solid-toast": "^0.5.0",
+    "tauri-plugin-keepawake-api": "^0.1.0",
    "yaml": "^2.4.2"
  },
  "devDependencies": {
--- a/packages/ui/src/App.tsx
+++ b/packages/ui/src/App.tsx
@@ -71,6 +71,7 @@ const App: Component = () => {
    toggleAutoCleanupBlankSessions,
    toggleUsageMetrics,
    togglePromptSubmitOnEnter,
+    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
@@ -360,6 +361,7 @@ const App: Component = () => {
    toggleShowTimelineTools,
    toggleUsageMetrics,
    togglePromptSubmitOnEnter,
+    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
--- a/packages/ui/src/components/prompt-input.tsx
+++ b/packages/ui/src/components/prompt-input.tsx
@@ -1,5 +1,5 @@
 import { createSignal, Show, onMount, onCleanup, createEffect, on } from "solid-js"
-import { ArrowBigUp, ArrowBigDown } from "lucide-solid"
+import { ArrowBigUp, ArrowBigDown, Loader2, Mic } from "lucide-solid"
 import UnifiedPicker from "./unified-picker"
 import ExpandButton from "./expand-button"
 import { clearAttachments, removeAttachment } from "../stores/attachments"
@@ -17,6 +17,7 @@ import { usePromptState } from "./prompt-input/usePromptState"
 import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
 import { usePromptPicker } from "./prompt-input/usePromptPicker"
 import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
+import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
 const log = getLogger("actions")

 export default function PromptInput(props: PromptInputProps) {
@@ -411,9 +412,45 @@ export default function PromptInput(props: PromptInputProps) {
  })

  const shouldShowOverlay = () => prompt().length === 0
+  const voiceInput = usePromptVoiceInput({
+    prompt,
+    setPrompt,
+    getTextarea: () => textareaRef ?? null,
+    enabled: () => preferences().showPromptVoiceInput,
+    disabled: () => Boolean(props.disabled),
+  })
+  const showVoiceInput = () =>
+    preferences().showPromptVoiceInput &&
+    (voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())

  const instance = () => getActiveInstance()

+  let voiceButtonPressed = false
+
+  // Begins a push-to-talk press: marks the button held, captures the pointer and starts recording.
+  const beginVoicePress = (event?: PointerEvent | KeyboardEvent) => {
+    if (voiceButtonPressed || props.disabled || voiceInput.isTranscribing() || !voiceInput.canUseVoiceInput()) return
+    voiceButtonPressed = true
+
+    if (event instanceof PointerEvent && event.currentTarget instanceof HTMLElement) {
+      try {
+        event.currentTarget.setPointerCapture(event.pointerId)
+      } catch {
+        // Best-effort: without capture, the pointerup/blur handlers on the button still end the press.
+      }
+    }
+    // Starting awaits the microphone; if the press ended meanwhile, discard rather than record hands-free.
+    void voiceInput.startRecording().then(() => {
+      if (!voiceButtonPressed) voiceInput.cancelRecording()
+    })
+  }
+
+  const endVoicePress = () => {
+    if (!voiceButtonPressed) return
+    voiceButtonPressed = false
+    voiceInput.stopRecording()
+  }
+
  return (
    <div class="prompt-input-container">
      <div
@@ -555,6 +592,48 @@ export default function PromptInput(props: PromptInputProps) {
        </div>

        <div class="prompt-input-actions">
+          <Show when={showVoiceInput()}>
+            <button
+              type="button"
+              class={`prompt-voice-button ${voiceInput.isRecording() ? "is-recording" : ""}`}
+              onPointerDown={(event) => {
+                event.preventDefault()
+                beginVoicePress(event)
+              }}
+              onPointerUp={(event) => {
+                event.preventDefault()
+                endVoicePress()
+              }}
+              onPointerCancel={() => endVoicePress()}
+              onLostPointerCapture={() => endVoicePress()}
+              onKeyDown={(event) => {
+                if (event.repeat) return
+                if (event.key !== " " && event.key !== "Enter") return
+                event.preventDefault()
+                beginVoicePress(event)
+              }}
+              onKeyUp={(event) => {
+                if (event.key !== " " && event.key !== "Enter") return
+                event.preventDefault()
+                endVoicePress()
+              }}
+              onBlur={() => endVoicePress()}
+              disabled={!voiceInput.isRecording() && (props.disabled || voiceInput.isTranscribing() || !voiceInput.canUseVoiceInput())}
+              aria-label={voiceInput.buttonTitle()}
+              title={voiceInput.buttonTitle()}
+            >
+              <Show
+                when={voiceInput.isRecording()}
+                fallback={
+                  <Show when={voiceInput.isTranscribing()} fallback={<Mic class="h-4 w-4" aria-hidden="true" />}>
+                    <Loader2 class="h-4 w-4 animate-spin" aria-hidden="true" />
+                  </Show>
+                }
+              >
+                <span class="prompt-voice-timer">{formatVoiceTimer(voiceInput.elapsedMs())}</span>
+              </Show>
+            </button>
+          </Show>
          <button
            type="button"
            class="stop-button"
@@ -589,3 +668,10 @@ export default function PromptInput(props: PromptInputProps) {
    </div>
  )
 }
+
+/** Formats a millisecond duration as zero-padded "MM:SS"; negative input is clamped to 00:00. */
+function formatVoiceTimer(elapsedMs: number): string {
+  const wholeSeconds = Math.max(0, Math.floor(elapsedMs / 1000))
+  const pad = (value: number) => String(value).padStart(2, "0")
+  return `${pad(Math.floor(wholeSeconds / 60))}:${pad(wholeSeconds % 60)}`
+}
--- a/packages/ui/src/components/prompt-input/usePromptVoiceInput.ts
+++ b/packages/ui/src/components/prompt-input/usePromptVoiceInput.ts
@@ -0,0 +1,244 @@
+import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
+import { showAlertDialog } from "../../stores/alerts"
+import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
+import { serverApi } from "../../lib/api-client"
+import { useI18n } from "../../lib/i18n"
+
+interface UsePromptVoiceInputOptions {
+  prompt: Accessor<string>
+  setPrompt: (value: string) => void
+  getTextarea: () => HTMLTextAreaElement | null
+  enabled: Accessor<boolean>
+  disabled: Accessor<boolean>
+}
+
+type VoiceInputState = "idle" | "recording" | "transcribing"
+
+export function usePromptVoiceInput(options: UsePromptVoiceInputOptions) {
+  const { t } = useI18n()
+  const [state, setState] = createSignal<VoiceInputState>("idle")
+  const [elapsedMs, setElapsedMs] = createSignal(0)
+
+  let mediaRecorder: MediaRecorder | null = null
+  let mediaStream: MediaStream | null = null
+  let timerId: number | undefined
+  let shouldTranscribe = true
+  let recordedChunks: Blob[] = []
+  let recordingStartedAt = 0
+
+  createEffect(() => {
+    void loadSpeechCapabilities()
+  })
+
+  onCleanup(() => {
+    cleanupMedia(false)
+  })
+
+  const isSupported = () => {
+    if (typeof window === "undefined") return false
+    return typeof window.MediaRecorder !== "undefined" && Boolean(navigator.mediaDevices?.getUserMedia)
+  }
+
+  const canUseVoiceInput = () => {
+    const capabilities = speechCapabilities()
+    return Boolean(
+      options.enabled() &&
+        isSupported() &&
+        capabilities?.available &&
+        capabilities?.configured &&
+        capabilities?.supportsStt,
+    )
+  }
+
+  async function toggleRecording(): Promise<void> {
+    if (state() === "recording") {
+      stopRecording()
+      return
+    }
+
+    await startRecording()
+  }
+
+  function stopRecording() {
+    if (!mediaRecorder || state() !== "recording") return
+    shouldTranscribe = true
+    mediaRecorder.stop()
+    setState("transcribing")
+    stopTimer()
+  }
+
+  function cancelRecording() {
+    if (!mediaRecorder || state() !== "recording") return
+    shouldTranscribe = false
+    mediaRecorder.stop()
+    cleanupMedia(false)
+  }
+
+  /**
+   * Requests the microphone and starts a new recording.
+   * No-op unless voice input is usable (which already implies browser support), the prompt is
+   * enabled and the hook is idle. Failures are reported through an alert dialog.
+   */
+  async function startRecording() {
+    if (!canUseVoiceInput() || options.disabled() || state() === "transcribing" || state() === "recording") return
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+      // A concurrent call may have started recording while the microphone request was pending;
+      // release this duplicate stream instead of overwriting (and leaking) the active one.
+      if (state() !== "idle") {
+        stopTracks(stream)
+        return
+      }
+
+      recordedChunks = []
+      shouldTranscribe = true
+      mediaStream = stream
+      mediaRecorder = createRecorder(stream)
+      mediaRecorder.addEventListener("dataavailable", (event) => {
+        if (event.data.size > 0) recordedChunks.push(event.data)
+      })
+      mediaRecorder.addEventListener("stop", () => void finalizeRecording())
+
+      recordingStartedAt = Date.now()
+      setElapsedMs(0)
+      setState("recording")
+      startTimer()
+      mediaRecorder.start()
+    } catch (error) {
+      // Also reset to idle: if start() throws after the state became "recording", the UI would stay stuck.
+      cleanupMedia()
+      showAlertDialog(t("promptInput.voiceInput.error.permission"), {
+        title: t("promptInput.voiceInput.error.title"),
+        detail: error instanceof Error ? error.message : String(error),
+        variant: "error",
+      })
+    }
+  }
+
+  async function finalizeRecording() {
+    const recorder = mediaRecorder
+    const stream = mediaStream
+    mediaRecorder = null
+    mediaStream = null
+
+    if (!shouldTranscribe || recordedChunks.length === 0) {
+      recordedChunks = []
+      stopTracks(stream)
+      setState("idle")
+      setElapsedMs(0)
+      return
+    }
+
+    const mimeType = recorder?.mimeType || recordedChunks[0]?.type || "audio/webm"
+
+    try {
+      const audioBlob = new Blob(recordedChunks, { type: mimeType })
+      const transcription = await serverApi.transcribeAudio({
+        audioBase64: await blobToBase64(audioBlob),
+        mimeType,
+      })
+      if (transcription.text.trim()) {
+        insertTranscript(transcription.text.trim())
+      }
+    } catch (error) {
+      showAlertDialog(t("promptInput.voiceInput.error.transcribe"), {
+        title: t("promptInput.voiceInput.error.title"),
+        detail: error instanceof Error ? error.message : String(error),
+        variant: "error",
+      })
+    } finally {
+      recordedChunks = []
+      stopTracks(stream)
+      setState("idle")
+      setElapsedMs(0)
+    }
+  }
+
+  function insertTranscript(text: string) {
+    const current = options.prompt()
+    const textarea = options.getTextarea()
+    const start = textarea ? textarea.selectionStart : current.length
+    const end = textarea ? textarea.selectionEnd : current.length
+    const before = current.slice(0, start)
+    const after = current.slice(end)
+    const prefix = before.length > 0 && !/\s$/.test(before) ? " " : ""
+    const suffix = after.length > 0 && !/^\s/.test(after) ? " " : ""
+    const nextValue = `${before}${prefix}${text}${suffix}${after}`
+    const cursor = before.length + prefix.length + text.length
+
+    options.setPrompt(nextValue)
+    if (textarea) {
+      setTimeout(() => {
+        textarea.focus()
+        textarea.setSelectionRange(cursor, cursor)
+      }, 0)
+    }
+  }
+
+  // Tears down timer, recorder and stream; clearing shouldTranscribe makes the recorder's pending "stop" event discard the audio.
+  function cleanupMedia(resetState = true) {
+    stopTimer()
+    shouldTranscribe = false
+    if (mediaRecorder && mediaRecorder.state !== "inactive") mediaRecorder.stop()
+    mediaRecorder = null
+    stopTracks(mediaStream)
+    mediaStream = null
+    recordedChunks = []
+    if (resetState) {
+      setState("idle")
+      setElapsedMs(0)
+    }
+  }
+
+  function startTimer() {
+    stopTimer()
+    timerId = window.setInterval(() => {
+      setElapsedMs(Date.now() - recordingStartedAt)
+    }, 250)
+  }
+
+  function stopTimer() {
+    if (timerId !== undefined) {
+      window.clearInterval(timerId)
+      timerId = undefined
+    }
+  }
+
+  return {
+    state,
+    elapsedMs,
+    canUseVoiceInput,
+    startRecording,
+    stopRecording,
+    toggleRecording,
+    cancelRecording,
+    isRecording: () => state() === "recording",
+    isTranscribing: () => state() === "transcribing",
+    buttonTitle: () => {
+      if (state() === "recording") return t("promptInput.voiceInput.stop.title")
+      if (state() === "transcribing") return t("promptInput.voiceInput.transcribing.title")
+      return t("promptInput.voiceInput.start.title")
+    },
+  }
+}
+
+function createRecorder(stream: MediaStream): MediaRecorder {
+  const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4", "audio/ogg;codecs=opus"]
+  const supported = candidates.find((candidate) => typeof MediaRecorder.isTypeSupported !== "function" || MediaRecorder.isTypeSupported(candidate))
+  return supported ? new MediaRecorder(stream, { mimeType: supported }) : new MediaRecorder(stream)
+}
+
+function stopTracks(stream: MediaStream | null) {
+  stream?.getTracks().forEach((track) => track.stop())
+}
+
+/** Reads the whole blob into memory and returns its bytes base64-encoded (via a binary string and btoa). */
+async function blobToBase64(blob: Blob): Promise<string> {
+  const bytes = new Uint8Array(await blob.arrayBuffer())
+  let binary = ""
+  bytes.forEach((byte) => {
+    binary += String.fromCharCode(byte)
+  })
+  return btoa(binary)
+}
--- a/packages/ui/src/components/settings-screen.tsx
+++ b/packages/ui/src/components/settings-screen.tsx
@@ -1,5 +1,5 @@
 import { Dialog } from "@kobalte/core/dialog"
-import { Settings, Bell, MonitorUp, Paintbrush, Terminal, X } from "lucide-solid"
+import { Settings, Bell, MonitorUp, Paintbrush, Terminal, Volume2, X } from "lucide-solid"
 import { createMemo, For, type Component } from "solid-js"
 import { useI18n } from "../lib/i18n"
 import {
@@ -13,6 +13,7 @@ import { AppearanceSettingsSection } from "./settings/appearance-settings-sectio
 import { NotificationsSettingsSection } from "./settings/notifications-settings-section"
 import { OpenCodeSettingsSection } from "./settings/opencode-settings-section"
 import { RemoteAccessSettingsSection } from "./settings/remote-access-settings-section"
+import { SpeechSettingsSection } from "./settings/speech-settings-section"

 export const SettingsScreen: Component = () => {
  const { t } = useI18n()
@@ -21,6 +22,7 @@ export const SettingsScreen: Component = () => {
    { id: "appearance" as SettingsSectionId, icon: Paintbrush, label: t("settings.nav.appearance") },
    { id: "notifications" as SettingsSectionId, icon: Bell, label: t("settings.nav.notifications") },
    { id: "remote" as SettingsSectionId, icon: MonitorUp, label: t("settings.nav.remote") },
+    { id: "speech" as SettingsSectionId, icon: Volume2, label: t("settings.nav.speech") },
    { id: "opencode" as SettingsSectionId, icon: Terminal, label: t("settings.nav.opencode") },
  ])

@@ -30,6 +32,8 @@ export const SettingsScreen: Component = () => {
        return <NotificationsSettingsSection />
      case "remote":
        return <RemoteAccessSettingsSection />
+      case "speech":
+        return <SpeechSettingsSection />
      case "opencode":
        return <OpenCodeSettingsSection />
      case "appearance":
--- a/packages/ui/src/components/settings/appearance-settings-section.tsx
+++ b/packages/ui/src/components/settings/appearance-settings-section.tsx
@@ -24,6 +24,7 @@ export const AppearanceSettingsSection: Component = () => {
    toggleUsageMetrics,
    toggleAutoCleanupBlankSessions,
    togglePromptSubmitOnEnter,
+    toggleShowPromptVoiceInput,
    setDiffViewMode,
    setToolOutputExpansion,
    setDiagnosticsExpansion,
@@ -38,10 +39,11 @@ export const AppearanceSettingsSection: Component = () => {
      toggleShowThinkingBlocks,
      toggleKeyboardShortcutHints,
      toggleShowTimelineTools,
-      toggleUsageMetrics,
-      toggleAutoCleanupBlankSessions,
-      togglePromptSubmitOnEnter,
-      setDiffViewMode,
+      toggleUsageMetrics,
+      toggleAutoCleanupBlankSessions,
+      togglePromptSubmitOnEnter,
+      toggleShowPromptVoiceInput,
+      setDiffViewMode,
      setToolOutputExpansion,
      setDiagnosticsExpansion,
      setThinkingBlocksExpansion,
--- a/packages/ui/src/components/settings/speech-settings-card.tsx
+++ b/packages/ui/src/components/settings/speech-settings-card.tsx
@@ -0,0 +1,217 @@
+import { createEffect, createMemo, createSignal, type Component } from "solid-js"
+import { Mic, Volume2 } from "lucide-solid"
+import { useConfig, type SpeechSettings } from "../../stores/preferences"
+import { useI18n } from "../../lib/i18n"
+import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
+import { getLogger } from "../../lib/logger"
+
+const log = getLogger("actions")
+
+type DraftFields = {
+  apiKey: string
+  baseUrl: string
+  sttModel: string
+  ttsModel: string
+  ttsVoice: string
+}
+
+function createDraftFields(speech: SpeechSettings): DraftFields {
+  return {
+    apiKey: speech.apiKey ?? "",
+    baseUrl: speech.baseUrl ?? "",
+    sttModel: speech.sttModel,
+    ttsModel: speech.ttsModel,
+    ttsVoice: speech.ttsVoice,
+  }
+}
+
+function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
+  return a.apiKey === b.apiKey && a.baseUrl === b.baseUrl && a.sttModel === b.sttModel && a.ttsModel === b.ttsModel && a.ttsVoice === b.ttsVoice
+}
+
+export const SpeechSettingsCard: Component = () => {
+  const { t } = useI18n()
+  const { serverSettings, updateSpeechSettings } = useConfig()
+  const initialDrafts = createDraftFields(serverSettings().speech)
+  const [isSaving, setIsSaving] = createSignal(false)
+  const [saveStatus, setSaveStatus] = createSignal<"idle" | "saved" | "error">("saved")
+  const [drafts, setDrafts] = createSignal<DraftFields>(initialDrafts)
+
+  createEffect(() => {
+    const speech = serverSettings().speech
+    const nextDrafts = createDraftFields(speech)
+    if (!isSaving() && !isDirty()) {
+      if (!isDraftEqual(drafts(), nextDrafts)) {
+        setDrafts(nextDrafts)
+      }
+    }
+  })
+
+  createEffect(() => {
+    void loadSpeechCapabilities()
+  })
+
+  const capabilityLabel = () => {
+    if (speechCapabilitiesLoading()) return t("settings.speech.status.loading")
+    if (speechCapabilitiesError()) return t("settings.speech.status.error")
+    return speechCapabilities()?.configured ? t("settings.speech.status.configured") : t("settings.speech.status.missing")
+  }
+
+  const updateDraft = (key: keyof DraftFields, value: string) => {
+    setSaveStatus("idle")
+    setDrafts((current) => ({ ...current, [key]: value }))
+  }
+
+  const isDirty = createMemo(() => {
+    const speech = serverSettings().speech
+    const current = drafts()
+    return (
+      (current.apiKey || "") !== (speech.apiKey || "") ||
+      (current.baseUrl || "") !== (speech.baseUrl || "") ||
+      current.sttModel !== speech.sttModel ||
+      current.ttsModel !== speech.ttsModel ||
+      current.ttsVoice !== speech.ttsVoice
+    )
+  })
+
+  const saveStatusLabel = () => {
+    if (isSaving()) return t("settings.speech.save.saving")
+    if (saveStatus() === "saved") return t("settings.speech.save.saved")
+    if (saveStatus() === "error") return t("settings.speech.save.error")
+    return t("settings.speech.save.unsaved")
+  }
+
+  async function handleSave() {
+    if (!isDirty() || isSaving()) return
+    const current = drafts()
+    setIsSaving(true)
+    setSaveStatus("idle")
+    try {
+      await updateSpeechSettings({
+        apiKey: current.apiKey.trim() || undefined,
+        baseUrl: current.baseUrl.trim() || undefined,
+        sttModel: current.sttModel.trim() || undefined,
+        ttsModel: current.ttsModel.trim() || undefined,
+        ttsVoice: current.ttsVoice.trim() || undefined,
+      })
+      await loadSpeechCapabilities(true)
+      setDrafts({
+        apiKey: current.apiKey.trim(),
+        baseUrl: current.baseUrl.trim(),
+        sttModel: current.sttModel.trim() || serverSettings().speech.sttModel,
+        ttsModel: current.ttsModel.trim() || serverSettings().speech.ttsModel,
+        ttsVoice: current.ttsVoice.trim() || serverSettings().speech.ttsVoice,
+      })
+      setSaveStatus("saved")
+    } catch (error) {
+      log.error("Failed to save speech settings", error)
+      setSaveStatus("error")
+    } finally {
+      setIsSaving(false)
+    }
+  }
+
+  return (
+    <div class="settings-card">
+      <div class="settings-card-header">
+        <div class="settings-card-heading-with-icon">
+          <Volume2 class="settings-card-heading-icon" />
+          <div>
+            <h3 class="settings-card-title">{t("settings.speech.title")}</h3>
+            <p class="settings-card-subtitle">{t("settings.speech.subtitle")}</p>
+          </div>
+        </div>
+        <span class="settings-scope-badge settings-scope-badge-server">{t("settings.scope.server")}</span>
+      </div>
+
+      <div class="settings-stack">
+        <div class="settings-toggle-row settings-toggle-row-compact">
+          <div>
+            <div class="settings-toggle-title">{t("settings.speech.provider.title")}</div>
+            <div class="settings-toggle-caption">{t("settings.speech.provider.subtitle")}</div>
+          </div>
+          <div class="settings-toolbar-inline">
+            <span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
+            <span class="settings-inline-note">{capabilityLabel()}</span>
+            <span class="settings-inline-note">{saveStatusLabel()}</span>
+            <button
+              type="button"
+              class="selector-button selector-button-primary w-auto whitespace-nowrap"
+              onClick={() => void handleSave()}
+              disabled={!isDirty() || isSaving()}
+            >
+              {isSaving() ? t("settings.speech.save.saving") : t("settings.speech.save.action")}
+            </button>
+          </div>
+        </div>
+
+        <Field
+          label={t("settings.speech.apiKey.title")}
+          caption={t("settings.speech.apiKey.subtitle")}
+          value={drafts().apiKey}
+          onInput={(value) => updateDraft("apiKey", value)}
+          type="password"
+        />
+        <Field
+          label={t("settings.speech.baseUrl.title")}
+          caption={t("settings.speech.baseUrl.subtitle")}
+          value={drafts().baseUrl}
+          onInput={(value) => updateDraft("baseUrl", value)}
+          placeholder={t("settings.speech.baseUrl.placeholder")}
+        />
+        <Field
+          label={t("settings.speech.sttModel.title")}
+          caption={t("settings.speech.sttModel.subtitle")}
+          value={drafts().sttModel}
+          onInput={(value) => updateDraft("sttModel", value)}
+        />
+        <Field
+          label={t("settings.speech.ttsModel.title")}
+          caption={t("settings.speech.ttsModel.subtitle")}
+          value={drafts().ttsModel}
+          onInput={(value) => updateDraft("ttsModel", value)}
+        />
+        <Field
+          label={t("settings.speech.ttsVoice.title")}
+          caption={t("settings.speech.ttsVoice.subtitle")}
+          value={drafts().ttsVoice}
+          onInput={(value) => updateDraft("ttsVoice", value)}
+          icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
+        />
+
+        <div class="settings-inline-note">{t("settings.speech.help")}</div>
+      </div>
+    </div>
+  )
+}
+
+const Field: Component<{
+  label: string
+  caption: string
+  value: string
+  type?: string
+  placeholder?: string
+  onInput: (value: string) => void
+  icon?: any
+}> = (props) => {
+  return (
+    <div class="settings-toggle-row settings-toggle-row-compact">
+      <div>
+        <div class="settings-toggle-title">{props.label}</div>
+        <div class="settings-toggle-caption">{props.caption}</div>
+      </div>
+      <div class="flex items-center gap-2 min-w-[18rem] max-w-[24rem] w-full">
+        {props.icon}
+        <input
+          type={props.type ?? "text"}
+          value={props.value}
+          onInput={(event) => props.onInput(event.currentTarget.value)}
+          class="selector-input w-full"
+          placeholder={props.placeholder}
+        />
+      </div>
+    </div>
+  )
+}
+
+export default SpeechSettingsCard
--- a/packages/ui/src/components/settings/speech-settings-section.tsx
+++ b/packages/ui/src/components/settings/speech-settings-section.tsx
@@ -0,0 +1,10 @@
+import type { Component } from "solid-js"
+import SpeechSettingsCard from "./speech-settings-card"
+
+// Settings section for speech: renders SpeechSettingsCard inside a
+// `settings-section-stack` wrapper and reads no props.
+export const SpeechSettingsSection: Component = () => (
+  <div class="settings-section-stack">
+    <SpeechSettingsCard />
+  </div>
+)
--- a/packages/ui/src/lib/api-client.ts
+++ b/packages/ui/src/lib/api-client.ts
@@ -7,6 +7,9 @@ import type {
  FileSystemCreateFolderResponse,
  FileSystemListResponse,
  InstanceData,
+  SpeechCapabilitiesResponse,
+  SpeechSynthesisResponse,
+  SpeechTranscriptionResponse,
  ServerMeta,
  WorkspaceCreateRequest,
  WorkspaceDescriptor,
@@ -235,6 +238,27 @@ export const serverApi = {
      body: JSON.stringify({ path }),
    })
  },
+  fetchSpeechCapabilities(): Promise<SpeechCapabilitiesResponse> { // calls /api/speech/capabilities via `request`; no request options are passed
+    return request<SpeechCapabilitiesResponse>("/api/speech/capabilities")
+  },
+  // Sends a base64-encoded recording, with optional filename/language/prompt fields,
+  // as a JSON POST body to /api/speech/transcribe.
+  transcribeAudio(payload: {
+    audioBase64: string
+    mimeType: string
+    filename?: string
+    language?: string
+    prompt?: string
+  }): Promise<SpeechTranscriptionResponse> {
+    const body = JSON.stringify(payload)
+    return request<SpeechTranscriptionResponse>("/api/speech/transcribe", { method: "POST", body })
+  },
+  // Sends the text to speak, plus an optional output format ("mp3" | "wav" | "opus"),
+  // as a JSON POST body to /api/speech/synthesize.
+  synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" }): Promise<SpeechSynthesisResponse> {
+    const body = JSON.stringify(payload)
+    return request<SpeechSynthesisResponse>("/api/speech/synthesize", { method: "POST", body })
+  },
  listFileSystem(path?: string, options?: { includeFiles?: boolean }): Promise<FileSystemListResponse> {
    const params = new URLSearchParams()
    if (path && path !== ".") {
--- a/packages/ui/src/lib/hooks/use-commands.ts
+++ b/packages/ui/src/lib/hooks/use-commands.ts
@@ -34,6 +34,7 @@ export interface UseCommandsOptions {
  toggleUsageMetrics: () => void
  toggleAutoCleanupBlankSessions: () => void
  togglePromptSubmitOnEnter: () => void
+  toggleShowPromptVoiceInput: () => void
  setDiffViewMode: (mode: "split" | "unified") => void
  setToolOutputExpansion: (mode: ExpansionPreference) => void
  setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -435,6 +436,7 @@ export function useCommands(options: UseCommandsOptions) {
      toggleUsageMetrics: options.toggleUsageMetrics,
      toggleAutoCleanupBlankSessions: options.toggleAutoCleanupBlankSessions,
      togglePromptSubmitOnEnter: options.togglePromptSubmitOnEnter,
+      toggleShowPromptVoiceInput: options.toggleShowPromptVoiceInput,
      setDiffViewMode: options.setDiffViewMode,
      setToolOutputExpansion: options.setToolOutputExpansion,
      setDiagnosticsExpansion: options.setDiagnosticsExpansion,
--- a/packages/ui/src/lib/i18n/index.tsx
+++ b/packages/ui/src/lib/i18n/index.tsx
@@ -2,6 +2,11 @@ import { createContext, createEffect, createMemo, createSignal, onCleanup, onMou
 import type { ParentComponent } from "solid-js"
 import { useConfig } from "../../stores/preferences"
 import { enMessages } from "./messages/en"
+import { esMessages } from "./messages/es"
+import { frMessages } from "./messages/fr"
+import { ruMessages } from "./messages/ru"
+import { jaMessages } from "./messages/ja"
+import { zhHansMessages } from "./messages/zh-Hans"

 type Messages = Record<string, string>

@@ -10,18 +15,14 @@ export type TranslateParams = Record<string, unknown>
 export type Locale = "en" | "es" | "fr" | "ru" | "ja" | "zh-Hans"

 const SUPPORTED_LOCALES: readonly Locale[] = ["en", "es", "fr", "ru", "ja", "zh-Hans"] as const
-const SUPPORTED_LOCALES_BY_LOWER = new Map(SUPPORTED_LOCALES.map((locale) => [locale.toLowerCase(), locale]))

-const localeMessagesCache = new Map<Locale, Messages>([["en", enMessages]])
-const localeMessagesPromises = new Map<Locale, Promise<Messages>>()
-
-const localeLoaders: Record<Locale, () => Promise<Messages>> = {
-  en: async () => enMessages,
-  es: async () => (await import("./messages/es")).esMessages,
-  fr: async () => (await import("./messages/fr")).frMessages,
-  ru: async () => (await import("./messages/ru")).ruMessages,
-  ja: async () => (await import("./messages/ja")).jaMessages,
-  "zh-Hans": async () => (await import("./messages/zh-Hans")).zhHansMessages,
+const messagesByLocale: Record<Locale, Messages> = { // static table: each supported locale maps to its eagerly imported message bundle
+  en: enMessages,
+  es: esMessages,
+  fr: frMessages,
+  ru: ruMessages,
+  ja: jaMessages,
+  "zh-Hans": zhHansMessages,
 }

 function normalizeLocaleTag(value: string): string {
@@ -33,7 +34,8 @@ function matchSupportedLocale(value: string | undefined): Locale | null {

  const normalized = normalizeLocaleTag(value)
  const lower = normalized.toLowerCase()
-  const exact = SUPPORTED_LOCALES_BY_LOWER.get(lower)
+  const supportedLower = new Map(SUPPORTED_LOCALES.map((locale) => [locale.toLowerCase(), locale]))
+  const exact = supportedLower.get(lower)
  if (exact) return exact

  const parts = lower.split("-")
@@ -41,11 +43,11 @@ function matchSupportedLocale(value: string | undefined): Locale | null {
  if (!base) return null

  if (base === "zh") {
-    const zhHans = SUPPORTED_LOCALES_BY_LOWER.get("zh-hans")
+    const zhHans = supportedLower.get("zh-hans")
    return zhHans ?? null
  }

-  const baseMatch = SUPPORTED_LOCALES_BY_LOWER.get(base)
+  const baseMatch = supportedLower.get(base)
  return baseMatch ?? null
 }

@@ -82,54 +84,8 @@ function translateFrom(messages: Messages, key: string, params?: TranslateParams
 }

 const [globalRevision, setGlobalRevision] = createSignal(0)
-let globalMessages: Messages = enMessages
-let globalLocale: Locale = "en"
-
-function getMessagesForLocale(locale: Locale): Messages {
-  return localeMessagesCache.get(locale) ?? enMessages
-}
-
-async function loadLocaleMessages(locale: Locale): Promise<Messages> {
-  const cached = localeMessagesCache.get(locale)
-  if (cached) {
-    return cached
-  }
-
-  const pending = localeMessagesPromises.get(locale)
-  if (pending) {
-    return pending
-  }
-
-  const loader = localeLoaders[locale]
-  const promise = loader()
-    .then((messages) => {
-      localeMessagesCache.set(locale, messages)
-      localeMessagesPromises.delete(locale)
-      return messages
-    })
-    .catch((error) => {
-      localeMessagesPromises.delete(locale)
-      throw error
-    })
-
-  localeMessagesPromises.set(locale, promise)
-  return promise
-}
-
-export async function preloadLocaleMessages(preferredLocale?: string | null): Promise<Locale> {
-  const resolvedLocale = matchSupportedLocale(preferredLocale ?? undefined) ?? detectNavigatorLocale() ?? "en"
-  try {
-    globalMessages = await loadLocaleMessages(resolvedLocale)
-    globalLocale = resolvedLocale
-    setGlobalRevision((value) => value + 1)
-    return resolvedLocale
-  } catch {
-    globalMessages = enMessages
-    globalLocale = "en"
-    setGlobalRevision((value) => value + 1)
-    return "en"
-  }
-}
+const initialGlobalLocale: Locale = detectNavigatorLocale() ?? "en"
+let globalMessages: Messages = messagesByLocale[initialGlobalLocale]

 export function tGlobal(key: string, params?: TranslateParams): string {
  globalRevision()
@@ -145,10 +101,9 @@ const I18nContext = createContext<I18nContextValue>()

 export const I18nProvider: ParentComponent = (props) => {
  const { preferences } = useConfig()
-  const [detectedLocale, setDetectedLocale] = createSignal<Locale>(globalLocale)
-  const [resolvedLocale, setResolvedLocale] = createSignal<Locale>(globalLocale)
-  const previousGlobalMessages = globalMessages
-  const previousGlobalLocale = globalLocale
+  const [detectedLocale, setDetectedLocale] = createSignal<Locale>("en")
+
+  const previousMessages = globalMessages

  onMount(() => {
    const detected = detectNavigatorLocale()
@@ -160,44 +115,19 @@ export const I18nProvider: ParentComponent = (props) => {
    return configured ?? detectedLocale() ?? "en"
  })

-  const messages = createMemo<Messages>(() => getMessagesForLocale(resolvedLocale()))
+  const messages = createMemo<Messages>(() => messagesByLocale[locale()])

  function t(key: string, params?: TranslateParams): string {
    return translateFrom(messages(), key, params)
  }

  createEffect(() => {
-    const nextLocale = locale()
-    let cancelled = false
-
-    void loadLocaleMessages(nextLocale)
-      .then((loadedMessages) => {
-        if (cancelled) {
-          return
-        }
-        setResolvedLocale(nextLocale)
-        globalLocale = nextLocale
-        globalMessages = loadedMessages
-        setGlobalRevision((value) => value + 1)
-      })
-      .catch(() => {
-        if (cancelled) {
-          return
-        }
-        setResolvedLocale("en")
-        globalMessages = enMessages
-        globalLocale = "en"
-        setGlobalRevision((value) => value + 1)
-      })
-
-    onCleanup(() => {
-      cancelled = true
-    })
+    globalMessages = messages()
+    setGlobalRevision((value) => value + 1)
  })

  onCleanup(() => {
-    globalMessages = previousGlobalMessages
-    globalLocale = previousGlobalLocale
+    globalMessages = previousMessages
    setGlobalRevision((value) => value + 1)
  })

--- a/packages/ui/src/lib/i18n/messages/en/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/en/messaging.ts
@@ -138,4 +138,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Send message",
  "promptInput.send.errorFallback": "Failed to send message",
  "promptInput.send.errorTitle": "Send failed",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/en/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/en/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Show or hide token and cost stats for assistant messages.",
  "settings.behavior.autoCleanup.title": "Auto-cleanup blank sessions",
  "settings.behavior.autoCleanup.subtitle": "Automatically clean up blank sessions when creating new ones.",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter to submit",
  "settings.behavior.promptSubmit.subtitle": "Use Enter to submit prompts; Cmd/Ctrl+Enter inserts a new line.",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/es/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/es/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Enviar mensaje",
  "promptInput.send.errorFallback": "No se pudo enviar el mensaje",
  "promptInput.send.errorTitle": "Error al enviar",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/es/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/es/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Muestra u oculta estadisticas de tokens y costo en mensajes del asistente.",
  "settings.behavior.autoCleanup.title": "Limpieza automatica de sesiones en blanco",
  "settings.behavior.autoCleanup.subtitle": "Limpia automaticamente las sesiones en blanco al crear nuevas.",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter para enviar",
  "settings.behavior.promptSubmit.subtitle": "Usa Enter para enviar; Cmd/Ctrl+Enter inserta una nueva linea.",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/fr/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/fr/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Envoyer le message",
  "promptInput.send.errorFallback": "Impossible d'envoyer le message",
  "promptInput.send.errorTitle": "Échec de l'envoi",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/fr/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/fr/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Afficher ou masquer les stats de tokens et de cout pour les messages de l'assistant.",
  "settings.behavior.autoCleanup.title": "Nettoyage auto des sessions vides",
  "settings.behavior.autoCleanup.subtitle": "Nettoyer automatiquement les sessions vides lors de la creation de nouvelles.",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Entrer pour envoyer",
  "settings.behavior.promptSubmit.subtitle": "Utiliser Entrer pour envoyer; Cmd/Ctrl+Entrer insere une nouvelle ligne.",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ja/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/ja/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "メッセージを送信",
  "promptInput.send.errorFallback": "メッセージの送信に失敗しました",
  "promptInput.send.errorTitle": "送信に失敗",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ja/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/ja/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "アシスタントのメッセージにトークン数とコストの統計を表示/非表示にします。",
  "settings.behavior.autoCleanup.title": "空のセッションを自動クリーンアップ",
  "settings.behavior.autoCleanup.subtitle": "新しいセッション作成時に空のセッションを自動的にクリーンアップします。",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enterで送信",
  "settings.behavior.promptSubmit.subtitle": "Enterで送信し、Cmd/Ctrl+Enterで改行します。",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ru/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/ru/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "Отправить сообщение",
  "promptInput.send.errorFallback": "Не удалось отправить сообщение",
  "promptInput.send.errorTitle": "Не удалось отправить",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/ru/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/ru/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "Показывать или скрывать статистику токенов и стоимости в сообщениях ассистента.",
  "settings.behavior.autoCleanup.title": "Автоочистка пустых сессий",
  "settings.behavior.autoCleanup.subtitle": "Автоматически очищать пустые сессии при создании новых.",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "Enter для отправки",
  "settings.behavior.promptSubmit.subtitle": "Enter отправляет; Cmd/Ctrl+Enter вставляет новую строку.",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts
+++ b/packages/ui/src/lib/i18n/messages/zh-Hans/messaging.ts
@@ -140,4 +140,11 @@ export const messagingMessages = {
  "promptInput.send.ariaLabel": "发送消息",
  "promptInput.send.errorFallback": "发送消息失败",
  "promptInput.send.errorTitle": "发送失败",
+  "promptInput.voiceInput.start.title": "Start voice input",
+  "promptInput.voiceInput.stop.title": "Stop recording and transcribe",
+  "promptInput.voiceInput.transcribing.title": "Transcribing audio",
+  "promptInput.voiceInput.error.title": "Voice input failed",
+  "promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
+  "promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
+  "promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
 } as const
--- a/packages/ui/src/lib/i18n/messages/zh-Hans/settings.ts
+++ b/packages/ui/src/lib/i18n/messages/zh-Hans/settings.ts
@@ -65,6 +65,7 @@ export const settingsMessages = {
  "settings.nav.appearance": "Appearance",
  "settings.nav.notifications": "Notifications",
  "settings.nav.remote": "Remote Access",
+  "settings.nav.speech": "Speech",
  "settings.nav.opencode": "OpenCode",
  "settings.scope.device": "This device",
  "settings.scope.server": "Server setting",
@@ -137,6 +138,34 @@ export const settingsMessages = {
  "settings.behavior.usageMetrics.subtitle": "显示或隐藏助手消息的令牌与成本统计。",
  "settings.behavior.autoCleanup.title": "自动清理空会话",
  "settings.behavior.autoCleanup.subtitle": "创建新会话时自动清理空会话。",
+  "settings.behavior.promptVoiceInput.title": "Prompt voice input",
+  "settings.behavior.promptVoiceInput.subtitle": "Show the microphone control for speech-to-text prompt input when speech is configured.",
  "settings.behavior.promptSubmit.title": "回车发送",
  "settings.behavior.promptSubmit.subtitle": "使用回车发送；Cmd/Ctrl+回车插入新行。",
+  "settings.speech.title": "Speech",
+  "settings.speech.subtitle": "Configure speech-to-text now and text-to-speech groundwork for later features.",
+  "settings.speech.provider.title": "Provider",
+  "settings.speech.provider.subtitle": "Speech requests use the server-side speech adapter.",
+  "settings.speech.provider.openaiCompatible": "OpenAI-compatible",
+  "settings.speech.status.loading": "Checking configuration...",
+  "settings.speech.status.configured": "Configured",
+  "settings.speech.status.missing": "Missing API key",
+  "settings.speech.status.error": "Speech service unavailable",
+  "settings.speech.apiKey.title": "API key",
+  "settings.speech.apiKey.subtitle": "Used for CodeNomad-managed speech requests.",
+  "settings.speech.baseUrl.title": "Base URL",
+  "settings.speech.baseUrl.subtitle": "Optional override for OpenAI-compatible speech endpoints.",
+  "settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
+  "settings.speech.sttModel.title": "Transcription model",
+  "settings.speech.sttModel.subtitle": "Model used for prompt speech-to-text requests.",
+  "settings.speech.ttsModel.title": "Speech model",
+  "settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
+  "settings.speech.ttsVoice.title": "Default voice",
+  "settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
+  "settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
+  "settings.speech.save.action": "Save",
+  "settings.speech.save.saving": "Saving...",
+  "settings.speech.save.saved": "Saved",
+  "settings.speech.save.unsaved": "Unsaved changes",
+  "settings.speech.save.error": "Save failed",
 } as const
--- a/packages/ui/src/lib/native/cli.ts
+++ b/packages/ui/src/lib/native/cli.ts
@@ -1,4 +1,3 @@
-import { invoke } from "@tauri-apps/api/core"
 import { runtimeEnv } from "../runtime-env"
 import { getLogger } from "../logger"
 const log = getLogger("actions")
@@ -16,8 +15,9 @@ export async function restartCli(): Promise<boolean> {
    }

    if (runtimeEnv.host === "tauri") {
-      if (typeof window.__TAURI__?.core?.invoke === "function") {
-        await invoke("cli_restart")
+      const tauri = (window as typeof window & { __TAURI__?: { invoke?: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T> } }).__TAURI__
+      if (tauri?.invoke) {
+        await tauri.invoke("cli_restart")
        return true
      }
      return false
--- a/packages/ui/src/lib/native/desktop-file-drop.ts
+++ b/packages/ui/src/lib/native/desktop-file-drop.ts
@@ -1,4 +1,3 @@
-import { listen } from "@tauri-apps/api/event"
 import { getLogger } from "../logger"
 import { runtimeEnv } from "../runtime-env"

@@ -108,8 +107,13 @@ export async function listenForNativeFolderDrops(onDrop: (paths: string[]) => vo
    return () => {}
  }

+  const eventApi = window.__TAURI__?.event
+  if (!eventApi?.listen) {
+    return () => {}
+  }
+
  try {
-    const unlisten = await listen("desktop:folder-drop", (event) => {
+    const unlisten = await eventApi.listen("desktop:folder-drop", (event) => {
      const payload = (event.payload ?? {}) as TauriFolderDropPayload
      const paths = normalizePathList(payload.paths)
      if (paths.length > 0) {
@@ -130,10 +134,15 @@ export async function listenForNativeFolderDropState(onState: (state: NativeFold
    return () => {}
  }

+  const eventApi = window.__TAURI__?.event
+  if (!eventApi?.listen) {
+    return () => {}
+  }
+
  try {
    const [unlistenEnter, unlistenLeave] = await Promise.all([
-      listen("desktop:folder-drag-enter", () => onState("enter")),
-      listen("desktop:folder-drag-leave", () => onState("leave")),
+      eventApi.listen("desktop:folder-drag-enter", () => onState("enter")),
+      eventApi.listen("desktop:folder-drag-leave", () => onState("leave")),
    ])
    return () => {
      unlistenEnter()
--- a/packages/ui/src/lib/native/tauri/functions.ts
+++ b/packages/ui/src/lib/native/tauri/functions.ts
@@ -1,21 +1,43 @@
-import { open } from "@tauri-apps/plugin-dialog"
 import type { NativeDialogOptions } from "../native-functions"
 import { getLogger } from "../../logger"
 const log = getLogger("actions")

+
+interface TauriDialogModule {
+  open?: (
+    options: {
+      title?: string
+      defaultPath?: string
+      filters?: { name?: string; extensions: string[] }[]
+      directory?: boolean
+      multiple?: boolean
+    },
+  ) => Promise<string | string[] | null>
+}
+
+interface TauriBridge {
+  dialog?: TauriDialogModule
+}
+
 export async function openTauriNativeDialog(options: NativeDialogOptions): Promise<string | null> {
  if (typeof window === "undefined") {
    return null
  }

+  const tauriBridge = (window as Window & { __TAURI__?: TauriBridge }).__TAURI__
+  const dialogApi = tauriBridge?.dialog
+  if (!dialogApi?.open) {
+    return null
+  }
+
  try {
-    const response = await open({
+    const response = await dialogApi.open({
      title: options.title,
      defaultPath: options.defaultPath,
      directory: options.mode === "directory",
      multiple: false,
      filters: options.filters?.map((filter) => ({
-        name: filter.name ?? "Files",
+        name: filter.name,
        extensions: filter.extensions,
      })),
    })
--- a/packages/ui/src/lib/native/wake-lock.ts
+++ b/packages/ui/src/lib/native/wake-lock.ts
@@ -1,4 +1,3 @@
-import { invoke } from "@tauri-apps/api/core"
 import { runtimeEnv } from "../runtime-env"
 import { getLogger } from "../logger"

@@ -61,7 +60,8 @@ function hasAnyWakeLockSupport(): boolean {
    if (api?.setWakeLock) return true
  }
  if (runtimeEnv.host === "tauri") {
-    return typeof window.__TAURI__?.core?.invoke === "function"
+    // We'll attempt dynamic import; treat as potentially supported.
+    return true
  }
  return Boolean((navigator as any)?.wakeLock?.request)
 }
@@ -84,18 +84,21 @@ async function setElectronWakeLock(enabled: boolean): Promise<boolean> {

 async function setTauriWakeLock(enabled: boolean): Promise<boolean> {
  try {
-    if (!hasAnyWakeLockSupport()) {
+    const mod = await import("tauri-plugin-keepawake-api")
+    const start = (mod as any).start as ((config?: any) => Promise<void>) | undefined
+    const stop = (mod as any).stop as (() => Promise<void>) | undefined
+    if (!start || !stop) {
      return false
    }

    if (enabled) {
-      // Match Electron's prevent-display-sleep behavior by keeping the display
-      // awake without blocking explicit system sleep requests.
-      await invoke("wake_lock_start", { config: { display: true, idle: false, sleep: false } })
+      // Plugin config supports toggling display/idle/sleep. Use a conservative
+      // default to keep both system + display awake.
+      await start({ display: true, idle: true, sleep: true })
      return true
    }

-    await invoke("wake_lock_stop")
+    await stop()
    return false
  } catch (error) {
    log.log("[wake-lock] tauri wake lock failed", error)
@@ -134,12 +137,13 @@ export function setWakeLockDesired(nextDesired: boolean): Promise<boolean> {
  inFlight = (async () => {
    try {
      const ok = await applyWakeLock(target)
-      applied = target ? ok : false
+      // Record the requested target as applied whether or not applyWakeLock reported success (ok).
+      applied = target
      return ok
    } finally {
      inFlight = null
      // If desired changed while in-flight, re-apply once.
-      if (desired !== target) {
+      if (desired !== applied) {
        void setWakeLockDesired(desired)
      }

--- a/packages/ui/src/lib/runtime-env.ts
+++ b/packages/ui/src/lib/runtime-env.ts
@@ -9,14 +9,17 @@ export interface RuntimeEnvironment {
 }

 declare global {
-  interface TauriCoreModule {
-    invoke: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T>
-  }
-
  interface Window {
    electronAPI?: unknown
    __TAURI__?: {
-      core?: TauriCoreModule
+      invoke?: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T>
+      event?: {
+        listen: (event: string, handler: (payload: { payload: unknown }) => void) => Promise<() => void>
+      }
+      dialog?: {
+        open?: (options: Record<string, unknown>) => Promise<string | string[] | null>
+        save?: (options: Record<string, unknown>) => Promise<string | null>
+      }
    }
  }
 }
--- a/packages/ui/src/lib/settings/behavior-registry.ts
+++ b/packages/ui/src/lib/settings/behavior-registry.ts
@@ -42,6 +42,7 @@ export type BehaviorRegistryActions = {
  toggleUsageMetrics: () => void
  toggleAutoCleanupBlankSessions: () => void
  togglePromptSubmitOnEnter: () => void
+  toggleShowPromptVoiceInput: () => void
  setDiffViewMode: (mode: "split" | "unified") => void
  setToolOutputExpansion: (mode: ExpansionPreference) => void
  setDiagnosticsExpansion: (mode: ExpansionPreference) => void
@@ -248,6 +249,24 @@ export function getBehaviorSettings(actions: BehaviorRegistryActions): BehaviorS
        )
      },
    },
+    {
+      kind: "toggle",
+      id: "behavior.promptVoiceInput",
+      titleKey: "settings.behavior.promptVoiceInput.title",
+      subtitleKey: "settings.behavior.promptVoiceInput.subtitle",
+      get: (p) => Boolean(p.showPromptVoiceInput ?? true),
+      set: (next) => {
+        if (updatePreferences) {
+          updatePreferences({ showPromptVoiceInput: next })
+          return
+        }
+        setBooleanByToggle(
+          () => Boolean(prefs().showPromptVoiceInput ?? true),
+          actions.toggleShowPromptVoiceInput,
+          next,
+        )
+      },
+    },
    {
      kind: "toggle",
      id: "behavior.promptSubmitOnEnter",
--- a/packages/ui/src/main.tsx
+++ b/packages/ui/src/main.tsx
@@ -4,7 +4,7 @@ import { ThemeProvider } from "./lib/theme"
 import { ConfigProvider } from "./stores/preferences"
 import { InstanceConfigProvider } from "./stores/instance-config"
 import { runtimeEnv } from "./lib/runtime-env"
-import { I18nProvider, preloadLocaleMessages } from "./lib/i18n"
+import { I18nProvider } from "./lib/i18n"
 import { storage } from "./lib/storage"
 import "./index.css"
 import "@git-diff-view/solid/styles/diff-view-pure.css"
@@ -31,19 +31,15 @@ async function bootstrap() {

    try {
      const uiConfig = await storage.loadConfigOwner("ui")
-      const theme = (uiConfig as any)?.theme
-      const locale = typeof (uiConfig as any)?.settings?.locale === "string" ? (uiConfig as any).settings.locale : undefined
+      const theme = (uiConfig as any)?.theme ?? "system"

-      if (theme === "light" || theme === "dark") {
-        document.documentElement.setAttribute("data-theme", theme)
-      } else {
+      if (theme === "system") {
        document.documentElement.removeAttribute("data-theme")
+      } else {
+        document.documentElement.setAttribute("data-theme", theme)
      }
-
-      await preloadLocaleMessages(locale)
    } catch {
      // If config fails to load, fall back to CSS defaults.
-      await preloadLocaleMessages()
    }
  }

--- a/packages/ui/src/renderer/loading/main.tsx
+++ b/packages/ui/src/renderer/loading/main.tsx
@@ -1,5 +1,3 @@
-import { invoke } from "@tauri-apps/api/core"
-import { listen } from "@tauri-apps/api/event"
 import { Show, createSignal, onCleanup, onMount } from "solid-js"
 import { render } from "solid-js/web"
 import iconUrl from "../../images/CodeNomad-Icon.png"
@@ -29,6 +27,13 @@ interface CliStatus {
  error?: string | null
 }

+interface TauriBridge {
+  invoke?: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T>
+  event?: {
+    listen: (event: string, handler: (payload: { payload: unknown }) => void) => Promise<() => void>
+  }
+}
+
 function pickPhraseKey(previous?: PhraseKey) {
  const filtered = phraseKeys.filter((key) => key !== previous)
  const source = filtered.length > 0 ? filtered : phraseKeys
@@ -41,6 +46,17 @@ function navigateTo(url?: string | null) {
  window.location.replace(url)
 }

+function getTauriBridge(): TauriBridge | null {
+  if (typeof window === "undefined") {
+    return null
+  }
+  const bridge = (window as { __TAURI__?: TauriBridge }).__TAURI__
+  if (!bridge || !bridge.event || !bridge.invoke) {
+    return null
+  }
+  return bridge
+}
+
 function annotateDocument() {
  if (typeof document === "undefined") {
    return
@@ -61,22 +77,25 @@ function LoadingApp() {
    setPhraseKey(pickPhraseKey())
    const unsubscribers: Array<() => void> = []

-    async function bootstrapTauri() {
+    async function bootstrapTauri(tauriBridge: TauriBridge | null) {
+      if (!tauriBridge || !tauriBridge.event || !tauriBridge.invoke) {
+        return
+      }
      try {
-        const readyUnlisten = await listen("cli:ready", (event) => {
+        const readyUnlisten = await tauriBridge.event.listen("cli:ready", (event) => {
          const payload = (event?.payload as CliStatus) || {}
          setError(null)
          setStatusKey(null)
          navigateTo(payload.url)
        })
-        const errorUnlisten = await listen("cli:error", (event) => {
+        const errorUnlisten = await tauriBridge.event.listen("cli:error", (event) => {
          const payload = (event?.payload as CliStatus) || {}
          if (payload.error) {
            setError(payload.error)
            setStatusKey("loadingScreen.status.issue")
          }
        })
-        const statusUnlisten = await listen("cli:status", (event) => {
+        const statusUnlisten = await tauriBridge.event.listen("cli:status", (event) => {
          const payload = (event?.payload as CliStatus) || {}
          if (payload.state === "error" && payload.error) {
            setError(payload.error)
@@ -90,7 +109,7 @@ function LoadingApp() {
        })
        unsubscribers.push(readyUnlisten, errorUnlisten, statusUnlisten)

-        const result = await invoke<CliStatus>("cli_get_status")
+        const result = await tauriBridge.invoke<CliStatus>("cli_get_status")
        if (result?.state === "ready" && result.url) {
          navigateTo(result.url)
        } else if (result?.state === "error" && result.error) {
@@ -104,7 +123,7 @@ function LoadingApp() {
    }

    if (isTauriHost()) {
-      void bootstrapTauri()
+      void bootstrapTauri(getTauriBridge())
    }

    onCleanup(() => {
--- a/packages/ui/src/stores/message-v2/bridge.ts
+++ b/packages/ui/src/stores/message-v2/bridge.ts
@@ -5,7 +5,7 @@ import { getQuestionCallId, getQuestionMessageId } from "../../types/question"
 import type { Message, MessageInfo, ClientPart } from "../../types/message"
 import type { Session } from "../../types/session"
 import { messageStoreBus } from "./bus"
-import type { MessageStatus, ReplaceMessageIdOptions, SessionRevertState } from "./types"
+import type { MessageStatus, SessionRevertState } from "./types"

 interface SessionMetadata {
  id: string
@@ -121,10 +121,10 @@ export function applyPartDeltaV2(
  })
 }

-export function replaceMessageIdV2(instanceId: string, oldId: string, newId: string, options?: Omit<ReplaceMessageIdOptions, "oldId" | "newId">): void {
+export function replaceMessageIdV2(instanceId: string, oldId: string, newId: string): void {
  if (!oldId || !newId || oldId === newId) return
  const store = messageStoreBus.getOrCreate(instanceId)
-  store.replaceMessageId({ oldId, newId, ...(options ?? {}) })
+  store.replaceMessageId({ oldId, newId })
 }

 function extractPermissionMessageId(permission: PermissionRequestLike): string | undefined {
--- a/packages/ui/src/stores/message-v2/instance-store.ts
+++ b/packages/ui/src/stores/message-v2/instance-store.ts
@@ -586,10 +586,10 @@ export function createInstanceMessageStore(instanceId: string, hooks?: MessageSt
      bufferPendingPart({ messageId: input.messageId, part: input.part, receivedAt: Date.now() })
      return
    }
-
+  
    const partId = ensurePartId(input.messageId, input.part, message.partIds.length)
    const cloned = clonePart(input.part)
-
+  
    setState(
      "messages",
      input.messageId,
@@ -792,8 +792,6 @@ export function createInstanceMessageStore(instanceId: string, hooks?: MessageSt
      id: options.newId,
      isEphemeral: false,
      updatedAt: Date.now(),
-      partIds: options.clearParts ? [] : existing.partIds,
-      parts: options.clearParts ? {} : existing.parts,
    }

    setState("messages", options.newId, cloned)
--- a/packages/ui/src/stores/message-v2/types.ts
+++ b/packages/ui/src/stores/message-v2/types.ts
@@ -152,7 +152,6 @@ export interface PartUpdateInput {
 export interface ReplaceMessageIdOptions {
  oldId: string
  newId: string
-  clearParts?: boolean
 }

 export interface ScrollCacheKey {
--- a/packages/ui/src/stores/preferences.tsx
+++ b/packages/ui/src/stores/preferences.tsx
@@ -7,6 +7,7 @@ import {
  updateInstanceConfig as updateInstanceData,
 } from "./instance-config"
 import { getLogger } from "../lib/logger"
+import { loadSpeechCapabilities, resetSpeechCapabilities } from "./speech"

 const log = getLogger("actions")

@@ -27,6 +28,16 @@ export type DiffViewMode = "split" | "unified"
 export type ExpansionPreference = "expanded" | "collapsed"
 export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
 export type ListeningMode = "local" | "all"
+export type SpeechProviderPreference = "openai-compatible"
+
+export interface SpeechSettings {
+  provider: SpeechProviderPreference
+  apiKey?: string
+  baseUrl?: string
+  sttModel: string
+  ttsModel: string
+  ttsVoice: string
+}

 export interface UiSettings {
  showThinkingBlocks: boolean
@@ -34,6 +45,7 @@ export interface UiSettings {
  thinkingBlocksExpansion: ExpansionPreference
  showTimelineTools: boolean
  promptSubmitOnEnter: boolean
+  showPromptVoiceInput: boolean
  locale?: string
  diffViewMode: DiffViewMode
  toolOutputExpansion: ExpansionPreference
@@ -75,6 +87,7 @@ interface ServerConfigBucket {
  listeningMode?: ListeningMode
  environmentVariables?: Record<string, string>
  opencodeBinary?: string
+  speech?: Partial<SpeechSettings>
 }

 interface UiStateBucket {
@@ -107,6 +120,7 @@ const defaultUiSettings: UiSettings = {
  thinkingBlocksExpansion: "expanded",
  showTimelineTools: true,
  promptSubmitOnEnter: false,
+  showPromptVoiceInput: true,
  diffViewMode: "split",
  toolOutputExpansion: "expanded",
  diagnosticsExpansion: "expanded",
@@ -120,6 +134,13 @@ const defaultUiSettings: UiSettings = {
  notifyOnIdle: true,
 }

+const defaultSpeechSettings: SpeechSettings = {
+  provider: "openai-compatible",
+  sttModel: "gpt-4o-mini-transcribe",
+  ttsModel: "gpt-4o-mini-tts",
+  ttsVoice: "alloy",
+}
+
 function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
  const sanitized = input ?? {}
  return {
@@ -129,6 +150,7 @@ function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
    thinkingBlocksExpansion: sanitized.thinkingBlocksExpansion ?? defaultUiSettings.thinkingBlocksExpansion,
    showTimelineTools: sanitized.showTimelineTools ?? defaultUiSettings.showTimelineTools,
    promptSubmitOnEnter: sanitized.promptSubmitOnEnter ?? defaultUiSettings.promptSubmitOnEnter,
+    showPromptVoiceInput: sanitized.showPromptVoiceInput ?? defaultUiSettings.showPromptVoiceInput,
    locale: sanitized.locale ?? defaultUiSettings.locale,
    diffViewMode: sanitized.diffViewMode ?? defaultUiSettings.diffViewMode,
    toolOutputExpansion: sanitized.toolOutputExpansion ?? defaultUiSettings.toolOutputExpansion,
@@ -156,6 +178,27 @@ function normalizeRecord(value: unknown): Record<string, string> {
  return out
 }

+function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): SpeechSettings {
+  const sanitized = input ?? {}
+  return {
+    provider: sanitized.provider === "openai-compatible" ? sanitized.provider : defaultSpeechSettings.provider,
+    apiKey: typeof sanitized.apiKey === "string" && sanitized.apiKey.trim() ? sanitized.apiKey.trim() : undefined,
+    baseUrl: typeof sanitized.baseUrl === "string" && sanitized.baseUrl.trim() ? sanitized.baseUrl.trim() : undefined,
+    sttModel:
+      typeof sanitized.sttModel === "string" && sanitized.sttModel.trim()
+        ? sanitized.sttModel.trim()
+        : defaultSpeechSettings.sttModel,
+    ttsModel:
+      typeof sanitized.ttsModel === "string" && sanitized.ttsModel.trim()
+        ? sanitized.ttsModel.trim()
+        : defaultSpeechSettings.ttsModel,
+    ttsVoice:
+      typeof sanitized.ttsVoice === "string" && sanitized.ttsVoice.trim()
+        ? sanitized.ttsVoice.trim()
+        : defaultSpeechSettings.ttsVoice,
+  }
+}
+
 function cloneArray<T>(value: unknown, mapper: (item: any) => T | null): T[] {
  if (!Array.isArray(value)) return []
  const out: T[] = []
@@ -206,12 +249,15 @@ function normalizeUiState(input?: UiStateBucket | null): NormalizedUiState {
  }
 }

-function normalizeServerConfig(input?: ServerConfigBucket | null): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> {
+function normalizeServerConfig(
+  input?: ServerConfigBucket | null,
+): Required<Pick<ServerConfigBucket, "listeningMode" | "environmentVariables" | "opencodeBinary">> & { speech: SpeechSettings } {
  const source = input ?? {}
  const listeningMode = source.listeningMode === "all" ? "all" : "local"
  const opencodeBinary = typeof source.opencodeBinary === "string" && source.opencodeBinary.trim() ? source.opencodeBinary : "opencode"
  const environmentVariables = normalizeRecord(source.environmentVariables)
-  return { listeningMode, opencodeBinary, environmentVariables }
+  const speech = normalizeSpeechSettings(source.speech)
+  return { listeningMode, opencodeBinary, environmentVariables, speech }
 }

 function getModelKey(model: { providerId: string; modelId: string }): string {
@@ -342,6 +388,16 @@ function updateLastUsedBinary(path: string): void {
  void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to update binary list", error))
 }

+async function updateSpeechSettings(updates: Partial<SpeechSettings>): Promise<void> {
+  const next = normalizeSpeechSettings({ ...serverSettings().speech, ...updates })
+  try {
+    await patchConfigOwner("server", { speech: next })
+  } catch (error) {
+    log.error("Failed to update speech settings", error)
+    throw error
+  }
+}
+
 function addOpenCodeBinary(path: string, version?: string): void {
  const nextList = buildBinaryList(path, version, opencodeBinaries())
  void patchStateOwner("ui", { opencodeBinaries: nextList }).catch((error) => log.error("Failed to add binary", error))
@@ -476,6 +532,10 @@ function togglePromptSubmitOnEnter(): void {
  updateUiSettings({ promptSubmitOnEnter: !preferences().promptSubmitOnEnter })
 }

+function toggleShowPromptVoiceInput(): void {
+  updateUiSettings({ showPromptVoiceInput: !preferences().showPromptVoiceInput })
+}
+
 function toggleAutoCleanupBlankSessions(): void {
  const nextValue = !preferences().autoCleanupBlankSessions
  log.info("toggle auto cleanup", { value: nextValue })
@@ -521,6 +581,7 @@ interface ConfigContextValue {
  addEnvironmentVariable: typeof addEnvironmentVariable
  removeEnvironmentVariable: typeof removeEnvironmentVariable
  updateLastUsedBinary: typeof updateLastUsedBinary
+  updateSpeechSettings: typeof updateSpeechSettings

  // ui-owned state
  recentFolders: typeof recentFolders
@@ -544,6 +605,7 @@ interface ConfigContextValue {
  toggleUsageMetrics: typeof toggleUsageMetrics
  toggleAutoCleanupBlankSessions: typeof toggleAutoCleanupBlankSessions
  togglePromptSubmitOnEnter: typeof togglePromptSubmitOnEnter
+  toggleShowPromptVoiceInput: typeof toggleShowPromptVoiceInput
  setDiffViewMode: typeof setDiffViewMode
  setToolOutputExpansion: typeof setToolOutputExpansion
  setDiagnosticsExpansion: typeof setDiagnosticsExpansion
@@ -569,6 +631,7 @@ const configContextValue: ConfigContextValue = {
  addEnvironmentVariable,
  removeEnvironmentVariable,
  updateLastUsedBinary,
+  updateSpeechSettings,
  recentFolders,
  opencodeBinaries,
  uiState,
@@ -588,6 +651,7 @@ const configContextValue: ConfigContextValue = {
  toggleUsageMetrics,
  toggleAutoCleanupBlankSessions,
  togglePromptSubmitOnEnter,
+  toggleShowPromptVoiceInput,
  setDiffViewMode,
  setToolOutputExpansion,
  setDiagnosticsExpansion,
@@ -610,6 +674,8 @@ export const ConfigProvider: ParentComponent = (props) => {
    const unsubServer = storage.onConfigOwnerChanged("server", (bucket) => {
      setServerConfigBucket(bucket as any)
      setIsLoaded(true)
+      resetSpeechCapabilities()
+      void loadSpeechCapabilities(true)
    })
    const unsubStateUi = storage.onStateOwnerChanged("ui", (bucket) => {
      setUiStateBucket(bucket as any)
@@ -648,6 +714,7 @@ export {
  addEnvironmentVariable,
  removeEnvironmentVariable,
  updateLastUsedBinary,
+  updateSpeechSettings,
  addRecentFolder,
  removeRecentFolder,
  addOpenCodeBinary,
@@ -664,6 +731,7 @@ export {
  toggleUsageMetrics,
  toggleAutoCleanupBlankSessions,
  togglePromptSubmitOnEnter,
+  toggleShowPromptVoiceInput,
  setDiffViewMode,
  setToolOutputExpansion,
  setDiagnosticsExpansion,
--- a/packages/ui/src/stores/session-actions.ts
+++ b/packages/ui/src/stores/session-actions.ts
@@ -94,7 +94,7 @@ async function sendMessage(
  }

  const messageId = createId("msg")
-  const textPartId = createId("prt")
+  const textPartId = createId("part")

  const resolvedPrompt = resolvePastedPlaceholders(prompt, attachments)

@@ -110,6 +110,7 @@ async function sendMessage(

  const requestParts: any[] = [
    {
+      id: textPartId,
      type: "text" as const,
      text: resolvedPrompt,
    },
@@ -119,8 +120,9 @@ async function sendMessage(
    for (const att of attachments) {
      const source = att.source
      if (source.type === "file") {
-        const partId = createId("prt")
+        const partId = createId("part")
        requestParts.push({
+          id: partId,
          type: "file" as const,
          url: att.url,
          mime: source.mime,
@@ -146,8 +148,9 @@ async function sendMessage(
          continue
        }

-        const partId = createId("prt")
+        const partId = createId("part")
        requestParts.push({
+          id: partId,
          type: "text" as const,
          text: value,
        })
@@ -181,6 +184,7 @@ async function sendMessage(
  })

  const requestBody = {
+    messageID: messageId,
    parts: requestParts,
    ...(session.agent && { agent: session.agent }),
    ...(session.model.providerId &&
--- a/packages/ui/src/stores/session-events.ts
+++ b/packages/ui/src/stores/session-events.ts
@@ -240,22 +240,19 @@ function resolveMessageRole(info?: MessageInfo | null): MessageRole {
  return info?.role === "user" ? "user" : "assistant"
 }

-function findPendingSyntheticMessageId(
+function findPendingMessageId(
  store: InstanceMessageStore,
  sessionId: string,
  role: MessageRole,
 ): string | undefined {
  const messageIds = store.getSessionMessageIds(sessionId)
-  for (const messageId of messageIds) {
-    const record = store.getMessage(messageId)
-    if (!record) continue
-    if (record.sessionId !== sessionId) continue
-    if (record.role !== role) continue
-    if (record.status !== "sending") continue
-    if (!record.isEphemeral) continue
-    return record.id
-  }
-  return undefined
+  const lastId = messageIds[messageIds.length - 1]
+  if (!lastId) return undefined
+  const record = store.getMessage(lastId)
+  if (!record) return undefined
+  if (record.sessionId !== sessionId) return undefined
+  if (record.role !== role) return undefined
+  return record.status === "sending" ? record.id : undefined
 }

 function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | MessagePartUpdatedEvent): void {
@@ -285,9 +282,9 @@ function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | Mes

    let record = store.getMessage(messageId)
    if (!record) {
-      const pendingId = findPendingSyntheticMessageId(store, sessionId, role)
+      const pendingId = findPendingMessageId(store, sessionId, role)
      if (pendingId && pendingId !== messageId) {
-        replaceMessageIdV2(instanceId, pendingId, messageId, { clearParts: role === "user" })
+        replaceMessageIdV2(instanceId, pendingId, messageId)
        record = store.getMessage(messageId)
      }
    }
@@ -348,9 +345,9 @@ function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | Mes

    let record = store.getMessage(messageId)
    if (!record) {
-      const pendingId = findPendingSyntheticMessageId(store, sessionId, role)
+      const pendingId = findPendingMessageId(store, sessionId, role)
      if (pendingId && pendingId !== messageId) {
-        replaceMessageIdV2(instanceId, pendingId, messageId, { clearParts: role === "user" })
+        replaceMessageIdV2(instanceId, pendingId, messageId)
        record = store.getMessage(messageId)
      }
    }
--- a/packages/ui/src/stores/settings-screen.ts
+++ b/packages/ui/src/stores/settings-screen.ts
@@ -1,6 +1,6 @@
 import { createSignal } from "solid-js"

-export type SettingsSectionId = "appearance" | "notifications" | "remote" | "opencode"
+export type SettingsSectionId = "appearance" | "notifications" | "remote" | "speech" | "opencode"

 const [settingsOpen, setSettingsOpen] = createSignal(false)
 const [activeSettingsSection, setActiveSettingsSection] = createSignal<SettingsSectionId>("appearance")
--- a/packages/ui/src/stores/speech.ts
+++ b/packages/ui/src/stores/speech.ts
@@ -0,0 +1,46 @@
+import { createSignal } from "solid-js"
+import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
+import { serverApi } from "../lib/api-client"
+import { getLogger } from "../lib/logger"
+
+const log = getLogger("api")
+
+const [speechCapabilities, setSpeechCapabilities] = createSignal<SpeechCapabilitiesResponse | null>(null)
+const [speechCapabilitiesLoading, setSpeechCapabilitiesLoading] = createSignal(false)
+const [speechCapabilitiesError, setSpeechCapabilitiesError] = createSignal<string | null>(null)
+
+let speechCapabilitiesPromise: Promise<SpeechCapabilitiesResponse | null> | null = null
+
+async function loadSpeechCapabilities(force = false): Promise<SpeechCapabilitiesResponse | null> {
+  if (!force && speechCapabilities()) return speechCapabilities()
+  if (speechCapabilitiesPromise) return speechCapabilitiesPromise
+
+  setSpeechCapabilitiesLoading(true)
+  setSpeechCapabilitiesError(null)
+  speechCapabilitiesPromise = serverApi
+    .fetchSpeechCapabilities()
+    .then((result) => {
+      setSpeechCapabilities(result)
+      setSpeechCapabilitiesError(null)
+      return result
+    })
+    .catch((error) => {
+      log.error("Failed to load speech capabilities", error)
+      setSpeechCapabilities(null)
+      setSpeechCapabilitiesError(error instanceof Error ? error.message : String(error))
+      return null
+    })
+    .finally(() => {
+      setSpeechCapabilitiesLoading(false)
+      speechCapabilitiesPromise = null
+    })
+
+  return speechCapabilitiesPromise
+}
+
+function resetSpeechCapabilities(): void {
+  setSpeechCapabilities(null)
+  setSpeechCapabilitiesError(null)
+}
+
+export { speechCapabilities, speechCapabilitiesLoading, speechCapabilitiesError, loadSpeechCapabilities, resetSpeechCapabilities }
--- a/packages/ui/src/styles/messaging/prompt-input.css
+++ b/packages/ui/src/styles/messaging/prompt-input.css
@@ -170,6 +170,41 @@
  color: var(--button-danger-text, var(--text-inverted, #ffffff));
 }

+.prompt-voice-button {
+  @apply h-10 rounded-md border-none cursor-pointer flex items-center justify-center transition-all flex-shrink-0;
+  min-width: 2.5rem;
+  background-color: color-mix(in oklab, var(--surface-secondary) 82%, var(--surface-base));
+  color: var(--text-secondary);
+}
+
+.prompt-voice-button:hover:not(:disabled) {
+  color: var(--text-primary);
+  background-color: color-mix(in oklab, var(--accent-primary) 12%, var(--surface-secondary));
+  @apply scale-105;
+}
+
+.prompt-voice-button:active:not(:disabled) {
+  @apply scale-95;
+}
+
+.prompt-voice-button.is-recording {
+  min-width: 3.5rem;
+  background-color: color-mix(in oklab, var(--button-danger-bg, rgba(239, 68, 68, 0.85)) 88%, white 12%);
+  color: var(--button-danger-text, var(--text-inverted, #ffffff));
+}
+
+.prompt-voice-button:disabled {
+  @apply opacity-50 cursor-not-allowed;
+}
+
+.prompt-voice-timer {
+  font-size: 0.68rem;
+  font-variant-numeric: tabular-nums;
+  font-weight: 600;
+  line-height: 1;
+  color: currentColor;
+}
+
 .stop-button:hover:not(:disabled) {
  background-color: var(--button-danger-hover-bg, rgba(239, 68, 68, 0.9));
  @apply opacity-95 scale-105;
--- a/packages/ui/src/types/global.d.ts
+++ b/packages/ui/src/types/global.d.ts
@@ -47,9 +47,16 @@ declare global {
    webkitGetAsEntry?: () => FileSystemEntry | null
  }

+  interface TauriDialogModule {
+    open?: (options: Record<string, unknown>) => Promise<string | string[] | null>
+    save?: (options: Record<string, unknown>) => Promise<string | null>
+  }
+
  interface TauriBridge {
-    core?: {
-      invoke: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T>
+    invoke?: <T = unknown>(cmd: string, args?: Record<string, unknown>) => Promise<T>
+    dialog?: TauriDialogModule
+    event?: {
+      listen: (event: string, handler: (payload: { payload: unknown }) => void) => Promise<() => void>
    }
  }

--- a/packages/ui/src/types/tauri-plugin-keepawake-api.d.ts
+++ b/packages/ui/src/types/tauri-plugin-keepawake-api.d.ts
@@ -0,0 +1,10 @@
+declare module "tauri-plugin-keepawake-api" {
+  export interface KeepAwakeConfig {
+    display?: boolean
+    idle?: boolean
+    sleep?: boolean
+  }
+
+  export function start(config?: KeepAwakeConfig): Promise<void>
+  export function stop(): Promise<void>
+}
Author	SHA1	Message	Date
Shantur Rathore	bf07904789	feat(speech): make prompt input push to talk	2026-03-24 22:42:27 +00:00
Shantur Rathore	4e576829b7	Revert "feat(speech): add realtime prompt dictation support" This reverts commit `f9b5e2b529`.	2026-03-24 20:52:04 +00:00
Shantur Rathore	f9b5e2b529	feat(speech): add realtime prompt dictation support Add server-backed realtime transcription for prompt voice input and expose speech settings to choose realtime mode and models.	2026-03-19 11:32:45 +00:00
Shantur Rathore	cc2f6976f6	fix(speech): preserve edits while saving settings	2026-03-13 08:34:34 +00:00
Shantur Rathore	0ed19aeefb	feat(speech): add prompt voice input groundwork	2026-03-12 22:04:57 +00:00