feat(speech): add prompt voice input (#249)

## Summary
- add server-backed speech capabilities and transcription endpoints, plus UI settings for speech configuration
- add push-to-talk prompt voice input with microphone controls, transcription insertion, and browser capability gating
- keep prompt controls aligned by restoring right-side nav placement and moving the mic beside the expand control
2026-03-25 14:08:11 +00:00
parent a950d47df0
commit 1233121a13
40 changed files with 1545 additions and 27 deletions
--- a/packages/ui/src/lib/api-client.ts
+++ b/packages/ui/src/lib/api-client.ts
@@ -7,6 +7,9 @@ import type {
  FileSystemCreateFolderResponse,
  FileSystemListResponse,
  InstanceData,
+  SpeechCapabilitiesResponse,
+  SpeechSynthesisResponse,
+  SpeechTranscriptionResponse,
  ServerMeta,
  WorkspaceCreateRequest,
  WorkspaceDescriptor,
@@ -235,6 +238,27 @@ export const serverApi = {
      body: JSON.stringify({ path }),
    })
  },
+  fetchSpeechCapabilities(): Promise<SpeechCapabilitiesResponse> { // Calls the shared request() helper for the speech capabilities endpoint.
+    return request<SpeechCapabilitiesResponse>("/api/speech/capabilities") // no options passed, so HTTP method/headers are whatever request() defaults to
+  },
+  transcribeAudio(payload: { // Posts an audio payload to the speech transcription endpoint.
+    audioBase64: string // presumably the base64-encoded audio bytes — confirm against the server handler
+    mimeType: string // presumably the MIME type of the encoded audio — confirm
+    filename?: string // optional; may be omitted by callers
+    language?: string // optional; NOTE(review): expected format (e.g. a language code) is not visible here
+    prompt?: string // optional; NOTE(review): semantics are defined by the server, not visible here
+  }): Promise<SpeechTranscriptionResponse> {
+    return request<SpeechTranscriptionResponse>("/api/speech/transcribe", {
+      method: "POST",
+      body: JSON.stringify(payload), // the whole payload, audio included, is serialized into one JSON string
+    })
+  },
+  synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" }): Promise<SpeechSynthesisResponse> { // Posts text to the speech synthesis endpoint; `format` is optional and limited to the three listed literals.
+    return request<SpeechSynthesisResponse>("/api/speech/synthesize", {
+      method: "POST",
+      body: JSON.stringify(payload), // payload is sent as a JSON string; an omitted `format` is simply absent from the body
+    })
+  },
  listFileSystem(path?: string, options?: { includeFiles?: boolean }): Promise<FileSystemListResponse> {
    const params = new URLSearchParams()
    if (path && path !== ".") {