Compare commits

..

10 Commits

Author SHA1 Message Date
Shantur Rathore
1b4eff9419 Min version 0.13.1 2026-03-27 19:46:54 +00:00
Shantur Rathore
6c1febf50e Bump to v0.13.1 2026-03-27 19:46:12 +00:00
Shantur Rathore
75622ef366 refactor(ui): simplify prompt recording indicator 2026-03-27 19:45:56 +00:00
Shantur Rathore
864f913e3e feat(ui): add assistant conversation playback mode 2026-03-27 19:17:25 +00:00
Shantur Rathore
b7d4f8f869 feat(ui): add clear action to prompt input 2026-03-26 23:10:02 +00:00
Shantur Rathore
0dc5867fb3 fix(speech): surface streaming playback compatibility 2026-03-26 22:59:30 +00:00
Shantur Rathore
d13ecba322 feat(speech): add configurable TTS playback modes 2026-03-26 20:46:49 +00:00
Shantur Rathore
740f37db86 refactor(ui): use stop-square icon for speech playback 2026-03-26 19:39:37 +00:00
Shantur Rathore
d447b05821 feat(ui): add message text-to-speech controls 2026-03-26 18:29:45 +00:00
Shantur Rathore
1233121a13 feat(speech): add prompt voice input (#249)
## Summary
- add server-backed speech capabilities and transcription endpoints plus
UI settings for speech configuration
- add push-to-talk prompt voice input with microphone controls,
transcription insertion, and browser capability gating
- keep prompt controls aligned by restoring right-side nav placement and
moving the mic beside the expand control
2026-03-25 14:08:11 +00:00
43 changed files with 1829 additions and 134 deletions

12
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "codenomad-workspace",
"version": "0.12.3",
"version": "0.13.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "codenomad-workspace",
"version": "0.12.3",
"version": "0.13.1",
"license": "MIT",
"dependencies": {
"7zip-bin": "^5.2.0",
@@ -12055,7 +12055,7 @@
},
"packages/electron-app": {
"name": "@neuralnomads/codenomad-electron-app",
"version": "0.12.3",
"version": "0.13.1",
"license": "MIT",
"dependencies": {
"@codenomad/ui": "file:../ui",
@@ -12092,7 +12092,7 @@
},
"packages/server": {
"name": "@neuralnomads/codenomad",
"version": "0.12.3",
"version": "0.13.1",
"license": "MIT",
"dependencies": {
"@fastify/cors": "^8.5.0",
@@ -12134,7 +12134,7 @@
},
"packages/tauri-app": {
"name": "@codenomad/tauri-app",
"version": "0.12.3",
"version": "0.13.1",
"license": "MIT",
"devDependencies": {
"@tauri-apps/cli": "^2.9.4"
@@ -12142,7 +12142,7 @@
},
"packages/ui": {
"name": "@codenomad/ui",
"version": "0.12.3",
"version": "0.13.1",
"license": "MIT",
"dependencies": {
"@git-diff-view/solid": "^0.0.8",

View File

@@ -1,6 +1,6 @@
{
"name": "codenomad-workspace",
"version": "0.12.3",
"version": "0.13.1",
"private": true,
"description": "CodeNomad monorepo workspace",
"license": "MIT",
@@ -31,4 +31,4 @@
"devDependencies": {
"baseline-browser-mapping": "^2.9.11"
}
}
}

View File

@@ -1,4 +1,4 @@
{
"minServerVersion": "0.12.3",
"minServerVersion": "0.13.1",
"latestServerUrl": "https://github.com/NeuralNomadsAI/CodeNomad/releases/latest"
}

View File

@@ -1,6 +1,6 @@
{
"name": "@neuralnomads/codenomad-electron-app",
"version": "0.12.3",
"version": "0.13.1",
"description": "CodeNomad - AI coding assistant",
"license": "MIT",
"author": {

View File

@@ -4,6 +4,6 @@
"private": true,
"license": "MIT",
"dependencies": {
"@opencode-ai/plugin": "1.2.14"
"@opencode-ai/plugin": "1.3.2"
}
}

View File

@@ -1,12 +1,12 @@
{
"name": "@neuralnomads/codenomad",
"version": "0.12.3",
"version": "0.13.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@neuralnomads/codenomad",
"version": "0.12.3",
"version": "0.13.1",
"dependencies": {
"@fastify/cors": "^8.5.0",
"@fastify/reply-from": "^9.8.0",

View File

@@ -1,6 +1,6 @@
{
"name": "@neuralnomads/codenomad",
"version": "0.12.3",
"version": "0.13.1",
"description": "CodeNomad Server",
"license": "MIT",
"author": {
@@ -47,4 +47,4 @@
"tsx": "^4.20.6",
"typescript": "^5.6.3"
}
}
}

View File

@@ -219,10 +219,13 @@ export interface SpeechCapabilitiesResponse {
provider: string
supportsStt: boolean
supportsTts: boolean
supportsStreamingTts: boolean
baseUrl?: string
sttModel: string
ttsModel: string
ttsVoice: string
ttsFormats: string[]
streamingTtsFormats: string[]
}
export interface SpeechTranscriptionResponse {

View File

@@ -16,7 +16,7 @@ const TranscribeBodySchema = z.object({
const SynthesizeBodySchema = z.object({
text: z.string().trim().min(1, "Text is required"),
format: z.enum(["mp3", "wav", "opus"]).optional(),
format: z.enum(["mp3", "wav", "opus", "aac"]).optional(),
})
function getSpeechErrorStatus(error: unknown): number {
@@ -57,4 +57,18 @@ export function registerSpeechRoutes(app: FastifyInstance, deps: RouteDeps) {
return { error: getSpeechErrorMessage(error, "Failed to synthesize audio") }
}
})
app.post("/api/speech/synthesize/stream", async (request, reply) => {
try {
const body = SynthesizeBodySchema.parse(request.body ?? {})
const result = await deps.speechService.synthesizeStream(body)
reply.header("Content-Type", result.mimeType)
reply.header("Cache-Control", "no-store")
return reply.send(result.stream)
} catch (error) {
request.log.error({ err: error }, "Failed to stream synthesized audio")
reply.code(getSpeechErrorStatus(error))
return { error: getSpeechErrorMessage(error, "Failed to stream synthesized audio") }
}
})
}

View File

@@ -1,8 +1,9 @@
import { Readable } from "node:stream"
import OpenAI from "openai"
import { toFile } from "openai/uploads"
import type { SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../../api-types"
import type { Logger } from "../../logger"
import type { NormalizedSpeechSettings, SynthesizeSpeechInput, TranscribeAudioInput } from "../service"
import type { NormalizedSpeechSettings, SpeechSynthesisStreamResponse, SynthesizeSpeechInput, TranscribeAudioInput } from "../service"
interface OpenAICompatibleSpeechProviderOptions {
settings: NormalizedSpeechSettings
@@ -20,10 +21,13 @@ export class OpenAICompatibleSpeechProvider {
provider: settings.provider,
supportsStt: true,
supportsTts: true,
supportsStreamingTts: true,
baseUrl: settings.baseUrl,
sttModel: settings.sttModel,
ttsModel: settings.ttsModel,
ttsVoice: settings.ttsVoice,
ttsFormats: ["mp3", "wav", "opus", "aac"],
streamingTtsFormats: ["mp3", "wav", "opus", "aac"],
}
}
@@ -92,8 +96,7 @@ export class OpenAICompatibleSpeechProvider {
}
async synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse> {
const client = this.createClient()
const format = input.format ?? "mp3"
const format = input.format ?? this.options.settings.ttsFormat
this.options.logger.info(
{
@@ -104,20 +107,68 @@ export class OpenAICompatibleSpeechProvider {
"speech.synthesize",
)
const response = await client.audio.speech.create({
model: this.options.settings.ttsModel,
voice: this.options.settings.ttsVoice as any,
input: input.text,
response_format: format as any,
})
const response = await this.requestSpeechAudio(input.text, format)
const mimeType = response.headers.get("content-type") || mimeTypeForFormat(format)
const audioBuffer = Buffer.from(await response.arrayBuffer())
return {
audioBase64: audioBuffer.toString("base64"),
mimeType: mimeTypeForFormat(format),
mimeType,
}
}
async synthesizeStream(input: SynthesizeSpeechInput): Promise<SpeechSynthesisStreamResponse> {
const format = input.format ?? this.options.settings.ttsFormat
this.options.logger.info(
{
model: this.options.settings.ttsModel,
voice: this.options.settings.ttsVoice,
format,
},
"speech.synthesize.stream",
)
const response = await this.requestSpeechAudio(input.text, format)
if (!response.body) {
throw new Error("Speech provider did not return a stream.")
}
return {
stream: Readable.fromWeb(response.body as any),
mimeType: response.headers.get("content-type") || mimeTypeForFormat(format),
}
}
private async requestSpeechAudio(text: string, format: "mp3" | "wav" | "opus" | "aac"): Promise<Response> {
const { settings } = this.options
if (!settings.apiKey) {
throw new Error("Speech provider is not configured. Add an API key in Speech settings.")
}
const endpoint = new URL("audio/speech", ensureTrailingSlash(settings.baseUrl ?? "https://api.openai.com/v1"))
const response = await fetch(endpoint, {
method: "POST",
headers: {
Authorization: `Bearer ${settings.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: settings.ttsModel,
voice: settings.ttsVoice,
input: text,
response_format: format,
}),
})
if (!response.ok) {
const detail = await response.text()
throw new Error(detail || `Speech synthesis failed with ${response.status}`)
}
return response
}
private createClient(): OpenAI {
const { settings } = this.options
if (!settings.apiKey) {
@@ -141,8 +192,13 @@ function extensionForMime(mimeType: string): string {
return "webm"
}
function mimeTypeForFormat(format: "mp3" | "wav" | "opus"): string {
function mimeTypeForFormat(format: "mp3" | "wav" | "opus" | "aac"): string {
if (format === "wav") return "audio/wav"
if (format === "opus") return "audio/opus"
if (format === "opus") return 'audio/ogg; codecs="opus"'
if (format === "aac") return "audio/aac"
return "audio/mpeg"
}
function ensureTrailingSlash(value: string): string {
return value.endsWith("/") ? value : `${value}/`
}

View File

@@ -1,4 +1,5 @@
import { z } from "zod"
import type { Readable } from "node:stream"
import type { Logger } from "../logger"
import type { SettingsService } from "../settings/service"
import type { SpeechCapabilitiesResponse, SpeechSynthesisResponse, SpeechTranscriptionResponse } from "../api-types"
@@ -13,6 +14,7 @@ const ServerSpeechSettingsSchema = z.object({
sttModel: z.string().optional(),
ttsModel: z.string().optional(),
ttsVoice: z.string().optional(),
ttsFormat: z.enum(["mp3", "wav", "opus", "aac"]).optional(),
})
.optional(),
})
@@ -27,13 +29,19 @@ export interface TranscribeAudioInput {
export interface SynthesizeSpeechInput {
text: string
format?: "mp3" | "wav" | "opus"
format?: "mp3" | "wav" | "opus" | "aac"
}
export interface SpeechSynthesisStreamResponse {
stream: Readable
mimeType: string
}
export interface SpeechProvider {
getCapabilities(): SpeechCapabilitiesResponse
transcribe(input: TranscribeAudioInput): Promise<SpeechTranscriptionResponse>
synthesize(input: SynthesizeSpeechInput): Promise<SpeechSynthesisResponse>
synthesizeStream(input: SynthesizeSpeechInput): Promise<SpeechSynthesisStreamResponse>
}
export interface NormalizedSpeechSettings {
@@ -43,12 +51,14 @@ export interface NormalizedSpeechSettings {
sttModel: string
ttsModel: string
ttsVoice: string
ttsFormat: "mp3" | "wav" | "opus" | "aac"
}
const DEFAULT_PROVIDER = "openai-compatible"
const DEFAULT_STT_MODEL = "gpt-4o-mini-transcribe"
const DEFAULT_TTS_MODEL = "gpt-4o-mini-tts"
const DEFAULT_TTS_VOICE = "alloy"
const DEFAULT_TTS_FORMAT = "mp3"
export class SpeechService {
constructor(
private readonly settings: SettingsService,
@@ -67,6 +77,10 @@ export class SpeechService {
return this.createProvider().synthesize(input)
}
async synthesizeStream(input: SynthesizeSpeechInput): Promise<SpeechSynthesisStreamResponse> {
return this.createProvider().synthesizeStream(input)
}
private createProvider(): SpeechProvider {
const settings = this.resolveSettings()
return new OpenAICompatibleSpeechProvider({
@@ -86,6 +100,7 @@ export class SpeechService {
sttModel: speech.sttModel?.trim() || DEFAULT_STT_MODEL,
ttsModel: speech.ttsModel?.trim() || DEFAULT_TTS_MODEL,
ttsVoice: speech.ttsVoice?.trim() || DEFAULT_TTS_VOICE,
ttsFormat: speech.ttsFormat ?? DEFAULT_TTS_FORMAT,
}
}
}

View File

@@ -1,6 +1,6 @@
{
"name": "@codenomad/tauri-app",
"version": "0.12.3",
"version": "0.13.1",
"private": true,
"license": "MIT",
"scripts": {

View File

@@ -1,6 +1,6 @@
{
"name": "@codenomad/ui",
"version": "0.12.3",
"version": "0.13.1",
"private": true,
"license": "MIT",
"type": "module",
@@ -45,4 +45,4 @@
"vite-plugin-pwa": "^1.2.0",
"vite-plugin-solid": "^2.10.0"
}
}
}

View File

@@ -14,6 +14,8 @@ import { showAlertDialog } from "../stores/alerts"
import { deleteMessage } from "../stores/session-actions"
import { useI18n } from "../lib/i18n"
import type { DeleteHoverState } from "../types/delete-hover"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
function DeleteUpToIcon() {
return (
@@ -1384,6 +1386,13 @@ function ReasoningCard(props: ReasoningCardProps) {
const viewHideLabel = () =>
expanded() ? t("messageBlock.reasoning.indicator.hide") : t("messageBlock.reasoning.indicator.view")
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.messageId}:${(props.part as any)?.id ?? "reasoning"}`,
text: reasoningText,
})
const canSpeakReasoning = () => reasoningText().trim().length > 0 && speech.canUseSpeech()
createEffect(() => {
if (!expanded()) return
reasoningText()
@@ -1462,6 +1471,20 @@ function ReasoningCard(props: ReasoningCardProps) {
</button>
<div class="message-reasoning-actions">
<Show when={canSpeakReasoning()}>
<SpeechActionButton
class="message-action-button"
onClick={(event) => {
event.preventDefault()
event.stopPropagation()
void speech.toggle()
}}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<button
type="button"
class="message-action-button"

View File

@@ -11,6 +11,8 @@ import { showAlertDialog } from "../stores/alerts"
import { deleteMessage } from "../stores/session-actions"
import { isTauriHost } from "../lib/runtime-env"
import type { DeleteHoverState } from "../types/delete-hover"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
function DeleteUpToIcon() {
return (
@@ -294,6 +296,13 @@ export default function MessageItem(props: MessageItemProps) {
.join("\n\n")
}
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.record.id}`,
text: getRawContent,
})
const canSpeakMessage = () => getRawContent().trim().length > 0 && speech.canUseSpeech()
const handleCopy = async () => {
const content = getRawContent()
if (!content) return
@@ -443,6 +452,16 @@ export default function MessageItem(props: MessageItemProps) {
<Copy class="w-3.5 h-3.5" aria-hidden="true" />
</button>
<Show when={canSpeakMessage()}>
<SpeechActionButton
class="message-action-button"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<Show when={props.onFork}>
<button
class="message-action-button"
@@ -503,6 +522,16 @@ export default function MessageItem(props: MessageItemProps) {
<Copy class="w-3.5 h-3.5" aria-hidden="true" />
</button>
<Show when={canSpeakMessage()}>
<SpeechActionButton
class="message-action-button"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<Show when={props.showDeleteMessage}>
<button
class="message-action-button"

View File

@@ -1,5 +1,5 @@
import { Suspense, createEffect, createSignal, lazy, on, onCleanup, Show } from "solid-js"
import { ArrowBigUp, ArrowBigDown, Loader2, Mic } from "lucide-solid"
import { ArrowBigUp, ArrowBigDown, Loader2, Mic, Volume2, X } from "lucide-solid"
import ExpandButton from "./expand-button"
import { clearAttachments, removeAttachment } from "../stores/attachments"
import { resolvePastedPlaceholders } from "../lib/prompt-placeholders"
@@ -19,6 +19,7 @@ import { usePromptAttachments } from "./prompt-input/usePromptAttachments"
import { usePromptPicker } from "./prompt-input/usePromptPicker"
import { usePromptKeyDown } from "./prompt-input/usePromptKeyDown"
import { usePromptVoiceInput } from "./prompt-input/usePromptVoiceInput"
import { canUseConversationMode, isConversationModeEnabled, toggleConversationMode } from "../stores/conversation-speech"
const log = getLogger("actions")
const LazyUnifiedPicker = lazy(() => import("./unified-picker"))
@@ -351,6 +352,19 @@ export default function PromptInput(props: PromptInputProps) {
textareaRef?.focus()
}
function handleClearPrompt() {
clearPrompt()
clearHistoryDraft()
resetHistoryNavigation()
setShowPicker(false)
setPickerMode("mention")
setAtPosition(null)
setSearchQuery("")
setIgnoredAtPositions(new Set<number>())
syncAttachmentCounters("")
textareaRef?.focus()
}
function insertBlockContent(block: string) {
const textarea = textareaRef
const current = prompt()
@@ -422,6 +436,8 @@ export default function PromptInput(props: PromptInputProps) {
return hasText || attachments().length > 0
}
const canClearPrompt = () => prompt().length > 0
const shellHint = () =>
mode() === "shell"
? { key: "Esc", text: t("promptInput.hints.shell.exit") }
@@ -461,6 +477,13 @@ export default function PromptInput(props: PromptInputProps) {
const showVoiceInput = () =>
preferences().showPromptVoiceInput &&
(voiceInput.canUseVoiceInput() || voiceInput.isRecording() || voiceInput.isTranscribing())
const conversationModeEnabled = () => isConversationModeEnabled(props.instanceId)
const showConversationToggle = () => showVoiceInput() || conversationModeEnabled()
const canToggleConversationMode = () => canUseConversationMode()
const conversationModeButtonTitle = () =>
conversationModeEnabled()
? t("promptInput.conversationMode.disable.title")
: t("promptInput.conversationMode.enable.title")
const instance = () => getActiveInstance()
@@ -543,7 +566,7 @@ export default function PromptInput(props: PromptInputProps) {
autocomplete="off"
/>
<div class="prompt-nav-buttons">
<div class="prompt-nav-top-row">
<div class="prompt-nav-column prompt-nav-column-left">
<Show when={showVoiceInput()}>
<button
type="button"
@@ -582,47 +605,72 @@ export default function PromptInput(props: PromptInputProps) {
</Show>
}
>
<span class="prompt-voice-timer">{formatVoiceTimer(voiceInput.elapsedMs())}</span>
<Mic class="h-4 w-4" aria-hidden="true" />
</Show>
</button>
</Show>
<Show when={showConversationToggle()}>
<button
type="button"
class={`prompt-voice-button prompt-nav-voice-button prompt-conversation-button ${conversationModeEnabled() ? "is-active" : ""}`}
onClick={() => toggleConversationMode(props.instanceId)}
disabled={!conversationModeEnabled() && !canToggleConversationMode()}
aria-pressed={conversationModeEnabled()}
aria-label={conversationModeButtonTitle()}
title={conversationModeButtonTitle()}
>
<Volume2 class="h-4 w-4" aria-hidden="true" />
</button>
</Show>
<button
type="button"
class="prompt-clear-button"
onClick={handleClearPrompt}
disabled={!canClearPrompt()}
aria-label={t("promptInput.clear.ariaLabel")}
title={t("promptInput.clear.title")}
>
<X class="h-4 w-4" aria-hidden="true" />
</button>
</div>
<div class="prompt-nav-column prompt-nav-column-right">
<ExpandButton
expandState={expandState}
onToggleExpand={handleExpandToggle}
/>
<Show when={hasHistory()}>
<button
type="button"
class="prompt-history-button"
onClick={() =>
selectPreviousHistory({
force: true,
isPickerOpen: showPicker(),
getTextarea: () => textareaRef,
})
}
disabled={!canHistoryGoPrevious()}
aria-label={t("promptInput.history.previousAriaLabel")}
>
<ArrowBigUp class="h-5 w-5" aria-hidden="true" />
</button>
<button
type="button"
class="prompt-history-button"
onClick={() =>
selectNextHistory({
force: true,
isPickerOpen: showPicker(),
getTextarea: () => textareaRef,
})
}
disabled={!canHistoryGoNext()}
aria-label={t("promptInput.history.nextAriaLabel")}
>
<ArrowBigDown class="h-5 w-5" aria-hidden="true" />
</button>
</Show>
</div>
<Show when={hasHistory()}>
<button
type="button"
class="prompt-history-button"
onClick={() =>
selectPreviousHistory({
force: true,
isPickerOpen: showPicker(),
getTextarea: () => textareaRef,
})
}
disabled={!canHistoryGoPrevious()}
aria-label={t("promptInput.history.previousAriaLabel")}
>
<ArrowBigUp class="h-5 w-5" aria-hidden="true" />
</button>
<button
type="button"
class="prompt-history-button"
onClick={() =>
selectNextHistory({
force: true,
isPickerOpen: showPicker(),
getTextarea: () => textareaRef,
})
}
disabled={!canHistoryGoNext()}
aria-label={t("promptInput.history.nextAriaLabel")}
>
<ArrowBigDown class="h-5 w-5" aria-hidden="true" />
</button>
</Show>
</div>
<Show when={shouldShowOverlay()}>
<div class={`prompt-input-overlay keyboard-hints ${mode() === "shell" ? "shell-mode" : ""}`}>
@@ -712,10 +760,3 @@ export default function PromptInput(props: PromptInputProps) {
</div>
)
}
function formatVoiceTimer(elapsedMs: number): string {
const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1000))
const minutes = Math.floor(totalSeconds / 60)
const seconds = totalSeconds % 60
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`
}

View File

@@ -16,6 +16,7 @@ import { getLogger } from "../../lib/logger"
import { requestData } from "../../lib/opencode-api"
import { useI18n } from "../../lib/i18n"
import type { PromptInputApi, PromptInsertMode } from "../prompt-input/types"
import { clearConversationPlaybackForSession } from "../../stores/conversation-speech"
const log = getLogger("session")
@@ -88,6 +89,10 @@ export const SessionView: Component<SessionViewProps> = (props) => {
on(
() => props.isActive,
(isActive) => {
if (!isActive) {
clearConversationPlaybackForSession(props.instanceId, props.sessionId)
return
}
if (!isActive) return
// On phones, focusing the prompt on session switch is disruptive (it raises the OSK).

View File

@@ -1,9 +1,11 @@
import { Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
import { Mic, Volume2 } from "lucide-solid"
import { For, Show, createEffect, createMemo, createSignal, type Component } from "solid-js"
import { Loader2, Mic, Square, Volume2 } from "lucide-solid"
import { useConfig, type SpeechSettings } from "../../stores/preferences"
import { useI18n } from "../../lib/i18n"
import { loadSpeechCapabilities, speechCapabilities, speechCapabilitiesError, speechCapabilitiesLoading } from "../../stores/speech"
import { getLogger } from "../../lib/logger"
import { useSpeech } from "../../lib/hooks/use-speech"
import { getSpeechPlaybackSupport } from "../../lib/speech-playback-support"
const log = getLogger("actions")
@@ -13,6 +15,8 @@ type DraftFields = {
sttModel: string
ttsModel: string
ttsVoice: string
playbackMode: SpeechSettings["playbackMode"]
ttsFormat: SpeechSettings["ttsFormat"]
}
function createDraftFields(speech: SpeechSettings): DraftFields {
@@ -22,11 +26,21 @@ function createDraftFields(speech: SpeechSettings): DraftFields {
sttModel: speech.sttModel,
ttsModel: speech.ttsModel,
ttsVoice: speech.ttsVoice,
playbackMode: speech.playbackMode,
ttsFormat: speech.ttsFormat,
}
}
function isDraftEqual(a: DraftFields, b: DraftFields): boolean {
return a.apiKey === b.apiKey && a.baseUrl === b.baseUrl && a.sttModel === b.sttModel && a.ttsModel === b.ttsModel && a.ttsVoice === b.ttsVoice
return (
a.apiKey === b.apiKey &&
a.baseUrl === b.baseUrl &&
a.sttModel === b.sttModel &&
a.ttsModel === b.ttsModel &&
a.ttsVoice === b.ttsVoice &&
a.playbackMode === b.playbackMode &&
a.ttsFormat === b.ttsFormat
)
}
export const SpeechSettingsCard: Component = () => {
@@ -39,6 +53,15 @@ export const SpeechSettingsCard: Component = () => {
const [apiKeyTouched, setApiKeyTouched] = createSignal(false)
const [clearStoredApiKey, setClearStoredApiKey] = createSignal(false)
const testSpeech = useSpeech({
id: () => "settings-speech-test",
text: () => t("settings.speech.testPlayback.sample"),
settingsOverride: () => ({
playbackMode: drafts().playbackMode,
ttsFormat: drafts().ttsFormat,
}),
})
createEffect(() => {
const speech = serverSettings().speech
const nextDrafts = createDraftFields(speech)
@@ -75,6 +98,26 @@ export const SpeechSettingsCard: Component = () => {
}
const apiKeyDirty = createMemo(() => clearStoredApiKey() || drafts().apiKey.trim().length > 0)
const playbackSupport = createMemo(() =>
getSpeechPlaybackSupport({
playbackMode: drafts().playbackMode,
ttsFormat: drafts().ttsFormat,
capabilities: speechCapabilities(),
}),
)
const compatibilityMessage = createMemo(() => {
const capabilities = speechCapabilities()
if (!capabilities?.available || !capabilities?.configured || !capabilities?.supportsTts) {
return null
}
if (drafts().playbackMode === "streaming" && !capabilities.supportsStreamingTts) {
return t("settings.speech.compatibility.streamingUnavailable")
}
if (drafts().playbackMode === "streaming" && !playbackSupport().available) {
return t("settings.speech.compatibility.browserStreamingUnavailable")
}
return t("settings.speech.compatibility.runtimeNote")
})
const isDirty = createMemo(() => {
const speech = serverSettings().speech
@@ -84,7 +127,9 @@ export const SpeechSettingsCard: Component = () => {
(current.baseUrl || "") !== (speech.baseUrl || "") ||
current.sttModel !== speech.sttModel ||
current.ttsModel !== speech.ttsModel ||
current.ttsVoice !== speech.ttsVoice
current.ttsVoice !== speech.ttsVoice ||
current.playbackMode !== speech.playbackMode ||
current.ttsFormat !== speech.ttsFormat
)
})
@@ -108,6 +153,8 @@ export const SpeechSettingsCard: Component = () => {
sttModel: current.sttModel.trim() || undefined,
ttsModel: current.ttsModel.trim() || undefined,
ttsVoice: current.ttsVoice.trim() || undefined,
playbackMode: current.playbackMode,
ttsFormat: current.ttsFormat,
})
await loadSpeechCapabilities(true)
setDrafts({
@@ -116,6 +163,8 @@ export const SpeechSettingsCard: Component = () => {
sttModel: current.sttModel.trim() || serverSettings().speech.sttModel,
ttsModel: current.ttsModel.trim() || serverSettings().speech.ttsModel,
ttsVoice: current.ttsVoice.trim() || serverSettings().speech.ttsVoice,
playbackMode: current.playbackMode,
ttsFormat: current.ttsFormat,
})
setApiKeyTouched(false)
setClearStoredApiKey(false)
@@ -151,6 +200,32 @@ export const SpeechSettingsCard: Component = () => {
<span class="settings-inline-note">{t("settings.speech.provider.openaiCompatible")}</span>
<span class="settings-inline-note">{capabilityLabel()}</span>
<span class="settings-inline-note">{saveStatusLabel()}</span>
<button
type="button"
class="selector-button selector-button-secondary w-auto whitespace-nowrap inline-flex items-center gap-2"
onClick={() => void testSpeech.toggle()}
disabled={isSaving()}
title={testSpeech.buttonTitle()}
aria-label={testSpeech.buttonTitle()}
>
<Show
when={testSpeech.isLoading()}
fallback={
<Show when={testSpeech.isPlaying()} fallback={<Volume2 class="w-3.5 h-3.5" aria-hidden="true" />}>
<Square class="w-3.5 h-3.5" aria-hidden="true" />
</Show>
}
>
<Loader2 class="w-3.5 h-3.5 animate-spin" aria-hidden="true" />
</Show>
<span>
{testSpeech.isPlaying()
? t("settings.speech.testPlayback.stop")
: testSpeech.isLoading()
? t("settings.speech.testPlayback.generating")
: t("settings.speech.testPlayback.action")}
</span>
</button>
<button
type="button"
class="selector-button selector-button-primary w-auto whitespace-nowrap"
@@ -213,8 +288,32 @@ export const SpeechSettingsCard: Component = () => {
onInput={(value) => updateDraft("ttsVoice", value)}
icon={<Mic class="w-3.5 h-3.5 icon-muted flex-shrink-0" />}
/>
<SelectField
label={t("settings.speech.playbackMode.title")}
caption={t("settings.speech.playbackMode.subtitle")}
value={drafts().playbackMode}
onInput={(value) => updateDraft("playbackMode", value as DraftFields["playbackMode"])}
options={[
{ value: "streaming", label: t("settings.speech.playbackMode.streaming") },
{ value: "buffered", label: t("settings.speech.playbackMode.buffered") },
]}
/>
<SelectField
label={t("settings.speech.ttsFormat.title")}
caption={t("settings.speech.ttsFormat.subtitle")}
value={drafts().ttsFormat}
onInput={(value) => updateDraft("ttsFormat", value as DraftFields["ttsFormat"])}
options={[
{ value: "mp3", label: "MP3" },
{ value: "wav", label: "WAV" },
{ value: "opus", label: "Opus" },
{ value: "aac", label: "AAC" },
]}
/>
<div class="settings-inline-note">{t("settings.speech.help")}</div>
<Show when={compatibilityMessage()}>{(message) => <div class="settings-inline-note">{message()}</div>}</Show>
<div class="settings-inline-note">{t("settings.speech.testPlayback.note")}</div>
</div>
</div>
)
@@ -249,4 +348,26 @@ const Field: Component<{
)
}
const SelectField: Component<{
label: string
caption: string
value: string
onInput: (value: string) => void
options: Array<{ value: string; label: string }>
}> = (props) => {
return (
<div class="settings-toggle-row settings-toggle-row-compact">
<div>
<div class="settings-toggle-title">{props.label}</div>
<div class="settings-toggle-caption">{props.caption}</div>
</div>
<div class="min-w-[18rem] max-w-[24rem] w-full">
<select value={props.value} onInput={(event) => props.onInput(event.currentTarget.value)} class="selector-input w-full">
<For each={props.options}>{(option) => <option value={option.value}>{option.label}</option>}</For>
</select>
</div>
</div>
)
}
export default SpeechSettingsCard

View File

@@ -0,0 +1,34 @@
import { Loader2, Volume2 } from "lucide-solid"
import type { JSX } from "solid-js"
interface SpeechActionButtonProps {
class?: string
title: string
isLoading: boolean
isPlaying: boolean
onClick: JSX.EventHandlerUnion<HTMLButtonElement, MouseEvent>
type?: "button" | "submit" | "reset"
}
export default function SpeechActionButton(props: SpeechActionButtonProps) {
return (
<button
type={props.type ?? "button"}
class={props.class}
onClick={props.onClick}
aria-label={props.title}
title={props.title}
>
{props.isLoading ? (
<Loader2 class="w-3.5 h-3.5 animate-spin" aria-hidden="true" />
) : props.isPlaying ? (
<svg class="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true">
<rect x="3" y="3" width="18" height="18" rx="2" />
<rect x="9" y="9" width="6" height="6" rx="1" fill="currentColor" stroke="none" />
</svg>
) : (
<Volume2 class="w-3.5 h-3.5" aria-hidden="true" />
)}
</button>
)
}

View File

@@ -29,6 +29,7 @@ import type {
ToolScrollHelpers,
} from "./tool-call/types"
import {
buildToolSpeechText,
ensureMarkdownContent,
getRelativePath,
getToolIcon,
@@ -41,6 +42,8 @@ import {
} from "./tool-call/utils"
import { resolveTitleForTool } from "./tool-call/tool-title"
import { getLogger } from "../lib/logger"
import { useSpeech } from "../lib/hooks/use-speech"
import SpeechActionButton from "./speech-action-button"
const log = getLogger("session")
@@ -960,6 +963,21 @@ export default function ToolCall(props: ToolCallProps) {
return renderToolTitle()
})
const speechText = createMemo(() =>
buildToolSpeechText({
title: headerText(),
state: toolState(),
t,
}),
)
const speech = useSpeech({
id: () => `${props.instanceId}:${props.sessionId}:${props.messageId ?? "message"}:${toolCallIdentifier()}`,
text: speechText,
})
const canSpeakToolCall = () => speechText().trim().length > 0 && speech.canUseSpeech()
const handleCopyHeader = async (event: MouseEvent) => {
event.preventDefault()
event.stopPropagation()
@@ -1023,6 +1041,16 @@ export default function ToolCall(props: ToolCallProps) {
<Copy class="w-3.5 h-3.5" />
</button>
<Show when={canSpeakToolCall()}>
<SpeechActionButton
class="tool-call-header-copy"
onClick={() => void speech.toggle()}
title={speech.buttonTitle()}
isLoading={speech.isLoading()}
isPlaying={speech.isPlaying()}
/>
</Show>
<span class="tool-call-header-status" aria-hidden="true">
{statusIcon()}
</span>

View File

@@ -231,3 +231,37 @@ export function getDefaultToolAction(toolName: string) {
return tGlobal("toolCall.renderer.action.working")
}
}
export function buildToolSpeechText(options: {
title: string
state?: ToolState
t: (key: string, params?: Record<string, unknown>) => string
}): string {
const sections: string[] = []
if (options.title.trim()) {
sections.push(options.title.trim())
}
const { input, output } = readToolStatePayload(options.state)
const formattedInput = formatUnknown(input)
const formattedOutput = formatUnknown(output)
if (formattedInput?.text?.trim()) {
sections.push(`${options.t("toolCall.io.input")}:\n${formattedInput.text.trim()}`)
}
if (formattedOutput?.text?.trim()) {
sections.push(`${options.t("toolCall.io.output")}:\n${formattedOutput.text.trim()}`)
}
if (options.state?.status === "error" && options.state.error?.trim()) {
sections.push(`${options.t("toolCall.error.label")} ${options.state.error.trim()}`)
}
if (sections.length === 1 && options.state?.status === "pending") {
sections.push(options.t("toolCall.pending.waitingToRun"))
}
return sections.join("\n\n").trim()
}

View File

@@ -123,6 +123,28 @@ async function request<T>(path: string, init?: RequestInit): Promise<T> {
}
}
async function requestRaw(path: string, init?: RequestInit): Promise<Response> {
const url = API_BASE ? new URL(path, API_BASE).toString() : path
const headers = normalizeHeaders(init?.headers)
if (init?.body !== undefined && !headers["Content-Type"]) {
headers["Content-Type"] = "application/json"
}
const method = (init?.method ?? "GET").toUpperCase()
const startedAt = Date.now()
logHttp(`${method} ${path}`)
const response = await fetch(url, { ...init, headers, credentials: init?.credentials ?? "include" })
if (!response.ok) {
const message = await response.text()
logHttp(`${method} ${path} -> ${response.status}`, { durationMs: Date.now() - startedAt, error: message })
throw new Error(message || `Request failed with ${response.status}`)
}
logHttp(`${method} ${path} -> ${response.status}`, { durationMs: Date.now() - startedAt })
return response
}
export const serverApi = {
fetchWorkspaces(): Promise<WorkspaceDescriptor[]> {
@@ -253,12 +275,22 @@ export const serverApi = {
body: JSON.stringify(payload),
})
},
synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" }): Promise<SpeechSynthesisResponse> {
synthesizeSpeech(payload: { text: string; format?: "mp3" | "wav" | "opus" | "aac" }): Promise<SpeechSynthesisResponse> {
return request<SpeechSynthesisResponse>("/api/speech/synthesize", {
method: "POST",
body: JSON.stringify(payload),
})
},
synthesizeSpeechStream(
payload: { text: string; format?: "mp3" | "wav" | "opus" | "aac" },
signal?: AbortSignal,
): Promise<Response> {
return requestRaw("/api/speech/synthesize/stream", {
method: "POST",
body: JSON.stringify(payload),
signal,
})
},
listFileSystem(path?: string, options?: { includeFiles?: boolean }): Promise<FileSystemListResponse> {
const params = new URLSearchParams()
if (path && path !== ".") {

View File

@@ -0,0 +1,416 @@
import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
import { showAlertDialog } from "../../stores/alerts"
import { serverApi } from "../api-client"
import { useI18n } from "../i18n"
import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
import { useConfig, type SpeechSettings } from "../../stores/preferences"
import { formatToMimeType, getSpeechPlaybackSupport } from "../speech-playback-support"
type SpeechPlaybackState = "idle" | "loading" | "playing"
interface UseSpeechOptions {
id: Accessor<string>
text: Accessor<string>
settingsOverride?: Accessor<Partial<Pick<SpeechSettings, "playbackMode" | "ttsFormat">>>
}
interface ActivePlaybackEntry {
ownerId: string
stop: () => void
}
const stateResetters = new Map<string, () => void>()
let activePlayback: ActivePlaybackEntry | null = null
function resetOwnerState(ownerId: string) {
stateResetters.get(ownerId)?.()
}
function stopActivePlayback(ownerId?: string) {
if (!activePlayback) return
if (ownerId && activePlayback.ownerId !== ownerId) return
const current = activePlayback
activePlayback = null
current.stop()
}
function setActivePlayback(ownerId: string, stop: () => void) {
if (activePlayback?.ownerId === ownerId) {
activePlayback = { ownerId, stop }
return
}
stopActivePlayback()
activePlayback = { ownerId, stop }
}
export function useSpeech(options: UseSpeechOptions) {
const { t } = useI18n()
const { serverSettings } = useConfig()
const [state, setState] = createSignal<SpeechPlaybackState>("idle")
let requestVersion = 0
let audio: HTMLAudioElement | null = null
let objectUrl: string | null = null
let mediaSource: MediaSource | null = null
let abortController: AbortController | null = null
createEffect(() => {
void loadSpeechCapabilities()
})
const cleanupAudio = () => {
if (abortController) {
abortController.abort()
abortController = null
}
if (audio) {
audio.pause()
audio.currentTime = 0
audio.src = ""
audio.load()
audio = null
}
mediaSource = null
if (objectUrl) {
URL.revokeObjectURL(objectUrl)
objectUrl = null
}
}
const resetState = () => {
requestVersion += 1
cleanupAudio()
setState("idle")
}
stateResetters.set(options.id(), resetState)
onCleanup(() => {
stateResetters.delete(options.id())
stopActivePlayback(options.id())
resetState()
})
const isSupported = () => typeof window !== "undefined" && typeof window.Audio !== "undefined"
const resolvedSettings = () => ({
...serverSettings().speech,
...(options.settingsOverride?.() ?? {}),
})
const canUseSpeech = () => {
const capabilities = speechCapabilities()
if (!isSupported() || !capabilities?.available || !capabilities?.configured || !capabilities?.supportsTts) {
return false
}
return getSpeechPlaybackSupport({
playbackMode: resolvedSettings().playbackMode,
ttsFormat: resolvedSettings().ttsFormat,
capabilities,
}).available
}
const stop = () => {
if (activePlayback?.ownerId === options.id()) {
activePlayback = null
}
resetState()
}
const start = async () => {
const ownerId = options.id()
const text = options.text().trim()
if (!text || state() === "loading" || state() === "playing") return
if (!isSupported()) {
showAlertDialog(t("messageItem.actions.speak.error.unsupported"), {
title: t("messageItem.actions.speak.error.title"),
variant: "error",
})
return
}
const capabilities = (await loadSpeechCapabilities()) ?? speechCapabilities()
if (!capabilities?.available || !capabilities?.configured || !capabilities?.supportsTts) {
showAlertDialog(t("messageItem.actions.speak.error.unavailable"), {
title: t("messageItem.actions.speak.error.title"),
variant: "error",
})
return
}
const support = getSpeechPlaybackSupport({
playbackMode: resolvedSettings().playbackMode,
ttsFormat: resolvedSettings().ttsFormat,
capabilities,
})
if (!support.available) {
const detailKey =
support.reason === "provider-streaming-unavailable"
? "settings.speech.compatibility.streamingUnavailable"
: support.reason === "browser-streaming-unavailable"
? "settings.speech.compatibility.browserStreamingUnavailable"
: "messageItem.actions.speak.error.unsupported"
showAlertDialog(t("messageItem.actions.speak.error.unavailable"), {
title: t("messageItem.actions.speak.error.title"),
detail: t(detailKey),
variant: "error",
})
return
}
requestVersion += 1
const currentRequest = requestVersion
stopActivePlayback()
cleanupAudio()
setState("loading")
const settings = resolvedSettings()
const format = settings.ttsFormat
try {
if (settings.playbackMode === "streaming") {
await startStreamingPlayback(ownerId, currentRequest, text, format)
} else {
await startBufferedPlayback(ownerId, currentRequest, text, format)
}
} catch (error) {
if (currentRequest !== requestVersion) {
return
}
resetState()
showAlertDialog(t("messageItem.actions.speak.error.generate"), {
title: t("messageItem.actions.speak.error.title"),
detail: error instanceof Error ? error.message : String(error),
variant: "error",
})
}
}
async function startBufferedPlayback(
ownerId: string,
currentRequest: number,
text: string,
format: "mp3" | "wav" | "opus" | "aac",
) {
const response = await serverApi.synthesizeSpeech({ text, format })
if (currentRequest !== requestVersion) {
return
}
const nextUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType)
const nextAudio = new Audio(nextUrl)
objectUrl = nextUrl
audio = nextAudio
attachPlaybackLifecycle(ownerId, nextAudio)
setActivePlayback(ownerId, () => {
cleanupAudio()
setState("idle")
})
setState("playing")
await nextAudio.play()
}
async function startStreamingPlayback(
ownerId: string,
currentRequest: number,
text: string,
format: "mp3" | "wav" | "opus" | "aac",
) {
if (typeof MediaSource === "undefined") {
throw new Error("MediaSource is not available in this browser.")
}
const controller = new AbortController()
abortController = controller
const response = await serverApi.synthesizeSpeechStream({ text, format }, controller.signal)
const mimeType = response.headers.get("content-type") || formatToMimeType(format)
if (!MediaSource.isTypeSupported(mimeType)) {
throw new Error(`Streaming playback is not supported for ${mimeType}.`)
}
const stream = response.body
if (!stream) {
throw new Error("Speech stream did not include a response body.")
}
const nextMediaSource = new MediaSource()
const nextObjectUrl = URL.createObjectURL(nextMediaSource)
const nextAudio = new Audio(nextObjectUrl)
mediaSource = nextMediaSource
objectUrl = nextObjectUrl
audio = nextAudio
attachPlaybackLifecycle(ownerId, nextAudio)
setActivePlayback(ownerId, () => {
cleanupAudio()
setState("idle")
})
await new Promise<void>((resolve, reject) => {
const handleSourceOpen = () => {
nextMediaSource.removeEventListener("sourceopen", handleSourceOpen)
void streamToMediaSource({
mediaSource: nextMediaSource,
stream,
mimeType,
audioElement: nextAudio,
onPlayable: async () => {
if (currentRequest !== requestVersion) return
if (state() !== "playing") {
setState("playing")
}
try {
await nextAudio.play()
} catch (error) {
reject(error)
}
},
onComplete: resolve,
onError: reject,
})
}
nextMediaSource.addEventListener("sourceopen", handleSourceOpen, { once: true })
nextAudio.addEventListener(
"error",
() => reject(new Error("Unable to play streamed speech.")),
{ once: true },
)
})
}
const toggle = async () => {
if (state() === "idle") {
await start()
return
}
stop()
}
return {
state,
canUseSpeech,
isLoading: () => state() === "loading",
isPlaying: () => state() === "playing",
toggle,
stop,
buttonTitle: () => {
if (state() === "loading") return t("messageItem.actions.generatingSpeech")
if (state() === "playing") return t("messageItem.actions.stopSpeech")
return t("messageItem.actions.speak")
},
}
}
function attachPlaybackLifecycle(ownerId: string, audio: HTMLAudioElement) {
const finish = () => {
if (activePlayback?.ownerId === ownerId) {
activePlayback = null
}
resetOwnerState(ownerId)
}
audio.addEventListener("ended", finish, { once: true })
audio.addEventListener("error", finish, { once: true })
}
async function streamToMediaSource(options: {
mediaSource: MediaSource
stream: ReadableStream<Uint8Array>
mimeType: string
audioElement: HTMLAudioElement
onPlayable: () => Promise<void>
onComplete: () => void
onError: (error: unknown) => void
}) {
try {
const sourceBuffer = options.mediaSource.addSourceBuffer(options.mimeType)
const reader = options.stream.getReader()
let startedPlayback = false
let queue: Uint8Array[] = []
let processing = false
const flushQueue = async () => {
if (processing || sourceBuffer.updating || queue.length === 0) return
processing = true
const chunk = queue.shift()!
await appendChunk(sourceBuffer, chunk)
if (!startedPlayback) {
startedPlayback = true
await options.onPlayable()
}
processing = false
await flushQueue()
}
while (true) {
const { done, value } = await reader.read()
if (done) break
if (value && value.byteLength > 0) {
queue.push(value)
await flushQueue()
}
}
while (queue.length > 0 || sourceBuffer.updating) {
if (queue.length > 0) {
await flushQueue()
} else {
await waitForUpdateEnd(sourceBuffer)
}
}
if (options.mediaSource.readyState === "open") {
options.mediaSource.endOfStream()
}
options.onComplete()
} catch (error) {
options.onError(error)
}
}
function appendChunk(sourceBuffer: SourceBuffer, chunk: Uint8Array): Promise<void> {
return new Promise((resolve, reject) => {
const handleUpdateEnd = () => {
cleanup()
resolve()
}
const handleError = () => {
cleanup()
reject(new Error("Failed to append audio stream chunk."))
}
const cleanup = () => {
sourceBuffer.removeEventListener("updateend", handleUpdateEnd)
sourceBuffer.removeEventListener("error", handleError)
}
sourceBuffer.addEventListener("updateend", handleUpdateEnd, { once: true })
sourceBuffer.addEventListener("error", handleError, { once: true })
sourceBuffer.appendBuffer(new Uint8Array(chunk).buffer)
})
}
function waitForUpdateEnd(sourceBuffer: SourceBuffer): Promise<void> {
return new Promise((resolve) => {
sourceBuffer.addEventListener("updateend", () => resolve(), { once: true })
})
}
function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string {
const binary = atob(audioBase64)
const bytes = new Uint8Array(binary.length)
for (let index = 0; index < binary.length; index += 1) {
bytes[index] = binary.charCodeAt(index)
}
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
}

View File

@@ -75,6 +75,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copy",
"messageItem.actions.copyTitle": "Copy message",
"messageItem.actions.copied": "Copied!",
"messageItem.actions.speak": "Speak message",
"messageItem.actions.generatingSpeech": "Generating speech",
"messageItem.actions.stopSpeech": "Stop playback",
"messageItem.actions.speak.error.title": "Speech playback failed",
"messageItem.actions.speak.error.unsupported": "Speech playback is not supported in this browser.",
"messageItem.actions.speak.error.unavailable": "Speech playback is unavailable until speech settings are configured.",
"messageItem.actions.speak.error.generate": "Unable to generate speech for this message.",
"messageItem.actions.deleteMessage": "Delete message (doesn't undo changes)",
"messageItem.actions.deleteMessagesUpTo": "Delete messages up to here (doesn't undo changes)",
"messageItem.actions.deletingMessage": "Deleting...",
@@ -135,9 +142,15 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "again to abort session",
"promptInput.stopSession.ariaLabel": "Stop session",
"promptInput.stopSession.title": "Stop session",
"promptInput.clear.ariaLabel": "Clear prompt text",
"promptInput.clear.title": "Clear prompt text",
"promptInput.send.ariaLabel": "Send message",
"promptInput.send.errorFallback": "Failed to send message",
"promptInput.send.errorTitle": "Send failed",
"promptInput.conversationMode.enable.title": "Enable conversation mode",
"promptInput.conversationMode.disable.title": "Disable conversation mode",
"promptInput.conversationMode.error.title": "Conversation playback failed",
"promptInput.conversationMode.error.message": "Unable to continue speaking assistant replies.",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "Default text-to-speech model reserved for future playback features.",
"settings.speech.ttsVoice.title": "Default voice",
"settings.speech.ttsVoice.subtitle": "Default text-to-speech voice reserved for future playback features.",
"settings.speech.help": "Prompt voice input only appears when speech transcription is configured and supported by this browser.",
"settings.speech.playbackMode.title": "Playback mode",
"settings.speech.playbackMode.subtitle": "Choose whether TTS starts playing as audio streams in or after the full file is generated.",
"settings.speech.playbackMode.streaming": "Streaming",
"settings.speech.playbackMode.buffered": "Buffered",
"settings.speech.ttsFormat.title": "Output format",
"settings.speech.ttsFormat.subtitle": "Choose the audio format for synthesized speech. Streaming support depends on your provider and browser.",
"settings.speech.help": "Prompt voice input appears when speech transcription is configured and supported. Message playback uses the TTS mode and format selected here.",
"settings.speech.compatibility.streamingUnavailable": "Your current speech provider configuration does not advertise streaming TTS. Switch playback mode to buffered if you want playback to work now.",
"settings.speech.compatibility.browserStreamingUnavailable": "Your current browser cannot stream the selected TTS format. Choose buffered playback or switch to a different format.",
"settings.speech.compatibility.runtimeNote": "All formats stay selectable in streaming mode. Some browser and provider combinations may still fail at playback time.",
"settings.speech.testPlayback.action": "Test playback",
"settings.speech.testPlayback.generating": "Generating sample",
"settings.speech.testPlayback.stop": "Stop sample",
"settings.speech.testPlayback.sample": "Thank you for using CodeNomad, your speech settings are working fine.",
"settings.speech.testPlayback.note": "The test uses your current playback mode and format immediately. Save API key, base URL, model, or voice changes first if you want those reflected too.",
"settings.speech.save.action": "Save",
"settings.speech.save.saving": "Saving...",
"settings.speech.save.saved": "Saved",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copiar",
"messageItem.actions.copyTitle": "Copiar mensaje",
"messageItem.actions.copied": "¡Copiado!",
"messageItem.actions.speak": "Reproducir mensaje",
"messageItem.actions.generatingSpeech": "Generando audio",
"messageItem.actions.stopSpeech": "Detener reproduccion",
"messageItem.actions.speak.error.title": "La reproduccion de voz fallo",
"messageItem.actions.speak.error.unsupported": "La reproduccion de voz no es compatible con este navegador.",
"messageItem.actions.speak.error.unavailable": "La reproduccion de voz no estara disponible hasta que la configuracion de voz este lista.",
"messageItem.actions.speak.error.generate": "No se pudo generar audio para este mensaje.",
"messageItem.actions.deleteMessage": "Eliminar mensaje (no deshace cambios)",
"messageItem.actions.deleteMessagesUpTo": "Eliminar mensajes hasta aqui (no deshace cambios)",
"messageItem.actions.deletingMessage": "Eliminando...",
@@ -137,14 +144,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "otra vez para abortar la sesión",
"promptInput.stopSession.ariaLabel": "Detener sesión",
"promptInput.stopSession.title": "Detener sesión",
"promptInput.clear.ariaLabel": "Borrar el texto del prompt",
"promptInput.clear.title": "Borrar el texto del prompt",
"promptInput.send.ariaLabel": "Enviar mensaje",
"promptInput.send.errorFallback": "No se pudo enviar el mensaje",
"promptInput.send.errorTitle": "Error al enviar",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
"promptInput.conversationMode.enable.title": "Activar modo conversacion",
"promptInput.conversationMode.disable.title": "Desactivar modo conversacion",
"promptInput.conversationMode.error.title": "Fallo la reproduccion de la conversacion",
"promptInput.conversationMode.error.message": "No se pudieron seguir reproduciendo las respuestas del asistente.",
"promptInput.voiceInput.start.title": "Iniciar entrada de voz",
"promptInput.voiceInput.stop.title": "Detener grabación y transcribir",
"promptInput.voiceInput.transcribing.title": "Transcribiendo audio",
"promptInput.voiceInput.error.title": "La entrada de voz falló",
"promptInput.voiceInput.error.permission": "Se requiere acceso al micrófono para grabar la entrada de voz.",
"promptInput.voiceInput.error.unsupported": "La entrada de voz no es compatible con este navegador.",
"promptInput.voiceInput.error.transcribe": "No se pudo transcribir el audio grabado.",
} as const

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "Modelo predeterminado de texto a voz reservado para futuras funciones de reproducción.",
"settings.speech.ttsVoice.title": "Voz predeterminada",
"settings.speech.ttsVoice.subtitle": "Voz predeterminada de texto a voz reservada para futuras funciones de reproducción.",
"settings.speech.help": "La entrada de voz del prompt solo aparece cuando la transcripción de voz está configurada y este navegador la admite.",
"settings.speech.playbackMode.title": "Modo de reproduccion",
"settings.speech.playbackMode.subtitle": "Elige si TTS empieza a reproducirse mientras llega el audio o despues de generar el archivo completo.",
"settings.speech.playbackMode.streaming": "Streaming",
"settings.speech.playbackMode.buffered": "Buffered",
"settings.speech.ttsFormat.title": "Formato de salida",
"settings.speech.ttsFormat.subtitle": "Elige el formato de audio para la voz sintetizada. La compatibilidad de streaming depende de tu proveedor y navegador.",
"settings.speech.help": "La entrada de voz del prompt aparece cuando la transcripcion de voz esta configurada y es compatible. La reproduccion de mensajes usa el modo y formato TTS seleccionados aqui.",
"settings.speech.compatibility.streamingUnavailable": "Tu configuracion actual del proveedor de voz no anuncia TTS por streaming. Cambia el modo de reproduccion a buffered si quieres que la reproduccion funcione ahora.",
"settings.speech.compatibility.browserStreamingUnavailable": "Tu navegador actual no puede reproducir por streaming el formato TTS seleccionado. Elige reproduccion buffered o cambia a otro formato.",
"settings.speech.compatibility.runtimeNote": "Todos los formatos siguen disponibles en modo streaming. Algunas combinaciones de navegador y proveedor aun pueden fallar al reproducir.",
"settings.speech.testPlayback.action": "Probar reproduccion",
"settings.speech.testPlayback.generating": "Generando muestra",
"settings.speech.testPlayback.stop": "Detener muestra",
"settings.speech.testPlayback.sample": "Gracias por usar CodeNomad, tu configuracion de voz funciona correctamente.",
"settings.speech.testPlayback.note": "La prueba usa de inmediato el modo y formato actuales. Guarda primero los cambios de API key, base URL, modelo o voz si tambien quieres probarlos.",
"settings.speech.save.action": "Guardar",
"settings.speech.save.saving": "Guardando...",
"settings.speech.save.saved": "Guardado",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Copier",
"messageItem.actions.copyTitle": "Copier le message",
"messageItem.actions.copied": "Copié !",
"messageItem.actions.speak": "Lire le message",
"messageItem.actions.generatingSpeech": "Generation de l'audio",
"messageItem.actions.stopSpeech": "Arreter la lecture",
"messageItem.actions.speak.error.title": "La lecture vocale a echoue",
"messageItem.actions.speak.error.unsupported": "La lecture vocale n'est pas prise en charge dans ce navigateur.",
"messageItem.actions.speak.error.unavailable": "La lecture vocale n'est pas disponible tant que les parametres vocaux ne sont pas configures.",
"messageItem.actions.speak.error.generate": "Impossible de generer l'audio pour ce message.",
"messageItem.actions.deleteMessage": "Supprimer le message (sans annuler les changements)",
"messageItem.actions.deleteMessagesUpTo": "Supprimer les messages jusqu'ici (sans annuler les changements)",
"messageItem.actions.deletingMessage": "Suppression...",
@@ -137,14 +144,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "à nouveau pour interrompre la session",
"promptInput.stopSession.ariaLabel": "Arrêter la session",
"promptInput.stopSession.title": "Arrêter la session",
"promptInput.clear.ariaLabel": "Effacer le texte du prompt",
"promptInput.clear.title": "Effacer le texte du prompt",
"promptInput.send.ariaLabel": "Envoyer le message",
"promptInput.send.errorFallback": "Impossible d'envoyer le message",
"promptInput.send.errorTitle": "Échec de l'envoi",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
"promptInput.conversationMode.enable.title": "Activer le mode conversation",
"promptInput.conversationMode.disable.title": "Desactiver le mode conversation",
"promptInput.conversationMode.error.title": "La lecture de la conversation a echoue",
"promptInput.conversationMode.error.message": "Impossible de continuer a lire les reponses de l'assistant.",
"promptInput.voiceInput.start.title": "Démarrer la saisie vocale",
"promptInput.voiceInput.stop.title": "Arrêter l'enregistrement et transcrire",
"promptInput.voiceInput.transcribing.title": "Transcription de l'audio",
"promptInput.voiceInput.error.title": "Échec de la saisie vocale",
"promptInput.voiceInput.error.permission": "L'accès au microphone est requis pour enregistrer la saisie vocale.",
"promptInput.voiceInput.error.unsupported": "La saisie vocale n'est pas prise en charge dans ce navigateur.",
"promptInput.voiceInput.error.transcribe": "Impossible de transcrire l'audio enregistré.",
} as const

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "Modèle de synthèse vocale par défaut réservé aux futures fonctions de lecture.",
"settings.speech.ttsVoice.title": "Voix par défaut",
"settings.speech.ttsVoice.subtitle": "Voix de synthèse vocale par défaut réservée aux futures fonctions de lecture.",
"settings.speech.help": "La saisie vocale du prompt n'apparaît que lorsque la transcription vocale est configurée et prise en charge par ce navigateur.",
"settings.speech.playbackMode.title": "Mode de lecture",
"settings.speech.playbackMode.subtitle": "Choisissez si le TTS commence a jouer pendant le flux audio ou apres la generation complete du fichier.",
"settings.speech.playbackMode.streaming": "Streaming",
"settings.speech.playbackMode.buffered": "Buffered",
"settings.speech.ttsFormat.title": "Format de sortie",
"settings.speech.ttsFormat.subtitle": "Choisissez le format audio pour la voix synthetisee. La prise en charge du streaming depend du fournisseur et du navigateur.",
"settings.speech.help": "La saisie vocale du prompt apparait lorsque la transcription vocale est configuree et prise en charge. La lecture des messages utilise le mode et le format TTS selectionnes ici.",
"settings.speech.compatibility.streamingUnavailable": "Votre configuration actuelle du fournisseur vocal n'annonce pas le TTS en streaming. Passez le mode de lecture sur buffered si vous voulez que la lecture fonctionne maintenant.",
"settings.speech.compatibility.browserStreamingUnavailable": "Votre navigateur actuel ne peut pas lire en streaming le format TTS selectionne. Choisissez la lecture buffered ou passez a un autre format.",
"settings.speech.compatibility.runtimeNote": "Tous les formats restent selectionnables en mode streaming. Certaines combinaisons navigateur/fournisseur peuvent quand meme echouer au moment de la lecture.",
"settings.speech.testPlayback.action": "Tester la lecture",
"settings.speech.testPlayback.generating": "Generation de l'extrait",
"settings.speech.testPlayback.stop": "Arreter l'extrait",
"settings.speech.testPlayback.sample": "Merci d'utiliser CodeNomad, vos parametres vocaux fonctionnent correctement.",
"settings.speech.testPlayback.note": "Le test utilise immediatement le mode et le format actuels. Enregistrez d'abord les changements d'API key, d'URL de base, de modele ou de voix si vous voulez aussi les tester.",
"settings.speech.save.action": "Enregistrer",
"settings.speech.save.saving": "Enregistrement...",
"settings.speech.save.saved": "Enregistré",

View File

@@ -75,6 +75,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "העתק",
"messageItem.actions.copyTitle": "העתק הודעה",
"messageItem.actions.copied": "הועתק!",
"messageItem.actions.speak": "השמע הודעה",
"messageItem.actions.generatingSpeech": "יוצר אודיו",
"messageItem.actions.stopSpeech": "עצור ניגון",
"messageItem.actions.speak.error.title": "ניגון הקול נכשל",
"messageItem.actions.speak.error.unsupported": "ניגון קול אינו נתמך בדפדפן הזה.",
"messageItem.actions.speak.error.unavailable": "ניגון קול לא זמין עד שהגדרות הקול יוגדרו.",
"messageItem.actions.speak.error.generate": "לא ניתן היה ליצור אודיו עבור ההודעה הזו.",
"messageItem.actions.deleteMessage": "מחק הודעה (לא מבטל שינויים)",
"messageItem.actions.deleteMessagesUpTo": "מחק הודעות עד כאן (לא מבטל שינויים)",
"messageItem.actions.deletingMessage": "מוחק...",
@@ -135,7 +142,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "שוב כדי לבטל את הסשן",
"promptInput.stopSession.ariaLabel": "עצור סשן",
"promptInput.stopSession.title": "עצור סשן",
"promptInput.clear.ariaLabel": "נקה את טקסט הפרומפט",
"promptInput.clear.title": "נקה את טקסט הפרומפט",
"promptInput.send.ariaLabel": "שלח הודעה",
"promptInput.send.errorFallback": "שליחת ההודעה נכשלה",
"promptInput.send.errorTitle": "השליחה נכשלה",
"promptInput.conversationMode.enable.title": "הפעל מצב שיחה",
"promptInput.conversationMode.disable.title": "כבה מצב שיחה",
"promptInput.conversationMode.error.title": "ניגון השיחה נכשל",
"promptInput.conversationMode.error.message": "לא ניתן היה להמשיך להקריא את תגובות העוזר.",
"promptInput.voiceInput.start.title": "התחל קלט קולי",
"promptInput.voiceInput.stop.title": "עצור הקלטה ותמלל",
"promptInput.voiceInput.transcribing.title": "מתמלל אודיו",
"promptInput.voiceInput.error.title": "קלט קולי נכשל",
"promptInput.voiceInput.error.permission": "נדרשת גישה למיקרופון כדי להקליט קלט קולי.",
"promptInput.voiceInput.error.unsupported": "קלט קולי אינו נתמך בדפדפן זה.",
"promptInput.voiceInput.error.transcribe": "לא ניתן היה לתמלל את האודיו שהוקלט.",
} as const

View File

@@ -137,6 +137,52 @@ export const settingsMessages = {
"settings.behavior.usageMetrics.subtitle": "הצג או הסתר נתוני טוקנים ועלות להודעות הסוכן.",
"settings.behavior.autoCleanup.title": "ניקוי אוטומטי של סשנים ריקים",
"settings.behavior.autoCleanup.subtitle": "נקה אוטומטית סשנים ריקים בעת יצירת סשנים חדשים.",
"settings.behavior.promptVoiceInput.title": "קלט קולי לפרומפט",
"settings.behavior.promptVoiceInput.subtitle": "הצג את כפתור המיקרופון לקלט דיבור-לטקסט כאשר תכונת הקול מוגדרת.",
"settings.behavior.promptSubmit.title": "Enter לשליחה",
"settings.behavior.promptSubmit.subtitle": "השתמש ב-Enter לשליחת פקודות; Cmd/Ctrl+Enter מוסיף שורה חדשה.",
"settings.speech.title": "קול",
"settings.speech.subtitle": "הגדר כעת דיבור-לטקסט והכן תשתית לטקסט-לדיבור עבור יכולות עתידיות.",
"settings.speech.provider.title": "ספק",
"settings.speech.provider.subtitle": "בקשות קול משתמשות במתאם הקול שבצד השרת.",
"settings.speech.provider.openaiCompatible": "תואם OpenAI",
"settings.speech.status.loading": "בודק את ההגדרות...",
"settings.speech.status.configured": "מוגדר",
"settings.speech.status.missing": "חסר מפתח API",
"settings.speech.status.error": "שירות הקול אינו זמין",
"settings.speech.apiKey.title": "מפתח API",
"settings.speech.apiKey.subtitle": "משמש עבור בקשות קול המנוהלות על ידי CodeNomad.",
"settings.speech.apiKey.placeholder": "הזן מפתח API חדש",
"settings.speech.apiKey.storedNote": "מפתח API שמור מוסתר. הזן ערך חדש כדי להחליף אותו, או השאר את השדה ריק כדי לשמור עליו.",
"settings.speech.apiKey.clearAction": "נקה מפתח שמור",
"settings.speech.apiKey.clearPending": "מפתח ה-API השמור יוסר בעת השמירה.",
"settings.speech.baseUrl.title": "כתובת בסיס",
"settings.speech.baseUrl.subtitle": "עקיפה אופציונלית עבור נקודות קצה קוליות התואמות ל-OpenAI.",
"settings.speech.baseUrl.placeholder": "https://api.openai.com/v1",
"settings.speech.sttModel.title": "מודל תמלול",
"settings.speech.sttModel.subtitle": "המודל המשמש לבקשות דיבור-לטקסט בפרומפט.",
"settings.speech.ttsModel.title": "מודל קול",
"settings.speech.ttsModel.subtitle": "מודל ברירת מחדל לטקסט-לדיבור השמור ליכולות ניגון עתידיות.",
"settings.speech.ttsVoice.title": "קול ברירת מחדל",
"settings.speech.ttsVoice.subtitle": "קול ברירת מחדל לטקסט-לדיבור השמור ליכולות ניגון עתידיות.",
"settings.speech.playbackMode.title": "מצב ניגון",
"settings.speech.playbackMode.subtitle": "בחר אם TTS יתחיל לנגן בזמן שהאודיו מוזרם או רק אחרי שהקובץ כולו נוצר.",
"settings.speech.playbackMode.streaming": "סטרימינג",
"settings.speech.playbackMode.buffered": "באפר מלא",
"settings.speech.ttsFormat.title": "פורמט פלט",
"settings.speech.ttsFormat.subtitle": "בחר את פורמט האודיו לדיבור מסונתז. תמיכת סטרימינג תלויה בספק ובדפדפן.",
"settings.speech.help": "קלט קולי לפרומפט מופיע כאשר תמלול קול מוגדר ונתמך. השמעת הודעות משתמשת במצב ובפורמט ה-TTS שנבחרו כאן.",
"settings.speech.compatibility.streamingUnavailable": "תצורת ספק הקול הנוכחית שלך לא מצהירה על TTS בסטרימינג. עבור למצב buffered אם אתה רוצה שהניגון יעבוד כבר עכשיו.",
"settings.speech.compatibility.browserStreamingUnavailable": "הדפדפן הנוכחי שלך לא יכול לנגן בסטרימינג את פורמט ה-TTS שנבחר. בחר בניגון buffered או עבור לפורמט אחר.",
"settings.speech.compatibility.runtimeNote": "כל הפורמטים נשארים זמינים במצב סטרימינג. חלק מהשילובים של דפדפן וספק עדיין עלולים להיכשל בזמן הניגון.",
"settings.speech.testPlayback.action": "בדוק ניגון",
"settings.speech.testPlayback.generating": "יוצר דוגמה",
"settings.speech.testPlayback.stop": "עצור דוגמה",
"settings.speech.testPlayback.sample": "תודה שאתה משתמש ב-CodeNomad, הגדרות הקול שלך פועלות כראוי.",
"settings.speech.testPlayback.note": "המבחן משתמש מיד במצב ובפורמט הנוכחיים. שמור תחילה שינויים ב-API key, ב-Base URL, במודל או בקול אם גם אותם תרצה לבדוק.",
"settings.speech.save.action": "שמור",
"settings.speech.save.saving": "שומר...",
"settings.speech.save.saved": "נשמר",
"settings.speech.save.unsaved": "יש שינויים שלא נשמרו",
"settings.speech.save.error": "השמירה נכשלה",
} as const

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "コピー",
"messageItem.actions.copyTitle": "メッセージをコピー",
"messageItem.actions.copied": "コピーしました!",
"messageItem.actions.speak": "メッセージを読み上げ",
"messageItem.actions.generatingSpeech": "音声を生成中",
"messageItem.actions.stopSpeech": "再生を停止",
"messageItem.actions.speak.error.title": "音声再生に失敗しました",
"messageItem.actions.speak.error.unsupported": "このブラウザでは音声再生に対応していません。",
"messageItem.actions.speak.error.unavailable": "音声設定が完了するまで音声再生は利用できません。",
"messageItem.actions.speak.error.generate": "このメッセージの音声を生成できませんでした。",
"messageItem.actions.deleteMessage": "メッセージを削除(変更は元に戻さない)",
"messageItem.actions.deleteMessagesUpTo": "ここまでのメッセージを削除(変更は元に戻さない)",
"messageItem.actions.deletingMessage": "削除中...",
@@ -137,14 +144,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "もう一度押すとセッションを中断",
"promptInput.stopSession.ariaLabel": "セッションを停止",
"promptInput.stopSession.title": "セッションを停止",
"promptInput.clear.ariaLabel": "プロンプトのテキストをクリア",
"promptInput.clear.title": "プロンプトのテキストをクリア",
"promptInput.send.ariaLabel": "メッセージを送信",
"promptInput.send.errorFallback": "メッセージの送信に失敗しました",
"promptInput.send.errorTitle": "送信に失敗",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
"promptInput.conversationMode.enable.title": "会話モードを有効化",
"promptInput.conversationMode.disable.title": "会話モードを無効化",
"promptInput.conversationMode.error.title": "会話の読み上げに失敗しました",
"promptInput.conversationMode.error.message": "アシスタントの返信の読み上げを続行できませんでした。",
"promptInput.voiceInput.start.title": "音声入力を開始",
"promptInput.voiceInput.stop.title": "録音を停止して文字起こし",
"promptInput.voiceInput.transcribing.title": "音声を文字起こし中",
"promptInput.voiceInput.error.title": "音声入力に失敗しました",
"promptInput.voiceInput.error.permission": "音声入力を録音するにはマイクへのアクセスが必要です。",
"promptInput.voiceInput.error.unsupported": "このブラウザーでは音声入力はサポートされていません。",
"promptInput.voiceInput.error.transcribe": "録音した音声を文字起こしできませんでした。",
} as const

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "将来の再生機能のために予約されている既定の音声合成モデルです。",
"settings.speech.ttsVoice.title": "既定の音声",
"settings.speech.ttsVoice.subtitle": "将来の再生機能のために予約されている既定の音声合成ボイスです。",
"settings.speech.help": "プロンプト音声入力は、音声文字起こしが設定され、このブラウザーでサポートされている場合にのみ表示されます。",
"settings.speech.playbackMode.title": "再生モード",
"settings.speech.playbackMode.subtitle": "音声が届き次第再生を始めるか、ファイル全体の生成後に再生するかを選択します。",
"settings.speech.playbackMode.streaming": "Streaming",
"settings.speech.playbackMode.buffered": "Buffered",
"settings.speech.ttsFormat.title": "出力形式",
"settings.speech.ttsFormat.subtitle": "音声合成の出力形式を選択します。ストリーミング対応はプロバイダーとブラウザーに依存します。",
"settings.speech.help": "プロンプト音声入力は音声文字起こしが設定され対応している場合に表示されます。メッセージ再生にはここで選んだTTSモードと形式が使われます。",
"settings.speech.compatibility.streamingUnavailable": "現在の音声プロバイダー設定ではストリーミングTTSが利用可能として公開されていません。今すぐ再生を使いたい場合は再生モードを buffered に切り替えてください。",
"settings.speech.compatibility.browserStreamingUnavailable": "現在のブラウザーでは、選択したTTS形式をストリーミング再生できません。buffered 再生に切り替えるか、別の形式を選んでください。",
"settings.speech.compatibility.runtimeNote": "ストリーミングモードでも全ての形式を選択できますが、ブラウザーとプロバイダーの組み合わせによっては再生時に失敗することがあります。",
"settings.speech.testPlayback.action": "再生をテスト",
"settings.speech.testPlayback.generating": "サンプルを生成中",
"settings.speech.testPlayback.stop": "サンプルを停止",
"settings.speech.testPlayback.sample": "CodeNomad をご利用いただきありがとうございます。音声設定は正常に動作しています。",
"settings.speech.testPlayback.note": "このテストは現在の再生モードと形式をすぐに使います。APIキー、Base URL、モデル、音声の変更も試したい場合は先に保存してください。",
"settings.speech.save.action": "保存",
"settings.speech.save.saving": "保存中...",
"settings.speech.save.saved": "保存済み",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "Копировать",
"messageItem.actions.copyTitle": "Копировать сообщение",
"messageItem.actions.copied": "Скопировано!",
"messageItem.actions.speak": "Озвучить сообщение",
"messageItem.actions.generatingSpeech": "Генерация аудио",
"messageItem.actions.stopSpeech": "Остановить воспроизведение",
"messageItem.actions.speak.error.title": "Не удалось воспроизвести речь",
"messageItem.actions.speak.error.unsupported": "В этом браузере воспроизведение речи не поддерживается.",
"messageItem.actions.speak.error.unavailable": "Воспроизведение речи недоступно, пока не настроены голосовые параметры.",
"messageItem.actions.speak.error.generate": "Не удалось сгенерировать аудио для этого сообщения.",
"messageItem.actions.deleteMessage": "Удалить сообщение (без отката изменений)",
"messageItem.actions.deleteMessagesUpTo": "Удалить сообщения до этого места (без отката изменений)",
"messageItem.actions.deletingMessage": "Удаление...",
@@ -137,14 +144,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "еще раз, чтобы прервать сессию",
"promptInput.stopSession.ariaLabel": "Остановить сессию",
"promptInput.stopSession.title": "Остановить сессию",
"promptInput.clear.ariaLabel": "Очистить текст prompt",
"promptInput.clear.title": "Очистить текст prompt",
"promptInput.send.ariaLabel": "Отправить сообщение",
"promptInput.send.errorFallback": "Не удалось отправить сообщение",
"promptInput.send.errorTitle": "Не удалось отправить",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
"promptInput.conversationMode.enable.title": "Включить режим разговора",
"promptInput.conversationMode.disable.title": "Выключить режим разговора",
"promptInput.conversationMode.error.title": "Сбой озвучивания разговора",
"promptInput.conversationMode.error.message": "Не удалось продолжить озвучивание ответов ассистента.",
"promptInput.voiceInput.start.title": "Начать голосовой ввод",
"promptInput.voiceInput.stop.title": "Остановить запись и расшифровать",
"promptInput.voiceInput.transcribing.title": "Идёт расшифровка аудио",
"promptInput.voiceInput.error.title": "Сбой голосового ввода",
"promptInput.voiceInput.error.permission": "Для записи голосового ввода требуется доступ к микрофону.",
"promptInput.voiceInput.error.unsupported": "Голосовой ввод не поддерживается в этом браузере.",
"promptInput.voiceInput.error.transcribe": "Не удалось расшифровать записанное аудио.",
} as const

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "Модель синтеза речи по умолчанию, зарезервированная для будущих функций воспроизведения.",
"settings.speech.ttsVoice.title": "Голос по умолчанию",
"settings.speech.ttsVoice.subtitle": "Голос синтеза речи по умолчанию, зарезервированный для будущих функций воспроизведения.",
"settings.speech.help": "Голосовой ввод в поле запроса появляется только если распознавание речи настроено и поддерживается этим браузером.",
"settings.speech.playbackMode.title": "Режим воспроизведения",
"settings.speech.playbackMode.subtitle": "Выберите, начинать ли воспроизведение TTS во время поступления аудио или только после полной генерации файла.",
"settings.speech.playbackMode.streaming": "Потоковый",
"settings.speech.playbackMode.buffered": "Буферизованный",
"settings.speech.ttsFormat.title": "Формат вывода",
"settings.speech.ttsFormat.subtitle": "Выберите аудиоформат для синтезированной речи. Поддержка потокового режима зависит от провайдера и браузера.",
"settings.speech.help": "Голосовой ввод появляется, когда распознавание речи настроено и поддерживается. Для воспроизведения сообщений используются выбранные здесь режим и формат TTS.",
"settings.speech.compatibility.streamingUnavailable": "Текущая конфигурация голосового провайдера не заявляет поддержку потокового TTS. Переключите режим воспроизведения на buffered, если хотите, чтобы воспроизведение работало уже сейчас.",
"settings.speech.compatibility.browserStreamingUnavailable": "Ваш текущий браузер не может воспроизводить потоково выбранный формат TTS. Выберите buffered-воспроизведение или переключитесь на другой формат.",
"settings.speech.compatibility.runtimeNote": "В режиме streaming по-прежнему доступны все форматы. Некоторые сочетания браузера и провайдера все равно могут завершаться ошибкой во время воспроизведения.",
"settings.speech.testPlayback.action": "Проверить воспроизведение",
"settings.speech.testPlayback.generating": "Генерация примера",
"settings.speech.testPlayback.stop": "Остановить пример",
"settings.speech.testPlayback.sample": "Спасибо, что используете CodeNomad, ваши настройки речи работают нормально.",
"settings.speech.testPlayback.note": "Тест сразу использует текущие режим и формат. Сначала сохраните изменения API key, Base URL, модели или голоса, если хотите проверить и их.",
"settings.speech.save.action": "Сохранить",
"settings.speech.save.saving": "Сохранение...",
"settings.speech.save.saved": "Сохранено",

View File

@@ -77,6 +77,13 @@ export const messagingMessages = {
"messageItem.actions.copy": "复制",
"messageItem.actions.copyTitle": "复制消息",
"messageItem.actions.copied": "已复制!",
"messageItem.actions.speak": "朗读消息",
"messageItem.actions.generatingSpeech": "正在生成语音",
"messageItem.actions.stopSpeech": "停止播放",
"messageItem.actions.speak.error.title": "语音播放失败",
"messageItem.actions.speak.error.unsupported": "此浏览器不支持语音播放。",
"messageItem.actions.speak.error.unavailable": "语音设置完成前,语音播放不可用。",
"messageItem.actions.speak.error.generate": "无法为这条消息生成语音。",
"messageItem.actions.deleteMessage": "删除消息(不会撤销更改)",
"messageItem.actions.deleteMessagesUpTo": "删除到此处的消息(不会撤销更改)",
"messageItem.actions.deletingMessage": "正在删除...",
@@ -137,14 +144,20 @@ export const messagingMessages = {
"promptInput.overlay.againToAbort": "再次按下以中止会话",
"promptInput.stopSession.ariaLabel": "停止会话",
"promptInput.stopSession.title": "停止会话",
"promptInput.clear.ariaLabel": "清除输入框文本",
"promptInput.clear.title": "清除输入框文本",
"promptInput.send.ariaLabel": "发送消息",
"promptInput.send.errorFallback": "发送消息失败",
"promptInput.send.errorTitle": "发送失败",
"promptInput.voiceInput.start.title": "Start voice input",
"promptInput.voiceInput.stop.title": "Stop recording and transcribe",
"promptInput.voiceInput.transcribing.title": "Transcribing audio",
"promptInput.voiceInput.error.title": "Voice input failed",
"promptInput.voiceInput.error.permission": "Microphone access is required to record voice input.",
"promptInput.voiceInput.error.unsupported": "Voice input is not supported in this browser.",
"promptInput.voiceInput.error.transcribe": "Unable to transcribe the recorded audio.",
"promptInput.conversationMode.enable.title": "开启对话模式",
"promptInput.conversationMode.disable.title": "关闭对话模式",
"promptInput.conversationMode.error.title": "对话播报失败",
"promptInput.conversationMode.error.message": "无法继续播报助手回复。",
"promptInput.voiceInput.start.title": "开始语音输入",
"promptInput.voiceInput.stop.title": "停止录音并转写",
"promptInput.voiceInput.transcribing.title": "正在转写音频",
"promptInput.voiceInput.error.title": "语音输入失败",
"promptInput.voiceInput.error.permission": "录制语音输入需要麦克风访问权限。",
"promptInput.voiceInput.error.unsupported": "此浏览器不支持语音输入。",
"promptInput.voiceInput.error.transcribe": "无法转写录制的音频。",
} as const

View File

@@ -166,7 +166,21 @@ export const settingsMessages = {
"settings.speech.ttsModel.subtitle": "为未来播放功能预留的默认文字转语音模型。",
"settings.speech.ttsVoice.title": "默认语音",
"settings.speech.ttsVoice.subtitle": "为未来播放功能预留的默认文字转语音音色。",
"settings.speech.help": "只有在语音转写已配置且当前浏览器支持时,提示框语音输入才会显示。",
"settings.speech.playbackMode.title": "播放模式",
"settings.speech.playbackMode.subtitle": "选择在音频流入时开始播放,还是在整个文件生成完成后再播放。",
"settings.speech.playbackMode.streaming": "流式",
"settings.speech.playbackMode.buffered": "缓冲后播放",
"settings.speech.ttsFormat.title": "输出格式",
"settings.speech.ttsFormat.subtitle": "选择语音合成的音频格式。流式支持取决于你的提供商和浏览器。",
"settings.speech.help": "当语音转写已配置且受支持时,提示框语音输入会显示。消息播放会使用这里选择的 TTS 模式和格式。",
"settings.speech.compatibility.streamingUnavailable": "你当前的语音提供商配置没有声明支持流式 TTS。如果你现在就想让播放可用请把播放模式切换为 buffered。",
"settings.speech.compatibility.browserStreamingUnavailable": "你当前的浏览器无法流式播放所选的 TTS 格式。请选择 buffered 播放,或切换到其他格式。",
"settings.speech.compatibility.runtimeNote": "在流式模式下仍然可以选择所有格式,但某些浏览器与提供商的组合在播放时仍可能失败。",
"settings.speech.testPlayback.action": "测试播放",
"settings.speech.testPlayback.generating": "正在生成示例",
"settings.speech.testPlayback.stop": "停止示例",
"settings.speech.testPlayback.sample": "感谢你使用 CodeNomad你的语音设置工作正常。",
"settings.speech.testPlayback.note": "测试会立即使用当前播放模式和格式。如果你也想测试 API key、Base URL、模型或音色的更改请先保存。",
"settings.speech.save.action": "保存",
"settings.speech.save.saving": "保存中...",
"settings.speech.save.saved": "已保存",

View File

@@ -0,0 +1,58 @@
import type { SpeechCapabilitiesResponse } from "../../../server/src/api-types"
import type { SpeechPlaybackMode, SpeechTtsFormat } from "../stores/preferences"
export interface SpeechPlaybackSupportResult {
available: boolean
reason?: "unsupported-environment" | "provider-streaming-unavailable" | "browser-streaming-unavailable"
}
export function formatToMimeType(format: SpeechTtsFormat): string {
if (format === "wav") return "audio/wav"
if (format === "opus") return getSupportedMimeType(format)
if (format === "aac") return "audio/aac"
return "audio/mpeg"
}
export function getCandidateMimeTypes(format: SpeechTtsFormat): string[] {
if (format === "wav") return ["audio/wav"]
if (format === "opus") {
return ['audio/ogg; codecs="opus"', 'audio/webm; codecs="opus"', "audio/opus"]
}
if (format === "aac") return ["audio/aac", "audio/mp4", 'audio/mp4; codecs="mp4a.40.2"']
return ["audio/mpeg"]
}
export function getSupportedMimeType(format: SpeechTtsFormat): string {
const candidates = getCandidateMimeTypes(format)
if (typeof MediaSource === "undefined") {
return candidates[0]
}
return candidates.find((candidate) => MediaSource.isTypeSupported(candidate)) ?? candidates[0]
}
export function getSpeechPlaybackSupport(options: {
playbackMode: SpeechPlaybackMode
ttsFormat: SpeechTtsFormat
capabilities?: SpeechCapabilitiesResponse | null
}): SpeechPlaybackSupportResult {
if (typeof window === "undefined" || typeof window.Audio === "undefined") {
return { available: false, reason: "unsupported-environment" }
}
if (options.playbackMode !== "streaming") {
return { available: true }
}
if (!options.capabilities?.supportsStreamingTts) {
return { available: false, reason: "provider-streaming-unavailable" }
}
if (
typeof MediaSource === "undefined" ||
!getCandidateMimeTypes(options.ttsFormat).some((candidate) => MediaSource.isTypeSupported(candidate))
) {
return { available: false, reason: "browser-streaming-unavailable" }
}
return { available: true }
}

View File

@@ -0,0 +1,507 @@
import { createSignal } from "solid-js"
import { tGlobal } from "../lib/i18n"
import { showToastNotification } from "../lib/notifications"
import { serverApi } from "../lib/api-client"
import { getLogger } from "../lib/logger"
import { formatToMimeType, getSpeechPlaybackSupport } from "../lib/speech-playback-support"
import { serverSettings } from "./preferences"
import { loadSpeechCapabilities, speechCapabilities } from "./speech"
import { getActiveSession, sessions } from "./session-state"
import type { ClientPart, MessageInfo } from "../types/message"
import { messageStoreBus } from "./message-v2/bus"
import { activeInstanceId } from "./instances"
type SpeechPlaybackMode = "streaming" | "buffered"
type SpeechTtsFormat = "mp3" | "wav" | "opus" | "aac"
interface ConversationQueueEntry {
key: string
instanceId: string
sessionId: string
messageId: string
partId: string
text: string
}
interface PlaybackHandle {
stop: () => void
done: Promise<void>
}
const log = getLogger("actions")
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())
const queuedKeys = new Set<string>()
const spokenKeysBySession = new Map<string, Set<string>>()
let queue: ConversationQueueEntry[] = []
let currentPlayback:
| {
entry: ConversationQueueEntry
handle: PlaybackHandle
}
| null = null
let queueRunner: Promise<void> | null = null
let playbackErrorShown = false
function getEntryKey(instanceId: string, sessionId: string, messageId: string, partId: string): string {
return `${instanceId}:${sessionId}:${messageId}:${partId}`
}
function getSpokenKeySet(instanceId: string, sessionId: string): Set<string> {
const sessionKey = `${instanceId}:${sessionId}`
const existing = spokenKeysBySession.get(sessionKey)
if (existing) return existing
const next = new Set<string>()
spokenKeysBySession.set(sessionKey, next)
return next
}
function resolveTextPartContent(part: ClientPart): string {
if (part.type !== "text") return ""
if (typeof part.text === "string") {
return part.text
}
if (part.text && typeof part.text === "object") {
const value = part.text as { text?: unknown; value?: unknown; content?: unknown[] }
const segments: string[] = []
if (typeof value.text === "string") {
segments.push(value.text)
}
if (typeof value.value === "string") {
segments.push(value.value)
}
if (Array.isArray(value.content)) {
for (const segment of value.content) {
if (typeof segment === "string") {
segments.push(segment)
} else if (segment && typeof segment === "object") {
const typedSegment = segment as { text?: unknown; value?: unknown }
if (typeof typedSegment.text === "string") segments.push(typedSegment.text)
if (typeof typedSegment.value === "string") segments.push(typedSegment.value)
}
}
}
return segments.join("\n")
}
return ""
}
export function isConversationModeEnabled(instanceId: string): boolean {
return conversationModeInstances().get(instanceId) === true
}
export function canUseConversationMode(): boolean {
const capabilities = speechCapabilities()
if (!capabilities?.available || !capabilities.configured || !capabilities.supportsTts) {
return false
}
const settings = serverSettings().speech
return getSpeechPlaybackSupport({
playbackMode: settings.playbackMode,
ttsFormat: settings.ttsFormat,
capabilities,
}).available
}
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
setConversationModeInstances((prev) => {
const next = new Map(prev)
if (enabled) {
next.set(instanceId, true)
} else {
next.delete(instanceId)
}
return next
})
if (!enabled) {
clearConversationPlaybackForInstance(instanceId)
}
}
export function toggleConversationMode(instanceId: string): void {
setConversationModeEnabled(instanceId, !isConversationModeEnabled(instanceId))
}
export function clearConversationPlaybackForSession(instanceId: string, sessionId: string): void {
const sessionKey = `${instanceId}:${sessionId}`
queue = queue.filter((entry) => {
if (`${entry.instanceId}:${entry.sessionId}` === sessionKey) {
queuedKeys.delete(entry.key)
return false
}
return true
})
if (currentPlayback && `${currentPlayback.entry.instanceId}:${currentPlayback.entry.sessionId}` === sessionKey) {
currentPlayback.handle.stop()
currentPlayback = null
}
}
export function clearConversationPlaybackForInstance(instanceId: string): void {
queue = queue.filter((entry) => {
if (entry.instanceId === instanceId) {
queuedKeys.delete(entry.key)
return false
}
return true
})
if (currentPlayback?.entry.instanceId === instanceId) {
currentPlayback.handle.stop()
currentPlayback = null
}
}
function isSpeakableSession(instanceId: string, sessionId: string): boolean {
if (activeInstanceId() !== instanceId) {
return false
}
const activeSession = getActiveSession(instanceId)
if (!activeSession || activeSession.id !== sessionId) {
return false
}
const session = sessions().get(instanceId)?.get(sessionId) ?? activeSession
return !session?.parentId
}
export function handleConversationAssistantPartUpdated(instanceId: string, part: ClientPart, messageInfo?: MessageInfo): void {
if (part.type !== "text") return
const sessionId = typeof part.sessionID === "string" ? part.sessionID : messageInfo?.sessionID
const messageId = typeof part.messageID === "string" ? part.messageID : messageInfo?.id
const partId = typeof part.id === "string" ? part.id : undefined
if (!sessionId || !messageId || !partId) return
const messageRole =
messageInfo?.role ??
messageStoreBus.getOrCreate(instanceId).getMessage(messageId)?.role ??
null
if (messageRole !== "assistant") return
if (!isConversationModeEnabled(instanceId)) return
if (!isSpeakableSession(instanceId, sessionId)) return
const text = resolveTextPartContent(part).trim()
if (!text) return
const key = getEntryKey(instanceId, sessionId, messageId, partId)
const spokenKeys = getSpokenKeySet(instanceId, sessionId)
if (spokenKeys.has(key) || queuedKeys.has(key) || currentPlayback?.entry.key === key) {
return
}
queuedKeys.add(key)
queue.push({ key, instanceId, sessionId, messageId, partId, text })
void runConversationQueue()
}
async function runConversationQueue(): Promise<void> {
if (queueRunner) {
await queueRunner
return
}
queueRunner = (async () => {
while (queue.length > 0) {
const entry = queue.shift()!
queuedKeys.delete(entry.key)
if (!isConversationModeEnabled(entry.instanceId)) {
continue
}
if (!isSpeakableSession(entry.instanceId, entry.sessionId)) {
continue
}
const spokenKeys = getSpokenKeySet(entry.instanceId, entry.sessionId)
spokenKeys.add(entry.key)
try {
const handle = await createPlaybackHandle(entry.text)
currentPlayback = { entry, handle }
await handle.done
} catch (error) {
spokenKeys.delete(entry.key)
clearConversationPlaybackForInstance(entry.instanceId)
if (!playbackErrorShown) {
playbackErrorShown = true
showToastNotification({
title: tGlobal("promptInput.conversationMode.error.title"),
message:
error instanceof Error && error.message
? error.message
: tGlobal("promptInput.conversationMode.error.message"),
variant: "error",
})
}
log.error("Conversation playback failed", error)
break
} finally {
if (currentPlayback?.entry.key === entry.key) {
currentPlayback = null
}
}
}
})()
try {
await queueRunner
} finally {
queueRunner = null
if (queue.length === 0) {
playbackErrorShown = false
}
}
}
async function createPlaybackHandle(text: string): Promise<PlaybackHandle> {
const capabilities = (await loadSpeechCapabilities()) ?? speechCapabilities()
const settings = serverSettings().speech
if (!capabilities?.available || !capabilities.configured || !capabilities.supportsTts) {
throw new Error(tGlobal("messageItem.actions.speak.error.unavailable"))
}
const support = getSpeechPlaybackSupport({
playbackMode: settings.playbackMode,
ttsFormat: settings.ttsFormat,
capabilities,
})
if (!support.available) {
if (support.reason === "provider-streaming-unavailable") {
throw new Error(tGlobal("settings.speech.compatibility.streamingUnavailable"))
}
if (support.reason === "browser-streaming-unavailable") {
throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
}
throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
}
return settings.playbackMode === "streaming"
? createStreamingPlaybackHandle(text, settings.ttsFormat)
: createBufferedPlaybackHandle(text, settings.ttsFormat)
}
async function createBufferedPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
const response = await serverApi.synthesizeSpeech({ text, format })
const objectUrl = createObjectUrlFromBase64(response.audioBase64, response.mimeType)
const audio = new Audio(objectUrl)
let settled = false
let resolveDone!: () => void
let rejectDone!: (error: unknown) => void
const cleanup = () => {
audio.pause()
audio.src = ""
audio.load()
URL.revokeObjectURL(objectUrl)
}
const done = new Promise<void>((resolve, reject) => {
resolveDone = () => {
if (settled) return
settled = true
cleanup()
resolve()
}
rejectDone = (error) => {
if (settled) return
settled = true
cleanup()
reject(error)
}
})
audio.addEventListener("ended", () => resolveDone(), { once: true })
audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
once: true,
})
await audio.play()
return {
stop: () => resolveDone(),
done,
}
}
async function createStreamingPlaybackHandle(text: string, format: SpeechTtsFormat): Promise<PlaybackHandle> {
if (typeof MediaSource === "undefined") {
throw new Error(tGlobal("messageItem.actions.speak.error.unsupported"))
}
const abortController = new AbortController()
const response = await serverApi.synthesizeSpeechStream({ text, format }, abortController.signal)
const mimeType = response.headers.get("content-type") || formatToMimeType(format)
const stream = response.body
if (!stream) {
throw new Error(tGlobal("messageItem.actions.speak.error.generate"))
}
if (!MediaSource.isTypeSupported(mimeType)) {
throw new Error(tGlobal("settings.speech.compatibility.browserStreamingUnavailable"))
}
const mediaSource = new MediaSource()
const objectUrl = URL.createObjectURL(mediaSource)
const audio = new Audio(objectUrl)
let settled = false
let startedPlayback = false
let resolveDone!: () => void
let rejectDone!: (error: unknown) => void
const cleanup = () => {
abortController.abort()
audio.pause()
audio.src = ""
audio.load()
URL.revokeObjectURL(objectUrl)
}
const done = new Promise<void>((resolve, reject) => {
resolveDone = () => {
if (settled) return
settled = true
cleanup()
resolve()
}
rejectDone = (error) => {
if (settled) return
settled = true
cleanup()
reject(error)
}
})
audio.addEventListener("ended", () => resolveDone(), { once: true })
audio.addEventListener("error", () => rejectDone(new Error(tGlobal("messageItem.actions.speak.error.generate"))), {
once: true,
})
await new Promise<void>((resolve, reject) => {
mediaSource.addEventListener(
"sourceopen",
() => {
void streamToMediaSource({
mediaSource,
stream,
mimeType,
onPlayable: async () => {
if (startedPlayback) return
startedPlayback = true
try {
await audio.play()
resolve()
} catch (error) {
reject(error)
}
},
onError: reject,
})
},
{ once: true },
)
})
return {
stop: () => resolveDone(),
done,
}
}
async function streamToMediaSource(options: {
mediaSource: MediaSource
stream: ReadableStream<Uint8Array>
mimeType: string
onPlayable: () => Promise<void>
onError: (error: unknown) => void
}) {
try {
const sourceBuffer = options.mediaSource.addSourceBuffer(options.mimeType)
const reader = options.stream.getReader()
const queue: Uint8Array[] = []
let processing = false
let playbackStarted = false
const flushQueue = async () => {
if (processing || sourceBuffer.updating || queue.length === 0) return
processing = true
const chunk = queue.shift()!
await appendChunk(sourceBuffer, chunk)
if (!playbackStarted) {
playbackStarted = true
await options.onPlayable()
}
processing = false
await flushQueue()
}
while (true) {
const { done, value } = await reader.read()
if (done) break
if (value && value.byteLength > 0) {
queue.push(value)
await flushQueue()
}
}
while (queue.length > 0 || sourceBuffer.updating) {
if (queue.length > 0) {
await flushQueue()
} else {
await waitForUpdateEnd(sourceBuffer)
}
}
if (options.mediaSource.readyState === "open") {
options.mediaSource.endOfStream()
}
} catch (error) {
options.onError(error)
}
}
function appendChunk(sourceBuffer: SourceBuffer, chunk: Uint8Array): Promise<void> {
return new Promise((resolve, reject) => {
const handleUpdateEnd = () => {
cleanup()
resolve()
}
const handleError = () => {
cleanup()
reject(new Error(tGlobal("messageItem.actions.speak.error.generate")))
}
const cleanup = () => {
sourceBuffer.removeEventListener("updateend", handleUpdateEnd)
sourceBuffer.removeEventListener("error", handleError)
}
sourceBuffer.addEventListener("updateend", handleUpdateEnd, { once: true })
sourceBuffer.addEventListener("error", handleError, { once: true })
sourceBuffer.appendBuffer(new Uint8Array(chunk).buffer)
})
}
function waitForUpdateEnd(sourceBuffer: SourceBuffer): Promise<void> {
return new Promise((resolve) => {
sourceBuffer.addEventListener("updateend", () => resolve(), { once: true })
})
}
function createObjectUrlFromBase64(audioBase64: string, mimeType: string): string {
const binary = atob(audioBase64)
const bytes = new Uint8Array(binary.length)
for (let index = 0; index < binary.length; index += 1) {
bytes[index] = binary.charCodeAt(index)
}
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
}

View File

@@ -29,6 +29,8 @@ export type ExpansionPreference = "expanded" | "collapsed"
export type ToolInputsVisibilityPreference = "hidden" | "collapsed" | "expanded"
export type ListeningMode = "local" | "all"
export type SpeechProviderPreference = "openai-compatible"
export type SpeechPlaybackMode = "streaming" | "buffered"
export type SpeechTtsFormat = "mp3" | "wav" | "opus" | "aac"
export interface SpeechSettings {
provider: SpeechProviderPreference
@@ -38,6 +40,8 @@ export interface SpeechSettings {
sttModel: string
ttsModel: string
ttsVoice: string
playbackMode: SpeechPlaybackMode
ttsFormat: SpeechTtsFormat
}
export type SpeechSettingsUpdate = Partial<Omit<SpeechSettings, "apiKey">> & {
@@ -145,6 +149,8 @@ const defaultSpeechSettings: SpeechSettings = {
sttModel: "gpt-4o-mini-transcribe",
ttsModel: "gpt-4o-mini-tts",
ttsVoice: "alloy",
playbackMode: "streaming",
ttsFormat: "mp3",
}
function normalizeUiSettings(input?: Partial<UiSettings> | null): UiSettings {
@@ -203,6 +209,14 @@ function normalizeSpeechSettings(input?: Partial<SpeechSettings> | null): Speech
typeof sanitized.ttsVoice === "string" && sanitized.ttsVoice.trim()
? sanitized.ttsVoice.trim()
: defaultSpeechSettings.ttsVoice,
playbackMode:
sanitized.playbackMode === "buffered" || sanitized.playbackMode === "streaming"
? sanitized.playbackMode
: defaultSpeechSettings.playbackMode,
ttsFormat:
sanitized.ttsFormat === "wav" || sanitized.ttsFormat === "opus" || sanitized.ttsFormat === "aac" || sanitized.ttsFormat === "mp3"
? sanitized.ttsFormat
: defaultSpeechSettings.ttsFormat,
}
}

View File

@@ -10,6 +10,7 @@ import { messageStoreBus } from "./message-v2/bus"
import { removeMessagePartV2, removeMessageV2 } from "./message-v2/bridge"
import { getLogger } from "../lib/logger"
import { requestData } from "../lib/opencode-api"
import { clearConversationPlaybackForSession } from "./conversation-speech"
const log = getLogger("actions")
@@ -165,6 +166,8 @@ async function sendMessage(
const store = messageStoreBus.getOrCreate(instanceId)
const createdAt = Date.now()
clearConversationPlaybackForSession(instanceId, sessionId)
store.upsertMessage({
id: messageId,
sessionId,

View File

@@ -63,6 +63,7 @@ import {
} from "./message-v2/bridge"
import { messageStoreBus } from "./message-v2/bus"
import type { InstanceMessageStore } from "./message-v2/instance-store"
import { handleConversationAssistantPartUpdated } from "./conversation-speech"
const log = getLogger("sse")
const pendingSessionFetches = new Map<string, Promise<void>>()
@@ -330,8 +331,9 @@ function handleMessageUpdate(instanceId: string, event: MessageUpdateEvent | Mes
if (messageInfo) {
upsertMessageInfoV2(instanceId, messageInfo, { status: "streaming" })
}
applyPartUpdateV2(instanceId, { ...part, sessionID: sessionId, messageID: messageId })
handleConversationAssistantPartUpdated(instanceId, { ...part, sessionID: sessionId, messageID: messageId }, messageInfo)
if (part.type === "tool" && part.tool === "question") {
// Questions can arrive before their tool part exists; re-link now.

View File

@@ -37,7 +37,7 @@
.prompt-input {
@apply w-full pt-2.5 border text-sm resize-none outline-none transition-colors;
padding-inline-start: 0.75rem;
padding-inline-end: 5.5rem;
padding-inline-end: 7.5rem;
font-family: inherit;
background-color: var(--surface-base);
color: var(--text-primary);
@@ -90,22 +90,32 @@
inset-inline-end: 0.25rem;
bottom: 0.25rem;
display: flex;
flex-direction: column;
align-items: flex-end;
justify-content: flex-start;
flex-direction: row;
align-items: flex-start;
justify-content: flex-end;
gap: 0.125rem;
z-index: 2;
}
.prompt-nav-top-row {
.prompt-nav-column {
display: flex;
align-items: center;
justify-content: flex-end;
flex-direction: column;
align-items: flex-end;
justify-content: flex-start;
gap: 0.125rem;
}
.prompt-nav-column-left {
min-width: 1.75rem;
}
.prompt-nav-column-right {
min-width: 1.75rem;
}
.prompt-expand-button,
.prompt-history-button {
.prompt-history-button,
.prompt-clear-button {
@apply w-7 h-7 flex items-center justify-center rounded-md;
color: var(--text-muted);
background-color: var(--control-ghost-bg);
@@ -115,7 +125,8 @@
}
.prompt-expand-button:hover:not(:disabled),
.prompt-history-button:hover:not(:disabled) {
.prompt-history-button:hover:not(:disabled),
.prompt-clear-button:hover:not(:disabled) {
background-color: var(--surface-secondary);
color: var(--text-primary);
}
@@ -127,7 +138,8 @@
}
.prompt-expand-button:disabled,
.prompt-history-button:disabled {
.prompt-history-button:disabled,
.prompt-clear-button:disabled {
opacity: 0.4;
cursor: not-allowed;
}
@@ -208,6 +220,16 @@
color: var(--button-danger-text, var(--text-inverted, #ffffff));
}
.prompt-voice-button.is-recording:hover:not(:disabled) {
background-color: var(--button-danger-hover-bg, rgba(239, 68, 68, 0.9));
color: var(--button-danger-text, var(--text-inverted, #ffffff));
}
.prompt-voice-button.is-recording:active:not(:disabled) {
background-color: var(--button-danger-active-bg, rgba(239, 68, 68, 1));
color: var(--button-danger-text, var(--text-inverted, #ffffff));
}
.prompt-nav-voice-button {
min-width: 1.75rem;
width: 1.75rem;
@@ -216,14 +238,24 @@
}
.prompt-nav-voice-button.is-recording {
min-width: 3.5rem;
width: auto;
min-width: 1.75rem;
width: 1.75rem;
}
.prompt-voice-button:disabled {
@apply opacity-50 cursor-not-allowed;
}
.prompt-conversation-button.is-active {
background-color: color-mix(in oklab, var(--accent-primary) 76%, var(--surface-secondary));
color: var(--text-inverted);
}
.prompt-conversation-button.is-active:hover:not(:disabled) {
background-color: color-mix(in oklab, var(--accent-primary) 88%, var(--surface-secondary));
color: var(--text-inverted);
}
.prompt-voice-timer {
font-size: 0.68rem;
font-variant-numeric: tabular-nums;
@@ -397,7 +429,7 @@
.prompt-input {
min-height: 0;
padding: 0.5rem 0.75rem;
padding-inline-end: 5.5rem;
padding-inline-end: 7.5rem;
padding-bottom: 0.75rem;
}