feat(voice): support per-client conversation mode state

This commit is contained in:
Shantur
2026-03-31 12:39:29 +01:00
parent 197dee2aea
commit 42589464e5
9 changed files with 409 additions and 14 deletions

View File

@@ -24,6 +24,7 @@ import type {
WorktreeMap,
WorktreeCreateRequest,
} from "../../../server/src/api-types"
import { getClientIdentity } from "./client-identity"
import { getLogger } from "./logger"
const RUNTIME_BASE = typeof window !== "undefined" ? window.location?.origin : undefined
@@ -350,9 +351,16 @@ export const serverApi = {
)
},
updateVoiceMode(instanceId: string, enabled: boolean): Promise<VoiceModeStateResponse> {
const identity = getClientIdentity()
return request<VoiceModeStateResponse>(`/workspaces/${encodeURIComponent(instanceId)}/plugin/voice-mode`, {
method: "POST",
body: JSON.stringify({ enabled }),
body: JSON.stringify({ ...identity, enabled }),
})
},
sendClientConnectionPong(payload: { clientId: string; connectionId: string; pingTs?: number }): Promise<void> {
return request<void>("/api/client-connections/pong", {
method: "POST",
body: JSON.stringify(payload),
})
},
fetchBackgroundProcessOutput(
@@ -379,9 +387,15 @@ export const serverApi = {
`/workspaces/${encodeURIComponent(instanceId)}/plugin/background-processes/${encodeURIComponent(processId)}/output${suffix}`,
)
},
connectEvents(onEvent: (event: WorkspaceEventPayload) => void, onError?: () => void) {
sseLogger.info(`Connecting to ${EVENTS_URL}`)
const source = new EventSource(EVENTS_URL, { withCredentials: true } as any)
connectEvents(
onEvent: (event: WorkspaceEventPayload) => void,
onError?: () => void,
onPing?: (payload: { ts?: number }) => void,
) {
const identity = getClientIdentity()
const url = buildClientEventsUrl(identity)
sseLogger.info(`Connecting to ${url}`)
const source = new EventSource(url, { withCredentials: true } as any)
source.onmessage = (event) => {
try {
const payload = JSON.parse(event.data) as WorkspaceEventPayload
@@ -394,8 +408,26 @@ export const serverApi = {
sseLogger.warn("EventSource error, closing stream")
onError?.()
}
source.addEventListener("codenomad.client.ping", (event: MessageEvent) => {
try {
const payload = event.data ? (JSON.parse(event.data) as { ts?: number }) : {}
onPing?.(payload)
} catch (error) {
sseLogger.error("Failed to parse ping event", error)
}
})
return source
},
}
function buildClientEventsUrl(identity: { clientId: string; connectionId: string }): string {
const url = new URL(EVENTS_URL, typeof window !== "undefined" ? window.location.origin : "http://localhost")
url.searchParams.set("clientId", identity.clientId)
url.searchParams.set("connectionId", identity.connectionId)
if (EVENTS_URL.startsWith("http://") || EVENTS_URL.startsWith("https://")) {
return url.toString()
}
return `${url.pathname}${url.search}`
}
export type { WorkspaceDescriptor, WorkspaceLogEntry, WorkspaceEventPayload, WorkspaceEventType }

View File

@@ -0,0 +1,58 @@
const CLIENT_ID_STORAGE_KEY = "codenomad.client-id"
const CONNECTION_ID_STORAGE_KEY = "codenomad.connection-id"
let cachedClientId: string | null = null
let cachedConnectionId: string | null = null
export function getClientIdentity(): { clientId: string; connectionId: string } {
return {
clientId: getOrCreateClientId(),
connectionId: getOrCreateConnectionId(),
}
}
function getOrCreateClientId(): string {
if (cachedClientId) return cachedClientId
cachedClientId = getOrCreateStoredValue(CLIENT_ID_STORAGE_KEY, window.localStorage)
return cachedClientId
}
function getOrCreateConnectionId(): string {
if (cachedConnectionId) return cachedConnectionId
cachedConnectionId = getOrCreateStoredValue(CONNECTION_ID_STORAGE_KEY, window.sessionStorage)
return cachedConnectionId
}
function getOrCreateStoredValue(key: string, storage: Storage): string {
if (typeof window === "undefined") {
return generateUUID()
}
try {
const existing = storage.getItem(key)
if (existing && existing.trim()) {
return existing.trim()
}
} catch {
return generateUUID()
}
const next = generateUUID()
try {
storage.setItem(key, next)
} catch {
// Ignore storage failures and fall back to the in-memory value.
}
return next
}
function generateUUID(): string {
if (typeof crypto !== "undefined" && crypto.randomUUID) {
return crypto.randomUUID()
}
return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (char) => {
const random = (Math.random() * 16) | 0
const value = char === "x" ? random : (random & 0x3) | 0x8
return value.toString(16)
})
}

View File

@@ -1,5 +1,6 @@
import type { WorkspaceEventPayload, WorkspaceEventType } from "../../../server/src/api-types"
import { serverApi } from "./api-client"
import { getClientIdentity } from "./client-identity"
import { getLogger } from "./logger"
const RETRY_BASE_DELAY = 1000
@@ -16,6 +17,7 @@ function logSse(message: string, context?: Record<string, unknown>) {
class ServerEvents {
private handlers = new Map<WorkspaceEventType | "*", Set<(event: WorkspaceEventPayload) => void>>()
private openHandlers = new Set<() => void>()
private source: EventSource | null = null
private retryDelay = RETRY_BASE_DELAY
@@ -28,10 +30,24 @@ class ServerEvents {
this.source.close()
}
logSse("Connecting to backend events stream")
this.source = serverApi.connectEvents((event) => this.dispatch(event), () => this.scheduleReconnect())
this.source = serverApi.connectEvents(
(event) => this.dispatch(event),
() => this.scheduleReconnect(),
(payload) => {
void serverApi
.sendClientConnectionPong({
...getClientIdentity(),
pingTs: payload.ts,
})
.catch((error) => {
log.error("Failed to send client connection pong", error)
})
},
)
this.source.onopen = () => {
logSse("Events stream connected")
this.retryDelay = RETRY_BASE_DELAY
this.openHandlers.forEach((handler) => handler())
}
}
@@ -61,6 +77,11 @@ class ServerEvents {
bucket.add(handler)
return () => bucket.delete(handler)
}
onOpen(handler: () => void): () => void {
this.openHandlers.add(handler)
return () => this.openHandlers.delete(handler)
}
}
export const serverEvents = new ServerEvents()

View File

@@ -4,6 +4,7 @@ import { showToastNotification } from "../lib/notifications"
import { serverApi } from "../lib/api-client"
import { getLogger } from "../lib/logger"
import { formatToMimeType, getSpeechPlaybackSupport } from "../lib/speech-playback-support"
import { serverEvents } from "../lib/server-events"
import { serverSettings } from "./preferences"
import { loadSpeechCapabilities, speechCapabilities } from "./speech"
import { getActiveSession, sessions } from "./session-state"
@@ -44,6 +45,10 @@ let currentPlayback:
let queueRunner: Promise<void> | null = null
let playbackErrorShown = false
serverEvents.onOpen(() => {
void syncConversationModesToServer()
})
function getEntryKey(instanceId: string, sessionId: string, messageId: string, partId: string): string {
return `${instanceId}:${sessionId}:${messageId}:${partId}`
}
@@ -532,3 +537,12 @@ function extractLeadingSpokenBlock(text: string): string {
if (!match?.[1]) return ""
return match[1].trim()
}
async function syncConversationModesToServer(): Promise<void> {
const updates: Promise<unknown>[] = []
for (const [instanceId, enabled] of conversationModeInstances()) {
if (!enabled) continue
updates.push(serverApi.updateVoiceMode(instanceId, true))
}
await Promise.allSettled(updates)
}