feat(voice): support per-client conversation mode state

This commit is contained in:
Shantur
2026-03-31 12:39:29 +01:00
parent 197dee2aea
commit 42589464e5
9 changed files with 409 additions and 14 deletions

View File

@@ -0,0 +1,128 @@
import type { Logger } from "../logger"
const STALE_CONNECTION_TIMEOUT_MS = 45000
const STALE_SWEEP_INTERVAL_MS = 5000
export interface ClientConnectionRef {
clientId: string
connectionId: string
}
export interface ClientConnectionRecord extends ClientConnectionRef {
key: string
connectedAt: number
lastSeenAt: number
}
type ConnectionChangeEvent = {
type: "connected" | "disconnected"
connection: ClientConnectionRecord
reason?: string
}
interface RegisteredConnection extends ClientConnectionRecord {
close: () => void
}
export class ClientConnectionManager {
private readonly connections = new Map<string, RegisteredConnection>()
private readonly subscribers = new Set<(event: ConnectionChangeEvent) => void>()
private readonly sweepTimer: NodeJS.Timeout
constructor(private readonly logger: Logger) {
this.sweepTimer = setInterval(() => this.sweepStaleConnections(), STALE_SWEEP_INTERVAL_MS)
this.sweepTimer.unref?.()
}
shutdown(): void {
clearInterval(this.sweepTimer)
for (const connection of Array.from(this.connections.values())) {
this.disconnect(connection.key, "shutdown", false)
}
}
subscribe(listener: (event: ConnectionChangeEvent) => void): () => void {
this.subscribers.add(listener)
return () => this.subscribers.delete(listener)
}
register(input: ClientConnectionRef & { close: () => void }): () => void {
const key = getConnectionKey(input)
const now = Date.now()
const existing = this.connections.get(key)
if (existing) {
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Replacing existing client connection")
this.disconnect(key, "replaced")
}
const connection: RegisteredConnection = {
key,
clientId: input.clientId,
connectionId: input.connectionId,
connectedAt: now,
lastSeenAt: now,
close: input.close,
}
this.connections.set(key, connection)
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Client connected")
this.notify({ type: "connected", connection })
return () => this.disconnect(key, "closed")
}
pong(input: ClientConnectionRef): boolean {
const key = getConnectionKey(input)
const connection = this.connections.get(key)
if (!connection) {
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Ignoring pong for unknown client connection")
return false
}
connection.lastSeenAt = Date.now()
return true
}
isConnected(input: ClientConnectionRef): boolean {
return this.connections.has(getConnectionKey(input))
}
private sweepStaleConnections(): void {
const cutoff = Date.now() - STALE_CONNECTION_TIMEOUT_MS
for (const connection of Array.from(this.connections.values())) {
if (connection.lastSeenAt > cutoff) continue
this.logger.debug({ clientId: connection.clientId, connectionId: connection.connectionId }, "Client connection timed out")
this.disconnect(connection.key, "timeout")
}
}
private disconnect(key: string, reason: string, invokeClose = true): void {
const connection = this.connections.get(key)
if (!connection) return
this.connections.delete(key)
this.logger.debug({ clientId: connection.clientId, connectionId: connection.connectionId, reason }, "Client disconnected")
if (invokeClose) {
try {
connection.close()
} catch (error) {
this.logger.warn({ err: error, clientId: connection.clientId, connectionId: connection.connectionId }, "Failed to close stale client connection")
}
}
this.notify({ type: "disconnected", connection, reason })
}
private notify(event: ConnectionChangeEvent): void {
for (const subscriber of this.subscribers) {
try {
subscriber(event)
} catch (error) {
this.logger.warn({ err: error, eventType: event.type }, "Client connection subscriber failed")
}
}
}
}
function getConnectionKey(input: ClientConnectionRef): string {
return `${input.clientId}:${input.connectionId}`
}

View File

@@ -0,0 +1,96 @@
import type { Logger } from "../logger"
import type { ClientConnectionManager, ClientConnectionRef } from "../clients/connection-manager"
import type { PluginChannelManager } from "./channel"
interface VoiceModeManagerOptions {
connections: ClientConnectionManager
channel: PluginChannelManager
logger: Logger
}
export class VoiceModeManager {
private readonly enabledConnectionsByInstance = new Map<string, Set<string>>()
private readonly aggregateByInstance = new Map<string, boolean>()
constructor(private readonly options: VoiceModeManagerOptions) {
this.options.connections.subscribe((event) => {
if (event.type !== "disconnected") return
this.clearConnection(event.connection)
})
}
setEnabled(instanceId: string, connection: ClientConnectionRef, enabled: boolean): void {
if (enabled && !this.options.connections.isConnected(connection)) {
this.options.logger.debug(
{ instanceId, clientId: connection.clientId, connectionId: connection.connectionId },
"Ignoring voice mode enable for disconnected client connection",
)
return
}
const key = getConnectionKey(connection)
const current = this.enabledConnectionsByInstance.get(instanceId) ?? new Set<string>()
if (enabled) {
current.add(key)
this.enabledConnectionsByInstance.set(instanceId, current)
} else if (current.delete(key)) {
if (current.size === 0) {
this.enabledConnectionsByInstance.delete(instanceId)
} else {
this.enabledConnectionsByInstance.set(instanceId, current)
}
}
this.options.logger.debug({ instanceId, clientId: connection.clientId, connectionId: connection.connectionId, enabled }, "Voice mode updated for client connection")
this.publishIfChanged(instanceId)
}
syncInstance(instanceId: string): void {
this.options.channel.send(instanceId, buildVoiceModeEvent(this.isEnabled(instanceId)))
}
isEnabled(instanceId: string): boolean {
return this.aggregateByInstance.get(instanceId) === true
}
private clearConnection(connection: ClientConnectionRef): void {
const key = getConnectionKey(connection)
for (const [instanceId, enabledConnections] of Array.from(this.enabledConnectionsByInstance.entries())) {
if (!enabledConnections.delete(key)) continue
if (enabledConnections.size === 0) {
this.enabledConnectionsByInstance.delete(instanceId)
}
this.publishIfChanged(instanceId)
}
}
private publishIfChanged(instanceId: string): void {
const enabled = (this.enabledConnectionsByInstance.get(instanceId)?.size ?? 0) > 0
const previous = this.aggregateByInstance.get(instanceId) === true
if (enabled === previous) return
if (enabled) {
this.aggregateByInstance.set(instanceId, true)
} else {
this.aggregateByInstance.delete(instanceId)
}
this.options.logger.debug({ instanceId, enabled }, "Broadcasting aggregate voice mode")
this.options.channel.send(instanceId, buildVoiceModeEvent(enabled))
}
}
function buildVoiceModeEvent(enabled: boolean) {
return {
type: "codenomad.voiceMode",
properties: {
enabled,
formatVersion: "v1",
},
}
}
function getConnectionKey(connection: ClientConnectionRef): string {
return `${connection.clientId}:${connection.connectionId}`
}

View File

@@ -29,7 +29,9 @@ import type { AuthManager } from "../auth/manager"
import { registerAuthRoutes } from "./routes/auth"
import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
import type { SpeechService } from "../speech/service"
import { ClientConnectionManager } from "../clients/connection-manager"
import { PluginChannelManager } from "../plugins/channel"
import { VoiceModeManager } from "../plugins/voice-mode"
interface HttpServerDeps {
bindHost: string
@@ -174,7 +176,13 @@ export function createHttpServer(deps: HttpServerDeps) {
eventBus: deps.eventBus,
logger: deps.logger.child({ component: "background-processes" }),
})
const clientConnectionManager = new ClientConnectionManager(deps.logger.child({ component: "client-connections" }))
const pluginChannel = new PluginChannelManager(deps.logger.child({ component: "plugin-channel" }))
const voiceModeManager = new VoiceModeManager({
connections: clientConnectionManager,
channel: pluginChannel,
logger: deps.logger.child({ component: "voice-mode" }),
})
registerAuthRoutes(app, { authManager: deps.authManager })
@@ -250,7 +258,12 @@ export function createHttpServer(deps: HttpServerDeps) {
registerSettingsRoutes(app, { settings: deps.settings, logger: apiLogger })
registerFilesystemRoutes(app, { fileSystemBrowser: deps.fileSystemBrowser })
registerMetaRoutes(app, { serverMeta: deps.serverMeta })
registerEventRoutes(app, { eventBus: deps.eventBus, registerClient: registerSseClient, logger: sseLogger })
registerEventRoutes(app, {
eventBus: deps.eventBus,
registerClient: registerSseClient,
logger: sseLogger,
connectionManager: clientConnectionManager,
})
registerWorktreeRoutes(app, { workspaceManager: deps.workspaceManager })
registerStorageRoutes(app, {
instanceStore: deps.instanceStore,
@@ -263,6 +276,7 @@ export function createHttpServer(deps: HttpServerDeps) {
eventBus: deps.eventBus,
logger: proxyLogger,
channel: pluginChannel,
voiceModeManager,
})
registerBackgroundProcessRoutes(app, { backgroundProcessManager })
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
@@ -328,6 +342,7 @@ export function createHttpServer(deps: HttpServerDeps) {
},
stop: () => {
closeSseClients()
clientConnectionManager.shutdown()
return app.close()
},
}

View File

@@ -1,19 +1,32 @@
import { FastifyInstance } from "fastify"
import { z } from "zod"
import { EventBus } from "../../events/bus"
import { WorkspaceEventPayload } from "../../api-types"
import type { ClientConnectionManager } from "../../clients/connection-manager"
import { Logger } from "../../logger"
interface RouteDeps {
eventBus: EventBus
registerClient: (cleanup: () => void) => () => void
logger: Logger
connectionManager: ClientConnectionManager
}
let nextClientId = 0
const ConnectionQuerySchema = z.object({
clientId: z.string().trim().min(1),
connectionId: z.string().trim().min(1),
})
const PongBodySchema = ConnectionQuerySchema.extend({
pingTs: z.number().optional(),
})
export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
app.get("/api/events", (request, reply) => {
const clientId = ++nextClientId
const connection = ConnectionQuerySchema.parse(request.query ?? {})
deps.logger.debug({ clientId }, "SSE client connected")
const origin = request.headers.origin ?? "*"
@@ -35,7 +48,8 @@ export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
const unsubscribe = deps.eventBus.onEvent(send)
const heartbeat = setInterval(() => {
reply.raw.write(`:hb ${Date.now()}\n\n`)
const ping = { ts: Date.now() }
reply.raw.write(`event: codenomad.client.ping\ndata: ${JSON.stringify(ping)}\n\n`)
}, 15000)
let closed = false
@@ -49,13 +63,27 @@ export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
}
const unregister = deps.registerClient(close)
const unregisterConnection = deps.connectionManager.register({
...connection,
close,
})
const handleClose = () => {
close()
unregister()
unregisterConnection()
}
request.raw.on("close", handleClose)
request.raw.on("error", handleClose)
})
app.post("/api/client-connections/pong", (request, reply) => {
const body = PongBodySchema.parse(request.body ?? {})
if (!deps.connectionManager.pong(body)) {
reply.code(404).send({ error: "Client connection not found" })
return
}
reply.code(204).send()
})
}

View File

@@ -6,12 +6,14 @@ import type { EventBus } from "../../events/bus"
import type { Logger } from "../../logger"
import { PluginChannelManager } from "../../plugins/channel"
import { buildPingEvent, handlePluginEvent } from "../../plugins/handlers"
import { VoiceModeManager } from "../../plugins/voice-mode"
interface RouteDeps {
workspaceManager: WorkspaceManager
eventBus: EventBus
logger: Logger
channel: PluginChannelManager
voiceModeManager: VoiceModeManager
}
const PluginEventSchema = z.object({
@@ -21,6 +23,8 @@ const PluginEventSchema = z.object({
const VoiceModeStateSchema = z.object({
enabled: z.boolean(),
clientId: z.string().trim().min(1),
connectionId: z.string().trim().min(1),
})
export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
@@ -38,6 +42,7 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
reply.hijack()
const registration = deps.channel.register(request.params.id, reply)
deps.voiceModeManager.syncInstance(request.params.id)
const heartbeat = setInterval(() => {
deps.channel.send(request.params.id, buildPingEvent())
@@ -61,13 +66,11 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
}
const payload = VoiceModeStateSchema.parse(request.body ?? {})
deps.channel.send(request.params.id, {
type: "codenomad.voiceMode",
properties: {
enabled: payload.enabled,
formatVersion: "v1",
},
})
deps.voiceModeManager.setEnabled(
request.params.id,
{ clientId: payload.clientId, connectionId: payload.connectionId },
payload.enabled,
)
return { enabled: payload.enabled }
})