Release 0.13.3 - Voice conversation mode, File editing, YOLO mode (#264)

## Thanks for contributions
- PR #252 “feat: Enable file editing and saving” by @jchadwick
- PR #256 “feat(ui): add session yolo mode controls” by @pascalandr
- PR #257 “fix(tauri): sync native app version with package releases” by
@pascalandr
- PR #258 “fix(tauri): stop stale UI assets from shadowing desktop
builds” by @pascalandr
- PR #260 “fix(ui): escape raw HTML in user prompt messages” by
@app/codenomadbot

## Highlights
- **Edit and save files directly in CodeNomad**: Update workspace files
in the built-in editor, save them without leaving the app, and get safer
handling for unsaved changes or edit conflicts.
- **More control over session automation**: Turn on per-session YOLO
mode from the Status tab, keep it visible with a clear badge, and let
long-running sessions continue auto-accepting prompts as expected.
- **Better voice conversation options**: Use spoken summary mode for
replies and keep conversation speech settings isolated per client, so
one device’s voice preferences do not unexpectedly affect another.
- **Faster session recovery**: Reload a session transcript from the
sidebar and see when a session is retrying, including live status
feedback.

## What’s Improved
- **Smoother desktop setup**: Desktop builds now bundle the right CLI
resources and handle microphone access more cleanly.
- **More reliable cross-platform desktop behavior**: Windows process
handling and npm invocation are safer, reducing environment-specific
issues.
- **Clearer session status visibility**: Retrying sessions now show more
useful state in the sidebar and header, so it is easier to tell what is
happening.
- **Cleaner in-app feedback**: Long toast messages wrap properly, GitHub
star counts display more cleanly, and message/code rendering behaves
more predictably.

## Fixes
- **Safer prompt rendering**: Raw HTML in user prompts is escaped so
messages display safely instead of being interpreted.
- **More reliable code previews**: Incomplete syntax highlighting
results are no longer cached, which helps prevent broken-looking file
views.
- **Better voice handoff**: Conversation playback stops when voice input
starts, avoiding overlapping speech.
- **More dependable desktop releases**: Native app versions now stay
aligned with package releases, and stale UI assets no longer shadow new
desktop builds.

### Contributors
- @jchadwick
- @pascalandr
This commit is contained in:
Shantur Rathore
2026-03-31 20:33:43 +01:00
committed by GitHub
95 changed files with 2459 additions and 319 deletions

View File

@@ -240,6 +240,10 @@ export interface SpeechSynthesisResponse {
mimeType: string
}
export interface VoiceModeStateResponse {
enabled: boolean
}
export type WorkspaceEventType =
| "workspace.created"
| "workspace.started"

View File

@@ -0,0 +1,128 @@
import type { Logger } from "../logger"
const STALE_CONNECTION_TIMEOUT_MS = 45000
const STALE_SWEEP_INTERVAL_MS = 5000
export interface ClientConnectionRef {
clientId: string
connectionId: string
}
export interface ClientConnectionRecord extends ClientConnectionRef {
key: string
connectedAt: number
lastSeenAt: number
}
type ConnectionChangeEvent = {
type: "connected" | "disconnected"
connection: ClientConnectionRecord
reason?: string
}
interface RegisteredConnection extends ClientConnectionRecord {
close: () => void
}
export class ClientConnectionManager {
private readonly connections = new Map<string, RegisteredConnection>()
private readonly subscribers = new Set<(event: ConnectionChangeEvent) => void>()
private readonly sweepTimer: NodeJS.Timeout
constructor(private readonly logger: Logger) {
this.sweepTimer = setInterval(() => this.sweepStaleConnections(), STALE_SWEEP_INTERVAL_MS)
this.sweepTimer.unref?.()
}
shutdown(): void {
clearInterval(this.sweepTimer)
for (const connection of Array.from(this.connections.values())) {
this.disconnect(connection.key, "shutdown", false)
}
}
subscribe(listener: (event: ConnectionChangeEvent) => void): () => void {
this.subscribers.add(listener)
return () => this.subscribers.delete(listener)
}
register(input: ClientConnectionRef & { close: () => void }): () => void {
const key = getConnectionKey(input)
const now = Date.now()
const existing = this.connections.get(key)
if (existing) {
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Replacing existing client connection")
this.disconnect(key, "replaced")
}
const connection: RegisteredConnection = {
key,
clientId: input.clientId,
connectionId: input.connectionId,
connectedAt: now,
lastSeenAt: now,
close: input.close,
}
this.connections.set(key, connection)
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Client connected")
this.notify({ type: "connected", connection })
return () => this.disconnect(key, "closed")
}
pong(input: ClientConnectionRef): boolean {
const key = getConnectionKey(input)
const connection = this.connections.get(key)
if (!connection) {
this.logger.debug({ clientId: input.clientId, connectionId: input.connectionId }, "Ignoring pong for unknown client connection")
return false
}
connection.lastSeenAt = Date.now()
return true
}
isConnected(input: ClientConnectionRef): boolean {
return this.connections.has(getConnectionKey(input))
}
private sweepStaleConnections(): void {
const cutoff = Date.now() - STALE_CONNECTION_TIMEOUT_MS
for (const connection of Array.from(this.connections.values())) {
if (connection.lastSeenAt > cutoff) continue
this.logger.debug({ clientId: connection.clientId, connectionId: connection.connectionId }, "Client connection timed out")
this.disconnect(connection.key, "timeout")
}
}
private disconnect(key: string, reason: string, invokeClose = true): void {
const connection = this.connections.get(key)
if (!connection) return
this.connections.delete(key)
this.logger.debug({ clientId: connection.clientId, connectionId: connection.connectionId, reason }, "Client disconnected")
if (invokeClose) {
try {
connection.close()
} catch (error) {
this.logger.warn({ err: error, clientId: connection.clientId, connectionId: connection.connectionId }, "Failed to close stale client connection")
}
}
this.notify({ type: "disconnected", connection, reason })
}
private notify(event: ConnectionChangeEvent): void {
for (const subscriber of this.subscribers) {
try {
subscriber(event)
} catch (error) {
this.logger.warn({ err: error, eventType: event.type }, "Client connection subscriber failed")
}
}
}
}
function getConnectionKey(input: ClientConnectionRef): string {
return `${input.clientId}:${input.connectionId}`
}

View File

@@ -81,6 +81,14 @@ export class FileSystemBrowser {
return { path: relativePath, absolutePath }
}
writeFile(relativePath: string, contents: string): void {
if (this.unrestricted) {
throw new Error("writeFile is not available in unrestricted mode")
}
const resolved = this.toRestrictedAbsolute(relativePath)
fs.writeFileSync(resolved, contents, "utf-8")
}
readFile(relativePath: string): string {
if (this.unrestricted) {
throw new Error("readFile is not available in unrestricted mode")

View File

@@ -0,0 +1,96 @@
import type { Logger } from "../logger"
import type { ClientConnectionManager, ClientConnectionRef } from "../clients/connection-manager"
import type { PluginChannelManager } from "./channel"
interface VoiceModeManagerOptions {
connections: ClientConnectionManager
channel: PluginChannelManager
logger: Logger
}
export class VoiceModeManager {
private readonly enabledConnectionsByInstance = new Map<string, Set<string>>()
private readonly aggregateByInstance = new Map<string, boolean>()
constructor(private readonly options: VoiceModeManagerOptions) {
this.options.connections.subscribe((event) => {
if (event.type !== "disconnected") return
this.clearConnection(event.connection)
})
}
setEnabled(instanceId: string, connection: ClientConnectionRef, enabled: boolean): void {
if (enabled && !this.options.connections.isConnected(connection)) {
this.options.logger.debug(
{ instanceId, clientId: connection.clientId, connectionId: connection.connectionId },
"Ignoring voice mode enable for disconnected client connection",
)
return
}
const key = getConnectionKey(connection)
const current = this.enabledConnectionsByInstance.get(instanceId) ?? new Set<string>()
if (enabled) {
current.add(key)
this.enabledConnectionsByInstance.set(instanceId, current)
} else if (current.delete(key)) {
if (current.size === 0) {
this.enabledConnectionsByInstance.delete(instanceId)
} else {
this.enabledConnectionsByInstance.set(instanceId, current)
}
}
this.options.logger.debug({ instanceId, clientId: connection.clientId, connectionId: connection.connectionId, enabled }, "Voice mode updated for client connection")
this.publishIfChanged(instanceId)
}
syncInstance(instanceId: string): void {
this.options.channel.send(instanceId, buildVoiceModeEvent(this.isEnabled(instanceId)))
}
isEnabled(instanceId: string): boolean {
return this.aggregateByInstance.get(instanceId) === true
}
private clearConnection(connection: ClientConnectionRef): void {
const key = getConnectionKey(connection)
for (const [instanceId, enabledConnections] of Array.from(this.enabledConnectionsByInstance.entries())) {
if (!enabledConnections.delete(key)) continue
if (enabledConnections.size === 0) {
this.enabledConnectionsByInstance.delete(instanceId)
}
this.publishIfChanged(instanceId)
}
}
private publishIfChanged(instanceId: string): void {
const enabled = (this.enabledConnectionsByInstance.get(instanceId)?.size ?? 0) > 0
const previous = this.aggregateByInstance.get(instanceId) === true
if (enabled === previous) return
if (enabled) {
this.aggregateByInstance.set(instanceId, true)
} else {
this.aggregateByInstance.delete(instanceId)
}
this.options.logger.debug({ instanceId, enabled }, "Broadcasting aggregate voice mode")
this.options.channel.send(instanceId, buildVoiceModeEvent(enabled))
}
}
function buildVoiceModeEvent(enabled: boolean) {
return {
type: "codenomad.voiceMode",
properties: {
enabled,
formatVersion: "v1",
},
}
}
function getConnectionKey(connection: ClientConnectionRef): string {
return `${connection.clientId}:${connection.connectionId}`
}

View File

@@ -29,6 +29,9 @@ import type { AuthManager } from "../auth/manager"
import { registerAuthRoutes } from "./routes/auth"
import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
import type { SpeechService } from "../speech/service"
import { ClientConnectionManager } from "../clients/connection-manager"
import { PluginChannelManager } from "../plugins/channel"
import { VoiceModeManager } from "../plugins/voice-mode"
interface HttpServerDeps {
bindHost: string
@@ -173,6 +176,13 @@ export function createHttpServer(deps: HttpServerDeps) {
eventBus: deps.eventBus,
logger: deps.logger.child({ component: "background-processes" }),
})
const clientConnectionManager = new ClientConnectionManager(deps.logger.child({ component: "client-connections" }))
const pluginChannel = new PluginChannelManager(deps.logger.child({ component: "plugin-channel" }))
const voiceModeManager = new VoiceModeManager({
connections: clientConnectionManager,
channel: pluginChannel,
logger: deps.logger.child({ component: "voice-mode" }),
})
registerAuthRoutes(app, { authManager: deps.authManager })
@@ -248,7 +258,12 @@ export function createHttpServer(deps: HttpServerDeps) {
registerSettingsRoutes(app, { settings: deps.settings, logger: apiLogger })
registerFilesystemRoutes(app, { fileSystemBrowser: deps.fileSystemBrowser })
registerMetaRoutes(app, { serverMeta: deps.serverMeta })
registerEventRoutes(app, { eventBus: deps.eventBus, registerClient: registerSseClient, logger: sseLogger })
registerEventRoutes(app, {
eventBus: deps.eventBus,
registerClient: registerSseClient,
logger: sseLogger,
connectionManager: clientConnectionManager,
})
registerWorktreeRoutes(app, { workspaceManager: deps.workspaceManager })
registerStorageRoutes(app, {
instanceStore: deps.instanceStore,
@@ -256,7 +271,13 @@ export function createHttpServer(deps: HttpServerDeps) {
workspaceManager: deps.workspaceManager,
})
registerSpeechRoutes(app, { speechService: deps.speechService })
registerPluginRoutes(app, { workspaceManager: deps.workspaceManager, eventBus: deps.eventBus, logger: proxyLogger })
registerPluginRoutes(app, {
workspaceManager: deps.workspaceManager,
eventBus: deps.eventBus,
logger: proxyLogger,
channel: pluginChannel,
voiceModeManager,
})
registerBackgroundProcessRoutes(app, { backgroundProcessManager })
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
@@ -321,6 +342,7 @@ export function createHttpServer(deps: HttpServerDeps) {
},
stop: () => {
closeSseClients()
clientConnectionManager.shutdown()
return app.close()
},
}

View File

@@ -1,19 +1,32 @@
import { FastifyInstance } from "fastify"
import { z } from "zod"
import { EventBus } from "../../events/bus"
import { WorkspaceEventPayload } from "../../api-types"
import type { ClientConnectionManager } from "../../clients/connection-manager"
import { Logger } from "../../logger"
interface RouteDeps {
eventBus: EventBus
registerClient: (cleanup: () => void) => () => void
logger: Logger
connectionManager: ClientConnectionManager
}
let nextClientId = 0
const ConnectionQuerySchema = z.object({
clientId: z.string().trim().min(1),
connectionId: z.string().trim().min(1),
})
const PongBodySchema = ConnectionQuerySchema.extend({
pingTs: z.number().optional(),
})
export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
app.get("/api/events", (request, reply) => {
const clientId = ++nextClientId
const connection = ConnectionQuerySchema.parse(request.query ?? {})
deps.logger.debug({ clientId }, "SSE client connected")
const origin = request.headers.origin ?? "*"
@@ -35,7 +48,8 @@ export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
const unsubscribe = deps.eventBus.onEvent(send)
const heartbeat = setInterval(() => {
reply.raw.write(`:hb ${Date.now()}\n\n`)
const ping = { ts: Date.now() }
reply.raw.write(`event: codenomad.client.ping\ndata: ${JSON.stringify(ping)}\n\n`)
}, 15000)
let closed = false
@@ -49,13 +63,27 @@ export function registerEventRoutes(app: FastifyInstance, deps: RouteDeps) {
}
const unregister = deps.registerClient(close)
const unregisterConnection = deps.connectionManager.register({
...connection,
close,
})
const handleClose = () => {
close()
unregister()
unregisterConnection()
}
request.raw.on("close", handleClose)
request.raw.on("error", handleClose)
})
app.post("/api/client-connections/pong", (request, reply) => {
const body = PongBodySchema.parse(request.body ?? {})
if (!deps.connectionManager.pong(body)) {
reply.code(404).send({ error: "Client connection not found" })
return
}
reply.code(204).send()
})
}

View File

@@ -1,15 +1,19 @@
import { FastifyInstance } from "fastify"
import { z } from "zod"
import type { VoiceModeStateResponse } from "../../api-types"
import type { WorkspaceManager } from "../../workspaces/manager"
import type { EventBus } from "../../events/bus"
import type { Logger } from "../../logger"
import { PluginChannelManager } from "../../plugins/channel"
import { buildPingEvent, handlePluginEvent } from "../../plugins/handlers"
import { VoiceModeManager } from "../../plugins/voice-mode"
interface RouteDeps {
workspaceManager: WorkspaceManager
eventBus: EventBus
logger: Logger
channel: PluginChannelManager
voiceModeManager: VoiceModeManager
}
const PluginEventSchema = z.object({
@@ -17,9 +21,13 @@ const PluginEventSchema = z.object({
properties: z.record(z.unknown()).optional(),
})
export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
const channel = new PluginChannelManager(deps.logger.child({ component: "plugin-channel" }))
const VoiceModeStateSchema = z.object({
enabled: z.boolean(),
clientId: z.string().trim().min(1),
connectionId: z.string().trim().min(1),
})
export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
app.get<{ Params: { id: string } }>("/workspaces/:id/plugin/events", (request, reply) => {
const workspace = deps.workspaceManager.get(request.params.id)
if (!workspace) {
@@ -33,10 +41,11 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
reply.raw.flushHeaders?.()
reply.hijack()
const registration = channel.register(request.params.id, reply)
const registration = deps.channel.register(request.params.id, reply)
deps.voiceModeManager.syncInstance(request.params.id)
const heartbeat = setInterval(() => {
channel.send(request.params.id, buildPingEvent())
deps.channel.send(request.params.id, buildPingEvent())
}, 15000)
const close = () => {
@@ -49,6 +58,22 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
request.raw.on("error", close)
})
app.post<{ Params: { id: string }; Body: VoiceModeStateResponse }>("/workspaces/:id/plugin/voice-mode", (request, reply) => {
const workspace = deps.workspaceManager.get(request.params.id)
if (!workspace) {
reply.code(404).send({ error: "Workspace not found" })
return
}
const payload = VoiceModeStateSchema.parse(request.body ?? {})
deps.voiceModeManager.setEnabled(
request.params.id,
{ clientId: payload.clientId, connectionId: payload.connectionId },
payload.enabled,
)
return { enabled: payload.enabled }
})
const handleWildcard = async (request: any, reply: any) => {
const workspaceId = request.params.id as string
const workspace = deps.workspaceManager.get(workspaceId)

View File

@@ -19,6 +19,10 @@ const WorkspaceFileContentQuerySchema = z.object({
path: z.string(),
})
const WorkspaceFileContentBodySchema = z.object({
contents: z.string(),
})
const WorkspaceFileSearchQuerySchema = z.object({
q: z.string().trim().min(1, "Query is required"),
limit: z.coerce.number().int().positive().max(200).optional(),
@@ -100,6 +104,20 @@ export function registerWorkspaceRoutes(app: FastifyInstance, deps: RouteDeps) {
return handleWorkspaceError(error, reply)
}
})
app.put<{
Params: { id: string }
Querystring: { path?: string }
}>("/api/workspaces/:id/files/content", async (request, reply) => {
try {
const query = WorkspaceFileContentQuerySchema.parse(request.query ?? {})
const body = WorkspaceFileContentBodySchema.parse(request.body ?? {})
deps.workspaceManager.writeFile(request.params.id, query.path, body.contents)
reply.code(204)
} catch (error) {
return handleWorkspaceError(error, reply)
}
})
}

View File

@@ -147,19 +147,49 @@ export class OpenAICompatibleSpeechProvider {
}
const endpoint = new URL("audio/speech", ensureTrailingSlash(settings.baseUrl ?? "https://api.openai.com/v1"))
const response = await fetch(endpoint, {
method: "POST",
headers: {
Authorization: `Bearer ${settings.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: settings.ttsModel,
voice: settings.ttsVoice,
input: text,
response_format: format,
}),
})
let response: Response
try {
response = await fetch(endpoint, {
method: "POST",
headers: {
Authorization: `Bearer ${settings.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: settings.ttsModel,
voice: settings.ttsVoice,
input: text,
response_format: format,
}),
})
} catch (error) {
const detailedError = error as Error & {
cause?: unknown
code?: string
errno?: number | string
syscall?: string
address?: string
port?: number
}
this.options.logger.error(
{
err: error,
endpoint: endpoint.toString(),
baseUrl: settings.baseUrl,
model: settings.ttsModel,
voice: settings.ttsVoice,
format,
cause: detailedError.cause,
code: detailedError.code,
errno: detailedError.errno,
syscall: detailedError.syscall,
address: detailedError.address,
port: detailedError.port,
},
"speech.synthesize fetch failed",
)
throw error
}
if (!response.ok) {
const detail = await response.text()

View File

@@ -55,4 +55,31 @@ describe("resolveUi local version preference", () => {
assert.equal(result.uiStaticDir, bundledDir)
assert.equal(result.uiVersion, "0.8.1")
})
it("prefers bundled when bundled and downloaded versions are equal", async () => {
const bundledDir = path.join(tempRoot, "bundled")
const configDir = path.join(tempRoot, "config")
const currentDir = path.join(configDir, "ui", "current")
await mkdir(bundledDir, { recursive: true })
await mkdir(currentDir, { recursive: true })
writeFileSync(path.join(bundledDir, "index.html"), "<html>bundled</html>")
writeFileSync(path.join(bundledDir, "ui-version.json"), JSON.stringify({ uiVersion: "0.8.1" }))
writeFileSync(path.join(currentDir, "index.html"), "<html>current</html>")
writeFileSync(path.join(currentDir, "ui-version.json"), JSON.stringify({ uiVersion: "0.8.1" }))
const result = await resolveUi({
serverVersion: "0.8.1",
bundledUiDir: bundledDir,
autoUpdate: false,
configDir,
logger: noopLogger,
})
assert.equal(result.source, "bundled")
assert.equal(result.uiStaticDir, bundledDir)
assert.equal(result.uiVersion, "0.8.1")
})
})

View File

@@ -250,7 +250,7 @@ async function pickBestLocalUi(args: {
uiStaticDir: currentResolved,
source: "downloaded",
uiVersion: await readUiVersion(currentResolved),
priority: 2,
priority: 1,
})
}
@@ -260,7 +260,7 @@ async function pickBestLocalUi(args: {
uiStaticDir: bundledResolved,
source: "bundled",
uiVersion: await readUiVersion(bundledResolved),
priority: 1,
priority: 2,
})
}

View File

@@ -83,6 +83,12 @@ export class WorkspaceManager {
}
}
writeFile(workspaceId: string, relativePath: string, contents: string): void {
const workspace = this.requireWorkspace(workspaceId)
const browser = new FileSystemBrowser({ rootDir: workspace.path })
browser.writeFile(relativePath, contents)
}
async create(folder: string, name?: string): Promise<WorkspaceDescriptor> {
const id = `${Date.now().toString(36)}`