feat(voice): add spoken summary mode for conversation replies
This commit is contained in:
@@ -2,6 +2,8 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
|||||||
import { createCodeNomadClient, getCodeNomadConfig } from "./lib/client"
|
import { createCodeNomadClient, getCodeNomadConfig } from "./lib/client"
|
||||||
import { createBackgroundProcessTools } from "./lib/background-process"
|
import { createBackgroundProcessTools } from "./lib/background-process"
|
||||||
|
|
||||||
|
let voiceModeEnabled = false
|
||||||
|
|
||||||
export async function CodeNomadPlugin(input: PluginInput) {
|
export async function CodeNomadPlugin(input: PluginInput) {
|
||||||
const config = getCodeNomadConfig()
|
const config = getCodeNomadConfig()
|
||||||
const client = createCodeNomadClient(config)
|
const client = createCodeNomadClient(config)
|
||||||
@@ -16,6 +18,11 @@ export async function CodeNomadPlugin(input: PluginInput) {
|
|||||||
pingTs: (event.properties as any)?.ts,
|
pingTs: (event.properties as any)?.ts,
|
||||||
},
|
},
|
||||||
}).catch(() => {})
|
}).catch(() => {})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.type === "codenomad.voiceMode") {
|
||||||
|
voiceModeEnabled = Boolean((event.properties as { enabled?: unknown } | undefined)?.enabled)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -23,6 +30,13 @@ export async function CodeNomadPlugin(input: PluginInput) {
|
|||||||
tool: {
|
tool: {
|
||||||
...backgroundProcessTools,
|
...backgroundProcessTools,
|
||||||
},
|
},
|
||||||
|
async "chat.message"(_input: { sessionID: string }, output: { message: { system?: string } }) {
|
||||||
|
if (!voiceModeEnabled) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
output.message.system = [output.message.system, buildVoiceModePrompt()].filter(Boolean).join("\n\n")
|
||||||
|
},
|
||||||
async event(input: { event: any }) {
|
async event(input: { event: any }) {
|
||||||
const opencodeEvent = input?.event
|
const opencodeEvent = input?.event
|
||||||
if (!opencodeEvent || typeof opencodeEvent !== "object") return
|
if (!opencodeEvent || typeof opencodeEvent !== "object") return
|
||||||
@@ -30,3 +44,19 @@ export async function CodeNomadPlugin(input: PluginInput) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function buildVoiceModePrompt(): string {
|
||||||
|
return [
|
||||||
|
"Voice conversation mode is enabled.",
|
||||||
|
"Prepend your reply with a fenced code block using language `spoken`.",
|
||||||
|
"The `spoken` block should be the natural conversational reply you would say out loud to the user. It should be a concise spoken gist of the full response in 2 to 4 natural sentences.",
|
||||||
|
"In the spoken block, summarize the main outcome, recommendation, or next step. Sound conversational and natural, not like a document summary.",
|
||||||
|
"Do not include code, bullet lists, markdown formatting, or long technical detail in the spoken block.",
|
||||||
|
"Do not add generic phrases about whether the user should read more.",
|
||||||
|
"Only mention additional written detail when there is something specific that may matter for the user's next response, such as a tradeoff, caveat, risk, open question, exact diff, or test result.",
|
||||||
|
"When referring to that written detail, say `below` or `in the message` rather than `detailed section`.",
|
||||||
|
"After the `spoken` block, continue with your normal detailed response.",
|
||||||
|
"Example:",
|
||||||
|
"```spoken\nI implemented the relay-based voice-mode flow and it works with the current plugin bridge. The reconnect caveat is explained below.\n```",
|
||||||
|
].join("\n\n")
|
||||||
|
}
|
||||||
|
|||||||
@@ -240,6 +240,10 @@ export interface SpeechSynthesisResponse {
|
|||||||
mimeType: string
|
mimeType: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface VoiceModeStateResponse {
|
||||||
|
enabled: boolean
|
||||||
|
}
|
||||||
|
|
||||||
export type WorkspaceEventType =
|
export type WorkspaceEventType =
|
||||||
| "workspace.created"
|
| "workspace.created"
|
||||||
| "workspace.started"
|
| "workspace.started"
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ import type { AuthManager } from "../auth/manager"
|
|||||||
import { registerAuthRoutes } from "./routes/auth"
|
import { registerAuthRoutes } from "./routes/auth"
|
||||||
import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
|
import { sendUnauthorized, wantsHtml } from "../auth/http-auth"
|
||||||
import type { SpeechService } from "../speech/service"
|
import type { SpeechService } from "../speech/service"
|
||||||
|
import { PluginChannelManager } from "../plugins/channel"
|
||||||
|
|
||||||
interface HttpServerDeps {
|
interface HttpServerDeps {
|
||||||
bindHost: string
|
bindHost: string
|
||||||
@@ -173,6 +174,7 @@ export function createHttpServer(deps: HttpServerDeps) {
|
|||||||
eventBus: deps.eventBus,
|
eventBus: deps.eventBus,
|
||||||
logger: deps.logger.child({ component: "background-processes" }),
|
logger: deps.logger.child({ component: "background-processes" }),
|
||||||
})
|
})
|
||||||
|
const pluginChannel = new PluginChannelManager(deps.logger.child({ component: "plugin-channel" }))
|
||||||
|
|
||||||
registerAuthRoutes(app, { authManager: deps.authManager })
|
registerAuthRoutes(app, { authManager: deps.authManager })
|
||||||
|
|
||||||
@@ -256,7 +258,12 @@ export function createHttpServer(deps: HttpServerDeps) {
|
|||||||
workspaceManager: deps.workspaceManager,
|
workspaceManager: deps.workspaceManager,
|
||||||
})
|
})
|
||||||
registerSpeechRoutes(app, { speechService: deps.speechService })
|
registerSpeechRoutes(app, { speechService: deps.speechService })
|
||||||
registerPluginRoutes(app, { workspaceManager: deps.workspaceManager, eventBus: deps.eventBus, logger: proxyLogger })
|
registerPluginRoutes(app, {
|
||||||
|
workspaceManager: deps.workspaceManager,
|
||||||
|
eventBus: deps.eventBus,
|
||||||
|
logger: proxyLogger,
|
||||||
|
channel: pluginChannel,
|
||||||
|
})
|
||||||
registerBackgroundProcessRoutes(app, { backgroundProcessManager })
|
registerBackgroundProcessRoutes(app, { backgroundProcessManager })
|
||||||
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
|
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { FastifyInstance } from "fastify"
|
import { FastifyInstance } from "fastify"
|
||||||
import { z } from "zod"
|
import { z } from "zod"
|
||||||
|
import type { VoiceModeStateResponse } from "../../api-types"
|
||||||
import type { WorkspaceManager } from "../../workspaces/manager"
|
import type { WorkspaceManager } from "../../workspaces/manager"
|
||||||
import type { EventBus } from "../../events/bus"
|
import type { EventBus } from "../../events/bus"
|
||||||
import type { Logger } from "../../logger"
|
import type { Logger } from "../../logger"
|
||||||
@@ -10,6 +11,7 @@ interface RouteDeps {
|
|||||||
workspaceManager: WorkspaceManager
|
workspaceManager: WorkspaceManager
|
||||||
eventBus: EventBus
|
eventBus: EventBus
|
||||||
logger: Logger
|
logger: Logger
|
||||||
|
channel: PluginChannelManager
|
||||||
}
|
}
|
||||||
|
|
||||||
const PluginEventSchema = z.object({
|
const PluginEventSchema = z.object({
|
||||||
@@ -17,9 +19,11 @@ const PluginEventSchema = z.object({
|
|||||||
properties: z.record(z.unknown()).optional(),
|
properties: z.record(z.unknown()).optional(),
|
||||||
})
|
})
|
||||||
|
|
||||||
export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
|
const VoiceModeStateSchema = z.object({
|
||||||
const channel = new PluginChannelManager(deps.logger.child({ component: "plugin-channel" }))
|
enabled: z.boolean(),
|
||||||
|
})
|
||||||
|
|
||||||
|
export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
|
||||||
app.get<{ Params: { id: string } }>("/workspaces/:id/plugin/events", (request, reply) => {
|
app.get<{ Params: { id: string } }>("/workspaces/:id/plugin/events", (request, reply) => {
|
||||||
const workspace = deps.workspaceManager.get(request.params.id)
|
const workspace = deps.workspaceManager.get(request.params.id)
|
||||||
if (!workspace) {
|
if (!workspace) {
|
||||||
@@ -33,10 +37,10 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
|
|||||||
reply.raw.flushHeaders?.()
|
reply.raw.flushHeaders?.()
|
||||||
reply.hijack()
|
reply.hijack()
|
||||||
|
|
||||||
const registration = channel.register(request.params.id, reply)
|
const registration = deps.channel.register(request.params.id, reply)
|
||||||
|
|
||||||
const heartbeat = setInterval(() => {
|
const heartbeat = setInterval(() => {
|
||||||
channel.send(request.params.id, buildPingEvent())
|
deps.channel.send(request.params.id, buildPingEvent())
|
||||||
}, 15000)
|
}, 15000)
|
||||||
|
|
||||||
const close = () => {
|
const close = () => {
|
||||||
@@ -49,6 +53,24 @@ export function registerPluginRoutes(app: FastifyInstance, deps: RouteDeps) {
|
|||||||
request.raw.on("error", close)
|
request.raw.on("error", close)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
app.post<{ Params: { id: string }; Body: VoiceModeStateResponse }>("/workspaces/:id/plugin/voice-mode", (request, reply) => {
|
||||||
|
const workspace = deps.workspaceManager.get(request.params.id)
|
||||||
|
if (!workspace) {
|
||||||
|
reply.code(404).send({ error: "Workspace not found" })
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const payload = VoiceModeStateSchema.parse(request.body ?? {})
|
||||||
|
deps.channel.send(request.params.id, {
|
||||||
|
type: "codenomad.voiceMode",
|
||||||
|
properties: {
|
||||||
|
enabled: payload.enabled,
|
||||||
|
formatVersion: "v1",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return { enabled: payload.enabled }
|
||||||
|
})
|
||||||
|
|
||||||
const handleWildcard = async (request: any, reply: any) => {
|
const handleWildcard = async (request: any, reply: any) => {
|
||||||
const workspaceId = request.params.id as string
|
const workspaceId = request.params.id as string
|
||||||
const workspace = deps.workspaceManager.get(workspaceId)
|
const workspace = deps.workspaceManager.get(workspaceId)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import type {
|
|||||||
SpeechSynthesisResponse,
|
SpeechSynthesisResponse,
|
||||||
SpeechTranscriptionResponse,
|
SpeechTranscriptionResponse,
|
||||||
ServerMeta,
|
ServerMeta,
|
||||||
|
VoiceModeStateResponse,
|
||||||
WorkspaceCreateRequest,
|
WorkspaceCreateRequest,
|
||||||
WorkspaceDescriptor,
|
WorkspaceDescriptor,
|
||||||
WorkspaceFileResponse,
|
WorkspaceFileResponse,
|
||||||
@@ -348,6 +349,12 @@ export const serverApi = {
|
|||||||
{ method: "POST" },
|
{ method: "POST" },
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
|
updateVoiceMode(instanceId: string, enabled: boolean): Promise<VoiceModeStateResponse> {
|
||||||
|
return request<VoiceModeStateResponse>(`/workspaces/${encodeURIComponent(instanceId)}/plugin/voice-mode`, {
|
||||||
|
method: "POST",
|
||||||
|
body: JSON.stringify({ enabled }),
|
||||||
|
})
|
||||||
|
},
|
||||||
fetchBackgroundProcessOutput(
|
fetchBackgroundProcessOutput(
|
||||||
instanceId: string,
|
instanceId: string,
|
||||||
processId: string,
|
processId: string,
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ interface PlaybackHandle {
|
|||||||
|
|
||||||
const log = getLogger("actions")
|
const log = getLogger("actions")
|
||||||
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())
|
const [conversationModeInstances, setConversationModeInstances] = createSignal<Map<string, boolean>>(new Map())
|
||||||
|
const LEADING_SPOKEN_BLOCK_REGEX = /^\s*```spoken[ \t]*\r?\n([\s\S]*?)\r?\n```(?:\r?\n|$)/i
|
||||||
|
|
||||||
const queuedKeys = new Set<string>()
|
const queuedKeys = new Set<string>()
|
||||||
const spokenKeysBySession = new Map<string, Set<string>>()
|
const spokenKeysBySession = new Map<string, Set<string>>()
|
||||||
@@ -107,6 +108,9 @@ export function canUseConversationMode(): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
|
export function setConversationModeEnabled(instanceId: string, enabled: boolean): void {
|
||||||
|
const previous = isConversationModeEnabled(instanceId)
|
||||||
|
if (previous === enabled) return
|
||||||
|
|
||||||
setConversationModeInstances((prev) => {
|
setConversationModeInstances((prev) => {
|
||||||
const next = new Map(prev)
|
const next = new Map(prev)
|
||||||
if (enabled) {
|
if (enabled) {
|
||||||
@@ -120,6 +124,23 @@ export function setConversationModeEnabled(instanceId: string, enabled: boolean)
|
|||||||
if (!enabled) {
|
if (!enabled) {
|
||||||
clearConversationPlaybackForInstance(instanceId)
|
clearConversationPlaybackForInstance(instanceId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void serverApi.updateVoiceMode(instanceId, enabled).catch((error) => {
|
||||||
|
log.error("Failed to update conversation mode", error)
|
||||||
|
setConversationModeInstances((prev) => {
|
||||||
|
const next = new Map(prev)
|
||||||
|
if (previous) {
|
||||||
|
next.set(instanceId, true)
|
||||||
|
} else {
|
||||||
|
next.delete(instanceId)
|
||||||
|
}
|
||||||
|
return next
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!previous) {
|
||||||
|
clearConversationPlaybackForInstance(instanceId)
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export function toggleConversationMode(instanceId: string): void {
|
export function toggleConversationMode(instanceId: string): void {
|
||||||
@@ -188,7 +209,7 @@ export function handleConversationAssistantPartUpdated(instanceId: string, part:
|
|||||||
if (!isConversationModeEnabled(instanceId)) return
|
if (!isConversationModeEnabled(instanceId)) return
|
||||||
if (!isSpeakableSession(instanceId, sessionId)) return
|
if (!isSpeakableSession(instanceId, sessionId)) return
|
||||||
|
|
||||||
const text = resolveTextPartContent(part).trim()
|
const text = extractLeadingSpokenBlock(resolveTextPartContent(part))
|
||||||
if (!text) return
|
if (!text) return
|
||||||
|
|
||||||
const key = getEntryKey(instanceId, sessionId, messageId, partId)
|
const key = getEntryKey(instanceId, sessionId, messageId, partId)
|
||||||
@@ -505,3 +526,9 @@ function createObjectUrlFromBase64(audioBase64: string, mimeType: string): strin
|
|||||||
}
|
}
|
||||||
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
|
return URL.createObjectURL(new Blob([bytes], { type: mimeType || "audio/mpeg" }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function extractLeadingSpokenBlock(text: string): string {
|
||||||
|
const match = text.match(LEADING_SPOKEN_BLOCK_REGEX)
|
||||||
|
if (!match?.[1]) return ""
|
||||||
|
return match[1].trim()
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user