261 lines
7.6 KiB
TypeScript
261 lines
7.6 KiB
TypeScript
import { createEffect, createSignal, onCleanup, type Accessor } from "solid-js"
|
|
import { showAlertDialog } from "../../stores/alerts"
|
|
import { loadSpeechCapabilities, speechCapabilities } from "../../stores/speech"
|
|
import { serverApi } from "../../lib/api-client"
|
|
import { useI18n } from "../../lib/i18n"
|
|
import { isElectronHost } from "../../lib/runtime-env"
|
|
|
|
interface UsePromptVoiceInputOptions {
|
|
prompt: Accessor<string>
|
|
setPrompt: (value: string) => void
|
|
getTextarea: () => HTMLTextAreaElement | null
|
|
enabled: Accessor<boolean>
|
|
disabled: Accessor<boolean>
|
|
}
|
|
|
|
type VoiceInputState = "idle" | "recording" | "transcribing"
|
|
|
|
export function usePromptVoiceInput(options: UsePromptVoiceInputOptions) {
|
|
const { t } = useI18n()
|
|
const [state, setState] = createSignal<VoiceInputState>("idle")
|
|
const [elapsedMs, setElapsedMs] = createSignal(0)
|
|
|
|
let mediaRecorder: MediaRecorder | null = null
|
|
let mediaStream: MediaStream | null = null
|
|
let timerId: number | undefined
|
|
let shouldTranscribe = true
|
|
let recordedChunks: Blob[] = []
|
|
let recordingStartedAt = 0
|
|
|
|
createEffect(() => {
|
|
void loadSpeechCapabilities()
|
|
})
|
|
|
|
onCleanup(() => {
|
|
cleanupMedia(false)
|
|
})
|
|
|
|
const isSupported = () => {
|
|
if (typeof window === "undefined") return false
|
|
return typeof window.MediaRecorder !== "undefined" && Boolean(navigator.mediaDevices?.getUserMedia)
|
|
}
|
|
|
|
const canUseVoiceInput = () => {
|
|
const capabilities = speechCapabilities()
|
|
return Boolean(
|
|
options.enabled() &&
|
|
isSupported() &&
|
|
capabilities?.available &&
|
|
capabilities?.configured &&
|
|
capabilities?.supportsStt,
|
|
)
|
|
}
|
|
|
|
async function toggleRecording(): Promise<void> {
|
|
if (state() === "recording") {
|
|
stopRecording()
|
|
return
|
|
}
|
|
|
|
await startRecording()
|
|
}
|
|
|
|
function stopRecording() {
|
|
if (!mediaRecorder || state() !== "recording") return
|
|
shouldTranscribe = true
|
|
mediaRecorder.stop()
|
|
setState("transcribing")
|
|
stopTimer()
|
|
}
|
|
|
|
function cancelRecording() {
|
|
if (!mediaRecorder || state() !== "recording") return
|
|
shouldTranscribe = false
|
|
mediaRecorder.stop()
|
|
cleanupMedia(false)
|
|
}
|
|
|
|
async function startRecording() {
|
|
if (!canUseVoiceInput() || options.disabled() || state() === "transcribing" || state() === "recording") return
|
|
|
|
if (!isSupported()) {
|
|
showAlertDialog(t("promptInput.voiceInput.error.unsupported"), {
|
|
title: t("promptInput.voiceInput.error.title"),
|
|
variant: "error",
|
|
})
|
|
return
|
|
}
|
|
|
|
try {
|
|
recordedChunks = []
|
|
shouldTranscribe = true
|
|
|
|
if (isElectronHost()) {
|
|
const granted = await (window as Window & { electronAPI?: ElectronAPI }).electronAPI?.requestMicrophoneAccess?.()
|
|
if (granted && !granted.granted) {
|
|
throw new Error(t("promptInput.voiceInput.error.permissionDenied"))
|
|
}
|
|
}
|
|
|
|
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
|
|
mediaRecorder = createRecorder(mediaStream)
|
|
|
|
mediaRecorder.addEventListener("dataavailable", (event) => {
|
|
if (event.data.size > 0) {
|
|
recordedChunks.push(event.data)
|
|
}
|
|
})
|
|
|
|
mediaRecorder.addEventListener("stop", () => {
|
|
void finalizeRecording()
|
|
})
|
|
|
|
recordingStartedAt = Date.now()
|
|
setElapsedMs(0)
|
|
setState("recording")
|
|
startTimer()
|
|
mediaRecorder.start()
|
|
} catch (error) {
|
|
cleanupMedia(false)
|
|
showAlertDialog(t("promptInput.voiceInput.error.permission"), {
|
|
title: t("promptInput.voiceInput.error.title"),
|
|
detail: error instanceof Error ? error.message : String(error),
|
|
variant: "error",
|
|
})
|
|
}
|
|
}
|
|
|
|
async function finalizeRecording() {
|
|
const recorder = mediaRecorder
|
|
const stream = mediaStream
|
|
mediaRecorder = null
|
|
mediaStream = null
|
|
|
|
if (!shouldTranscribe || recordedChunks.length === 0) {
|
|
recordedChunks = []
|
|
stopTracks(stream)
|
|
setState("idle")
|
|
setElapsedMs(0)
|
|
return
|
|
}
|
|
|
|
const mimeType = recorder?.mimeType || recordedChunks[0]?.type || "audio/webm"
|
|
|
|
try {
|
|
const audioBlob = new Blob(recordedChunks, { type: mimeType })
|
|
const transcription = await serverApi.transcribeAudio({
|
|
audioBase64: await blobToBase64(audioBlob),
|
|
mimeType,
|
|
})
|
|
if (transcription.text.trim()) {
|
|
insertTranscript(transcription.text.trim())
|
|
}
|
|
} catch (error) {
|
|
showAlertDialog(t("promptInput.voiceInput.error.transcribe"), {
|
|
title: t("promptInput.voiceInput.error.title"),
|
|
detail: error instanceof Error ? error.message : String(error),
|
|
variant: "error",
|
|
})
|
|
} finally {
|
|
recordedChunks = []
|
|
stopTracks(stream)
|
|
setState("idle")
|
|
setElapsedMs(0)
|
|
}
|
|
}
|
|
|
|
function insertTranscript(text: string) {
|
|
const current = options.prompt()
|
|
const textarea = options.getTextarea()
|
|
const start = textarea ? textarea.selectionStart : current.length
|
|
const end = textarea ? textarea.selectionEnd : current.length
|
|
const wasCursorAtEnd = end === current.length
|
|
const wasScrolledToBottom = textarea
|
|
? textarea.scrollHeight - (textarea.scrollTop + textarea.clientHeight) <= 4
|
|
: false
|
|
const before = current.slice(0, start)
|
|
const after = current.slice(end)
|
|
const prefix = ""
|
|
const suffix = after.length > 0 && !after.startsWith("\n") ? "\n" : ""
|
|
const nextValue = `${before}${prefix}${text}${suffix}${after}`
|
|
const cursor = before.length + prefix.length + text.length + suffix.length
|
|
|
|
options.setPrompt(nextValue)
|
|
if (textarea) {
|
|
setTimeout(() => {
|
|
textarea.focus()
|
|
textarea.setSelectionRange(cursor, cursor)
|
|
if (wasCursorAtEnd || wasScrolledToBottom) {
|
|
textarea.scrollTop = textarea.scrollHeight
|
|
}
|
|
}, 0)
|
|
}
|
|
}
|
|
|
|
function cleanupMedia(resetState = true) {
|
|
stopTimer()
|
|
if (mediaRecorder && mediaRecorder.state !== "inactive") {
|
|
mediaRecorder.stop()
|
|
}
|
|
mediaRecorder = null
|
|
stopTracks(mediaStream)
|
|
mediaStream = null
|
|
recordedChunks = []
|
|
if (resetState) {
|
|
setState("idle")
|
|
setElapsedMs(0)
|
|
}
|
|
}
|
|
|
|
function startTimer() {
|
|
stopTimer()
|
|
timerId = window.setInterval(() => {
|
|
setElapsedMs(Date.now() - recordingStartedAt)
|
|
}, 250)
|
|
}
|
|
|
|
function stopTimer() {
|
|
if (timerId !== undefined) {
|
|
window.clearInterval(timerId)
|
|
timerId = undefined
|
|
}
|
|
}
|
|
|
|
return {
|
|
state,
|
|
elapsedMs,
|
|
canUseVoiceInput,
|
|
startRecording,
|
|
stopRecording,
|
|
toggleRecording,
|
|
cancelRecording,
|
|
isRecording: () => state() === "recording",
|
|
isTranscribing: () => state() === "transcribing",
|
|
buttonTitle: () => {
|
|
if (state() === "recording") return t("promptInput.voiceInput.stop.title")
|
|
if (state() === "transcribing") return t("promptInput.voiceInput.transcribing.title")
|
|
return t("promptInput.voiceInput.start.title")
|
|
},
|
|
}
|
|
}
|
|
|
|
function createRecorder(stream: MediaStream): MediaRecorder {
|
|
const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4", "audio/ogg;codecs=opus"]
|
|
const supported = candidates.find((candidate) => typeof MediaRecorder.isTypeSupported !== "function" || MediaRecorder.isTypeSupported(candidate))
|
|
return supported ? new MediaRecorder(stream, { mimeType: supported }) : new MediaRecorder(stream)
|
|
}
|
|
|
|
function stopTracks(stream: MediaStream | null) {
|
|
stream?.getTracks().forEach((track) => track.stop())
|
|
}
|
|
|
|
async function blobToBase64(blob: Blob): Promise<string> {
|
|
const buffer = await blob.arrayBuffer()
|
|
const bytes = new Uint8Array(buffer)
|
|
let binary = ""
|
|
for (const byte of bytes) {
|
|
binary += String.fromCharCode(byte)
|
|
}
|
|
return btoa(binary)
|
|
}
|