fix markdown entity decoding for inline code

This commit is contained in:
Shantur Rathore
2025-10-31 11:19:13 +00:00
parent 505a06de05
commit b669074106
2 changed files with 67 additions and 15 deletions

View File

@@ -1,5 +1,5 @@
import { createEffect, createSignal, onMount, onCleanup } from "solid-js" import { createEffect, createSignal, onMount, onCleanup } from "solid-js"
import { renderMarkdown, onLanguagesLoaded, initMarkdown } from "../lib/markdown" import { renderMarkdown, onLanguagesLoaded, initMarkdown, decodeHtmlEntities } from "../lib/markdown"
import type { TextPart } from "../types/message" import type { TextPart } from "../types/message"
interface MarkdownProps { interface MarkdownProps {
@@ -15,7 +15,8 @@ export function Markdown(props: MarkdownProps) {
createEffect(async () => { createEffect(async () => {
const part = props.part const part = props.part
const text = part.text || "" const rawText = typeof part.text === "string" ? part.text : ""
const text = decodeHtmlEntities(rawText)
const dark = Boolean(props.isDark) const dark = Boolean(props.isDark)
const themeKey = dark ? "dark" : "light" const themeKey = dark ? "dark" : "light"
@@ -72,7 +73,8 @@ export function Markdown(props: MarkdownProps) {
// Register listener for language loading completion // Register listener for language loading completion
const cleanupLanguageListener = onLanguagesLoaded(async () => { const cleanupLanguageListener = onLanguagesLoaded(async () => {
const part = props.part const part = props.part
const text = part.text || "" const rawText = typeof part.text === "string" ? part.text : ""
const text = decodeHtmlEntities(rawText)
if (latestRequestedText !== text) { if (latestRequestedText !== text) {
return return

View File

@@ -71,7 +71,8 @@ function resolveLanguage(token: string): { canonical: string | null; raw: string
// Check aliases // Check aliases
for (const [key, lang] of Object.entries(bundledLanguages)) { for (const [key, lang] of Object.entries(bundledLanguages)) {
if (lang.aliases?.includes(normalized)) { const aliases = (lang as { aliases?: string[] }).aliases
if (aliases?.includes(normalized)) {
return { canonical: key, raw: normalized } return { canonical: key, raw: normalized }
} }
} }
@@ -114,7 +115,7 @@ async function ensureLanguages(content: string) {
languageLoadQueue.push(async () => { languageLoadQueue.push(async () => {
try { try {
const h = await getOrCreateHighlighter() const h = await getOrCreateHighlighter()
await h.loadLanguage(langKey) await h.loadLanguage(langKey as never)
loadedLanguages.add(langKey) loadedLanguages.add(langKey)
triggerLanguageListeners() triggerLanguageListeners()
} catch { } catch {
@@ -131,6 +132,52 @@ async function ensureLanguages(content: string) {
} }
} }
/**
 * Decodes HTML character references in `content` back into literal characters.
 *
 * Supported forms:
 * - decimal numeric references (`&#65;`),
 * - hexadecimal numeric references (`&#x41;` or `&#X41;`),
 * - the named entities `amp`, `lt`, `gt`, `quot`, `apos` and `nbsp`
 *   (names are matched case-insensitively).
 *
 * Decoding is repeated until the text stops changing, so multiply-encoded
 * input such as `&amp;lt;` collapses all the way down to `<`.
 *
 * Anything that is not a recognised reference (unknown names, malformed
 * numbers, code points outside the Unicode range) is left untouched.
 *
 * @param content - Text that may contain HTML entities.
 * @returns The text with every recognised entity decoded.
 */
export function decodeHtmlEntities(content: string): string {
  // Fast path: without an ampersand there is nothing to decode.
  if (!content.includes("&")) {
    return content
  }

  // Hex references need an explicit x/X marker and decimal references accept
  // digits only, so inputs like `&#12ab;` are no longer half-parsed.
  const entityPattern = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]+);/g

  // A Map (rather than an object literal) keeps inherited keys such as
  // "constructor" from being mistaken for entity names.
  const namedEntities = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    // NOTE(review): mapped to a plain space as written in the original; confirm U+00A0 is not intended.
    ["nbsp", " "],
  ])

  const decodeEntity = (match: string, entity: string): string => {
    if (entity.startsWith("#")) {
      const marker = entity[1]
      const isHex = marker === "x" || marker === "X"
      const codePoint = isHex ? parseInt(entity.slice(2), 16) : parseInt(entity.slice(1), 10)
      try {
        return String.fromCodePoint(codePoint)
      } catch {
        // fromCodePoint throws RangeError for values outside 0..0x10FFFF; keep the source text.
        return match
      }
    }

    return namedEntities.get(entity.toLowerCase()) ?? match
  }

  let result = content
  let previous = ""

  // Re-run until a pass changes nothing; this unwraps nested encodings.
  while (result.includes("&") && result !== previous) {
    previous = result
    result = result.replace(entityPattern, decodeEntity)
  }

  return result
}
async function runLanguageLoadQueue() { async function runLanguageLoadQueue() {
if (isQueueRunning || languageLoadQueue.length === 0) { if (isQueueRunning || languageLoadQueue.length === 0) {
return return
@@ -161,7 +208,8 @@ function setupRenderer(isDark: boolean) {
const renderer = new marked.Renderer() const renderer = new marked.Renderer()
renderer.code = (code: string, lang: string | undefined) => { renderer.code = (code: string, lang: string | undefined) => {
const encodedCode = encodeURIComponent(code) const decodedCode = decodeHtmlEntities(code)
const encodedCode = encodeURIComponent(decodedCode)
// Use "text" as default when no language is specified // Use "text" as default when no language is specified
const resolvedLang = lang && lang.trim() ? lang.trim() : "text" const resolvedLang = lang && lang.trim() ? lang.trim() : "text"
@@ -182,7 +230,7 @@ function setupRenderer(isDark: boolean) {
// Skip highlighting for "text" language or when highlighter is not available // Skip highlighting for "text" language or when highlighter is not available
if (resolvedLang === "text" || !highlighter) { if (resolvedLang === "text" || !highlighter) {
return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code>${escapeHtml(code)}</code></pre></div>` return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code>${escapeHtml(decodedCode)}</code></pre></div>`
} }
// Resolve language and check if it's loaded // Resolve language and check if it's loaded
@@ -191,13 +239,13 @@ function setupRenderer(isDark: boolean) {
// Skip highlighting for "text" aliases // Skip highlighting for "text" aliases
if (langKey === "text" || raw === "text") { if (langKey === "text" || raw === "text") {
return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code>${escapeHtml(code)}</code></pre></div>` return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code>${escapeHtml(decodedCode)}</code></pre></div>`
} }
// Use highlighting if language is loaded, otherwise fall back to plain code // Use highlighting if language is loaded, otherwise fall back to plain code
if (loadedLanguages.has(langKey)) { if (loadedLanguages.has(langKey)) {
try { try {
const html = highlighter.codeToHtml(code, { const html = highlighter.codeToHtml(decodedCode, {
lang: langKey, lang: langKey,
theme: currentTheme === "dark" ? "github-dark" : "github-light", theme: currentTheme === "dark" ? "github-dark" : "github-light",
}) })
@@ -207,7 +255,7 @@ function setupRenderer(isDark: boolean) {
} }
} }
return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code class="language-${escapedLang}">${escapeHtml(code)}</code></pre></div>` return `<div class="markdown-code-block" data-language="${escapedLang}" data-code="${encodedCode}">${header}<pre><code class="language-${escapedLang}">${escapeHtml(decodedCode)}</code></pre></div>`
} }
renderer.link = (href: string, title: string | null | undefined, text: string) => { renderer.link = (href: string, title: string | null | undefined, text: string) => {
@@ -216,7 +264,8 @@ function setupRenderer(isDark: boolean) {
} }
renderer.codespan = (code: string) => { renderer.codespan = (code: string) => {
return `<code class="inline-code">${escapeHtml(code)}</code>` const decoded = decodeHtmlEntities(code)
return `<code class="inline-code">${escapeHtml(decoded)}</code>`
} }
marked.use({ renderer }) marked.use({ renderer })
@@ -237,11 +286,13 @@ export async function renderMarkdown(content: string): Promise<string> {
await initMarkdown(currentTheme === "dark") await initMarkdown(currentTheme === "dark")
} }
const decoded = decodeHtmlEntities(content)
// Queue language loading but don't wait for it to complete // Queue language loading but don't wait for it to complete
await ensureLanguages(content) await ensureLanguages(decoded)
// Proceed to parse immediately - highlighting will be available on next render // Proceed to parse immediately - highlighting will be available on next render
return marked.parse(content) as Promise<string> return marked.parse(decoded) as Promise<string>
} }
export async function getSharedHighlighter(): Promise<Highlighter> { export async function getSharedHighlighter(): Promise<Highlighter> {
@@ -252,9 +303,8 @@ export function escapeHtml(text: string): string {
const map: Record<string, string> = { const map: Record<string, string> = {
"&": "&amp;", "&": "&amp;",
"<": "&lt;", "<": "&lt;",
">": "&gt;",
'"': "&quot;", '"': "&quot;",
"'": "&#039;", "'": "&#039;",
} }
return text.replace(/[&<>"']/g, (m) => map[m]) return text.replace(/[&<"']/g, (m) => map[m])
} }