]*>/gi, '')
text = text.replace(/<\/div>/gi, '\n')
text = text.replace(/
/gi, '\n')
text = text.replace(/<\/?h[1-6][^>]*>/gi, '\n')
text = text.replace(/<\/?blockquote[^>]*>/gi, '\n')
text = text.replace(/<\/?ul[^>]*>/gi, '\n')
text = text.replace(/<\/?ol[^>]*>/gi, '\n')
text = text.replace(/
]*>/gi, '• ')
text = text.replace(/<\/li>/gi, '\n')
// 2. 逐段解析
const blocks = text.split(/\n+/)
for (const block of blocks) {
if (!block.trim()) continue
const blockSegs = parseBlockToSegments(block)
if (!blockSegs.length) continue
// 纯图片行独立成段
if (blockSegs.length === 1 && blockSegs[0].type === 'image') {
lines.push('')
segments.push(blockSegs)
continue
}
// 行纯文本用于 lines(previewParagraphs 降级展示)
const lineText = decodeEntities(block.replace(/<[^>]+>/g, '')).trim()
lines.push(lineText)
segments.push(blockSegs)
}
return { lines, segments }
}
/**
 * Strip common Markdown formatting markers while keeping the text they wrap.
 *
 * Handled constructs: ATX headings (`# ` … `###### `), bold (`**x**`, `__x__`),
 * italic (`*x*`, `_x_`), strikethrough (`~~x~~`), inline code (`` `x` ``),
 * block-quote prefixes (`> `), horizontal rules (`---`), unordered list
 * bullets (`* ` / `- `, rewritten to `• `) and ordered list numbers (`1. `).
 *
 * @param {string} text - Raw Markdown / plain text.
 * @returns {string} The text with markers removed; falsy input is returned unchanged.
 */
function stripMarkdownFormatting(text) {
  if (!text) return text
  let s = text
  // Heading markers at the start of a line.
  s = s.replace(/^#{1,6}\s+/gm, '')
  // Bold must be unwrapped before italic so `**x**` is not read as nested `*…*`.
  s = s.replace(/\*\*(.+?)\*\*/g, '$1')
  s = s.replace(/__(.+?)__/g, '$1')
  // Italic with asterisks: the content must start and end with a non-space,
  // non-asterisk character, so a list bullet (`* item`) or spaced arithmetic
  // (`2 * 3 * 4`) is never mistaken for an opening marker.
  // Written without lookbehind assertions, which are a syntax error on
  // engines that predate ES2018.
  s = s.replace(/\*([^\s*](?:[^*\n]*[^\s*])?)\*/g, '$1')
  // Italic with underscores: only when the markers are not glued to word
  // characters, so identifiers such as snake_case_name stay intact.
  s = s.replace(/(^|\W)_([^\s_](?:[^_\n]*[^\s_])?)_(?!\w)/g, '$1$2')
  // Strikethrough.
  s = s.replace(/~~(.+?)~~/g, '$1')
  // Inline code.
  s = s.replace(/`([^`\n]+)`/g, '$1')
  // Block-quote prefix.
  s = s.replace(/^>\s+/gm, '')
  // Horizontal rule on its own line.
  s = s.replace(/^---$/gm, '')
  // Unordered list markers become a visible bullet.
  s = s.replace(/^\* /gm, '• ')
  s = s.replace(/^- /gm, '• ')
  // Ordered list numbering is dropped.
  s = s.replace(/^\d+\.\s/gm, '')
  return s
}
/**
 * Parse plain text / Markdown line by line.
 *
 * Markdown markers are stripped first; each remaining non-blank line (after
 * trimming) becomes one entry in `lines` and one single-item segment list
 * `[{ type: 'text', text }]` in `segments`.
 *
 * @param {string} text
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 */
function parsePlainTextToSegments(text) {
  const lines = []
  const segments = []
  for (const rawLine of stripMarkdownFormatting(text).split('\n')) {
    const line = rawLine.trim()
    // Blank lines carry no content and are dropped.
    if (line.length === 0) continue
    lines.push(line)
    segments.push([{ type: 'text', text: line }])
  }
  return { lines, segments }
}
/**
 * Parse raw content into content segments (used for display on the reading page).
 *
 * Anything that is not a non-empty string yields an empty result. HTML content
 * (as decided by `isHtmlContent`) goes through the HTML parser; everything else
 * is treated as plain text / Markdown.
 *
 * @param {string} rawContent
 * @returns {{ lines: string[], segments: Array<Array<Object>> }}
 *   `lines` holds one plain-text string per block; `segments` holds, per block,
 *   a list of segment objects carrying a `type` field.
 */
function parseContent(rawContent) {
  const isUsable = typeof rawContent === 'string' && rawContent !== ''
  if (!isUsable) return { lines: [], segments: [] }
  return isHtmlContent(rawContent)
    ? parseHtmlToSegments(rawContent)
    : parsePlainTextToSegments(rawContent)
}
// Public API of this module (CommonJS): the content-parsing entry point and
// the HTML-detection helper.
// NOTE(review): isHtmlContent is defined outside the visible part of this file.
module.exports = {
parseContent,
isHtmlContent
}