]*>/gi, '')
text = text.replace(/<\/div>/gi, '\n')
text = text.replace(/
/gi, '\n')
text = text.replace(/<\/?h[1-6][^>]*>/gi, '\n')
text = text.replace(/<\/?blockquote[^>]*>/gi, '\n')
text = text.replace(/<\/?ul[^>]*>/gi, '\n')
text = text.replace(/<\/?ol[^>]*>/gi, '\n')
text = text.replace(/
]*>/gi, '• ')
text = text.replace(/<\/li>/gi, '\n')
// 2. 逐段解析:提取文本与 mention
const blocks = text.split(/\n+/)
for (const block of blocks) {
const blockSegments = []
const mentionRe = /]*data-type="mention"[^>]*>([^<]*)<\/span>/gi
let lastEnd = 0
let m
while ((m = mentionRe.exec(block)) !== null) {
const before = block.slice(lastEnd, m.index).replace(/<[^>]+>/g, '').replace(/ /g, ' ').replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>')
if (before) blockSegments.push({ type: 'text', text: before })
const idMatch = m[0].match(/data-id="([^"]*)"/)
const labelMatch = m[0].match(/data-label="([^"]*)"/)
const userId = idMatch ? idMatch[1].trim() : ''
const nickname = labelMatch ? labelMatch[1].trim() : (m[1] || '').replace(/^@/, '').trim()
blockSegments.push({ type: 'mention', userId, nickname })
lastEnd = m.index + m[0].length
}
const after = block.slice(lastEnd).replace(/<[^>]+>/g, '').replace(/ /g, ' ').replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>')
if (after) blockSegments.push({ type: 'text', text: after })
const lineText = block.replace(/<[^>]+>/g, '').replace(/ /g, ' ').replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').trim()
if (lineText) {
lines.push(lineText)
segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
}
}
return { lines, segments }
}
/**
 * Parse plain text line by line (no mention handling).
 *
 * The input is split on '\n'; each line is trimmed and blank lines are dropped.
 * Every remaining line yields one entry in `lines` and one single-element
 * segment list of the form [{ type: 'text', text: line }] in `segments`.
 *
 * @param {string} text - Plain text to parse.
 * @returns {{ lines: string[], segments: Array<Array<{ type: string, text: string }>> }}
 */
function parsePlainTextToSegments(text) {
  const lines = []
  const segments = []
  for (const rawLine of text.split('\n')) {
    const trimmed = rawLine.trim()
    // Skip lines that are empty once surrounding whitespace is removed.
    if (trimmed.length === 0) {
      continue
    }
    lines.push(trimmed)
    segments.push([{ type: 'text', text: trimmed }])
  }
  return { lines, segments }
}
/**
 * Parse raw content into content segments (used for display on the reading page).
 *
 * Anything that is not a non-empty string produces an empty result. Otherwise
 * the content is handed to parseHtmlToSegments when isHtmlContent reports HTML,
 * and to parsePlainTextToSegments in every other case.
 *
 * @param {string} rawContent - Raw content (TipTap HTML or plain text).
 * @returns {{ lines: string[], segments: Array<Array<Object>> }} One segment
 *   list per line; segment objects carry a `type` field such as 'text' or 'mention'.
 */
function parseContent(rawContent) {
  const isNonEmptyString = typeof rawContent === 'string' && rawContent.length > 0
  if (!isNonEmptyString) {
    return { lines: [], segments: [] }
  }
  return isHtmlContent(rawContent)
    ? parseHtmlToSegments(rawContent)
    : parsePlainTextToSegments(rawContent)
}
// Public API (CommonJS): only parseContent and isHtmlContent are exported;
// parsePlainTextToSegments is not part of the export list.
module.exports = {
parseContent,
isHtmlContent
}