/**
 * Soul创业派对 (Soul Startup Party) - content parsing utilities
 * Parses TipTap HTML (including <span data-type="mention">) into segments that the reading page can render
 */

/**
 * Decide whether a piece of content should be treated as HTML.
 *
 * Content counts as HTML when it is a string containing at least one
 * opening-tag-like token: `<`, an ASCII letter, then anything up to `>`.
 * A lone closing tag such as `</p>` does not match, and text like
 * `1 <a and b> 2` does (the check is a heuristic, not a parser).
 *
 * @param {*} content - Raw content; anything that is not a string yields false.
 * @returns {boolean} True when the content contains tag-like markup.
 */
function isHtmlContent(content) {
  if (!content || typeof content !== 'string') return false
  // The pattern already requires both '<' and '>', so no separate includes() checks are needed.
  return /<[a-z][^>]*>/i.test(content)
}

/**
 * Decode the small set of HTML entities this parser handles.
 *
 * `&amp;` is decoded last so that double-escaped input such as `&amp;lt;`
 * becomes the literal text `&lt;` instead of being decoded twice into `<`.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Text with the supported entities replaced.
 */
function decodeHtmlEntities(text) {
  return text
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, '&')
}

/**
 * Convert an HTML fragment to display text: drop every tag, then decode entities.
 * Tags are removed first so that an escaped `&lt;b&gt;` survives as visible text.
 *
 * @param {string} fragment - HTML fragment.
 * @returns {string} Plain text (not trimmed).
 */
function htmlFragmentToText(fragment) {
  return decodeHtmlEntities(fragment.replace(/<[^>]+>/g, ''))
}

/**
 * Parse HTML into paragraph lines and per-line text / mention segments.
 *
 * Mentions are matched in the form
 * <span data-type="mention" data-id="..." data-label="...">@nickname</span>
 *
 * @param {string} html - HTML source; non-string or empty input yields an empty result.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text?: string, userId?: string, nickname?: string}>> }}
 *   `lines[i]` is the trimmed plain text of paragraph i; `segments[i]` is the
 *   ordered list of text and mention pieces of that same paragraph.
 */
function parseHtmlToSegments(html) {
  const lines = []
  const segments = []
  if (!html || typeof html !== 'string') return { lines, segments }

  // 1. Turn block-level elements into newline-separated paragraphs.
  //    `\b` keeps <p>/<li> from also matching tags such as <pre> or <link>.
  const text = html
    .replace(/<\/p>\s*<p\b[^>]*>/gi, '\n\n')
    .replace(/<p\b[^>]*>/gi, '')
    .replace(/<\/p>/gi, '\n')
    .replace(/<div[^>]*>/gi, '')
    .replace(/<\/div>/gi, '\n')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/?h[1-6][^>]*>/gi, '\n')
    .replace(/<\/?blockquote[^>]*>/gi, '\n')
    .replace(/<\/?ul[^>]*>/gi, '\n')
    .replace(/<\/?ol[^>]*>/gi, '\n')
    .replace(/<li\b[^>]*>/gi, '• ')
    .replace(/<\/li>/gi, '\n')

  // 2. Walk each paragraph, splitting it into text and mention segments.
  const mentionRe = /<span[^>]*data-type="mention"[^>]*>([^<]*)<\/span>/gi
  for (const block of text.split(/\n+/)) {
    const blockSegments = []
    let lastEnd = 0
    let m
    // The regex is global and shared across paragraphs, so restart it for each one.
    mentionRe.lastIndex = 0
    while ((m = mentionRe.exec(block)) !== null) {
      const before = htmlFragmentToText(block.slice(lastEnd, m.index))
      if (before) blockSegments.push({ type: 'text', text: before })

      const idMatch = m[0].match(/data-id="([^"]*)"/)
      const labelMatch = m[0].match(/data-label="([^"]*)"/)
      const userId = idMatch ? idMatch[1].trim() : ''
      // Prefer the data-label attribute; otherwise use the span text without its leading "@".
      const rawNickname = labelMatch ? labelMatch[1] : (m[1] || '').replace(/^@/, '')
      const nickname = decodeHtmlEntities(rawNickname).trim()
      blockSegments.push({ type: 'mention', userId, nickname })

      lastEnd = m.index + m[0].length
    }
    const after = htmlFragmentToText(block.slice(lastEnd))
    if (after) blockSegments.push({ type: 'text', text: after })

    // Paragraphs with no visible text (empty or whitespace only) are dropped.
    const lineText = htmlFragmentToText(block).trim()
    if (lineText) {
      lines.push(lineText)
      segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
    }
  }
  return { lines, segments }
}

/**
 * Parse plain text line by line (no mention handling).
 *
 * The text is split on "\n"; each line is trimmed and empty lines are dropped.
 * Every remaining line becomes a single text segment.
 *
 * @param {string} text - Plain text; non-string input yields an empty result.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 */
function parsePlainTextToSegments(text) {
  if (typeof text !== 'string') return { lines: [], segments: [] }
  const lines = text
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.length > 0)
  const segments = lines.map(line => [{ type: 'text', text: line }])
  return { lines, segments }
}

/**
 * Parse raw content into content segments for display on the reading page.
 *
 * Empty or non-string input yields an empty result. Content that
 * `isHtmlContent` recognises as HTML goes through `parseHtmlToSegments`;
 * everything else is handled by `parsePlainTextToSegments`.
 *
 * @param {string} rawContent - Raw content (TipTap HTML or plain text).
 * @returns {{ lines: string[], segments: Array<Array<{type, text?, userId?, nickname?}>> }}
 */
function parseContent(rawContent) {
  if (!rawContent || typeof rawContent !== 'string') {
    return { lines: [], segments: [] }
  }
  return isHtmlContent(rawContent)
    ? parseHtmlToSegments(rawContent)
    : parsePlainTextToSegments(rawContent)
}

module.exports = {
|
||
parseContent,
|
||
isHtmlContent
|
||
}
|