94 lines
3.4 KiB
JavaScript
94 lines
3.4 KiB
JavaScript
|
|
/**
 * Soul创业派对 - content parsing utilities
 * Parses TipTap HTML (including <span data-type="mention">) into segments that the reading page can render
 */
|
|||
|
|
|
|||
|
|
/**
 * Report whether `content` looks like HTML, i.e. contains at least one opening tag.
 *
 * @param {*} content - Value to inspect. Anything other than a non-empty string yields false.
 * @returns {boolean} True when the string contains `<`, an ASCII letter, any run of
 *   non-`>` characters, and then `>` (for example `<p>` or `<span class="x">`).
 */
function isHtmlContent(content) {
  if (typeof content !== 'string' || content === '') return false
  // A successful match already implies that both '<' and '>' are present, and
  // surrounding whitespace cannot affect an unanchored search, so no trim() or
  // includes() pre-checks are needed.
  return /<[a-z][^>]*>/i.test(content)
}
|
|||
|
|
|
|||
|
|
/** Entities recognised by decodeHtmlEntities, keyed by the text between `&` and `;`. */
const HTML_ENTITIES = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  '#39': "'"
}

/**
 * Replace the entities listed in HTML_ENTITIES with their literal characters.
 * Decoding happens in a single pass, so `&amp;lt;` becomes the text `&lt;`
 * instead of being decoded twice into `<`.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(nbsp|amp|lt|gt|quot|apos|#39);/g, (match, name) => HTML_ENTITIES[name])
}

/**
 * Convert an HTML fragment to display text: strip every tag first, then decode
 * entities (in that order, so a decoded `&lt;b&gt;` is not mistaken for a tag).
 */
function htmlFragmentToText(fragment) {
  return decodeHtmlEntities(fragment.replace(/<[^>]+>/g, ''))
}

/**
 * Parse HTML into paragraphs and mention segments.
 * TipTap mention: <span data-type="mention" data-id="..." data-label="...">@nickname</span>
 *
 * @param {string} html - HTML string to parse.
 * @returns {{ lines: string[], segments: Array<Array<Object>> }}
 *   `lines` holds the trimmed plain text of every non-empty block. `segments`
 *   holds, for the same blocks in the same order, a list of
 *   `{ type: 'text', text }` and `{ type: 'mention', userId, nickname }` items.
 */
function parseHtmlToSegments(html) {
  const lines = []
  const segments = []

  // 1. Turn block-level element boundaries into newlines.
  //    `\b` keeps `<p` / `<div` / `<li` from matching longer tag names such as `<link>`.
  const text = html
    .replace(/<\/p>\s*<p\b[^>]*>/gi, '\n\n')
    .replace(/<p\b[^>]*>/gi, '')
    .replace(/<\/p>/gi, '\n')
    .replace(/<div\b[^>]*>/gi, '')
    .replace(/<\/div>/gi, '\n')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/?h[1-6][^>]*>/gi, '\n')
    .replace(/<\/?blockquote[^>]*>/gi, '\n')
    .replace(/<\/?ul[^>]*>/gi, '\n')
    .replace(/<\/?ol[^>]*>/gi, '\n')
    .replace(/<li\b[^>]*>/gi, '• ')
    .replace(/<\/li>/gi, '\n')

  // 2. Walk each block, splitting it into plain-text runs and mentions.
  for (const block of text.split(/\n+/)) {
    const lineText = htmlFragmentToText(block).trim()
    // Blocks with no visible text are dropped entirely.
    if (!lineText) continue

    const blockSegments = []
    // Fresh /g regex per block so lastIndex always starts at 0.
    const mentionRe = /<span[^>]*data-type="mention"[^>]*>([^<]*)<\/span>/gi
    let lastEnd = 0
    let m
    while ((m = mentionRe.exec(block)) !== null) {
      const before = htmlFragmentToText(block.slice(lastEnd, m.index))
      if (before) blockSegments.push({ type: 'text', text: before })

      const idMatch = m[0].match(/data-id="([^"]*)"/)
      const labelMatch = m[0].match(/data-label="([^"]*)"/)
      const userId = idMatch ? decodeHtmlEntities(idMatch[1]).trim() : ''
      // Prefer the data-label attribute; otherwise fall back to the span text
      // with its leading '@' removed.
      const nickname = labelMatch
        ? decodeHtmlEntities(labelMatch[1]).trim()
        : decodeHtmlEntities(m[1] || '').replace(/^@/, '').trim()
      blockSegments.push({ type: 'mention', userId, nickname })

      lastEnd = m.index + m[0].length
    }
    const after = htmlFragmentToText(block.slice(lastEnd))
    if (after) blockSegments.push({ type: 'text', text: after })

    lines.push(lineText)
    segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
  }

  return { lines, segments }
}
|
|||
|
|
|
|||
|
|
/**
 * Parse plain text line by line (no mention handling).
 *
 * @param {string} text - Plain text; any non-string value is treated as empty.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 *   `lines` holds every non-blank line, trimmed. `segments` holds one
 *   single-item `[{ type: 'text', text }]` list per line, in the same order.
 */
function parsePlainTextToSegments(text) {
  // Guard so a direct call with null/undefined returns an empty result instead of throwing.
  if (typeof text !== 'string') return { lines: [], segments: [] }

  // Split on any run of CR/LF so '\r\n' and lone '\r' line endings are handled
  // as well; blank lines are discarded afterwards.
  const lines = text
    .split(/[\r\n]+/)
    .map(line => line.trim())
    .filter(line => line.length > 0)
  const segments = lines.map(line => [{ type: 'text', text: line }])
  return { lines, segments }
}
|
|||
|
|
|
|||
|
|
/**
 * Parse raw content into contentSegments for display on the reading page.
 *
 * @param {string} rawContent - Raw content (TipTap HTML or plain text).
 * @returns {{ lines: string[], segments: Array<Array<{type, text?, userId?, nickname?}>> }}
 *   Empty `lines` and `segments` when `rawContent` is falsy or not a string;
 *   otherwise the result of the HTML parser when isHtmlContent() accepts the
 *   input, or of the plain-text parser when it does not.
 */
function parseContent(rawContent) {
  const isUsableString = typeof rawContent === 'string' && rawContent.length > 0
  if (!isUsableString) return { lines: [], segments: [] }

  return isHtmlContent(rawContent)
    ? parseHtmlToSegments(rawContent)
    : parsePlainTextToSegments(rawContent)
}
|
|||
|
|
|
|||
|
|
// Public API of this module: the content parser and the HTML detection helper.
module.exports = { parseContent, isHtmlContent }
|