Files
soul-yongping/miniprogram/utils/contentParser.js

94 lines
3.4 KiB
JavaScript
Raw Normal View History

/**
* Soul创业派对 - 内容解析工具
* 解析 TipTap HTML <span data-type="mention">为阅读页可展示的 segments
*/
/**
 * Decide whether a piece of content should be treated as HTML.
 *
 * Content counts as HTML only when it is a non-empty string that contains at
 * least one tag-like token: `<`, an ASCII letter, then anything up to `>`.
 * Stray comparison signs such as "a < b" therefore do not qualify.
 *
 * @param {*} content - Value to inspect; anything that is not a string yields false.
 * @returns {boolean} True when the trimmed content contains an opening-tag-like token.
 */
function isHtmlContent(content) {
  if (typeof content !== 'string' || content === '') {
    return false
  }
  const candidate = content.trim()
  // Cheap pre-checks before running the regular expression.
  if (candidate.indexOf('<') === -1 || candidate.indexOf('>') === -1) {
    return false
  }
  return /<[a-z][^>]*>/i.test(candidate)
}
/**
 * Replacement text for the HTML entities this parser understands.
 * `&nbsp;` maps to a regular space so the result lays out like normal text.
 */
const BASIC_ENTITY_TEXT = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  '#39': "'"
}

/**
 * Decode the supported entities in ONE pass.
 *
 * A chain of sequential replaces that handles `&amp;` before `&lt;` turns the
 * escaped text `&amp;lt;` into `<` (double decoding). Matching every entity
 * with a single regex decodes each source entity exactly once.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @returns {string} Text with the supported entities decoded.
 */
function decodeBasicEntities(str) {
  return str.replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (whole, name) => BASIC_ENTITY_TEXT[name])
}

/**
 * Remove any remaining tags from an HTML fragment, then decode its entities.
 * Tags are stripped first so that a decoded `&lt;` is never mistaken for markup.
 *
 * @param {string} fragment - HTML fragment.
 * @returns {string} Plain text of the fragment.
 */
function stripTagsAndDecode(fragment) {
  return decodeBasicEntities(fragment.replace(/<[^>]+>/g, ''))
}

/**
 * Parse TipTap HTML into paragraphs and mention segments.
 *
 * TipTap mention markup:
 *   <span data-type="mention" data-id="..." data-label="...">@nickname</span>
 *
 * @param {string} html - HTML string to parse.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text?: string, userId?: string, nickname?: string}>> }}
 *   `lines[i]` is the trimmed plain text of paragraph i; `segments[i]` is the
 *   same paragraph split into `text` and `mention` pieces. Paragraphs whose
 *   plain text is empty are dropped from both arrays.
 */
function parseHtmlToSegments(html) {
  const lines = []
  const segments = []

  // 1. Turn block-level elements into newline-separated paragraphs.
  const text = html
    .replace(/<\/p>\s*<p[^>]*>/gi, '\n\n')
    .replace(/<p[^>]*>/gi, '')
    .replace(/<\/p>/gi, '\n')
    .replace(/<div[^>]*>/gi, '')
    .replace(/<\/div>/gi, '\n')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/?h[1-6][^>]*>/gi, '\n')
    .replace(/<\/?blockquote[^>]*>/gi, '\n')
    .replace(/<\/?ul[^>]*>/gi, '\n')
    .replace(/<\/?ol[^>]*>/gi, '\n')
    .replace(/<li[^>]*>/gi, '• ')
    .replace(/<\/li>/gi, '\n')

  // 2. Walk each paragraph, extracting plain text and mentions in order.
  for (const block of text.split(/\n+/)) {
    const blockSegments = []
    // Fresh regex per paragraph: a /g regex carries lastIndex state between exec() calls.
    const mentionRe = /<span[^>]*data-type="mention"[^>]*>([^<]*)<\/span>/gi
    let lastEnd = 0
    let m
    while ((m = mentionRe.exec(block)) !== null) {
      const before = stripTagsAndDecode(block.slice(lastEnd, m.index))
      if (before) blockSegments.push({ type: 'text', text: before })

      // Attribute values are entity-encoded in serialized HTML, so decode them too.
      const idMatch = m[0].match(/data-id="([^"]*)"/)
      const labelMatch = m[0].match(/data-label="([^"]*)"/)
      const userId = idMatch ? decodeBasicEntities(idMatch[1]).trim() : ''
      // Prefer the label attribute; otherwise use the visible text without its leading "@".
      const nickname = labelMatch
        ? decodeBasicEntities(labelMatch[1]).trim()
        : decodeBasicEntities(m[1] || '').replace(/^@/, '').trim()
      blockSegments.push({ type: 'mention', userId, nickname })
      lastEnd = m.index + m[0].length
    }

    const after = stripTagsAndDecode(block.slice(lastEnd))
    if (after) blockSegments.push({ type: 'text', text: after })

    const lineText = stripTagsAndDecode(block).trim()
    if (lineText) {
      lines.push(lineText)
      segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
    }
  }

  return { lines, segments }
}
/**
 * Convert plain text into the lines/segments structure.
 *
 * The text is split on "\n"; each line is trimmed and empty lines are
 * discarded. Every remaining line becomes a single `text` segment. No mention
 * detection is performed on plain text.
 *
 * @param {string} text - Plain-text content.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 */
function parsePlainTextToSegments(text) {
  const lines = []
  const segments = []
  for (const rawLine of text.split('\n')) {
    const cleaned = rawLine.trim()
    if (cleaned.length === 0) continue
    lines.push(cleaned)
    segments.push([{ type: 'text', text: cleaned }])
  }
  return { lines, segments }
}
/**
 * Parse raw content into the lines/segments structure used by the reading page.
 *
 * Anything that is not a non-empty string yields empty arrays. Otherwise the
 * content is sent to the HTML parser when it looks like HTML, and to the
 * plain-text parser when it does not.
 *
 * @param {string} rawContent - Raw content: TipTap HTML or plain text.
 * @returns {{ lines: string[], segments: Array<Array<{type, text?, userId?, nickname?}>> }}
 */
function parseContent(rawContent) {
  const isUsable = typeof rawContent === 'string' && rawContent !== ''
  if (!isUsable) {
    return { lines: [], segments: [] }
  }
  const parse = isHtmlContent(rawContent) ? parseHtmlToSegments : parsePlainTextToSegments
  return parse(rawContent)
}
// Public API of this module (CommonJS). Only the entry point and the HTML
// detector are exported; the HTML and plain-text segment parsers stay internal.
module.exports = {
parseContent,
isHtmlContent
}