Files
soul-yongping/miniprogram/utils/contentParser.js

94 lines
3.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Soul Startup Party (Soul创业派对) - content parsing utilities
* Parses TipTap HTML (including <span data-type="mention"> nodes) into segments that the reading page can display
*/
/**
 * Report whether a value looks like HTML markup.
 *
 * @param {*} content - Candidate value. Anything that is not a non-empty string yields false.
 * @returns {boolean} true when the trimmed string contains both angle brackets and at
 *   least one tag-like token: `<`, an ASCII letter, any run of non-`>` characters, `>`.
 */
function isHtmlContent(content) {
  if (typeof content !== 'string' || content === '') {
    return false
  }
  const candidate = content.trim()
  // Cheap substring checks first; the regex only runs when both brackets are present.
  const hasAngleBrackets = candidate.indexOf('<') !== -1 && candidate.indexOf('>') !== -1
  if (!hasAngleBrackets) {
    return false
  }
  const openingTagPattern = /<[a-z][^>]*>/i
  return openingTagPattern.test(candidate)
}
/** Entities decoded by this parser, keyed by the text between `&` and `;`. */
const BASIC_HTML_ENTITIES = Object.freeze({
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  '#39': "'"
})

/**
 * Decode the entities listed in BASIC_HTML_ENTITIES in a single pass.
 * A single pass matters: chained replaces would turn `&amp;lt;` into `<`
 * instead of the literal text `&lt;`.
 */
function decodeBasicEntities(text) {
  return text.replace(/&(nbsp|amp|lt|gt|quot|apos|#39);/g, (match, name) => BASIC_HTML_ENTITIES[name])
}

/** Strip every remaining tag from an HTML fragment and decode its entities. */
function htmlFragmentToText(fragment) {
  return decodeBasicEntities(fragment.replace(/<[^>]+>/g, ''))
}

/**
 * Split HTML into display lines and, per line, text / mention segments.
 *
 * Expected TipTap mention markup:
 *   <span data-type="mention" data-id="..." data-label="...">@nickname</span>
 *
 * Block-level tags (p, div, br, h1-h6, blockquote, ul, ol, li) are turned into
 * line breaks, list items are prefixed with "• ", and any other tag is dropped.
 * Blocks whose plain text is empty after trimming are skipped.
 *
 * @param {string} html - HTML source to parse.
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text?: string, userId?: string, nickname?: string}>> }}
 *   `lines[i]` is the trimmed plain text of block i; `segments[i]` holds the
 *   same block as `{ type: 'text', text }` and `{ type: 'mention', userId, nickname }` pieces.
 */
function parseHtmlToSegments(html) {
  const lines = []
  const segments = []

  // 1. Convert block-level elements into newline separators.
  const text = html
    .replace(/<\/p>\s*<p[^>]*>/gi, '\n\n')
    .replace(/<p[^>]*>/gi, '')
    .replace(/<\/p>/gi, '\n')
    .replace(/<div[^>]*>/gi, '')
    .replace(/<\/div>/gi, '\n')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/?h[1-6][^>]*>/gi, '\n')
    .replace(/<\/?blockquote[^>]*>/gi, '\n')
    .replace(/<\/?ul[^>]*>/gi, '\n')
    .replace(/<\/?ol[^>]*>/gi, '\n')
    .replace(/<li[^>]*>/gi, '• ')
    .replace(/<\/li>/gi, '\n')

  // 2. Walk each block, extracting plain text and mention spans.
  for (const block of text.split(/\n+/)) {
    const lineText = htmlFragmentToText(block).trim()
    if (!lineText) continue

    const blockSegments = []
    // A fresh global regex per block so lastIndex always starts at 0.
    // Group 1 is the opening tag, group 2 the span's inner text.
    const mentionRe = /(<span[^>]*data-type="mention"[^>]*>)([^<]*)<\/span>/gi
    let lastEnd = 0
    let m
    while ((m = mentionRe.exec(block)) !== null) {
      const before = htmlFragmentToText(block.slice(lastEnd, m.index))
      if (before) blockSegments.push({ type: 'text', text: before })

      // Read attributes from the opening tag only, never from the inner text.
      const openingTag = m[1]
      const idMatch = openingTag.match(/data-id="([^"]*)"/)
      const labelMatch = openingTag.match(/data-label="([^"]*)"/)
      const userId = idMatch ? decodeBasicEntities(idMatch[1]).trim() : ''
      // Without data-label, fall back to the inner text minus its leading "@".
      const nickname = labelMatch
        ? decodeBasicEntities(labelMatch[1]).trim()
        : decodeBasicEntities(m[2] || '').replace(/^@/, '').trim()
      blockSegments.push({ type: 'mention', userId, nickname })
      lastEnd = m.index + m[0].length
    }

    const after = htmlFragmentToText(block.slice(lastEnd))
    if (after) blockSegments.push({ type: 'text', text: after })

    lines.push(lineText)
    segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
  }

  return { lines, segments }
}
/**
 * Parse plain text (no mentions) line by line.
 *
 * Each line is trimmed; lines that are empty after trimming are dropped.
 *
 * @param {string} text - Plain text, lines separated by "\n".
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 *   `lines` holds the kept lines; `segments[i]` is a single text segment for `lines[i]`.
 */
function parsePlainTextToSegments(text) {
  const lines = []
  const segments = []
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim()
    if (line.length === 0) {
      continue
    }
    lines.push(line)
    segments.push([{ type: 'text', text: line }])
  }
  return { lines, segments }
}
/**
 * Parse raw content into the line / segment structure used by the reading page.
 *
 * Dispatches to the HTML parser when `isHtmlContent` reports markup, otherwise
 * to the plain-text parser.
 *
 * @param {string} rawContent - Raw content (TipTap HTML or plain text).
 * @returns {{ lines: string[], segments: Array<Array<{type, text?, userId?, nickname?}>> }}
 *   Empty `lines` and `segments` when `rawContent` is not a non-empty string.
 */
function parseContent(rawContent) {
  const isUsable = typeof rawContent === 'string' && rawContent !== ''
  if (!isUsable) {
    return { lines: [], segments: [] }
  }
  const parse = isHtmlContent(rawContent) ? parseHtmlToSegments : parsePlainTextToSegments
  return parse(rawContent)
}
// Public API of this module: the content parser entry point and the HTML detection helper.
module.exports = {
parseContent,
isHtmlContent
}