]*>/gi, '')
text = text.replace(/<\/div>/gi, '\n')
text = text.replace(/
/gi, '\n')
text = text.replace(/<\/?h[1-6][^>]*>/gi, '\n')
text = text.replace(/<\/?blockquote[^>]*>/gi, '\n')
text = text.replace(/<\/?ul[^>]*>/gi, '\n')
text = text.replace(/<\/?ol[^>]*>/gi, '\n')
text = text.replace(/
]*>/gi, '• ')
text = text.replace(/<\/li>/gi, '\n')
// 2. 逐段解析
const blocks = text.split(/\n+/)
for (const block of blocks) {
if (!block.trim()) continue
let blockSegs = parseBlockToSegments(block)
if (!blockSegs.length) continue
// 纯图片行独立成段
if (blockSegs.length === 1 && blockSegs[0].type === 'image') {
lines.push('')
segments.push(blockSegs)
continue
}
// 对 text 段再跑一遍 @人名 / #标签 自动匹配(处理未用 TipTap 插入而是手打的 @xxx)
if (config && (config.persons?.length || config.linkTags?.length)) {
const expanded = []
for (const seg of blockSegs) {
if (seg.type === 'text' && seg.text) {
const sub = matchLineToSegments(seg.text, config)
expanded.push(...sub)
} else {
expanded.push(seg)
}
}
blockSegs = expanded
}
// 行纯文本用于 lines(previewParagraphs 降级展示)
const lineText = decodeEntities(block.replace(/<[^>]+>/g, '')).trim()
lines.push(lineText)
segments.push(blockSegs)
}
return { lines, segments }
}
/**
 * Strip common Markdown formatting markers from text while keeping the
 * content they wrap.
 *
 * Handled markers: ATX headings (#..######), **bold**, __bold__, *italic*,
 * _italic_, ~~strikethrough~~, `inline code`, "> " blockquote prefixes,
 * "---" horizontal rules, "* " / "- " bullets (rewritten to "• ") and
 * "1. " ordered-list prefixes (removed).
 *
 * @param {string} text - Raw plain text / Markdown.
 * @returns {string} The text without formatting markers. Falsy input is
 *   returned unchanged.
 */
function stripMarkdownFormatting(text) {
  if (!text) return text
  let s = text
  s = s.replace(/^#{1,6}\s+/gm, '')
  // Convert "* " bullets before the single-asterisk italic pass below;
  // otherwise the list marker can pair with a later "*" on the same line
  // and be consumed as emphasis.
  s = s.replace(/^\* /gm, '• ')
  s = s.replace(/\*\*(.+?)\*\*/g, '$1')
  s = s.replace(/__(.+?)__/g, '$1')
  // Single-marker italics: the lookarounds keep leftover "**" / "__" pairs
  // from being treated as two single markers.
  s = s.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '$1')
  s = s.replace(/(?<!_)_(?!_)(.+?)(?<!_)_(?!_)/g, '$1')
  s = s.replace(/~~(.+?)~~/g, '$1')
  s = s.replace(/`(.+?)`/g, '$1')
  s = s.replace(/^>\s+/gm, '')
  s = s.replace(/^---$/gm, '')
  s = s.replace(/^- /gm, '• ')
  s = s.replace(/^\d+\.\s/gm, '')
  return s
}
/**
 * Scan one line of plain text for "@name" mentions and "#label" link tags
 * and split it into an ordered list of segments.
 *
 * Names, labels and their aliases are trimmed and compared
 * case-insensitively; when several candidates could match at the same
 * position the longest one wins. Both the ASCII prefixes ("@", "#") and
 * their fullwidth forms (U+FF20, U+FF03) are recognised. When the same
 * key is declared by several entries, the first entry wins.
 *
 * @param {string} line - Text to scan.
 * @param {object} [config]
 * @param {Array<object>} [config.persons] - Entries shaped like
 *   { personId, name, aliases }.
 * @param {Array<object>} [config.linkTags] - Entries shaped like
 *   { tagId, label, type, pagePath, mpKey, url, aliases }.
 *   `aliases` may be a comma-separated string or an array of strings.
 * @returns {Array<object>} Segments in source order, each one of
 *   { type: 'text', text },
 *   { type: 'mention', userId, nickname } or
 *   { type: 'linkTag', label, url, tagType, pagePath, tagId, appId, mpKey }.
 *   Never empty: a line without matches yields a single text segment.
 */
function matchLineToSegments(line, config) {
  const persons = config?.persons || []
  const linkTags = config?.linkTags || []
  if (!persons.length && !linkTags.length) {
    return [{ type: 'text', text: line }]
  }

  const normalize = s => String(s ?? '').trim().toLowerCase()

  const aliasList = aliases => {
    if (Array.isArray(aliases)) return aliases
    return typeof aliases === 'string' ? aliases.split(',') : []
  }

  // A Map (not a plain object) so that names such as "constructor" or
  // "toString" do not collide with Object.prototype members.
  const buildLookup = (items, primaryKey) => {
    const lookup = new Map()
    for (const item of items) {
      for (const raw of [item[primaryKey], ...aliasList(item.aliases)]) {
        const key = normalize(raw)
        if (key && !lookup.has(key)) lookup.set(key, item)
      }
    }
    return lookup
  }

  const personMap = buildLookup(persons, 'name')
  const tagMap = buildLookup(linkTags, 'label')

  const escapeRegExp = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // Longest key first so that "alice" is preferred over its prefix "ali".
  const alternation = lookup =>
    [...lookup.keys()]
      .sort((a, b) => b.length - a.length)
      .map(escapeRegExp)
      .join('|')

  // Prefixes are written as escapes so the fullwidth variants survive any
  // encoding normalisation of this source file.
  const parts = []
  if (personMap.size) parts.push(`[@\uFF20](${alternation(personMap)})`)
  if (tagMap.size) parts.push(`[#\uFF03](${alternation(tagMap)})`)
  if (!parts.length) return [{ type: 'text', text: line }]

  const pattern = new RegExp(parts.join('|'), 'gi')

  const toSegment = full => {
    const prefix = full[0]
    const body = full.slice(1)
    const key = normalize(body)
    if (prefix === '@' || prefix === '\uFF20') {
      const person = personMap.get(key)
      if (person) {
        return { type: 'mention', userId: person.personId || '', nickname: person.name || body }
      }
    } else {
      const tag = tagMap.get(key)
      if (tag) {
        return {
          type: 'linkTag',
          label: tag.label || body,
          url: tag.url || '',
          tagType: tag.type || 'url',
          pagePath: tag.pagePath || '',
          tagId: tag.tagId || '',
          appId: tag.appId || '',
          mpKey: tag.mpKey || ''
        }
      }
    }
    // Defensive fallback: regex case folding and toLowerCase() can disagree
    // for a few characters, so keep the raw text if the lookup misses.
    return { type: 'text', text: full }
  }

  const segs = []
  let lastEnd = 0
  for (const m of line.matchAll(pattern)) {
    const full = m[0]
    if (m.index > lastEnd) {
      segs.push({ type: 'text', text: line.slice(lastEnd, m.index) })
    }
    segs.push(toSegment(full))
    lastEnd = m.index + full.length
  }
  if (lastEnd < line.length) {
    segs.push({ type: 'text', text: line.slice(lastEnd) })
  }
  return segs.length ? segs : [{ type: 'text', text: line }]
}
/**
 * Parse plain text / Markdown line by line.
 *
 * Markdown markers are stripped first; every remaining non-blank line is
 * trimmed and converted to segments via matchLineToSegments.
 *
 * @param {string} text
 * @param {object} [config] - Passed through to matchLineToSegments.
 * @returns {{ lines: string[], segments: Array<Array<object>> }}
 */
function parsePlainTextToSegments(text, config) {
  const lines = []
  const segments = []
  for (const rawLine of stripMarkdownFormatting(text).split('\n')) {
    const line = rawLine.trim()
    if (line.length === 0) continue
    lines.push(line)
    segments.push(matchLineToSegments(line, config))
  }
  return { lines, segments }
}
/**
 * Remove leftover Markdown image references from text: first a file name
 * immediately followed by an image reference (e.g. "image.png![alt](url)"),
 * then any remaining bare "![alt](url)".
 *
 * @param {string} text
 * @returns {string} Text without image references; falsy input is returned
 *   unchanged.
 */
function stripOrphanImageRefs(text) {
  if (!text) return text
  const fileNameWithImage = /[^\s]*\.(?:png|jpg|jpeg|gif|webp|svg|bmp)!\[[^\]]*\]\([^)]*\)/gi
  const bareImage = /!\[[^\]]*\]\([^)]*\)/g
  return text.replace(fileNameWithImage, '').replace(bareImage, '')
}
/**
 * Parse raw content into content segments (used by the reading page).
 *
 * Orphan Markdown image references are removed first. The result is then
 * handed to the HTML parser when isHtmlContent reports HTML, otherwise to
 * the plain-text / Markdown parser.
 *
 * @param {string} rawContent
 * @param {object} [config] - { persons: [], linkTags: [] }
 * @returns {{ lines: string[], segments: Array<Array<object>> }} Empty
 *   arrays when rawContent is not a non-empty string.
 */
function parseContent(rawContent, config) {
  const isUsable = typeof rawContent === 'string' && rawContent.length > 0
  if (!isUsable) {
    return { lines: [], segments: [] }
  }
  const content = stripOrphanImageRefs(rawContent)
  const parse = isHtmlContent(content) ? parseHtmlToSegments : parsePlainTextToSegments
  return parse(content, config)
}
// Public CommonJS exports of this module: the main parsing entry point and
// the HTML-detection helper.
module.exports = {
parseContent,
isHtmlContent
}