Files
soul-yongping/miniprogram/utils/contentParser.js
卡若 aca006e1b2 feat: 完成20260315用户管理3全部5个功能
1. 链接人和事:补充CKB_OPEN_API_KEY/ACCOUNT配置,新增fix-ckb批量创建获客计划API
2. 规则配置:打通DB规则与ruleEngine,新增/api/miniprogram/user-rules接口,
   ruleEngine改为从API动态加载规则并按enabled状态执行
3. 获客计划:修复获客数统计中personId/token不匹配导致永远为0的bug,
   管理端新增"修复CKB密钥"按钮
4. 支付问题:修复钱包充值和代付分享中openId缺失导致400错误,
   添加getOpenId()兜底逻辑
5. 朋友圈分享:shareToMoments改为复制文章前200字+省略号+手指箭头emoji

Made-with: Cursor
2026-03-15 23:00:42 +08:00

290 lines
10 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Soul创业派对 - 内容解析工具
* 解析 TipTap HTML 为阅读页可展示的 segments
*
* segment 类型:
* { type: 'text', text }
* { type: 'mention', userId, nickname } — @某人,点击加好友
* { type: 'linkTag', label, url } — #链接标签,点击跳转
* { type: 'image', src, alt } — 图片
*/
/**
 * Heuristically decide whether a piece of content is HTML.
 *
 * The content counts as HTML when it contains at least one thing that looks
 * like an opening tag: "<", an ASCII letter, then any run of non-">"
 * characters closed by ">".
 *
 * @param {*} content - Candidate content; anything that is not a non-empty string yields false.
 * @returns {boolean} true when a tag-like token is present.
 */
function isHtmlContent(content) {
  if (typeof content !== 'string' || content.length === 0) {
    return false
  }
  // A successful match necessarily contains both "<" and ">", and neither of
  // those can sit in leading/trailing whitespace, so no extra pre-checks or
  // trimming are needed.
  const tagLike = /<[a-z][^>]*>/i
  return tagLike.test(content)
}
/**
 * Decode common HTML character references in a string.
 *
 * Supported: &nbsp; &amp; &lt; &gt; &quot; &apos; plus decimal (&#39;) and
 * hexadecimal (&#x27;) numeric references. Names are case-sensitive.
 * A non-breaking space (named or numeric) becomes a plain space.
 * Unknown names and invalid code points are left untouched.
 *
 * Decoding is done in a single pass so that already-escaped text is decoded
 * exactly once: "&amp;lt;" becomes "&lt;", not "<".
 *
 * @param {string} str - Text that may contain character references.
 * @returns {string} The decoded text.
 */
function decodeEntities(str) {
  const named = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }
  const reference = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(nbsp|amp|lt|gt|quot|apos));/g
  return str.replace(reference, (whole, dec, hex, name) => {
    if (name) return named[name]
    const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16)
    // Keep the numeric form of a non-breaking space consistent with &nbsp;.
    if (codePoint === 0xa0) return ' '
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff
    if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) return whole
    return String.fromCodePoint(codePoint)
  })
}
/**
 * Parse one HTML block (a single logical line) into an array of segments.
 *
 * Recognised inline elements:
 *   - <span data-type="mention">  -> { type: 'mention', userId, nickname }
 *   - <span data-type="linkTag">  -> { type: 'linkTag', label, url, tagType, pagePath, tagId, appId, mpKey }
 *   - <a href="...">#label</a>    -> legacy linkTag (no tagType; url may be empty)
 *   - <img src="..." alt="...">   -> { type: 'image', src, alt }
 * Everything between them becomes { type: 'text', text } with remaining tags
 * stripped and entities decoded; whitespace-only text is dropped.
 *
 * Attribute values and inner text of the inline elements are entity-decoded
 * as well, so a serialized URL such as "a?x=1&amp;y=2" comes out as
 * "a?x=1&y=2", consistent with how plain text segments are decoded.
 *
 * @param {string} block - HTML fragment with block-level tags already removed.
 * @returns {Array<object>} Segments in document order.
 */
function parseBlockToSegments(block) {
  const segs = []
  const stripTags = (html) => html.replace(/<[^>]+>/g, '')
  // Returns the decoded attribute value, or null when the attribute is absent.
  const readAttr = (tag, re) => {
    const hit = tag.match(re)
    return hit ? decodeEntities(hit[1]) : null
  }
  const pushText = (rawHtml) => {
    const text = decodeEntities(stripTags(rawHtml))
    if (text.trim()) segs.push({ type: 'text', text })
  }

  // One combined pattern for every inline element we care about.
  // Capture group 1 = href and group 2 = "#label" of the legacy <a> form.
  const tokenRe = /<span[^>]*data-type="mention"[^>]*>[\s\S]*?<\/span>|<span[^>]*data-type="linkTag"[^>]*>[\s\S]*?<\/span>|<a[^>]*href="([^"]*)"[^>]*>(#[^<]*)<\/a>|<img[^>]*\/?>/gi
  let lastEnd = 0
  let m
  while ((m = tokenRe.exec(block)) !== null) {
    // Plain text that precedes this token.
    pushText(block.slice(lastEnd, m.index))
    const tag = m[0]
    if (/data-type="mention"/i.test(tag)) {
      // @mention: prefer data-label, fall back to the visible text without "@".
      const userId = (readAttr(tag, /data-id="([^"]*)"/) ?? '').trim()
      const label = readAttr(tag, /data-label="([^"]*)"/)
      const nickname = label !== null
        ? label.trim()
        : decodeEntities(stripTags(tag)).replace(/^@/, '').trim()
      if (userId || nickname) segs.push({ type: 'mention', userId, nickname })
    } else if (/data-type="linkTag"/i.test(tag)) {
      // #linkTag in the custom span format; all routing info lives in data-* attributes.
      const innerText = decodeEntities(stripTags(tag)).replace(/^#/, '').trim()
      segs.push({
        type: 'linkTag',
        label: innerText || '#',
        url: readAttr(tag, /data-url="([^"]*)"/) ?? '',
        // tagType defaults to 'url' only when the attribute is missing altogether.
        tagType: readAttr(tag, /data-tag-type="([^"]*)"/) ?? 'url',
        pagePath: readAttr(tag, /data-page-path="([^"]*)"/) ?? '',
        tagId: readAttr(tag, /data-tag-id="([^"]*)"/) ?? '',
        appId: readAttr(tag, /data-app-id="([^"]*)"/) ?? '',
        mpKey: readAttr(tag, /data-mp-key="([^"]*)"/) ?? ''
      })
    } else if (/^<a /i.test(tag)) {
      // #linkTag in the legacy <a href> format; it carries no tagType, so the
      // segment is emitted with an empty tagType for the consumer to resolve.
      const url = decodeEntities(m[1] || '')
      const label = decodeEntities(m[2] || '').replace(/^#/, '').trim()
      // appId / mpKey are included (empty) so every linkTag segment has the same shape.
      segs.push({ type: 'linkTag', label: label || '#', url, tagType: '', pagePath: '', tagId: '', appId: '', mpKey: '' })
    } else if (/^<img /i.test(tag)) {
      // Image: emitted only when a src attribute is present.
      const src = readAttr(tag, /src="([^"]*)"/)
      if (src !== null) {
        segs.push({ type: 'image', src, alt: readAttr(tag, /alt="([^"]*)"/) ?? '' })
      }
    }
    lastEnd = m.index + tag.length
  }
  // Trailing plain text after the last token.
  pushText(block.slice(lastEnd))
  return segs
}
/**
 * Convert an HTML document into parallel arrays of plain-text lines and
 * rich segments (one entry per non-empty block).
 *
 * @param {string} html - HTML produced by the editor.
 * @param {object} [config] - { persons: [], linkTags: [] }; when either list is
 *   non-empty, text segments are additionally scanned for "@name" / "#label".
 * @returns {{ lines: string[], segments: Array<Array<object>> }}
 */
function parseHtmlToSegments(html, config) {
  // Step 1: turn block-level tags into newlines (applied strictly in this
  // order), leaving inline tags in place for the per-block parser.
  const blockRules = [
    [/<\/p>\s*<p[^>]*>/gi, '\n\n'],
    [/<p[^>]*>/gi, ''],
    [/<\/p>/gi, '\n'],
    [/<div[^>]*>/gi, ''],
    [/<\/div>/gi, '\n'],
    [/<br\s*\/?>/gi, '\n'],
    [/<\/?h[1-6][^>]*>/gi, '\n'],
    [/<\/?blockquote[^>]*>/gi, '\n'],
    [/<\/?ul[^>]*>/gi, '\n'],
    [/<\/?ol[^>]*>/gi, '\n'],
    [/<li[^>]*>/gi, '• '],
    [/<\/li>/gi, '\n']
  ]
  const flattened = blockRules.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    html
  )

  const autoMatch = Boolean(config && (config.persons?.length || config.linkTags?.length))
  const lines = []
  const segments = []

  // Step 2: parse every non-blank block.
  flattened.split(/\n+/).forEach((block) => {
    if (!block.trim()) return
    const parsed = parseBlockToSegments(block)
    if (parsed.length === 0) return

    // A block holding nothing but one image gets an empty text line.
    const isLoneImage = parsed.length === 1 && parsed[0].type === 'image'
    if (isLoneImage) {
      lines.push('')
      segments.push(parsed)
      return
    }

    // Re-scan text segments for hand-typed @name / #label that were not
    // inserted through the editor's dedicated nodes.
    let finalSegs = parsed
    if (autoMatch) {
      finalSegs = [].concat(
        ...parsed.map((seg) =>
          seg.type === 'text' && seg.text ? matchLineToSegments(seg.text, config) : [seg]
        )
      )
    }

    // Tag-free text of the block, kept alongside the segments as a plain fallback.
    const plain = decodeEntities(block.replace(/<[^>]+>/g, '')).trim()
    lines.push(plain)
    segments.push(finalSegs)
  })

  return { lines, segments }
}
/**
 * Remove Markdown formatting markers from text, keeping the wrapped content:
 * headings (#), **bold**, __bold__, *italic*, _italic_, ~~strike~~, `code`,
 * blockquote markers, horizontal rules, bullet markers (rewritten to "• ")
 * and ordered-list numbering.
 *
 * Two deliberate properties of the emphasis patterns:
 *   - They avoid regex lookbehind. JavaScript engines without lookbehind
 *     support reject such a regex literal as a syntax error when the file is
 *     loaded; the preceding character is captured and re-emitted instead.
 *   - Underscores inside ASCII words are not treated as emphasis, so
 *     identifiers and URLs such as "snake_case_name" keep their underscores.
 *
 * @param {string} text - Markdown-ish plain text.
 * @returns {string} Text without formatting markers; falsy input is returned unchanged.
 */
function stripMarkdownFormatting(text) {
  if (!text) return text
  let s = text
  s = s.replace(/^#{1,6}\s+/gm, '')
  s = s.replace(/\*\*(.+?)\*\*/g, '$1')
  // __bold__: delimiters must not touch ASCII letters/digits on the outside.
  s = s.replace(/(^|[^A-Za-z0-9])__(.+?)__(?![A-Za-z0-9])/g, '$1$2')
  // *italic*: a lone "*" on each side; the content neither starts nor ends with "*".
  // $1 re-emits the character consumed in front of the opening marker.
  s = s.replace(/(^|[^*])\*(?!\*)(.*?[^*\n\r\u2028\u2029])\*(?!\*)/g, '$1$2')
  // _italic_: same shape, plus the intraword guard described above.
  s = s.replace(/(^|[^_A-Za-z0-9])_(?!_)(.*?[^_\n\r\u2028\u2029])_(?![_A-Za-z0-9])/g, '$1$2')
  s = s.replace(/~~(.+?)~~/g, '$1')
  s = s.replace(/`([^`]+)`/g, '$1')
  s = s.replace(/^>\s+/gm, '')
  s = s.replace(/^---$/gm, '')
  s = s.replace(/^\* /gm, '• ')
  s = s.replace(/^- /gm, '• ')
  s = s.replace(/^\d+\.\s/gm, '')
  return s
}
/**
 * Split one line of plain text into segments, converting "@name" and "#label"
 * occurrences that match the configured persons / link tags.
 *
 * Matching is case-insensitive, longer names win over shorter ones, and both
 * the ASCII prefixes (@, #) and their full-width forms (U+FF20, U+FF03) are
 * accepted. When the same name or alias is configured twice, the first entry wins.
 *
 * @param {string} line - One line of plain text.
 * @param {object} [config] - {
 *   persons:  [{ personId, name, aliases }],
 *   linkTags: [{ tagId, label, type, pagePath, mpKey, appId, url, aliases }]
 * } where aliases is a comma-separated string.
 * @returns {Array<object>} text / mention / linkTag segments in order; a single
 *   text segment holding the whole line when nothing is configured or matched.
 */
function matchLineToSegments(line, config) {
  const wholeLine = () => [{ type: 'text', text: line }]
  if (!config || (!config.persons?.length && !config.linkTags?.length)) {
    return wholeLine()
  }

  const AT_SIGNS = '@\uFF20'
  const HASH_SIGNS = '#\uFF03'
  const normalize = (s) => (typeof s === 'string' ? s.trim().toLowerCase() : '')

  // Map (not a plain object) so that names like "constructor" or "__proto__"
  // cannot collide with Object.prototype members.
  const buildIndex = (items, primaryKey) => {
    const index = new Map()
    for (const item of items || []) {
      if (!item) continue
      const aliases = typeof item.aliases === 'string' ? item.aliases.split(',') : []
      for (const raw of [item[primaryKey], ...aliases]) {
        const key = normalize(raw)
        if (key && !index.has(key)) index.set(key, item)
      }
    }
    return index
  }
  const personIndex = buildIndex(config.persons, 'name')
  const tagIndex = buildIndex(config.linkTags, 'label')
  if (personIndex.size === 0 && tagIndex.size === 0) return wholeLine()

  const escapeRe = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // Longest first, so "@tommy" is not cut short by a shorter alias "tom".
  const alternation = (index) =>
    [...index.keys()].sort((a, b) => b.length - a.length).map(escapeRe).join('|')

  const parts = []
  if (personIndex.size) parts.push('[' + AT_SIGNS + '](' + alternation(personIndex) + ')')
  if (tagIndex.size) parts.push('[' + HASH_SIGNS + '](' + alternation(tagIndex) + ')')
  const pattern = new RegExp(parts.join('|'), 'gi')

  const segs = []
  let lastEnd = 0
  let m
  while ((m = pattern.exec(line)) !== null) {
    if (m.index > lastEnd) {
      segs.push({ type: 'text', text: line.slice(lastEnd, m.index) })
    }
    const full = m[0]
    const body = full.slice(1)
    const key = normalize(body)
    if (AT_SIGNS.includes(full[0])) {
      const person = personIndex.get(key)
      if (person) {
        segs.push({ type: 'mention', userId: person.personId || '', nickname: person.name || body })
      } else {
        // The lookup can still miss (e.g. case-folding differences); keep the raw text.
        segs.push({ type: 'text', text: full })
      }
    } else {
      const tag = tagIndex.get(key)
      if (tag) {
        segs.push({
          type: 'linkTag',
          label: tag.label || body,
          url: tag.url || '',
          tagType: tag.type || 'url',
          pagePath: tag.pagePath || '',
          tagId: tag.tagId || '',
          appId: tag.appId || '',
          mpKey: tag.mpKey || ''
        })
      } else {
        segs.push({ type: 'text', text: full })
      }
    }
    lastEnd = m.index + full.length
  }
  if (lastEnd < line.length) {
    segs.push({ type: 'text', text: line.slice(lastEnd) })
  }
  return segs.length ? segs : wholeLine()
}
/**
 * Parse plain text / Markdown line by line.
 *
 * Markdown markers are stripped first; each remaining non-blank line is
 * trimmed and then scanned for "@name" / "#label" matches.
 *
 * @param {string} text - Plain text or Markdown content.
 * @param {object} [config] - { persons: [], linkTags: [] }
 * @returns {{ lines: string[], segments: Array<Array<object>> }}
 */
function parsePlainTextToSegments(text, config) {
  const lines = []
  for (const rawLine of stripMarkdownFormatting(text).split('\n')) {
    const line = rawLine.trim()
    if (line.length > 0) {
      lines.push(line)
    }
  }
  const segments = lines.map((line) => matchLineToSegments(line, config))
  return { lines, segments }
}
/**
 * Remove leftover Markdown image references from text.
 *
 * Two passes:
 *   1. "<name>.<image-ext>![alt](url)" — an image file name glued directly in
 *      front of an image reference. The whole whitespace-free run before the
 *      extension is removed together with the reference.
 *   2. Any remaining "![alt](url)".
 *
 * @param {string} text - Raw content.
 * @returns {string} Content without image references; falsy input is returned unchanged.
 */
function stripOrphanImageRefs(text) {
  if (!text) return text
  const fileNameWithRef = /[^\s]*\.(?:png|jpg|jpeg|gif|webp|svg|bmp)!\[[^\]]*\]\([^)]*\)/gi
  const bareRef = /!\[[^\]]*\]\([^)]*\)/g
  return text.replace(fileNameWithRef, '').replace(bareRef, '')
}
/**
 * Parse raw article content into lines and segments.
 *
 * Leftover Markdown image references are removed first; the result is then
 * routed to the HTML parser or the plain-text parser depending on whether it
 * looks like HTML.
 *
 * @param {string} rawContent - Raw content (HTML or plain text / Markdown).
 * @param {object} [config] - { persons: [], linkTags: [] }
 * @returns {{ lines: string[], segments: Array<Array<object>> }} Empty arrays
 *   when rawContent is not a non-empty string.
 */
function parseContent(rawContent, config) {
  const usable = typeof rawContent === 'string' && rawContent.length > 0
  if (!usable) {
    return { lines: [], segments: [] }
  }
  const content = stripOrphanImageRefs(rawContent)
  const parse = isHtmlContent(content) ? parseHtmlToSegments : parsePlainTextToSegments
  return parse(content, config)
}
// Public API of this module: the top-level parser and the HTML detection helper.
// No other function in this file is exported here.
module.exports = {
parseContent,
isHtmlContent
}