Files
soul-yongping/miniprogram/utils/contentParser.js
Alex-larget db4b4b8b87 Add linked mini program functionality and enhance link tag handling
- Introduced `navigateToMiniProgramAppIdList` in app.json for mini program navigation.
- Updated link tag handling in the read page to support mini program keys and app IDs.
- Enhanced content parsing to include app ID and mini program key in link tags.
- Added linked mini programs management in the admin panel with API endpoints for CRUD operations.
- Improved UI for selecting linked mini programs in the content creation page.
2026-03-12 16:51:12 +08:00

173 lines
6.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Soul创业派对 - 内容解析工具
* 解析 TipTap HTML 为阅读页可展示的 segments
*
* segment 类型:
* { type: 'text', text }
* { type: 'mention', userId, nickname } — @某人,点击加好友
* { type: 'linkTag', label, url } — #链接标签,点击跳转
* { type: 'image', src, alt } — 图片
*/
/** 判断内容是否为 HTML */
function isHtmlContent(content) {
if (!content || typeof content !== 'string') return false
const trimmed = content.trim()
return trimmed.includes('<') && trimmed.includes('>') && /<[a-z][^>]*>/i.test(trimmed)
}
/** 解码常见 HTML 实体 */
function decodeEntities(str) {
return str
.replace(/&nbsp;/g, ' ')
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
}
/**
* 将一个 HTML block 字符串解析为 segments 数组
* 处理三种内联元素mention / linkTag(span) / linkTag(a) / img
*/
function parseBlockToSegments(block) {
const segs = []
// 合并匹配所有内联元素
const tokenRe = /<span[^>]*data-type="mention"[^>]*>[\s\S]*?<\/span>|<span[^>]*data-type="linkTag"[^>]*>[\s\S]*?<\/span>|<a[^>]*href="([^"]*)"[^>]*>(#[^<]*)<\/a>|<img[^>]*\/?>/gi
let lastEnd = 0
let m
while ((m = tokenRe.exec(block)) !== null) {
// 前置纯文本
const before = decodeEntities(block.slice(lastEnd, m.index).replace(/<[^>]+>/g, ''))
if (before.trim()) segs.push({ type: 'text', text: before })
const tag = m[0]
if (/data-type="mention"/i.test(tag)) {
// @mention — TipTap mention span
const idMatch = tag.match(/data-id="([^"]*)"/)
const labelMatch = tag.match(/data-label="([^"]*)"/)
const innerText = tag.replace(/<[^>]+>/g, '')
const userId = idMatch ? idMatch[1].trim() : ''
const nickname = labelMatch ? labelMatch[1].trim() : innerText.replace(/^@/, '').trim()
if (userId || nickname) segs.push({ type: 'mention', userId, nickname })
} else if (/data-type="linkTag"/i.test(tag)) {
// #linkTag — 自定义 span 格式data-type="linkTag" data-url="..." data-tag-type="..." data-page-path="..." data-app-id="..."
const urlMatch = tag.match(/data-url="([^"]*)"/)
const tagTypeMatch = tag.match(/data-tag-type="([^"]*)"/)
const pagePathMatch = tag.match(/data-page-path="([^"]*)"/)
const tagIdMatch = tag.match(/data-tag-id="([^"]*)"/)
const appIdMatch = tag.match(/data-app-id="([^"]*)"/)
const mpKeyMatch = tag.match(/data-mp-key="([^"]*)"/)
const innerText = tag.replace(/<[^>]+>/g, '').replace(/^#/, '').trim()
const url = urlMatch ? urlMatch[1] : ''
const tagType = tagTypeMatch ? tagTypeMatch[1] : 'url'
const pagePath = pagePathMatch ? pagePathMatch[1] : ''
const tagId = tagIdMatch ? tagIdMatch[1] : ''
const appId = appIdMatch ? appIdMatch[1] : ''
const mpKey = mpKeyMatch ? mpKeyMatch[1] : (tagType === 'miniprogram' ? appId : '')
segs.push({ type: 'linkTag', label: innerText || '#', url, tagType, pagePath, tagId, appId, mpKey })
} else if (/^<a /i.test(tag)) {
// #linkTag — 旧格式 <a href>insertLinkTag 旧版产生url 可能为空)
// m[1] = href, m[2] = innerText以 # 开头)
const url = m[1] || ''
const label = (m[2] || '').replace(/^#/, '').trim()
// 旧格式没有 tagType在 onLinkTagTap 中会按 label 匹配缓存的 linkTags 配置降级处理
segs.push({ type: 'linkTag', label: label || '#', url, tagType: '', pagePath: '', tagId: '' })
} else if (/^<img /i.test(tag)) {
// 图片
const srcMatch = tag.match(/src="([^"]*)"/)
const altMatch = tag.match(/alt="([^"]*)"/)
if (srcMatch) {
segs.push({ type: 'image', src: srcMatch[1], alt: altMatch ? altMatch[1] : '' })
}
}
lastEnd = m.index + tag.length
}
// 尾部纯文本
const after = decodeEntities(block.slice(lastEnd).replace(/<[^>]+>/g, ''))
if (after.trim()) segs.push({ type: 'text', text: after })
return segs
}
/**
* 从 HTML 中解析出 lines纯文本行和 segments含富文本片段
*/
function parseHtmlToSegments(html) {
const lines = []
const segments = []
// 1. 块级标签换行,保留内联标签供后续解析
let text = html
text = text.replace(/<\/p>\s*<p[^>]*>/gi, '\n\n')
text = text.replace(/<p[^>]*>/gi, '')
text = text.replace(/<\/p>/gi, '\n')
text = text.replace(/<div[^>]*>/gi, '')
text = text.replace(/<\/div>/gi, '\n')
text = text.replace(/<br\s*\/?>/gi, '\n')
text = text.replace(/<\/?h[1-6][^>]*>/gi, '\n')
text = text.replace(/<\/?blockquote[^>]*>/gi, '\n')
text = text.replace(/<\/?ul[^>]*>/gi, '\n')
text = text.replace(/<\/?ol[^>]*>/gi, '\n')
text = text.replace(/<li[^>]*>/gi, '• ')
text = text.replace(/<\/li>/gi, '\n')
// 2. 逐段解析
const blocks = text.split(/\n+/)
for (const block of blocks) {
if (!block.trim()) continue
const blockSegs = parseBlockToSegments(block)
if (!blockSegs.length) continue
// 纯图片行独立成段
if (blockSegs.length === 1 && blockSegs[0].type === 'image') {
lines.push('')
segments.push(blockSegs)
continue
}
// 行纯文本用于 linespreviewParagraphs 降级展示)
const lineText = decodeEntities(block.replace(/<[^>]+>/g, '')).trim()
lines.push(lineText)
segments.push(blockSegs)
}
return { lines, segments }
}
/** 纯文本按行解析(无 HTML 标签) */
function parsePlainTextToSegments(text) {
const lines = text.split('\n').map(l => l.trim()).filter(l => l.length > 0)
const segments = lines.map(line => [{ type: 'text', text: line }])
return { lines, segments }
}
/**
* 将原始内容解析为 contentSegments用于阅读页展示
* @param {string} rawContent
* @returns {{ lines: string[], segments: Array<Array<segment>> }}
*/
function parseContent(rawContent) {
if (!rawContent || typeof rawContent !== 'string') {
return { lines: [], segments: [] }
}
if (isHtmlContent(rawContent)) {
return parseHtmlToSegments(rawContent)
}
return parsePlainTextToSegments(rawContent)
}
module.exports = {
parseContent,
isHtmlContent
}