soul-yongping/miniprogram/utils/contentParser.js

/**
 * Soul创业派对 - 内容解析工具
 * 解析 TipTap HTML（含 <span data-type="mention">）为阅读页可展示的 segments
 */

/**
 * Tell whether a value looks like HTML markup.
 * Only a non-empty string that contains at least one opening tag
 * (`<` immediately followed by a letter, closed by `>`) qualifies.
 * @param {*} content - value to inspect
 * @returns {boolean} true when the value is a string containing an HTML tag
 */
function isHtmlContent(content) {
  if (typeof content !== 'string' || content === '') return false

  const candidate = content.trim()
  // Cheap substring checks first; the regex only runs when both brackets exist.
  if (!candidate.includes('<') || !candidate.includes('>')) return false

  return /<[a-z][^>]*>/i.test(candidate)
}

/**
 * Replacement text for each HTML entity handled by decodeHtmlEntities.
 * `&nbsp;` is mapped to a regular space.
 */
const HTML_ENTITY_TEXT = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  '#39': "'"
}

/**
 * Decode the supported HTML entities in a single pass.
 * A single pass matters: chained replaces would decode `&amp;lt;` twice
 * (first to `&lt;`, then to `<`) and corrupt text that literally contains `&lt;`.
 * @param {string} str - text that may contain entities
 * @returns {string} decoded text
 */
function decodeHtmlEntities(str) {
  return str.replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (entity, name) => HTML_ENTITY_TEXT[name])
}

/**
 * Strip any remaining tags from an HTML fragment, then decode its entities.
 * @param {string} fragment - HTML fragment
 * @returns {string} plain text
 */
function htmlFragmentToText(fragment) {
  return decodeHtmlEntities(fragment.replace(/<[^>]+>/g, ''))
}

/**
 * Parse HTML into paragraphs made of text and mention segments.
 * Mention markup: <span data-type="mention" data-id="..." data-label="...">@nickname</span>
 * @param {string} html - HTML source
 * @returns {{ lines: string[], segments: Array<Array<Object>> }} `lines[i]` is the
 *   trimmed plain text of paragraph i; `segments[i]` is the same paragraph split into
 *   `{ type: 'text', text }` and `{ type: 'mention', userId, nickname }` pieces.
 */
function parseHtmlToSegments(html) {
  const lines = []
  const segments = []

  // 1. Turn block-level elements into newline-separated paragraphs.
  let text = html
  text = text.replace(/<\/p>\s*<p[^>]*>/gi, '\n\n')
  text = text.replace(/<p[^>]*>/gi, '')
  text = text.replace(/<\/p>/gi, '\n')
  text = text.replace(/<div[^>]*>/gi, '')
  text = text.replace(/<\/div>/gi, '\n')
  text = text.replace(/<br\s*\/?>/gi, '\n')
  text = text.replace(/<\/?h[1-6][^>]*>/gi, '\n')
  text = text.replace(/<\/?blockquote[^>]*>/gi, '\n')
  text = text.replace(/<\/?ul[^>]*>/gi, '\n')
  text = text.replace(/<\/?ol[^>]*>/gi, '\n')
  text = text.replace(/<li[^>]*>/gi, '• ')
  text = text.replace(/<\/li>/gi, '\n')

  // 2. Parse each paragraph into text and mention segments.
  for (const block of text.split(/\n+/)) {
    const lineText = htmlFragmentToText(block).trim()
    // Paragraphs with no visible text are dropped entirely.
    if (!lineText) continue

    const blockSegments = []
    // Fresh /g regex per paragraph so lastIndex never leaks between paragraphs.
    const mentionRe = /<span[^>]*data-type="mention"[^>]*>([^<]*)<\/span>/gi
    let lastEnd = 0
    let m
    while ((m = mentionRe.exec(block)) !== null) {
      const before = htmlFragmentToText(block.slice(lastEnd, m.index))
      if (before) blockSegments.push({ type: 'text', text: before })

      const idMatch = m[0].match(/data-id="([^"]*)"/)
      const labelMatch = m[0].match(/data-label="([^"]*)"/)
      const userId = idMatch ? decodeHtmlEntities(idMatch[1]).trim() : ''
      // Prefer data-label; otherwise use the visible text without its leading "@".
      const nickname = labelMatch
        ? decodeHtmlEntities(labelMatch[1]).trim()
        : decodeHtmlEntities(m[1] || '').replace(/^@/, '').trim()
      blockSegments.push({ type: 'mention', userId, nickname })

      lastEnd = m.index + m[0].length
    }
    const after = htmlFragmentToText(block.slice(lastEnd))
    if (after) blockSegments.push({ type: 'text', text: after })

    lines.push(lineText)
    segments.push(blockSegments.length ? blockSegments : [{ type: 'text', text: lineText }])
  }
  return { lines, segments }
}

/**
 * Split plain text into trimmed, non-empty lines.
 * Each kept line becomes a single text segment; no mention detection is done.
 * @param {string} text - plain text
 * @returns {{ lines: string[], segments: Array<Array<{type: string, text: string}>> }}
 */
function parsePlainTextToSegments(text) {
  const lines = []
  const segments = []

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim()
    if (line.length === 0) continue
    lines.push(line)
    segments.push([{ type: 'text', text: line }])
  }

  return { lines, segments }
}

/**
 * Parse raw content into the line/segment structure.
 * Input that is not a non-empty string yields an empty result; HTML input is
 * parsed for paragraphs and mentions; anything else is treated as plain text.
 * @param {string} rawContent - raw content (HTML or plain text)
 * @returns {{ lines: string[], segments: Array<Array<{type, text?, userId?, nickname?}>> }}
 */
function parseContent(rawContent) {
  const hasContent = typeof rawContent === 'string' && rawContent !== ''
  if (!hasContent) {
    return { lines: [], segments: [] }
  }

  return isHtmlContent(rawContent)
    ? parseHtmlToSegments(rawContent)
    : parsePlainTextToSegments(rawContent)
}

// Public API of this module: the parsing entry point and the HTML detection helper.
module.exports = {
  parseContent,
  isHtmlContent
}