Files
soul-yongping/scripts/sync_chapter_images_from_md.py
卡若 5724fba877 feat: 小程序超级个体/个人资料/CKB获客;VIP列表展示过滤;管理端与API联调
- 超级个体:去掉首位特例;列表仅展示有头像且非微信默认昵称(vip.go)
- 个人资料:居中头像、低调联系方式、点头像优先走存客宝 lead(ckbLeadToken)
- 阅读页分享朋友圈复制与 toast 去重
- soul-api: miniprogram users 带 ckbLeadToken;其它 handler 与路由调整
- 脚本:content_upload、miniprogram 上传辅助等

Made-with: Cursor
2026-03-22 08:34:28 +08:00

191 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从书稿 Markdown 中仅解析「图片」引用并上传到现网,再生成 HTML 写入 chapters.content。
- 只处理:`![](相对/绝对路径.png)` 等常见图片后缀;**不解析、不上传视频/附件**。
- 已是 `http(s)://` 的地址:不重复上传,原样写入 `<img src="...">`。
- 非图片后缀的 `![]()`:当作普通正文一行输出(不尝试上传)。
用法:
cd 一场soul的创业实验-永平
python3 scripts/sync_chapter_images_from_md.py --id 10.22 \\
--md "/path/to/第130场….md"
依赖: pip install pymysql requests
环境变量: SOUL_API_BASE 默认 https://soulapi.quwanzhi.com
"""
from __future__ import annotations
import argparse
import html
import importlib.util
import os
import re
import sys
from pathlib import Path
# Parent of the directory that holds this script (resolved, absolute).
ROOT = Path(__file__).resolve().parent.parent
# Lower-case file suffixes (with leading dot) that md_to_html uploads as images.
IMAGE_EXT = frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp"})
# Matches a line that consists solely of one Markdown image: ![alt](target)
# Group 1 is the alt text, group 2 is the target between the parentheses.
LINE_IMAGE_ONLY = re.compile(r"^\s*!\[([^\]]*)\]\(([^)]+)\)\s*$")
try:
import pymysql
import requests
except ImportError as e:
print("需要: pip install pymysql requests", e, file=sys.stderr)
sys.exit(1)
def load_db_config() -> dict:
    """Load ``DB_CONFIG`` from ``scripts/migrate_2026_sections.py``.

    The migration script is executed as a throw-away module named
    ``_mig_db`` solely to read its module-level ``DB_CONFIG``; any other
    top-level statements in that script run as a side effect.

    Returns:
        The ``DB_CONFIG`` dict defined by the migration script.

    Raises:
        SystemExit: If the script is missing, cannot be loaded as a module,
            or does not define ``DB_CONFIG`` as a dict.
    """
    mig = ROOT / "scripts" / "migrate_2026_sections.py"
    if not mig.is_file():
        sys.exit(f"文件不存在: {mig}")
    spec = importlib.util.spec_from_file_location("_mig_db", mig)
    # Validate before first use: spec_from_file_location may return None, and
    # an ``assert`` would be stripped when Python runs with -O.
    if spec is None or spec.loader is None:
        sys.exit(f"无法加载模块: {mig}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    cfg = getattr(mod, "DB_CONFIG", None)
    if not isinstance(cfg, dict):
        sys.exit("migrate_2026_sections.py 中无有效 DB_CONFIG")
    return cfg
def resolve_local_path(md_path: Path, ref: str) -> Path | None:
ref = ref.strip()
if not ref or ref.startswith(("http://", "https://")):
return None
p = (md_path.parent / ref).expanduser().resolve()
if p.is_file():
return p
return None
def guess_mime(path: Path) -> str:
    """Return the MIME type implied by ``path``'s suffix (case-insensitive).

    Recognises png, jpg/jpeg, gif and webp; every other suffix (including
    none at all) yields ``application/octet-stream``.
    """
    suffix = path.suffix.lower()
    if suffix == ".png":
        return "image/png"
    if suffix in (".jpg", ".jpeg"):
        return "image/jpeg"
    if suffix == ".gif":
        return "image/gif"
    if suffix == ".webp":
        return "image/webp"
    return "application/octet-stream"
def upload_image(local: Path, api_base: str) -> str:
    """Upload one local file to ``{api_base}/api/upload`` and return its URL.

    The file is posted as multipart field ``file`` (with a MIME type from
    ``guess_mime``) together with the form field ``folder=book-images``.
    The JSON reply must be an object with a truthy ``success`` and a URL in
    either ``url`` or ``data.url``.

    Args:
        local: Path of the file to upload.
        api_base: API root address; a trailing ``/`` is ignored.

    Returns:
        The URL reported by the server, as a string.

    Raises:
        RuntimeError: If the reply is not a JSON object, reports failure, or
            carries no URL.
        Exception: Whatever ``requests`` raises for transport errors, non-2xx
            statuses (``raise_for_status``) or a non-JSON body propagates
            unchanged.
    """
    url = f"{api_base.rstrip('/')}/api/upload"
    mime = guess_mime(local)
    # The request must be sent while the file handle is still open.
    with local.open("rb") as f:
        files = {"file": (local.name, f, mime)}
        data = {"folder": "book-images"}
        r = requests.post(url, files=files, data=data, timeout=120)
    r.raise_for_status()
    j = r.json()
    # A non-object payload (list, string, null, ...) used to fail with an
    # AttributeError on ``.get``; report it like any other unusable reply.
    if not isinstance(j, dict):
        raise RuntimeError("响应无 url: " + str(j)[:500])
    if not j.get("success"):
        raise RuntimeError(j.get("error") or j.get("message") or str(j))
    nested = j.get("data")
    nested_url = nested.get("url") if isinstance(nested, dict) else None
    out = j.get("url") or nested_url
    if not out:
        raise RuntimeError("响应无 url: " + str(j)[:500])
    return str(out)
def md_to_html(md_path: Path, api_base: str) -> str:
    """Convert a manuscript Markdown file to simple HTML, uploading images.

    Line-by-line rules:

    * If the first line starts with ``#`` (after leading whitespace) it is
      dropped.
    * A line that is exactly ``---`` (ignoring surrounding whitespace) and
      any blank line become a paragraph break.
    * A line consisting solely of ``![alt](target)``:
        - ``http(s)://`` targets are emitted as ``<img>`` unchanged;
        - local targets that do not resolve to a file yield a visible
          "invalid image path" placeholder paragraph;
        - local files whose suffix is not in ``IMAGE_EXT`` are emitted as
          plain escaped text and never uploaded;
        - local image files are uploaded via ``upload_image`` (once per
          resolved path) and emitted as ``<img>`` with the returned URL.
    * Every other non-empty line becomes an escaped ``<p>`` paragraph.

    Consecutive paragraph breaks are collapsed, and the result is stripped
    and terminated with a single newline.

    Args:
        md_path: Path of the Markdown file to convert.
        api_base: API root address passed through to ``upload_image``.

    Returns:
        The generated HTML text.
    """
    # "utf-8-sig" transparently drops a leading BOM, which would otherwise
    # hide the "#" title marker and end up inside the stored content.
    raw = md_path.read_text(encoding="utf-8-sig")
    lines = raw.splitlines()
    if lines and lines[0].lstrip().startswith("#"):
        lines = lines[1:]

    chunks: list[str] = []
    upload_cache: dict[str, str] = {}  # resolved local path -> uploaded URL
    for line in lines:
        text = line.strip()
        if text == "---":
            chunks.append("")
            continue

        m = LINE_IMAGE_ONLY.match(line)
        if m:
            alt, ref = m.group(1), m.group(2).strip()
            if ref.startswith(("http://", "https://")):
                # Already hosted: reference as-is, no re-upload.
                chunks.append(
                    f'<p><img src="{html.escape(ref)}" alt="{html.escape(alt)}"/></p>'
                )
                continue
            loc = resolve_local_path(md_path, ref)
            if loc is None:
                # Closing parenthesis added: the placeholder was unbalanced.
                chunks.append(f"<p>(图片路径无效:{html.escape(ref)})</p>")
                continue
            if loc.suffix.lower() not in IMAGE_EXT:
                # Not an image (e.g. a video): keep the raw line as body text.
                chunks.append(f"<p>{html.escape(text)}</p>")
                continue
            key = str(loc)
            if key not in upload_cache:
                print(f"上传图片: {loc.name}", flush=True)
                upload_cache[key] = upload_image(loc, api_base)
            src = upload_cache[key]
            chunks.append(
                f'<p><img src="{html.escape(src)}" alt="{html.escape(alt)}"/></p>'
            )
            continue

        chunks.append(f"<p>{html.escape(text)}</p>" if text else "")

    # Collapse runs of paragraph breaks into one and drop leading breaks.
    html_parts: list[str] = []
    for c in chunks:
        if c == "":
            if html_parts and html_parts[-1] != "":
                html_parts.append("")
        else:
            html_parts.append(c)
    return "\n".join(html_parts).strip() + "\n"
def main() -> None:
    """Command-line entry point: convert a Markdown file and store it.

    Parses ``--id``, ``--md``, ``--api-base`` (defaulting to the
    ``SOUL_API_BASE`` environment variable, then the built-in address) and
    ``--dry-run``; converts the Markdown via ``md_to_html``; then writes the
    HTML and its character count to the ``chapters`` row with the given id.

    Note that ``md_to_html`` runs before the ``--dry-run`` check, so local
    images are uploaded even in dry-run mode; only the database write is
    skipped.

    Raises:
        SystemExit: If the Markdown file does not exist, or the UPDATE did
            not report exactly one affected row (the transaction is rolled
            back in that case).
    """
    p = argparse.ArgumentParser(description="MD 内图片上传并写回 chapters仅图片")
    p.add_argument("--id", required=True, help="章节 id如 10.22")
    p.add_argument("--md", type=Path, required=True, help="文章 .md 路径")
    p.add_argument(
        "--api-base",
        default=os.environ.get("SOUL_API_BASE", "https://soulapi.quwanzhi.com"),
        help="API 根地址",
    )
    p.add_argument("--dry-run", action="store_true", help="只打印 HTML 前 800 字,不写库")
    args = p.parse_args()

    md_path = args.md.expanduser().resolve()
    if not md_path.is_file():
        sys.exit(f"文件不存在: {md_path}")

    body = md_to_html(md_path, args.api_base)
    # Stored as word_count: the character length of the generated HTML.
    word_count = len(body)
    if args.dry_run:
        print(body[:800])
        print("… dry-runword_count=", word_count)
        return

    cfg = load_db_config()
    conn = pymysql.connect(**cfg)
    # try/finally guarantees the connection is released on every path,
    # including the rollback + exit branch and a failing execute().
    try:
        with conn.cursor() as cur:
            cur.execute(
                "UPDATE chapters SET content = %s, word_count = %s, updated_at = NOW() WHERE id = %s",
                (body, word_count, args.id),
            )
            affected = cur.rowcount
        if affected != 1:
            conn.rollback()
            sys.exit(f"更新失败id={args.id} rowcount={affected}")
        conn.commit()
    finally:
        conn.close()
    print(f"已更新 {args.id} | word_count={word_count}")


if __name__ == "__main__":
    main()