#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
|
|||
|
|
从书稿 Markdown 中仅解析「图片」引用并上传到现网,再生成 HTML 写入 chapters.content。
|
|||
|
|
|
|||
|
|
- 只处理:`` 等常见图片后缀;**不解析、不上传视频/附件**。
|
|||
|
|
- 已是 `http(s)://` 的地址:不重复上传,原样写入 `<img src="...">`。
|
|||
|
|
- 非图片后缀的 `![]()`:当作普通正文一行输出(不尝试上传)。
|
|||
|
|
|
|||
|
|
用法:
|
|||
|
|
cd 一场soul的创业实验-永平
|
|||
|
|
python3 scripts/sync_chapter_images_from_md.py --id 10.22 \\
|
|||
|
|
--md "/path/to/第130场|….md"
|
|||
|
|
|
|||
|
|
依赖: pip install pymysql requests
|
|||
|
|
环境变量: SOUL_API_BASE 默认 https://soulapi.quwanzhi.com
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations

import argparse
import html
import importlib.util
import os
import re
import sys
from pathlib import Path
from urllib.parse import unquote

# Directory two levels above this file (the parent of the directory holding the script).
ROOT = Path(__file__).resolve().parents[1]

# Lower-case file suffixes accepted as uploadable images.
IMAGE_EXT = frozenset((".png", ".jpg", ".jpeg", ".gif", ".webp"))

# A line that consists solely of one Markdown image: ![alt](target)
# group(1) is the alt text (may be empty), group(2) is the non-empty target.
LINE_IMAGE_ONLY = re.compile(r"^\s*!\[([^\]]*)\]\(([^)]+)\)\s*$")


# Third-party dependencies: stop immediately with an install hint if either is missing.
try:
    import pymysql
    import requests
except ImportError as exc:
    print("需要: pip install pymysql requests", exc, file=sys.stderr)
    raise SystemExit(1)


def load_db_config() -> dict:
    """Load ``DB_CONFIG`` from ``scripts/migrate_2026_sections.py`` under ``ROOT``.

    The script is loaded as a module by file path, which executes its
    top-level code, and its ``DB_CONFIG`` attribute is returned.

    Returns:
        The ``DB_CONFIG`` dict defined by the migration script.

    Raises:
        SystemExit: if the script is missing, cannot be loaded as a module,
            or does not define ``DB_CONFIG`` as a dict.
    """
    mig = ROOT / "scripts" / "migrate_2026_sections.py"
    if not mig.is_file():
        sys.exit(f"找不到 DB 配置脚本: {mig}")

    spec = importlib.util.spec_from_file_location("_mig_db", mig)
    # Validate the spec before using it; an assert would be stripped under -O.
    if spec is None or spec.loader is None:
        sys.exit(f"无法加载 DB 配置脚本: {mig}")

    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)

    cfg = getattr(mod, "DB_CONFIG", None)
    if not isinstance(cfg, dict):
        sys.exit("migrate_2026_sections.py 中无有效 DB_CONFIG")
    return cfg


def resolve_local_path(md_path: Path, ref: str) -> Path | None:
    """Resolve a Markdown image reference to an existing local file.

    The reference is interpreted relative to the directory containing
    *md_path*. The literal reference is tried first; if no such file exists,
    its percent-decoded form is tried as well, so that references written as
    ``my%20pic.png`` find ``my pic.png`` on disk.

    Args:
        md_path: Path of the Markdown file that contains the reference.
        ref: The raw link target taken from ``![alt](ref)``.

    Returns:
        The resolved path of the existing file, or ``None`` when the reference
        is empty, is an ``http(s)://`` URL, or does not point to a file.
    """
    ref = ref.strip()
    if not ref or ref.startswith(("http://", "https://")):
        return None

    candidates = [ref]
    decoded = unquote(ref)
    if decoded != ref:
        candidates.append(decoded)

    for candidate in candidates:
        path = (md_path.parent / candidate).expanduser().resolve()
        if path.is_file():
            return path
    return None


def guess_mime(path: Path) -> str:
    """Return the MIME type implied by the suffix of *path*.

    The suffix is compared case-insensitively. ``.jpg`` and ``.jpeg`` map to
    ``image/jpeg``; ``.png``, ``.gif`` and ``.webp`` map to the ``image/``
    type of the same name; anything else is ``application/octet-stream``.
    """
    suffix = path.suffix.lower()
    if suffix in (".jpg", ".jpeg"):
        return "image/jpeg"
    if suffix in (".png", ".gif", ".webp"):
        # For these three the subtype equals the suffix without its dot.
        return "image/" + suffix[1:]
    return "application/octet-stream"


def upload_image(local: Path, api_base: str) -> str:
    """Upload one local file to ``<api_base>/api/upload`` and return its URL.

    The file is sent as the multipart field ``file`` together with the form
    field ``folder=book-images``. The JSON reply must carry a truthy
    ``success`` value, and the URL is read from ``url`` or, failing that,
    from ``data.url``.

    Args:
        local: Path of the file to upload.
        api_base: API root address; a trailing slash is ignored.

    Returns:
        The URL reported by the server, as a string.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the reply is not marked successful or has no URL.
    """
    endpoint = api_base.rstrip("/") + "/api/upload"
    content_type = guess_mime(local)

    with local.open("rb") as stream:
        response = requests.post(
            endpoint,
            files={"file": (local.name, stream, content_type)},
            data={"folder": "book-images"},
            timeout=120,
        )
    response.raise_for_status()

    payload = response.json()
    if not payload.get("success"):
        raise RuntimeError(payload.get("error") or payload.get("message") or str(payload))

    uploaded_url = payload.get("url")
    if not uploaded_url:
        # Fall back to the nested form {"data": {"url": ...}}.
        uploaded_url = (payload.get("data") or {}).get("url")
    if not uploaded_url:
        raise RuntimeError("响应无 url: " + str(payload)[:500])
    return str(uploaded_url)


def md_to_html(md_path: Path, api_base: str) -> str:
    """Convert a Markdown file into the paragraph-based HTML stored for a chapter.

    Processing is line based:

    * A leading Markdown heading line (first line starting with ``#``) is dropped.
    * Blank lines and ``---`` lines become paragraph separators; consecutive
      separators are collapsed into one.
    * A line that is exactly one image reference ``![alt](ref)`` becomes an
      ``<img>`` paragraph. ``http(s)://`` references are used unchanged; local
      image files are uploaded through ``upload_image`` (once per distinct
      file) and replaced by the returned URL.
    * A local reference whose suffix is not an image suffix is emitted as
      plain text without touching the disk result, so videos and attachments
      are never uploaded nor reported as broken images.
    * Every other non-blank line becomes an HTML-escaped ``<p>`` paragraph.

    Args:
        md_path: Path of the Markdown file to convert.
        api_base: API root address passed on to ``upload_image``.

    Returns:
        The generated HTML, terminated by a single newline.
    """
    lines = md_path.read_text(encoding="utf-8").splitlines()
    if lines and lines[0].lstrip().startswith("#"):
        lines = lines[1:]

    upload_cache: dict[str, str] = {}
    chunks: list[str] = []
    for line in lines:
        text = line.strip()
        if not text or text == "---":
            chunks.append("")
            continue

        # The pattern can only match when the first non-blank characters are
        # "![", so ordinary text lines skip the regex entirely.
        match = LINE_IMAGE_ONLY.match(line) if text.startswith("![") else None
        if match is None:
            chunks.append(_text_paragraph(text))
            continue

        chunks.append(
            _render_image_line(
                md_path,
                api_base,
                text,
                match.group(1),
                match.group(2).strip(),
                upload_cache,
            )
        )

    return "\n".join(_collapse_blank_runs(chunks)).strip() + "\n"


def _text_paragraph(text: str) -> str:
    """Wrap *text* in a ``<p>`` element with HTML escaping applied."""
    return f"<p>{html.escape(text)}</p>"


def _image_paragraph(src: str, alt: str) -> str:
    """Build a ``<p><img/></p>`` element with escaped ``src`` and ``alt``."""
    return f'<p><img src="{html.escape(src)}" alt="{html.escape(alt)}"/></p>'


def _render_image_line(
    md_path: Path,
    api_base: str,
    text: str,
    alt: str,
    ref: str,
    upload_cache: dict[str, str],
) -> str:
    """Render one image-only line; *upload_cache* maps local path to uploaded URL."""
    if ref.startswith(("http://", "https://")):
        return _image_paragraph(ref, alt)

    local = resolve_local_path(md_path, ref)
    if local is None:
        # Only references that look like images are reported as broken;
        # anything else (video, attachment) passes through as plain text.
        if Path(ref).suffix.lower() in IMAGE_EXT:
            return f"<p>(图片路径无效:{html.escape(ref)})</p>"
        return _text_paragraph(text)

    if local.suffix.lower() not in IMAGE_EXT:
        return _text_paragraph(text)

    key = str(local)
    if key not in upload_cache:
        print(f"上传图片: {local.name} …", flush=True)
        upload_cache[key] = upload_image(local, api_base)
    return _image_paragraph(upload_cache[key], alt)


def _collapse_blank_runs(chunks: list[str]) -> list[str]:
    """Drop leading empty chunks and squeeze runs of empty chunks into one."""
    collapsed: list[str] = []
    for chunk in chunks:
        if chunk or (collapsed and collapsed[-1]):
            collapsed.append(chunk)
    return collapsed


def main() -> None:
    """Command-line entry point: convert one Markdown file and store it for a chapter.

    Parses ``--id``, ``--md``, ``--api-base`` and ``--dry-run``, converts the
    Markdown file with ``md_to_html`` and, unless ``--dry-run`` is given,
    writes the HTML into ``chapters.content`` for the given id. With
    ``--dry-run`` the conversion still runs; only the database write is
    skipped.

    Raises:
        SystemExit: if the Markdown file does not exist, or if the UPDATE does
            not affect exactly one row (the transaction is rolled back).
    """
    parser = argparse.ArgumentParser(description="MD 内图片上传并写回 chapters(仅图片)")
    parser.add_argument("--id", required=True, help="章节 id,如 10.22")
    parser.add_argument("--md", type=Path, required=True, help="文章 .md 路径")
    parser.add_argument(
        "--api-base",
        default=os.environ.get("SOUL_API_BASE", "https://soulapi.quwanzhi.com"),
        help="API 根地址",
    )
    parser.add_argument("--dry-run", action="store_true", help="只打印 HTML 前 800 字,不写库")
    args = parser.parse_args()

    md_path = args.md.expanduser().resolve()
    if not md_path.is_file():
        sys.exit(f"文件不存在: {md_path}")

    body = md_to_html(md_path, args.api_base)
    # Stored as word_count: the character length of the generated HTML string.
    word_count = len(body)

    if args.dry_run:
        print(body[:800])
        print("… dry-run,word_count=", word_count)
        return

    conn = pymysql.connect(**load_db_config())
    try:
        with conn.cursor() as cur:
            cur.execute(
                "UPDATE chapters SET content = %s, word_count = %s, updated_at = NOW() WHERE id = %s",
                (body, word_count, args.id),
            )
            affected = cur.rowcount
        if affected != 1:
            conn.rollback()
            sys.exit(f"更新失败:id={args.id} rowcount={affected}")
        conn.commit()
    finally:
        # Always release the connection, including on errors and on sys.exit.
        conn.close()

    print(f"已更新 {args.id} | word_count={word_count}")


# Script entry point: run the CLI only when this file is executed directly.
if __name__ == "__main__":
    main()