Files
soul-yongping/scripts/content_download.py

115 lines
4.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从小程序/正式 API 下载单章正文保存为书稿目录下的 md 文件
用法
SOUL_TEST_ENV=soulapi python3 scripts/content_download.py 128
SOUL_TEST_ENV=soulapi python3 scripts/content_download.py --id 10.27
python3 scripts/content_download.py 128 --out-dir /path/to/2026每日派对干货
2026 场次第102场起对应 id 10.0110.0210.27第128场
"""
import argparse
import os
import sys
from pathlib import Path
# Project root: two directory levels above this script file.
_SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = _SCRIPT_DIR.parent

# Put <root>/scripts/test first on sys.path so the ``config`` module imported
# below is looked up there before anywhere else.
_TEST_CONFIG_DIR = PROJECT_ROOT.joinpath("scripts", "test")
sys.path.insert(0, str(_TEST_CONFIG_DIR))

try:
    from config import API_BASE, ENV_LABEL
except Exception:
    # Best-effort fallback when ``config`` cannot be imported (or lacks one of
    # the names): take the API base URL from the environment, without a
    # trailing slash, and use a generic environment label.
    _api_base_from_env = os.environ.get("SOUL_API_BASE", "https://soulapi.quwanzhi.com")
    API_BASE = _api_base_from_env.rstrip("/")
    ENV_LABEL = "env"

# Default output directory for the 2026 manuscript (kept in line with the
# upload README); override it with the SOUL_BOOK_2026 environment variable.
_book_dir_2026 = os.environ.get(
    "SOUL_BOOK_2026",
    "/Users/karuo/Documents/个人/2、我写的书/《一场soul的创业实验》/2026每日派对干货",
)
DEFAULT_BOOK_2026 = Path(_book_dir_2026)
def field_to_id(field: int) -> str:
    """Map a session number to the chapter id used by the book API.

    Sessions 102 and later are numbered from ``10.01`` upwards (session 102
    becomes ``10.01``, session 128 becomes ``10.27``).  Sessions 1 to 101
    live in chapter 9 and become ``9.<session>``, zero-padded to at least two
    digits.  The API may still expose a session such as 128 as ``9.28``; this
    function does not try that alternative id.

    Args:
        field: 1-based session number.

    Returns:
        The chapter id, e.g. ``"10.27"`` or ``"9.05"``.

    Raises:
        ValueError: If ``field`` is smaller than 1.
    """
    if field < 1:
        # The range separator was missing from this message ("1999").
        raise ValueError("场次请用 1~999")
    if field >= 102:
        # Session 102 is the first entry of chapter 10.
        return f"10.{field - 101:02d}"
    return f"9.{field:02d}"
def _safe_filename(title: str) -> str:
    """Return *title* with path separators and control characters replaced by ``_``."""
    cleaned = "".join("_" if ch in "/\\" or ord(ch) < 32 else ch for ch in title)
    return cleaned.strip() or "untitled"


def main():
    """Download one chapter from the book API and save it as a Markdown file.

    The chapter is selected either by session number (positional ``field``)
    or by an explicit ``--id``.  For sessions >= 102 a 404 on the chapter-10
    id triggers one retry with the chapter-9 id (``128`` -> ``9.28``).

    Exit codes: 1 when ``requests`` is not installed, 2 when the API response
    is unusable or reports failure, 3 when the response has no content.
    HTTP errors other than the handled 404 propagate from
    ``raise_for_status``.
    """
    parser = argparse.ArgumentParser(description="从小程序 API 下载单章为 md")
    parser.add_argument("field", nargs="?", type=int, help="场次号,如 128 表示第128场")
    parser.add_argument("--id", type=str, help="章节 id如 10.27(与 field 二选一)")
    parser.add_argument("--out-dir", type=Path, default=DEFAULT_BOOK_2026, help="输出目录,默认 2026每日派对干货")
    parser.add_argument("--base", type=str, default=API_BASE, help="API 根地址")
    args = parser.parse_args()

    # Alternative id to try on 404; only set when it differs from the primary id.
    fallback_id = None
    if args.id:
        chapter_id = args.id
    elif args.field is not None:
        try:
            chapter_id = field_to_id(args.field)
        except ValueError as exc:
            # Report an invalid session number as a usage error, not a traceback.
            parser.error(str(exc))
        if args.field >= 102:
            # 2026 sessions may still be filed under chapter 9 (128 -> 9.28).
            # For sessions 1-101 the chapter-9 id is already the primary id,
            # so retrying it would only repeat the same request.
            fallback_id = f"9.{(args.field - 100):02d}"
    else:
        parser.error("请指定 field如 128或 --id如 10.27")

    base = args.base.rstrip("/")
    try:
        import requests
    except ImportError:
        print("请安装: pip install requests", file=sys.stderr)
        sys.exit(1)

    url = f"{base}/api/miniprogram/book/chapter/by-id/{chapter_id}"
    print(f"环境: {ENV_LABEL} | GET {url}")
    r = requests.get(url, timeout=30)
    if r.status_code == 404 and fallback_id is not None:
        url = f"{base}/api/miniprogram/book/chapter/by-id/{fallback_id}"
        print(f"404尝试第9章 id: {fallback_id} | GET {url}")
        r = requests.get(url, timeout=30)
    r.raise_for_status()

    try:
        data = r.json()
    except ValueError:
        # The JSON decode errors raised by ``Response.json`` are ValueErrors.
        print("API 返回的不是合法 JSON", file=sys.stderr)
        sys.exit(2)
    if not isinstance(data, dict):
        print("API 返回失败:", data, file=sys.stderr)
        sys.exit(2)
    if not data.get("success"):
        print("API 返回失败:", data.get("error", data), file=sys.stderr)
        sys.exit(2)

    # The fields are read from the top level first, then from a nested "data" object.
    nested = data.get("data")
    if not isinstance(nested, dict):
        nested = {}
    content = data.get("content") or nested.get("content") or ""
    # Without a title from the API, name the chapter after the session number,
    # or after the chapter id when only --id was given.
    default_title = str(args.field) if args.field else chapter_id
    section_title = str(data.get("sectionTitle") or nested.get("sectionTitle") or default_title)
    if not content:
        print("未获取到正文 content", file=sys.stderr)
        sys.exit(3)

    # Prefix the session number when the title carries no "第X场" marker
    # (sectionTitle may be just the headline text).
    has_session_marker = "第" in section_title and "场" in section_title
    if args.field and not has_session_marker:
        display_title = f"第{args.field}场|{section_title}"
    else:
        display_title = section_title

    out_dir = args.out_dir
    out_dir.mkdir(parents=True, exist_ok=True)
    # The title comes from the API: keep it from introducing path components.
    out_file = out_dir / f"{_safe_filename(display_title)}.md"
    body = f"# {display_title}\n\n{content.strip()}\n"
    out_file.write_text(body, encoding="utf-8")
    print(f"已写入: {out_file}")
    print(f"字数: {len(content)}")
if __name__ == "__main__":
main()