#!/usr/bin/env python3
"""Upload the book 《一场soul的创业实验》 to a Feishu (Lark) wiki as child pages.

Target wiki node: https://cunkebao.feishu.cn/wiki/FNP6wdvNKij7yMkb3xCce0CYnpd
Pages are created (or reused when a page with the same title already exists)
below that node and their body is replaced with the markdown text.  Images are
not uploaded: each markdown image becomes a ``[图片: alt]`` text placeholder.

Environment variables (required for a real upload):
    FEISHU_APP_ID           Feishu application App ID
    FEISHU_APP_SECRET       Feishu application App Secret
    FEISHU_WIKI_NODE_TOKEN  token of the parent wiki node (the ID in the link
                            above); defaults to WIKI_NODE_TOKEN

Optional:
    FEISHU_USER_TOKEN       user access token to use instead of the token file
    SOUL_BOOK_ROOT          book root directory (read when the module loads)

The variables may also be written to ``.env.feishu`` next to this script, one
``KEY=VALUE`` per line; the file is loaded automatically by ``main()``.

Permissions: the application must be a member (admin) of the wiki space and
have the wiki and cloud-document scopes enabled.

Usage:
    python3 feishu_wiki_upload.py [--dry-run] [--only 4.6]
    python3 feishu_wiki_upload.py --full

    --dry-run   only check configuration and local files, upload nothing
    --only 4.6  upload section 4.6 only (for testing); ``--only 112`` uploads
                the "第112场" page
    --full      sync the whole book: build the directory hierarchy and
                overwrite the body of pages that already exist
"""
import argparse
import json
import os
import re
import sys
import time
import webbrowser
from pathlib import Path
from typing import Optional

try:
    import requests
except ImportError:
    # Reported (with exit code 1) at the start of main(), so that merely
    # importing this module never terminates the interpreter.
    requests = None

# Token file holding the user access/refresh tokens (used when the user
# identity, rather than the app identity, has edit rights on the wiki).
FEISHU_USER_TOKENS_JSON = Path("/Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水桥_平台对接/飞书管理/脚本/.feishu_tokens.json")

BASE = "https://open.feishu.cn/open-apis"
WIKI_NODE_TOKEN = "FNP6wdvNKij7yMkb3xCce0CYnpd"
BOOK_ROOT = Path(os.environ.get("SOUL_BOOK_ROOT", "/Users/karuo/Documents/个人/2、我写的书/《一场soul的创业实验》"))

DOCX_CHILDREN_BATCH = 50  # number of child blocks sent/removed per request

# Page title to use in Feishu when it differs from the file name
# (avoids creating a duplicate page).
TITLE_OVERRIDE = {"4.6 Soul被封号了:解决方案和干货": "4.6 Soul被封号了:如何处理?"}

# A markdown image at the start of a line: ![alt](path)
_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')


def _unquote(value: str) -> str:
    """Strip one pair of matching surrounding quotes, leaving inner quotes intact."""
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        return value[1:-1]
    return value


def load_env(env_path: Optional[Path] = None) -> None:
    """Load ``KEY=VALUE`` lines from an env file into ``os.environ``.

    Args:
        env_path: file to read; defaults to ``.env.feishu`` next to this script.

    Blank lines, ``#`` comments and lines without ``=`` are ignored.  Variables
    that are already set in the environment are not overwritten.  A missing
    file is not an error.
    """
    if env_path is None:
        env_path = Path(__file__).resolve().parent / ".env.feishu"
    env_path = Path(env_path)
    if not env_path.exists():
        return
    for raw in env_path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        os.environ.setdefault(key.strip(), _unquote(value.strip()))


def _auth_headers(token: str, with_json: bool = False) -> dict:
    """Build the Authorization header (plus JSON content type when requested)."""
    headers = {"Authorization": f"Bearer {token}"}
    if with_json:
        headers["Content-Type"] = "application/json"
    return headers


def _refresh_user_token(app_id: str, app_secret: str) -> str:
    """Refresh the user access token and write the new tokens back to the token file.

    Uses the app credentials to obtain an app_access_token, then exchanges the
    ``refresh_token`` stored in FEISHU_USER_TOKENS_JSON for a new user token.

    Returns:
        The new access token, or "" when the token file is missing/unreadable,
        holds no refresh token, or the API does not return a token.
    """
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        stored = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        refresh = (stored.get("refresh_token") or "").strip()
    except (OSError, ValueError, AttributeError):
        return ""
    if not refresh:
        return ""

    r = requests.post(
        f"{BASE}/auth/v3/app_access_token/internal",
        json={"app_id": app_id, "app_secret": app_secret},
        timeout=10,
    )
    app_token = (r.json() or {}).get("app_access_token")
    if not app_token:
        return ""

    r2 = requests.post(
        f"{BASE}/authen/v1/oidc/refresh_access_token",
        headers=_auth_headers(app_token, with_json=True),
        json={"grant_type": "refresh_token", "refresh_token": refresh},
        timeout=10,
    )
    out = r2.json() or {}
    if out.get("code") != 0:
        return ""
    data = out.get("data") or {}
    new_access = (data.get("access_token") or "").strip()
    # Keep the old refresh token when the response does not carry a new one.
    new_refresh = (data.get("refresh_token") or "").strip() or refresh
    if not new_access:
        return ""
    stored["access_token"] = new_access
    stored["refresh_token"] = new_refresh
    FEISHU_USER_TOKENS_JSON.write_text(json.dumps(stored, ensure_ascii=False, indent=2), encoding="utf-8")
    return new_access


def get_user_token(app_id: str = "", app_secret: str = "") -> str:
    """Return a user access token, or "" when none is available.

    Lookup order: the FEISHU_USER_TOKEN environment variable; then the token
    file, refreshed first when app credentials and a refresh token are present;
    then the (possibly stale) access token stored in the token file.  Any
    failure while reading or refreshing is reported on stderr and yields "",
    so the caller can fall back to the tenant token.
    """
    env_token = os.environ.get("FEISHU_USER_TOKEN")
    if env_token:
        return env_token.strip()
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        stored = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        if app_id and app_secret and stored.get("refresh_token"):
            refreshed = _refresh_user_token(app_id, app_secret)
            if refreshed:
                return refreshed
        return (stored.get("access_token") or "").strip()
    except Exception as exc:  # best-effort boundary: never block the upload on this
        print(f"警告: 读取/刷新用户 token 失败: {exc}", file=sys.stderr)
    return ""


def get_tenant_access_token(app_id: str, app_secret: str) -> str:
    """Fetch a tenant_access_token for the app.

    Raises:
        RuntimeError: when the API answers with a non-zero ``code``.
    """
    r = requests.post(
        f"{BASE}/auth/v3/tenant_access_token/internal",
        json={"app_id": app_id, "app_secret": app_secret},
        timeout=10,
    )
    d = r.json()
    if d.get("code") != 0:
        raise RuntimeError(f"获取 tenant_access_token 失败: {d}")
    return d["tenant_access_token"]


def get_node_info(token: str, node_token: str) -> dict:
    """Return the wiki node object (``data.node``) for ``node_token``.

    Raises:
        RuntimeError: when the API answers with a non-zero ``code``.
    """
    r = requests.get(
        f"{BASE}/wiki/v2/spaces/get_node",
        params={"token": node_token},
        headers=_auth_headers(token),
        timeout=10,
    )
    d = r.json()
    if d.get("code") != 0:
        raise RuntimeError(f"获取节点信息失败: {d}")
    return d["data"]["node"]


def list_wiki_children(token: str, space_id: str, parent_node_token: str) -> list:
    """List the direct children of a wiki node.

    Follows ``page_token`` pagination until the API stops returning one.

    Returns:
        ``[{"node_token", "obj_token", "title"}, ...]``; an empty title is
        replaced by "未命名".

    Raises:
        RuntimeError: when any page request answers with a non-zero ``code``.
    """
    out = []
    page_token = None
    while True:
        params = {"parent_node_token": parent_node_token, "page_size": 50}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(
            f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
            headers=_auth_headers(token),
            params=params,
            timeout=15,
        )
        d = r.json()
        if d.get("code") != 0:
            raise RuntimeError(f"列出子节点失败: {d}")
        data = d.get("data") or {}
        for n in data.get("items") or []:
            out.append({
                "node_token": n.get("node_token"),
                "obj_token": n.get("obj_token"),
                "title": (n.get("title") or "").strip() or "未命名",
            })
        page_token = data.get("page_token")
        if not page_token:
            break
        time.sleep(0.15)  # small pause between page requests
    return out


def create_wiki_node(token: str, space_id: str, parent_node_token: str, title: str, obj_type: str = "docx") -> dict:
    """Create a wiki node under ``parent_node_token`` and return the node object.

    Raises:
        RuntimeError: when the API answers with a non-zero ``code``.
    """
    r = requests.post(
        f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
        headers=_auth_headers(token, with_json=True),
        json={
            "obj_type": obj_type,
            "node_type": "origin",
            "parent_node_token": parent_node_token,
            "title": title,
        },
        timeout=10,
    )
    d = r.json()
    if d.get("code") != 0:
        raise RuntimeError(f"创建节点失败: {d}")
    return d["data"]["node"]


def get_or_create_node(token: str, space_id: str, parent_node_token: str, title: str) -> tuple:
    """Get or create the child of ``parent_node_token`` titled ``title``.

    Returns:
        ``(node_token, obj_token, created)``; ``created`` is False when an
        existing child with the same (whitespace-stripped) title was reused.
    """
    wanted = title.strip()
    for child in list_wiki_children(token, space_id, parent_node_token):
        if (child.get("title") or "").strip() == wanted:
            return (child["node_token"], child["obj_token"], False)
    node = create_wiki_node(token, space_id, parent_node_token, title)
    return (node["node_token"], node["obj_token"], True)


def get_docx_block_children(token: str, document_id: str) -> list:
    """Return the first page (up to 50 items) of the document's block list.

    Raises:
        RuntimeError: when the API answers with a non-zero ``code``.
    """
    r = requests.get(
        f"{BASE}/docx/v1/documents/{document_id}/blocks",
        params={"document_revision_id": -1, "page_size": 50},
        headers=_auth_headers(token),
        timeout=10,
    )
    d = r.json()
    if d.get("code") != 0:
        raise RuntimeError(f"获取文档块失败: {d}")
    return d.get("data", {}).get("items", [])


def create_docx_block_children(token: str, document_id: str, block_id: str, children: list, index: int = 0) -> dict:
    """Insert ``children`` under ``block_id`` at position ``index``.

    ``children`` follows the Feishu docx "create blocks" payload format; send
    at most DOCX_CHILDREN_BATCH blocks per call.

    Raises:
        RuntimeError: when the API answers with a non-zero ``code``.
    """
    r = requests.post(
        f"{BASE}/docx/v1/documents/{document_id}/blocks/{block_id}/children",
        params={"document_revision_id": -1},
        headers=_auth_headers(token, with_json=True),
        json={"children": children, "index": index},
        timeout=10,
    )
    d = r.json()
    if d.get("code") != 0:
        raise RuntimeError(f"创建块失败: {d}")
    return d.get("data", {})


def create_docx_block_children_batched(token: str, document_id: str, block_id: str, children: list) -> None:
    """Insert ``children`` in chunks of DOCX_CHILDREN_BATCH, preserving their order."""
    for start in range(0, len(children), DOCX_CHILDREN_BATCH):
        chunk = children[start : start + DOCX_CHILDREN_BATCH]
        # index=start places every chunk right after the previously inserted one.
        create_docx_block_children(token, document_id, block_id, chunk, index=start)


def clear_docx_children(token: str, document_id: str) -> bool:
    """Delete every direct child of the document's root block.

    The children are counted from the paginated block list (blocks whose
    ``parent_id`` is the document id) and then removed from the front in
    chunks, using the half-open ``start_index``/``end_index`` range that the
    ``children/batch_delete`` endpoint expects.

    Returns:
        True when the document ends up empty (or already was), False as soon
        as a list or delete request answers with a non-zero ``code``.
    """
    child_count = 0
    page_token = None
    while True:
        params = {"document_revision_id": -1, "page_size": 200}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(
            f"{BASE}/docx/v1/documents/{document_id}/blocks",
            headers=_auth_headers(token),
            params=params,
            timeout=15,
        )
        d = r.json()
        if d.get("code") != 0:
            return False
        data = d.get("data") or {}
        child_count += sum(
            1
            for b in data.get("items") or []
            if b.get("parent_id") == document_id and b.get("block_id")
        )
        page_token = data.get("page_token")
        if not page_token:
            break

    remaining = child_count
    while remaining > 0:
        batch = min(remaining, DOCX_CHILDREN_BATCH)
        rd = requests.delete(
            f"{BASE}/docx/v1/documents/{document_id}/blocks/{document_id}/children/batch_delete",
            headers=_auth_headers(token, with_json=True),
            # Always delete from the front: the rest shifts down after each call.
            json={"start_index": 0, "end_index": batch},
            timeout=15,
        )
        if (rd.json() or {}).get("code") != 0:
            return False
        remaining -= batch
        time.sleep(0.1)
    return True


def write_docx_content(token: str, doc_id: str, blocks: list, overwrite: bool = True) -> None:
    """Write ``blocks`` into the document, clearing the old body first when ``overwrite``.

    When clearing fails the new blocks are still inserted (at the top of the
    document) and a warning is printed to stderr, because the old content then
    remains below the new content.
    """
    if overwrite and not clear_docx_children(token, doc_id):
        print(f"警告: 清空文档 {doc_id} 原有内容失败,旧内容将保留在新内容之后", file=sys.stderr)
    create_docx_block_children_batched(token, doc_id, doc_id, blocks)


def _normalize_part(name: str) -> str:
    """Turn a directory name into a page title by dropping one leading underscore."""
    s = (name or "").strip()
    if s.startswith("_"):
        s = s[1:]
    return s


def _natural_key(text: str) -> list:
    """Sort key that orders ASCII digit runs numerically ("1.2" before "1.10")."""
    # re.split with a capture group alternates text/digits, so odd positions
    # are always the digit runs and items at equal positions have equal types.
    return [int(tok) if i % 2 else tok for i, tok in enumerate(re.split(r"([0-9]+)", text))]


def build_book_entries(book_root: Path) -> list:
    """Build the ordered upload plan for every ``*.md`` file below ``book_root``.

    Returns:
        A list of ``(parent_key, title, md_path)`` tuples.  ``parent_key`` is
        the "/"-joined chain of directory titles ("" for the root) and
        ``md_path`` is the path relative to ``book_root``, or None for a
        directory entry.  All directory entries come first, shallowest first;
        file entries follow.  Within the same parent, entries are ordered by
        title with numbers compared numerically.

    Files whose name starts with "." and the file "飞书同步说明.md" are
    skipped.  Only the first three directory levels are used as hierarchy;
    a leading "_" is removed from the top-level directory name.  A markdown
    file directly in the root also yields a directory entry with its title.
    """
    book_root = book_root.resolve()
    dirs_seen = set()  # (parent_key, title) pairs already emitted as directories
    dir_entries = []
    file_entries = []

    def ensure_dir(parent_key: str, title: str) -> None:
        if (parent_key, title) not in dirs_seen:
            dirs_seen.add((parent_key, title))
            dir_entries.append((parent_key, title, None))

    for md_file in sorted(book_root.rglob("*.md"), key=str):
        rel = md_file.relative_to(book_root)
        if rel.name.startswith(".") or rel.stem == "飞书同步说明":
            continue
        title = TITLE_OVERRIDE.get(rel.stem, rel.stem)
        dir_titles = list(rel.parts[:-1])[:3]
        if not dir_titles:
            # Root-level file: the directory entry shares the page with the file.
            ensure_dir("", title)
            file_entries.append(("", title, rel))
            continue
        dir_titles[0] = _normalize_part(dir_titles[0])
        parent_key = ""
        for dir_title in dir_titles:
            ensure_dir(parent_key, dir_title)
            parent_key = f"{parent_key}/{dir_title}" if parent_key else dir_title
        file_entries.append((parent_key, title, rel))

    def depth(key: str) -> int:
        return len([seg for seg in key.split("/") if seg]) if key else 0

    dir_entries.sort(key=lambda e: (depth(e[0]), _natural_key(e[0]), _natural_key(e[1])))
    file_entries.sort(key=lambda e: (_natural_key(e[0]), _natural_key(e[1])))
    return dir_entries + file_entries


def _strip_md_bold(text: str) -> str:
    """Remove markdown bold markers (``**``) so no asterisks show up in Feishu."""
    return re.sub(r"\*\*", "", text)


def _text_block(content: str) -> dict:
    """Build a docx text block (block_type 2) holding a single text run."""
    return {
        "block_type": 2,
        "text": {
            "elements": [{"type": "text_run", "text_run": {"content": content, "style": {}}}]
        },
    }


def text_to_docx_blocks(md_text: str, assets_dir: Path) -> list:
    """Convert markdown text into a list of docx text blocks.

    Every non-empty line becomes one text block with ``**`` removed and a
    trailing newline appended.  A markdown image at the start of a line becomes
    a ``[图片: alt]`` placeholder block (the image itself has to be uploaded in
    Feishu by hand); any text after the image on that line is kept as its own
    block.

    Args:
        md_text: markdown source.
        assets_dir: accepted for interface compatibility; not used.
    """
    blocks = []
    for raw_line in md_text.split("\n"):
        line = raw_line.strip()
        while line:
            match = _IMAGE_RE.match(line)
            if match is None:
                blocks.append(_text_block(_strip_md_bold(line) + "\n"))
                break
            blocks.append(_text_block(f"[图片: {match.group(1)}]"))
            line = line[match.end():].strip()
    return blocks


def _section_46_path() -> Path:
    """Path of the section 4.6 markdown file inside the book root."""
    return BOOK_ROOT / "第二篇|真实的行业" / "第4章|内容商业篇" / "4.6 Soul被封号了:解决方案和干货.md"


def _print_dry_run(args, node_token: str, has_credentials: bool) -> None:
    """Print what would be uploaded, without any network access."""
    print("dry-run: 检查本地文件与目标飞书链接。")
    print(f" 飞书链接: https://cunkebao.feishu.cn/wiki/{node_token}")
    print(f" 书稿根目录: {BOOK_ROOT} (存在={BOOK_ROOT.exists()})")
    if args.full:
        entries = build_book_entries(BOOK_ROOT)
        print(f" 全书条目数: {len(entries)}(含目录页)")
        for i, (pk, t, p) in enumerate(entries[:12], 1):
            suffix = "-> " + str(p) if p is not None else "(目录)"
            print(f" [{i}] {pk!r} / {t!r} {suffix}")
        if len(entries) > 12:
            print(f" ... 等共 {len(entries)} 项")
    else:
        section_path = _section_46_path()
        assets_dir = section_path.parent / "assets"
        print(f" 4.6 正文: {section_path} (存在={section_path.exists()})")
        print(f" 4.6 图片目录: {assets_dir} (存在={assets_dir.exists()})")
        if assets_dir.exists():
            for f in assets_dir.iterdir():
                if f.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif"):
                    print(f" - {f.name}")
    if not has_credentials:
        print(" 未配置 FEISHU_APP_ID/FEISHU_APP_SECRET,实际上传前请在 scripts/.env.feishu 中配置。")


def _sync_page(token: str, space_id: str, parent_token: str, title: str, md_file: Path) -> str:
    """Get or create the page ``title``, overwrite its body, open it; return the doc id."""
    _, doc_id, created = get_or_create_node(token, space_id, parent_token, title)
    content = md_file.read_text(encoding="utf-8")
    blocks = text_to_docx_blocks(content, md_file.parent / "assets")
    write_docx_content(token, doc_id, blocks, overwrite=True)
    print(f"已同步子页面: {title}, document_id={doc_id}" + (" (新建)" if created else " (覆盖)"))
    return doc_id


def _open_doc(doc_id: str) -> None:
    """Open the synced document in the default browser."""
    doc_url = f"https://cunkebao.feishu.cn/docx/{doc_id}"
    webbrowser.open(doc_url)
    print(f"已打开: {doc_url}")


def _ensure_parent_chain(token: str, space_id: str, cache: dict, parent_key: str) -> str:
    """Get or create every directory node along ``parent_key``; return the deepest node token.

    ``cache`` maps parent_key -> node_token, must contain the root under ""
    and is filled in along the way.
    """
    current_key = ""
    node_tok = cache[""]
    for part in (p for p in parent_key.split("/") if p):
        current_key = f"{current_key}/{part}" if current_key else part
        if current_key not in cache:
            child_tok, _, _ = get_or_create_node(token, space_id, node_tok, part)
            cache[current_key] = child_tok
        node_tok = cache[current_key]
    return node_tok


def _upload_section_46(token: str, space_id: str, parent_token: str) -> int:
    """Upload section 4.6 directly below the root wiki node."""
    section_path = _section_46_path()
    assets_dir = section_path.parent / "assets"
    if not section_path.exists():
        print(f"文件不存在: {section_path}")
        sys.exit(1)
    title = TITLE_OVERRIDE.get(section_path.stem, section_path.stem)
    doc_id = _sync_page(token, space_id, parent_token, title, section_path)
    print(
        "图片需在飞书该文档内手动上传并插入到 [图片: xxx] 位置;本地路径:",
        list(assets_dir.iterdir()) if assets_dir.exists() else [],
    )
    _open_doc(doc_id)
    return 0


def _upload_section_112(token: str, space_id: str, cache: dict) -> int:
    """Upload the "第112场" page below its chapter, creating the parent chain if needed."""
    section_path = BOOK_ROOT / "第四篇|真实的赚钱" / "第9章|我在Soul上亲访的赚钱案例" / "第112场|一个人起头,维权挣了大半套房.md"
    if not section_path.exists():
        print(f"文件不存在: {section_path}")
        sys.exit(1)
    entries = build_book_entries(BOOK_ROOT)
    target = next((e for e in entries if e[1] and "112场" in e[1]), None)
    if not target:
        print("未在全书条目中找到 112场")
        sys.exit(1)
    parent_key, title, md_path = target
    if not md_path:
        print("112场 对应的是目录项,无正文")
        sys.exit(1)
    p_token = _ensure_parent_chain(token, space_id, cache, parent_key)
    doc_id = _sync_page(token, space_id, p_token, title, section_path)
    _open_doc(doc_id)
    return 0


def _upload_full(token: str, space_id: str, cache: dict) -> int:
    """Sync the whole book: create/reuse every node and overwrite page bodies."""
    entries = build_book_entries(BOOK_ROOT)
    print(f"全书共 {len(entries)} 项(含目录页),开始同步…")
    created_count = 0
    updated_count = 0
    for parent_key, title, md_path in entries:
        p_token = cache.get(parent_key)
        if p_token is None:
            print(f" 跳过(父未就绪): {parent_key!r} / {title!r}")
            continue
        child_token, obj_token, created = get_or_create_node(token, space_id, p_token, title)
        current_key = f"{parent_key}/{title}" if parent_key else title
        cache[current_key] = child_token
        if md_path is None:
            if created:
                created_count += 1
            print(f" 目录: {current_key}")
            continue
        full_path = BOOK_ROOT / md_path
        if not full_path.exists():
            print(f" 跳过(文件不存在): {md_path}")
            continue
        try:
            content = full_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            print(f" 跳过(读文件失败): {md_path} -> {e}")
            continue
        blocks = text_to_docx_blocks(content, full_path.parent / "assets")
        write_docx_content(token, obj_token, blocks, overwrite=True)
        if created:
            created_count += 1
        else:
            updated_count += 1
        print(f" 写入: {current_key}")
    print(f"完成。新建 {created_count} 个页面,覆盖 {updated_count} 个页面。")
    return 0


def main():
    """Command-line entry point; returns the process exit code (0 on success).

    Exits with status 1 when ``requests`` is not installed, when the app
    credentials are missing for a real upload, or when a requested section
    file cannot be found.
    """
    if requests is None:
        print("请安装 requests: pip install requests")
        sys.exit(1)

    load_env()
    parser = argparse.ArgumentParser(description="上传书稿到飞书知识库")
    parser.add_argument("--dry-run", action="store_true", help="仅检查配置与文件,不上传")
    parser.add_argument("--only", default="", help="仅上传指定节,如 4.6")
    parser.add_argument("--full", action="store_true", help="按目录结构上传全书,同名则覆盖该页")
    args = parser.parse_args()

    app_id = os.environ.get("FEISHU_APP_ID", "").strip()
    app_secret = os.environ.get("FEISHU_APP_SECRET", "").strip()
    node_token = os.environ.get("FEISHU_WIKI_NODE_TOKEN", WIKI_NODE_TOKEN).strip()

    if args.dry_run:
        _print_dry_run(args, node_token, bool(app_id and app_secret))
        return 0

    if not app_id or not app_secret:
        print("错误: 未配置飞书应用凭证。")
        print("请设置环境变量 FEISHU_APP_ID、FEISHU_APP_SECRET,或在 scripts/.env.feishu 中配置。")
        print("飞书开放平台: https://open.feishu.cn/app 创建应用并开通「知识库」「云文档」权限,")
        print("将应用添加为知识库成员后,把 App ID 与 App Secret 填入 .env.feishu。")
        sys.exit(1)

    tenant_token = get_tenant_access_token(app_id, app_secret)
    # Creating wiki nodes needs edit rights: prefer the user token (refreshed
    # with the app credentials when possible), else fall back to the tenant token.
    user_token = get_user_token(app_id, app_secret)
    token = user_token if user_token else tenant_token
    if user_token:
        print("使用用户 token 操作知识库")

    node = get_node_info(token, node_token)
    space_id = node["space_id"]
    print(f"知识库 space_id: {space_id}, 父节点: {node_token}")
    cache = {"": node_token}  # parent_key -> node_token; "" is the root node

    if args.only == "4.6":
        return _upload_section_46(token, space_id, node_token)
    if "112" in args.only:
        return _upload_section_112(token, space_id, cache)
    if args.full:
        return _upload_full(token, space_id, cache)

    print("请使用 --only 4.6 或 --full 指定上传范围。")
    return 0


if __name__ == "__main__":
    sys.exit(main())