# Source: soul-yongping/scripts/feishu_wiki_upload.py (565 lines, 23 KiB, Python)
#!/usr/bin/env python3
"""Upload the book (text + image placeholders) to a Feishu wiki as child pages.

Creates child pages under the parent wiki node
https://cunkebao.feishu.cn/wiki/FNP6wdvNKij7yMkb3xCce0CYnpd and writes each
section's content into them.

Required environment variables:
    FEISHU_APP_ID            Feishu app App ID
    FEISHU_APP_SECRET        Feishu app App Secret
    FEISHU_WIKI_NODE_TOKEN   parent wiki node token (the ID in the link above,
                             e.g. FNP6wdvNKij7yMkb3xCce0CYnpd)

Optionally put these variables in scripts/.env.feishu (one KEY=VALUE per
line); this script loads that file automatically.

Permissions: the app must be added as a member/admin of the wiki space and
have the wiki + cloud-docs scopes enabled.

Usage:
    python3 feishu_wiki_upload.py [--dry-run] [--only 4.6]
    python3 feishu_wiki_upload.py --full   # sync the whole book; same-named
                                           # nodes are reused, not duplicated

    --dry-run   only check configuration and local files, upload nothing
    --only 4.6  upload only section 4.6 (for testing)
    --full      full sync: build the directory hierarchy; overwrite the body
                of pages whose title already exists
"""
import argparse
import json
import os
import re
import sys
import time
import webbrowser
from pathlib import Path

try:
    import requests
except ImportError:
    print("请安装 requests: pip install requests")
    sys.exit(1)
# Path of the cached Feishu *user* tokens file (JSON with access_token /
# refresh_token) — used when the user identity has wiki edit permission.
FEISHU_USER_TOKENS_JSON = Path("/Users/karuo/Documents/个人/卡若AI/02_卡人/水桥_平台对接/飞书管理/脚本/.feishu_tokens.json")
def load_env() -> None:
    """Load KEY=VALUE pairs from scripts/.env.feishu into os.environ.

    Pre-existing environment variables win (setdefault).  Blank lines and
    lines starting with '#' are skipped.  A value wrapped in one matching
    pair of single or double quotes is unwrapped; quotes *inside* the value
    are preserved.  (The previous implementation deleted every quote
    character anywhere in the value, corrupting values that legitimately
    contain quotes.)
    """
    env_path = Path(__file__).resolve().parent / ".env.feishu"
    if not env_path.exists():
        return
    for raw in env_path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # Unwrap exactly one matching pair of surrounding quotes.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        os.environ.setdefault(key.strip(), value)
def _refresh_user_token(app_id: str, app_secret: str) -> str:
    """Refresh the cached Feishu user access token.

    Exchanges the app credentials for an app_access_token, uses it with the
    refresh_token stored in FEISHU_USER_TOKENS_JSON, writes the rotated
    tokens back to that file, and returns the new access token.
    Returns "" on any failure.
    """
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        tokens = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        refresh_token = (tokens.get("refresh_token") or "").strip()
    except Exception:
        return ""
    if not refresh_token:
        return ""
    app_resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal",
        json={"app_id": app_id, "app_secret": app_secret},
        timeout=10,
    )
    app_token = (app_resp.json() or {}).get("app_access_token")
    if not app_token:
        return ""
    refresh_resp = requests.post(
        "https://open.feishu.cn/open-apis/authen/v1/oidc/refresh_access_token",
        headers={"Authorization": f"Bearer {app_token}", "Content-Type": "application/json"},
        json={"grant_type": "refresh_token", "refresh_token": refresh_token},
        timeout=10,
    )
    payload = refresh_resp.json() or {}
    if payload.get("code") != 0:
        return ""
    data = payload.get("data") or {}
    access_token = (data.get("access_token") or "").strip()
    if not access_token:
        return ""
    # Persist the rotated pair; keep the old refresh_token if none returned.
    tokens["access_token"] = access_token
    tokens["refresh_token"] = (data.get("refresh_token") or "").strip() or refresh_token
    FEISHU_USER_TOKENS_JSON.write_text(json.dumps(tokens, ensure_ascii=False, indent=2), encoding="utf-8")
    return access_token
def get_user_token(app_id: str = "", app_secret: str = "") -> str:
    """Return a Feishu user access token (wiki edits usually need user identity).

    Priority: the FEISHU_USER_TOKEN env var; otherwise the cached token file,
    refreshed first when app credentials and a refresh_token are available.
    Returns "" when no user token can be obtained.
    """
    env_token = os.environ.get("FEISHU_USER_TOKEN")
    if env_token:
        return env_token.strip()
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        cached = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        if app_id and app_secret and cached.get("refresh_token"):
            fresh = _refresh_user_token(app_id, app_secret)
            if fresh:
                return fresh
        return (cached.get("access_token") or "").strip()
    except Exception:
        # Unreadable/invalid token file: fall back to "no user token".
        return ""
# Feishu Open API base URL.
BASE = "https://open.feishu.cn/open-apis"
# Default parent wiki node token (the ID at the end of the wiki link).
WIKI_NODE_TOKEN = "FNP6wdvNKij7yMkb3xCce0CYnpd"
# Local root directory of the book manuscript (overridable via SOUL_BOOK_ROOT).
BOOK_ROOT = Path(os.environ.get("SOUL_BOOK_ROOT", "/Users/karuo/Documents/个人/2、我写的书/《一场soul的创业实验》"))
def get_tenant_access_token(app_id: str, app_secret: str) -> str:
    """Exchange app credentials for a tenant_access_token.

    Raises RuntimeError when the API returns a non-zero code.
    """
    resp = requests.post(
        f"{BASE}/auth/v3/tenant_access_token/internal",
        json={"app_id": app_id, "app_secret": app_secret},
        timeout=10,
    )
    payload = resp.json()
    if payload.get("code") != 0:
        raise RuntimeError(f"获取 tenant_access_token 失败: {payload}")
    return payload["tenant_access_token"]
def get_node_info(token: str, node_token: str) -> dict:
    """Fetch wiki node metadata for *node_token*.

    Raises RuntimeError when the API returns a non-zero code.
    """
    resp = requests.get(
        f"{BASE}/wiki/v2/spaces/get_node",
        params={"token": node_token},
        headers={"Authorization": f"Bearer {token}"},
        timeout=10,
    )
    payload = resp.json()
    if payload.get("code") != 0:
        raise RuntimeError(f"获取节点信息失败: {payload}")
    return payload["data"]["node"]
def list_wiki_children(token: str, space_id: str, parent_node_token: str) -> list:
    """List the direct children of a wiki node.

    Returns [{node_token, obj_token, title}, ...], paginating until the API
    stops returning a page_token.  Raises RuntimeError on API error.
    """
    children = []
    page_token = None
    while True:
        query = {"parent_node_token": parent_node_token, "page_size": 50}
        if page_token:
            query["page_token"] = page_token
        payload = requests.get(
            f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
            headers={"Authorization": f"Bearer {token}"},
            params=query,
            timeout=15,
        ).json()
        if payload.get("code") != 0:
            raise RuntimeError(f"列出子节点失败: {payload}")
        data = payload.get("data") or {}
        children.extend(
            {
                "node_token": item.get("node_token"),
                "obj_token": item.get("obj_token"),
                "title": (item.get("title") or "").strip() or "未命名",
            }
            for item in data.get("items") or []
        )
        page_token = data.get("page_token")
        if not page_token:
            return children
        time.sleep(0.15)  # gentle pacing between pages
def get_or_create_node(token: str, space_id: str, parent_node_token: str, title: str) -> tuple:
    """Return (node_token, obj_token, created) for *title* under the parent.

    An existing child whose stripped title matches is reused (created=False);
    otherwise a new docx node is created (created=True).
    """
    wanted = title.strip()
    for child in list_wiki_children(token, space_id, parent_node_token):
        if (child.get("title") or "").strip() == wanted:
            return (child["node_token"], child["obj_token"], False)
    fresh = create_wiki_node(token, space_id, parent_node_token, title)
    return (fresh["node_token"], fresh["obj_token"], True)
def create_wiki_node(token: str, space_id: str, parent_node_token: str, title: str, obj_type: str = "docx") -> dict:
    """Create a wiki node under the parent and return its node payload.

    Raises RuntimeError when the API returns a non-zero code.
    """
    body = {
        "obj_type": obj_type,
        "node_type": "origin",
        "parent_node_token": parent_node_token,
        "title": title,
    }
    payload = requests.post(
        f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        json=body,
        timeout=10,
    ).json()
    if payload.get("code") != 0:
        raise RuntimeError(f"创建节点失败: {payload}")
    return payload["data"]["node"]
def get_docx_block_children(token: str, document_id: str) -> list:
    """Fetch the first page (up to 50) of a docx document's blocks.

    Used to obtain the root block id; for a freshly created docx document the
    document_id itself serves as the root block id.  Raises RuntimeError on
    API error.
    """
    payload = requests.get(
        f"{BASE}/docx/v1/documents/{document_id}/blocks",
        params={"document_revision_id": -1, "page_size": 50},
        headers={"Authorization": f"Bearer {token}"},
        timeout=10,
    ).json()
    if payload.get("code") != 0:
        raise RuntimeError(f"获取文档块失败: {payload}")
    return payload.get("data", {}).get("items", [])
DOCX_CHILDREN_BATCH = 50  # Feishu caps a single "create child blocks" call at 50
def create_docx_block_children(token: str, document_id: str, block_id: str, children: list, index: int = 0) -> dict:
    """Create child blocks under *block_id* at position *index*.

    *children* follows the Feishu docx "create block children" API schema;
    a single call accepts at most 50 blocks.  Raises RuntimeError on API
    error and returns the API's data payload on success.
    """
    resp = requests.post(
        f"{BASE}/docx/v1/documents/{document_id}/blocks/{block_id}/children",
        params={"document_revision_id": -1},
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        json={"children": children, "index": index},
        timeout=10,
    )
    payload = resp.json()
    if payload.get("code") != 0:
        raise RuntimeError(f"创建块失败: {payload}")
    return payload.get("data", {})
def create_docx_block_children_batched(token: str, document_id: str, block_id: str, children: list) -> None:
    """Create child blocks in batches of at most DOCX_CHILDREN_BATCH.

    Each batch is inserted at its absolute offset so the final document order
    matches *children*.
    """
    offset = 0
    total = len(children)
    while offset < total:
        batch = children[offset : offset + DOCX_CHILDREN_BATCH]
        create_docx_block_children(token, document_id, block_id, batch, index=offset)
        offset += DOCX_CHILDREN_BATCH
def clear_docx_children(token: str, document_id: str) -> bool:
    """Delete every direct child block of the document's root block.

    Used to overwrite the body of an existing (same-titled) page.  Returns
    True on success or when there is nothing to delete, False on any API
    error.

    Fix: the docx ``children/batch_delete`` endpoint takes an index *range*
    (``start_index``/``end_index``), not a ``block_id_list`` — the previous
    body made every delete call fail with a non-zero code, so overwrite never
    actually cleared old content.
    """
    # Collect all blocks (paginated) so we can count the root's direct children.
    all_items = []
    page_token = None
    while True:
        params = {"document_revision_id": -1, "page_size": 200}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(
            f"{BASE}/docx/v1/documents/{document_id}/blocks",
            headers={"Authorization": f"Bearer {token}"},
            params=params,
            timeout=15,
        )
        d = r.json()
        if d.get("code") != 0:
            return False
        data = d.get("data") or {}
        all_items.extend(data.get("items") or [])
        page_token = data.get("page_token")
        if not page_token:
            break
    # Direct children of the root block (the root block id equals document_id).
    remaining = sum(
        1 for b in all_items if b.get("parent_id") == document_id and b.get("block_id")
    )
    if not remaining:
        return True
    # Delete the first <=50 children per call; indices shift down after each
    # deletion, so we always delete starting from index 0.
    while remaining > 0:
        end = min(remaining, 50)
        rd = requests.delete(
            f"{BASE}/docx/v1/documents/{document_id}/blocks/{document_id}/children/batch_delete",
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
            json={"start_index": 0, "end_index": end},
            timeout=15,
        )
        if (rd.json() or {}).get("code") != 0:
            return False
        remaining -= end
        time.sleep(0.1)  # gentle pacing between delete calls
    return True
def write_docx_content(token: str, doc_id: str, blocks: list, overwrite: bool = True) -> None:
    """Write *blocks* as the body of document *doc_id*.

    With overwrite=True the root block's existing children are cleared first
    (best effort — the clear's success flag is deliberately ignored), then
    the new blocks are appended in API-sized batches.
    """
    if overwrite:
        clear_docx_children(token, doc_id)
    create_docx_block_children_batched(token, doc_id, doc_id, blocks)
# Feishu page-title overrides for sections whose page title differs from the
# markdown file name (prevents creating duplicate pages).
TITLE_OVERRIDE = {"4.6 Soul被封号了解决方案和干货": "4.6 Soul被封号了如何处理"}
def _normalize_part(name: str) -> str:
"""目录名转飞书页名_第一篇真实的人 -> 第一篇|真实的人"""
s = (name or "").strip()
if s.startswith("_"):
s = s[1:]
return s
def build_book_entries(book_root: Path) -> list:
    """Build the upload plan for the whole book in Feishu directory order.

    Walks *book_root* for .md files and returns a list of
    (parent_key, title, md_path) tuples.  parent_key "" means directly under
    the wiki root node; md_path None marks a directory (container) page.
    All directory pages come first (shallowest level first), then file pages
    sorted by (parent_key, title).
    """
    book_root = book_root.resolve()
    dirs_seen = set()  # (parent_key, title) pairs already registered as directories
    entries = []  # (parent_key, title, md_path or None)

    def ensure_dir(parent_key: str, title: str) -> None:
        # Register a directory page at most once per (parent_key, title).
        k = (parent_key, title)
        if k in dirs_seen:
            return
        dirs_seen.add(k)
        entries.append((parent_key, title, None))

    for rel in sorted(book_root.rglob("*.md"), key=lambda p: str(p)):
        try:
            rel = rel.relative_to(book_root)
        except ValueError:
            continue
        # Skip hidden files and the sync-notes document itself.
        if rel.name.startswith(".") or rel.stem == "飞书同步说明":
            continue
        parts = rel.parts
        name_stem = rel.stem
        title = TITLE_OVERRIDE.get(name_stem, name_stem)
        if len(parts) == 1:
            # Root-level file: also registered as a same-titled directory
            # entry; presumably downstream node lookup reuses the page by
            # title so content still lands on it — verify against main().
            ensure_dir("", title)
            entries.append(("", title, rel))
            continue
        # Nested layout, e.g. 第一篇/第1章/1.1.md or 第二篇/第4章/4.6.md
        p0 = _normalize_part(parts[0])
        if len(parts) == 2:
            ensure_dir("", p0)
            entries.append((p0, title, rel))
            continue
        p1 = parts[1]
        ensure_dir("", p0)
        ensure_dir(p0, p1)
        if len(parts) == 3:
            entries.append((f"{p0}/{p1}", title, rel))
            continue
        # Deeper nesting, e.g. 附录/附录1.../x.md
        p2 = parts[2]
        ensure_dir("", p0)
        ensure_dir(p0, p1)
        ensure_dir(f"{p0}/{p1}", p2)
        entries.append((f"{p0}/{p1}/{p2}", title, rel))
    # Ordering guarantee: all directory pages first, then file-backed pages;
    # within the same parent, sorted by title.
    dir_entries = [(pk, t, None) for (pk, t, p) in entries if p is None]
    file_entries = [(pk, t, p) for (pk, t, p) in entries if p is not None]

    # Sort directories by depth: root level first, then one level down, etc.
    def depth(k):
        return len([x for x in k.split("/") if x]) if k else 0

    dir_entries.sort(key=lambda x: (depth(x[0]), x[0], x[1]))
    file_entries.sort(key=lambda x: (x[0], x[1]))
    seen_dir = set()
    out = []
    for pk, t, _ in dir_entries:
        if (pk, t) in seen_dir:
            continue
        seen_dir.add((pk, t))
        out.append((pk, t, None))
    for pk, t, p in file_entries:
        out.append((pk, t, p))
    return out
def _strip_md_bold(text: str) -> str:
"""去掉 Markdown 粗体星号,飞书正文不保留 **。"""
return re.sub(r"\*\*", "", text)
def text_to_docx_blocks(md_text: str, assets_dir: Path) -> list:
    """Convert markdown text into a list of docx child-block dicts.

    Text-only conversion: paragraphs are split on blank lines and each
    non-empty line becomes one text block; an image paragraph ``![alt](path)``
    becomes a "[图片: alt]" placeholder paragraph (images must be uploaded
    inside Feishu afterwards); '**' bold markers are removed so no literal
    asterisks appear on the page.  *assets_dir* is accepted for interface
    compatibility but not read here.  (Also removed an unused local binding
    of the image path.)
    """

    def _text_block(content: str) -> dict:
        # One Feishu docx text block (block_type 2) with a single text run.
        return {
            "block_type": 2,
            "text": {
                "elements": [
                    {"type": "text_run", "text_run": {"content": content, "style": {}}}
                ]
            },
        }

    blocks = []
    # Paragraphs are separated by one or more blank lines.
    for paragraph in re.split(r"\n\n+", md_text):
        paragraph = paragraph.strip()
        if not paragraph:
            continue
        if paragraph.startswith("!["):
            # Image paragraph: emit a placeholder; the actual image must be
            # inserted manually in Feishu.
            match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', paragraph)
            if match:
                blocks.append(_text_block(f"[图片: {match.group(1)}]"))
            continue
        for raw_line in paragraph.split("\n"):
            stripped = raw_line.strip()
            if stripped:
                # Drop '**' so bold markers don't show as literal asterisks.
                blocks.append(_text_block(re.sub(r"\*\*", "", stripped) + "\n"))
    return blocks
def main():
    """CLI entry point: sync the book (or a single section) into the Feishu wiki.

    Modes: --dry-run (check only), --only 4.6 / --only 112场 (one section),
    --full (whole book).  Returns 0 on success; exits 1 on missing
    credentials or missing files.

    Fixes vs. previous version: removed a duplicate dead assignment of
    full_path in the --full loop; simplified the redundant
    ``args.only == "112场" or "112" in args.only`` condition ("112场"
    contains "112"); the final fallthrough now returns 0 like every other
    branch instead of calling sys.exit(0).
    """
    load_env()
    parser = argparse.ArgumentParser(description="上传书稿到飞书知识库")
    parser.add_argument("--dry-run", action="store_true", help="仅检查配置与文件,不上传")
    parser.add_argument("--only", default="", help="仅上传指定节,如 4.6")
    parser.add_argument("--full", action="store_true", help="按目录结构上传全书,同名则覆盖该页")
    args = parser.parse_args()

    app_id = os.environ.get("FEISHU_APP_ID", "").strip()
    app_secret = os.environ.get("FEISHU_APP_SECRET", "").strip()
    node_token = os.environ.get("FEISHU_WIKI_NODE_TOKEN", WIKI_NODE_TOKEN).strip()

    if args.dry_run:
        # Check configuration and local files only; no network writes.
        print("dry-run: 检查本地文件与目标飞书链接。")
        print(" 飞书链接: https://cunkebao.feishu.cn/wiki/FNP6wdvNKij7yMkb3xCce0CYnpd")
        print(f" 书稿根目录: {BOOK_ROOT} (存在={BOOK_ROOT.exists()})")
        if args.full:
            entries = build_book_entries(BOOK_ROOT)
            print(f" 全书条目数: {len(entries)}(含目录页)")
            for i, (pk, t, p) in enumerate(entries[:12]):
                print(f" [{i+1}] {pk!r} / {t!r} {'-> ' + str(p) if p else '(目录)'}")
            if len(entries) > 12:
                print(f" ... 等共 {len(entries)}")
        else:
            section_path = BOOK_ROOT / "第二篇|真实的行业" / "第4章内容商业篇" / "4.6 Soul被封号了解决方案和干货.md"
            assets_dir = section_path.parent / "assets"
            print(f" 4.6 正文: {section_path} (存在={section_path.exists()})")
            print(f" 4.6 图片目录: {assets_dir} (存在={assets_dir.exists()})")
            if assets_dir.exists():
                for f in assets_dir.iterdir():
                    if f.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif"):
                        print(f" - {f.name}")
        if not app_id or not app_secret:
            print(" 未配置 FEISHU_APP_ID/FEISHU_APP_SECRET实际上传前请在 scripts/.env.feishu 中配置。")
        return 0

    if not app_id or not app_secret:
        print("错误: 未配置飞书应用凭证。")
        print("请设置环境变量 FEISHU_APP_ID、FEISHU_APP_SECRET或在 scripts/.env.feishu 中配置。")
        print("飞书开放平台: https://open.feishu.cn/app 创建应用并开通「知识库」「云文档」权限,")
        print("将应用添加为知识库成员后,把 App ID 与 App Secret 填入 .env.feishu。")
        sys.exit(1)

    tenant_token = get_tenant_access_token(app_id, app_secret)
    # Wiki node creation needs edit permission: prefer a user token (refreshed
    # via the app credentials when a refresh_token is cached).
    user_token = get_user_token(app_id, app_secret)
    token = user_token if user_token else tenant_token
    if user_token:
        print("使用用户 token 操作知识库")
    node = get_node_info(token, node_token)
    space_id = node["space_id"]
    parent_token = node_token
    print(f"知识库 space_id: {space_id}, 父节点: {parent_token}")
    write_token = user_token if user_token else tenant_token
    cache = {"": parent_token}  # parent_key -> node_token; "" maps to the wiki root node

    # Upload only section 4.6.
    if args.only == "4.6":
        section_path = BOOK_ROOT / "第二篇|真实的行业" / "第4章内容商业篇" / "4.6 Soul被封号了解决方案和干货.md"
        assets_dir = section_path.parent / "assets"
        if not section_path.exists():
            print(f"文件不存在: {section_path}")
            sys.exit(1)
        content = section_path.read_text(encoding="utf-8")
        title = "4.6 Soul被封号了如何处理"
        _, doc_id, created = get_or_create_node(token, space_id, parent_token, title)
        blocks = text_to_docx_blocks(content, assets_dir)
        write_docx_content(write_token, doc_id, blocks, overwrite=True)
        print(f"已同步子页面: {title}, document_id={doc_id}" + (" (新建)" if created else " (覆盖)"))
        print("图片需在飞书该文档内手动上传并插入到 [图片: xxx] 位置;本地路径:", list(assets_dir.iterdir()) if assets_dir.exists() else [])
        doc_url = f"https://cunkebao.feishu.cn/docx/{doc_id}"
        webbrowser.open(doc_url)
        print(f"已打开: {doc_url}")
        return 0

    # Upload only the "第112场" section (under chapter 9).
    # "112场" contains "112", so a single substring check covers both spellings.
    if "112" in args.only:
        section_path = BOOK_ROOT / "第四篇|真实的赚钱" / "第9章我在Soul上亲访的赚钱案例" / "第112场一个人起头维权挣了大半套房.md"
        if not section_path.exists():
            print(f"文件不存在: {section_path}")
            sys.exit(1)
        entries = build_book_entries(BOOK_ROOT)
        target = next((e for e in entries if e[1] and "112场" in e[1]), None)
        if not target:
            print("未在全书条目中找到 112场")
            sys.exit(1)
        parent_key, title, md_path = target
        if not md_path:
            print("112场 对应的是目录项,无正文")
            sys.exit(1)
        # Make sure the chain of parent wiki nodes exists, filling the cache.
        parts = [p for p in parent_key.split("/") if p]
        for i in range(len(parts) + 1):
            pk = "/".join(parts[:i]) if i else ""
            p_token = cache.get(pk)
            if p_token is None:
                print(f" 跳过(父未就绪): {pk!r}")
                continue
            need_title = parts[i] if i < len(parts) else title
            if i < len(parts):
                node_tok, _, _ = get_or_create_node(token, space_id, p_token, need_title)
                cache[pk + "/" + need_title if pk else need_title] = node_tok
        p_token = cache.get(parent_key)
        if p_token is None:
            print("父节点未解析到,请先执行 --full 一次或检查目录结构")
            sys.exit(1)
        _, doc_id, created = get_or_create_node(token, space_id, p_token, title)
        content = section_path.read_text(encoding="utf-8")
        assets_dir = section_path.parent / "assets"
        blocks = text_to_docx_blocks(content, assets_dir)
        write_docx_content(write_token, doc_id, blocks, overwrite=True)
        print(f"已同步子页面: {title}, document_id={doc_id}" + (" (新建)" if created else " (覆盖)"))
        doc_url = f"https://cunkebao.feishu.cn/docx/{doc_id}"
        webbrowser.open(doc_url)
        print(f"已打开: {doc_url}")
        return 0

    # Full-book upload: create the directory hierarchy; same-named nodes are
    # reused and their body overwritten.
    if args.full:
        entries = build_book_entries(BOOK_ROOT)
        print(f"全书共 {len(entries)} 项(含目录页),开始同步…")
        created_count = 0
        updated_count = 0
        for parent_key, title, md_path in entries:
            p_token = cache.get(parent_key)
            if p_token is None:
                print(f" 跳过(父未就绪): {parent_key!r} / {title!r}")
                continue
            node_token, obj_token, created = get_or_create_node(token, space_id, p_token, title)
            current_key = f"{parent_key}/{title}" if parent_key else title
            cache[current_key] = node_token
            if md_path is not None:
                full_path = BOOK_ROOT / md_path
                if not full_path.exists():
                    print(f" 跳过(文件不存在): {md_path}")
                    continue
                try:
                    content = full_path.read_text(encoding="utf-8")
                except Exception as e:
                    print(f" 跳过(读文件失败): {md_path} -> {e}")
                    continue
                assets_dir = full_path.parent / "assets"
                blocks = text_to_docx_blocks(content, assets_dir)
                write_docx_content(write_token, obj_token, blocks, overwrite=True)
                if created:
                    created_count += 1
                else:
                    updated_count += 1
                print(f" 写入: {current_key}")
            else:
                if created:
                    created_count += 1
                print(f" 目录: {current_key}")
        print(f"完成。新建 {created_count} 个页面,覆盖 {updated_count} 个页面。")
        return 0

    print("请使用 --only 4.6 或 --full 指定上传范围。")
    return 0
if __name__ == "__main__":
    # Script entry point.
    main()