565 lines
23 KiB
Python
565 lines
23 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
将《一场soul的创业实验》全书(含图片)上传到飞书知识库子节点下。
|
|||
|
|
|
|||
|
|
飞书知识库链接:https://cunkebao.feishu.cn/wiki/FNP6wdvNKij7yMkb3xCce0CYnpd
|
|||
|
|
需在该链接对应的「创业实验」节点下创建子页面并写入内容。
|
|||
|
|
|
|||
|
|
环境变量(必填):
|
|||
|
|
FEISHU_APP_ID 飞书应用 App ID(找卡若AI拿:卡若AI/02_卡人(水)/水桥_平台对接/飞书管理/)
|
|||
|
|
FEISHU_APP_SECRET 飞书应用 App Secret(同上)
|
|||
|
|
FEISHU_WIKI_NODE_TOKEN 知识库父节点 token,即链接中的 ID:FNP6wdvNKij7yMkb3xCce0CYnpd
|
|||
|
|
|
|||
|
|
可选:将上述变量写在 scripts/.env.feishu 中(每行 KEY=VALUE),本脚本会自动加载。
|
|||
|
|
|
|||
|
|
权限要求:应用需加入该知识库为成员(管理员),并开通 知识库、云文档 权限。
|
|||
|
|
|
|||
|
|
用法:
|
|||
|
|
python3 feishu_wiki_upload.py [--dry-run] [--only 4.6]
|
|||
|
|
python3 feishu_wiki_upload.py --full 按目录结构上传全书(同名节点复用,不重复建)
|
|||
|
|
--dry-run 仅检查配置与本地文件,不上传
|
|||
|
|
--only 4.6 仅上传 4.6 一节(用于测试)
|
|||
|
|
--full 全书同步:建齐目录层级,有同名则覆盖该页正文
|
|||
|
|
"""
|
|||
|
|
import argparse
|
|||
|
|
import json
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
import webbrowser
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
import requests
|
|||
|
|
except ImportError:
|
|||
|
|
print("请安装 requests: pip install requests")
|
|||
|
|
sys.exit(1)
|
|||
|
|
|
|||
|
|
# Path of the JSON file holding the Feishu *user* tokens (read for
# "access_token" / "refresh_token" by the token helpers below).
# The FEISHU_USER_TOKENS_JSON environment variable overrides the default
# location, mirroring how BOOK_ROOT honours SOUL_BOOK_ROOT.
FEISHU_USER_TOKENS_JSON = Path(
    os.environ.get(
        "FEISHU_USER_TOKENS_JSON",
        "/Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水桥_平台对接/飞书管理/脚本/.feishu_tokens.json",
    )
)
|
|||
|
|
|
|||
|
|
def load_env():
    """Load ``KEY=VALUE`` pairs from ``.env.feishu`` into ``os.environ``.

    The file is looked up in the directory that contains this script.
    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored.  Variables that are already present in the environment are
    left untouched (``setdefault``).

    Only one *matching* pair of surrounding quotes is removed from a value,
    so quote characters that are part of the value itself are preserved.
    Lines with an empty key (``=value``) are skipped instead of being handed
    to ``os.environ``.
    """
    env_path = Path(__file__).resolve().parent / ".env.feishu"
    if not env_path.exists():
        return
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        value = value.strip()
        if not key:
            continue
        # Drop one pair of matching surrounding quotes: KEY="v" or KEY='v'.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        os.environ.setdefault(key, value)
|
|||
|
|
|
|||
|
|
def _refresh_user_token(app_id: str, app_secret: str) -> str:
    """Refresh the Feishu user access token and persist it to the token file.

    Reads ``refresh_token`` from ``FEISHU_USER_TOKENS_JSON``, obtains an
    ``app_access_token`` with the app credentials, then calls the OIDC
    refresh endpoint.  On success the new ``access_token`` (and the new
    ``refresh_token`` when one is returned, otherwise the old one) is written
    back to the same JSON file.

    Args:
        app_id: Feishu application id.
        app_secret: Feishu application secret.

    Returns:
        The new access token, or ``""`` when the token file is missing or
        unreadable, holds no refresh token, a request fails, or the API
        reports a non-zero ``code``.

    Network and JSON-decoding errors are mapped to ``""`` like every other
    failure here, so that get_user_token() can still fall back to the stored
    access token instead of losing it to its blanket ``except``.
    """
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        stored = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        refresh = (stored.get("refresh_token") or "").strip()
    except (OSError, ValueError, AttributeError):
        # Unreadable file, invalid JSON, or JSON that is not the expected object.
        return ""
    if not refresh:
        return ""

    try:
        app_resp = requests.post(
            "https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal",
            json={"app_id": app_id, "app_secret": app_secret},
            timeout=10,
        )
        app_token = (app_resp.json() or {}).get("app_access_token")
        if not app_token:
            return ""
        user_resp = requests.post(
            "https://open.feishu.cn/open-apis/authen/v1/oidc/refresh_access_token",
            headers={"Authorization": f"Bearer {app_token}", "Content-Type": "application/json"},
            json={"grant_type": "refresh_token", "refresh_token": refresh},
            timeout=10,
        )
        out = user_resp.json() or {}
    except (requests.RequestException, ValueError):
        # Connection/timeout problems or a non-JSON response body.
        return ""

    if out.get("code") != 0:
        return ""
    data = out.get("data") or {}
    new_access = (data.get("access_token") or "").strip()
    # Keep the previous refresh token when the response does not carry a new one.
    new_refresh = (data.get("refresh_token") or "").strip() or refresh
    if not new_access:
        return ""
    stored["access_token"] = new_access
    stored["refresh_token"] = new_refresh
    FEISHU_USER_TOKENS_JSON.write_text(
        json.dumps(stored, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return new_access
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_user_token(app_id: str = "", app_secret: str = "") -> str:
    """Return a Feishu user access token, or ``""`` when none is available.

    Lookup order:
      1. the ``FEISHU_USER_TOKEN`` environment variable (stripped);
      2. a token refreshed through _refresh_user_token(), attempted only when
         both app credentials are given and the token file has a
         ``refresh_token``;
      3. the ``access_token`` stored in ``FEISHU_USER_TOKENS_JSON``.

    Any exception raised while reading the file or refreshing yields ``""``.
    """
    from_env = os.environ.get("FEISHU_USER_TOKEN")
    if from_env:
        return from_env.strip()
    if not FEISHU_USER_TOKENS_JSON.exists():
        return ""
    try:
        stored = json.loads(FEISHU_USER_TOKENS_JSON.read_text(encoding="utf-8"))
        can_refresh = app_id and app_secret and stored.get("refresh_token")
        fresh = _refresh_user_token(app_id, app_secret) if can_refresh else ""
        if fresh:
            return fresh
        return (stored.get("access_token") or "").strip()
    except Exception:
        return ""
|
|||
|
|
|
|||
|
|
|
|||
|
|
# URL prefix used to build the Feishu Open API endpoint URLs in the helpers below.
BASE = "https://open.feishu.cn/open-apis"
|
|||
|
|
# Default wiki parent node token; main() falls back to it when the
# FEISHU_WIKI_NODE_TOKEN environment variable is not set.
WIKI_NODE_TOKEN = "FNP6wdvNKij7yMkb3xCce0CYnpd"
|
|||
|
|
# Local root directory of the book manuscript; the SOUL_BOOK_ROOT environment
# variable overrides the default path.
BOOK_ROOT = Path(os.environ.get("SOUL_BOOK_ROOT", "/Users/karuo/Documents/个人/2、我写的书/《一场soul的创业实验》"))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_tenant_access_token(app_id: str, app_secret: str) -> str:
    """Exchange the app credentials for a tenant access token.

    Args:
        app_id: Feishu application id.
        app_secret: Feishu application secret.

    Returns:
        The ``tenant_access_token`` field of the response.

    Raises:
        RuntimeError: if the response ``code`` is not 0.
    """
    credentials = {"app_id": app_id, "app_secret": app_secret}
    response = requests.post(
        f"{BASE}/auth/v3/tenant_access_token/internal",
        json=credentials,
        timeout=10,
    )
    payload = response.json()
    if payload.get("code") == 0:
        return payload["tenant_access_token"]
    raise RuntimeError(f"获取 tenant_access_token 失败: {payload}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_node_info(token: str, node_token: str) -> dict:
    """Fetch the wiki node description for ``node_token``.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        node_token: token of the wiki node to look up.

    Returns:
        The ``data.node`` object of the response.

    Raises:
        RuntimeError: if the response ``code`` is not 0.
    """
    auth_header = {"Authorization": f"Bearer {token}"}
    response = requests.get(
        f"{BASE}/wiki/v2/spaces/get_node",
        params={"token": node_token},
        headers=auth_header,
        timeout=10,
    )
    payload = response.json()
    if payload.get("code") == 0:
        return payload["data"]["node"]
    raise RuntimeError(f"获取节点信息失败: {payload}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def list_wiki_children(token: str, space_id: str, parent_node_token: str) -> list:
    """List the direct children of a wiki node, following pagination.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        space_id: id of the wiki space that owns the node.
        parent_node_token: token of the node whose children are listed.

    Returns:
        A list of ``{"node_token", "obj_token", "title"}`` dicts; blank or
        missing titles are replaced by ``"未命名"``.

    Raises:
        RuntimeError: if any page's response ``code`` is not 0.
    """
    children = []
    cursor = None
    while True:
        query = {"parent_node_token": parent_node_token, "page_size": 50}
        if cursor:
            query["page_token"] = cursor
        response = requests.get(
            f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
            headers={"Authorization": f"Bearer {token}"},
            params=query,
            timeout=15,
        )
        payload = response.json()
        if payload.get("code") != 0:
            raise RuntimeError(f"列出子节点失败: {payload}")
        page = payload.get("data") or {}
        children.extend(
            {
                "node_token": item.get("node_token"),
                "obj_token": item.get("obj_token"),
                "title": (item.get("title") or "").strip() or "未命名",
            }
            for item in page.get("items") or []
        )
        cursor = page.get("page_token")
        if not cursor:
            return children
        # Short pause between pages.
        time.sleep(0.15)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_or_create_node(token: str, space_id: str, parent_node_token: str, title: str) -> tuple:
    """Return the child of ``parent_node_token`` titled ``title``, creating it if absent.

    Titles are compared after stripping surrounding whitespace; the first
    matching child is reused.

    Returns:
        ``(node_token, obj_token, created)`` where ``created`` is True only
        when a new node had to be created.
    """
    match = next(
        (
            child
            for child in list_wiki_children(token, space_id, parent_node_token)
            if (child.get("title") or "").strip() == title.strip()
        ),
        None,
    )
    if match is not None:
        return (match["node_token"], match["obj_token"], False)
    fresh = create_wiki_node(token, space_id, parent_node_token, title)
    return (fresh["node_token"], fresh["obj_token"], True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_wiki_node(token: str, space_id: str, parent_node_token: str, title: str, obj_type: str = "docx") -> dict:
    """Create a wiki node titled ``title`` under ``parent_node_token``.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        space_id: id of the wiki space.
        parent_node_token: token of the parent node.
        title: title of the new node.
        obj_type: value sent as ``obj_type`` (defaults to ``"docx"``).

    Returns:
        The ``data.node`` object of the response.

    Raises:
        RuntimeError: if the response ``code`` is not 0.
    """
    body = {
        "obj_type": obj_type,
        "node_type": "origin",
        "parent_node_token": parent_node_token,
        "title": title,
    }
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    response = requests.post(
        f"{BASE}/wiki/v2/spaces/{space_id}/nodes",
        headers=request_headers,
        json=body,
        timeout=10,
    )
    payload = response.json()
    if payload.get("code") == 0:
        return payload["data"]["node"]
    raise RuntimeError(f"创建节点失败: {payload}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_docx_block_children(token: str, document_id: str) -> list:
    """Return the first page (up to 50 items) of a document's block list.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        document_id: id of the docx document.

    Returns:
        The ``data.items`` list of the response, or ``[]`` when absent.

    Raises:
        RuntimeError: if the response ``code`` is not 0.
    """
    query = {"document_revision_id": -1, "page_size": 50}
    response = requests.get(
        f"{BASE}/docx/v1/documents/{document_id}/blocks",
        params=query,
        headers={"Authorization": f"Bearer {token}"},
        timeout=10,
    )
    payload = response.json()
    if payload.get("code") != 0:
        raise RuntimeError(f"获取文档块失败: {payload}")
    return payload.get("data", {}).get("items", [])
|
|||
|
|
|
|||
|
|
|
|||
|
|
DOCX_CHILDREN_BATCH = 50 # Feishu's upper limit on child blocks created in a single request
|
|||
|
|
|
|||
|
|
def create_docx_block_children(token: str, document_id: str, block_id: str, children: list, index: int = 0) -> dict:
    """Create child blocks under ``block_id`` of a docx document.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        document_id: id of the docx document.
        block_id: id of the parent block.
        children: block payloads in the shape expected by the Feishu docx
            "create blocks" API; at most 50 per call.
        index: value sent as ``index`` in the request body.

    Returns:
        The ``data`` object of the response, or ``{}`` when absent.

    Raises:
        RuntimeError: if the response ``code`` is not 0.
    """
    endpoint = f"{BASE}/docx/v1/documents/{document_id}/blocks/{block_id}/children"
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    response = requests.post(
        endpoint,
        params={"document_revision_id": -1},
        headers=request_headers,
        json={"children": children, "index": index},
        timeout=10,
    )
    payload = response.json()
    if payload.get("code") != 0:
        raise RuntimeError(f"创建块失败: {payload}")
    return payload.get("data", {})
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_docx_block_children_batched(token: str, document_id: str, block_id: str, children: list) -> None:
    """Create ``children`` under ``block_id`` in slices of DOCX_CHILDREN_BATCH.

    Each slice is sent with ``index`` equal to the offset of its first
    element within ``children``.
    """
    total = len(children)
    offset = 0
    while offset < total:
        upper = offset + DOCX_CHILDREN_BATCH
        create_docx_block_children(token, document_id, block_id, children[offset:upper], index=offset)
        offset = upper
|
|||
|
|
|
|||
|
|
|
|||
|
|
def clear_docx_children(token: str, document_id: str) -> bool:
    """Delete every direct child of the document's root block.

    Used to overwrite the body of an existing page.  All blocks of the
    document are listed (following pagination) and the ones whose
    ``parent_id`` equals ``document_id`` are counted as direct children of
    the root block.

    The Feishu docx ``batch_delete`` endpoint removes children by index
    range (``start_index`` inclusive, ``end_index`` exclusive), not by block
    id, so the children are removed from the front in slices of at most 50
    until none are left.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        document_id: id of the docx document; also used as the root block id.

    Returns:
        True when the document ends up without direct children (including
        when it had none), False as soon as any API call reports a non-zero
        ``code``.
    """
    blocks_url = f"{BASE}/docx/v1/documents/{document_id}/blocks"
    all_items = []
    page_token = None
    while True:
        params = {"document_revision_id": -1, "page_size": 200}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(
            blocks_url,
            headers={"Authorization": f"Bearer {token}"},
            params=params,
            timeout=15,
        )
        d = r.json()
        if d.get("code") != 0:
            return False
        data = d.get("data") or {}
        all_items.extend(data.get("items") or [])
        page_token = data.get("page_token")
        if not page_token:
            break

    remaining = sum(
        1 for b in all_items if b.get("parent_id") == document_id and b.get("block_id")
    )
    while remaining > 0:
        batch = min(remaining, 50)
        rd = requests.delete(
            f"{blocks_url}/{document_id}/children/batch_delete",
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
            # Always delete from the front: later children shift down after each call.
            json={"start_index": 0, "end_index": batch},
            timeout=15,
        )
        if (rd.json() or {}).get("code") != 0:
            return False
        remaining -= batch
        time.sleep(0.1)
    return True
|
|||
|
|
|
|||
|
|
|
|||
|
|
def write_docx_content(token: str, doc_id: str, blocks: list, overwrite: bool = True) -> None:
    """Write ``blocks`` as the body of document ``doc_id``.

    Args:
        token: bearer token used for the docx API calls.
        doc_id: id of the docx document; also used as the root block id.
        blocks: child block payloads to create under the root block.
        overwrite: when True, the existing direct children of the root block
            are removed first.

    Raises:
        RuntimeError: if ``overwrite`` is set and the existing content could
            not be cleared.  Writing anyway would insert the new blocks in
            front of the stale ones and leave a duplicated page, so the
            failure is surfaced like the other API failures in this file.
    """
    if overwrite and not clear_docx_children(token, doc_id):
        raise RuntimeError(f"清空文档正文失败: {doc_id}")
    create_docx_block_children_batched(token, doc_id, doc_id, blocks)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 节标题与文件名不一致时在此指定飞书页面标题(避免重复建页)
|
|||
|
|
# Maps a local file stem to the Feishu page title to use when the two differ;
# build_book_entries() looks up each markdown file's stem here.
TITLE_OVERRIDE = {"4.6 Soul被封号了:解决方案和干货": "4.6 Soul被封号了:如何处理?"}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _normalize_part(name: str) -> str:
    """Turn a directory name into a page name.

    Surrounding whitespace is stripped and a single leading underscore is
    removed, e.g. ``"_第一篇|真实的人"`` -> ``"第一篇|真实的人"``.  A falsy
    ``name`` yields ``""``.
    """
    cleaned = (name or "").strip()
    return cleaned[1:] if cleaned[:1] == "_" else cleaned
|
|||
|
|
|
|||
|
|
|
|||
|
|
def build_book_entries(book_root: Path) -> list:
    """Plan the wiki pages for every ``*.md`` file below ``book_root``.

    Returns a list of ``(parent_key, title, md_path)`` tuples:

    * ``parent_key`` is the ``/``-joined chain of ancestor page titles,
      ``""`` for pages directly under the root;
    * ``md_path`` is the file path relative to ``book_root``, or ``None`` for
      a directory page without body.

    Directory entries come first, ordered by depth, then parent key, then
    title; file entries follow, ordered by parent key, then title.  Files
    whose name starts with ``.`` or whose stem is ``飞书同步说明`` are skipped.
    Only the first three directory levels become pages; files nested deeper
    are attached to the third-level directory.
    """
    root = book_root.resolve()
    known_dirs = set()   # (parent_key, title) pairs already registered
    dir_rows = []        # (parent_key, title, None)
    file_rows = []       # (parent_key, title, relative md path)

    def register_dir(parent_key: str, title: str) -> None:
        key = (parent_key, title)
        if key not in known_dirs:
            known_dirs.add(key)
            dir_rows.append((parent_key, title, None))

    for md_file in sorted(root.rglob("*.md"), key=str):
        try:
            rel = md_file.relative_to(root)
        except ValueError:
            continue
        if rel.name.startswith(".") or rel.stem == "飞书同步说明":
            continue
        title = TITLE_OVERRIDE.get(rel.stem, rel.stem)
        segments = rel.parts

        if len(segments) == 1:
            # A root-level file is registered both as a directory entry and as a file entry.
            register_dir("", title)
            file_rows.append(("", title, rel))
            continue

        # Only the top-level directory name goes through _normalize_part().
        top = _normalize_part(segments[0])
        register_dir("", top)
        if len(segments) == 2:
            file_rows.append((top, title, rel))
            continue

        second = segments[1]
        register_dir(top, second)
        second_key = f"{top}/{second}"
        if len(segments) == 3:
            file_rows.append((second_key, title, rel))
            continue

        third = segments[2]
        register_dir(second_key, third)
        file_rows.append((f"{second_key}/{third}", title, rel))

    def depth(key: str) -> int:
        # Number of non-empty components in a parent key ("" -> 0).
        return sum(1 for piece in key.split("/") if piece) if key else 0

    dir_rows.sort(key=lambda row: (depth(row[0]), row[0], row[1]))
    file_rows.sort(key=lambda row: (row[0], row[1]))
    return dir_rows + file_rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _strip_md_bold(text: str) -> str:
    """Return ``text`` with every ``**`` (Markdown bold marker) removed."""
    bold_marker = re.compile(r"\*\*")
    return bold_marker.sub("", text)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def text_to_docx_blocks(md_text: str, assets_dir: Path) -> list:
    """Convert Markdown text into a flat list of docx text-block payloads.

    The text is split into segments on blank lines.  Within a segment every
    non-empty line becomes one block (``block_type`` 2) whose content is the
    line with ``**`` markers removed plus a trailing newline.  An image
    reference ``![alt](path)`` at the start of a segment or of a line becomes
    a placeholder block with the content ``[图片: alt]``; images are not
    uploaded.

    Text that follows an image inside the same segment (a caption on the next
    line, or trailing text on the same line) is emitted as normal text blocks
    instead of being dropped.

    Args:
        md_text: Markdown source.
        assets_dir: directory of the section's images; currently unused and
            kept for interface compatibility.

    Returns:
        A list of block dicts suitable as ``children`` of a create-blocks call.
    """
    image_re = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')

    def text_block(content: str) -> dict:
        # One text block holding a single text run.
        return {
            "block_type": 2,
            "text": {
                "elements": [{"type": "text_run", "text_run": {"content": content, "style": {}}}]
            },
        }

    blocks = []
    for seg in re.split(r"\n\n+", md_text):
        pending = seg.strip()
        while pending:
            image = image_re.match(pending)
            if image:
                blocks.append(text_block(f"[图片: {image.group(1)}]"))
                # Keep processing whatever follows the image in this segment.
                pending = pending[image.end():].lstrip()
                continue
            line, _, pending = pending.partition("\n")
            pending = pending.lstrip()
            line = line.strip()
            if line:
                blocks.append(text_block(_strip_md_bold(line) + "\n"))
    return blocks
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point: parse the arguments and run the requested sync.

    Modes, checked in this order:

    * ``--dry-run``: print the local paths (with ``--full`` also the first 12
      planned entries) and return without calling the Feishu API.
    * ``--only 4.6``: sync section 4.6 as a page directly under the wiki
      parent node and open it in the browser.
    * ``--only`` containing ``112``: sync the "第112场" section under its
      parent chain, creating missing parent pages, and open it in the browser.
    * ``--full``: sync every entry returned by build_book_entries().

    Returns:
        0 when a mode completes.  The process exits with status 1 when
        credentials or a required file are missing, and with status 0 after
        printing a hint when no mode was selected.
    """
    load_env()
    parser = argparse.ArgumentParser(description="上传书稿到飞书知识库")
    parser.add_argument("--dry-run", action="store_true", help="仅检查配置与文件,不上传")
    parser.add_argument("--only", default="", help="仅上传指定节,如 4.6")
    parser.add_argument("--full", action="store_true", help="按目录结构上传全书,同名则覆盖该页")
    args = parser.parse_args()

    app_id = os.environ.get("FEISHU_APP_ID", "").strip()
    app_secret = os.environ.get("FEISHU_APP_SECRET", "").strip()
    node_token = os.environ.get("FEISHU_WIKI_NODE_TOKEN", WIKI_NODE_TOKEN).strip()

    # Local source of section 4.6; shared by the dry-run report and --only 4.6.
    section_46_path = BOOK_ROOT / "第二篇|真实的行业" / "第4章|内容商业篇" / "4.6 Soul被封号了:解决方案和干货.md"

    if args.dry_run:
        print("dry-run: 检查本地文件与目标飞书链接。")
        # Show the node that will really be used, including an env override.
        print(f" 飞书链接: https://cunkebao.feishu.cn/wiki/{node_token}")
        print(f" 书稿根目录: {BOOK_ROOT} (存在={BOOK_ROOT.exists()})")
        if args.full:
            entries = build_book_entries(BOOK_ROOT)
            print(f" 全书条目数: {len(entries)}(含目录页)")
            for i, (pk, t, p) in enumerate(entries[:12]):
                print(f" [{i+1}] {pk!r} / {t!r} {'-> ' + str(p) if p else '(目录)'}")
            if len(entries) > 12:
                print(f" ... 等共 {len(entries)} 项")
        else:
            section_path = section_46_path
            assets_dir = section_path.parent / "assets"
            print(f" 4.6 正文: {section_path} (存在={section_path.exists()})")
            print(f" 4.6 图片目录: {assets_dir} (存在={assets_dir.exists()})")
            if assets_dir.exists():
                for f in assets_dir.iterdir():
                    if f.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif"):
                        print(f" - {f.name}")
        if not app_id or not app_secret:
            print(" 未配置 FEISHU_APP_ID/FEISHU_APP_SECRET,实际上传前请在 scripts/.env.feishu 中配置。")
        return 0

    if not app_id or not app_secret:
        print("错误: 未配置飞书应用凭证。")
        print("请设置环境变量 FEISHU_APP_ID、FEISHU_APP_SECRET,或在 scripts/.env.feishu 中配置。")
        print("飞书开放平台: https://open.feishu.cn/app 创建应用并开通「知识库」「云文档」权限,")
        print("将应用添加为知识库成员后,把 App ID 与 App Secret 填入 .env.feishu。")
        sys.exit(1)

    tenant_token = get_tenant_access_token(app_id, app_secret)
    # Prefer the user token (refreshed with the app credentials when possible);
    # fall back to the tenant token otherwise.
    user_token = get_user_token(app_id, app_secret)
    token = user_token if user_token else tenant_token
    if user_token:
        print("使用用户 token 操作知识库")
    node = get_node_info(token, node_token)
    space_id = node["space_id"]
    parent_token = node_token
    print(f"知识库 space_id: {space_id}, 父节点: {parent_token}")

    write_token = user_token if user_token else tenant_token
    # parent_key -> wiki node token; "" is the configured parent node.
    cache = {"": parent_token}

    # --only 4.6: single section, created directly under the parent node.
    if args.only == "4.6":
        section_path = section_46_path
        assets_dir = section_path.parent / "assets"
        if not section_path.exists():
            print(f"文件不存在: {section_path}")
            sys.exit(1)
        content = section_path.read_text(encoding="utf-8")
        title = "4.6 Soul被封号了:如何处理?"
        _, doc_id, created = get_or_create_node(token, space_id, parent_token, title)
        blocks = text_to_docx_blocks(content, assets_dir)
        write_docx_content(write_token, doc_id, blocks, overwrite=True)
        print(f"已同步子页面: {title}, document_id={doc_id}" + (" (新建)" if created else " (覆盖)"))
        print("图片需在飞书该文档内手动上传并插入到 [图片: xxx] 位置;本地路径:", list(assets_dir.iterdir()) if assets_dir.exists() else [])
        doc_url = f"https://cunkebao.feishu.cn/docx/{doc_id}"
        webbrowser.open(doc_url)
        print(f"已打开: {doc_url}")
        return 0

    # --only 112场 (any value containing "112"): single section under chapter 9.
    if "112" in args.only:
        section_path = BOOK_ROOT / "第四篇|真实的赚钱" / "第9章|我在Soul上亲访的赚钱案例" / "第112场|一个人起头,维权挣了大半套房.md"
        if not section_path.exists():
            print(f"文件不存在: {section_path}")
            sys.exit(1)
        entries = build_book_entries(BOOK_ROOT)
        target = next((e for e in entries if e[1] and "112场" in e[1]), None)
        if not target:
            print("未在全书条目中找到 112场")
            sys.exit(1)
        parent_key, title, md_path = target
        if not md_path:
            print("112场 对应的是目录项,无正文")
            sys.exit(1)
        # Resolve (or create) every ancestor page and record it in the cache.
        parts = [p for p in parent_key.split("/") if p]
        for i in range(len(parts) + 1):
            pk = "/".join(parts[:i]) if i else ""
            p_token = cache.get(pk)
            if p_token is None:
                print(f" 跳过(父未就绪): {pk!r}")
                continue
            if i < len(parts):
                need_title = parts[i]
                node_tok, _, _ = get_or_create_node(token, space_id, p_token, need_title)
                cache[pk + "/" + need_title if pk else need_title] = node_tok
        p_token = cache.get(parent_key)
        if p_token is None:
            print("父节点未解析到,请先执行 --full 一次或检查目录结构")
            sys.exit(1)
        _, doc_id, created = get_or_create_node(token, space_id, p_token, title)
        content = section_path.read_text(encoding="utf-8")
        assets_dir = section_path.parent / "assets"
        blocks = text_to_docx_blocks(content, assets_dir)
        write_docx_content(write_token, doc_id, blocks, overwrite=True)
        print(f"已同步子页面: {title}, document_id={doc_id}" + (" (新建)" if created else " (覆盖)"))
        doc_url = f"https://cunkebao.feishu.cn/docx/{doc_id}"
        webbrowser.open(doc_url)
        print(f"已打开: {doc_url}")
        return 0

    # --full: create the directory pages, reuse pages with the same title and
    # overwrite their body.
    if args.full:
        entries = build_book_entries(BOOK_ROOT)
        print(f"全书共 {len(entries)} 项(含目录页),开始同步…")
        created_count = 0
        updated_count = 0
        for parent_key, title, md_path in entries:
            p_token = cache.get(parent_key)
            if p_token is None:
                print(f" 跳过(父未就绪): {parent_key!r} / {title!r}")
                continue
            # Named child_token so the configured node_token is not shadowed.
            child_token, obj_token, created = get_or_create_node(token, space_id, p_token, title)
            current_key = f"{parent_key}/{title}" if parent_key else title
            cache[current_key] = child_token
            if md_path is not None:
                full_path = BOOK_ROOT / md_path
                if not full_path.exists():
                    print(f" 跳过(文件不存在): {md_path}")
                    continue
                try:
                    content = full_path.read_text(encoding="utf-8")
                except (OSError, UnicodeError) as e:
                    print(f" 跳过(读文件失败): {md_path} -> {e}")
                    continue
                assets_dir = full_path.parent / "assets"
                blocks = text_to_docx_blocks(content, assets_dir)
                write_docx_content(write_token, obj_token, blocks, overwrite=True)
                if created:
                    created_count += 1
                else:
                    updated_count += 1
                print(f" 写入: {current_key}")
            else:
                if created:
                    created_count += 1
                print(f" 目录: {current_key}")
        print(f"完成。新建 {created_count} 个页面,覆盖 {updated_count} 个页面。")
        return 0

    print("请使用 --only 4.6 或 --full 指定上传范围。")
    sys.exit(0)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the CLI when executed as a script and use main()'s return value as the
# process exit status instead of discarding it.
if __name__ == "__main__":
    sys.exit(main())
|