🔄 卡若AI 同步 2026-02-22 11:44 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个
This commit is contained in:
295
02_卡人(水)/水桥_平台对接/飞书管理/脚本/feishu_wiki_md_upload.py
Normal file
295
02_卡人(水)/水桥_平台对接/飞书管理/脚本/feishu_wiki_md_upload.py
Normal file
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
直接将 Markdown 文件(含图片)上传到飞书 Wiki。
|
||||
不依赖 JSON,直接解析 .md 并转换为飞书 blocks。
|
||||
|
||||
用法:
|
||||
python3 feishu_wiki_md_upload.py /path/to/article.md
|
||||
python3 feishu_wiki_md_upload.py "/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若:基因胶囊——AI技能可遗传化的实现与落地.md"
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
PARENT_TOKEN = "KNf7wA8Rki1NSdkkSIqcdFtTnWb"
|
||||
|
||||
sys.path.insert(0, str(SCRIPT_DIR))
|
||||
import feishu_wiki_create_doc as fwd
|
||||
|
||||
|
||||
def upload_image_to_doc(token: str, doc_token: str, img_path: Path) -> str | None:
|
||||
"""上传图片到飞书文档,返回 file_token"""
|
||||
if not img_path.exists():
|
||||
return None
|
||||
size = img_path.stat().st_size
|
||||
if size > 20 * 1024 * 1024:
|
||||
return None
|
||||
url = "https://open.feishu.cn/open-apis/drive/v1/medias/upload_all"
|
||||
with open(img_path, "rb") as f:
|
||||
files = {
|
||||
"file_name": (None, img_path.name),
|
||||
"parent_type": (None, "docx_image"),
|
||||
"parent_node": (None, doc_token),
|
||||
"size": (None, str(size)),
|
||||
"file": (img_path.name, f, "image/png"),
|
||||
}
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
r = requests.post(url, headers=headers, files=files, timeout=60)
|
||||
if r.json().get("code") == 0:
|
||||
return r.json().get("data", {}).get("file_token")
|
||||
return None
|
||||
|
||||
|
||||
def _text_block(t: str):
|
||||
return {"block_type": 2, "text": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
|
||||
|
||||
|
||||
def _h1(t: str):
|
||||
return {"block_type": 3, "heading1": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
|
||||
|
||||
|
||||
def _h2(t: str):
|
||||
return {"block_type": 4, "heading2": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
|
||||
|
||||
|
||||
def _h3(t: str):
|
||||
return {"block_type": 5, "heading3": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
|
||||
|
||||
|
||||
def _code_block(code: str):
|
||||
return {"block_type": 15, "code": {"language": "Plain Text", "elements": [{"text_run": {"content": code, "text_element_style": {}}}]}}
|
||||
|
||||
|
||||
def md_to_blocks(md_path: Path, file_tokens: dict[str, str]) -> list:
|
||||
"""将 Markdown 解析为飞书 blocks。file_tokens: {相对路径或文件名: file_token}"""
|
||||
text = md_path.read_text(encoding="utf-8")
|
||||
blocks = []
|
||||
lines = text.split("\n")
|
||||
i = 0
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# 一级标题
|
||||
if line.startswith("# ") and not line.startswith("## "):
|
||||
blocks.append(_h1(line[2:].strip()))
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 二级标题
|
||||
if line.startswith("## ") and not line.startswith("### "):
|
||||
blocks.append(_h2(line[3:].strip()))
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 三级标题
|
||||
if line.startswith("### "):
|
||||
blocks.append(_h3(line[4:].strip()))
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 代码块:飞书 code block API 易报 invalid param,暂以文本块呈现
|
||||
if line.strip().startswith("```"):
|
||||
lang_raw = line.strip()[3:].strip()
|
||||
code_lines = []
|
||||
i += 1
|
||||
while i < len(lines) and not lines[i].strip().startswith("```"):
|
||||
code_lines.append(lines[i])
|
||||
i += 1
|
||||
if i < len(lines):
|
||||
i += 1
|
||||
code = "\n".join(code_lines)
|
||||
blocks.append(_text_block(f"```{lang_raw}\n{code}\n```"))
|
||||
continue
|
||||
|
||||
# 图片 :飞书 gallery/image 插入 API 易报 invalid param,用占位符 + 提示
|
||||
m = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', line.strip())
|
||||
if m:
|
||||
alt, path = m.group(1), m.group(2)
|
||||
resolved = (md_path.parent / path).resolve()
|
||||
# 图片已上传到文档素材,但 API 插入块易失败,用占位符;用户可手动「插入→图片→文档素材」
|
||||
blocks.append(_text_block(f"📷 [图片: {alt or Path(path).name}](已上传至文档素材,可在飞书中插入)"))
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 引用块 >
|
||||
if line.startswith("> "):
|
||||
blocks.append(_text_block(line[2:].strip()))
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 分隔线
|
||||
if line.strip() in ("---", "***", "___"):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 空行
|
||||
if not line.strip():
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 普通段落
|
||||
blocks.append(_text_block(line.rstrip()))
|
||||
i += 1
|
||||
|
||||
return blocks
|
||||
|
||||
|
||||
def upload_md_to_feishu(md_path: Path, parent_token: str = PARENT_TOKEN) -> tuple[bool, str]:
|
||||
"""将 Markdown 上传到飞书 Wiki,有同名则更新"""
|
||||
token = fwd.get_token(parent_token)
|
||||
if not token:
|
||||
return False, "Token 无效"
|
||||
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
||||
|
||||
title = md_path.stem
|
||||
if not title:
|
||||
title = md_path.name
|
||||
|
||||
r = requests.get(
|
||||
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/get_node?token={parent_token}",
|
||||
headers=headers, timeout=30)
|
||||
if r.json().get("code") != 0:
|
||||
return False, r.json().get("msg", "get_node 失败")
|
||||
space_id = r.json()["data"]["node"].get("space_id") or \
|
||||
(r.json()["data"]["node"].get("space") or {}).get("space_id") or \
|
||||
r.json()["data"]["node"].get("origin_space_id")
|
||||
if not space_id:
|
||||
return False, "无法获取 space_id"
|
||||
|
||||
doc_token = None
|
||||
node_token = None
|
||||
nodes = []
|
||||
page_token = None
|
||||
while True:
|
||||
params = {"parent_node_token": parent_token, "page_size": 50}
|
||||
if page_token:
|
||||
params["page_token"] = page_token
|
||||
rr = requests.get(
|
||||
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
|
||||
headers=headers, params=params, timeout=30)
|
||||
if rr.json().get("code") != 0:
|
||||
break
|
||||
data = rr.json().get("data", {})
|
||||
nodes = data.get("nodes", []) or data.get("items", [])
|
||||
for n in nodes:
|
||||
t = n.get("title", "") or n.get("node", {}).get("title", "")
|
||||
if title in t or "基因胶囊" in t:
|
||||
doc_token = n.get("obj_token") or n.get("node_token")
|
||||
node_token = n.get("node_token")
|
||||
break
|
||||
if doc_token:
|
||||
break
|
||||
page_token = data.get("page_token")
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
if not doc_token:
|
||||
create_r = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
|
||||
headers=headers,
|
||||
json={
|
||||
"parent_node_token": parent_token,
|
||||
"obj_type": "docx",
|
||||
"node_type": "origin",
|
||||
"title": title,
|
||||
},
|
||||
timeout=30)
|
||||
cd = create_r.json()
|
||||
if cd.get("code") != 0:
|
||||
return False, cd.get("msg", str(cd))
|
||||
doc_token = cd.get("data", {}).get("node", {}).get("obj_token")
|
||||
node_token = cd.get("data", {}).get("node", {}).get("node_token")
|
||||
if not doc_token:
|
||||
doc_token = node_token
|
||||
print("📄 创建新文档")
|
||||
else:
|
||||
print("📋 更新已有文档")
|
||||
|
||||
file_tokens = {}
|
||||
for m in re.finditer(r'!\[([^\]]*)\]\(([^)]+)\)', md_path.read_text(encoding="utf-8")):
|
||||
path = m.group(2)
|
||||
resolved = (md_path.parent / path).resolve()
|
||||
if resolved.exists():
|
||||
ft = upload_image_to_doc(token, doc_token, resolved)
|
||||
if ft:
|
||||
file_tokens[str(resolved)] = ft
|
||||
file_tokens[path] = ft
|
||||
file_tokens[resolved.name] = ft
|
||||
print(f"✅ 图片上传: {resolved.name}")
|
||||
|
||||
if doc_token and doc_token != node_token:
|
||||
child_ids = []
|
||||
pt = None
|
||||
while True:
|
||||
params = {"page_size": 100}
|
||||
if pt:
|
||||
params["page_token"] = pt
|
||||
rb = requests.get(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks",
|
||||
headers=headers, params=params, timeout=30)
|
||||
if rb.json().get("code") != 0:
|
||||
break
|
||||
data = rb.json().get("data", {})
|
||||
items = data.get("items", [])
|
||||
for b in items:
|
||||
if b.get("parent_id") == doc_token:
|
||||
child_ids.append(b["block_id"])
|
||||
pt = data.get("page_token")
|
||||
if not pt:
|
||||
break
|
||||
if child_ids:
|
||||
for j in range(0, len(child_ids), 50):
|
||||
batch = child_ids[j : j + 50]
|
||||
requests.delete(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children/batch_delete",
|
||||
headers=headers, json={"block_id_list": batch}, timeout=30)
|
||||
|
||||
blocks = md_to_blocks(md_path, file_tokens)
|
||||
|
||||
for i in range(0, len(blocks), 50):
|
||||
batch = blocks[i : i + 50]
|
||||
wr = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
|
||||
headers=headers,
|
||||
json={"children": batch, "index": i},
|
||||
timeout=30)
|
||||
if wr.json().get("code") != 0:
|
||||
return False, wr.json().get("msg", "写入失败")
|
||||
import time
|
||||
time.sleep(0.3)
|
||||
|
||||
url = f"https://cunkebao.feishu.cn/wiki/{node_token}"
|
||||
return True, url
|
||||
|
||||
|
||||
def main():
|
||||
ap = argparse.ArgumentParser(description="Markdown 直接上传到飞书 Wiki")
|
||||
ap.add_argument("md", nargs="?", default="/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若:基因胶囊——AI技能可遗传化的实现与落地.md", help="Markdown 文件路径")
|
||||
ap.add_argument("--parent", default=PARENT_TOKEN, help="父节点 token")
|
||||
args = ap.parse_args()
|
||||
|
||||
md_path = Path(args.md).expanduser().resolve()
|
||||
if not md_path.exists():
|
||||
print(f"❌ 文件不存在: {md_path}")
|
||||
sys.exit(1)
|
||||
|
||||
print("=" * 50)
|
||||
print(f"📤 Markdown 直接上传: {md_path.name}")
|
||||
print("=" * 50)
|
||||
ok, result = upload_md_to_feishu(md_path, args.parent)
|
||||
if ok:
|
||||
print(f"✅ 成功")
|
||||
print(f"📎 {result}")
|
||||
else:
|
||||
print(f"❌ 失败: {result}")
|
||||
sys.exit(1)
|
||||
print("=" * 50)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -132,11 +132,11 @@ def call_ollama(transcript: str) -> str:
|
||||
|
||||
|
||||
def fallback_by_keywords(transcript_path: str) -> list:
|
||||
"""规则备用:按关键词粗分主题段"""
|
||||
"""规则备用:按关键词粗分主题段,每段限制 45-120 秒"""
|
||||
segments = parse_srt_segments(transcript_path)
|
||||
if not segments:
|
||||
return []
|
||||
# 关键词 -> 主题
|
||||
total_duration = segments[-1]["end_sec"] if segments else 0
|
||||
theme_keywords = {
|
||||
"引出问题": ["问题", "遇到", "痛点", "为什么", "困惑", "难题"],
|
||||
"解决方案": ["方法", "解决", "怎么做", "技巧", "核心", "干货"],
|
||||
@@ -146,37 +146,42 @@ def fallback_by_keywords(transcript_path: str) -> list:
|
||||
"福利展示": ["福利", "限时", "赠送", "优惠", "免费"],
|
||||
"权威背书": ["专业", "背书", "资质", "成果", "证明"],
|
||||
}
|
||||
MIN_SEG = 45
|
||||
MAX_SEG = 120
|
||||
result = []
|
||||
used = set()
|
||||
used_until = 0 # 已使用到的时间点,避免重叠
|
||||
for theme, kws in theme_keywords.items():
|
||||
cands = []
|
||||
cands = [s for s in segments if s["start_sec"] >= used_until and any(kw in s["text"] for kw in kws)]
|
||||
if not cands:
|
||||
continue
|
||||
first = cands[0]
|
||||
start_sec = first["start_sec"]
|
||||
# 合并相邻字幕,但限制在 MAX_SEG 秒内
|
||||
end_sec = first["end_sec"]
|
||||
for s in segments:
|
||||
if s["start_sec"] in used:
|
||||
if s["start_sec"] < start_sec:
|
||||
continue
|
||||
txt = s["text"]
|
||||
if any(kw in txt for kw in kws):
|
||||
cands.append(s)
|
||||
if cands:
|
||||
# 取第一段匹配,扩展为完整段落(合并相邻)
|
||||
first = cands[0]
|
||||
start_sec = first["start_sec"]
|
||||
end_sec = first["end_sec"]
|
||||
for s in segments:
|
||||
if s["start_sec"] >= start_sec and s["start_sec"] <= end_sec + 30:
|
||||
end_sec = max(end_sec, s["end_sec"])
|
||||
for t in range(int(start_sec), int(end_sec) + 1, 10):
|
||||
used.add(t)
|
||||
h, m, s_ = start_sec // 3600, (start_sec % 3600) // 60, int(start_sec % 60)
|
||||
eh, em, es = end_sec // 3600, (end_sec % 3600) // 60, int(end_sec % 60)
|
||||
result.append({
|
||||
"theme": theme,
|
||||
"title": theme,
|
||||
"start_time": f"{int(h):02d}:{int(m):02d}:{int(s_):02d}",
|
||||
"end_time": f"{int(eh):02d}:{int(em):02d}:{int(es):02d}",
|
||||
"hook_3sec": f"精彩{theme}",
|
||||
"cta_ending": DEFAULT_CTA,
|
||||
"transcript_excerpt": first["text"][:60],
|
||||
})
|
||||
if s["start_sec"] > start_sec + MAX_SEG:
|
||||
break
|
||||
if s["end_sec"] <= end_sec + 15: # 连续/接近
|
||||
end_sec = max(end_sec, s["end_sec"])
|
||||
elif s["start_sec"] <= end_sec + 5: # 间隙小于5秒
|
||||
end_sec = min(s["end_sec"], start_sec + MAX_SEG)
|
||||
end_sec = min(end_sec, start_sec + MAX_SEG)
|
||||
if end_sec - start_sec < MIN_SEG:
|
||||
end_sec = min(start_sec + MIN_SEG, total_duration)
|
||||
used_until = end_sec + 10 # 下一段至少间隔10秒
|
||||
h, m, s_ = int(start_sec // 3600), int((start_sec % 3600) // 60), int(start_sec % 60)
|
||||
eh, em, es = int(end_sec // 3600), int((end_sec % 3600) // 60), int(end_sec % 60)
|
||||
result.append({
|
||||
"theme": theme,
|
||||
"title": theme,
|
||||
"start_time": f"{h:02d}:{m:02d}:{s_:02d}",
|
||||
"end_time": f"{eh:02d}:{em:02d}:{es:02d}",
|
||||
"hook_3sec": f"精彩{theme}",
|
||||
"cta_ending": DEFAULT_CTA,
|
||||
"transcript_excerpt": first["text"][:60],
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -82,3 +82,4 @@
|
||||
| 2026-02-22 11:00:29 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:07:02 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:32:57 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:40:59 | 🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
|
||||
@@ -85,3 +85,4 @@
|
||||
| 2026-02-22 11:00:29 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:07:02 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:32:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:40:59 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
|
||||
Reference in New Issue
Block a user