🔄 卡若AI 同步 2026-02-22 11:44 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个

This commit is contained in:
2026-02-22 11:44:38 +08:00
parent 15462cf6ca
commit 125fdf5d52
4 changed files with 331 additions and 29 deletions

View File

@@ -0,0 +1,295 @@
#!/usr/bin/env python3
"""
Upload a Markdown file (including its images) directly to Feishu Wiki.

Does not go through an intermediate JSON representation: the .md file is
parsed directly and converted into Feishu docx blocks.

Usage:
    python3 feishu_wiki_md_upload.py /path/to/article.md
    python3 feishu_wiki_md_upload.py "/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若基因胶囊——AI技能可遗传化的实现与落地.md"
"""
import re
import sys
import json
import argparse
import requests
from pathlib import Path
# Directory containing this script; used to import the sibling helper module.
SCRIPT_DIR = Path(__file__).resolve().parent
# Default Feishu Wiki parent node token under which documents are created/updated.
PARENT_TOKEN = "KNf7wA8Rki1NSdkkSIqcdFtTnWb"
sys.path.insert(0, str(SCRIPT_DIR))  # make the sibling module importable when run as a script
import feishu_wiki_create_doc as fwd  # provides get_token() for Feishu auth
def upload_image_to_doc(token: str, doc_token: str, img_path: Path) -> str | None:
    """Upload a local image as a docx media asset and return its file_token.

    Best-effort helper: returns None (never raises) when the file is
    missing, exceeds Feishu's 20 MB media limit, the HTTP request fails,
    the response is not JSON, or the API reports a non-zero code.

    Args:
        token: Access token, sent as a Bearer Authorization header.
        doc_token: Target docx document token (used as the media's parent node).
        img_path: Path of the image file on disk.

    Returns:
        The uploaded media's ``file_token``, or None on any failure.
    """
    if not img_path.exists():
        return None
    size = img_path.stat().st_size
    if size > 20 * 1024 * 1024:  # Feishu rejects media larger than 20 MB
        return None
    url = "https://open.feishu.cn/open-apis/drive/v1/medias/upload_all"
    with open(img_path, "rb") as f:
        files = {
            "file_name": (None, img_path.name),
            "parent_type": (None, "docx_image"),
            "parent_node": (None, doc_token),
            "size": (None, str(size)),
            # NOTE(review): content type is hard-coded; non-PNG images are
            # still sent as image/png — confirm the API tolerates this.
            "file": (img_path.name, f, "image/png"),
        }
        headers = {"Authorization": f"Bearer {token}"}
        try:
            r = requests.post(url, headers=headers, files=files, timeout=60)
            body = r.json()  # parse once (the original parsed the body twice)
        except (requests.RequestException, ValueError):
            # Network error or non-JSON body: keep the best-effort contract.
            return None
    if body.get("code") == 0:
        return body.get("data", {}).get("file_token")
    return None
def _text_block(t: str):
return {"block_type": 2, "text": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
def _h1(t: str):
return {"block_type": 3, "heading1": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
def _h2(t: str):
return {"block_type": 4, "heading2": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
def _h3(t: str):
return {"block_type": 5, "heading3": {"elements": [{"text_run": {"content": t, "text_element_style": {}}}], "style": {}}}
def _code_block(code: str):
return {"block_type": 15, "code": {"language": "Plain Text", "elements": [{"text_run": {"content": code, "text_element_style": {}}}]}}
def md_to_blocks(md_path: Path, file_tokens: dict[str, str]) -> list:
    """Parse a Markdown file into a list of Feishu docx block dicts.

    Supported elements: ``#``/``##``/``###`` headings, fenced code blocks
    (rendered as plain-text paragraphs because the Feishu code-block API
    tends to reject them with "invalid param"), images (rendered as text
    placeholders for the same reason), blockquotes, horizontal rules
    (dropped), and plain paragraphs. Blank lines produce no block.

    Args:
        md_path: Markdown file to read (UTF-8).
        file_tokens: Mapping of image path/name -> uploaded file_token.
            NOTE(review): currently unused — image blocks are emitted as
            placeholders; kept for interface compatibility / future use.

    Returns:
        List of Feishu block dicts in document order.
    """
    text = md_path.read_text(encoding="utf-8")
    blocks = []
    lines = text.split("\n")
    i = 0
    while i < len(lines):
        line = lines[i]
        # Headings — the "# ", "## " and "### " prefixes are mutually
        # exclusive, so no extra exclusion checks are needed.
        if line.startswith("### "):
            blocks.append(_h3(line[4:].strip()))
            i += 1
            continue
        if line.startswith("## "):
            blocks.append(_h2(line[3:].strip()))
            i += 1
            continue
        if line.startswith("# "):
            blocks.append(_h1(line[2:].strip()))
            i += 1
            continue
        # Fenced code block: the Feishu code-block API is unreliable,
        # so re-emit the fence verbatim inside a plain text block.
        if line.strip().startswith("```"):
            lang_raw = line.strip()[3:].strip()
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith("```"):
                code_lines.append(lines[i])
                i += 1
            if i < len(lines):  # consume the closing fence if present
                i += 1
            code = "\n".join(code_lines)
            blocks.append(_text_block(f"```{lang_raw}\n{code}\n```"))
            continue
        # Image ![alt](path): the gallery/image insert API is unreliable,
        # so emit a placeholder; the asset itself is uploaded separately
        # and can be inserted manually in Feishu.
        m = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', line.strip())
        if m:
            alt, path = m.group(1), m.group(2)
            blocks.append(_text_block(f"📷 [图片: {alt or Path(path).name}](已上传至文档素材,可在飞书中插入)"))
            i += 1
            continue
        # Blockquote: strip the marker, keep the text.
        if line.startswith("> "):
            blocks.append(_text_block(line[2:].strip()))
            i += 1
            continue
        # Horizontal rule: no Feishu equivalent used here; drop it.
        if line.strip() in ("---", "***", "___"):
            i += 1
            continue
        # Blank line: produces no block.
        if not line.strip():
            i += 1
            continue
        # Plain paragraph.
        blocks.append(_text_block(line.rstrip()))
        i += 1
    return blocks
def upload_md_to_feishu(md_path: Path, parent_token: str = PARENT_TOKEN) -> tuple[bool, str]:
    """Upload a Markdown file to Feishu Wiki, updating an existing doc of the same name.

    Flow: resolve the wiki space from the parent node, search its children
    for an existing document, create one if absent, upload referenced
    images as media assets, clear the document's existing top-level blocks,
    then append the converted Markdown blocks in batches.

    Args:
        md_path: Markdown file to upload.
        parent_token: Wiki parent node token under which the doc lives.

    Returns:
        (True, wiki_url) on success, (False, error_message) on failure.
    """
    token = fwd.get_token(parent_token)
    if not token:
        return False, "Token 无效"
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    # Use the file stem as the document title; fall back to the full name.
    title = md_path.stem
    if not title:
        title = md_path.name
    # Resolve the wiki node to discover which space it belongs to.
    r = requests.get(
        f"https://open.feishu.cn/open-apis/wiki/v2/spaces/get_node?token={parent_token}",
        headers=headers, timeout=30)
    if r.json().get("code") != 0:
        return False, r.json().get("msg", "get_node 失败")
    # The space id appears under different keys depending on API version;
    # try each known location in turn.
    space_id = r.json()["data"]["node"].get("space_id") or \
               (r.json()["data"]["node"].get("space") or {}).get("space_id") or \
               r.json()["data"]["node"].get("origin_space_id")
    if not space_id:
        return False, "无法获取 space_id"
    doc_token = None
    node_token = None
    nodes = []
    page_token = None
    # Page through the parent node's children looking for an existing doc.
    while True:
        params = {"parent_node_token": parent_token, "page_size": 50}
        if page_token:
            params["page_token"] = page_token
        rr = requests.get(
            f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
            headers=headers, params=params, timeout=30)
        if rr.json().get("code") != 0:
            break
        data = rr.json().get("data", {})
        nodes = data.get("nodes", []) or data.get("items", [])
        for n in nodes:
            t = n.get("title", "") or n.get("node", {}).get("title", "")
            # NOTE(review): fuzzy match — any title containing this file's
            # title OR the hard-coded string "基因胶囊" is treated as the
            # same document; confirm this is intentional for other files.
            if title in t or "基因胶囊" in t:
                doc_token = n.get("obj_token") or n.get("node_token")
                node_token = n.get("node_token")
                break
        if doc_token:
            break
        page_token = data.get("page_token")
        if not page_token:
            break
    if not doc_token:
        # No existing doc found: create a fresh docx node under the parent.
        create_r = requests.post(
            f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
            headers=headers,
            json={
                "parent_node_token": parent_token,
                "obj_type": "docx",
                "node_type": "origin",
                "title": title,
            },
            timeout=30)
        cd = create_r.json()
        if cd.get("code") != 0:
            return False, cd.get("msg", str(cd))
        doc_token = cd.get("data", {}).get("node", {}).get("obj_token")
        node_token = cd.get("data", {}).get("node", {}).get("node_token")
        if not doc_token:
            doc_token = node_token
        print("📄 创建新文档")
    else:
        print("📋 更新已有文档")
    # Pre-upload every image referenced by the Markdown so the assets exist
    # as document media, keyed by absolute path, raw path, and bare name.
    file_tokens = {}
    for m in re.finditer(r'!\[([^\]]*)\]\(([^)]+)\)', md_path.read_text(encoding="utf-8")):
        path = m.group(2)
        resolved = (md_path.parent / path).resolve()
        if resolved.exists():
            ft = upload_image_to_doc(token, doc_token, resolved)
            if ft:
                file_tokens[str(resolved)] = ft
                file_tokens[path] = ft
                file_tokens[resolved.name] = ft
                print(f"✅ 图片上传: {resolved.name}")
    # When updating an existing doc (doc_token differs from node_token),
    # delete its current top-level blocks so the new content replaces them.
    if doc_token and doc_token != node_token:
        child_ids = []
        pt = None
        while True:
            params = {"page_size": 100}
            if pt:
                params["page_token"] = pt
            rb = requests.get(
                f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks",
                headers=headers, params=params, timeout=30)
            if rb.json().get("code") != 0:
                break
            data = rb.json().get("data", {})
            items = data.get("items", [])
            for b in items:
                # Only direct children of the document root are deleted.
                if b.get("parent_id") == doc_token:
                    child_ids.append(b["block_id"])
            pt = data.get("page_token")
            if not pt:
                break
        if child_ids:
            # batch_delete accepts at most 50 block ids per call.
            for j in range(0, len(child_ids), 50):
                batch = child_ids[j : j + 50]
                requests.delete(
                    f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children/batch_delete",
                    headers=headers, json={"block_id_list": batch}, timeout=30)
    # Append the converted blocks in batches of 50; `index` is the insert
    # position, which equals the running block offset here.
    blocks = md_to_blocks(md_path, file_tokens)
    for i in range(0, len(blocks), 50):
        batch = blocks[i : i + 50]
        wr = requests.post(
            f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
            headers=headers,
            json={"children": batch, "index": i},
            timeout=30)
        if wr.json().get("code") != 0:
            return False, wr.json().get("msg", "写入失败")
        import time
        time.sleep(0.3)  # throttle between batches to avoid rate limits
    url = f"https://cunkebao.feishu.cn/wiki/{node_token}"
    return True, url
def main():
    """CLI entry point: parse arguments and upload a single Markdown file."""
    parser = argparse.ArgumentParser(description="Markdown 直接上传到飞书 Wiki")
    parser.add_argument(
        "md",
        nargs="?",
        default="/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若基因胶囊——AI技能可遗传化的实现与落地.md",
        help="Markdown 文件路径",
    )
    parser.add_argument("--parent", default=PARENT_TOKEN, help="父节点 token")
    opts = parser.parse_args()

    target = Path(opts.md).expanduser().resolve()
    if not target.exists():
        print(f"❌ 文件不存在: {target}")
        sys.exit(1)

    banner = "=" * 50
    print(banner)
    print(f"📤 Markdown 直接上传: {target.name}")
    print(banner)

    ok, result = upload_md_to_feishu(target, opts.parent)
    if not ok:
        print(f"❌ 失败: {result}")
        sys.exit(1)
    print(f"✅ 成功")
    print(f"📎 {result}")
    print(banner)


if __name__ == "__main__":
    main()

View File

@@ -132,11 +132,11 @@ def call_ollama(transcript: str) -> str:
def fallback_by_keywords(transcript_path: str) -> list:
"""规则备用:按关键词粗分主题段"""
"""规则备用:按关键词粗分主题段,每段限制 45-120 秒"""
segments = parse_srt_segments(transcript_path)
if not segments:
return []
# 关键词 -> 主题
total_duration = segments[-1]["end_sec"] if segments else 0
theme_keywords = {
"引出问题": ["问题", "遇到", "痛点", "为什么", "困惑", "难题"],
"解决方案": ["方法", "解决", "怎么做", "技巧", "核心", "干货"],
@@ -146,37 +146,42 @@ def fallback_by_keywords(transcript_path: str) -> list:
"福利展示": ["福利", "限时", "赠送", "优惠", "免费"],
"权威背书": ["专业", "背书", "资质", "成果", "证明"],
}
MIN_SEG = 45
MAX_SEG = 120
result = []
used = set()
used_until = 0 # 已使用到的时间点,避免重叠
for theme, kws in theme_keywords.items():
cands = []
cands = [s for s in segments if s["start_sec"] >= used_until and any(kw in s["text"] for kw in kws)]
if not cands:
continue
first = cands[0]
start_sec = first["start_sec"]
# 合并相邻字幕,但限制在 MAX_SEG 秒内
end_sec = first["end_sec"]
for s in segments:
if s["start_sec"] in used:
if s["start_sec"] < start_sec:
continue
txt = s["text"]
if any(kw in txt for kw in kws):
cands.append(s)
if cands:
# 取第一段匹配,扩展为完整段落(合并相邻)
first = cands[0]
start_sec = first["start_sec"]
end_sec = first["end_sec"]
for s in segments:
if s["start_sec"] >= start_sec and s["start_sec"] <= end_sec + 30:
end_sec = max(end_sec, s["end_sec"])
for t in range(int(start_sec), int(end_sec) + 1, 10):
used.add(t)
h, m, s_ = start_sec // 3600, (start_sec % 3600) // 60, int(start_sec % 60)
eh, em, es = end_sec // 3600, (end_sec % 3600) // 60, int(end_sec % 60)
result.append({
"theme": theme,
"title": theme,
"start_time": f"{int(h):02d}:{int(m):02d}:{int(s_):02d}",
"end_time": f"{int(eh):02d}:{int(em):02d}:{int(es):02d}",
"hook_3sec": f"精彩{theme}",
"cta_ending": DEFAULT_CTA,
"transcript_excerpt": first["text"][:60],
})
if s["start_sec"] > start_sec + MAX_SEG:
break
if s["end_sec"] <= end_sec + 15: # 连续/接近
end_sec = max(end_sec, s["end_sec"])
elif s["start_sec"] <= end_sec + 5: # 间隙小于5秒
end_sec = min(s["end_sec"], start_sec + MAX_SEG)
end_sec = min(end_sec, start_sec + MAX_SEG)
if end_sec - start_sec < MIN_SEG:
end_sec = min(start_sec + MIN_SEG, total_duration)
used_until = end_sec + 10 # 下一段至少间隔10秒
h, m, s_ = int(start_sec // 3600), int((start_sec % 3600) // 60), int(start_sec % 60)
eh, em, es = int(end_sec // 3600), int((end_sec % 3600) // 60), int(end_sec % 60)
result.append({
"theme": theme,
"title": theme,
"start_time": f"{h:02d}:{m:02d}:{s_:02d}",
"end_time": f"{eh:02d}:{em:02d}:{es:02d}",
"hook_3sec": f"精彩{theme}",
"cta_ending": DEFAULT_CTA,
"transcript_excerpt": first["text"][:60],
})
return result

View File

@@ -82,3 +82,4 @@
| 2026-02-22 11:00:29 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:07:02 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:32:57 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:40:59 | 🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 |

View File

@@ -85,3 +85,4 @@
| 2026-02-22 11:00:29 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:07:02 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:32:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:40:59 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |