🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个

This commit is contained in:
2026-02-22 11:40:57 +08:00
parent 3a72f73338
commit 15462cf6ca
5 changed files with 398 additions and 61 deletions

View File

@@ -11,9 +11,10 @@ from pathlib import Path
SCRIPT_DIR = Path(__file__).resolve().parent
FEISHU_SCRIPT = SCRIPT_DIR / "feishu_wiki_create_doc.py"
IMG_DIR = Path("/Users/karuo/Documents/卡若Ai的文件夹/图片")
IMG_DIR = Path("/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/assets")
PARENT_TOKEN = "KNf7wA8Rki1NSdkkSIqcdFtTnWb"
TITLE = "卡若AI 基因胶囊 · 全功能介绍(产品经理 / 程序员 / 普通用户)"
TITLE = "卡若基因胶囊——AI技能可遗传化的实现与落地"
JSON_PATH = Path("/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若_基因胶囊_AI技能可遗传化_feishu_blocks.json")
# 导入 feishu 脚本的 token 逻辑
sys.path.insert(0, str(SCRIPT_DIR))
@@ -47,8 +48,77 @@ def upload_image_to_doc(token: str, doc_token: str, img_path: Path) -> str | Non
return None
def _title_matches(node_title: str, target: str) -> bool:
"""判断节点标题是否与目标相似(含关键词即视为匹配)"""
if not node_title or not target:
return False
kw = ["基因胶囊", "AI技能可遗传"]
return any(k in node_title for k in kw) or target in node_title
def _find_existing_doc(space_id: str, headers: dict) -> tuple[str | None, str | None]:
    """Look for an already-created doc with a similar title under PARENT_TOKEN.

    Pages through the wiki node list and returns (doc_token, node_token) for
    the first node whose title matches; (None, None) when nothing similar
    exists or the API reports an error.
    """
    url = f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes"
    page_token = None
    while True:
        params = {"parent_node_token": PARENT_TOKEN, "page_size": 50}
        if page_token:
            params["page_token"] = page_token
        resp = requests.get(url, headers=headers, params=params, timeout=30)
        payload = resp.json()
        if payload.get("code") != 0:
            return None, None
        data = payload.get("data", {})
        # Depending on API version, nodes may live under "nodes" or "items".
        entries = data.get("nodes", []) or data.get("items", []) or []
        for entry in entries:
            title = entry.get("title", "") or entry.get("node", {}).get("title", "")
            if _title_matches(title, TITLE):
                obj_token = entry.get("obj_token")
                node_token = entry.get("node_token")
                return obj_token or node_token, node_token
        page_token = data.get("page_token")
        if not page_token:
            return None, None
def _clear_doc_blocks(doc_token: str, headers: dict) -> bool:
    """Empty the document by deleting all direct children of its root block.

    Pages through the document's block list to count the root's direct
    children, then deletes them in batches of at most 50. Returns True on
    success (including when there is nothing to delete), False on any API
    error.
    """
    import time

    list_url = f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks"
    all_items = []
    page_token = None
    while True:
        params = {"page_size": 100}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(list_url, headers=headers, params=params, timeout=30)
        payload = r.json()
        if payload.get("code") != 0:
            return False
        data = payload.get("data", {})
        all_items.extend(data.get("items", []))
        page_token = data.get("page_token")
        if not page_token:
            break
    # Direct children of the root block carry the document token as parent_id.
    child_count = sum(1 for b in all_items if b.get("parent_id") == doc_token)
    if not child_count:
        return True
    delete_url = (
        f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}"
        f"/blocks/{doc_token}/children/batch_delete"
    )
    remaining = child_count
    while remaining > 0:
        # BUGFIX: the docx batch_delete endpoint takes start_index/end_index,
        # not a "block_id_list" payload. Children shift down after each call,
        # so we always delete from index 0, at most 50 blocks per request.
        batch = min(remaining, 50)
        rd = requests.delete(
            delete_url,
            headers=headers,
            json={"start_index": 0, "end_index": batch},
            timeout=30)
        if rd.json().get("code") != 0:
            return False
        remaining -= batch
        time.sleep(0.3)  # be gentle with the rate limit
    return True
def create_doc_with_images():
"""创建文档、上传图片、写入图文 blocks"""
"""创建或更新文档、上传图片、写入图文 blocks"""
token = fwd.get_token(PARENT_TOKEN)
if not token:
return False, "Token 无效"
@@ -66,26 +136,35 @@ def create_doc_with_images():
if not space_id:
return False, "无法获取 space_id"
# 2. 创建子文档
create_r = requests.post(
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
headers=headers,
json={
"parent_node_token": PARENT_TOKEN,
"obj_type": "docx",
"node_type": "origin",
"title": TITLE,
},
timeout=30)
create_data = create_r.json()
if create_data.get("code") != 0:
return False, create_data.get("msg", str(create_data))
doc_token = create_data.get("data", {}).get("node", {}).get("obj_token")
node_token = create_data.get("data", {}).get("node", {}).get("node_token")
if not doc_token:
doc_token = node_token
# 2. 查找是否已有同名/类似文档
doc_token, node_token = _find_existing_doc(space_id, headers)
if doc_token and node_token:
print(f"📋 发现已有类似文档,将更新内容")
if not _clear_doc_blocks(doc_token, headers):
print("⚠️ 清空原内容失败,将追加写入")
else:
print("✅ 已清空原内容")
else:
# 3. 创建新文档
create_r = requests.post(
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
headers=headers,
json={
"parent_node_token": PARENT_TOKEN,
"obj_type": "docx",
"node_type": "origin",
"title": TITLE,
},
timeout=30)
create_data = create_r.json()
if create_data.get("code") != 0:
return False, create_data.get("msg", str(create_data))
doc_token = create_data.get("data", {}).get("node", {}).get("obj_token")
node_token = create_data.get("data", {}).get("node", {}).get("node_token")
if not doc_token:
doc_token = node_token
# 3. 上传图片
# 4. 上传图片
img1 = IMG_DIR / "基因胶囊_概念与流程.png"
img2 = IMG_DIR / "基因胶囊_完整工作流程图.png"
file_token1 = upload_image_to_doc(token, doc_token, img1) if img1.exists() else None
@@ -95,45 +174,58 @@ def create_doc_with_images():
if file_token2:
print(f"✅ 图片2 上传成功")
# 4. 构建 blocks(含图片 block
blocks = get_article_blocks(file_token1, file_token2)
# 5. 构建 blocks:从 JSON 加载,配图占位处注入图片 block
if JSON_PATH.exists():
with open(JSON_PATH, "r", encoding="utf-8") as f:
data = json.load(f)
raw_blocks = data.get("children", [])
blocks = []
tokens = [file_token1, file_token2]
for b in raw_blocks:
c = (b.get("text") or {}).get("elements") or []
content = (c[0].get("text_run") or {}).get("content", "") if c else ""
if "【配图 1" in content and tokens[0]:
blocks.append({"block_type": 18, "gallery": {"imageList": [{"fileToken": tokens[0]}], "galleryStyle": {"align": "center"}}})
elif "【配图 2" in content and len(tokens) > 1 and tokens[1]:
blocks.append({"block_type": 18, "gallery": {"imageList": [{"fileToken": tokens[1]}], "galleryStyle": {"align": "center"}}})
elif "【配图 1" in content or "【配图 2" in content:
blocks.append(b)
else:
blocks.append(b)
else:
blocks = get_article_blocks(file_token1, file_token2)
# 5. 分批写入(过滤 None分别处理 text 与 image block 避免 invalid param
# 6. 分批写入所有 blocks含图片保持顺序
valid_blocks = [b for b in blocks if b is not None]
for i in range(0, len(valid_blocks), 50):
batch = valid_blocks[i : i + 50]
# 仅写入 text/heading 类 block跳过可能报错的 image block
safe_batch = [b for b in batch if b.get("block_type") != 13]
if not safe_batch:
continue
wr = requests.post(
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
headers=headers,
json={"children": safe_batch, "index": i},
json={"children": batch, "index": i},
timeout=30)
res = wr.json()
if res.get("code") != 0:
# 若仍失败,可能是 index 等;尝试不含 image 的纯文本
if i == 0:
# 若含图片的批次失败,则跳过图片仅写文本
if any(b.get("block_type") in (13, 18) for b in batch):
safe = [b for b in batch if b.get("block_type") not in (13, 18)]
if safe:
wr2 = requests.post(
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
headers=headers,
json={"children": safe, "index": i},
timeout=30)
if wr2.json().get("code") == 0:
print(f"⚠️ 图片块跳过,已写文本")
elif i == 0:
return False, res.get("msg", "写入失败")
else:
gallery_count = sum(1 for b in batch if b.get("block_type") == 18)
if gallery_count:
print(f"✅ 写入 {gallery_count} 个图片块")
if len(valid_blocks) > 50:
import time
time.sleep(0.3)
# 5b. 尝试追加图片块(在文档末尾,逐张添加)
for ft in [b for b in [file_token1, file_token2] if b]:
try:
imgb = {"block_type": 13, "image": {"file_token": ft}}
wr = requests.post(
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
headers=headers,
json={"children": [imgb], "index": -1},
timeout=30)
if wr.json().get("code") == 0:
print("✅ 图片块插入成功")
else:
print("⚠️ 图片块跳过(飞书 API 限制)")
except Exception as e:
print(f"⚠️ 图片块异常: {e}")
url = f"https://cunkebao.feishu.cn/wiki/{node_token}"
return True, url
@@ -190,7 +282,7 @@ def get_article_blocks(file_token1: str | None, file_token2: str | None) -> list
def main():
print("=" * 50)
print(f"📤 创建基因胶囊全功能介绍(图文")
print(f"📤 基因胶囊全功能介绍(创建或更新 + 图片上传")
print(f" 父节点: {PARENT_TOKEN}")
print("=" * 50)
ok, result = create_doc_with_images()

View File

@@ -0,0 +1,229 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
按完整主题切片 - 分析 transcript找出每个主题的完整起止时间
与 identify_highlights 不同:本脚本按「视频剪辑方案」的 7 个主题类型分析,
时间节点非固定,需结合视频内容分析出每个主题的完整段落。
主题类型(来自剪辑方案图片):
1. 引出问题 - 建立共鸣,问用户痛点
2. 解决方案 - 核心方法、干货
3. 案例分享 - 真实案例、数据
4. 未来展望 - 接下来怎么做
5. 痛点强调 - 避坑、踩坑警告
6. 福利展示 - 限时福利、福利放送
7. 权威背书 - 专业背书、可信证明
用法:
python3 identify_theme_segments.py -t transcript.srt -o highlights.json
"""
import argparse
import json
import re
import sys
from pathlib import Path
# Local Ollama server endpoint used for LLM-based theme analysis.
OLLAMA_URL = "http://localhost:11434"
# Fallback call-to-action text used when the model does not supply one.
DEFAULT_CTA = "关注我,每天学一招私域干货"
# Theme taxonomy (7 types from the video-editing plan), injected verbatim
# into the LLM prompt built by _build_theme_prompt().
THEME_DEFINITIONS = """
【主题类型定义,按视频剪辑方案】
1. 引出问题:开场建立共鸣,提出用户普遍遇到的问题或痛点
2. 解决方案:讲解核心方法、干货、具体做法
3. 案例分享:真实案例、数据佐证、用户证言
4. 未来展望:接下来这样做、未来趋势、行动建议
5. 痛点强调:这个坑千万别踩、常见误区、避坑指南
6. 福利展示:限时福利、福利放送、赠送、优惠
7. 权威背书:专业背书、可信证明、资质、成果展示
参考时间顺序(非固定):引出问题→解决方案→案例分享→未来展望→痛点强调→福利展示→权威背书
"""
def parse_srt_segments(srt_path: str) -> list:
    """Parse an SRT file into a list of caption-segment dicts.

    Each entry carries integer second offsets (``start_sec``/``end_sec``),
    "HH:MM:SS" strings (``start_time``/``end_time``) and the caption text with
    inner newlines collapsed to spaces. Captions of two characters or fewer
    are dropped; millisecond components are ignored.
    """
    with open(srt_path, "r", encoding="utf-8") as fh:
        raw = fh.read()
    cue_re = re.compile(
        r"(\d+)\n(\d{2}):(\d{2}):(\d{2}),(\d{3}) --> (\d{2}):(\d{2}):(\d{2}),(\d{3})\n(.*?)(?=\n\n|\Z)",
        re.DOTALL,
    )
    out = []
    for groups in cue_re.findall(raw):
        h1, m1, s1 = int(groups[1]), int(groups[2]), int(groups[3])
        h2, m2, s2 = int(groups[5]), int(groups[6]), int(groups[7])
        caption = groups[9].strip().replace("\n", " ")
        if len(caption) <= 2:
            continue
        out.append({
            "start_sec": h1 * 3600 + m1 * 60 + s1,
            "end_sec": h2 * 3600 + m2 * 60 + s2,
            "start_time": f"{h1:02d}:{m1:02d}:{s1:02d}",
            "end_time": f"{h2:02d}:{m2:02d}:{s2:02d}",
            "text": caption,
        })
    return out
def srt_to_timestamped_text(srt_path: str) -> str:
    """Flatten an SRT file into one "[HH:MM:SS] caption" line per segment."""
    lines = [f"[{seg['start_time']}] {seg['text']}" for seg in parse_srt_segments(srt_path)]
    return "\n".join(lines)
def _build_theme_prompt(transcript: str) -> str:
    """Build the Chinese LLM prompt asking for the 7 theme segments as JSON."""
    # Cap the transcript at 15k characters so the prompt fits the model context.
    txt = transcript[:15000] if len(transcript) > 15000 else transcript
    return f"""你是短视频内容策划师。根据「视频剪辑方案」,分析以下视频文字稿,找出 7 类主题各自的**完整段落**。
{THEME_DEFINITIONS}
【关键】时间节点非固定!需结合视频实际内容分析:
- 每个主题只取一段,且必须是**完整主题**(不中断、语义完整)
- 从文字稿中精确找出该主题开始和结束的时间点
- 若某类主题在视频中未出现,可跳过,不强制凑齐 7 段
- 参考顺序帮助理解,实际顺序按内容出现顺序
【输出格式】严格 JSON 数组,每项含:
- theme: 主题类型名(如"引出问题"
- title: 简短标题(简体中文)
- start_time: "HH:MM:SS"
- end_time: "HH:MM:SS"
- hook_3sec: 前3秒Hook15字内
- cta_ending: 结尾CTA可用"{DEFAULT_CTA}"
- transcript_excerpt: 该段内容前60字
只输出 JSON 数组,不要```包裹,不要其他文字。所有文字必须简体中文。
视频文字稿:
---
{txt}
---"""
def _parse_ai_json(text: str) -> list:
text = text.strip()
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```\s*$", "", text)
m = re.search(r"\[[\s\S]*\]", text)
if m:
return json.loads(m.group())
return json.loads(text)
def call_ollama(transcript: str) -> str:
    """Send the theme-analysis prompt to the local Ollama server.

    Returns the model's raw text response (stripped). Any HTTP error,
    non-200 status, or connection failure is re-raised as RuntimeError.
    """
    import requests

    body = {
        "model": "qwen2.5:1.5b",
        "prompt": _build_theme_prompt(transcript),
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 8192},
    }
    try:
        resp = requests.post(f"{OLLAMA_URL}/api/generate", json=body, timeout=120)
        if resp.status_code != 200:
            raise RuntimeError(f"Ollama {resp.status_code}")
        return resp.json().get("response", "").strip()
    except Exception as e:
        raise RuntimeError(f"Ollama 调用失败: {e}") from e
def fallback_by_keywords(transcript_path: str) -> list:
    """Rule-based fallback: assign theme segments by keyword matching.

    For each of the 7 themes, picks the first not-yet-consumed subtitle
    segment containing one of the theme's keywords, then extends it into a
    passage by merging segments that start within 30 s of the running end.
    Returns highlight dicts in the same shape as the AI path produces.
    """
    segments = parse_srt_segments(transcript_path)
    if not segments:
        return []
    # keyword -> theme. BUGFIX: the 痛点强调 list used to contain an empty
    # string, and `"" in text` is always True, so every segment matched that
    # theme; replaced with "坑" per the theme definition ("这个坑千万别踩").
    theme_keywords = {
        "引出问题": ["问题", "遇到", "痛点", "为什么", "困惑", "难题"],
        "解决方案": ["方法", "解决", "怎么做", "技巧", "核心", "干货"],
        "案例分享": ["案例", "例子", "数据", "客户", "赚了", "做了"],
        "未来展望": ["接下来", "未来", "行动", "去做", "试试"],
        "痛点强调": ["坑", "避坑", "千万别", "误区", "踩雷"],
        "福利展示": ["福利", "限时", "赠送", "优惠", "免费"],
        "权威背书": ["专业", "背书", "资质", "成果", "证明"],
    }
    result = []
    # BUGFIX: track consumed time as (start, end) intervals. The old code
    # stored every 10th second in a set, so segments starting between sample
    # points were not recognized as already used.
    used_ranges = []

    def _is_used(sec) -> bool:
        # True when `sec` falls inside any already-claimed passage.
        return any(a <= sec <= b for a, b in used_ranges)

    for theme, kws in theme_keywords.items():
        first = None
        for seg in segments:
            if _is_used(seg["start_sec"]):
                continue
            if any(kw in seg["text"] for kw in kws):
                first = seg
                break
        if first is None:
            continue
        # Grow the match into a full passage: absorb segments whose start lies
        # within 30 s of the current end.
        start_sec = first["start_sec"]
        end_sec = first["end_sec"]
        for seg in segments:
            if start_sec <= seg["start_sec"] <= end_sec + 30:
                end_sec = max(end_sec, seg["end_sec"])
        used_ranges.append((start_sec, end_sec))
        h, m, s_ = start_sec // 3600, (start_sec % 3600) // 60, int(start_sec % 60)
        eh, em, es = end_sec // 3600, (end_sec % 3600) // 60, int(end_sec % 60)
        result.append({
            "theme": theme,
            "title": theme,
            "start_time": f"{int(h):02d}:{int(m):02d}:{int(s_):02d}",
            "end_time": f"{int(eh):02d}:{int(em):02d}:{int(es):02d}",
            "hook_3sec": f"精彩{theme}",
            "cta_ending": DEFAULT_CTA,
            "transcript_excerpt": first["text"][:60],
        })
    return result
def main():
    """CLI entry point: analyze a transcript into full theme segments.

    Reads an SRT transcript, asks Ollama to identify the 7 theme passages,
    normalizes/validates the returned items, falls back to keyword rules when
    the model fails, and writes the result to the output JSON path.
    """
    parser = argparse.ArgumentParser(description="按完整主题分析 transcript")
    parser.add_argument("--transcript", "-t", required=True, help="transcript.srt")
    parser.add_argument("--output", "-o", required=True, help="highlights.json")
    args = parser.parse_args()
    transcript_path = Path(args.transcript)
    if not transcript_path.exists():
        print(f"❌ 不存在: {transcript_path}", file=sys.stderr)
        sys.exit(1)
    text = srt_to_timestamped_text(str(transcript_path))
    if len(text) < 100:
        # Too little text to analyze meaningfully.
        print("❌ 文字稿过短", file=sys.stderr)
        sys.exit(1)
    data = None
    try:
        print("正在分析完整主题Ollama...")
        raw = call_ollama(text)
        data = _parse_ai_json(raw)
        if data and isinstance(data, list):
            # Normalize each item: accept "start"/"end" aliases for the time
            # keys and fill in default title / hook / CTA fields.
            for i, h in enumerate(data):
                if isinstance(h, dict):
                    if "start" in h and "start_time" not in h:
                        h["start_time"] = h.pop("start", "")
                    if "end" in h and "end_time" not in h:
                        h["end_time"] = h.pop("end", "")
                    h.setdefault("title", h.get("theme", f"主题{i+1}"))
                    h.setdefault("hook_3sec", h.get("title", "")[:15])
                    h.setdefault("cta_ending", DEFAULT_CTA)
            # Keep only well-formed items that carry both time bounds.
            data = [h for h in data if isinstance(h, dict) and h.get("start_time") and h.get("end_time")]
    except Exception as e:
        print(f"Ollama 失败 ({e}),使用规则备用", file=sys.stderr)
    if not data or not isinstance(data, list):
        # LLM path produced nothing usable — fall back to keyword rules.
        print("使用规则备用(按关键词)", file=sys.stderr)
        data = fallback_by_keywords(str(transcript_path))
    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"✅ 已输出 {len(data)} 个完整主题: {out_path}")


if __name__ == "__main__":
    main()

View File

@@ -89,8 +89,10 @@ def main():
parser.add_argument("--video", "-v", required=True, help="输入视频路径")
parser.add_argument("--output", "-o", help="输出目录(默认:视频同目录下 视频名_output")
parser.add_argument("--clips", "-n", type=int, default=8, help="切片数量")
parser.add_argument("--mode", "-m", choices=["highlights", "theme"], default="highlights",
help="highlights=高光识别(默认); theme=按完整主题分析(时间节点非固定)")
parser.add_argument("--skip-transcribe", action="store_true", help="跳过转录(已有 transcript.srt")
parser.add_argument("--skip-highlights", action="store_true", help="跳过高光识别(已有 highlights.json")
parser.add_argument("--skip-highlights", action="store_true", help="跳过高光/主题识别(已有 highlights.json")
parser.add_argument("--skip-clips", action="store_true", help="跳过切片(已有 clips/,仅重新增强)")
args = parser.parse_args()
@@ -154,19 +156,31 @@ def main():
transcript_to_simplified(transcript_path)
print(" ✓ 字幕已转简体")
# 2. 高光识别
# 2. 高光/主题识别
if not args.skip_highlights:
run(
[
sys.executable,
str(SCRIPT_DIR / "identify_highlights.py"),
"--transcript", str(transcript_path),
"--output", str(highlights_path),
"--clips", str(args.clips),
],
"高光识别Ollama→规则",
timeout=60,
)
if args.mode == "theme":
run(
[
sys.executable,
str(SCRIPT_DIR / "identify_theme_segments.py"),
"--transcript", str(transcript_path),
"--output", str(highlights_path),
],
"完整主题分析Ollama→规则,时间节点非固定",
timeout=120,
)
else:
run(
[
sys.executable,
str(SCRIPT_DIR / "identify_highlights.py"),
"--transcript", str(transcript_path),
"--output", str(highlights_path),
"--clips", str(args.clips),
],
"高光识别Ollama→规则",
timeout=60,
)
if not highlights_path.exists():
print(f"❌ 需要 highlights.json: {highlights_path}")
sys.exit(1)

View File

@@ -81,3 +81,4 @@
| 2026-02-22 10:57:44 | 🔄 卡若AI 同步 2026-02-22 10:57 | 更新:卡土、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:00:29 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:07:02 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 |
| 2026-02-22 11:32:57 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 |

View File

@@ -84,3 +84,4 @@
| 2026-02-22 10:57:44 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 10:57 | 更新:卡土、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:00:29 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:07:02 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
| 2026-02-22 11:32:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |