🔄 卡若AI 同步 2026-02-22 11:40 | 更新:水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个
This commit is contained in:
@@ -11,9 +11,10 @@ from pathlib import Path
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
FEISHU_SCRIPT = SCRIPT_DIR / "feishu_wiki_create_doc.py"
|
||||
IMG_DIR = Path("/Users/karuo/Documents/卡若Ai的文件夹/图片")
|
||||
IMG_DIR = Path("/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/assets")
|
||||
PARENT_TOKEN = "KNf7wA8Rki1NSdkkSIqcdFtTnWb"
|
||||
TITLE = "卡若AI 基因胶囊 · 全功能介绍(产品经理 / 程序员 / 普通用户)"
|
||||
TITLE = "卡若:基因胶囊——AI技能可遗传化的实现与落地"
|
||||
JSON_PATH = Path("/Users/karuo/Documents/个人/2、我写的日记/火:开发分享/卡若_基因胶囊_AI技能可遗传化_feishu_blocks.json")
|
||||
|
||||
# 导入 feishu 脚本的 token 逻辑
|
||||
sys.path.insert(0, str(SCRIPT_DIR))
|
||||
@@ -47,8 +48,77 @@ def upload_image_to_doc(token: str, doc_token: str, img_path: Path) -> str | Non
|
||||
return None
|
||||
|
||||
|
||||
def _title_matches(node_title: str, target: str) -> bool:
|
||||
"""判断节点标题是否与目标相似(含关键词即视为匹配)"""
|
||||
if not node_title or not target:
|
||||
return False
|
||||
kw = ["基因胶囊", "AI技能可遗传"]
|
||||
return any(k in node_title for k in kw) or target in node_title
|
||||
|
||||
|
||||
def _find_existing_doc(space_id: str, headers: dict) -> tuple[str | None, str | None]:
    """Search the parent wiki node for an existing similar document.

    Pages through the child-node listing of PARENT_TOKEN and returns
    (doc_token, node_token) for the first node whose title matches TITLE,
    or (None, None) when nothing matches or the API reports an error.
    """
    list_url = f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes"
    cursor = None
    while True:
        query = {"parent_node_token": PARENT_TOKEN, "page_size": 50}
        if cursor:
            query["page_token"] = cursor
        resp = requests.get(list_url, headers=headers, params=query, timeout=30)
        payload = resp.json()
        if payload.get("code") != 0:
            return None, None
        body = payload.get("data", {})
        # Different listing responses expose the list under "nodes" or "items".
        for entry in body.get("nodes", []) or body.get("items", []) or []:
            name = entry.get("title", "") or entry.get("node", {}).get("title", "")
            if _title_matches(name, TITLE):
                doc = entry.get("obj_token")
                node = entry.get("node_token")
                return doc or node, node
        cursor = body.get("page_token")
        if not cursor:
            return None, None
|
||||
|
||||
|
||||
def _clear_doc_blocks(doc_token: str, headers: dict) -> bool:
    """Clear the document by deleting the root block's direct children.

    Lists all blocks (paged), keeps those whose parent is the document
    root, then deletes them in batches of 50 per batch_delete call,
    pausing briefly between batches to stay under rate limits.

    Returns True on success (including when there is nothing to delete),
    False as soon as any API call reports a non-zero code.
    """
    import time  # hoisted out of the delete loop (was re-imported per batch)

    base = f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks"

    # 1. Collect every block in the document (paged, 100 per request).
    all_items = []
    page_token = None
    while True:
        params = {"page_size": 100}
        if page_token:
            params["page_token"] = page_token
        r = requests.get(base, headers=headers, params=params, timeout=30)
        payload = r.json()
        if payload.get("code") != 0:
            return False
        data = payload.get("data", {})
        all_items.extend(data.get("items", []))
        page_token = data.get("page_token")
        if not page_token:
            break

    # 2. Only direct children of the root need deleting; nested blocks go
    #    away together with their parent.
    child_ids = [b["block_id"] for b in all_items if b.get("parent_id") == doc_token]
    if not child_ids:
        return True

    # 3. Batch delete, at most 50 ids per request.
    for i in range(0, len(child_ids), 50):
        batch = child_ids[i : i + 50]
        rd = requests.delete(
            f"{base}/{doc_token}/children/batch_delete",
            headers=headers, json={"block_id_list": batch}, timeout=30)
        if rd.json().get("code") != 0:
            return False
        time.sleep(0.3)  # gentle pacing between delete calls
    return True
|
||||
|
||||
|
||||
def create_doc_with_images():
|
||||
"""创建文档、上传图片、写入图文 blocks"""
|
||||
"""创建或更新文档、上传图片、写入图文 blocks"""
|
||||
token = fwd.get_token(PARENT_TOKEN)
|
||||
if not token:
|
||||
return False, "Token 无效"
|
||||
@@ -66,26 +136,35 @@ def create_doc_with_images():
|
||||
if not space_id:
|
||||
return False, "无法获取 space_id"
|
||||
|
||||
# 2. 创建子文档
|
||||
create_r = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
|
||||
headers=headers,
|
||||
json={
|
||||
"parent_node_token": PARENT_TOKEN,
|
||||
"obj_type": "docx",
|
||||
"node_type": "origin",
|
||||
"title": TITLE,
|
||||
},
|
||||
timeout=30)
|
||||
create_data = create_r.json()
|
||||
if create_data.get("code") != 0:
|
||||
return False, create_data.get("msg", str(create_data))
|
||||
doc_token = create_data.get("data", {}).get("node", {}).get("obj_token")
|
||||
node_token = create_data.get("data", {}).get("node", {}).get("node_token")
|
||||
if not doc_token:
|
||||
doc_token = node_token
|
||||
# 2. 查找是否已有同名/类似文档
|
||||
doc_token, node_token = _find_existing_doc(space_id, headers)
|
||||
if doc_token and node_token:
|
||||
print(f"📋 发现已有类似文档,将更新内容")
|
||||
if not _clear_doc_blocks(doc_token, headers):
|
||||
print("⚠️ 清空原内容失败,将追加写入")
|
||||
else:
|
||||
print("✅ 已清空原内容")
|
||||
else:
|
||||
# 3. 创建新文档
|
||||
create_r = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/wiki/v2/spaces/{space_id}/nodes",
|
||||
headers=headers,
|
||||
json={
|
||||
"parent_node_token": PARENT_TOKEN,
|
||||
"obj_type": "docx",
|
||||
"node_type": "origin",
|
||||
"title": TITLE,
|
||||
},
|
||||
timeout=30)
|
||||
create_data = create_r.json()
|
||||
if create_data.get("code") != 0:
|
||||
return False, create_data.get("msg", str(create_data))
|
||||
doc_token = create_data.get("data", {}).get("node", {}).get("obj_token")
|
||||
node_token = create_data.get("data", {}).get("node", {}).get("node_token")
|
||||
if not doc_token:
|
||||
doc_token = node_token
|
||||
|
||||
# 3. 上传图片
|
||||
# 4. 上传图片
|
||||
img1 = IMG_DIR / "基因胶囊_概念与流程.png"
|
||||
img2 = IMG_DIR / "基因胶囊_完整工作流程图.png"
|
||||
file_token1 = upload_image_to_doc(token, doc_token, img1) if img1.exists() else None
|
||||
@@ -95,45 +174,58 @@ def create_doc_with_images():
|
||||
if file_token2:
|
||||
print(f"✅ 图片2 上传成功")
|
||||
|
||||
# 4. 构建 blocks(含图片 block)
|
||||
blocks = get_article_blocks(file_token1, file_token2)
|
||||
# 5. 构建 blocks:从 JSON 加载,配图占位处注入图片 block
|
||||
if JSON_PATH.exists():
|
||||
with open(JSON_PATH, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
raw_blocks = data.get("children", [])
|
||||
blocks = []
|
||||
tokens = [file_token1, file_token2]
|
||||
for b in raw_blocks:
|
||||
c = (b.get("text") or {}).get("elements") or []
|
||||
content = (c[0].get("text_run") or {}).get("content", "") if c else ""
|
||||
if "【配图 1" in content and tokens[0]:
|
||||
blocks.append({"block_type": 18, "gallery": {"imageList": [{"fileToken": tokens[0]}], "galleryStyle": {"align": "center"}}})
|
||||
elif "【配图 2" in content and len(tokens) > 1 and tokens[1]:
|
||||
blocks.append({"block_type": 18, "gallery": {"imageList": [{"fileToken": tokens[1]}], "galleryStyle": {"align": "center"}}})
|
||||
elif "【配图 1" in content or "【配图 2" in content:
|
||||
blocks.append(b)
|
||||
else:
|
||||
blocks.append(b)
|
||||
else:
|
||||
blocks = get_article_blocks(file_token1, file_token2)
|
||||
|
||||
# 5. 分批写入(过滤 None,分别处理 text 与 image block 避免 invalid param)
|
||||
# 6. 分批写入所有 blocks(含图片),保持顺序
|
||||
valid_blocks = [b for b in blocks if b is not None]
|
||||
for i in range(0, len(valid_blocks), 50):
|
||||
batch = valid_blocks[i : i + 50]
|
||||
# 仅写入 text/heading 类 block,跳过可能报错的 image block
|
||||
safe_batch = [b for b in batch if b.get("block_type") != 13]
|
||||
if not safe_batch:
|
||||
continue
|
||||
wr = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
|
||||
headers=headers,
|
||||
json={"children": safe_batch, "index": i},
|
||||
json={"children": batch, "index": i},
|
||||
timeout=30)
|
||||
res = wr.json()
|
||||
if res.get("code") != 0:
|
||||
# 若仍失败,可能是 index 等;尝试不含 image 的纯文本
|
||||
if i == 0:
|
||||
# 若含图片的批次失败,则跳过图片仅写文本
|
||||
if any(b.get("block_type") in (13, 18) for b in batch):
|
||||
safe = [b for b in batch if b.get("block_type") not in (13, 18)]
|
||||
if safe:
|
||||
wr2 = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
|
||||
headers=headers,
|
||||
json={"children": safe, "index": i},
|
||||
timeout=30)
|
||||
if wr2.json().get("code") == 0:
|
||||
print(f"⚠️ 图片块跳过,已写文本")
|
||||
elif i == 0:
|
||||
return False, res.get("msg", "写入失败")
|
||||
else:
|
||||
gallery_count = sum(1 for b in batch if b.get("block_type") == 18)
|
||||
if gallery_count:
|
||||
print(f"✅ 写入 {gallery_count} 个图片块")
|
||||
if len(valid_blocks) > 50:
|
||||
import time
|
||||
time.sleep(0.3)
|
||||
# 5b. 尝试追加图片块(在文档末尾,逐张添加)
|
||||
for ft in [b for b in [file_token1, file_token2] if b]:
|
||||
try:
|
||||
imgb = {"block_type": 13, "image": {"file_token": ft}}
|
||||
wr = requests.post(
|
||||
f"https://open.feishu.cn/open-apis/docx/v1/documents/{doc_token}/blocks/{doc_token}/children",
|
||||
headers=headers,
|
||||
json={"children": [imgb], "index": -1},
|
||||
timeout=30)
|
||||
if wr.json().get("code") == 0:
|
||||
print("✅ 图片块插入成功")
|
||||
else:
|
||||
print("⚠️ 图片块跳过(飞书 API 限制)")
|
||||
except Exception as e:
|
||||
print(f"⚠️ 图片块异常: {e}")
|
||||
|
||||
url = f"https://cunkebao.feishu.cn/wiki/{node_token}"
|
||||
return True, url
|
||||
@@ -190,7 +282,7 @@ def get_article_blocks(file_token1: str | None, file_token2: str | None) -> list
|
||||
|
||||
def main():
|
||||
print("=" * 50)
|
||||
print(f"📤 创建基因胶囊全功能介绍(图文)")
|
||||
print(f"📤 基因胶囊全功能介绍(创建或更新 + 图片上传)")
|
||||
print(f" 父节点: {PARENT_TOKEN}")
|
||||
print("=" * 50)
|
||||
ok, result = create_doc_with_images()
|
||||
|
||||
229
03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_theme_segments.py
Normal file
229
03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_theme_segments.py
Normal file
@@ -0,0 +1,229 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
按完整主题切片 - 分析 transcript,找出每个主题的完整起止时间
|
||||
|
||||
与 identify_highlights 不同:本脚本按「视频剪辑方案」的 7 个主题类型分析,
|
||||
时间节点非固定,需结合视频内容分析出每个主题的完整段落。
|
||||
|
||||
主题类型(来自剪辑方案图片):
|
||||
1. 引出问题 - 建立共鸣,问用户痛点
|
||||
2. 解决方案 - 核心方法、干货
|
||||
3. 案例分享 - 真实案例、数据
|
||||
4. 未来展望 - 接下来怎么做
|
||||
5. 痛点强调 - 避坑、踩坑警告
|
||||
6. 福利展示 - 限时福利、福利放送
|
||||
7. 权威背书 - 专业背书、可信证明
|
||||
|
||||
用法:
|
||||
python3 identify_theme_segments.py -t transcript.srt -o highlights.json
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Base URL of the local Ollama server used for theme analysis.
OLLAMA_URL = "http://localhost:11434"
# Default call-to-action text used when the model supplies none.
DEFAULT_CTA = "关注我,每天学一招私域干货"

# Theme taxonomy (from the video-editing plan), injected verbatim into the
# model prompt; kept in Chinese because the prompt itself is Chinese.
THEME_DEFINITIONS = """
【主题类型定义,按视频剪辑方案】
1. 引出问题:开场建立共鸣,提出用户普遍遇到的问题或痛点
2. 解决方案:讲解核心方法、干货、具体做法
3. 案例分享:真实案例、数据佐证、用户证言
4. 未来展望:接下来这样做、未来趋势、行动建议
5. 痛点强调:这个坑千万别踩、常见误区、避坑指南
6. 福利展示:限时福利、福利放送、赠送、优惠
7. 权威背书:专业背书、可信证明、资质、成果展示

参考时间顺序(非固定):引出问题→解决方案→案例分享→未来展望→痛点强调→福利展示→权威背书
"""
|
||||
|
||||
|
||||
def parse_srt_segments(srt_path: str) -> list:
    """Parse an SRT file into a list of cue dicts.

    Each entry carries integer second offsets (start_sec / end_sec),
    zero-padded "HH:MM:SS" strings (start_time / end_time), and the cue
    text with inner newlines collapsed to spaces.  Cues whose text is
    two characters or shorter are dropped; millisecond precision is
    discarded.
    """
    with open(srt_path, "r", encoding="utf-8") as fh:
        raw = fh.read()
    cue_re = re.compile(
        r"(\d+)\n(\d{2}):(\d{2}):(\d{2}),(\d{3}) --> (\d{2}):(\d{2}):(\d{2}),(\d{3})\n(.*?)(?=\n\n|\Z)",
        re.DOTALL,
    )
    cues = []
    for match in cue_re.finditer(raw):
        g = match.groups()
        s_h, s_m, s_s = int(g[1]), int(g[2]), int(g[3])
        e_h, e_m, e_s = int(g[5]), int(g[6]), int(g[7])
        body = g[9].strip().replace("\n", " ")
        if len(body) <= 2:
            continue  # skip near-empty cues
        cues.append({
            "start_sec": s_h * 3600 + s_m * 60 + s_s,
            "end_sec": e_h * 3600 + e_m * 60 + e_s,
            "start_time": f"{s_h:02d}:{s_m:02d}:{s_s:02d}",
            "end_time": f"{e_h:02d}:{e_m:02d}:{e_s:02d}",
            "text": body,
        })
    return cues
|
||||
|
||||
|
||||
def srt_to_timestamped_text(srt_path: str) -> str:
    """Render the SRT as plain text, one '[HH:MM:SS] text' line per cue."""
    lines = []
    for cue in parse_srt_segments(srt_path):
        lines.append(f"[{cue['start_time']}] {cue['text']}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_theme_prompt(transcript: str) -> str:
    """Build the Chinese-language prompt that asks the model to locate
    the complete passage for each of the 7 theme types.

    The transcript is truncated to 15000 characters to keep the prompt
    within the local model's context budget.
    """
    # Truncate overly long transcripts; short ones pass through unchanged.
    txt = transcript[:15000] if len(transcript) > 15000 else transcript
    return f"""你是短视频内容策划师。根据「视频剪辑方案」,分析以下视频文字稿,找出 7 类主题各自的**完整段落**。

{THEME_DEFINITIONS}

【关键】时间节点非固定!需结合视频实际内容分析:
- 每个主题只取一段,且必须是**完整主题**(不中断、语义完整)
- 从文字稿中精确找出该主题开始和结束的时间点
- 若某类主题在视频中未出现,可跳过,不强制凑齐 7 段
- 参考顺序帮助理解,实际顺序按内容出现顺序

【输出格式】严格 JSON 数组,每项含:
- theme: 主题类型名(如"引出问题")
- title: 简短标题(简体中文)
- start_time: "HH:MM:SS"
- end_time: "HH:MM:SS"
- hook_3sec: 前3秒Hook,15字内
- cta_ending: 结尾CTA(可用"{DEFAULT_CTA}")
- transcript_excerpt: 该段内容前60字

只输出 JSON 数组,不要```包裹,不要其他文字。所有文字必须简体中文。

视频文字稿:
---
{txt}
---"""
|
||||
|
||||
|
||||
def _parse_ai_json(text: str) -> list:
|
||||
text = text.strip()
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```\s*$", "", text)
|
||||
m = re.search(r"\[[\s\S]*\]", text)
|
||||
if m:
|
||||
return json.loads(m.group())
|
||||
return json.loads(text)
|
||||
|
||||
|
||||
def call_ollama(transcript: str) -> str:
    """Ask the local Ollama server to segment the transcript by theme.

    Returns the model's raw text response (stripped); raises
    RuntimeError on any HTTP error or connection failure.
    """
    import requests

    payload = {
        "model": "qwen2.5:1.5b",
        "prompt": _build_theme_prompt(transcript),
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 8192},
    }
    try:
        resp = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=payload,
            timeout=120,
        )
        if resp.status_code != 200:
            raise RuntimeError(f"Ollama {resp.status_code}")
        return resp.json().get("response", "").strip()
    except Exception as e:
        # Wrap everything (incl. the status-code error above) uniformly.
        raise RuntimeError(f"Ollama 调用失败: {e}") from e
|
||||
|
||||
|
||||
def fallback_by_keywords(transcript_path: str) -> list:
    """Rule-based fallback: roughly split the transcript into theme
    segments by keyword matching (used when the Ollama call fails).

    Returns a list of highlight dicts in the same shape the AI path
    produces (theme/title/start_time/end_time/hook_3sec/cta_ending/
    transcript_excerpt).
    """
    segments = parse_srt_segments(transcript_path)
    if not segments:
        return []
    # keyword list -> theme name
    theme_keywords = {
        "引出问题": ["问题", "遇到", "痛点", "为什么", "困惑", "难题"],
        "解决方案": ["方法", "解决", "怎么做", "技巧", "核心", "干货"],
        "案例分享": ["案例", "例子", "数据", "客户", "赚了", "做了"],
        "未来展望": ["接下来", "未来", "行动", "去做", "试试"],
        "痛点强调": ["坑", "避坑", "千万别", "误区", "踩雷"],
        "福利展示": ["福利", "限时", "赠送", "优惠", "免费"],
        "权威背书": ["专业", "背书", "资质", "成果", "证明"],
    }
    result = []
    used = set()
    for theme, kws in theme_keywords.items():
        cands = []
        for s in segments:
            # Skip cues already claimed by an earlier theme (see NOTE below).
            if s["start_sec"] in used:
                continue
            txt = s["text"]
            if any(kw in txt for kw in kws):
                cands.append(s)
        if cands:
            # Take the first matching cue, then extend the window by
            # absorbing cues that start within 30s past the current end.
            first = cands[0]
            start_sec = first["start_sec"]
            end_sec = first["end_sec"]
            for s in segments:
                if s["start_sec"] >= start_sec and s["start_sec"] <= end_sec + 30:
                    end_sec = max(end_sec, s["end_sec"])
            # Mark the covered range in 10s ticks so later themes skip it.
            # NOTE(review): only cues whose start_sec lands exactly on one
            # of these ticks are blocked — an approximate overlap guard.
            for t in range(int(start_sec), int(end_sec) + 1, 10):
                used.add(t)
            h, m, s_ = start_sec // 3600, (start_sec % 3600) // 60, int(start_sec % 60)
            eh, em, es = end_sec // 3600, (end_sec % 3600) // 60, int(end_sec % 60)
            result.append({
                "theme": theme,
                "title": theme,
                "start_time": f"{int(h):02d}:{int(m):02d}:{int(s_):02d}",
                "end_time": f"{int(eh):02d}:{int(em):02d}:{int(es):02d}",
                "hook_3sec": f"精彩{theme}",
                "cta_ending": DEFAULT_CTA,
                "transcript_excerpt": first["text"][:60],
            })
    return result
|
||||
|
||||
|
||||
def main():
    """CLI entry point: transcript.srt -> themed highlights.json.

    Tries Ollama-based theme analysis first, normalizes the model's
    output, and falls back to keyword rules when the model fails or
    returns nothing usable.
    """
    parser = argparse.ArgumentParser(description="按完整主题分析 transcript")
    parser.add_argument("--transcript", "-t", required=True, help="transcript.srt")
    parser.add_argument("--output", "-o", required=True, help="highlights.json")
    args = parser.parse_args()

    src = Path(args.transcript)
    if not src.exists():
        print(f"❌ 不存在: {src}", file=sys.stderr)
        sys.exit(1)
    stamped = srt_to_timestamped_text(str(src))
    if len(stamped) < 100:
        print("❌ 文字稿过短", file=sys.stderr)
        sys.exit(1)

    themes = None
    try:
        print("正在分析完整主题(Ollama)...")
        themes = _parse_ai_json(call_ollama(stamped))
        if themes and isinstance(themes, list):
            for idx, item in enumerate(themes):
                if not isinstance(item, dict):
                    continue
                # Accept "start"/"end" aliases the model sometimes emits.
                if "start" in item and "start_time" not in item:
                    item["start_time"] = item.pop("start", "")
                if "end" in item and "end_time" not in item:
                    item["end_time"] = item.pop("end", "")
                # Fill missing presentation fields (title must come first,
                # since hook_3sec derives from it).
                item.setdefault("title", item.get("theme", f"主题{idx+1}"))
                item.setdefault("hook_3sec", item.get("title", "")[:15])
                item.setdefault("cta_ending", DEFAULT_CTA)
            themes = [
                item for item in themes
                if isinstance(item, dict) and item.get("start_time") and item.get("end_time")
            ]
    except Exception as e:
        print(f"Ollama 失败 ({e}),使用规则备用", file=sys.stderr)

    if not themes or not isinstance(themes, list):
        print("使用规则备用(按关键词)", file=sys.stderr)
        themes = fallback_by_keywords(str(src))

    dest = Path(args.output)
    dest.parent.mkdir(parents=True, exist_ok=True)
    with open(dest, "w", encoding="utf-8") as fh:
        json.dump(themes, fh, ensure_ascii=False, indent=2)
    print(f"✅ 已输出 {len(themes)} 个完整主题: {dest}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -89,8 +89,10 @@ def main():
|
||||
parser.add_argument("--video", "-v", required=True, help="输入视频路径")
|
||||
parser.add_argument("--output", "-o", help="输出目录(默认:视频同目录下 视频名_output)")
|
||||
parser.add_argument("--clips", "-n", type=int, default=8, help="切片数量")
|
||||
parser.add_argument("--mode", "-m", choices=["highlights", "theme"], default="highlights",
|
||||
help="highlights=高光识别(默认); theme=按完整主题分析(时间节点非固定)")
|
||||
parser.add_argument("--skip-transcribe", action="store_true", help="跳过转录(已有 transcript.srt)")
|
||||
parser.add_argument("--skip-highlights", action="store_true", help="跳过高光识别(已有 highlights.json)")
|
||||
parser.add_argument("--skip-highlights", action="store_true", help="跳过高光/主题识别(已有 highlights.json)")
|
||||
parser.add_argument("--skip-clips", action="store_true", help="跳过切片(已有 clips/,仅重新增强)")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -154,19 +156,31 @@ def main():
|
||||
transcript_to_simplified(transcript_path)
|
||||
print(" ✓ 字幕已转简体")
|
||||
|
||||
# 2. 高光识别
|
||||
# 2. 高光/主题识别
|
||||
if not args.skip_highlights:
|
||||
run(
|
||||
[
|
||||
sys.executable,
|
||||
str(SCRIPT_DIR / "identify_highlights.py"),
|
||||
"--transcript", str(transcript_path),
|
||||
"--output", str(highlights_path),
|
||||
"--clips", str(args.clips),
|
||||
],
|
||||
"高光识别(Ollama→规则)",
|
||||
timeout=60,
|
||||
)
|
||||
if args.mode == "theme":
|
||||
run(
|
||||
[
|
||||
sys.executable,
|
||||
str(SCRIPT_DIR / "identify_theme_segments.py"),
|
||||
"--transcript", str(transcript_path),
|
||||
"--output", str(highlights_path),
|
||||
],
|
||||
"完整主题分析(Ollama→规则,时间节点非固定)",
|
||||
timeout=120,
|
||||
)
|
||||
else:
|
||||
run(
|
||||
[
|
||||
sys.executable,
|
||||
str(SCRIPT_DIR / "identify_highlights.py"),
|
||||
"--transcript", str(transcript_path),
|
||||
"--output", str(highlights_path),
|
||||
"--clips", str(args.clips),
|
||||
],
|
||||
"高光识别(Ollama→规则)",
|
||||
timeout=60,
|
||||
)
|
||||
if not highlights_path.exists():
|
||||
print(f"❌ 需要 highlights.json: {highlights_path}")
|
||||
sys.exit(1)
|
||||
|
||||
@@ -81,3 +81,4 @@
|
||||
| 2026-02-22 10:57:44 | 🔄 卡若AI 同步 2026-02-22 10:57 | 更新:卡土、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:00:29 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:07:02 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
| 2026-02-22 11:32:57 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 |
|
||||
|
||||
@@ -84,3 +84,4 @@
|
||||
| 2026-02-22 10:57:44 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 10:57 | 更新:卡土、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:00:29 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:00 | 更新:卡土、运营中枢参考资料、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:07:02 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:07 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
| 2026-02-22 11:32:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:32 | 更新:金仓、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||
|
||||
Reference in New Issue
Block a user