Files
karuo-ai/03_卡木(木)/木叶_视频内容/多平台分发/脚本/video_metadata.py

210 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
统一视频元数据生成器 v1
根据视频文件名自动生成:标题、简介、标签、分区
支持各平台差异化输出B站/视频号/小红书/快手/抖音)
用法:
meta = VideoMeta.from_filename("AI最大的缺点是上下文太短这样来解决.mp4")
print(meta.title("B站"))
print(meta.description("B站"))
print(meta.tags("B站"))
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from pathlib import Path
from content_filter import filter_for_platform
BRAND_TAG = "#卡若创业派对"
MINI_PROGRAM = "#小程序 卡若创业派对"
PLATFORM_CATEGORIES = {
"B站": {"tid": 160, "name": "生活 > 日常"},
"视频号": {"category": "科技数码"},
"小红书": {"category": "科技数码"},
"快手": {"category": "生活"},
"抖音": {"category": "科技"},
}
COMMON_TAGS = ["Soul派对", "创业", "认知觉醒", "副业", "商业思维"]
KEYWORD_TAGS = {
"AI": ["AI工具", "人工智能", "效率提升"],
"副业": ["副业", "副业入门", "副业收入"],
"创业": ["创业", "创业心态", "创业故事"],
"Soul": ["Soul派对", "Soul创业"],
"切片": ["切片分发", "自动化", "内容分发"],
"股权": ["创业股权", "团队管理"],
"疗愈": ["疗愈", "AI赋能", "疗愈商业"],
"模型": ["AI对比", "深度思考", "AI模型"],
"赚钱": ["创业心态", "商业思维"],
"装AI": ["AI服务", "传统行业"],
"坚持": ["坚持的力量", "自律"],
"视频": ["内容创作", "视频分发"],
}
CURATED_TITLES: dict[str, dict] = {
"AI最大的缺点是上下文太短这样来解决.mp4": {
"title": "AI的短板是记忆太短上下文一长就废了这个方法能解决",
"tags_extra": ["AI工具", "效率提升"],
},
"AI每天剪1000个视频 M4电脑24T素材库全网分发.mp4": {
"title": "M4芯片+24T素材库AI每天剪1000条视频自动全网分发",
"tags_extra": ["AI剪辑", "内容工厂"],
},
"Soul派对变现全链路 发视频就有钱,后端全解决.mp4": {
"title": "Soul派对怎么变现发视频就有收益后端体系全部搞定",
"tags_extra": ["Soul派对", "副业收入"],
},
"从0到切片发布 AI自动完成每天副业30条视频.mp4": {
"title": "从零到切片发布AI全自动完成每天副业产出30条视频",
"tags_extra": ["AI副业", "切片分发"],
},
"做副业的基本条件 苹果电脑和特殊访问工具.mp4": {
"title": "做副业的两个基本条件一台Mac和一个上网工具",
"tags_extra": ["副业入门", "工具推荐"],
},
"切片分发全自动化 从视频到发布一键完成.mp4": {
"title": "从录制到发布全自动化,一键切片分发五大平台",
"tags_extra": ["自动化", "内容分发"],
},
"创业团队4人平分25有啥危险 先跑钱再谈股权.mp4": {
"title": "创业团队4人平分25%股权有啥风险?先跑出收入再谈分配",
"tags_extra": ["创业股权", "团队管理"],
},
"坚持到120场是什么感觉 方向越确定执行越坚决.mp4": {
"title": "坚持到第120场派对是什么感觉方向越清晰执行越坚决",
"tags_extra": ["Soul派对", "坚持的力量"],
},
"帮人装AI一单300到1000块传统行业也能做.mp4": {
"title": "帮传统行业的人装AI工具一单收300到1000块简单好做",
"tags_extra": ["AI服务", "传统行业"],
},
"深度AI模型对比 哪个才是真正的AI不是语言模型.mp4": {
"title": "深度对比各大AI模型哪个才是真正的智能而不只是语言模型",
"tags_extra": ["AI对比", "深度思考"],
},
"疗愈师配AI助手能收多少钱 一个小团队5万到10万.mp4": {
"title": "疗愈师+AI助手组合一个小团队月收5万到10万",
"tags_extra": ["AI赋能", "疗愈商业"],
},
"赚钱没那么复杂,自信心才是核心问题.mp4": {
"title": "赚钱真没那么复杂,自信心才是卡住你的核心问题",
"tags_extra": ["创业心态", "自信"],
},
}
@dataclass
class VideoMeta:
"""单条视频的元数据"""
filename: str
base_title: str
tags_extra: list[str] = field(default_factory=list)
@classmethod
def from_filename(cls, filename: str) -> VideoMeta:
fname = Path(filename).name
curated = CURATED_TITLES.get(fname)
if curated:
return cls(
filename=fname,
base_title=curated["title"],
tags_extra=curated.get("tags_extra", []),
)
stem = Path(fname).stem
stem = re.sub(r'^\d+[._\-\s]*', '', stem)
stem = stem.replace('_', ' ').replace(' ', ' ').strip()
extra = []
for kw, tags in KEYWORD_TAGS.items():
if kw in stem:
extra.extend(tags)
extra = list(dict.fromkeys(extra))[:4]
return cls(filename=fname, base_title=stem or fname, tags_extra=extra)
def _smart_tags(self, platform: str) -> list[str]:
seen = set()
result = []
for t in self.tags_extra + COMMON_TAGS:
if t not in seen:
seen.add(t)
result.append(t)
return result[:8]
def title(self, platform: str, max_len: int = 80) -> str:
t = filter_for_platform(self.base_title, platform)
return t[:max_len]
def title_short(self, max_len: int = 20) -> str:
"""小红书标题≤20字"""
parts = re.split(r'[,!?\s]+', self.base_title)
return parts[0][:max_len] if parts else self.base_title[:max_len]
def hashtags(self, platform: str) -> str:
"""# 标签字符串"""
tags = self._smart_tags(platform)
parts = [f"#{t}" for t in tags]
parts.append(MINI_PROGRAM)
return " ".join(parts)
def description(self, platform: str, max_len: int = 500) -> str:
"""完整描述 = 标题 + 换行 + 标签 + 品牌"""
title = self.title(platform)
tags = self.hashtags(platform)
desc = f"{title}\n\n{tags}"
return filter_for_platform(desc[:max_len], platform)
def tags_list(self, platform: str) -> list[str]:
return self._smart_tags(platform)
def tags_str(self, platform: str) -> str:
"""逗号分隔B站 API 用)"""
return ",".join(self._smart_tags(platform))
def category(self, platform: str) -> dict:
return PLATFORM_CATEGORIES.get(platform, {})
def bilibili_meta(self) -> dict:
"""B站投稿需要的完整 meta"""
return {
"copyright": 1,
"source": "",
"desc": self.description("B站"),
"desc_format_id": 0,
"dynamic": "",
"interactive": 0,
"open_elec": 0,
"no_reprint": 1,
"subtitles": {"lan": "", "open": 0},
"tag": self.tags_str("B站"),
"tid": 160,
"title": self.title("B站", 80),
"up_close_danmaku": False,
"up_close_reply": False,
}
def generate_all_meta(video_dir: str) -> list[VideoMeta]:
"""批量生成目录下所有视频的元数据"""
videos = sorted(Path(video_dir).glob("*.mp4"))
return [VideoMeta.from_filename(str(v)) for v in videos]
if __name__ == "__main__":
from pathlib import Path
VIDEO_DIR = "/Users/karuo/Movies/soul视频/soul 派对 120场 20260320_output/成片"
metas = generate_all_meta(VIDEO_DIR)
for m in metas:
print(f"\n{'='*60}")
print(f"文件: {m.filename}")
print(f" B站标题: {m.title('B站')}")
print(f" B站简介: {m.description('B站')[:80]}...")
print(f" B站标签: {m.tags_str('B站')}")
print(f" 视频号: {m.description('视频号')[:80]}...")
print(f" 小红书标题: {m.title_short()}")