Files
soul-yongping/scripts/pack_soul_operation_skills.py

346 lines
11 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
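Soul operations full-pipeline skill bundle (slim edition).

Packs only SKILL files, scripts, and small configs so that another machine can
reinstall the environment with pip/conda. Large files, media, cookies, logs,
etc. are never included.

Usage:
    python3 scripts/pack_soul_operation_skills.py

Output:
    ~/Downloads/Soul运营全链路技能包_精简_YYYYMMDD.zip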
"""
from __future__ import annotations
import datetime as _dt
import json
import os
import shutil
import sys
import zipfile
from pathlib import Path
# Per-file size cap (bytes) for every extension that is NOT code/doc-like;
# anything larger is skipped.
MAX_FILE_BYTES = 512 * 1024  # 512 KiB
# Script and document extensions get a looser cap so that a large .py/.md is
# not dropped by mistake (still far below the 200MB+ size of the full bundle).
CODE_DOC_EXT = frozenset(
    {
        # scripts
        ".py", ".sh", ".bash", ".zsh", ".sql",
        # documents / plain text
        ".md", ".mdc", ".txt",
        # configuration / structured text
        ".json", ".yaml", ".yml", ".toml", ".cfg", ".ini", ".xml",
        # web sources
        ".html", ".css", ".js", ".ts", ".tsx", ".jsx", ".svg",
    }
)
MAX_CODE_DOC_BYTES = 8 * 1024 * 1024  # 8 MiB
# A directory whose full name matches one of these is left out of the bundle
# (the walk never descends into it).
SKIP_DIR_NAMES = frozenset(
    {
        # version control
        ".git", ".svn",
        # browser profiles / state
        ".browser_state", "chromium_data",
        # installed dependencies and virtual environments
        "node_modules", "venv", ".venv",
        # tool caches and environments
        "__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache", ".tox",
        # build / packaging / report output
        "dist", "build", "eggs", ".eggs", "htmlcov",
        # Cookies must be re-exported after logging in on the new machine,
        # so they are never packed.
        "cookies",
    }
)
# Extensions that are always skipped (media, models, archives, and the like).
SKIP_EXTENSIONS = frozenset(
    {
        # video
        ".mp4", ".mov", ".mkv", ".avi", ".webm", ".m4v", ".flv", ".wmv",
        # archives
        ".zip", ".tar", ".gz", ".tgz", ".bz2", ".xz", ".rar", ".7z",
        # disk images
        ".dmg", ".iso", ".img",
        # model weights / checkpoints
        ".pt", ".pth", ".onnx", ".ckpt", ".safetensors", ".bin",
        # native binaries
        ".exe", ".dll", ".so", ".dylib",
        # audio
        ".wav", ".mp3", ".flac", ".aac", ".m4a",
        # serialized arrays / objects
        ".npz", ".npy", ".pkl", ".pickle",
        # Python wheels
        ".whl",
        # columnar data
        ".parquet", ".arrow",
    }
)
# File names (without any directory part) that are always skipped.
SKIP_FILE_NAMES = frozenset(
    (
        # OS-generated clutter
        ".DS_Store",
        "Thumbs.db",
        # distribution log, can grow huge
        "publish_log.json",
        # credentials: safer to re-acquire with the script on the new machine;
        # copy the file manually if you really need to carry it over
        ".feishu_tokens.json",
    )
)
# Root of the 卡若AI workspace. The default matches the original author's
# machine; set the KARUO_AI_ROOT environment variable (a leading "~" is
# expanded) to point at a different location instead of editing this file.
# An unset or empty variable falls back to the default.
KARUO_AI = Path(
    os.environ.get("KARUO_AI_ROOT") or "/Users/karuo/Documents/个人/卡若AI"
).expanduser()
# Where the Cursor skill folders live for the current user.
CURSOR_SKILLS = Path.home() / ".cursor" / "skills"
# The finished zip is written here.
DOWNLOADS = Path.home() / "Downloads"
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Date stamp (YYYYMMDD, local date at import time) used in the bundle name.
STAMP = _dt.date.today().strftime("%Y%m%d")
# Name of the zip file and of the single top-level folder inside it.
BUNDLE_TOP = f"Soul运营全链路技能包_精简_{STAMP}"
# Files are staged under REPO_ROOT/.tmp_skill_bundle/<BUNDLE_TOP> before zipping.
STAGING_PARENT = REPO_ROOT / ".tmp_skill_bundle"
STAGING = STAGING_PARENT / BUNDLE_TOP
# Counters: files copied and files/directories skipped, by reason.
_stats: dict[str, int] = {"files": 0, "skipped_size": 0, "skipped_ext": 0, "skipped_dir": 0, "skipped_name": 0}
def should_skip_file(path: Path) -> tuple[bool, str]:
    """Decide whether *path* must be left out of the bundle.

    Checks run in this order: exact file name, lower-cased extension, then
    size. Extensions in ``CODE_DOC_EXT`` are allowed up to
    ``MAX_CODE_DOC_BYTES``; everything else is capped at ``MAX_FILE_BYTES``.

    Returns:
        ``(True, reason)`` when the file is excluded, where *reason* is one of
        ``"name"``, ``"ext"``, ``"stat"`` (the size could not be read) or
        ``"size"``; ``(False, "")`` when the file should be packed.
    """
    if path.name in SKIP_FILE_NAMES:
        return True, "name"

    suffix = path.suffix.lower()
    if suffix in SKIP_EXTENSIONS:
        return True, "ext"

    try:
        size_bytes = path.stat().st_size
    except OSError:
        # Unreadable or vanished file: exclude it rather than fail the run.
        return True, "stat"

    if suffix in CODE_DOC_EXT:
        ceiling = MAX_CODE_DOC_BYTES
    else:
        ceiling = MAX_FILE_BYTES
    return (True, "size") if size_bytes > ceiling else (False, "")
def copy_tree_selective(src: Path, dst_root: Path, rel_base: Path) -> None:
    """Copy the files under *src* into ``dst_root / rel_base``, honouring the skip rules.

    Directories named in ``SKIP_DIR_NAMES`` are pruned from the walk, and each
    file is filtered through ``should_skip_file``. The relative layout below
    *src* is preserved. Destination directories are created lazily, only when
    a file is actually copied into them, so fully filtered directories leave
    no empty folder behind.

    Args:
        src: Directory to copy from. If it is not a directory a SKIP notice is
            printed to stderr and nothing is copied.
        dst_root: Root of the staging tree.
        rel_base: Path below *dst_root* that receives the contents of *src*.

    Side effects:
        Updates the module-level ``_stats`` counters. Every skipped file is
        counted under ``skipped_<reason>``; files whose size could not be read
        (reason ``"stat"``) are also reported on stderr instead of being
        dropped silently.
    """
    if not src.is_dir():
        print(f"SKIP 非目录: {src}", file=sys.stderr)
        return
    for root_path, dirnames, filenames in os_walk_topdown(src):
        # Prune in place: os.walk only honours edits made to this very list.
        kept = [d for d in dirnames if d not in SKIP_DIR_NAMES]
        _stats["skipped_dir"] += len(dirnames) - len(kept)
        dirnames[:] = kept

        dest_dir = dst_root / rel_base / root_path.relative_to(src)
        for fname in filenames:
            fp = root_path / fname
            skip, reason = should_skip_file(fp)
            if skip:
                if reason == "stat":
                    # An unreadable file is missing from the bundle; say so.
                    print(f"SKIP 无法读取: {fp}", file=sys.stderr)
                counter = f"skipped_{reason}"
                # .get(): "skipped_stat" is not pre-seeded in the counters.
                _stats[counter] = _stats.get(counter, 0) + 1
                continue
            dest_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(fp, dest_dir / fname)
            _stats["files"] += 1
def os_walk_topdown(src: Path):
    """Walk *src* top-down like ``os.walk``, yielding each directory as a ``Path``.

    Yields ``(directory, dirnames, filenames)``. ``dirnames`` is the same list
    object ``os.walk`` produced, so callers may edit it in place to prune the
    walk.
    """
    for entry in os.walk(str(src), topdown=True):
        current, subdirs, files = entry
        yield Path(current), subdirs, files
def copy_cursor_skill(name: str) -> None:
    """Stage the Cursor skill folder *name* at ``.cursor/skills/<name>`` in the bundle.

    A skill folder that does not exist under ``CURSOR_SKILLS`` is reported on
    stderr and skipped.
    """
    skill_dir = CURSOR_SKILLS / name
    if skill_dir.is_dir():
        copy_tree_selective(skill_dir, STAGING, Path(".cursor", "skills", name))
        return
    print(f"SKIP 无 Cursor skill: {skill_dir}", file=sys.stderr)
def main() -> int:
    """Build the slim skill bundle and write it to ``DOWNLOADS`` as a zip.

    Steps: reset the counters and today's staging directory, copy the two
    Cursor skills and the selected 卡若AI sub-trees through the skip filters,
    generate the two Markdown guides plus ``_pack_stats.json``, then zip the
    staging tree under a single top-level ``BUNDLE_TOP`` folder. The staging
    directory is left on disk afterwards; its path is printed.

    Returns:
        ``0`` on success, ``1`` when the ``KARUO_AI`` root directory is missing.
    """
    global _stats
    if not KARUO_AI.is_dir():
        print(f"ERROR: 未找到卡若AI目录: {KARUO_AI}", file=sys.stderr)
        return 1
    # Rebind the module-level counters so every run starts from zero.
    _stats = {k: 0 for k in _stats}
    # Always start from an empty staging directory for today's bundle.
    if STAGING.exists():
        shutil.rmtree(STAGING)
    STAGING.mkdir(parents=True)
    # Cursor entry points (usually just SKILL.md).
    for name in ("soul-operation-report", "soul-party-project"):
        copy_cursor_skill(name)
    kai_rel = Path("卡若AI")

    def pack_sub(src_under_karuo: Path, rel_under_kai: Path) -> None:
        """Stage *src_under_karuo* (an absolute path under the 卡若AI root) at ``卡若AI/rel_under_kai``."""
        if not src_under_karuo.exists():
            print(f"SKIP 不存在: {src_under_karuo}", file=sys.stderr)
            return
        copy_tree_selective(src_under_karuo, STAGING, kai_rel / rel_under_kai)

    pack_sub(
        KARUO_AI / "02_卡人" / "水岸_项目管理",
        Path("02_卡人") / "水岸_项目管理",
    )
    bridge = KARUO_AI / "02_卡人" / "水桥_平台对接"
    for sub in ("飞书管理", "智能纪要", "Soul创业实验"):
        pack_sub(bridge / sub, Path("02_卡人") / "水桥_平台对接" / sub)
    wood = KARUO_AI / "03_卡木" / "木叶_视频内容"
    for sub in (
        "视频切片",
        "多平台分发",
        "抖音发布",
        "B站发布",
        "视频号发布",
        "小红书发布",
        "快手发布",
    ):
        pack_sub(wood / sub, Path("03_卡木") / "木叶_视频内容" / sub)
    # The account/API index is a single file, so it is copied directly, but it
    # still goes through the same skip filter as everything else.
    idx = KARUO_AI / "运营中枢" / "工作台" / "00_账号与API索引.md"
    if idx.is_file():
        skip, _ = should_skip_file(idx)
        if not skip:
            dest = STAGING / kai_rel / "运营中枢" / "工作台" / idx.name
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(idx, dest)
            _stats["files"] += 1
    # Reinstall guide. It only points at the per-directory requirements.txt
    # files; nothing is merged.
    req_hint = STAGING / "重装依赖说明.md"
    req_hint.write_text(
        f"""# 重装依赖说明(精简包)
本包**不含**大文件与本地状态到新电脑请
1. **Python**建议 3.10+进入各含 `requirements.txt` 的脚本目录执行 `pip install -r requirements.txt`以各 SKILL 为准
2. **系统**`ffmpeg``ffprobe`视频切片视频转录见 SKILL 中的 **conda mlx-whisper** 环境说明
3. **Playwright**若飞书脚本需要`playwright install` 并按脚本说明登录**`.browser_state` 未打包**
4. **多平台分发**包内**不含 `cookies/` 目录**需在新机各平台重新登录导出 Cookie见多平台分发 SKILL
5. **飞书 Token**精简包默认**不含** `.feishu_tokens.json`请在新机用脚本流程重新授权若你刻意要迁移凭证请单独拷贝注意安全
---
打包策略摘要自动生成
- 代码/文档类`.py``.md` 单文件大于 **{MAX_CODE_DOC_BYTES // (1024 * 1024)} MB** 跳过其它类型大于 **{MAX_FILE_BYTES // 1024} KB** 跳过
- 跳过扩展名媒体压缩包模型权重等
- 跳过目录`cookies``node_modules``.browser_state``venv`
打包日期**{STAMP}**
""",
        encoding="utf-8",
    )
    # Read-me shown to whoever unpacks the bundle.
    readme = STAGING / "解压后必读.md"
    readme.write_text(
        f"""# Soul 运营全链路技能包(精简版)
## 本包特点
- **体积小**不含视频/大日志/模型/Cookie 目录等到新机器按 `重装依赖说明.md` **重装环境与凭证**
- **日期**{STAMP}
## 包含
- `.cursor/skills/``soul-operation-report``soul-party-project`
- `卡若AI/` 下水岸飞书管理智能纪要Soul创业实验视频切片多平台分发与各平台发布目录中的 **SKILL脚本小配置**受大小与类型过滤
## 合并步骤
1. 解压后把 `卡若AI/` **合并**进你的卡若AI根目录先备份
2. `.cursor/skills/` 下两个文件夹复制到 `~/.cursor/skills/`
3. 阅读 **`重装依赖说明.md`**安装 Python 依赖FFmpegconda 环境等
4. 配置飞书妙记各平台 Cookie永平 `.env`见各 SKILL `Soul创业实验/上传/环境与TOKEN配置.md`
**安全**勿将含密钥的压缩包上传公开网盘
""",
        encoding="utf-8",
    )
    # Packing statistics as JSON for later cross-checking. Both size limits
    # are recorded so the file matches what the guide above states.
    (STAGING / "_pack_stats.json").write_text(
        json.dumps(
            {
                **_stats,
                "max_file_bytes": MAX_FILE_BYTES,
                "max_code_doc_bytes": MAX_CODE_DOC_BYTES,
                "stamp": STAMP,
            },
            ensure_ascii=False,
            indent=2,
        ),
        encoding="utf-8",
    )
    DOWNLOADS.mkdir(parents=True, exist_ok=True)
    zip_path = DOWNLOADS / f"{BUNDLE_TOP}.zip"
    # strict_timestamps=False: shutil.copy2 tries to preserve mtimes, and a
    # staged file dated before 1980 would otherwise make ZipFile.write raise
    # ValueError; with the flag such timestamps are clamped instead.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED, strict_timestamps=False) as zf:
        # Sorted so the member order is reproducible between runs.
        for f in sorted(STAGING.rglob("*")):
            if f.is_file():
                arcname = Path(BUNDLE_TOP) / f.relative_to(STAGING)
                zf.write(f, arcname.as_posix())
    mb = zip_path.stat().st_size / (1024 * 1024)
    print(f"完成: {zip_path}")
    print(f"大小: {mb:.2f} MB | 打入文件数: {_stats['files']}")
    print(
        f"跳过: 超体积 {_stats['skipped_size']} | 扩展名 {_stats['skipped_ext']} | 文件名 {_stats['skipped_name']} | 目录 {_stats['skipped_dir']}"
    )
    print(f"临时目录(可删): {STAGING}")
    return 0
# Script entry point: hand main()'s return value to the shell as the exit status.
if __name__ == "__main__":
    sys.exit(main())