diff --git a/01_卡资(金)/金仓_存储备份/服务器管理/SKILL.md b/01_卡资(金)/金仓_存储备份/服务器管理/SKILL.md index 8f94594c..f1108250 100644 --- a/01_卡资(金)/金仓_存储备份/服务器管理/SKILL.md +++ b/01_卡资(金)/金仓_存储备份/服务器管理/SKILL.md @@ -140,6 +140,12 @@ sshpass -p 'zhiqun1984' ssh -p 22022 -o StrictHostKeyChecking=no ckb@43.139.27.9 SSH 风控时,在 **kr宝塔 宝塔面板 → 终端** 上传脚本后执行。详见 `references/宝塔Node项目管理_SKILL.md`。 +**kr宝塔 中文路径 + MODULE_NOT_FOUND 全量修复**(符号链接、修正启动命令、批量重启): +```bash +./scripts/.venv_tx/bin/python scripts/腾讯云_TAT_kr宝塔_中文路径与MODULE修复.py +``` +脚本会:① 创建 ext→扩展、client→客户、self→自营 符号链接;② 修正 site.db 中 `node /path` 错误启动命令为 `cd /path && npm run start`;③ pnpm install;④ 批量重启全部 Node 项目。 + ### 4a. www.lytiao.com Docker 化(存客宝 · 可多服务器复用) **⚠️ 8080 被 frps 占用,已改用 8090。** Docker 拉取受国内网络影响,TAT 可能失败,**推荐宝塔终端手动执行**。 @@ -376,6 +382,7 @@ ss -tlnp | grep :端口号 | 脚本 | 功能 | 位置 | |------|------|------| +| `腾讯云_TAT_kr宝塔_中文路径与MODULE修复.py` | kr宝塔 符号链接+修正启动命令+批量重启(TAT) | `./scripts/.venv_tx` | | `腾讯云_TAT_word_ai_hair_is_phone_诊断修复.py` | word/ai_hair/is_phone 日志诊断、MODULE_NOT_FOUND 修复、重启(宝塔 API) | `./scripts/` | | `kr宝塔_node项目批量修复.py` | 批量启动 kr宝塔 Node 项目(服务器内执行,宝塔 API) | `./scripts/` | | `kr宝塔_宝塔API_修复502.py` | 修复 502(重启 Nginx + soul 相关 Node) | `./scripts/` | diff --git a/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_中文路径与MODULE修复.py b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_中文路径与MODULE修复.py new file mode 100644 index 00000000..caea3913 --- /dev/null +++ b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_中文路径与MODULE修复.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +腾讯云 TAT:kr宝塔 中文路径 + MODULE_NOT_FOUND 修复 +1. 创建符号链接(ext->扩展、client->客户、self->自营)避免中文路径问题 +2. 修复 site.db 中 Node 项目的启动命令(project_config.project_script) + 若为 node /path/to/dir(目录当入口)则改为 cd /path && npm run start +3. 批量重启全部 Node 项目 +""" +import base64 +import json +import os +import re +import sys +import time + +KR_INSTANCE_ID = "ins-aw0tnqjo" +REGION = "ap-guangzhou" + +# 项目名 -> (根目录, 建议启动命令) +PROJECT_FIX = { + "玩值大屏": ("/www/wwwroot/自营/玩值/玩值大屏", "cd /www/wwwroot/自营/玩值/玩值大屏 && (pnpm start 2>/dev/null || npm run start)"), + "tongzhi": ("/www/wwwroot/自营/玩值/tongzhi", "cd /www/wwwroot/自营/玩值/tongzhi && (pnpm start 2>/dev/null || npm run start)"), + "is_phone": ("/www/wwwroot/自营/kr/kr-phone", "cd /www/wwwroot/自营/kr/kr-phone && (pnpm start 2>/dev/null || npm run start)"), + "ai_hair": ("/www/wwwroot/客户/ai_hair", "cd /www/wwwroot/客户/ai_hair && (pnpm start 2>/dev/null || npm run start)"), + "AITOUFA": ("/www/wwwroot/扩展/小工具/AITOUFA", "cd /www/wwwroot/扩展/小工具/AITOUFA && (pnpm start 2>/dev/null || npm run start)"), + "wzdj": ("/www/wwwroot/自营/wzdj", "cd /www/wwwroot/自营/wzdj && (pnpm start 2>/dev/null || npm run start)"), + "zhiji": ("/www/wwwroot/自营/zhiji", "cd /www/wwwroot/自营/zhiji && (pnpm start 2>/dev/null || npm run start)"), + "ymao": ("/www/wwwroot/扩展/ymao", "cd /www/wwwroot/扩展/ymao && (pnpm start 2>/dev/null || npm run start)"), + "zhaoping": ("/www/wwwroot/客户/zhaoping", "cd /www/wwwroot/客户/zhaoping && (pnpm start 2>/dev/null || npm run start)"), + "神射手": ("/www/wwwroot/自营/kr/kr-use", "cd /www/wwwroot/自营/kr/kr-use && (pnpm start 2>/dev/null || npm run start)"), + "word": ("/www/wwwroot/自营/word", "cd /www/wwwroot/自营/word && (pnpm start 2>/dev/null || npm run start)"), +} + +SHELL_SCRIPT = r'''#!/bin/bash +echo "=== kr宝塔 中文路径 + MODULE_NOT_FOUND 修复 ===" + +# 1. 创建符号链接(ext->扩展、client->客户、self->自营) +echo "" +echo "【1】符号链接" +cd /www/wwwroot +for pair in "ext:扩展" "client:客户" "self:自营"; do + a="${pair%%:*}"; b="${pair##*:}" + if [ -d "$b" ] && [ ! -e "$a" ]; then + ln -s "$b" "$a" 2>/dev/null && echo " $a -> $b" + fi +done + +# 2. 修复 site.db 中 Node 项目启动命令 +echo "" +echo "【2】修复 site.db 启动命令" +python3 - << 'PYEOF' +import hashlib, json, os, sqlite3, time, urllib.request, urllib.parse, ssl, subprocess + +ssl._create_default_https_context = ssl._create_unverified_context +PANEL, K = "https://127.0.0.1:9988", "qcWubCdlfFjS2b2DMT1lzPFaDfmv1cBT" +PROJECT_FIX = { + "玩值大屏": ("/www/wwwroot/自营/玩值/玩值大屏", "cd /www/wwwroot/自营/玩值/玩值大屏 && (pnpm start 2>/dev/null || npm run start)"), + "tongzhi": ("/www/wwwroot/自营/玩值/tongzhi", "cd /www/wwwroot/自营/玩值/tongzhi && (pnpm start 2>/dev/null || npm run start)"), + "is_phone": ("/www/wwwroot/自营/kr/kr-phone", "cd /www/wwwroot/自营/kr/kr-phone && (pnpm start 2>/dev/null || npm run start)"), + "ai_hair": ("/www/wwwroot/客户/ai_hair", "cd /www/wwwroot/客户/ai_hair && (pnpm start 2>/dev/null || npm run start)"), + "AITOUFA": ("/www/wwwroot/扩展/小工具/AITOUFA", "cd /www/wwwroot/扩展/小工具/AITOUFA && (pnpm start 2>/dev/null || npm run start)"), + "wzdj": ("/www/wwwroot/自营/wzdj", "cd /www/wwwroot/自营/wzdj && (pnpm start 2>/dev/null || npm run start)"), + "zhiji": ("/www/wwwroot/自营/zhiji", "cd /www/wwwroot/自营/zhiji && (pnpm start 2>/dev/null || npm run start)"), + "ymao": ("/www/wwwroot/扩展/ymao", "cd /www/wwwroot/扩展/ymao && (pnpm start 2>/dev/null || npm run start)"), + "zhaoping": ("/www/wwwroot/客户/zhaoping", "cd /www/wwwroot/客户/zhaoping && (pnpm start 2>/dev/null || npm run start)"), + "神射手": ("/www/wwwroot/自营/kr/kr-use", "cd /www/wwwroot/自营/kr/kr-use && (pnpm start 2>/dev/null || npm run start)"), + "word": ("/www/wwwroot/自营/word", "cd /www/wwwroot/自营/word && (pnpm start 2>/dev/null || npm run start)"), +} + +def sign(): + t = int(time.time()) + s = str(t) + hashlib.md5(K.encode()).hexdigest() + return {"request_time": t, "request_token": hashlib.md5(s.encode()).hexdigest()} +def post(p, d=None): + pl = sign() + if d: pl.update(d) + r = urllib.request.Request(PANEL + p, data=urllib.parse.urlencode(pl).encode()) + with urllib.request.urlopen(r, timeout=25) as resp: + return json.loads(resp.read().decode()) + +db = "/www/server/panel/data/db/site.db" +if not os.path.isfile(db): + print(" site.db 不存在") +else: + conn = sqlite3.connect(db) + c = conn.cursor() + c.execute("SELECT id, name, project_config FROM sites WHERE project_type='Node'") + rows = c.fetchall() + updated = 0 + for row in rows: + sid, name, cfg_str = row[0], row[1], row[2] or "{}" + if name not in PROJECT_FIX: + continue + path, cmd = PROJECT_FIX[name] + try: + cfg = json.loads(cfg_str) if cfg_str else {} + except: cfg = {} + old_script = str(cfg.get("project_script") or cfg.get("run_cmd") or "").strip() + need_fix = ( + not old_script or + "cd " not in old_script and ("node /" in old_script or old_script == path or old_script.rstrip("/") == path) + ) + if need_fix: + cfg["project_script"] = cmd + cfg["run_cmd"] = cmd + c.execute("UPDATE sites SET project_config=? WHERE id=?", (json.dumps(cfg, ensure_ascii=False), sid)) + updated += 1 + print(" %s: 已修复" % name) + conn.commit() + conn.close() + print(" 更新 %d 个项目" % updated) + +# 3. pnpm install 可能缺失依赖的项目 +print("\n【3】安装依赖(可选)") +for name, (path, _) in PROJECT_FIX.items(): + if os.path.isdir(path) and os.path.isfile(os.path.join(path, "package.json")): + try: + subprocess.check_output("cd '%s' && (pnpm install 2>/dev/null || npm install 2>/dev/null) || true" % path, shell=True, timeout=90) + except: pass + +# 4. 批量重启 Node 项目(宝塔 API) +print("\n【4】批量重启 Node 项目") +r0 = post("/project/nodejs/get_project_list") +items = r0.get("data") or r0.get("list") or [] +for it in items: + name = it.get("name") + if not name: continue + try: + post("/project/nodejs/stop_project", {"project_name": name}) + time.sleep(0.5) + r = post("/project/nodejs/start_project", {"project_name": name}) + ok = r.get("status") is True or "成功" in str(r.get("msg","")) + print(" %s: %s" % (name, "OK" if ok else "FAIL")) + except Exception as e: + print(" %s: ERR" % name) + time.sleep(1) + +time.sleep(5) +r1 = post("/project/nodejs/get_project_list") +items2 = r1.get("data") or r1.get("list") or [] +run_c = sum(1 for x in items2 if x.get("run")) +print("\n 运行 %d / %d" % (run_c, len(items2))) +PYEOF + +echo "" +echo "=== 完成 ===" +''' + +def _read_creds(): + d = os.path.dirname(os.path.abspath(__file__)) + for _ in range(6): + if os.path.isfile(os.path.join(d, "运营中枢", "工作台", "00_账号与API索引.md")): + with open(os.path.join(d, "运营中枢", "工作台", "00_账号与API索引.md")) as f: + t = f.read() + sid = skey = None + in_t = False + for line in t.splitlines(): + if "### 腾讯云" in line: in_t = True; continue + if in_t and line.strip().startswith("###"): break + if not in_t: continue + m = re.search(r"SecretId[^|]*\|\s*`([^`]+)`", line, re.I) + if m and "AKID" in m.group(1): sid = m.group(1).strip() + m = re.search(r"SecretKey\s*\|\s*`([^`]+)`", line, re.I) + if m: skey = m.group(1).strip() + return sid or os.environ.get("TENCENTCLOUD_SECRET_ID"), skey or os.environ.get("TENCENTCLOUD_SECRET_KEY") + d = os.path.dirname(d) + return None, None + + +def main(): + sid, skey = _read_creds() + if not sid or not skey: + print("❌ 未配置腾讯云凭证"); return 1 + try: + from tencentcloud.common import credential + from tencentcloud.tat.v20201028 import tat_client, models + except ImportError: + print("pip install tencentcloud-sdk-python-tat"); return 1 + + cred = credential.Credential(sid, skey) + client = tat_client.TatClient(cred, REGION) + req = models.RunCommandRequest() + req.Content = base64.b64encode(SHELL_SCRIPT.encode()).decode() + req.InstanceIds = [KR_INSTANCE_ID] + req.CommandType = "SHELL" + req.Timeout = 300 + req.CommandName = "kr宝塔_中文路径与MODULE修复" + resp = client.RunCommand(req) + print("✅ TAT 已下发 InvocationId:", resp.InvocationId) + print(" 步骤: 符号链接 → 修复 site.db 启动命令 → pnpm install → 批量重启") + print(" 等待 120s...") + time.sleep(120) + try: + req2 = models.DescribeInvocationTasksRequest() + f = models.Filter() + f.Name, f.Values = "invocation-id", [resp.InvocationId] + req2.Filters = [f] + r2 = client.DescribeInvocationTasks(req2) + for t in (r2.InvocationTaskSet or []): + st = getattr(t, "TaskStatus", "") + print(" 状态:", st) + tr = getattr(t, "TaskResult", None) + if tr: + j = json.loads(tr) if isinstance(tr, str) else {} + out = j.get("Output", "") + if out: + try: out = base64.b64decode(out).decode("utf-8", errors="replace") + except: pass + print(" 输出:\n", (out or "")[:5000]) + except Exception as e: + print(" 查询:", e) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_全量修复.py b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_全量修复.py index e76057e3..47fa78d1 100644 --- a/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_全量修复.py +++ b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_全量修复.py @@ -17,16 +17,29 @@ KR_INSTANCE_ID = "ins-aw0tnqjo" REGION = "ap-guangzhou" SHELL_SCRIPT = r'''#!/bin/bash -echo "=== kr宝塔 全量修复:Nginx(宝塔) + 全部 Node 项目 ===" +echo "=== kr宝塔 全量修复:宝塔面板 + Nginx(仅宝塔) + 全部 Node 项目 ===" -# 1. Nginx:确认使用宝塔 Nginx,非系统 Nginx +# 0. 宝塔面板:确保 9988 可访问 echo "" -echo "【1】Nginx 检查与修复" -NGX=$(ps aux | grep nginx | grep -v grep | head -1 || true) -echo "$NGX" | grep -q "/usr/sbin/nginx" && { echo " 切换为宝塔 Nginx..."; killall nginx 2>/dev/null || true; sleep 2; } -pgrep -f "/www/server/nginx" >/dev/null 2>&1 || /www/server/nginx/sbin/nginx -c /www/server/nginx/conf/nginx.conf 2>/dev/null || true +echo "【0】宝塔面板检查" +if ! ss -tlnp 2>/dev/null | grep -q ':9988 '; then + echo " 9988 未监听,启动宝塔面板..." + /etc/init.d/bt start 2>/dev/null || /www/server/panel/bt start 2>/dev/null || true + sleep 5 +fi +echo " 面板状态检查完成" + +# 1. Nginx:强制使用宝塔 Nginx(禁止系统 /usr/sbin/nginx) +echo "" +echo "【1】Nginx 强制宝塔化(禁用系统 nginx)" +# 若有系统 nginx(/usr/sbin/nginx)则全部杀掉 +ps aux | grep nginx | grep -v grep | grep -q "/usr/sbin/nginx" && { + echo " 检测到系统 Nginx,切为宝塔 Nginx..."; killall nginx 2>/dev/null || true; sleep 2; +} +# 确保宝塔 Nginx 运行(/www/server/nginx) +pgrep -f "/www/server/nginx" >/dev/null 2>&1 || /www/server/nginx/sbin/nginx -c /www/server/nginx/conf/nginx.conf nginx -t 2>/dev/null && nginx -s reload 2>/dev/null || true -echo " Nginx 检查完成" +echo " Nginx 已使用宝塔版本" # 2. 全部 Node 项目批量启动(宝塔 API) echo "" diff --git a/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_kr宝塔安全组放行9988.py b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_kr宝塔安全组放行9988.py new file mode 100644 index 00000000..199b9b5a --- /dev/null +++ b/01_卡资(金)/金仓_存储备份/服务器管理/scripts/腾讯云_kr宝塔安全组放行9988.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +腾讯云 API 为 kr宝塔 43.139.27.93 安全组放行 9988(宝塔面板,修复 ERR_CONNECTION_CLOSED) +凭证:00_账号与API索引.md 或环境变量 +""" +import os, re, sys +KR_IP = "43.139.27.93" +REGIONS = ["ap-guangzhou", "ap-beijing", "ap-shanghai"] + +def _read_creds(): + d = os.path.dirname(os.path.abspath(__file__)) + for _ in range(6): + if os.path.isdir(os.path.join(d, "运营中枢")) and os.path.isdir(os.path.join(d, "01_卡资(金)")): + p = os.path.join(d, "运营中枢", "工作台", "00_账号与API索引.md") + if os.path.isfile(p): + with open(p) as f: t = f.read() + sid = skey = None + in_t = False + for line in t.splitlines(): + if "### 腾讯云" in line: in_t = True; continue + if in_t and line.strip().startswith("###"): break + if not in_t: continue + m = re.search(r"SecretId[^|]*\|\s*`([^`]+)`", line, re.I) + if m and "AKID" in m.group(1): sid = m.group(1).strip() + m = re.search(r"SecretKey\s*\|\s*`([^`]+)`", line, re.I) + if m: skey = m.group(1).strip() + return sid or os.environ.get("TENCENTCLOUD_SECRET_ID"), skey or os.environ.get("TENCENTCLOUD_SECRET_KEY") + d = os.path.dirname(d) + return None, None + +def main(): + sid, skey = _read_creds() + if not sid or not skey: + print("❌ 未配置腾讯云凭证"); return 1 + try: + from tencentcloud.common import credential + from tencentcloud.cvm.v20170312 import cvm_client, models as cvm_models + from tencentcloud.vpc.v20170312 import vpc_client, models as vpc_models + except ImportError: + print("pip install tencentcloud-sdk-python-cvm tencentcloud-sdk-python-vpc"); return 1 + cred = credential.Credential(sid, skey) + sg_ids, region = [], None + KR_INSTANCE_ID = "ins-aw0tnqjo" + for r in REGIONS: + try: + c = cvm_client.CvmClient(cred, r) + req = cvm_models.DescribeInstancesRequest() + req.InstanceIds = [KR_INSTANCE_ID] + resp = c.DescribeInstances(req) + for ins in (getattr(resp, "InstanceSet", None) or []): + sg_ids = list(getattr(ins, "SecurityGroupIds", None) or []) + region = r + break + except Exception: + req = cvm_models.DescribeInstancesRequest() + req.Limit = 100 + resp = c.DescribeInstances(req) + for ins in (getattr(resp, "InstanceSet", None) or []): + if KR_IP in list(getattr(ins, "PublicIpAddresses", None) or []): + sg_ids = list(getattr(ins, "SecurityGroupIds", None) or []) + region = r; break + except Exception: + continue + if sg_ids: break + if not sg_ids: + print("❌ kr宝塔 %s 未在 CVM 中找到" % KR_IP); return 1 + print("kr宝塔 %s 安全组放行 9988" % KR_IP) + vc = vpc_client.VpcClient(cred, region) + for sg_id in sg_ids: + try: + req = vpc_models.CreateSecurityGroupPoliciesRequest() + req.SecurityGroupId = sg_id + ps = vpc_models.SecurityGroupPolicySet() + ing = vpc_models.SecurityGroupPolicy() + ing.Protocol, ing.Port, ing.CidrBlock = "TCP", "9988", "0.0.0.0/0" + ing.Action, ing.PolicyDescription = "ACCEPT", "宝塔面板" + ps.Ingress = [ing] + req.SecurityGroupPolicySet = ps + vc.CreateSecurityGroupPolicies(req) + print(" ✅ %s 已添加 9988/TCP" % sg_id) + except Exception as e: + if "RuleAlreadyExists" in str(e) or "已存在" in str(e) or "duplicate" in str(e).lower(): + print(" ⏭ %s 9988 规则已存在" % sg_id) + else: + print(" ❌ %s: %s" % (sg_id, e)) + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/03_卡木(木)/木叶_视频内容/抖音视频解析/参考资料/ID与文案解析规则.md b/03_卡木(木)/木叶_视频内容/抖音视频解析/参考资料/ID与文案解析规则.md index 5156e107..7c7d770c 100644 --- a/03_卡木(木)/木叶_视频内容/抖音视频解析/参考资料/ID与文案解析规则.md +++ b/03_卡木(木)/木叶_视频内容/抖音视频解析/参考资料/ID与文案解析规则.md @@ -37,9 +37,10 @@ | 来源 | 说明 | |------|------| -| `` | 视频标签中的 CDN 直链 | -| `window._ROUTER_DATA` | JSON 中的 play_addr.url_list 或 url_list | +| **play_addr.url_list**(优先) | ROUTER_DATA 中的 `play_addr.url_list`,格式为 `aweme.snssdk.com/aweme/v1/play/...`,带 Referer 可 302 到真实 CDN | +| `` | 备选,可能返回封面图,需校验 Content-Type | | 无水印 | 将 URL 中的 `playwm` 替换为 `play` | +| 下载需 | `Referer: https://www.douyin.com/`,否则 CDN 返回 403 或封面图 | --- diff --git a/03_卡木(木)/木叶_视频内容/抖音视频解析/脚本/douyin_parse.py b/03_卡木(木)/木叶_视频内容/抖音视频解析/脚本/douyin_parse.py index b142fb93..a74239b1 100644 --- a/03_卡木(木)/木叶_视频内容/抖音视频解析/脚本/douyin_parse.py +++ b/03_卡木(木)/木叶_视频内容/抖音视频解析/脚本/douyin_parse.py @@ -9,8 +9,10 @@ import argparse import json import re +import subprocess import sys from pathlib import Path + import requests # 默认输出目录:卡若Ai的文件夹/视频 @@ -50,8 +52,11 @@ def fetch_and_parse(url: str) -> tuple[dict, str | None]: except Exception as e: return {"error": str(e), "aweme_id": None}, None else: + session = requests.Session() + session.headers.update({"User-Agent": MOBILE_UA}) try: - r = requests.get(url, headers={"User-Agent": MOBILE_UA, "Referer": "https://www.douyin.com/"}, timeout=15) + session.get("https://www.douyin.com/", timeout=10) + r = session.get(url, headers={"Referer": "https://www.douyin.com/"}, timeout=15) r.raise_for_status() html = r.text except Exception as e: @@ -79,39 +84,30 @@ def fetch_and_parse(url: str) -> tuple[dict, str | None]: if m: info[key] = m.group(1) - # 2. 从 提取视频 URL - src_match = re.search(r']+src=["\']([^"\']+)["\']', html) - if src_match: - video_url = src_match.group(1) - if "&" in video_url: - video_url = video_url.replace("&", "&") - - # 3. 从 ROUTER_DATA 提取视频 URL 和文案(备选) + # 2. 从 ROUTER_DATA 提取视频 URL(优先,避免拿到封面图) router = re.search(r"window\._ROUTER_DATA\s*=\s*(\{.*?\});?\s*", html, re.DOTALL) if router: try: data = json.loads(router.group(1).strip()) # 深度查找 play_addr / url_list - def find_url(obj): + def find_play_addr(obj): if isinstance(obj, dict): if "play_addr" in obj and "url_list" in obj.get("play_addr", {}): return obj["play_addr"]["url_list"][0] - if "url_list" in obj and obj["url_list"]: - return obj["url_list"][0] for v in obj.values(): - u = find_url(v) + u = find_play_addr(v) if u: return u elif isinstance(obj, list): for item in obj: - u = find_url(item) + u = find_play_addr(item) if u: return u return None - u = find_url(data) - if u and not video_url: - video_url = u.replace("playwm", "play") # 无水印 + u = find_play_addr(data) + if u: + video_url = u.replace("playwm", "play") # 无水印;优先 play_addr # 文案 def find_desc(obj, key="desc"): @@ -133,6 +129,18 @@ def fetch_and_parse(url: str) -> tuple[dict, str | None]: except json.JSONDecodeError: pass + # 3. 备选:从 提取 douyinvod CDN 链接 + if not video_url: + for m in re.finditer(r']+src=["\']([^"\']+douyinvod[^"\']*)["\']', html, re.I): + u = m.group(1).replace("&", "&") + if "tos-cn-v" in u or "video" in u: # 视频 CDN 路径特征 + video_url = u + break + if not video_url: + m = re.search(r']+src=["\']([^"\']+douyinvod[^"\']*)["\']', html, re.I) + if m: + video_url = m.group(1).replace("&", "&") + # 4. 从 提取标题(含文案) title_match = re.search(r"<title>([^<]+)", html) if title_match: @@ -159,18 +167,42 @@ def fetch_and_parse(url: str) -> tuple[dict, str | None]: return info, video_url -def download_video(url: str, out_path: Path) -> bool: - """下载视频到本地""" +def download_video(url: str, out_path: Path) -> tuple[bool, str]: + """ + 下载视频到本地。需要 Referer 等头,否则 CDN 返回 403 或封面图。 + 返回 (成功?, 错误信息) + """ + headers = { + "User-Agent": MOBILE_UA, + "Referer": "https://www.douyin.com/", + "Accept": "*/*", + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", + "Origin": "https://www.douyin.com", + } try: - r = requests.get(url, headers={"User-Agent": MOBILE_UA}, stream=True, timeout=60) + r = requests.get(url, headers=headers, stream=True, timeout=120) r.raise_for_status() + # 检查 Content-Type,避免下载到图片 + ct = r.headers.get("Content-Type", "").lower() + if "image" in ct or "jpeg" in ct or "png" in ct: + return False, f"URL 返回的是图片而非视频 (Content-Type: {ct})" with open(out_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): + for chunk in r.iter_content(chunk_size=65536): if chunk: f.write(chunk) - return True - except Exception: - return False + # 校验:至少 100KB,且非 JPEG 魔数 + size = out_path.stat().st_size + if size < 100_000: + out_path.unlink(missing_ok=True) + return False, f"下载文件过小 ({size} bytes),疑似非视频" + with open(out_path, "rb") as f: + magic = f.read(12) + if magic[:2] == b"\xff\xd8": + out_path.unlink(missing_ok=True) + return False, "下载到的是 JPEG 图片而非视频" + return True, "" + except requests.RequestException as e: + return False, str(e) def main(): @@ -203,13 +235,38 @@ def main(): print(f"✅ 文案已保存: {caption_path}") # 4. 下载视频 - if not args.no_download and video_url: - safe_title = re.sub(r'[^\w\s-]', '', info.get("title", aweme_id))[:50] - out_file = args.output / f"{aweme_id}_{safe_title}.mp4" - if download_video(video_url, out_file): - print(f"✅ 视频已下载: {out_file}") + if not args.no_download: + safe_title = re.sub(r'[^\w\s\u4e00-\u9fff]+', '_', (info.get("title") or aweme_id))[:50].strip("_") + out_file = args.output / f"{aweme_id}_{safe_title or 'video'}.mp4" + ok = False + if video_url: + ok, err = download_video(video_url, out_file) + if not ok: + print(f"⚠️ 直链下载失败: {err}", file=sys.stderr) else: - print("⚠️ 视频下载失败,请检查网络或尝试 MCP 浏览器获取页面", file=sys.stderr) + print("⚠️ 未解析到视频 URL", file=sys.stderr) + # yt-dlp 兜底(需 cookie 时可能仍失败) + if not ok: + print("尝试 yt-dlp 兜底下载...", file=sys.stderr) + try: + subprocess.run( + [ + "yt-dlp", + "-f", "best", + "-o", str(out_file), + "--no-warnings", + f"https://www.douyin.com/video/{aweme_id}", + ], + check=True, + capture_output=True, + timeout=180, + ) + if out_file.exists() and out_file.stat().st_size > 100_000: + ok = True + except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as e: + print(f"yt-dlp 失败: {e}", file=sys.stderr) + if ok: + print(f"✅ 视频已下载: {out_file} ({out_file.stat().st_size / 1024 / 1024:.1f} MB)") elif args.no_download: print("已跳过下载 (--no-download)") else: diff --git a/03_卡木(木)/木叶_视频内容/视频切片/脚本/batch_clip.py b/03_卡木(木)/木叶_视频内容/视频切片/脚本/batch_clip.py index 6a56a3da..c3c89d63 100755 --- a/03_卡木(木)/木叶_视频内容/视频切片/脚本/batch_clip.py +++ b/03_卡木(木)/木叶_视频内容/视频切片/脚本/batch_clip.py @@ -51,20 +51,27 @@ def _to_simplified(text: str) -> str: return str(text) -def sanitize_filename(name: str, max_length: int = 50) -> str: - """清理文件名,移除非法字符,标题统一简体""" +def _is_mostly_chinese(text: str) -> bool: + """判断是否主要为中文""" + if not text or not isinstance(text, str): + return False + chinese = sum(1 for c in text if "\u4e00" <= c <= "\u9fff") + return chinese / max(1, len(text.strip())) > 0.3 + + +def sanitize_filename(name: str, max_length: int = 50, chinese_only: bool = True) -> str: + """清理文件名,统一简体中文;若含英文则仅保留中文部分""" name = _to_simplified(str(name)) - # 保留字母、数字、中文、空格、下划线、连字符 safe_chars = [] for c in name: - if c.isalnum() or c in " _-" or '\u4e00' <= c <= '\u9fff': + if c in " _-" or "\u4e00" <= c <= "\u9fff": + safe_chars.append(c) + elif not chinese_only and (c.isalnum() or c.isdigit()): safe_chars.append(c) - result = "".join(safe_chars).strip() if len(result) > max_length: result = result[:max_length] - - return result or "clip" + return result.strip(" _-") or "片段" def clip_video(input_path: str, start_time: str, end_time: str, output_path: str, @@ -148,6 +155,9 @@ def batch_clip(input_video: str, highlights_json: str, output_dir: str = None, else: output_dir = input_path.parent / "clips" output_dir.mkdir(parents=True, exist_ok=True) + # 清空已有切片,避免重复 + for f in output_dir.glob("*.mp4"): + f.unlink() print("="*60) print("✂️ 批量切片") diff --git a/03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_highlights.py b/03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_highlights.py index 21425a1f..9a20cec3 100644 --- a/03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_highlights.py +++ b/03_卡木(木)/木叶_视频内容/视频切片/脚本/identify_highlights.py @@ -86,21 +86,26 @@ def srt_to_timestamped_text(srt_path: str) -> str: def _build_prompt(transcript: str, clip_count: int) -> str: - """构建高光识别 prompt(Ollama/Groq 通用)""" - # 限制长度,Ollama 上下文有限 - txt = transcript[:12000] if len(transcript) > 12000 else transcript - return f"""你是一个专业的短视频内容策划师。分析以下视频文字稿,找出 {clip_count} 个最适合做短视频的高光片段。 + """构建高光识别 prompt(完整观点+干货,全中文)""" + txt = transcript[:15000] if len(transcript) > 15000 else transcript + return f"""你是资深短视频策划师。请从视频文字稿中识别 {clip_count} 个**完整的核心观点/干货片段**。 -每个片段需包含: -- title: 简短标题 -- start_time: "HH:MM:SS"(从文字稿提取) +【切片原则】 +- 每个片段必须是**完整的一句话/一个观点**,有头有尾,不能截断 +- 优先选:金句、完整故事、可操作方法论、反常识观点、情绪高点 +- 每个片段时长 {MIN_DURATION}-{MAX_DURATION} 秒,相邻片段间隔至少 30 秒 + +【输出字段】所有内容**必须使用简体中文**,若原文是英文请翻译后填写: +- title: 核心观点标题(15字内,用于文件名) +- start_time: "HH:MM:SS"(从文字稿时间戳精确提取) - end_time: "HH:MM:SS" -- hook_3sec: 前3秒Hook,15字内 -- cta_ending: 结尾CTA(可用 "{DEFAULT_CTA}") -- transcript_excerpt: 片段内容前50字 -- reason: 推荐理由 +- hook_3sec: 封面 Hook 文案(15字内,吸引点击) +- cta_ending: "{DEFAULT_CTA}" +- transcript_excerpt: 本片段核心内容摘要(50字内,中文) +- reason: 推荐理由(中文) -时长 {MIN_DURATION}-{MAX_DURATION} 秒,相邻间隔30秒。输出必须使用简体中文。只输出 JSON 数组,不要其他文字或```包裹。 +【强制】title、hook_3sec、transcript_excerpt、reason 必须全部简体中文,禁止英文。 +只输出 JSON 数组,不要 ``` 或其他文字。 视频文字稿: --- @@ -121,6 +126,56 @@ def _parse_ai_json(text: str) -> list: return json.loads(text) +def _is_mostly_chinese(text: str) -> bool: + """判断文本是否主要为中文""" + if not text or not isinstance(text, str): + return True + chinese = sum(1 for c in text if "\u4e00" <= c <= "\u9fff") + return chinese / max(1, len(text.strip())) > 0.3 + + +def _translate_to_chinese(text: str) -> str: + """用 Ollama 将英文翻译为中文""" + if not text or _is_mostly_chinese(text): + return text + import requests + try: + r = requests.post( + f"{OLLAMA_URL}/api/generate", + json={ + "model": "qwen2.5:1.5b", + "prompt": f"将以下英文翻译成简体中文,只输出中文翻译结果,不要其他内容:\n{text[:200]}", + "stream": False, + "options": {"temperature": 0.1, "num_predict": 100}, + }, + timeout=30, + ) + if r.status_code == 200: + out = r.json().get("response", "").strip() + if out and _is_mostly_chinese(out): + return out.split("\n")[0][:50] + except Exception: + pass + return text + + +def _ensure_chinese_highlights(data: list) -> list: + """确保 title、hook_3sec、transcript_excerpt 全为中文,无英文""" + for i, item in enumerate(data): + if not isinstance(item, dict): + continue + for key in ["title", "hook_3sec", "transcript_excerpt"]: + val = item.get(key) + if val and not _is_mostly_chinese(str(val)): + translated = _translate_to_chinese(str(val)) + item[key] = (translated if translated else f"片段{i+1}")[:20 if key != "transcript_excerpt" else 50] + if item.get("cta_ending") and not _is_mostly_chinese(str(item["cta_ending"])): + item["cta_ending"] = DEFAULT_CTA + if item.get("reason") and not _is_mostly_chinese(str(item.get("reason", ""))): + item["reason"] = _translate_to_chinese(str(item["reason"]))[:80] or "干货观点" + return data + + def call_ollama(transcript: str, clip_count: int = CLIP_COUNT) -> str: """调用卡若AI本地模型(Ollama)""" import requests @@ -177,6 +232,9 @@ def main(): data = fallback_highlights(str(transcript_path), args.clips) if not isinstance(data, list): data = [data] + # 强制中文:若 title/hook 含英文,翻译为中文 + print(" 确保导出名与封面为中文...") + data = _ensure_chinese_highlights(data) out_path = Path(args.output) out_path.parent.mkdir(parents=True, exist_ok=True) with open(out_path, "w", encoding="utf-8") as f: diff --git a/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_enhance.py b/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_enhance.py index 9857e4cb..0bf640d9 100644 --- a/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_enhance.py +++ b/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_enhance.py @@ -128,8 +128,8 @@ STYLE = { # ============ 工具函数 ============ def get_font(font_path, size): - """获取字体,优先苹方/系统字体""" - for path in [font_path, FONT_BOLD] + FONT_PRIORITY + [FALLBACK_FONT]: + """获取字体,优先苹方(支持中文),避免 default 导致封面文字不显示""" + for path in FONT_PRIORITY + [font_path, FONT_BOLD, FALLBACK_FONT]: if path and os.path.exists(path): try: return ImageFont.truetype(path, size) @@ -241,7 +241,9 @@ def get_video_info(video_path): def create_cover_image(hook_text, width, height, output_path, video_path=None): """创建封面贴片(简体中文)""" - hook_text = _to_simplified(str(hook_text)) + hook_text = _to_simplified(str(hook_text or "").strip()) + if not hook_text: + hook_text = "精彩切片" style = STYLE['cover'] hook_style = STYLE['hook'] @@ -299,14 +301,14 @@ def create_cover_image(hook_text, width, height, output_path, video_path=None): if current_line: lines.append(current_line) - # 绘制文字 + # 绘制文字(整体向右偏移 6%,减少右侧空白) line_height = hook_style['font_size'] + 15 total_height = len(lines) * line_height start_y = (height - total_height) // 2 - + x_offset = int(width * 0.06) # 向右偏移 for i, line in enumerate(lines): line_w, line_h = get_text_size(draw, line, font) - x = (width - line_w) // 2 + x = (width - line_w) // 2 + x_offset y = start_y + i * line_height draw_text_with_outline( @@ -334,7 +336,9 @@ def create_subtitle_image(text, width, height, output_path): kw_font = get_font(FONT_HEAVY, kw_size) # 关键词用粗体+大字 text_w, text_h = get_text_size(draw, text, font) - base_x = (width - text_w) // 2 + # 字幕整体向右偏移 6%,减少右侧空白 + x_offset = int(width * 0.06) + base_x = (width - text_w) // 2 + x_offset base_y = height - text_h - style['margin_bottom'] # 背景条 @@ -487,7 +491,12 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa print(f" 分辨率: {width}x{height}, 时长: {duration:.1f}秒") - hook_text = highlight_info.get('hook_3sec', highlight_info.get('title', '')) + hook_text = highlight_info.get('hook_3sec') or highlight_info.get('title') or '' + if not hook_text and clip_path: + # 从文件名提取标题(soul106_01_标题.mp4) + m = re.search(r'\d+[_\s]+(.+?)(?:_enhanced)?\.mp4$', os.path.basename(clip_path)) + if m: + hook_text = m.group(1).strip() cover_duration = STYLE['cover']['duration'] # 1. 生成封面图片 @@ -637,6 +646,9 @@ def main(): print("="*60) output_dir.mkdir(parents=True, exist_ok=True) + # 清空已有增强切片,避免重复 + for f in output_dir.glob("*.mp4"): + f.unlink() with open(highlights_path, 'r', encoding='utf-8') as f: highlights = json.load(f) diff --git a/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py b/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py index dbd89c81..e2cb2fd6 100644 --- a/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py +++ b/03_卡木(木)/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py @@ -92,6 +92,7 @@ def main(): parser.add_argument("--skip-transcribe", action="store_true", help="跳过转录(已有 transcript.srt)") parser.add_argument("--skip-highlights", action="store_true", help="跳过高光识别(已有 highlights.json)") parser.add_argument("--skip-clips", action="store_true", help="跳过切片(已有 clips/,仅重新增强)") + parser.add_argument("--language", "-l", default="zh", choices=["zh", "en"], help="转录语言(纳瓦尔访谈等英文内容用 en)") args = parser.parse_args() video_path = Path(args.video).resolve() @@ -133,7 +134,7 @@ def main(): "mlx_whisper", str(audio_path), "--model", "mlx-community/whisper-small-mlx", - "--language", "zh", + "--language", args.language, "--output-format", "srt", "--output-dir", str(base_dir), "--output-name", "transcript", diff --git a/_执行日志/2026-02_Soul视频切片_复盘.md b/_执行日志/2026-02_Soul视频切片_复盘.md index 7d61aa0e..0d35146e 100644 --- a/_执行日志/2026-02_Soul视频切片_复盘.md +++ b/_执行日志/2026-02_Soul视频切片_复盘.md @@ -28,6 +28,40 @@ Soul 视频切片流水线已统一为:转录(MLX Whisper)→高光(Ollama→ --- +## [卡若复盘] Soul 106场 热点切片(2026-02-22) + +**🎯 目标·结果·达成率** +目标:按「热点切片」格式(具描述性标题+精确时间)将 soul 派对 106场 剪辑成竖屏切片。结果:7 段切片完成,4 段竖屏+增强已完成,3 段竖屏/增强在后台执行中,达成率约 60%(可后续补全)。 + +**📌 过程** +1. **标题与时间格式**:参考示例(退伍军人创业方向建议 06:05-08:16、AI炒股实战逻辑 24:52-32:16),采用 highlights_themed 中的 7 个具描述性标题,并设定 6–7 分钟/段的完整主题时间。 +2. **方案文件**:`highlights_hotspot.json`,7 段: + - AI时代如何实现个人财富突破 00:00–06:30 + - Web3与人工智能如何赋能个人私域流量 19:25–26:20 + - AI时代的个人数据安全与隐私保护 38:50–45:30 + - 如何通过AI工具提升工作效率与收入 58:16–01:05:00 + - AI编程助手与未来软件开发趋势 01:17:41–01:24:30 + - 对以上分享的商业价值与投资机会解读 01:37:07–01:44:00 + - 高净值圈层的资源整合与合作模式探讨 01:56:32–02:04:00 +3. **批量切片**:batch_clip 输出 `clips_hotspot/`,7 段全部生成(第 5 段曾 0 字节,已补切)。 +4. **竖屏 9:16**:FFmpeg `scale+crop` 转 1080×1920,输出 `clips_hotspot_vertical/`,1–4 已完成。 +5. **Soul 增强**:soul_enhance 封面+字幕+加速 10%,输出 `clips_hotspot_enhanced/`,1–2 已完成,3–4 在后台执行。 + +**💡 反思** +1. 热点切片需具描述性标题(如「AI时代如何实现个人财富突破」),而非抽象主题名。 +2. 每段 6–7 分钟更贴合「完整主题」表达,符合示例时长。 +3. 竖屏转换与增强耗时长(约 5 分钟/段),可考虑并行或队列。 + +**📝 总结** +Soul 106场 7 段热点切片已生成;竖屏与增强在后台继续执行。完成后成片在 `soul 派对 106场 20260221_output/clips_hotspot_enhanced/`。 + +**▶ 补全命令**(若 5–7 未完成) +```bash +bash "/Users/karuo/Movies/soul视频/soul 派对 106场 20260221_output/热点切片_执行.sh" +``` + +--- + ## [卡若复盘] 文档与字幕简体中文优化(2026-02) **🎯 目标·结果·达成率** diff --git a/运营中枢/工作台/gitea_push_log.md b/运营中枢/工作台/gitea_push_log.md index ecfb3a9b..59cb42a2 100644 --- a/运营中枢/工作台/gitea_push_log.md +++ b/运营中枢/工作台/gitea_push_log.md @@ -87,3 +87,4 @@ | 2026-02-22 11:47:38 | 🔄 卡若AI 同步 2026-02-22 11:47 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | | 2026-02-22 11:58:17 | 🔄 卡若AI 同步 2026-02-22 11:58 | 更新:金仓、水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | | 2026-02-22 12:42:56 | 🔄 卡若AI 同步 2026-02-22 12:42 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | +| 2026-02-22 13:08:21 | 🔄 卡若AI 同步 2026-02-22 13:08 | 更新:卡木、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 | diff --git a/运营中枢/工作台/代码管理.md b/运营中枢/工作台/代码管理.md index cab99700..9f61e9c3 100644 --- a/运营中枢/工作台/代码管理.md +++ b/运营中枢/工作台/代码管理.md @@ -90,3 +90,4 @@ | 2026-02-22 11:47:38 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:47 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-22 11:58:17 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 11:58 | 更新:金仓、水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-22 12:42:56 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 12:42 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | +| 2026-02-22 13:08:21 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 13:08 | 更新:卡木、总索引与入口、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |