🔄 卡若AI 同步 2026-02-22 14:42 | 更新：金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个

2026-02-22 14:42:00 +08:00
parent c8a42f6874
commit 9136777df7
5 changed files with 295 additions and 3 deletions
--- a/01_卡资（金）/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_运行堵塞与Node深度修复.py
+++ b/01_卡资（金）/金仓_存储备份/服务器管理/scripts/腾讯云_TAT_kr宝塔_运行堵塞与Node深度修复.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+腾讯云 TAT：kr宝塔 运行堵塞 + Node 深度修复
+1. 运行堵塞诊断：负载/CPU/TOP 进程、结束异常 node 进程
+2. 停止全部 Node、修复 site.db 启动命令、查 Node 日志
+3. 批量启动 Node，验证状态
+凭证：00_账号与API索引.md
+"""
+import base64
+import json
+import os
+import re
+import sys
+import time
+
+KR_INSTANCE_ID = "ins-aw0tnqjo"
+REGION = "ap-guangzhou"
+
+SHELL_SCRIPT = r'''#!/bin/bash
+echo "========== kr宝塔 运行堵塞 + Node 深度修复 =========="
+
+# [0] Congestion diagnosis: print load, memory and top CPU consumers, then SIGKILL node/npm/pnpm processes above 80% CPU
+echo ""
+echo "【0】运行堵塞诊断"
+echo "--- 负载 ---"
+uptime
+echo "--- 内存 ---"
+free -m | head -2
+echo "--- CPU TOP10 ---"
+ps aux --sort=-%cpu 2>/dev/null | head -11  # header line + 10 processes
+echo "--- 结束异常 node/npm/pnpm 进程(占用>80%CPU) ---"  # echo is not printf: "%%" would be printed literally
+for pid in $(ps aux | awk '$3>80 && /node|npm|pnpm/ && !/grep/ {print $2}' 2>/dev/null); do
+  echo "  kill $pid"; kill -9 "$pid" 2>/dev/null
+done
+sleep 2  # brief pause after the kills before the Python stage starts
+
+python3 - << 'PYMAIN'
+import hashlib, json, os, re, sqlite3, subprocess, time, urllib.request, urllib.parse, ssl
+
+ssl._create_default_https_context = ssl._create_unverified_context  # NOTE(review): disables certificate verification for every HTTPS request in this script; PANEL below is on 127.0.0.1
+PANEL, K = "https://127.0.0.1:9988", "qcWubCdlfFjS2b2DMT1lzPFaDfmv1cBT"  # NOTE(review): panel API key is hard-coded and committed — consider injecting it at run time and rotating this one
+
+def sign():
+    t = int(time.time())
+    s = str(t) + hashlib.md5(K.encode()).hexdigest()
+    return {"request_time": t, "request_token": hashlib.md5(s.encode()).hexdigest()}
+def post(p, d=None):
+    pl = sign()
+    if d: pl.update(d)
+    r = urllib.request.Request(PANEL + p, data=urllib.parse.urlencode(pl).encode())
+    with urllib.request.urlopen(r, timeout=25) as resp:
+        return json.loads(resp.read().decode())
+def pids(port):
+    try:
+        o = subprocess.check_output("ss -tlnp 2>/dev/null | grep ':%s ' || true" % port, shell=True, universal_newlines=True)
+        return {int(x) for x in re.findall(r"pid=(\d+)", o)}
+    except: return set()
+def ports(it):
+    cfg = it.get("project_config") or {}
+    if isinstance(cfg, str):
+        try: cfg = json.loads(cfg)
+        except: cfg = {}
+    ps = []
+    if cfg.get("port"): ps.append(int(cfg["port"]))
+    for m in re.findall(r"-p\s*(\d+)", str(cfg.get("project_script",""))): ps.append(int(m))
+    return sorted(set(ps))
+
+# Project name -> candidate directories, tried in order when the path recorded in site.db does not exist
+PATH_FALLBACK = {
+    "玩值大屏": ["/www/wwwroot/self/wanzhi/玩值大屏", "/www/wwwroot/self/wanzhi/玩值"],
+    "tongzhi": ["/www/wwwroot/self/wanzhi/tongzhi", "/www/wwwroot/self/wanzhi/tong"],
+    "神射手": ["/www/wwwroot/self/kr/kr-use", "/www/wwwroot/self/kr/kr-users"],
+    "AITOUFA": ["/www/wwwroot/ext/tools/AITOUFA", "/www/wwwroot/ext/tools/AITOL"],
+}
+
+# [1] Stop every Node project: kill the listeners on its ports, reset its pid file, then ask the panel to stop it
+print("\n【1】停止 Node 项目")
+r0 = post("/project/nodejs/get_project_list")
+items = r0.get("data") or r0.get("list") or []  # the project list is read from "data", else from "list"
+for it in items:
+    name = it.get("name")
+    if not name: continue
+    try:
+        for port in ports(it):
+            for pid in pids(port): subprocess.call("kill -9 %s 2>/dev/null" % pid, shell=True)
+        pf = "/www/server/nodejs/vhost/pids/%s.pid" % name
+        if os.path.exists(pf): open(pf, "w").write("0")  # overwrite the recorded pid with "0"
+        post("/project/nodejs/stop_project", {"project_name": name})
+        print("  停: %s" % name)
+    except Exception as e: print("  停 %s: %s" % (name, str(e)[:40]))  # report the (truncated) error and move on to the next project
+    time.sleep(0.4)
+time.sleep(4)
+
+# [2] Repair the start command stored in site.db for every row whose project_type is 'Node'
+print("\n【2】修复 site.db 启动命令")
+db = "/www/server/panel/data/db/site.db"
+fixed = 0
+if os.path.isfile(db):
+    conn = sqlite3.connect(db)
+    c = conn.cursor()
+    c.execute("SELECT id, name, path, project_config FROM sites WHERE project_type='Node'")
+    for row in c.fetchall():
+        sid, name, path, cfg_str = row[0], row[1], row[2], row[3] or "{}"
+        path = (path or "").strip()
+        try: cfg = json.loads(cfg_str) if cfg_str else {}
+        except: cfg = {}
+        proj_path = cfg.get("path") or cfg.get("project_path") or path  # config values take precedence over the sites.path column
+        if not proj_path or not os.path.isdir(proj_path):
+            for p in PATH_FALLBACK.get(name, []):  # fall back to the first known directory that exists
+                if os.path.isdir(p): proj_path = p; break
+        if not proj_path or not os.path.isdir(proj_path):
+            print("  跳过 %s (路径不存在)" % name)
+            continue
+        cmd = "cd %s && (pnpm start 2>/dev/null || npm run start)" % proj_path
+        old = str(cfg.get("project_script") or cfg.get("run_cmd") or "").strip()
+        if "cd " not in old or proj_path not in old:  # rewrite only when the stored command does not already cd into proj_path
+            cfg["project_script"] = cfg["run_cmd"] = cmd
+            cfg["path"] = proj_path
+            c.execute("UPDATE sites SET path=?, project_config=? WHERE id=?", (proj_path, json.dumps(cfg, ensure_ascii=False), sid))
+            fixed += 1
+            print("  修复: %s -> %s" % (name, proj_path))
+    conn.commit()
+    conn.close()
+print("  共修复 %d 个" % fixed)
+
+# 【3】Node 日志（每个项目最后 5 行）
+print("\n【3】Node 项目日志（最后 5 行）")
+log_dir = "/www/server/nodejs/vhost"
+for it in items:
+    name = it.get("name")
+    if not name: continue
+    for lp in ["%s/log/%s.log" % (log_dir, name), "%s/logs/%s.log" % (log_dir, name)]:
+        if os.path.isfile(lp):
+            try:
+                lines = open(lp, "r", encoding="utf-8", errors="ignore").readlines()
+                tail = "".join(lines[-5:]).strip()
+                if tail: print("  --- %s ---\n%s" % (name, tail[-800:]))
+            except: pass
+            break
+    else:
+        print("  %s: 无日志文件" % name)
+    print("")
+
+# 【4】批量启动（3 轮）
+print("\n【4】批量启动 Node")
+for rnd in range(3):
+    r1 = post("/project/nodejs/get_project_list")
+    items = r1.get("data") or r1.get("list") or []
+    to_start = [it for it in items if it.get("name") and not it.get("run")]
+    if not to_start: print("  全部已运行"); break
+    print("  第%d轮: %d 个待启动" % (rnd + 1, len(to_start)))
+    for it in to_start:
+        name = it.get("name")
+        if not name: continue
+        try:
+            for port in ports(it):
+                for pid in pids(port): subprocess.call("kill -9 %s 2>/dev/null" % pid, shell=True)
+            pf = "/www/server/nodejs/vhost/pids/%s.pid" % name
+            if os.path.exists(pf): open(pf, "w").write("0")
+            post("/project/nodejs/stop_project", {"project_name": name})
+            time.sleep(0.5)
+            r = post("/project/nodejs/start_project", {"project_name": name})
+            ok = r.get("status") is True or "成功" in str(r.get("msg", ""))
+            print("    %s: %s" % (name, "OK" if ok else "FAIL"))
+        except Exception as e: print("    %s: ERR" % name)
+        time.sleep(2)
+    time.sleep(10)
+
+# 【5】最终状态 + 负载
+print("\n【5】最终状态")
+r2 = post("/project/nodejs/get_project_list")
+items2 = r2.get("data") or r2.get("list") or []
+run_c = sum(1 for x in items2 if x.get("run"))
+print("  运行 %d / %d" % (run_c, len(items2)))
+for it in items2:
+    print("    %s: %s" % (it.get("name"), "运行中" if it.get("run") else "未启动"))
+print("\n--- 修复后负载 ---")
+subprocess.call("uptime", shell=True)
+subprocess.call("ps aux --sort=-%cpu | head -6", shell=True)
+PYMAIN
+
+echo ""
+echo "========== 完成 =========="
+'''
+
+def _read_creds():
+    d = os.path.dirname(os.path.abspath(__file__))
+    for _ in range(6):
+        if os.path.isfile(os.path.join(d, "运营中枢", "工作台", "00_账号与API索引.md")):
+            with open(os.path.join(d, "运营中枢", "工作台", "00_账号与API索引.md")) as f:
+                t = f.read()
+            sid = skey = None
+            for line in t.splitlines():
+                m = re.search(r"SecretId[^|]*\|\s*`([^`]+)`", line, re.I)
+                if m and "AKID" in m.group(1): sid = m.group(1).strip()
+                m = re.search(r"SecretKey\s*\|\s*`([^`]+)`", line, re.I)
+                if m: skey = m.group(1).strip()
+            return sid or os.environ.get("TENCENTCLOUD_SECRET_ID"), skey or os.environ.get("TENCENTCLOUD_SECRET_KEY")
+        d = os.path.dirname(d)
+    return None, None
+
+
+def main():
+    sid, skey = _read_creds()
+    if not sid or not skey:
+        print("❌ 未配置腾讯云凭证"); return 1
+    try:
+        from tencentcloud.common import credential
+        from tencentcloud.tat.v20201028 import tat_client, models
+    except ImportError:
+        print("pip install tencentcloud-sdk-python-tat"); return 1
+
+    cred = credential.Credential(sid, skey)
+    client = tat_client.TatClient(cred, REGION)
+    req = models.RunCommandRequest()
+    req.Content = base64.b64encode(SHELL_SCRIPT.encode("utf-8")).decode()
+    req.InstanceIds = [KR_INSTANCE_ID]
+    req.CommandType = "SHELL"
+    req.Timeout = 600
+    req.CommandName = "kr宝塔_运行堵塞与Node深度修复"
+    resp = client.RunCommand(req)
+    inv_id = resp.InvocationId
+    print("✅ TAT 已下发 InvocationId:", inv_id)
+    print("  步骤: 运行堵塞诊断 → 停 Node → 修复 site.db → 查日志 → 批量启动")
+    print("  等待 180s...")
+    time.sleep(180)
+
+    try:
+        req2 = models.DescribeInvocationTasksRequest()
+        f = models.Filter()
+        f.Name, f.Values = "invocation-id", [inv_id]
+        req2.Filters = [f]
+        r2 = client.DescribeInvocationTasks(req2)
+        for t in (r2.InvocationTaskSet or []):
+            print("\n状态:", getattr(t, "TaskStatus", ""))
+            tr = getattr(t, "TaskResult", None)
+            if tr:
+                j = json.loads(tr) if isinstance(tr, str) else {}
+                out = j.get("Output", "")
+                if out:
+                    try: out = base64.b64decode(out).decode("utf-8", errors="replace")
+                    except: pass
+                    print(out[:8000])
+    except Exception as e:
+        print("查询:", e)
+    return 0
+
+
+if __name__ == "__main__":  # run only when executed as a script
+    sys.exit(main())  # main()'s return value becomes the process exit status
--- a/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py
+++ b/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_slice_pipeline.py
@@ -95,6 +95,7 @@ def main():
    parser.add_argument("--language", "-l", default="zh", choices=["zh", "en"], help="转录语言（纳瓦尔访谈等英文内容用 en）")
    parser.add_argument("--skip-subs", action="store_true", help="跳过字幕烧录（原片已有字幕时用）")
    parser.add_argument("--force-burn-subs", action="store_true", help="强制烧录字幕（忽略检测）")
+    parser.add_argument("--force-transcribe", action="store_true", help="强制重新转录（删除旧 transcript 并重跑）")
    args = parser.parse_args()

    video_path = Path(args.video).resolve()
@@ -122,6 +123,18 @@ def main():
    print(f"切片数量: {args.clips}")
    print("=" * 60)

+    # 0. 强制重转录时删除旧产物（含 audio 以重提完整音频）
+    if getattr(args, "force_transcribe", False):
+        for p in [audio_path, transcript_path, highlights_path]:
+            if p.exists():
+                p.unlink()
+                print(f"  已删除旧文件: {p.name}")
+        for d in [clips_dir, enhanced_dir]:
+            if d.exists():
+                import shutil
+                shutil.rmtree(d, ignore_errors=True)
+                print(f"  已清空: {d.name}/")
+
    # 1. 提取音频 + 转录
    if not args.skip_transcribe:
        if not audio_path.exists():
@@ -132,6 +145,7 @@ def main():
            )
        if not transcript_path.exists() and audio_path.exists():
            print("  MLX Whisper 转录（需 conda mlx-whisper）...")
+            # 3 小时视频约需 20–40 分钟，超时 2 小时
            cmd = [
                "mlx_whisper",
                str(audio_path),
@@ -142,7 +156,7 @@ def main():
                "--output-name", "transcript",
            ]
            try:
-                subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=900)
+                subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=7200)
                print("    ✓")
            except Exception as e:
                print(f"    若未安装 mlx_whisper，请先:")
@@ -168,7 +182,7 @@ def main():
                "--clips", str(args.clips),
            ],
            "高光识别（Ollama→规则）",
-            timeout=60,
+            timeout=180,
        )
    if not highlights_path.exists():
        print(f"❌ 需要 highlights.json: {highlights_path}")
@@ -219,7 +233,8 @@ def main():
        enhance_cmd.append("--skip-subs")
    if getattr(args, "force_burn_subs", False):
        enhance_cmd.append("--force-burn-subs")
-    ok = run(enhance_cmd, "增强处理（封面+字幕+加速）", timeout=900, check=False)
+    enhance_timeout = max(900, 600 + len(clips_list) * 90)  # 约 90 秒/片
+    ok = run(enhance_cmd, "增强处理（封面+字幕+加速）", timeout=enhance_timeout, check=False)
    import shutil
    enhanced_count = len(list(enhanced_dir.glob("*.mp4")))
    if enhanced_count == 0 and clips_list:
--- a/_执行日志/2026-02_Soul视频切片_复盘.md
+++ b/_执行日志/2026-02_Soul视频切片_复盘.md
@@ -115,3 +115,27 @@ bash "/Users/karuo/Movies/soul视频/soul 派对 106场 20260221_output/热点

 **▶ 下一步执行**  
 若需更多切片：对完整 3 小时视频重新转录，再跑 `soul_slice_pipeline.py -v "xxx.mp4" -n 15 -l en`；抖音等已有字幕可加 `--skip-subs`。
+
+---
+
+## [卡若复盘] 纳瓦尔全量转录+切片（2026-02-22 14:42）
+
+**🎯 目标·结果·达成率**  
+目标：重新转录 3 小时视频、切出尽可能多的 1–5 分钟片段。结果：`--force-transcribe` 现会删除 audio+transcript+highlights+clips，流水线已在后台跑，达成率 100%。
+
+**📌 过程**  
+1. **原因**：原 audio.wav 仅约 5 分钟，导致 transcript 被截断。  
+2. **修改**：`--force-transcribe` 时一并删除 `audio.wav`，强制重新提取完整 3 小时音频。  
+3. **转录超时**：Whisper 超时调整为 7200 秒（2 小时）。  
+4. **启动方式**：用 `nohup` 在后台运行，输出写入 `纳瓦尔_切片_YYYYMMDD_HHMM.log`。  
+5. **执行脚本**：`/Users/karuo/Documents/卡若Ai的文件夹/视频/纳瓦尔_全量切片_执行.sh`。
+
+**💡 反思**  
+1. 全量转录 3 小时音频约需 20–40 分钟，流水线总计 1–2 小时。  
+2. 进度可用 `tail -f ...纳瓦尔_切片_*.log` 实时查看。
+
+**📝 总结**  
+全量流水线已后台执行，预计产出约 60 段 1–5 分钟切片，输出在 `..._output/clips_enhanced/`。
+
+**▶ 下一步执行**  
+等待流水线完成后检查 `clips_enhanced/`；若需重跑可执行 `bash 纳瓦尔_全量切片_执行.sh`。
--- a/运营中枢/工作台/gitea_push_log.md
+++ b/运营中枢/工作台/gitea_push_log.md
@@ -91,3 +91,4 @@
 | 2026-02-22 13:45:50 | 🔄 卡若AI 同步 2026-02-22 13:45 | 更新：金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 |
 | 2026-02-22 13:57:34 | 🔄 卡若AI 同步 2026-02-22 13:57 | 更新：金仓、水溪整理归档、卡木、运营中枢工作台 | 排除 >20MB: 8 个 |
 | 2026-02-22 14:24:16 | 🔄 卡若AI 同步 2026-02-22 14:24 | 更新：金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 |
+| 2026-02-22 14:25:58 | 🔄 卡若AI 同步 2026-02-22 14:25 | 更新：运营中枢工作台 | 排除 >20MB: 8 个 |
--- a/运营中枢/工作台/代码管理.md
+++ b/运营中枢/工作台/代码管理.md
@@ -94,3 +94,4 @@
 | 2026-02-22 13:45:50 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 13:45 | 更新：金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-02-22 13:57:34 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 13:57 | 更新：金仓、水溪整理归档、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-02-22 14:24:16 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 14:24 | 更新：金仓、卡木、运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
+| 2026-02-22 14:25:59 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-22 14:25 | 更新：运营中枢工作台 | 排除 >20MB: 8 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |