From 095da0b49796ec23d0a48d455bafcc38b9105f46 Mon Sep 17 00:00:00 2001
From: karuo <zhiqun@qq.com>
Date: Sun, 22 Mar 2026 08:06:20 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20=E5=8D=A1=E8=8B=A5AI=20=E5=90=8C?=
 =?UTF-8?q?=E6=AD=A5=202026-03-22=2008:06=20|=20=E6=9B=B4=E6=96=B0?=
 =?UTF-8?q?=EF=BC=9ACursor=E8=A7=84=E5=88=99=E3=80=81=E9=87=91=E4=BB=93?=
 =?UTF-8?q?=E3=80=81=E6=B0=B4=E6=BA=AA=E6=95=B4=E7=90=86=E5=BD=92=E6=A1=A3?=
 =?UTF-8?q?=E3=80=81=E5=8D=A1=E6=9C=A8=E3=80=81=E6=80=BB=E7=B4=A2=E5=BC=95?=
 =?UTF-8?q?=E4=B8=8E=E5=85=A5=E5=8F=A3=E3=80=81=E8=BF=90=E8=90=A5=E4=B8=AD?=
 =?UTF-8?q?=E6=9E=A2=E5=8F=82=E8=80=83=E8=B5=84=E6=96=99=E3=80=81=E8=BF=90?=
 =?UTF-8?q?=E8=90=A5=E4=B8=AD=E6=9E=A2=E5=B7=A5=E4=BD=9C=E5=8F=B0=20|=20?=
 =?UTF-8?q?=E6=8E=92=E9=99=A4=20>20MB:=2011=20=E4=B8=AA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .cursor/rules/karuo-ai.mdc                    |   2 +
 .../聊天记录管理/fallback/recent_chats_fallback.json |  12 +-
 .../水溪_整理归档/语音转写纠错/SKILL.md       |  51 +++
 03_卡木（木）/木叶_视频内容/视频切片/SKILL.md |   2 +
 .../木叶_视频内容/视频切片/脚本/soul_enhance.py | 292 +++++++++++++++---
 SKILL_REGISTRY.md                             |   5 +-
 运营中枢/参考资料/卡若闽南口音_ASR纠错库.json |  67 ++++
 运营中枢/工作台/gitea_push_log.md             |   1 +
 运营中枢/工作台/代码管理.md                   |   1 +
 .../工作台/脚本/apply_karuo_voice_corrections.py |  72 +++++
 10 files changed, 450 insertions(+), 55 deletions(-)
 create mode 100644 02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md
 create mode 100644 运营中枢/参考资料/卡若闽南口音_ASR纠错库.json
 create mode 100644 运营中枢/工作台/脚本/apply_karuo_voice_corrections.py

diff --git a/.cursor/rules/karuo-ai.mdc b/.cursor/rules/karuo-ai.mdc
index 817c408a..07150108 100644
--- a/.cursor/rules/karuo-ai.mdc
+++ b/.cursor/rules/karuo-ai.mdc
@@ -12,6 +12,8 @@ alwaysApply: true
 
 1. 读 `BOOTSTRAP.md` → 2. 读 `SKILL_REGISTRY.md`（优先热技能） → 3. 读 `个人/1、卡若：本人/记忆.md` → 4. 可选读 `CURRENT_STATE.md` → 5. 匹配技能后读对应 `SKILL.md`
 
+**语音 / 闽南口音 ASR**：卡若常用语音输入；理解用户意图前应用 `运营中枢/参考资料/卡若闽南口音_ASR纠错库.json`（`corrections` 按 key **长度降序**替换）。新误听只追加该 JSON；流程见 `02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md`。
+
 **执行流程/MAX Mode/复盘/检索顺序/冲突检测/并行处理**：均以 `BOOTSTRAP.md` 第四～五节为准，不在此重复。
 
 ## 记忆路径
diff --git a/01_卡资（金）/金仓_存储备份/聊天记录管理/fallback/recent_chats_fallback.json b/01_卡资（金）/金仓_存储备份/聊天记录管理/fallback/recent_chats_fallback.json
index 4f7a05c7..98342096 100644
--- a/01_卡资（金）/金仓_存储备份/聊天记录管理/fallback/recent_chats_fallback.json
+++ b/01_卡资（金）/金仓_存储备份/聊天记录管理/fallback/recent_chats_fallback.json
@@ -1,13 +1,21 @@
 {
-"updated": "2026-03-21T23:28:20.614235+00:00",
+"updated": "2026-03-22T00:06:04.309238+00:00",
 "conversations": [
 {
+"对话ID": "689fcb3d-379a-4ff1-b8d5-513d5da0ec90",
+"名称": "Voice input correction library",
+"项目": "卡若AI",
+"首条消息": "/Users/karuo/Documents/个人/卡若AI/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py\n\n一个参考这一个纠错的一个规则，我是用语音输入的，然后用闽南话的口音，然后卡罗伊那个听我聊的那个过程当中所有的内容的一个纠错库，帮我有一个纠错的库，我语音输给你的文字的一个纠错库。每一对话的时候，解析的时候都看一下这个纠错库，然后来纠正这整个的一个东西，它不断的去迭代，那更新一下卡罗拉的 skill，那这个参上面这个是参考的文件，已经有产生了一些参考的一些文件，以后都是有新的纠错的话就直接帮我纠错。弄到那个纠错库里面，按我自己的长期的一些那个话术去迭代一下，然后聊天的内容去迭代一下这个纠错的那个绘画的那个表述，增加我表述的精准性。更新一下卡罗拉的 skill",
+"创建时间": "2026-03-22T00:02:58.917000+00:00",
+"消息数量": 48
+},
+{
 "对话ID": "bfd8e284-d1aa-4650-9a19-f4a3854d1580",
 "名称": "网站-小程序上传",
 "项目": "开发",
 "首条消息": "发布小程序",
 "创建时间": "2026-03-21T22:28:34.185000+00:00",
-"消息数量": 228
+"消息数量": 247
 },
 {
 "对话ID": "006964ad-087f-4028-940f-4725aaace815",
diff --git a/02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md b/02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md
new file mode 100644
index 00000000..a66035de
--- /dev/null
+++ b/02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md
@@ -0,0 +1,51 @@
+---
+name: 语音转写纠错
+description: 卡若闽南口音普通话语音输入 ASR 误听纠正库与执行规范。触发词含语音输入、闽南话、听写纠错、ASR纠错、纠错库迭代。
+group: 水
+triggers: "**语音输入、闽南话、闽南口音、听写、ASR、转写纠错、纠错库、误听、卡罗拉、卡罗伊**、口述、嘴瓢"
+owner: 水溪
+version: "1.0"
+updated: "2026-03-22"
+---
+
+# 语音转写纠错（卡若 · 闽南口音 ASR）
+
+## 目标
+
+- 卡若常用**语音输入**，带**闽南口音普通话**时，输入法/听写易产生固定误听。
+- 使用**唯一纠错库**统一迭代，保证：Cursor 理解用户意图、字幕脚本（`soul_enhance` 等）用词一致、长期话术越来越准。
+
+## 唯一数据源
+
+| 文件 | 作用 |
+|------|------|
+| `运营中枢/参考资料/卡若闽南口音_ASR纠错库.json` | **主纠错表**：`corrections` 对象，`误听 → 正写` |
+
+## Agent 每轮对话（强制）
+
+1. **在推理与执行前**，将用户本轮自然语言视为可能含 ASR 噪声；在心中或用下述脚本对**关键片段**做一次纠正后再定意图（不必改用户原文展示，**内部理解**以纠正后为准）。
+2. 替换顺序：**按 key 长度降序**全文替换，避免短词截断长词（与 `soul_enhance.py` 一致）。
+3. 专有名词：Cursor、Claude、Soul、卡若AI 等按表中写法对齐。
+
+## 命令行 / 脚本复用
+
+```bash
+python3 "/Users/karuo/Documents/个人/卡若AI/运营中枢/工作台/脚本/apply_karuo_voice_corrections.py" "卡罗拉帮我改 skill"
+echo "卡罗拉更新 skill" | python3 "/Users/karuo/Documents/个人/卡若AI/运营中枢/工作台/脚本/apply_karuo_voice_corrections.py"
+```
+
+## 迭代规则（发现新误听时）
+
+1. **直接编辑** `卡若闽南口音_ASR纠错库.json` 的 `corrections`，新增一行 `"误听汉字": "正确写法"`。
+2. 同步更新本文件顶部 `updated` 与 JSON 内 `updated` 字段（日期）。
+3. **禁止**在多处复制粘贴同一词典；视频字幕侧已通过 `soul_enhance.py` **启动时合并**本 JSON（覆盖同名 key）。
+
+## 与视频切片的关系
+
+- `03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py` 内置 `_CORRECTIONS_BASE`，运行时会再合并本 JSON，**JSON 优先覆盖**同名条目。
+- 视频场景专有纠错仍可保留在脚本内置表；**通用口语、人名、品牌**优先只维护 JSON。
+
+## 参考
+
+- 内置纠错写法参考：`木叶_视频内容/视频切片/脚本/soul_enhance.py` 中 `_CORRECTIONS_BASE` 与 `apply_platform_safety` 流程。
+- Cursor 总规则：`.cursor/rules/karuo-ai.mdc`（语音理解条目）。
diff --git a/03_卡木（木）/木叶_视频内容/视频切片/SKILL.md b/03_卡木（木）/木叶_视频内容/视频切片/SKILL.md
index f453eafc..128e1b94 100644
--- a/03_卡木（木）/木叶_视频内容/视频切片/SKILL.md
+++ b/03_卡木（木）/木叶_视频内容/视频切片/SKILL.md
@@ -12,6 +12,8 @@ updated: "2026-03-20"
 
 > **语言**：所有文档、字幕、封面文案统一使用**简体中文**。soul_enhance 自动繁转简。
 
+> **ASR 纠错**：字幕/封面用词除脚本内 `_CORRECTIONS_BASE` 外，会**自动合并** `运营中枢/参考资料/卡若闽南口音_ASR纠错库.json`（与卡若语音输入共用词表，JSON 覆盖同名 key）。迭代误听只改该 JSON 即可。
+
 > **Soul 视频输出**：Soul 剪辑的成片统一导出到 `/Users/karuo/Movies/soul视频/最终版/`，原视频在 `原视频/`，中间产物在 `其他/`。
 
 > **联动规则**：每次执行视频切片时，自动检查是否需要「切片动效包装」或「快速混剪」。若用户提到片头/片尾/程序化包装/批量封面，则联动调用 `切片动效包装/10秒视频` 模板渲染，再与切片合成。若用户提到快速混剪/混剪预告/快剪串联，则在切片或成片生成后再调用 `脚本/quick_montage.py` 输出一条节奏版预告。
diff --git a/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py b/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py
index 820ba67e..19daa3bf 100644
--- a/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py
+++ b/03_卡木（木）/木叶_视频内容/视频切片/脚本/soul_enhance.py
@@ -217,7 +217,8 @@ def _to_simplified(text: str) -> str:
     return text
 
 # 常见转录错误修正（与 one_video 一致，按长度降序排列避免短词误替换）
-CORRECTIONS = {
+# 运行时会再合并 `运营中枢/参考资料/卡若闽南口音_ASR纠错库.json`（同名 key 以 JSON 为准）
+_CORRECTIONS_BASE = {
     # AI 工具名称 ─────────────────────────────────────────────────
     '小龙俠': 'AI工具', '小龍俠': 'AI工具', '小龍蝦': 'AI工具',
     '龍蝦': 'AI工具', '小龙虾': 'AI工具', '龙虾': 'AI工具',
@@ -230,6 +231,9 @@ CORRECTIONS = {
     '寿上': 'Soul上', '瘦上': 'Soul上', '亭上': 'Soul上',
     '这受': '这Soul', '受的': 'Soul的', '受里': 'Soul里',
     '受平台': 'Soul平台',
+    '受推流': '售推流',  # ASR 常把「soul」听成「受」
+    '做个数据': '整场数据',
+    '整个售的': '整个场的', '整个售': '整个场',
     # 私域/商业用语 ─────────────────────────────────────────────
     '私余': '私域', '施育': '私域', '私育': '私域',
     '统安': '同安', '信一下': '线上', '头里': '投入',
@@ -259,6 +263,28 @@ CORRECTIONS = {
     # （在 parse_srt 里过滤，这里不做）
 }
 
+_KARUO_VOICE_JSON = Path(__file__).resolve().parents[4] / "运营中枢" / "参考资料" / "卡若闽南口音_ASR纠错库.json"
+
+
+def _merge_karuo_voice_corrections(base: dict) -> dict:
+    """Return a copy of base overlaid with the Karuo ASR JSON table (JSON wins); best-effort."""
+    merged = dict(base)
+    try:
+        if _KARUO_VOICE_JSON.exists():
+            with open(_KARUO_VOICE_JSON, encoding="utf-8") as f:
+                blob = json.load(f)
+            extra = blob.get("corrections") if isinstance(blob, dict) else None
+            if isinstance(extra, dict):
+                # Skip null values and empty keys: str.replace("", x) would corrupt every text.
+                merged.update({str(k): str(v) for k, v in extra.items() if k and v is not None})
+    except Exception as exc:
+        # Still fall back to the built-in table, but make a broken JSON visible to the operator.
+        print(f"  ⚠ ASR 纠错库加载失败，仅使用内置表: {exc}", file=sys.stderr)
+    return merged
+
+
+CORRECTIONS = _merge_karuo_voice_corrections(_CORRECTIONS_BASE)
+
 # 各平台违禁词 → 谐音/替代词（用于字幕、封面、文件名）
 # 原则：意思不变，表达更安全，避免平台限流/封号
 PLATFORM_VIOLATIONS = {
@@ -366,10 +392,14 @@ STYLE = {
     }
 }
 
-# 字幕与语音同步的全局延迟补偿（秒）；封面后留白再叠字幕；封面标题汉字上限（须在本文件先于 _limit_cover_title_cjk 定义）
-# 略抬高默认值，配合下方音轨/视频 PTS 差比例，减少「字先于人」
-SUBTITLE_DELAY_SEC = 2.15
+# 字幕与语音同步：已导出的切片文件时间轴从 0 起，与 transcript 绝对时间对齐，默认不再整体平移。
+# 仅当 ffprobe 发现音轨与视频首帧 PTS 差 > 阈值时，才加小量补偿（应对「未重封装、seek 错位」的源）。
+SUBTITLE_DELAY_SEC = 0.0
+SUBTITLE_PTS_OFFSET_THRESHOLD = 0.18  # 超过此秒数才加 delay
+SUBTITLE_DELAY_MAX = 1.2
 SUBS_START_AFTER_COVER_SEC = 3.0
+# 至少切除的静音总时长（秒）才触发重编码，避免无意义抖动
+MIN_SILENCE_TRIM_TOTAL_SEC = 0.25
 COVER_TITLE_MAX_CJK = 6
 
 # ============ 工具函数 ============
@@ -606,7 +636,7 @@ def parse_srt_for_clip(srt_path, start_sec, end_sec, delay_sec=None):
     """解析SRT，提取指定时间段的字幕。
 
     优化：
-    1. 字幕延迟补偿（delay_sec）：补偿 FFmpeg input seeking 关键帧偏移（2s 默认）
+    1. 字幕延迟补偿（delay_sec）：已导出切片默认 0；仅音画 PTS 错位时用较小正值
     2. 噪声行过滤：去掉单字母 L / Agent 等 ASR 幻觉行
     3. 文字质量提升：纠错 + 违禁词替换 + 通畅度修正
     4. whisper word-level SRT 自动识别：把单字/词条目先聚合成完整句，再用词时间轴做逐词显示
@@ -1255,6 +1285,134 @@ def create_silence_filter(silences, duration, margin=0.1):
     
     return '+'.join(selects)
 
+
+def kept_segments_from_silences(silences, duration, margin=0.1):
+    """Return the [(start, end), ...] spans kept after cutting each silence.
+
+    Silences are widened by ``margin`` per side; falls back to the whole clip.
+    """
+    total = float(duration)
+    whole_clip = [(0.0, total)]
+    if not silences:
+        return whole_clip
+    kept, cursor = [], 0.0
+    for sil_start, sil_end in sorted(silences):
+        if sil_start > cursor + margin:
+            kept.append((cursor, sil_start - margin))
+        cursor = max(cursor, sil_end + margin)
+    if cursor < total:
+        kept.append((cursor, total))
+    return kept or whole_clip
+
+
+def map_time_remove_silences(t, kept_segments):
+    """Map source time ``t`` (seconds) onto the timeline left after silence removal."""
+    moment = float(t)
+    elapsed = 0.0  # kept playback time accumulated before the current segment
+    for seg_start, seg_end in kept_segments:
+        if moment <= seg_start:
+            return elapsed
+        if moment < seg_end:
+            return elapsed + (moment - seg_start)
+        elapsed += seg_end - seg_start
+    return elapsed
+
+
+def remap_subtitles_after_trim(subtitles, kept_segments):
+    """Shift cue and per-word times in place onto the trimmed timeline; returns the list."""
+    if not (subtitles and kept_segments):
+        return subtitles
+    for cue in subtitles:
+        new_start = map_time_remove_silences(cue["start"], kept_segments)
+        new_end = map_time_remove_silences(cue["end"], kept_segments)
+        if new_end <= new_start + 0.05:
+            # Cue is at most 0.05s long after remapping: stretch it to 0.35s.
+            new_end = new_start + 0.35
+        cue["start"], cue["end"] = new_start, new_end
+        for word in cue.get("word_times") or ():
+            # Read both original values before overwriting word["start"].
+            w_start = map_time_remove_silences(word["start"], kept_segments)
+            w_end = map_time_remove_silences(word.get("end", word["start"]), kept_segments)
+            word["start"] = w_start
+            word["end"] = max(w_end, w_start + 0.02)
+    return subtitles
+
+
+def _valid_kept_segments(kept_segments, duration, min_dur=0.03):
+    """Clamp segments to [0, duration] and drop those shorter than ``min_dur`` seconds."""
+    limit = float(duration)
+    clamped = (
+        (max(0.0, float(seg_start)), min(float(seg_end), limit))
+        for seg_start, seg_end in kept_segments
+    )
+    # Source order is preserved; only the minimum-length filter is applied.
+    return [(lo, hi) for lo, hi in clamped if hi - lo >= min_dur]
+
+
+def ffmpeg_trim_kept_segments(input_path, output_path, kept_segments, duration):
+    """Cut silences by re-encoding only the kept segments into ``output_path``.
+
+    Uses ffmpeg trim/atrim (plus concat when there are several segments) with the
+    same start/duration for video and audio, so both are cut at the same times.
+
+    Args:
+        input_path: source clip passed to ffmpeg ``-i``.
+        output_path: destination file; overwritten (``-y``).
+        kept_segments: iterable of (start, end) seconds to keep.
+        duration: source clip length in seconds, used to clamp the segments.
+
+    Returns:
+        True when the output was produced (a plain copy when at most 0.08s would be
+        cut, otherwise a successful ffmpeg run); False when no usable segment
+        remains, the copy fails, or ffmpeg fails.
+    """
+    segs = _valid_kept_segments(kept_segments, duration)
+    if not segs:
+        return False
+    total = float(duration)
+    kept_total = sum(seg_end - seg_start for seg_start, seg_end in segs)
+    if kept_total <= 0:
+        return False
+    if kept_total >= total - 0.08:
+        # Practically nothing to remove: copy the file instead of re-encoding.
+        try:
+            shutil.copy(input_path, output_path)
+        except OSError:
+            return False
+        return True
+
+    count = len(segs)
+    if count == 1:
+        seg_start, seg_end = segs[0]
+        length = seg_end - seg_start
+        filter_graph = (
+            f"[0:v]trim=start={seg_start}:duration={length},setpts=PTS-STARTPTS[outv];"
+            f"[0:a]atrim=start={seg_start}:duration={length},asetpts=PTS-STARTPTS[outa]"
+        )
+    else:
+        chains = []
+        for idx, (seg_start, seg_end) in enumerate(segs):
+            length = seg_end - seg_start
+            chains.append(f"[0:v]trim=start={seg_start}:duration={length},setpts=PTS-STARTPTS[v{idx}]")
+            chains.append(f"[0:a]atrim=start={seg_start}:duration={length},asetpts=PTS-STARTPTS[a{idx}]")
+        video_pads = "".join(f"[v{idx}]" for idx in range(count))
+        audio_pads = "".join(f"[a{idx}]" for idx in range(count))
+        chains.append(f"{video_pads}concat=n={count}:v=1:a=0[outv]")
+        chains.append(f"{audio_pads}concat=n={count}:v=0:a=1[outa]")
+        filter_graph = ";".join(chains)
+
+    cmd = [
+        "ffmpeg", "-y", "-i", input_path,
+        "-filter_complex", filter_graph,
+        "-map", "[outv]", "-map", "[outa]",
+        "-c:v", "libx264", "-preset", "fast", "-crf", "22",
+        "-c:a", "aac", "-b:a", "192k",
+        output_path,
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    return proc.returncode == 0 and os.path.exists(output_path)
+
+
 def _parse_clip_index(filename: str) -> int:
     """从文件名解析切片序号。
     
@@ -1273,7 +1431,7 @@ def _parse_clip_index(filename: str) -> int:
 def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_path,
                  force_burn_subs=False, skip_subs=False, vertical=False,
                  crop_vf=None, overlay_x=None, typewriter_subs=False,
-                 vertical_fit_full=False):
+                 vertical_fit_full=False, trim_silence=True):
     """增强单个切片。vertical=True 时输出竖条，宽由 --crop-vf 决定（原生包络常见 560～750×1080；旧 498 为两段裁或 scale）。
     vertical_fit_full：整幅 16:9 缩放入 498×1080 + 上下黑边。
     """
@@ -1282,8 +1440,10 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa
     
     video_info = get_video_info(clip_path)
     width, height = video_info['width'], video_info['height']
-    duration = video_info['duration']
-    
+    duration = float(video_info['duration'])
+    original_duration = duration
+    working_clip = clip_path
+
     print(f"  分辨率: {width}x{height}, 时长: {duration:.1f}秒")
     
     # 封面与成片文件名统一：都用主题 title（去杠），名字与标题一致、无杠更清晰
@@ -1320,14 +1480,9 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa
         vf_use = CROP_VF
         overlay_pos = "0:0"
     
-    # 1. 生成封面
-    print(f"  [1/5] 封面生成中…", flush=True)
-    cover_img = os.path.join(temp_dir, 'cover.png')
-    create_cover_image(hook_text, out_w, out_h, cover_img, clip_path)
-    print(f"  ✓ 封面生成", flush=True)
-    
-    # 2. 字幕逻辑：有字幕则烧录（图像 overlay：每张图 -loop 1 才能按时间 enable 显示）
+    # 1. 字幕解析（相对原切片时间轴；去静音后会整体平移时间）
     sub_images = []
+    subtitles = []
     do_burn_subs = not skip_subs and (force_burn_subs or not detect_burned_subs(clip_path))
     if skip_subs:
         print(f"  ⊘ 跳过字幕烧录（--skip-subs）")
@@ -1343,10 +1498,10 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa
                 start_sec = int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
         except (IndexError, ValueError):
             start_sec = 0
-        end_sec = start_sec + duration
+        end_sec = start_sec + original_duration
 
-        # 动态字幕延迟：检测切片实际首帧 PTS，与请求 start_time 做差
-        actual_delay = SUBTITLE_DELAY_SEC
+        # 已导出切片：默认 delay=0。仅当音/视频首帧 PTS 明显不一致时小幅推迟字幕，贴人声。
+        actual_delay = float(SUBTITLE_DELAY_SEC)
         try:
             pts_cmd = [
                 "ffprobe", "-v", "quiet", "-select_streams", "v:0",
@@ -1356,27 +1511,26 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa
                 str(clip_path),
             ]
             pts_r = subprocess.run(pts_cmd, capture_output=True, text=True, timeout=10)
-            if pts_r.returncode == 0 and pts_r.stdout.strip():
+            audio_cmd = [
+                "ffprobe", "-v", "quiet", "-select_streams", "a:0",
+                "-show_entries", "frame=pts_time",
+                "-read_intervals", "%+0.5",
+                "-print_format", "csv=p=0",
+                str(clip_path),
+            ]
+            audio_r = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=10)
+            if (
+                pts_r.returncode == 0
+                and pts_r.stdout.strip()
+                and audio_r.returncode == 0
+                and audio_r.stdout.strip()
+            ):
                 first_pts = float(pts_r.stdout.strip().split("\n")[0].strip())
-                # batch_clip 把 -ss 放在 -i 前面，FFmpeg 将 PTS 重置为 0
-                # 但实际音频起点可能比请求的 start_sec 早 0-4 秒（关键帧对齐）
-                # first_pts 接近 0，真正的偏移量在 batch_clip 的 seeking 行为里
-                # 更可靠的方法：检测音频首个有效帧的 PTS
-                audio_cmd = [
-                    "ffprobe", "-v", "quiet", "-select_streams", "a:0",
-                    "-show_entries", "frame=pts_time",
-                    "-read_intervals", "%+0.5",
-                    "-print_format", "csv=p=0",
-                    str(clip_path),
-                ]
-                audio_r = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=10)
-                if audio_r.returncode == 0 and audio_r.stdout.strip():
-                    audio_pts = float(audio_r.stdout.strip().split("\n")[0].strip())
-                    # 视频帧 PTS 与音频帧 PTS 的差值揭示 input seeking 对齐误差
-                    offset = abs(first_pts - audio_pts)
-                    # 按比例推迟字幕，更贴人声；夹紧区间避免过激或失控
-                    raw_delay = SUBTITLE_DELAY_SEC + offset * 0.72
-                    actual_delay = max(1.65, min(4.0, raw_delay))
+                audio_pts = float(audio_r.stdout.strip().split("\n")[0].strip())
+                offset = abs(first_pts - audio_pts)
+                if offset > SUBTITLE_PTS_OFFSET_THRESHOLD:
+                    actual_delay = min(SUBTITLE_DELAY_MAX, offset * 0.85)
+                    print(f"  ✓ 音画 PTS 差 {offset:.2f}s → 字幕延迟补偿 {actual_delay:.2f}s", flush=True)
         except Exception:
             pass
 
@@ -1384,34 +1538,63 @@ def enhance_clip(clip_path, output_path, highlight_info, temp_dir, transcript_pa
         for sub in subtitles:
             if not _is_mostly_chinese(sub['text']):
                 sub['text'] = _translate_to_chinese(sub['text']) or sub['text']
-        # 仅过滤整句为规则/模板的条目，保留所有对白（含重复句，保证字幕连续）
         subtitles = _filter_relevant_subtitles(subtitles)
-        # 异常转录检测：若整篇几乎同一句话，不烧录错误字幕，避免成片出现“像图片”的无效字
         if _is_bad_transcript(subtitles):
-            print(f"  ⚠ 转录稿异常（大量重复同一句），已跳过字幕烧录；请用 MLX Whisper 对该视频重新生成 transcript.srt 后再跑成片", flush=True)
+            print(
+                f"  ⚠ 转录稿异常（大量重复同一句），已跳过字幕烧录；请用 MLX Whisper 对该视频重新生成 transcript.srt 后再跑成片",
+                flush=True,
+            )
             sys.stdout.flush()
             subtitles = []
         else:
             mode = "逐字渐显" if typewriter_subs else "随语音走动"
             print(f"  ✓ 字幕解析 ({len(subtitles)}条)，将烧录为{mode}字幕", flush=True)
+
+    # 2. 去静音：trim+concat 重编码，并 remap 字幕时间轴（此前仅检测未切除，成片仍带长停顿）
+    silences = detect_silence(clip_path, SILENCE_THRESHOLD, SILENCE_MIN_DURATION)
+    kept = kept_segments_from_silences(silences, original_duration)
+    removed_total = original_duration - sum(e - s for s, e in kept)
+    if trim_silence and removed_total >= MIN_SILENCE_TRIM_TOTAL_SEC:
+        trim_out = os.path.join(temp_dir, "trim_silence.mp4")
+        if ffmpeg_trim_kept_segments(clip_path, trim_out, kept, original_duration):
+            working_clip = trim_out
+            duration = float(get_video_info(working_clip)["duration"])
+            if subtitles:
+                remap_subtitles_after_trim(subtitles, kept)
+            print(
+                f"  ✓ 去静音：约减 {removed_total:.1f}s → 基长 {duration:.1f}s（检出 {len(silences)} 段）",
+                flush=True,
+            )
+        else:
+            print(f"  ⚠ 去静音编码失败，沿用原片", flush=True)
+            duration = original_duration
+    else:
+        print(
+            f"  ✓ 静音 {len(silences)} 段，可剪 {removed_total:.2f}s（<{MIN_SILENCE_TRIM_TOTAL_SEC}s 不剪）",
+            flush=True,
+        )
+
+    # 3. 封面图（与去静音后首帧一致）
+    print(f"  [1/5] 封面生成中…", flush=True)
+    cover_img = os.path.join(temp_dir, "cover.png")
+    create_cover_image(hook_text, out_w, out_h, cover_img, working_clip)
+    print(f"  ✓ 封面生成", flush=True)
+
+    if subtitles:
         if typewriter_subs:
             sub_images = build_typewriter_subtitle_images(
                 subtitles, temp_dir, out_w, out_h, subtitle_overlay_start
             )
         else:
             for i, sub in enumerate(subtitles):
-                img_path = os.path.join(temp_dir, f'sub_{i:04d}.png')
-                create_subtitle_image(sub['text'], out_w, out_h, img_path)
-                sub_images.append({'path': img_path, 'start': sub['start'], 'end': sub['end']})
+                img_path = os.path.join(temp_dir, f"sub_{i:04d}.png")
+                create_subtitle_image(sub["text"], out_w, out_h, img_path)
+                sub_images.append({"path": img_path, "start": sub["start"], "end": sub["end"]})
     if sub_images:
         print(f"  ✓ 字幕图片 ({len(sub_images)}张)", flush=True)
-    
-    # 4. 检测静音
-    silences = detect_silence(clip_path, SILENCE_THRESHOLD, SILENCE_MIN_DURATION)
-    print(f"  ✓ 静音检测 ({len(silences)}段)")
-    
-    # 5. 构建FFmpeg命令
-    current_video = clip_path
+
+    # 4. 构建 FFmpeg 链（从去静音后的 working_clip 起）
+    current_video = working_clip
     
     # 5.1 添加封面（封面图 -loop 1 保证前若干秒完整显示；竖条时叠在 overlay_pos）
     print(f"  [2/5] 封面烧录中…", flush=True)
@@ -1623,6 +1806,11 @@ def main():
         action="store_true",
         help="竖屏成片不裁中间竖条：整幅 16:9 等比缩放入 498×1080，上下黑边，画面显示全；封面/字幕先叠满横版再缩放",
     )
+    parser.add_argument(
+        "--no-trim-silence",
+        action="store_true",
+        help="不去除静音长停顿（默认会切除 silencedetect 检出的静音并同步平移字幕时间轴）",
+    )
     args = parser.parse_args()
     
     clips_dir = Path(args.clips) if args.clips else CLIPS_DIR
@@ -1652,6 +1840,7 @@ def main():
     vfit = getattr(args, "vertical_fit_full", False)
     print(
         f"功能: 封面+字幕+加速10%+去语气词"
+        + ("+去长静音" if not getattr(args, "no_trim_silence", False) else "")
         + ("+竖屏条(高1080宽随vf)" if vertical else "")
         + ("+全画面letterbox(不裁竖条)" if vertical and vfit else "")
         + ("+逐字字幕" if typewriter else "")
@@ -1709,6 +1898,7 @@ def main():
                 overlay_x=overlay_x_arg,
                 typewriter_subs=typewriter,
                 vertical_fit_full=vfit,
+                trim_silence=not getattr(args, "no_trim_silence", False),
             ):
                 success_count += 1
         finally:
diff --git a/SKILL_REGISTRY.md b/SKILL_REGISTRY.md
index 37c6236e..a081346a 100644
--- a/SKILL_REGISTRY.md
+++ b/SKILL_REGISTRY.md
@@ -1,8 +1,8 @@
 # 卡若AI 技能注册表（Skill Registry）
 
 > **一张表查所有技能**。任何 AI 拿到这张表，就能按关键词找到对应技能的 SKILL.md 路径并执行。
-> 76 技能 | 15 成员 | 5 负责人
-> 版本：5.8 | 更新：2026-03-21
+> 77 技能 | 15 成员 | 5 负责人
+> 版本：5.9 | 更新：2026-03-22
 >
 > **技能配置、安装、删除、掌管人登记** → 见 **`运营中枢/工作台/01_技能控制台.md`**。
 
@@ -84,6 +84,7 @@
 | W02 | 文档清洗 | 水溪 | PDF转Markdown | `02_卡人（水）/水溪_整理归档/文档清洗/SKILL.md` | PDF/Word 转结构化 Markdown |
 | W03 | 对话归档 | 水溪 | 归档今日对话 | `02_卡人（水）/水溪_整理归档/对话归档/SKILL.md` | AI 对话记录收集与归类 |
 | W03a | **项目调研** | 水溪 | **项目调研、平台分析、A群、A群聊天记录、聊天记录清理、对话分类、对话分类号、按项目归档、调研归档、APP资料、其他APP、各APP** | `02_卡人（水）/水溪_整理归档/项目调研/SKILL.md` | 平台分析/项目调研/各APP资料/群聊/对话分类统一归档到 开发/7.项目调研，按项目分子目录 |
+| W03b | **语音转写纠错** | 水溪 | **语音输入、闽南话、闽南口音、听写、ASR、转写纠错、纠错库、误听、口述、嘴瓢、卡罗拉、卡罗伊** | `02_卡人（水）/水溪_整理归档/语音转写纠错/SKILL.md` | 闽南口音 ASR 误听→正写唯一 JSON；每轮理解前按长词优先替换；`soul_enhance` 自动合并 |
 | W04 | 自动记忆管理 | 水溪 | 记忆、存入记忆 | `02_卡人（水）/水溪_整理归档/自动记忆管理/SKILL.md` | 长期记忆写入与检索 |
 | W05 | 需求拆解与计划制定 | 水泉 | 需求拆解、任务分析 | `02_卡人（水）/水泉_规划拆解/需求拆解与计划制定/SKILL.md` | 大需求拆成可执行步骤 |
 | W06 | 任务规划 | 水泉 | 任务规划、制定计划 | `02_卡人（水）/水泉_规划拆解/任务规划/SKILL.md` | 制定执行计划与排期 |
diff --git a/运营中枢/参考资料/卡若闽南口音_ASR纠错库.json b/运营中枢/参考资料/卡若闽南口音_ASR纠错库.json
new file mode 100644
index 00000000..9411e750
--- /dev/null
+++ b/运营中枢/参考资料/卡若闽南口音_ASR纠错库.json
@@ -0,0 +1,67 @@
+{
+  "version": 1,
+  "updated": "2026-03-22",
+  "description": "卡若（闽南口音普通话）语音输入→文字 的常见 ASR 误听纠正。Cursor 理解用户话术前、字幕/脚本处理时均应用；长词优先替换。",
+  "apply_rules": [
+    "按 corrections 的 key 字符串长度降序做全文替换，避免短词误伤长词",
+    "JSON 中的条目若在 soul_enhance 内置表已有同名 key，以本文件为准（运行时合并覆盖）",
+    "发现新误听：直接把「误听→正写」追加进 corrections，可附 note 说明场景"
+  ],
+  "corrections": {
+    "卡罗拉": "卡若",
+    "卡罗伊": "卡若",
+    "卡洛拉": "卡若",
+    "卡若依": "卡若",
+    "卡若伊": "卡若",
+    "小龙俠": "AI工具",
+    "小龍俠": "AI工具",
+    "小龍蝦": "AI工具",
+    "龍蝦": "AI工具",
+    "小龙虾": "AI工具",
+    "龙虾": "AI工具",
+    "克劳德": "Claude",
+    "科劳德": "Claude",
+    "颗色": "Cursor",
+    "库色": "Cursor",
+    "可索": "Cursor",
+    "蝌蚁": "科技AI",
+    "蝌蛇": "Cursor",
+    "科色": "Cursor",
+    "蝌蚪": "Cursor",
+    "受上": "Soul上",
+    "搜上": "Soul上",
+    "售上": "Soul上",
+    "寿上": "Soul上",
+    "瘦上": "Soul上",
+    "亭上": "Soul上",
+    "这受": "这Soul",
+    "受的": "Soul的",
+    "受里": "Soul里",
+    "受平台": "Soul平台",
+    "私余": "私域",
+    "施育": "私域",
+    "私育": "私域",
+    "统安": "同安",
+    "信一下": "线上",
+    "头里": "投入",
+    "幅画": "负责",
+    "经历论": "净利润",
+    "成于": "乘以",
+    "马的": "码的",
+    "猜济": "拆解",
+    "巨圣": "矩阵",
+    "货客": "获客",
+    "受伤命": "搜索引擎",
+    "附身": "副业",
+    "附产": "副产",
+    "基因交狼": "技能包",
+    "基因交流": "技能传授",
+    "Ski-er": "智能体",
+    "Skier": "智能体",
+    "SKI-er": "智能体"
+  },
+  "notes": [
+    "「吹」在部分语境被 ASR 写成 Claude——已在 soul_enhance 内置表保留，勿在全局误替换；对话里若明确说工具名再按表纠正",
+    "Soul 相关「受→Soul」仅在与平台、推流、语音房语境下成立，若句义是「受伤」等请勿机械替换（模型结合上下文判断）"
+  ]
+}
diff --git a/运营中枢/工作台/gitea_push_log.md b/运营中枢/工作台/gitea_push_log.md
index b9ef3605..5aa156e6 100644
--- a/运营中枢/工作台/gitea_push_log.md
+++ b/运营中枢/工作台/gitea_push_log.md
@@ -412,3 +412,4 @@
 | 2026-03-21 12:22:27 | 🔄 卡若AI 同步 2026-03-21 12:22 | 更新：金仓、运营中枢、运营中枢工作台 | 排除 >20MB: 11 个 |
 | 2026-03-21 13:37:01 | 🔄 卡若AI 同步 2026-03-21 13:36 | 更新：金仓、水桥平台对接、运营中枢、运营中枢工作台 | 排除 >20MB: 11 个 |
 | 2026-03-22 06:54:39 | 🔄 卡若AI 同步 2026-03-22 06:54 | 更新：Cursor规则、金仓、水桥平台对接、总索引与入口、运营中枢工作台 | 排除 >20MB: 11 个 |
+| 2026-03-22 07:34:07 | 🔄 卡若AI 同步 2026-03-22 07:34 | 更新：金仓、水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
diff --git a/运营中枢/工作台/代码管理.md b/运营中枢/工作台/代码管理.md
index 27fd0734..2a8a6892 100644
--- a/运营中枢/工作台/代码管理.md
+++ b/运营中枢/工作台/代码管理.md
@@ -415,3 +415,4 @@
 | 2026-03-21 12:22:27 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-21 12:22 | 更新：金仓、运营中枢、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-03-21 13:37:01 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-21 13:36 | 更新：金仓、水桥平台对接、运营中枢、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-03-22 06:54:39 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-22 06:54 | 更新：Cursor规则、金仓、水桥平台对接、总索引与入口、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
+| 2026-03-22 07:34:07 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-22 07:34 | 更新：金仓、水桥平台对接、卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
diff --git a/运营中枢/工作台/脚本/apply_karuo_voice_corrections.py b/运营中枢/工作台/脚本/apply_karuo_voice_corrections.py
new file mode 100644
index 00000000..d2eb2922
--- /dev/null
+++ b/运营中枢/工作台/脚本/apply_karuo_voice_corrections.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""
+加载「卡若闽南口音_ASR纠错库.json」，对文本做全文替换（key 长度降序）。
+供命令行管道、其它脚本 import 使用。
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+_REPO = Path(__file__).resolve().parents[3]
+DEFAULT_JSON = _REPO / "运营中枢" / "参考资料" / "卡若闽南口音_ASR纠错库.json"
+
+
+def load_corrections(path: Path | None = None) -> dict[str, str]:
+    """Load the ``corrections`` table from the ASR JSON (default: DEFAULT_JSON).
+
+    Returns {} when the file is missing or has no ``corrections`` object. Keys and
+    values are stringified and stripped; null entries and blank keys are dropped.
+    """
+    source = path or DEFAULT_JSON
+    if not source.exists():
+        return {}
+    with open(source, encoding="utf-8") as fh:
+        payload = json.load(fh)
+    table = payload.get("corrections") if isinstance(payload, dict) else None
+    if not isinstance(table, dict):
+        return {}
+    pairs = ((str(k).strip(), str(v).strip()) for k, v in table.items() if k is not None and v is not None)
+    # A blank key would make str.replace insert text between every character.
+    return {wrong: right for wrong, right in pairs if wrong}
+
+
+def apply_corrections(text: str, corrections: dict[str, str] | None = None) -> str:
+    """Replace every mis-heard key in *text*, longest keys first; None yields ""."""
+    if text is None:
+        return ""
+    table = load_corrections() if corrections is None else corrections
+    fixed = str(text)
+    # Longest keys first so a short key cannot split a longer phrase (stable sort).
+    for wrong in sorted(table, key=len, reverse=True):
+        if wrong:
+            fixed = fixed.replace(wrong, table[wrong])
+    return fixed
+
+
+def main() -> int:
+    """CLI entry: correct the positional text, or stdin when it is omitted; returns 0."""
+    cli = argparse.ArgumentParser(description="应用卡若闽南口音 ASR 纠错库")
+    cli.add_argument(
+        "-f",
+        "--file",
+        type=Path,
+        default=None,
+        help="纠错 JSON 路径（默认仓库内 运营中枢/参考资料/卡若闽南口音_ASR纠错库.json）",
+    )
+    cli.add_argument("text", nargs="?", default=None, help="直接传入字符串；省略则从 stdin 读入")
+    opts = cli.parse_args()
+    table = load_corrections(opts.file)
+    if opts.text is None:
+        # Piped input is echoed back corrected, without adding a trailing newline.
+        sys.stdout.write(apply_corrections(sys.stdin.read(), table))
+        return 0
+    # Argument input: terminate the line unless the caller already did.
+    sys.stdout.write(apply_corrections(opts.text, table) + ("" if opts.text.endswith("\n") else "\n"))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())