🔄 卡若AI 同步 2026-03-11 00:47 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个
This commit is contained in:
@@ -317,8 +317,45 @@ def improve_subtitle_punctuation(text: str) -> str:
|
|||||||
t = t + '。'
|
t = t + '。'
|
||||||
return apply_platform_safety(t)
|
return apply_platform_safety(t)
|
||||||
|
|
||||||
def parse_srt_for_clip(srt_path, start_sec, end_sec):
|
def _detect_clip_pts_offset(clip_path: str) -> float:
|
||||||
"""解析SRT,提取指定时间段的字幕"""
|
"""探测切片实际起始 PTS(秒),用于补偿 -ss input seeking 的关键帧偏移。
|
||||||
|
|
||||||
|
batch_clip 用 -ss before -i(input seeking),FFmpeg 会 seek 到最近关键帧,
|
||||||
|
实际起始帧可能比请求的 start_time 早 0~3 秒。探测这个偏移量,字幕做相应延迟。
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
r = subprocess.run(
|
||||||
|
['ffprobe', '-v', 'error', '-select_streams', 'v:0',
|
||||||
|
'-show_entries', 'frame=pts_time', '-read_intervals', '%+#1',
|
||||||
|
'-of', 'csv=p=0', clip_path],
|
||||||
|
capture_output=True, text=True, timeout=10
|
||||||
|
)
|
||||||
|
if r.returncode == 0 and r.stdout.strip():
|
||||||
|
first_pts = float(r.stdout.strip().splitlines()[0])
|
||||||
|
return first_pts # 通常是 0 或接近 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# 字幕与语音同步的全局延迟补偿(秒)
|
||||||
|
# batch_clip -ss input seeking 导致实际切割比请求早 0~3 秒(关键帧对齐)
|
||||||
|
# 字幕按 highlights.start_time 算相对时间,会比实际音频提前
|
||||||
|
# 加正值延迟 = 字幕往后推 = 与声音更同步
|
||||||
|
SUBTITLE_DELAY_SEC = 0.8 # 根据实测 Soul 视频关键帧间隔约 2s,取保守值
|
||||||
|
|
||||||
|
|
||||||
|
def parse_srt_for_clip(srt_path, start_sec, end_sec, delay_sec=None):
|
||||||
|
"""解析SRT,提取指定时间段的字幕。
|
||||||
|
|
||||||
|
优化:
|
||||||
|
1. 字幕延迟补偿(delay_sec):补偿 FFmpeg input seeking 关键帧偏移,让字幕与声音同步
|
||||||
|
2. 合并过短字幕:相邻字幕 <1.2s 且文字可拼接时自动合并,减少闪烁
|
||||||
|
3. 最小显示时长:每条至少显示 1.2s,避免一闪而过看不清
|
||||||
|
"""
|
||||||
|
if delay_sec is None:
|
||||||
|
delay_sec = SUBTITLE_DELAY_SEC
|
||||||
|
|
||||||
with open(srt_path, 'r', encoding='utf-8') as f:
|
with open(srt_path, 'r', encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
@@ -330,31 +367,53 @@ def parse_srt_for_clip(srt_path, start_sec, end_sec):
|
|||||||
parts = t.split(':')
|
parts = t.split(':')
|
||||||
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
||||||
|
|
||||||
subtitles = []
|
raw_subs = []
|
||||||
for match in matches:
|
for match in matches:
|
||||||
sub_start = time_to_sec(match[1])
|
sub_start = time_to_sec(match[1])
|
||||||
sub_end = time_to_sec(match[2])
|
sub_end = time_to_sec(match[2])
|
||||||
text = match[3].strip()
|
text = match[3].strip()
|
||||||
|
|
||||||
# 检查是否与片段时间范围重叠
|
|
||||||
if sub_end > start_sec and sub_start < end_sec + 2:
|
if sub_end > start_sec and sub_start < end_sec + 2:
|
||||||
# 调整为相对于片段开始的相对时间
|
rel_start = max(0, sub_start - start_sec + delay_sec)
|
||||||
rel_start = max(0, sub_start - start_sec)
|
rel_end = sub_end - start_sec + delay_sec
|
||||||
rel_end = sub_end - start_sec
|
|
||||||
|
|
||||||
# 繁转简 + 修正错误 + 清理语气词
|
|
||||||
text = _to_simplified(text)
|
text = _to_simplified(text)
|
||||||
for w, c in CORRECTIONS.items():
|
for w, c in CORRECTIONS.items():
|
||||||
text = text.replace(w, c)
|
text = text.replace(w, c)
|
||||||
cleaned_text = clean_filler_words(text)
|
cleaned_text = clean_filler_words(text)
|
||||||
if len(cleaned_text) > 1: # 过滤太短的
|
if len(cleaned_text) > 1:
|
||||||
subtitles.append({
|
raw_subs.append({
|
||||||
'start': max(0, rel_start),
|
'start': max(0, rel_start),
|
||||||
'end': rel_end,
|
'end': max(rel_start + 0.5, rel_end),
|
||||||
'text': cleaned_text
|
'text': cleaned_text
|
||||||
})
|
})
|
||||||
|
|
||||||
return subtitles
|
# 合并过短的连续字幕(<1.2s 且总长 <25字),让每条有足够阅读时间
|
||||||
|
MIN_DISPLAY = 1.2
|
||||||
|
merged = []
|
||||||
|
i = 0
|
||||||
|
while i < len(raw_subs):
|
||||||
|
cur = dict(raw_subs[i])
|
||||||
|
dur = cur['end'] - cur['start']
|
||||||
|
# 尝试向后合并
|
||||||
|
while dur < MIN_DISPLAY and i + 1 < len(raw_subs):
|
||||||
|
nxt = raw_subs[i + 1]
|
||||||
|
gap = nxt['start'] - cur['end']
|
||||||
|
combined_text = cur['text'] + ',' + nxt['text']
|
||||||
|
if gap <= 0.5 and len(combined_text) <= 25:
|
||||||
|
cur['end'] = nxt['end']
|
||||||
|
cur['text'] = combined_text
|
||||||
|
dur = cur['end'] - cur['start']
|
||||||
|
i += 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
# 强制最小显示时长
|
||||||
|
if cur['end'] - cur['start'] < MIN_DISPLAY:
|
||||||
|
cur['end'] = cur['start'] + MIN_DISPLAY
|
||||||
|
merged.append(cur)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
def _filter_relevant_subtitles(subtitles):
|
def _filter_relevant_subtitles(subtitles):
|
||||||
|
|||||||
@@ -277,3 +277,4 @@
|
|||||||
| 2026-03-10 20:45:03 | 🔄 卡若AI 同步 2026-03-10 20:45 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
| 2026-03-10 20:45:03 | 🔄 卡若AI 同步 2026-03-10 20:45 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
||||||
| 2026-03-10 20:55:38 | 🔄 卡若AI 同步 2026-03-10 20:55 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
| 2026-03-10 20:55:38 | 🔄 卡若AI 同步 2026-03-10 20:55 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
||||||
| 2026-03-10 23:34:57 | 🔄 卡若AI 同步 2026-03-10 23:34 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
| 2026-03-10 23:34:57 | 🔄 卡若AI 同步 2026-03-10 23:34 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
||||||
|
| 2026-03-11 00:31:13 | 🔄 卡若AI 同步 2026-03-11 00:31 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 |
|
||||||
|
|||||||
@@ -280,3 +280,4 @@
|
|||||||
| 2026-03-10 20:45:03 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 20:45 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
| 2026-03-10 20:45:03 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 20:45 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||||
| 2026-03-10 20:55:38 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 20:55 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
| 2026-03-10 20:55:38 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 20:55 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||||
| 2026-03-10 23:34:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 23:34 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
| 2026-03-10 23:34:57 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-10 23:34 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||||
|
| 2026-03-11 00:31:13 | 成功 | 成功 | 🔄 卡若AI 同步 2026-03-11 00:31 | 更新:金仓、卡木、运营中枢工作台 | 排除 >20MB: 11 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
|
||||||
|
|||||||
Reference in New Issue
Block a user