#!/usr/bin/env python
# -*- coding: UTF-8 -*-

'''
@Project: NarratoAI
@File   : subtitle_merger
@Author : viccy
@Date   : 2025/5/6 4:00 PM

Merge several SRT subtitle files into one, shifting every cue by the start
of the clip's ``editedTimeRange`` so the result lines up with the edited video.
'''

import re
import os
from datetime import datetime, timedelta


def parse_time(time_str):
    """Parse an SRT timestamp (``HH:MM:SS,mmm``) into a ``timedelta``.

    Raises:
        ValueError: if the string does not have the ``HH:MM:SS,mmm`` shape
            or one of its fields is not an integer.
    """
    hours, minutes, seconds_ms = time_str.split(':')
    seconds, milliseconds = seconds_ms.split(',')
    return timedelta(
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(seconds),
        milliseconds=int(milliseconds),
    )


def format_time(td):
    """Format a non-negative ``timedelta`` as an SRT timestamp ``HH:MM:SS,mmm``.

    Sub-millisecond precision is truncated, not rounded.
    """
    total_seconds = int(td.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"


def _parse_hms(text):
    """Convert ``HH:MM:SS`` into a ``timedelta``; raise ValueError if malformed."""
    hours, minutes, seconds = map(int, text.split(':'))
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)


def _format_hms_for_filename(td):
    """Render a ``timedelta`` as ``HH_MM_SS`` for use inside a file name."""
    # total_seconds() rather than .seconds so durations of 24h+ do not wrap.
    hours, remainder = divmod(int(td.total_seconds()), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}_{minutes:02d}_{seconds:02d}"


def parse_edited_time_range(time_range_str):
    """Extract the start and end of an ``editedTimeRange`` string.

    Args:
        time_range_str: text of the form ``HH:MM:SS-HH:MM:SS``.

    Returns:
        ``(start, end)`` as ``timedelta`` objects, or ``(None, None)`` when
        the string is empty or cannot be parsed.
    """
    if not time_range_str:
        return None, None

    parts = time_range_str.split('-')
    if len(parts) != 2:
        return None, None

    try:
        start_time = _parse_hms(parts[0])
        end_time = _parse_hms(parts[1])
    except ValueError:
        # Wrong number of ':' fields or a non-numeric field: report
        # "unparseable" the same way as the other malformed cases so callers
        # (including the sort key below) can skip the item instead of crashing.
        return None, None

    return start_time, end_time


def merge_subtitle_files(subtitle_items, output_file=None):
    """Merge multiple SRT subtitle files into a single file.

    Items are processed in order of the start of their ``editedTimeRange``.
    Every cue of an item is shifted by that start offset and cues are
    renumbered consecutively from 1. Items without a ``subtitle`` path, whose
    file does not exist, or whose ``editedTimeRange`` cannot be parsed are
    skipped, as are malformed cue blocks inside a file.

    Args:
        subtitle_items: list of dicts, each holding a ``subtitle`` file path
            and an ``editedTimeRange`` (``HH:MM:SS-HH:MM:SS``).
        output_file: path of the merged file. When ``None`` a name of the form
            ``merged_subtitle_<start>-<end>.srt`` is generated next to the
            first merged subtitle file, from the range covered by the merged
            items.

    Returns:
        Path of the merged subtitle file.

    Raises:
        ValueError: if ``output_file`` is ``None`` and no item could be
            merged, so no output path can be derived.
    """
    # Unparseable ranges sort as offset zero; they are skipped in the loop.
    sorted_items = sorted(
        subtitle_items,
        key=lambda x: parse_edited_time_range(x.get('editedTimeRange', ''))[0] or timedelta(),
    )

    merged_subtitles = []
    merged_items = []  # items that actually contributed; used for the file name
    subtitle_index = 1

    for item in sorted_items:
        subtitle_path = item.get('subtitle')
        if not subtitle_path or not os.path.exists(subtitle_path):
            continue

        # The start of editedTimeRange is the offset applied to every cue.
        offset_time, _ = parse_edited_time_range(item.get('editedTimeRange', ''))
        if offset_time is None:
            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围,跳过该项")
            continue

        with open(subtitle_path, 'r', encoding='utf-8') as file:
            content = file.read()
        merged_items.append(item)

        # Cue blocks are separated by one or more blank lines.
        for block in re.split(r'\n\s*\n', content.strip()):
            lines = block.strip().split('\n')
            if len(lines) < 3:  # need index, time line and at least one text line
                continue

            time_parts = lines[1].split(' --> ')
            if len(time_parts) != 2:
                continue

            try:
                start_time = parse_time(time_parts[0])
                end_time = parse_time(time_parts[1])
            except ValueError:
                # Malformed timestamp: skip this cue like other malformed blocks.
                continue

            adjusted_time_line = (
                f"{format_time(start_time + offset_time)} --> "
                f"{format_time(end_time + offset_time)}"
            )
            new_block = [str(subtitle_index), adjusted_time_line, *lines[2:]]
            merged_subtitles.append('\n'.join(new_block))
            subtitle_index += 1

    if output_file is None:
        if not merged_items:
            raise ValueError(
                "cannot derive an output path: no subtitle item could be merged"
            )
        # Only merged items are guaranteed to have a path and a valid range.
        dir_path = os.path.dirname(merged_items[0]['subtitle'])
        first_start = parse_edited_time_range(merged_items[0]['editedTimeRange'])[0]
        last_end = parse_edited_time_range(merged_items[-1]['editedTimeRange'])[1]
        first_start_str = _format_hms_for_filename(first_start)
        last_end_str = _format_hms_for_filename(last_end)
        output_file = os.path.join(
            dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt"
        )

    merged_content = '\n\n'.join(merged_subtitles)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(merged_content)

    return output_file


if __name__ == '__main__':
    # Sample data for a manual run
    test_data = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            'timestamp': '00:00:00-00:01:15',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
            'sourceTimeRange': '00:00:00-00:00:26',
            'duration': 26,
            'editedTimeRange': '00:00:00-00:00:26'
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15-00:04:40',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
            'sourceTimeRange': '00:01:15-00:01:29',
            'duration': 14,
            'editedTimeRange': '00:00:26-00:00:40'
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58-00:05:45',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
            'sourceTimeRange': '00:04:58-00:05:20',
            'duration': 22,
            'editedTimeRange': '00:00:57-00:01:19'
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45-00:06:00',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
            'sourceTimeRange': '00:05:45-00:05:53',
            'duration': 8,
            'editedTimeRange': '00:01:19-00:01:27'
        }
    ]

    output_file = merge_subtitle_files(test_data)
    print(f"字幕文件已合并至: {output_file}")