File size: 9,459 Bytes
dc80a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env python3
"""
视频内容安全检测器
在MiniGPT4-Video基础上集成巨量引擎规则检测
"""

import os
import sys
import argparse
import time
import json
from datetime import datetime

# 导入巨量引擎规则
from prohibited_rules import ProhibitedRulesEngine

# 导入MiniGPT4-Video的核心函数
from minigpt4_video_inference import generate_prediction, get_subtitles, extract_subtitles

class VideoContentSafetyChecker:
    """Video content safety checker.

    Combines the MiniGPT4-Video description of a video (plus optional subtitle
    text) with a ``ProhibitedRulesEngine`` scan and grades the outcome as
    P0 (high risk), P1 (medium risk), P2 (low risk) or P3 (compliant).
    """

    # Maximum number of characters echoed when previewing long text on the console.
    _PREVIEW_LIMIT = 100

    # Risk tiers in descending severity:
    # (key in the engine result, level code, prefix of the human-readable reason).
    _RISK_TIERS = (
        ("high_risk", "P0", "高危违规"),
        ("medium_risk", "P1", "中危违规"),
        ("low_risk", "P2", "低危违规"),
    )

    # Console marker per risk level; unknown levels fall back to "❓".
    _RISK_EMOJI = {
        "P0": "🚨",  # high risk
        "P1": "⚠️",  # medium risk
        "P2": "⚡",  # low risk
        "P3": "✅",  # compliant
        "ERROR": "❌",
    }

    def __init__(self):
        """Create the rules engine used by the safety scan."""
        self.rules_engine = ProhibitedRulesEngine()
        print("🛡️ 巨量引擎规则已加载 (299条规则)")

    @classmethod
    def _preview(cls, text):
        """Return *text* cut to the preview limit, adding "..." only if it was cut."""
        if len(text) <= cls._PREVIEW_LIMIT:
            return text
        return f"{text[:cls._PREVIEW_LIMIT]}..."

    @classmethod
    def _classify_risk(cls, safety_result):
        """Map an engine result to ``(risk_level, risk_reason, violations)``.

        The first tier (high, then medium, then low) whose ``"found"`` flag is
        truthy wins; its reason lists the categories of up to three violations.
        When no tier is flagged the content is graded P3 (compliant).
        """
        for tier_key, level, label in cls._RISK_TIERS:
            tier = safety_result[tier_key]
            if tier["found"]:
                violations = tier["violations"]
                # .get keeps a violation without a category from turning a real
                # finding into an ERROR result; the report printer does the same.
                categories = ", ".join(
                    str(violation.get("category", "N/A")) for violation in violations[:3]
                )
                return level, f"{label}: {categories}", violations
        return "P3", "内容合规", []

    @staticmethod
    def _read_subtitle_text(video_path):
        """Return the subtitle text for *video_path*, or "" if none is available.

        Best effort: any failure is reported on the console and treated as
        "no subtitles" so the analysis can continue.
        """
        try:
            subtitle_path = get_subtitles(video_path)
            if not (subtitle_path and os.path.exists(subtitle_path)):
                print("⚠️ 未找到字幕文件")
                return ""
            subtitles = extract_subtitles(subtitle_path)
            # NOTE(review): assumes index 2 of each subtitle entry is its text —
            # confirm against extract_subtitles.
            subtitle_text = " ".join(sub[2] for sub in subtitles)
            print(f"✅ 字幕提取完成: {len(subtitle_text)}字符")
            return subtitle_text
        except Exception as e:
            print(f"⚠️ 字幕提取失败: {e}")
            return ""

    def analyze_video_with_safety_check(self, video_path, instruction="请详细描述这个视频的内容", gen_subtitles=True):
        """Run the full analysis: MiniGPT4-Video understanding plus the rules scan.

        Args:
            video_path: Path of the video to analyze.
            instruction: Prompt passed to ``generate_prediction``.
            gen_subtitles: Forwarded to ``generate_prediction``; when true the
                subtitle text is also extracted and included in the scan.

        Returns:
            On success, a dict with ``"video_analysis"``, ``"safety_assessment"``
            and ``"metadata"`` sections. If the video model call fails, a dict
            with only ``"error"`` and ``"timestamp"``. A failing rules scan does
            not abort the analysis: it is reported as risk level ``"ERROR"``.
        """
        print(f"🎬 开始分析视频: {video_path}")
        print(f"📋 分析指令: {instruction}")

        # Monotonic clock: the elapsed time is unaffected by system clock changes.
        start_time = time.perf_counter()

        # Step 1: video understanding with MiniGPT4-Video.
        print("\n🔍 第1步: MiniGPT4-Video智能分析...")
        try:
            video_content = generate_prediction(
                video_path,
                instruction,
                gen_subtitles=gen_subtitles,
                stream=False,
            )
            print(f"✅ 视频理解完成: {self._preview(video_content)}")
        except Exception as e:
            return {
                "error": f"MiniGPT4-Video分析失败: {str(e)}",
                "timestamp": datetime.now().isoformat(),
            }

        # Step 2: subtitle text (optional).
        print("\n🎤 第2步: 提取字幕内容...")
        if gen_subtitles:
            subtitle_text = self._read_subtitle_text(video_path)
        else:
            subtitle_text = ""
            print("⏭️ 跳过字幕提取")

        # Step 3: rules scan over the description and subtitles together.
        print("\n🛡️ 第3步: 巨量引擎安全检测...")
        combined_content = f"{video_content} {subtitle_text}".strip()

        try:
            # NOTE(review): the meaning of the second argument is not visible
            # here; an empty string is passed, as before.
            safety_result = self.rules_engine.check_all_content(combined_content, "")
            risk_level, risk_reason, risk_details = self._classify_risk(safety_result)
            print(f"✅ 安全检测完成: {risk_level} - {risk_reason}")
        except Exception as e:
            print(f"❌ 安全检测失败: {e}")
            risk_level = "ERROR"
            risk_reason = f"检测失败: {str(e)}"
            risk_details = []
            safety_result = {}

        # Step 4: assemble the complete result.
        analysis_time = time.perf_counter() - start_time

        return {
            "video_analysis": {
                "video_path": video_path,
                "content_description": video_content,
                "subtitle_content": subtitle_text if subtitle_text else "无字幕内容",
                "analysis_instruction": instruction,
            },
            "safety_assessment": {
                "risk_level": risk_level,
                "risk_reason": risk_reason,
                "violation_details": risk_details[:5],  # at most five violation details
                "total_violations": safety_result.get("total_violations", 0),
                "high_risk_count": len(safety_result.get("high_risk", {}).get("violations", [])),
                "medium_risk_count": len(safety_result.get("medium_risk", {}).get("violations", [])),
                "low_risk_count": len(safety_result.get("low_risk", {}).get("violations", [])),
            },
            "metadata": {
                "analysis_time_seconds": round(analysis_time, 2),
                "timestamp": datetime.now().isoformat(),
                "has_subtitles": bool(subtitle_text),
                "combined_content_length": len(combined_content),
            },
        }

    def format_result_report(self, result):
        """Print a human-readable report for *result* to stdout.

        Args:
            result: A dict as returned by ``analyze_video_with_safety_check``.
                When it carries an ``"error"`` key only the failure is printed.
        """
        if "error" in result:
            print(f"\n❌ 分析失败: {result['error']}")
            return

        print("\n" + "=" * 80)
        print("📋 智能视频内容安全分析报告")
        print("=" * 80)

        # Video analysis section.
        video_analysis = result["video_analysis"]
        print(f"🎬 视频路径: {video_analysis['video_path']}")
        print(f"📝 内容描述: {video_analysis['content_description']}")
        print(f"🎤 字幕内容: {self._preview(video_analysis['subtitle_content'])}")

        # Safety assessment section.
        safety = result["safety_assessment"]
        risk_level = safety["risk_level"]

        print(f"\n{self._RISK_EMOJI.get(risk_level, '❓')} 风险等级: {risk_level}")
        print(f"📋 风险原因: {safety['risk_reason']}")
        print(f"📊 违规统计: 总计{safety['total_violations']}项 (高危{safety['high_risk_count']} | 中危{safety['medium_risk_count']} | 低危{safety['low_risk_count']})")

        # Violation details.
        if safety["violation_details"]:
            print("\n🔍 主要违规详情:")
            for i, violation in enumerate(safety["violation_details"], 1):
                print(f"   {i}. {violation.get('category', 'N/A')}: {violation.get('description', 'N/A')}")

        # Metadata.
        metadata = result["metadata"]
        print(f"\n⏱️ 分析耗时: {metadata['analysis_time_seconds']}秒")
        print(f"📅 分析时间: {metadata['timestamp']}")
        print(f"💾 内容长度: {metadata['combined_content_length']}字符")

        print("=" * 80)


def main():
    """Command-line entry point: analyze one video and report its risk level.

    Parses the command line, runs ``VideoContentSafetyChecker`` on the given
    video, prints either the full report or (with ``--quiet``) only the
    ``RISK_LEVEL`` / ``RISK_REASON`` lines, and optionally writes the result
    dict to a JSON file.

    Exits with status 1 when the video path is not an existing file, when the
    analysis returned an error, or when the JSON result could not be saved.
    """
    # Local imports keep this function self-contained; both are standard library.
    import contextlib
    import io

    parser = argparse.ArgumentParser(description="视频内容安全检测器 - 集成MiniGPT4-Video和巨量引擎规则")
    parser.add_argument("--video_path", type=str, required=True, help="视频文件路径")
    parser.add_argument("--question", type=str, default="请详细描述这个视频的内容,包括场景、人物、对话和主要活动", help="分析指令")
    parser.add_argument("--add_subtitles", action='store_true', help="是否生成和分析字幕")
    parser.add_argument("--output_json", type=str, help="输出JSON结果到文件")
    parser.add_argument("--quiet", action='store_true', help="静默模式,只输出最终结果")

    args = parser.parse_args()

    # The video must be an existing regular file; a directory is rejected too.
    if not os.path.isfile(args.video_path):
        print(f"❌ 错误: 视频文件不存在 - {args.video_path}")
        sys.exit(1)

    if not args.quiet:
        print("🚀 初始化视频内容安全检测器...")

    with contextlib.ExitStack() as stack:
        if args.quiet:
            # Quiet mode promises only the final result on stdout, so the
            # progress messages printed during setup and analysis are discarded.
            stack.enter_context(contextlib.redirect_stdout(io.StringIO()))
        checker = VideoContentSafetyChecker()
        result = checker.analyze_video_with_safety_check(
            video_path=args.video_path,
            instruction=args.question,
            gen_subtitles=args.add_subtitles,
        )

    failed = "error" in result

    # Report the outcome.
    if args.quiet:
        # Quiet mode: key information only.
        if failed:
            print(f"ERROR: {result['error']}")
        else:
            safety = result["safety_assessment"]
            print(f"RISK_LEVEL: {safety['risk_level']}")
            print(f"RISK_REASON: {safety['risk_reason']}")
    else:
        # Full report mode.
        checker.format_result_report(result)

    # Save the JSON result (error results are saved as well).
    if args.output_json:
        try:
            # Serialize before opening the file so a serialization error
            # cannot leave a truncated JSON file behind.
            payload = json.dumps(result, ensure_ascii=False, indent=2)
            with open(args.output_json, 'w', encoding='utf-8') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            print(f"❌ 保存失败: {e}")
            failed = True
        else:
            if not args.quiet:
                print(f"💾 结果已保存到: {args.output_json}")

    # A non-zero status lets calling scripts detect a failed run.
    if failed:
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()