Spaces:

weiyi01191
/

DeepOperateAI-Video

Running

File size: 9,459 Bytes

dc80a97

#!/usr/bin/env python3
"""
视频内容安全检测器
在MiniGPT4-Video基础上集成巨量引擎规则检测
"""

import os
import sys
import argparse
import time
import json
from datetime import datetime

# 导入巨量引擎规则
from prohibited_rules import ProhibitedRulesEngine

# 导入MiniGPT4-Video的核心函数
from minigpt4_video_inference import generate_prediction, get_subtitles, extract_subtitles

class VideoContentSafetyChecker:
    """Describe a video with MiniGPT4-Video and screen the text with the rules engine.

    The checker combines the model's description of the video with any
    extracted subtitle text, passes the combined text to
    ``ProhibitedRulesEngine.check_all_content`` and maps the outcome onto a
    risk level: ``P0`` (high), ``P1`` (medium), ``P2`` (low), ``P3``
    (compliant) or ``ERROR`` (the rules check itself failed).
    """

    # Risk tiers in descending severity: (key in the rules-engine result,
    # level code reported to callers, label used in the risk reason).
    _RISK_TIERS = (
        ("high_risk", "P0", "高危违规"),
        ("medium_risk", "P1", "中危违规"),
        ("low_risk", "P2", "低危违规"),
    )

    # Marker printed in front of each risk level in the report.
    _RISK_EMOJI = {
        "P0": "🚨",  # high risk
        "P1": "⚠️",  # medium risk
        "P2": "⚡",  # low risk
        "P3": "✅",  # compliant
        "ERROR": "❌",
    }

    _PREVIEW_CHARS = 100        # max characters shown in console previews
    _MAX_REASON_CATEGORIES = 3  # categories listed in the risk reason
    _MAX_VIOLATION_DETAILS = 5  # violations kept in the result's detail list

    def __init__(self):
        """Create the rules engine and announce that it has been loaded."""
        self.rules_engine = ProhibitedRulesEngine()
        # NOTE(review): the rule count in this message is a literal, it is
        # not read from the engine — confirm it still matches the rule set.
        print("🛡️ 巨量引擎规则已加载 (299条规则)")

    @staticmethod
    def _preview(text, limit=_PREVIEW_CHARS):
        """Return at most ``limit`` characters of ``text``.

        An ellipsis is appended only when the text was actually cut, so a
        short value is not displayed as if it had been truncated.
        """
        if len(text) > limit:
            return f"{text[:limit]}..."
        return text

    @classmethod
    def _classify_risk(cls, safety_result):
        """Map a rules-engine result to ``(risk_level, risk_reason, violations)``.

        Tiers are inspected from most to least severe and the first tier whose
        ``found`` flag is truthy wins. Tier keys are indexed strictly so that a
        malformed engine result raises (and is reported as ``ERROR`` by the
        caller) instead of being treated as compliant. A violation without a
        ``category`` is listed as ``N/A`` rather than aborting the whole
        classification, matching how the report formatter treats it.
        """
        for tier_key, level, label in cls._RISK_TIERS:
            tier = safety_result[tier_key]
            if tier["found"]:
                violations = tier["violations"]
                categories = [
                    violation.get("category", "N/A")
                    for violation in violations[:cls._MAX_REASON_CATEGORIES]
                ]
                return level, f"{label}: {', '.join(categories)}", violations
        return "P3", "内容合规", []

    def _extract_subtitle_text(self, video_path):
        """Return the video's subtitle text joined by spaces, or ``""``.

        Extraction is best-effort: a missing subtitle file or any exception
        is reported on stdout and yields an empty string.
        """
        try:
            subtitle_path = get_subtitles(video_path)
            if not (subtitle_path and os.path.exists(subtitle_path)):
                print("⚠️ 未找到字幕文件")
                return ""
            subtitles = extract_subtitles(subtitle_path)
            # Index 2 is used as the text of each subtitle entry — presumably
            # entries are (start, end, text); confirm against extract_subtitles.
            subtitle_text = " ".join(sub[2] for sub in subtitles)
            print(f"✅ 字幕提取完成: {len(subtitle_text)}字符")
            return subtitle_text
        except Exception as e:
            print(f"⚠️ 字幕提取失败: {e}")
            return ""

    def analyze_video_with_safety_check(self, video_path, instruction="请详细描述这个视频的内容", gen_subtitles=True):
        """Analyze a video and assess the safety of its content.

        Args:
            video_path: Path of the video handed to MiniGPT4-Video.
            instruction: Prompt sent to the model together with the video.
            gen_subtitles: When true, subtitles are requested from the model
                call and their text is included in the safety check.

        Returns:
            On success, a dict with the sections ``video_analysis``,
            ``safety_assessment`` and ``metadata``. If the model call fails,
            a dict holding only ``error`` and ``timestamp``. A failure of the
            rules check does not abort the analysis; it is reported as
            ``risk_level == "ERROR"`` inside ``safety_assessment``.
        """
        print(f"🎬 开始分析视频: {video_path}")
        print(f"📋 分析指令: {instruction}")

        start_time = time.time()

        # Step 1: video understanding with MiniGPT4-Video.
        print("\n🔍 第1步: MiniGPT4-Video智能分析...")
        try:
            video_content = generate_prediction(
                video_path,
                instruction,
                gen_subtitles=gen_subtitles,
                stream=False
            )
            print(f"✅ 视频理解完成: {self._preview(video_content)}")
        except Exception as e:
            return {
                "error": f"MiniGPT4-Video分析失败: {str(e)}",
                "timestamp": datetime.now().isoformat()
            }

        # Step 2: subtitle text (optional, best-effort).
        print("\n🎤 第2步: 提取字幕内容...")
        if gen_subtitles:
            subtitle_text = self._extract_subtitle_text(video_path)
        else:
            subtitle_text = ""
            print("⏭️ 跳过字幕提取")

        # Step 3: rules-engine check over description + subtitles.
        print("\n🛡️ 第3步: 巨量引擎安全检测...")
        combined_content = f"{video_content} {subtitle_text}".strip()

        try:
            safety_result = self.rules_engine.check_all_content(combined_content, "")
            risk_level, risk_reason, risk_details = self._classify_risk(safety_result)
            print(f"✅ 安全检测完成: {risk_level} - {risk_reason}")
        except Exception as e:
            print(f"❌ 安全检测失败: {e}")
            risk_level = "ERROR"
            risk_reason = f"检测失败: {str(e)}"
            risk_details = []
            # An empty result makes every count below fall back to zero.
            safety_result = {}

        # Step 4: assemble the full result.
        analysis_time = time.time() - start_time

        return {
            "video_analysis": {
                "video_path": video_path,
                "content_description": video_content,
                "subtitle_content": subtitle_text if subtitle_text else "无字幕内容",
                "analysis_instruction": instruction
            },
            "safety_assessment": {
                "risk_level": risk_level,
                "risk_reason": risk_reason,
                "violation_details": risk_details[:self._MAX_VIOLATION_DETAILS],
                "total_violations": safety_result.get("total_violations", 0),
                "high_risk_count": len(safety_result.get("high_risk", {}).get("violations", [])),
                "medium_risk_count": len(safety_result.get("medium_risk", {}).get("violations", [])),
                "low_risk_count": len(safety_result.get("low_risk", {}).get("violations", []))
            },
            "metadata": {
                "analysis_time_seconds": round(analysis_time, 2),
                "timestamp": datetime.now().isoformat(),
                "has_subtitles": bool(subtitle_text),
                "combined_content_length": len(combined_content)
            }
        }

    def format_result_report(self, result):
        """Print a human-readable report for an analysis result.

        Args:
            result: Dict returned by ``analyze_video_with_safety_check``. When
                it contains ``error`` only the failure message is printed.
        """
        if "error" in result:
            print(f"\n❌ 分析失败: {result['error']}")
            return

        separator = "=" * 80
        print("\n" + separator)
        print("📋 智能视频内容安全分析报告")
        print(separator)

        # Video analysis section.
        video_analysis = result["video_analysis"]
        print(f"🎬 视频路径: {video_analysis['video_path']}")
        print(f"📝 内容描述: {video_analysis['content_description']}")
        print(f"🎤 字幕内容: {self._preview(video_analysis['subtitle_content'])}")

        # Safety assessment section.
        safety = result["safety_assessment"]
        risk_level = safety["risk_level"]

        print(f"\n{self._RISK_EMOJI.get(risk_level, '❓')} 风险等级: {risk_level}")
        print(f"📋 风险原因: {safety['risk_reason']}")
        print(f"📊 违规统计: 总计{safety['total_violations']}项 (高危{safety['high_risk_count']} | 中危{safety['medium_risk_count']} | 低危{safety['low_risk_count']})")

        # Violation details (already capped by the analysis step).
        if safety["violation_details"]:
            print("\n🔍 主要违规详情:")
            for i, violation in enumerate(safety["violation_details"], 1):
                print(f"   {i}. {violation.get('category', 'N/A')}: {violation.get('description', 'N/A')}")

        # Metadata section.
        metadata = result["metadata"]
        print(f"\n⏱️ 分析耗时: {metadata['analysis_time_seconds']}秒")
        print(f"📅 分析时间: {metadata['timestamp']}")
        print(f"💾 内容长度: {metadata['combined_content_length']}字符")

        print(separator)


def main():
    """Command-line entry point: analyze one video and report its risk level.

    Parses the arguments, runs ``VideoContentSafetyChecker`` on the video,
    prints either the full report or (with ``--quiet``) only the
    ``RISK_LEVEL`` / ``RISK_REASON`` lines, and optionally writes the result
    as JSON.

    Exits with status 1 when the video path is not an existing file or when
    the analysis returns an error result.
    """
    # Local import: only needed to silence progress output in --quiet mode.
    import contextlib

    parser = argparse.ArgumentParser(description="视频内容安全检测器 - 集成MiniGPT4-Video和巨量引擎规则")
    parser.add_argument("--video_path", type=str, required=True, help="视频文件路径")
    parser.add_argument("--question", type=str, default="请详细描述这个视频的内容，包括场景、人物、对话和主要活动", help="分析指令")
    parser.add_argument("--add_subtitles", action='store_true', help="是否生成和分析字幕")
    parser.add_argument("--output_json", type=str, help="输出JSON结果到文件")
    parser.add_argument("--quiet", action='store_true', help="静默模式，只输出最终结果")

    args = parser.parse_args()

    # The path must be a regular file; a directory exists but is not a video.
    if not os.path.isfile(args.video_path):
        print(f"❌ 错误: 视频文件不存在 - {args.video_path}")
        sys.exit(1)

    if not args.quiet:
        print("🚀 初始化视频内容安全检测器...")

    with contextlib.ExitStack() as stack:
        if args.quiet:
            # The checker prints its progress unconditionally, so quiet mode
            # discards everything written to sys.stdout while it runs. This
            # covers Python-level writes only, not output emitted directly by
            # native code.
            devnull = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
            stack.enter_context(contextlib.redirect_stdout(devnull))

        checker = VideoContentSafetyChecker()
        result = checker.analyze_video_with_safety_check(
            video_path=args.video_path,
            instruction=args.question,
            gen_subtitles=args.add_subtitles
        )

    failed = "error" in result

    if args.quiet:
        # Quiet mode: key information only.
        if failed:
            print(f"ERROR: {result['error']}")
        else:
            safety = result["safety_assessment"]
            print(f"RISK_LEVEL: {safety['risk_level']}")
            print(f"RISK_REASON: {safety['risk_reason']}")
    else:
        # Full report mode.
        checker.format_result_report(result)

    # The JSON file is written for failed analyses too, so the error is kept.
    if args.output_json:
        try:
            with open(args.output_json, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)
            if not args.quiet:
                print(f"💾 结果已保存到: {args.output_json}")
        except Exception as e:
            print(f"❌ 保存失败: {e}")

    # Let calling scripts detect a failed analysis from the exit status.
    if failed:
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()