DeepOperateAI-Video / video_content_safety_checker.py
weiyi01191's picture
Upload 207 files
dc80a97
#!/usr/bin/env python3
"""
视频内容安全检测器
在MiniGPT4-Video基础上集成巨量引擎规则检测
"""
import os
import sys
import argparse
import time
import json
from datetime import datetime
# 导入巨量引擎规则
from prohibited_rules import ProhibitedRulesEngine
# 导入MiniGPT4-Video的核心函数
from minigpt4_video_inference import generate_prediction, get_subtitles, extract_subtitles
class VideoContentSafetyChecker:
    """Video content safety checker.

    Combines the MiniGPT4-Video description of a video (plus optional
    subtitle text) with a ``ProhibitedRulesEngine`` rule check and maps the
    outcome to a risk level: P0 (high), P1 (medium), P2 (low),
    P3 (compliant) or ERROR (the rule check itself failed).
    """

    # Number of characters echoed to the console for long text.
    _PREVIEW_CHARS = 100

    # Severity tiers, checked from most to least severe:
    # (key in the rules-engine result, risk level, label used in the reason).
    _RISK_TIERS = (
        ("high_risk", "P0", "高危违规"),
        ("medium_risk", "P1", "中危违规"),
        ("low_risk", "P2", "低危违规"),
    )

    # Marker shown next to each risk level in the printed report.
    _RISK_EMOJI = {
        "P0": "🚨",  # high risk
        "P1": "⚠️",  # medium risk
        "P2": "⚡",  # low risk
        "P3": "✅",  # compliant
        "ERROR": "❌",
    }

    def __init__(self):
        """Create the rules engine used for every safety check."""
        self.rules_engine = ProhibitedRulesEngine()
        print("🛡️ 巨量引擎规则已加载 (299条规则)")

    @staticmethod
    def _preview(text, limit=_PREVIEW_CHARS):
        """Return *text* cut to *limit* characters, adding "..." only if cut."""
        if len(text) > limit:
            return f"{text[:limit]}..."
        return text

    def _extract_subtitle_text(self, video_path):
        """Best-effort subtitle extraction; return "" when nothing is usable."""
        try:
            subtitle_path = get_subtitles(video_path)
            if not (subtitle_path and os.path.exists(subtitle_path)):
                print("⚠️ 未找到字幕文件")
                return ""
            subtitles = extract_subtitles(subtitle_path)
            # Assumes each subtitle entry carries its text at index 2
            # -- TODO confirm against extract_subtitles.
            subtitle_text = " ".join(sub[2] for sub in subtitles)
        except Exception as e:
            # Subtitles are optional: report the problem and carry on.
            print(f"⚠️ 字幕提取失败: {e}")
            return ""
        print(f"✅ 字幕提取完成: {len(subtitle_text)}字符")
        return subtitle_text

    def _assess_risk(self, combined_content):
        """Run the rules engine and return (level, reason, details, raw result)."""
        try:
            safety_result = self.rules_engine.check_all_content(combined_content, "")
            risk_level, risk_reason, risk_details = "P3", "内容合规", []
            for tier_key, level, label in self._RISK_TIERS:
                tier = safety_result[tier_key]
                if not tier["found"]:
                    continue
                violations = tier["violations"]
                # .get() mirrors format_result_report: one violation without
                # a category must not turn a real hit into an ERROR result.
                categories = ", ".join(
                    str(v.get("category", "N/A")) for v in violations[:3]
                )
                risk_level = level
                risk_reason = f"{label}: {categories}"
                risk_details = violations
                break  # the most severe matching tier wins
            print(f"✅ 安全检测完成: {risk_level} - {risk_reason}")
        except Exception as e:
            print(f"❌ 安全检测失败: {e}")
            return "ERROR", f"检测失败: {str(e)}", [], {}
        return risk_level, risk_reason, risk_details, safety_result

    def analyze_video_with_safety_check(self, video_path, instruction="请详细描述这个视频的内容", gen_subtitles=True):
        """Describe a video with MiniGPT4-Video, then rule-check the text.

        Args:
            video_path: Path of the video, passed to ``generate_prediction``.
            instruction: Prompt passed to ``generate_prediction``.
            gen_subtitles: When true, subtitles are also extracted and
                appended to the text that gets rule-checked.

        Returns:
            A dict with ``video_analysis``, ``safety_assessment`` and
            ``metadata`` sections, or a dict with only ``error`` and
            ``timestamp`` when the video-understanding step fails.
        """
        print(f"🎬 开始分析视频: {video_path}")
        print(f"📋 分析指令: {instruction}")
        start_time = time.time()

        # Step 1: video understanding with MiniGPT4-Video.
        print("\n🔍 第1步: MiniGPT4-Video智能分析...")
        try:
            video_content = generate_prediction(
                video_path,
                instruction,
                gen_subtitles=gen_subtitles,
                stream=False,
            )
            # Kept inside the try on purpose: a result that cannot be
            # previewed (e.g. None) is reported as a failed analysis rather
            # than being rule-checked as if it were real content.
            print(f"✅ 视频理解完成: {self._preview(video_content)}")
        except Exception as e:
            return {
                "error": f"MiniGPT4-Video分析失败: {str(e)}",
                "timestamp": datetime.now().isoformat(),
            }

        # Step 2: optional subtitle extraction.
        print("\n🎤 第2步: 提取字幕内容...")
        if gen_subtitles:
            subtitle_text = self._extract_subtitle_text(video_path)
        else:
            subtitle_text = ""
            print("⏭️ 跳过字幕提取")

        # Step 3: rule-based safety check on description + subtitles.
        print("\n🛡️ 第3步: 巨量引擎安全检测...")
        combined_content = f"{video_content} {subtitle_text}".strip()
        risk_level, risk_reason, risk_details, safety_result = self._assess_risk(
            combined_content
        )

        # Step 4: assemble the full result.
        analysis_time = time.time() - start_time
        return {
            "video_analysis": {
                "video_path": video_path,
                "content_description": video_content,
                "subtitle_content": subtitle_text if subtitle_text else "无字幕内容",
                "analysis_instruction": instruction,
            },
            "safety_assessment": {
                "risk_level": risk_level,
                "risk_reason": risk_reason,
                "violation_details": risk_details[:5],  # at most 5 entries
                "total_violations": safety_result.get("total_violations", 0),
                "high_risk_count": len(safety_result.get("high_risk", {}).get("violations", [])),
                "medium_risk_count": len(safety_result.get("medium_risk", {}).get("violations", [])),
                "low_risk_count": len(safety_result.get("low_risk", {}).get("violations", [])),
            },
            "metadata": {
                "analysis_time_seconds": round(analysis_time, 2),
                "timestamp": datetime.now().isoformat(),
                "has_subtitles": bool(subtitle_text),
                "combined_content_length": len(combined_content),
            },
        }

    def format_result_report(self, result):
        """Print a human-readable report for a result of the analysis method.

        Args:
            result: Dict returned by ``analyze_video_with_safety_check``;
                a dict containing ``error`` prints only the failure line.
        """
        if "error" in result:
            print(f"\n❌ 分析失败: {result['error']}")
            return

        print("\n" + "="*80)
        print("📋 智能视频内容安全分析报告")
        print("="*80)

        # Video analysis section.
        video_analysis = result["video_analysis"]
        print(f"🎬 视频路径: {video_analysis['video_path']}")
        print(f"📝 内容描述: {video_analysis['content_description']}")
        # The ellipsis is appended only when the subtitles were actually cut.
        print(f"🎤 字幕内容: {self._preview(video_analysis['subtitle_content'])}")

        # Safety assessment section.
        safety = result["safety_assessment"]
        risk_level = safety["risk_level"]
        print(f"\n{self._RISK_EMOJI.get(risk_level, '❓')} 风险等级: {risk_level}")
        print(f"📋 风险原因: {safety['risk_reason']}")
        print(f"📊 违规统计: 总计{safety['total_violations']}项 (高危{safety['high_risk_count']} | 中危{safety['medium_risk_count']} | 低危{safety['low_risk_count']})")

        # Violation details.
        if safety["violation_details"]:
            print(f"\n🔍 主要违规详情:")
            for i, violation in enumerate(safety["violation_details"], 1):
                print(f" {i}. {violation.get('category', 'N/A')}: {violation.get('description', 'N/A')}")

        # Metadata.
        metadata = result["metadata"]
        print(f"\n⏱️  分析耗时: {metadata['analysis_time_seconds']}秒")
        print(f"📅 分析时间: {metadata['timestamp']}")
        print(f"💾 内容长度: {metadata['combined_content_length']}字符")
        print("="*80)
def main():
    """Command-line entry point: analyse one video and report its risk level.

    Parses the CLI arguments, runs ``VideoContentSafetyChecker`` on
    ``--video_path``, prints either the full report or (with ``--quiet``)
    only the ``RISK_LEVEL`` / ``RISK_REASON`` (or ``ERROR``) lines, and
    optionally writes the result dict to ``--output_json``.

    Exits with status 1 when ``--video_path`` is not an existing file.
    """
    # Local import: only needed here, for --quiet output suppression.
    import contextlib

    parser = argparse.ArgumentParser(description="视频内容安全检测器 - 集成MiniGPT4-Video和巨量引擎规则")
    parser.add_argument("--video_path", type=str, required=True, help="视频文件路径")
    parser.add_argument("--question", type=str, default="请详细描述这个视频的内容,包括场景、人物、对话和主要活动", help="分析指令")
    parser.add_argument("--add_subtitles", action='store_true', help="是否生成和分析字幕")
    parser.add_argument("--output_json", type=str, help="输出JSON结果到文件")
    parser.add_argument("--quiet", action='store_true', help="静默模式,只输出最终结果")
    args = parser.parse_args()

    # isfile rather than exists: a directory is not an analysable video file.
    if not os.path.isfile(args.video_path):
        print(f"❌ 错误: 视频文件不存在 - {args.video_path}")
        sys.exit(1)

    if not args.quiet:
        print("🚀 初始化视频内容安全检测器...")

    # The checker prints progress messages unconditionally. In quiet mode
    # they are diverted to the null device so that only the final result
    # lines below reach stdout. Only Python-level writes to sys.stdout are
    # affected by this redirection.
    with contextlib.ExitStack() as stack:
        if args.quiet:
            sink = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
            stack.enter_context(contextlib.redirect_stdout(sink))
        checker = VideoContentSafetyChecker()
        result = checker.analyze_video_with_safety_check(
            video_path=args.video_path,
            instruction=args.question,
            gen_subtitles=args.add_subtitles,
        )

    if args.quiet:
        # Quiet mode: key facts only.
        if "error" in result:
            print(f"ERROR: {result['error']}")
        else:
            safety = result["safety_assessment"]
            print(f"RISK_LEVEL: {safety['risk_level']}")
            print(f"RISK_REASON: {safety['risk_reason']}")
    else:
        # Full report mode.
        checker.format_result_report(result)

    # Optionally persist the result as JSON; a failure here is reported but
    # does not discard the analysis that was already printed.
    if args.output_json:
        try:
            with open(args.output_json, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)
            print(f"💾 结果已保存到: {args.output_json}")
        except Exception as e:
            print(f"❌ 保存失败: {e}")


if __name__ == "__main__":
    main()