Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
视频内容安全检测器 | |
在MiniGPT4-Video基础上集成巨量引擎规则检测 | |
""" | |
import os | |
import sys | |
import argparse | |
import time | |
import json | |
from datetime import datetime | |
# 导入巨量引擎规则 | |
from prohibited_rules import ProhibitedRulesEngine | |
# 导入MiniGPT4-Video的核心函数 | |
from minigpt4_video_inference import generate_prediction, get_subtitles, extract_subtitles | |
class VideoContentSafetyChecker:
    """Video content safety checker.

    Combines the MiniGPT4-Video description of a video (plus optional
    subtitle text) with a scan by ``ProhibitedRulesEngine`` and maps the scan
    outcome onto a risk level: P0 (high), P1 (medium), P2 (low), P3
    (compliant) or ERROR (the scan itself failed).
    """

    # Risk tiers in descending severity. Each entry is
    # (key in the rules-engine result, reported risk level, reason prefix).
    _RISK_TIERS = (
        ("high_risk", "P0", "高危违规"),
        ("medium_risk", "P1", "中危违规"),
        ("low_risk", "P2", "低危违规"),
    )

    def __init__(self):
        """Create the rules engine used for the safety scan."""
        self.rules_engine = ProhibitedRulesEngine()
        # NOTE(review): the rule count in this message is a hard-coded
        # literal; it is not read from the engine.
        print("🛡️ 巨量引擎规则已加载 (299条规则)")

    @classmethod
    def _classify_risk(cls, safety_result):
        """Map a rules-engine result onto (risk_level, risk_reason, violations).

        The first tier (in descending severity) whose ``"found"`` flag is
        truthy wins. The reason lists the categories of at most the first
        three violations of that tier. A violation without a ``"category"``
        key is reported as ``"N/A"`` (the same fallback
        ``format_result_report`` uses) so that one incomplete entry cannot
        turn a real detection into an ERROR result.

        Args:
            safety_result: Mapping with ``"high_risk"``, ``"medium_risk"`` and
                ``"low_risk"`` entries, each holding ``"found"`` and
                ``"violations"``.

        Returns:
            Tuple ``(risk_level, risk_reason, violations)``; for compliant
            content this is ``("P3", "内容合规", [])``.

        Raises:
            KeyError: If a tier that has to be inspected is missing from
                ``safety_result`` or lacks ``"found"`` / ``"violations"``.
        """
        for tier_key, level, label in cls._RISK_TIERS:
            tier = safety_result[tier_key]
            if tier["found"]:
                violations = tier["violations"]
                categories = [str(v.get("category", "N/A")) for v in violations[:3]]
                return level, f"{label}: {', '.join(categories)}", violations
        return "P3", "内容合规", []

    @staticmethod
    def _extract_subtitle_text(video_path):
        """Return the subtitle text of the video joined by spaces, or ``""``.

        Subtitle extraction is best-effort: a missing subtitle file or any
        exception is reported on stdout and yields an empty string.
        """
        subtitle_text = ""
        try:
            subtitle_path = get_subtitles(video_path)
            if subtitle_path and os.path.exists(subtitle_path):
                subtitles = extract_subtitles(subtitle_path)
                # Index 2 of each entry is used as the subtitle text
                # (presumably (start, end, text) -- confirm against
                # extract_subtitles).
                subtitle_text = " ".join(sub[2] for sub in subtitles)
                print(f"✅ 字幕提取完成: {len(subtitle_text)}字符")
            else:
                print("⚠️ 未找到字幕文件")
        except Exception as e:
            print(f"⚠️ 字幕提取失败: {e}")
        return subtitle_text

    def analyze_video_with_safety_check(self, video_path, instruction="请详细描述这个视频的内容", gen_subtitles=True):
        """Analyze a video: MiniGPT4-Video understanding plus rule-based safety check.

        Args:
            video_path: Path of the video, passed to ``generate_prediction``
                and ``get_subtitles``.
            instruction: Prompt passed to ``generate_prediction``.
            gen_subtitles: When true, subtitles are requested from
                ``generate_prediction`` and their text is also scanned.

        Returns:
            On success, a dict with three sections: ``"video_analysis"``,
            ``"safety_assessment"`` and ``"metadata"``. If the rule scan
            raises, the dict is still returned with ``risk_level`` set to
            ``"ERROR"``. If ``generate_prediction`` raises, a dict holding
            only ``"error"`` and ``"timestamp"`` is returned instead.
        """
        print(f"🎬 开始分析视频: {video_path}")
        print(f"📋 分析指令: {instruction}")
        start_time = time.time()

        # Step 1: video understanding with MiniGPT4-Video.
        print("\n🔍 第1步: MiniGPT4-Video智能分析...")
        try:
            video_content = generate_prediction(
                video_path,
                instruction,
                gen_subtitles=gen_subtitles,
                stream=False,
            )
            print(f"✅ 视频理解完成: {video_content[:100]}...")
        except Exception as e:
            # Without a description there is nothing to scan; report and stop.
            return {
                "error": f"MiniGPT4-Video分析失败: {str(e)}",
                "timestamp": datetime.now().isoformat(),
            }

        # Step 2: collect subtitle text (best-effort).
        print("\n🎤 第2步: 提取字幕内容...")
        if gen_subtitles:
            subtitle_text = self._extract_subtitle_text(video_path)
        else:
            subtitle_text = ""
            print("⏭️ 跳过字幕提取")

        # Step 3: rule scan over description + subtitles.
        print("\n🛡️ 第3步: 巨量引擎安全检测...")
        combined_content = f"{video_content} {subtitle_text}".strip()
        try:
            # The second argument is passed as an empty string; its meaning is
            # defined by ProhibitedRulesEngine, which is not visible here.
            safety_result = self.rules_engine.check_all_content(combined_content, "")
            risk_level, risk_reason, risk_details = self._classify_risk(safety_result)
            print(f"✅ 安全检测完成: {risk_level} - {risk_reason}")
        except Exception as e:
            print(f"❌ 安全检测失败: {e}")
            risk_level = "ERROR"
            risk_reason = f"检测失败: {str(e)}"
            risk_details = []
            safety_result = {}

        # Step 4: assemble the full result.
        analysis_time = time.time() - start_time
        result = {
            "video_analysis": {
                "video_path": video_path,
                "content_description": video_content,
                "subtitle_content": subtitle_text if subtitle_text else "无字幕内容",
                "analysis_instruction": instruction,
            },
            "safety_assessment": {
                "risk_level": risk_level,
                "risk_reason": risk_reason,
                # Report at most five violation details.
                "violation_details": risk_details[:5],
                "total_violations": safety_result.get("total_violations", 0),
                "high_risk_count": len(safety_result.get("high_risk", {}).get("violations", [])),
                "medium_risk_count": len(safety_result.get("medium_risk", {}).get("violations", [])),
                "low_risk_count": len(safety_result.get("low_risk", {}).get("violations", [])),
            },
            "metadata": {
                "analysis_time_seconds": round(analysis_time, 2),
                "timestamp": datetime.now().isoformat(),
                "has_subtitles": bool(subtitle_text),
                "combined_content_length": len(combined_content),
            },
        }
        return result

    def format_result_report(self, result):
        """Print a human-readable report for a result of ``analyze_video_with_safety_check``.

        Args:
            result: Either the full result dict or the short error dict
                (containing ``"error"``); for the latter only the failure
                line is printed.

        Returns:
            None. The report is written to stdout.
        """
        if "error" in result:
            print(f"\n❌ 分析失败: {result['error']}")
            return

        print("\n" + "="*80)
        print("📋 智能视频内容安全分析报告")
        print("="*80)

        # Video analysis section.
        video_analysis = result["video_analysis"]
        print(f"🎬 视频路径: {video_analysis['video_path']}")
        print(f"📝 内容描述: {video_analysis['content_description']}")
        print(f"🎤 字幕内容: {video_analysis['subtitle_content'][:100]}...")

        # Safety assessment section.
        safety = result["safety_assessment"]
        risk_level = safety["risk_level"]
        # Emoji marker per risk level; unknown levels fall back to '❓'.
        risk_emoji = {
            "P0": "🚨",  # high risk
            "P1": "⚠️",  # medium risk
            "P2": "⚡",  # low risk
            "P3": "✅",  # compliant
            "ERROR": "❌",
        }
        print(f"\n{risk_emoji.get(risk_level, '❓')} 风险等级: {risk_level}")
        print(f"📋 风险原因: {safety['risk_reason']}")
        print(f"📊 违规统计: 总计{safety['total_violations']}项 (高危{safety['high_risk_count']} | 中危{safety['medium_risk_count']} | 低危{safety['low_risk_count']})")

        # Violation details.
        if safety["violation_details"]:
            print(f"\n🔍 主要违规详情:")
            for i, violation in enumerate(safety["violation_details"], 1):
                print(f" {i}. {violation.get('category', 'N/A')}: {violation.get('description', 'N/A')}")

        # Metadata.
        metadata = result["metadata"]
        print(f"\n⏱️ 分析耗时: {metadata['analysis_time_seconds']}秒")
        print(f"📅 分析时间: {metadata['timestamp']}")
        print(f"💾 内容长度: {metadata['combined_content_length']}字符")
        print("="*80)
def main():
    """Command-line entry point: analyze one video and report its risk level.

    Parses ``--video_path`` (required), ``--question``, ``--add_subtitles``,
    ``--output_json`` and ``--quiet``; runs ``VideoContentSafetyChecker`` on
    the video; prints either the full report or, with ``--quiet``, only the
    ``RISK_LEVEL`` / ``RISK_REASON`` (or ``ERROR``) lines; optionally writes
    the result dict as JSON.

    Exits with status 1 when ``--video_path`` is not an existing regular file.
    """
    # Needed only here, so imported locally instead of at file level.
    import contextlib
    import io

    parser = argparse.ArgumentParser(description="视频内容安全检测器 - 集成MiniGPT4-Video和巨量引擎规则")
    parser.add_argument("--video_path", type=str, required=True, help="视频文件路径")
    parser.add_argument("--question", type=str, default="请详细描述这个视频的内容,包括场景、人物、对话和主要活动", help="分析指令")
    parser.add_argument("--add_subtitles", action='store_true', help="是否生成和分析字幕")
    parser.add_argument("--output_json", type=str, help="输出JSON结果到文件")
    parser.add_argument("--quiet", action='store_true', help="静默模式,只输出最终结果")
    args = parser.parse_args()

    # The path must be an existing regular file; a directory is rejected too
    # (os.path.exists alone would have let it through).
    if not os.path.isfile(args.video_path):
        print(f"❌ 错误: 视频文件不存在 - {args.video_path}")
        sys.exit(1)

    if not args.quiet:
        print("🚀 初始化视频内容安全检测器...")

    # The checker prints progress unconditionally. In quiet mode that output
    # is discarded so that only the final result lines reach stdout.
    with contextlib.ExitStack() as stack:
        if args.quiet:
            stack.enter_context(contextlib.redirect_stdout(io.StringIO()))
        checker = VideoContentSafetyChecker()
        result = checker.analyze_video_with_safety_check(
            video_path=args.video_path,
            instruction=args.question,
            gen_subtitles=args.add_subtitles,
        )

    if args.quiet:
        # Quiet mode: key information only.
        if "error" in result:
            print(f"ERROR: {result['error']}")
        else:
            safety = result["safety_assessment"]
            print(f"RISK_LEVEL: {safety['risk_level']}")
            print(f"RISK_REASON: {safety['risk_reason']}")
    else:
        # Full report mode.
        checker.format_result_report(result)

    # Optionally persist the raw result as JSON; a write failure is reported
    # but does not abort the run.
    if args.output_json:
        try:
            with open(args.output_json, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)
            print(f"💾 结果已保存到: {args.output_json}")
        except Exception as e:
            print(f"❌ 保存失败: {e}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()