# Page-extraction residue (not part of the program): Spaces status "Running"; file size 9,459 bytes; revision dc80a97.
#!/usr/bin/env python3
"""
视频内容安全检测器
在MiniGPT4-Video基础上集成巨量引擎规则检测
"""
import os
import sys
import argparse
import time
import json
from datetime import datetime
# 导入巨量引擎规则
from prohibited_rules import ProhibitedRulesEngine
# 导入MiniGPT4-Video的核心函数
from minigpt4_video_inference import generate_prediction, get_subtitles, extract_subtitles
class VideoContentSafetyChecker:
    """Video content safety checker.

    Feeds the MiniGPT4-Video description of a video (plus optional subtitle
    text) to the prohibited-rules engine and turns the engine's answer into a
    P0-P3 risk assessment.
    """

    # Risk tiers in descending severity:
    # (key in the rules-engine result, risk level, label used in risk_reason).
    _RISK_TIERS = (
        ("high_risk", "P0", "高危违规"),
        ("medium_risk", "P1", "中危违规"),
        ("low_risk", "P2", "低危违规"),
    )
    _PREVIEW_CHARS = 100          # max characters shown in console previews
    _MAX_REASON_CATEGORIES = 3    # categories listed in risk_reason
    _MAX_REPORTED_VIOLATIONS = 5  # violation entries kept in the result

    def __init__(self):
        """Create the rules engine used for every subsequent analysis."""
        self.rules_engine = ProhibitedRulesEngine()
        # NOTE(review): the rule count in this message is hard-coded, not
        # read from the engine - confirm it still matches prohibited_rules.
        print("🛡️ 巨量引擎规则已加载 (299条规则)")

    @staticmethod
    def _preview(text, limit=_PREVIEW_CHARS):
        """Return *text* unchanged if it fits in *limit*, else cut it and add '...'."""
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    @classmethod
    def _classify_risk(cls, safety_result):
        """Map a rules-engine result to (risk_level, risk_reason, violations).

        The first tier (highest severity first) whose "found" flag is truthy
        wins; when no tier matches the content is reported as compliant (P3).
        """
        for key, level, label in cls._RISK_TIERS:
            tier = safety_result[key]
            if tier["found"]:
                violations = tier["violations"]
                categories = ", ".join(
                    v["category"] for v in violations[:cls._MAX_REASON_CATEGORIES]
                )
                return level, f"{label}: {categories}", violations
        return "P3", "内容合规", []

    def _extract_subtitle_text(self, video_path):
        """Return the subtitle text of *video_path*, or "" when unavailable.

        Best effort: any failure is reported on stdout and yields "".
        """
        try:
            subtitle_path = get_subtitles(video_path)
            if subtitle_path and os.path.exists(subtitle_path):
                subtitles = extract_subtitles(subtitle_path)
                # Assumes each subtitle entry carries its text at index 2 -
                # TODO confirm against extract_subtitles.
                subtitle_text = " ".join(sub[2] for sub in subtitles)
                print(f"✅ 字幕提取完成: {len(subtitle_text)}字符")
                return subtitle_text
            print("⚠️ 未找到字幕文件")
        except Exception as e:
            print(f"⚠️ 字幕提取失败: {e}")
        return ""

    def analyze_video_with_safety_check(self, video_path, instruction="请详细描述这个视频的内容", gen_subtitles=True):
        """Run the full analysis: video understanding, then the safety check.

        Args:
            video_path: Path of the video to analyze.
            instruction: Prompt passed to MiniGPT4-Video.
            gen_subtitles: Whether subtitles are generated and included in
                the text that is checked.

        Returns:
            A dict with "video_analysis", "safety_assessment" and "metadata"
            sections. If the video-understanding step fails, a dict with only
            "error" and "timestamp" is returned instead. If the rules engine
            fails, the risk level is reported as "ERROR".
        """
        print(f"🎬 开始分析视频: {video_path}")
        print(f"📋 分析指令: {instruction}")
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()

        # Step 1: video understanding with MiniGPT4-Video.
        print("\n🔍 第1步: MiniGPT4-Video智能分析...")
        try:
            video_content = generate_prediction(
                video_path,
                instruction,
                gen_subtitles=gen_subtitles,
                stream=False,
            )
            print(f"✅ 视频理解完成: {self._preview(video_content)}")
        except Exception as e:
            return {
                "error": f"MiniGPT4-Video分析失败: {str(e)}",
                "timestamp": datetime.now().isoformat(),
            }

        # Step 2: subtitle extraction (optional).
        print("\n🎤 第2步: 提取字幕内容...")
        if gen_subtitles:
            subtitle_text = self._extract_subtitle_text(video_path)
        else:
            subtitle_text = ""
            print("⏭️ 跳过字幕提取")

        # Step 3: rules-engine safety check on description + subtitles.
        print("\n🛡️ 第3步: 巨量引擎安全检测...")
        combined_content = f"{video_content} {subtitle_text}".strip()
        try:
            # NOTE(review): the second argument is always an empty string
            # here - confirm what check_all_content expects in that slot.
            safety_result = self.rules_engine.check_all_content(combined_content, "")
            risk_level, risk_reason, risk_details = self._classify_risk(safety_result)
            print(f"✅ 安全检测完成: {risk_level} - {risk_reason}")
        except Exception as e:
            print(f"❌ 安全检测失败: {e}")
            risk_level = "ERROR"
            risk_reason = f"检测失败: {str(e)}"
            risk_details = []
            safety_result = {}

        # Step 4: assemble the complete result.
        analysis_time = time.perf_counter() - start_time
        return {
            "video_analysis": {
                "video_path": video_path,
                "content_description": video_content,
                "subtitle_content": subtitle_text or "无字幕内容",
                "analysis_instruction": instruction,
            },
            "safety_assessment": {
                "risk_level": risk_level,
                "risk_reason": risk_reason,
                "violation_details": risk_details[:self._MAX_REPORTED_VIOLATIONS],
                "total_violations": safety_result.get("total_violations", 0),
                "high_risk_count": len(safety_result.get("high_risk", {}).get("violations", [])),
                "medium_risk_count": len(safety_result.get("medium_risk", {}).get("violations", [])),
                "low_risk_count": len(safety_result.get("low_risk", {}).get("violations", [])),
            },
            "metadata": {
                "analysis_time_seconds": round(analysis_time, 2),
                "timestamp": datetime.now().isoformat(),
                "has_subtitles": bool(subtitle_text),
                "combined_content_length": len(combined_content),
            },
        }

    def format_result_report(self, result):
        """Print a human-readable report for a result of the analysis method.

        Args:
            result: Dict returned by analyze_video_with_safety_check; an
                "error" key short-circuits to a single failure line.
        """
        if "error" in result:
            print(f"\n❌ 分析失败: {result['error']}")
            return

        print("\n" + "=" * 80)
        print("📋 智能视频内容安全分析报告")
        print("=" * 80)

        # Video analysis section.
        video_analysis = result["video_analysis"]
        print(f"🎬 视频路径: {video_analysis['video_path']}")
        print(f"📝 内容描述: {video_analysis['content_description']}")
        # The ellipsis is only added when the subtitle text is really cut.
        print(f"🎤 字幕内容: {self._preview(video_analysis['subtitle_content'])}")

        # Safety assessment section.
        safety = result["safety_assessment"]
        risk_level = safety["risk_level"]
        # Marker shown in front of each risk level.
        risk_emoji = {
            "P0": "🚨",  # high risk
            "P1": "⚠️",  # medium risk
            "P2": "⚡",  # low risk
            "P3": "✅",  # compliant
            "ERROR": "❌",
        }
        print(f"\n{risk_emoji.get(risk_level, '❓')} 风险等级: {risk_level}")
        print(f"📋 风险原因: {safety['risk_reason']}")
        print(f"📊 违规统计: 总计{safety['total_violations']}项 (高危{safety['high_risk_count']} | 中危{safety['medium_risk_count']} | 低危{safety['low_risk_count']})")

        # Violation details.
        if safety["violation_details"]:
            print("\n🔍 主要违规详情:")
            for i, violation in enumerate(safety["violation_details"], 1):
                print(f" {i}. {violation.get('category', 'N/A')}: {violation.get('description', 'N/A')}")

        # Metadata.
        metadata = result["metadata"]
        print(f"\n⏱️ 分析耗时: {metadata['analysis_time_seconds']}秒")
        print(f"📅 分析时间: {metadata['timestamp']}")
        print(f"💾 内容长度: {metadata['combined_content_length']}字符")
        print("=" * 80)
def main():
    """Command-line entry point for the video content safety checker.

    Parses the arguments, runs the analysis, prints either the full report
    or (with --quiet) only the risk level and reason, and optionally writes
    the result as JSON. Exits with status 1 when the video file is missing,
    the analysis fails, or the JSON file cannot be written.
    """
    # Local import: only this function needs it, to silence progress output.
    import contextlib

    parser = argparse.ArgumentParser(description="视频内容安全检测器 - 集成MiniGPT4-Video和巨量引擎规则")
    parser.add_argument("--video_path", type=str, required=True, help="视频文件路径")
    parser.add_argument("--question", type=str, default="请详细描述这个视频的内容,包括场景、人物、对话和主要活动", help="分析指令")
    parser.add_argument("--add_subtitles", action='store_true', help="是否生成和分析字幕")
    parser.add_argument("--output_json", type=str, help="输出JSON结果到文件")
    parser.add_argument("--quiet", action='store_true', help="静默模式,只输出最终结果")
    args = parser.parse_args()

    # Fail early when the video file does not exist.
    if not os.path.exists(args.video_path):
        print(f"❌ 错误: 视频文件不存在 - {args.video_path}")
        sys.exit(1)

    exit_code = 0

    # The checker prints its progress unconditionally, so in quiet mode
    # stdout is pointed at the null device while it is created and run.
    with contextlib.ExitStack() as stack:
        if args.quiet:
            sink = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
            stack.enter_context(contextlib.redirect_stdout(sink))
        else:
            print("🚀 初始化视频内容安全检测器...")
        checker = VideoContentSafetyChecker()
        result = checker.analyze_video_with_safety_check(
            video_path=args.video_path,
            instruction=args.question,
            gen_subtitles=args.add_subtitles,
        )

    if "error" in result:
        # Same convention as the missing-file check: failures exit non-zero.
        exit_code = 1

    # Print the outcome.
    if args.quiet:
        # Quiet mode: key information only.
        if "error" in result:
            print(f"ERROR: {result['error']}")
        else:
            safety = result["safety_assessment"]
            print(f"RISK_LEVEL: {safety['risk_level']}")
            print(f"RISK_REASON: {safety['risk_reason']}")
    else:
        # Full report mode.
        checker.format_result_report(result)

    # Save the JSON result.
    if args.output_json:
        try:
            # Serialize before opening the file so that a serialization
            # error cannot leave a truncated file behind.
            payload = json.dumps(result, ensure_ascii=False, indent=2)
            with open(args.output_json, 'w', encoding='utf-8') as f:
                f.write(payload)
            print(f"💾 结果已保存到: {args.output_json}")
        except Exception as e:
            print(f"❌ 保存失败: {e}")
            exit_code = 1

    if exit_code:
        sys.exit(exit_code)
# Run the CLI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()