"""Gradio app that scores an AI-agent web page screenshot with a Baidu Qianfan
vision model and renders the result as a structured Markdown report.

The analysis callback is a generator so the UI can show progress while the
image is compressed, sent to the model, and the JSON answer is formatted.
"""

import base64
import importlib
import io
import json
import logging
import os
import subprocess
import sys
import tempfile
import time

import gradio as gr

# Configure logging first: any logging call made before basicConfig() installs
# a default handler and turns this call into a no-op.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    from openai import OpenAI, APITimeoutError, APIError
except ImportError:
    # Install on demand only, using the interpreter that runs this app.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    from openai import OpenAI, APITimeoutError, APIError

# Pillow is imported as PIL; image handling is disabled when it is missing.
try:
    from PIL import Image
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    logger.warning("未找到pillow库,图片处理功能将不可用")

API_KEY = os.getenv('baidu_api_key')
if not API_KEY:
    logger.warning("环境变量 baidu_api_key 未设置,API调用将失败")

# OpenAI-compatible client pointed at the Baidu Qianfan endpoint.
client = OpenAI(
    base_url='https://qianfan.baidubce.com/v2',
    api_key=API_KEY,
)

# Timeout passed to the chat-completion request, also shown in the UI message.
API_TIMEOUT_SECONDS = 30

# Prompt sent with the screenshot; it pins the JSON schema that
# format_analysis_result() knows how to render.
ANALYSIS_PROMPT = """请分析这张智能体网页的截图,并严格按照以下格式返回JSON:
{
  "页面评分": {
    "overall": 分数(1-10),
    "overall_comment": "简短评价",
    "design": 分数(1-10),
    "design_comment": "简短评价",
    "usability": 分数(1-10),
    "usability_comment": "简短评价",
    "functionality": 分数(1-10),
    "functionality_comment": "简短评价",
    "responsiveness": 分数(1-10),
    "responsiveness_comment": "简短评价"
  },
  "智能体能力拆解": {
    "core_functions": ["功能1", "功能2", ...],
    "strengths": ["优势1", "优势2", ...],
    "weaknesses": ["劣势1", "劣势2", ...],
    "potential_uses": ["用途1", "用途2", ...],
    "improvement_areas": ["改进1", "改进2", ...],
    "detailed_analysis": "详细综合分析文本"
  }
}
【注意】:仅返回纯JSON,不要包含代码块标记(如```json)、解释文本等额外内容!"""

# Placeholder shown in the report tab before an analysis and after "clear".
INITIAL_REPORT_MD = """# 等待分析...

## 操作指引
1. 点击左侧「上传截图」区域,选择智能体网页的JPG/PNG图片
2. 点击「开始分析」按钮,等待3-10秒
3. 分析完成后,此处将显示结构化报告

## 示例图说明
若左侧有示例图片,可直接点击示例快速测试
"""

INITIAL_STATUS = "就绪:请上传截图并点击「开始分析」"

# Score keys expected under "页面评分", mapped to their display names.
SCORE_LABELS = {
    "overall": "整体评价",
    "design": "设计美感",
    "usability": "易用性",
    "functionality": "功能完整性",
    "responsiveness": "响应式设计",
}

# List-valued keys expected under "智能体能力拆解", in display order.
CAPABILITY_SECTIONS = (
    ("core_functions", "### 1. 核心功能\n"),
    ("strengths", "### 2. 优势\n"),
    ("weaknesses", "### 3. 劣势\n"),
    ("potential_uses", "### 4. 潜在用途\n"),
    ("improvement_areas", "### 5. 改进方向\n"),
)

# Front-end tweaks applied on page load. elem_id is attached to the wrapper
# element of a component, so the status text is read from the inner field.
PAGE_STYLE_JS = """() => {
    // Result column spacing
    const resultCol = document.getElementById('result_col');
    if (resultCol) {
        resultCol.style.padding = '20px';
        resultCol.style.borderRadius = '8px';
    }

    // Status box spacing and state-dependent text colour
    const statusBox = document.getElementById('status_box');
    if (statusBox) {
        statusBox.style.marginBottom = '15px';
        statusBox.style.padding = '8px';
        setInterval(() => {
            const field = statusBox.querySelector('textarea, input');
            if (!field) return;
            const statusText = field.value || '';
            if (statusText.startsWith('✅')) field.style.color = '#27ae60';
            else if (statusText.startsWith('❌')) field.style.color = '#e74c3c';
            else if (statusText.includes('分析中') || statusText.includes('🔄')) field.style.color = '#3498db';
            else field.style.color = '#34495e';
        }, 100);
    }

    // Action button row spacing
    const actionButtons = document.getElementById('action_buttons');
    if (actionButtons) {
        actionButtons.style.marginTop = '15px';
        actionButtons.style.gap = '10px';
    }

    // Structured report padding
    const structuredResult = document.getElementById('structured_result');
    if (structuredResult) {
        structuredResult.style.padding = '10px 0';
    }
}"""


# ---------------------- Base64 image loading ----------------------
def read_base64_from_file(file_path):
    """Read a Base64 payload from a text file.

    Args:
        file_path: Path of a UTF-8 text file containing Base64 text.

    Returns:
        The Base64 string with all whitespace removed, or None when the file
        is missing, empty, unreadable, or its length is not a multiple of 4.
    """
    try:
        if not os.path.exists(file_path):
            logger.warning(f"Base64文件不存在:{file_path}")
            return None

        with open(file_path, 'r', encoding='utf-8') as f:
            # Wrapped Base64 contains newlines; drop every whitespace char so
            # the length check below is not fooled by line wrapping.
            base64_str = "".join(f.read().split())

        if not base64_str:
            return None

        # Cheap sanity check: padded Base64 always has a length multiple of 4.
        if len(base64_str) % 4 != 0:
            logger.error(f"{file_path} 中Base64编码无效(长度非4的倍数)")
            return None

        return base64_str
    except Exception as e:
        logger.error(f"读取{file_path}失败:{str(e)}")
        return None


def base64_to_pil_image(base64_str):
    """Decode a Base64 string into a PIL Image.

    Args:
        base64_str: Base64 text, or None/empty.

    Returns:
        A PIL Image, or None when the input is empty, pillow is missing, or
        decoding fails (the failure is logged).
    """
    if not base64_str or not PIL_AVAILABLE:
        return None
    try:
        image_bytes = base64.b64decode(base64_str)
        return Image.open(io.BytesIO(image_bytes))
    except Exception as e:
        logger.error(f"Base64转图片失败:{str(e)}")
        return None


# ---------------------- Core helpers ----------------------
def compress_image(image_path, max_size=(1024, 1024)):
    """Shrink an image and return it as Base64-encoded JPEG.

    Args:
        image_path: Path of the image file to read.
        max_size: (width, height) bounding box; the image is only ever
            shrunk, keeping its aspect ratio.

    Returns:
        Base64 text (str) of the JPEG-encoded image.

    Raises:
        Exception: when pillow is missing or the image cannot be processed.
    """
    if not PIL_AVAILABLE:
        raise Exception("未安装pillow库,请执行 `pip install pillow` 安装后重试")

    try:
        with Image.open(image_path) as img:
            has_alpha = img.mode in ('RGBA', 'LA') or (
                img.mode == 'P' and 'transparency' in img.info
            )
            if has_alpha:
                # JPEG has no alpha channel: flatten onto a white background.
                rgba = img.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[-1])
                img = background
            elif img.mode not in ('RGB', 'L'):
                # Palette, 16-bit, etc. cannot be written as JPEG directly.
                img = img.convert('RGB')

            # Image.Resampling only exists on newer Pillow releases.
            resample = getattr(Image, 'Resampling', Image).LANCZOS
            img.thumbnail(max_size, resample)

            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=80)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        logger.error(f"图片处理失败: {str(e)}")
        raise Exception(f"图片处理失败: {str(e)}") from e


def clean_json_response(raw_content):
    """Strip Markdown code fences and surrounding prose from a model reply.

    Args:
        raw_content: Text returned by the model; may be None.

    Returns:
        The text that should be handed to json.loads (possibly empty).
    """
    if not raw_content:
        return ""

    text = raw_content.strip()

    # Opening fence, with or without a "json" language tag.
    if text.startswith('```'):
        text = text[3:]
        if text[:4].lower() == 'json':
            text = text[4:]
    # Closing fence.
    if text.endswith('```'):
        text = text[:-3]
    text = text.strip()

    # If the model wrapped the object in explanatory text, keep only the
    # outermost {...} span.
    if text and text[0] not in '{[':
        start, end = text.find('{'), text.rfind('}')
        if 0 <= start < end:
            text = text[start:end + 1]

    return text


def _table_cell(value):
    """Render a value so it cannot break a Markdown table row."""
    return str(value).replace('|', '\\|').replace('\r', ' ').replace('\n', ' ')


def _as_list(value):
    """Normalise a model-provided field to a list of items."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    # A bare string (or other scalar) is treated as a single item rather
    # than being iterated character by character.
    return [value]


def format_analysis_result(result_json):
    """Format the parsed analysis JSON as a Markdown report.

    Args:
        result_json: Parsed model answer; expected to be a dict with the
            "页面评分" and "智能体能力拆解" sections described in the prompt.

    Returns:
        Markdown text. A warning message is returned/appended when the input
        is not a dict or contains neither section.
    """
    if not isinstance(result_json, dict):
        return "⚠️ 分析结果格式异常,无法结构化展示"

    parts = ["# 智能体截图分析报告\n\n"]

    # 1. Page scores
    scoring = result_json.get("页面评分", {})
    if not isinstance(scoring, dict):
        scoring = {}
    if scoring:
        parts.append("## 一、页面评分(1-10分)\n")
        parts.append("| 评价维度 | 得分 | 评价说明 |\n")
        parts.append("|----------------|------|---------------------------|\n")
        for key, label in SCORE_LABELS.items():
            score = scoring.get(key, "N/A")
            comment = scoring.get(f"{key}_comment", "无")
            parts.append(
                f"| {label} | {_table_cell(score)} | {_table_cell(comment)} |\n"
            )
        parts.append("\n")

    # 2. Capability breakdown
    capabilities = result_json.get("智能体能力拆解", {})
    if not isinstance(capabilities, dict):
        capabilities = {}
    if capabilities:
        parts.append("## 二、智能体能力拆解\n")
        for key, heading in CAPABILITY_SECTIONS:
            items = _as_list(capabilities.get(key))
            if not items:
                continue
            parts.append(heading)
            parts.extend(
                f"- **{i}.** {item}\n" for i, item in enumerate(items, 1)
            )
            parts.append("\n")

        detailed_analysis = capabilities.get("detailed_analysis", "无")
        if detailed_analysis != "无":
            parts.append("### 6. 详细分析\n")
            parts.append(f">{detailed_analysis}\n\n")

    if not scoring and not capabilities:
        parts.append("⚠️ 未获取到有效分析结果,请检查图片内容或重试")

    return "".join(parts)


def analyze_image(input_image):
    """Run the full analysis pipeline for one screenshot.

    This is a generator used as a Gradio event handler; every yield updates
    the UI with the current progress.

    Args:
        input_image: File path of the uploaded image (falsy when nothing was
            uploaded).

    Yields:
        Tuples of (structured Markdown, raw JSON text, status text).
    """
    structured_result = ""
    raw_json = ""
    cleaned_content = ""  # defined up front so the JSON error handler can use it
    status = "就绪"

    # Dependency check
    if not PIL_AVAILABLE:
        status = "❌ 缺少依赖"
        structured_result = "未安装pillow库,请关闭应用并执行 `pip install pillow` 安装后重试"
        yield structured_result, raw_json, status
        return

    # Input check
    if not input_image:
        status = "请先上传图片"
        structured_result = "请点击左侧「上传截图」区域,选择JPG/PNG格式的智能体网页截图"
        yield structured_result, raw_json, status
        return

    try:
        # 1. Preparation
        status = "正在准备图片..."
        structured_result = "🔄 正在读取并准备图片文件..."
        yield structured_result, raw_json, status
        time.sleep(0.1)  # short pause so the progress text is visible

        # 2. Compression
        status = "正在压缩图片..."
        structured_result = "🔄 正在压缩图片(确保分析效率,不影响质量)..."
        yield structured_result, raw_json, status
        image_base64 = compress_image(input_image)
        time.sleep(0.1)

        # 3. Request
        status = "正在发送分析请求..."
        structured_result = "🔄 正在向AI模型发送分析请求(约3-10秒)..."
        yield structured_result, raw_json, status

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ANALYSIS_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_base64}"
                        },
                    },
                ],
            }
        ]

        # 4. Model call
        status = "AI正在分析(约3-10秒)..."
        structured_result = (
            "🔄 AI正在深度分析图片内容...\n\n当前进度:\n"
            "- 图片已上传:✅\n- 模型已接收:✅\n- 分析中:⌛"
        )
        yield structured_result, raw_json, status

        response = client.chat.completions.create(
            model="ernie-4.5-turbo-vl",
            messages=messages,
            temperature=0.2,
            top_p=0.8,
            extra_body={"penalty_score": 1},
            timeout=API_TIMEOUT_SECONDS,
        )

        # 5. Result formatting
        status = "正在整理分析结果..."
        structured_result = "🔄 AI分析完成,正在整理结构化报告..."
        yield structured_result, raw_json, status
        time.sleep(0.1)

        raw_content = response.choices[0].message.content
        cleaned_content = clean_json_response(raw_content)
        result_json = json.loads(cleaned_content)  # parse once, reuse below
        raw_json = json.dumps(result_json, ensure_ascii=False, indent=2)

        structured_result = format_analysis_result(result_json)
        status = "✅ 分析完成!"
        yield structured_result, raw_json, status

    # APITimeoutError must be handled before its base class APIError.
    except APITimeoutError:
        status = "❌ 分析超时"
        structured_result = (
            f"⚠️ API调用超时(超过{API_TIMEOUT_SECONDS}秒)\n\n可能原因:\n"
            "1. 网络不稳定\n2. 模型负载较高\n建议:检查网络后重试"
        )
        yield structured_result, raw_json, status
    except APIError as e:
        status = "❌ API错误"
        structured_result = (
            f"⚠️ 百度千帆API错误\n\n错误详情:\n{str(e)}\n\n"
            "建议:检查API密钥是否正确,或前往百度智能云控制台确认服务状态"
        )
        yield structured_result, raw_json, status
    except json.JSONDecodeError as e:
        status = "❌ 格式错误"
        structured_result = (
            f"⚠️ AI返回结果非标准JSON\n\n原始内容:\n{cleaned_content[:500]}...\n\n"
            f"错误详情:{str(e)}"
        )
        # Show the unparsable text in the raw tab to help debugging.
        yield structured_result, cleaned_content, status
    except Exception as e:
        status = "❌ 分析失败"
        structured_result = (
            f"⚠️ 分析过程出错\n\n错误详情:\n{str(e)}\n\n"
            "建议:检查图片格式(仅支持JPG/PNG)或重试"
        )
        yield structured_result, raw_json, status


def _finalize_analysis(raw_json, status):
    """Store the raw JSON and expose it through the download button.

    On success the JSON is written to a temporary file whose path becomes the
    DownloadButton value; otherwise the button stays hidden.

    Returns:
        (raw JSON for the State component, update for the download button).
    """
    if status and status.startswith("✅") and raw_json:
        # NOTE: one small temp directory is created per successful analysis
        # and is not cleaned up by this app.
        export_dir = tempfile.mkdtemp(prefix="analysis_")
        export_path = os.path.join(export_dir, "analysis_result.json")
        with open(export_path, "w", encoding="utf-8") as f:
            f.write(raw_json)
        return raw_json, gr.update(value=export_path, visible=True)
    return raw_json, gr.update(visible=False)


def _reset_outputs():
    """Return the initial values for every result component."""
    return (
        INITIAL_REPORT_MD,         # structured report
        "",                        # raw JSON textbox
        INITIAL_STATUS,            # status textbox
        gr.update(visible=False),  # download button
        "",                        # raw JSON state
    )


# ---------------------- UI layout ----------------------
with gr.Blocks(title="智能体截图分析工具", theme=gr.themes.Soft()) as demo:
    # Keeps the raw JSON of the last analysis.
    raw_json_store = gr.State("")

    # Logo and example image are loaded once at start-up from Base64 text files.
    logo_image = base64_to_pil_image(read_base64_from_file("logo.txt"))
    demo_image = base64_to_pil_image(read_base64_from_file("demo.txt"))

    # Header: logo when available, plain title otherwise.
    with gr.Row(elem_id="logo_row", visible=logo_image is not None):
        gr.Image(
            value=logo_image,
            height=50,
            width=165,
            interactive=False,
            show_label=False,
            show_download_button=False,
        )
    if logo_image is None:
        gr.Markdown("# 智能体截图分析工具", elem_id="fallback_title")

    gr.Markdown("### 功能说明", visible=logo_image is not None)
    gr.Markdown(
        """
上传智能体网页截图(JPG/PNG),系统将自动完成:
1. 页面质量评分(整体评价、设计美感等5个维度)
2. 智能体能力拆解(核心功能、优势、潜在用途等)
3. 生成结构化报告,支持下载原始JSON结果
""",
        elem_id="description",
    )

    # Main area: upload on the left, results on the right.
    with gr.Row(elem_id="main_content", variant="panel"):
        with gr.Column(scale=1, elem_id="upload_col"):
            input_image = gr.Image(
                type="filepath",
                label="上传截图",
                height=300,
                show_label=True,
                elem_id="image_upload",
            )

            # NOTE(review): the example is a PIL image while the input uses
            # type="filepath" — confirm gr.Examples accepts this in the
            # deployed Gradio version.
            example_images = [demo_image] if demo_image is not None else []
            if example_images:
                gr.Examples(
                    examples=example_images,
                    inputs=input_image,
                    label="示例截图(点击可直接使用)",
                    elem_id="examples_box",
                )

            analyze_btn = gr.Button(
                "开始分析",
                variant="primary",
                size="lg",
                elem_id="analyze_btn",
                interactive=PIL_AVAILABLE,  # disabled when pillow is missing
            )

            if not PIL_AVAILABLE:
                gr.Markdown(
                    "⚠️ 未安装pillow库,无法处理图片\n请执行 `pip install pillow` 后重启应用",
                    elem_id="dependency_warning",
                )

        with gr.Column(scale=2, elem_id="result_col", variant="panel"):
            status_display = gr.Textbox(
                label="当前状态",
                interactive=False,
                value=INITIAL_STATUS,
                elem_id="status_box",
                max_lines=2,
            )

            with gr.Tabs(elem_id="result_tabs"):
                with gr.Tab("结构化分析报告", elem_id="structured_tab"):
                    structured_result = gr.Markdown(
                        value=INITIAL_REPORT_MD,
                        elem_id="structured_result",
                    )

                with gr.Tab("原始JSON结果", elem_id="raw_json_tab"):
                    raw_json_result = gr.Textbox(
                        label=None,
                        lines=20,
                        placeholder="分析完成后,此处将显示格式化的原始JSON结果...",
                        elem_id="raw_json_box",
                        container=True,
                    )

            with gr.Row(elem_id="action_buttons"):
                # The first positional argument is the label, so it must not
                # be passed again as a keyword.
                download_btn = gr.DownloadButton(
                    "下载JSON结果",
                    elem_id="download_btn",
                    visible=False,  # shown after a successful analysis
                )
                clear_btn = gr.Button(
                    "清除结果",
                    variant="secondary",
                    size="sm",
                    elem_id="clear_btn",
                )

    # ---------------------- Event wiring ----------------------
    # 1. Analyze: stream progress, then publish the JSON for download.
    analyze_btn.click(
        fn=analyze_image,
        inputs=input_image,
        outputs=[structured_result, raw_json_result, status_display],
    ).then(
        fn=_finalize_analysis,
        inputs=[raw_json_result, status_display],
        outputs=[raw_json_store, download_btn],
    )

    # 2. Download: no handler needed — the button serves the file path that
    #    _finalize_analysis stored as its value.

    # 3. Clear: reset every result component.
    clear_btn.click(
        fn=_reset_outputs,
        outputs=[
            structured_result,
            raw_json_result,
            status_display,
            download_btn,
            raw_json_store,
        ],
    )

    # ---------------------- Styling ----------------------
    demo.load(None, None, None, js=PAGE_STYLE_JS)


# ---------------------- Entry point ----------------------
if __name__ == "__main__":
    try:
        # Hard requirements only. pillow is optional: when it is missing the
        # UI starts with the analyze button disabled and a warning shown.
        required_imports = {
            "gradio": "gradio",
            "openai": "openai",
        }
        for import_name, package_name in required_imports.items():
            try:
                importlib.import_module(import_name)
            except ImportError:
                raise ImportError(package_name)

        demo.launch()
    except ImportError as e:
        logger.critical(f"缺少依赖包:请执行 `pip install {e.args[0]}` 安装")
    except Exception as e:
        logger.critical(f"应用启动失败:{str(e)}")