import torch
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    MllamaForConditionalGeneration,
)
import gradio as gr
import json
import os
import re

model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# Fail fast with a clear KeyError if the token for the gated checkpoint is missing.
token = os.environ["HUGGINGFACE_TOKEN"].strip()

processor = AutoProcessor.from_pretrained(model_name, token=token)
# Load the weights in 4-bit (requires bitsandbytes). device_map="auto" places the
# quantized model on the GPU automatically; calling .to("cuda") on a 4-bit model
# raises an error, so no manual move is needed afterwards.
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    token=token,
)


def analyze_image(image, prompt):
    # Single-turn chat: one image placeholder followed by the text prompt.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        # Leave enough headroom for the detailed JSON explanation the prompt
        # requests; too small a budget truncates the object and breaks json.loads.
        output = model.generate(**inputs, max_new_tokens=256)
    full_response = processor.decode(output[0])

    try:
        # The decoded output echoes the prompt, which itself contains example JSON
        # objects, so collect every {...} span and keep only the last one: the
        # model's actual answer. The non-greedy match is safe for the flat,
        # single-level objects the prompt requests.
        json_matches = list(re.finditer(r'\{.*?\}', full_response, re.DOTALL))
        if json_matches:
            last_json_str = json_matches[-1].group(0)
            try:
                processed_json = json.loads(last_json_str)
            except json.JSONDecodeError as e:
                processed_json = {"error": f"Invalid JSON in model output: {e}", "full_response": full_response}
        else:
            processed_json = {"error": "No JSON found in model output", "full_response": full_response}
    except Exception as e:
        processed_json = {"error": str(e), "full_response": full_response}

    return full_response, processed_json


default_prompt = """Analyze this image and determine if it contains a data logger.
A data logger is typically a small, black electronic device used to monitor and record data
over time, such as voltage, temperature, or current, via external sensors.
Carefully examine the image and provide a detailed response.
If a data logger is present in the image, respond with:
{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}

If no data logger is visible, respond with:
{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}

Ensure your response is in valid JSON format."""

iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", value=default_prompt, lines=10),
    ],
    outputs=[
        gr.Textbox(label="Full Response", lines=10),
        gr.JSON(label="Processed JSON"),
    ],
    title="Llama 3.2 Vision",
    description="Upload an image and customize the prompt to check if it contains a data logger.",
    cache_examples=False,
    examples=[
        ["bad.png", default_prompt],
    ],
)

iface.launch()
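# To run locally (assumes a CUDA GPU with enough memory for the 4-bit weights, plus a
# HUGGINGFACE_TOKEN that has access to the gated Llama 3.2 checkpoints):
#
#   pip install torch transformers accelerate bitsandbytes gradio
#   HUGGINGFACE_TOKEN=hf_... python app.py   # "app.py" is an assumed name for this file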