Spaces:
Sleeping
Sleeping
File size: 6,065 Bytes
36b757c d654351 36b757c 81db3fd ffb829e 36b757c b73c970 d6c8e75 36b757c d654351 36b757c ce78321 ffb829e ce78321 2d145da ce78321 ffb829e ce78321 2d145da ecc5376 2d145da ecc5376 2d145da ecc5376 2d145da ecc5376 2d145da ecc5376 2d145da 32e7c60 2d145da 32e7c60 2d145da 32e7c60 2d145da 32e7c60 5d15a12 32e7c60 2d145da a19c964 2d145da 32e7c60 a19c964 32e7c60 d6c8e75 2d145da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForPreTraining
import gradio as gr
import json
import traceback
import os
import re
# --- Model setup -------------------------------------------------------------
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Fail fast with a clear error if the token is missing: the bare
# os.getenv(...).strip() form raises an opaque AttributeError on None.
token = os.getenv("HUGGINGFACE_TOKEN")
if token is None:
    raise RuntimeError("HUGGINGFACE_TOKEN environment variable is not set")
token = token.strip()

processor = AutoProcessor.from_pretrained(model_name, token=token)
# Load in 4-bit (bitsandbytes) to fit the 11B model on a single GPU.
model = AutoModelForPreTraining.from_pretrained(
    model_name,
    quantization_config={"load_in_4bit": True},
    token=token
)
# NOTE(review): bitsandbytes-quantized models are already placed on the GPU
# by from_pretrained, and transformers raises on an explicit .to('cuda') for
# them — only move the model manually when it is NOT quantized.
if torch.cuda.is_available() and not getattr(model, "is_quantized", False):
    model = model.to('cuda')
def analyze_image(image, prompt):
    """Run the vision model on *image* with *prompt* and extract a JSON verdict.

    Args:
        image: PIL image supplied by the Gradio widget.
        prompt: instruction text; expected to ask the model for a JSON reply.

    Returns:
        A tuple ``(full_response, processed_json)`` where ``full_response`` is
        the decoded model generation and ``processed_json`` is either the
        parsed JSON object found in it or an ``{"error": ..., "full_response": ...}``
        dict describing why parsing failed.
    """
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=100)

    # BUG FIX: decode only the newly generated tokens. Decoding output[0]
    # wholesale echoes the prompt back, and since the default prompt itself
    # contains two example JSON objects, the "last JSON match" heuristic below
    # could pick up a prompt example instead of the model's actual answer
    # whenever the generation contained no JSON.
    prompt_len = inputs["input_ids"].shape[-1]
    full_response = processor.decode(output[0][prompt_len:], skip_special_tokens=True)

    try:
        # Find all JSON-like structures in the response; the model sometimes
        # produces prose around the JSON, so take the last match.
        json_matches = list(re.finditer(r'\{.*?\}', full_response, re.DOTALL))
        if json_matches:
            last_json_str = json_matches[-1].group(0)
            try:
                processed_json = json.loads(last_json_str)
            except json.JSONDecodeError as e:
                processed_json = {"error": f"Invalid JSON in model output: {e}", "full_response": full_response}
        else:
            processed_json = {"error": "No JSON found in model output", "full_response": full_response}
    except Exception as e:
        # Defensive catch-all so the Gradio UI always gets a structured reply
        # instead of a stack trace.
        processed_json = {"error": str(e), "full_response": full_response}
    return full_response, processed_json
# Default instruction shown in the prompt textbox. It deliberately embeds two
# example JSON replies so the model mimics the format; note this means the
# raw decoded output can contain JSON that came from the prompt, not the model.
default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors.
Carefully examine the image and provide a detailed response. If a data logger is present in the image, respond with:
{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}
If no data logger is visible, respond with:
{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}
Ensure your response is in valid JSON format """
# Gradio UI: image + editable prompt in, raw model text + parsed JSON out.
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", value=default_prompt, lines=10)
    ],
    # Two outputs matching analyze_image's (full_response, processed_json) tuple.
    outputs=[
        gr.Textbox(label="Full Response", lines=10),
        gr.JSON(label="Processed JSON")
    ],
    title="Llama 3.2 Vision",
    # Examples are not pre-computed at startup (each run needs the GPU model).
    cache_examples=False,
    description=" ",
    # NOTE(review): assumes "bad.png" exists next to this script — verify the
    # file is committed to the Space, otherwise the example row will 404.
    examples=[
        ["bad.png", default_prompt]
    ]
)
iface.launch()
# import torch
# from PIL import Image
# from transformers import AutoProcessor, AutoModelForPreTraining
# import gradio as gr
# import json
# import traceback
# import os
# import re
# model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# token = os.getenv("HUGGINGFACE_TOKEN").strip()
# processor = AutoProcessor.from_pretrained(model_name, token=token)
# model = AutoModelForPreTraining.from_pretrained(
# model_name,
# quantization_config={"load_in_4bit": True},
# token=token
# )
# if torch.cuda.is_available():
# model = model.to('cuda')
# def analyze_image(image, prompt):
# messages = [
# {"role": "user", "content": [
# {"type": "image"},
# {"type": "text", "text": prompt}
# ]}
# ]
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
# inputs = processor(
# image,
# input_text,
# add_special_tokens=False,
# return_tensors="pt"
# ).to(model.device)
# with torch.no_grad():
# output = model.generate(**inputs, max_new_tokens=100)
# full_response = processor.decode(output[0])
# print("Full response:", full_response) # Debug print
# # return full_response
# try:
# json_match = re.search(r'\{.*?\}', full_response, re.DOTALL)
# if json_match:
# json_str = json_match.group(0)
# try:
# return json.loads(json_str)
# except json.JSONDecodeError as e:
# print(f"JSON decode error: {e}")
# return {"error": "Invalid JSON in model output", "full_response": full_response}
# else:
# return {"error": "No JSON found in model output", "full_response": full_response}
# except Exception as e:
# print(f"Error in analyze_image: {e}")
# return {"Full Response": str(e), "full_response": full_response}
# default_prompt = """Analyze this image and determine if it contains a data logger.
# A data logger is typically a small, black electronic device used to monitor and record data
# over time, such as voltage, temperature, or current, via external sensors.
# If a data logger is present in the image, respond with:
# {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}
# If no data logger is visible, respond with:
# {"present": false, "reason": "Brief explanation of why you believe there's no data logger"}
# Ensure your response is in valid JSON format."""
# iface = gr.Interface(
# fn=analyze_image,
# inputs=[
# gr.Image(type="pil", label="Upload Image"),
# gr.Textbox(label="Prompt", value=default_prompt, lines=10)
# ],
# outputs=gr.JSON(label="Analysis Result"),
# title="Data Logger Detection using Llama 3.2 Vision",
# description="Upload an image and customize the prompt to check if it contains a data logger.",
# examples=[
# ["bad.png", default_prompt]
# ]
# )
# iface.launch() |