import torch
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    MllamaForConditionalGeneration,
)
import gradio as gr
import json
import os
import re

model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# Fail fast with a clear KeyError if the token for the gated checkpoint is missing.
token = os.environ["HUGGINGFACE_TOKEN"].strip()

processor = AutoProcessor.from_pretrained(model_name, token=token)
# Load the weights in 4-bit (requires bitsandbytes). device_map="auto" places the
# quantized model on the GPU automatically; calling .to("cuda") on a 4-bit model
# raises an error, so no manual move is needed afterwards.
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    token=token,
)


def analyze_image(image, prompt):
    # Single-turn chat: one image placeholder followed by the text prompt.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        # Leave enough headroom for the detailed JSON explanation the prompt
        # requests; too small a budget truncates the object and breaks json.loads.
        output = model.generate(**inputs, max_new_tokens=256)
    full_response = processor.decode(output[0])

    try:
        # The decoded output echoes the prompt, which itself contains example JSON
        # objects, so collect every {...} span and keep only the last one: the
        # model's actual answer. The non-greedy match is safe for the flat,
        # single-level objects the prompt requests.
        json_matches = list(re.finditer(r'\{.*?\}', full_response, re.DOTALL))
        if json_matches:
            last_json_str = json_matches[-1].group(0)
            try:
                processed_json = json.loads(last_json_str)
            except json.JSONDecodeError as e:
                processed_json = {"error": f"Invalid JSON in model output: {e}", "full_response": full_response}
        else:
            processed_json = {"error": "No JSON found in model output", "full_response": full_response}
    except Exception as e:
        processed_json = {"error": str(e), "full_response": full_response}

    return full_response, processed_json


default_prompt = """Analyze this image and determine if it contains a data logger.
A data logger is typically a small, black electronic device used to monitor and record data
over time, such as voltage, temperature, or current, via external sensors.
Carefully examine the image and provide a detailed response.
If a data logger is present in the image, respond with:
{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}

If no data logger is visible, respond with:
{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}

Ensure your response is in valid JSON format."""

iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", value=default_prompt, lines=10),
    ],
    outputs=[
        gr.Textbox(label="Full Response", lines=10),
        gr.JSON(label="Processed JSON"),
    ],
    title="Llama 3.2 Vision",
    description="Upload an image and customize the prompt to check if it contains a data logger.",
    cache_examples=False,
    examples=[
        ["bad.png", default_prompt],
    ],
)

iface.launch()
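# To run locally (assumes a CUDA GPU with enough memory for the 4-bit weights, plus a
# HUGGINGFACE_TOKEN that has access to the gated Llama 3.2 checkpoints):
#
#   pip install torch transformers accelerate bitsandbytes gradio
#   HUGGINGFACE_TOKEN=hf_... python app.py   # "app.py" is an assumed name for this file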