import os
import torch
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from peft import get_peft_model, LoraConfig
from huggingface_hub import login
# Step 1: Log in to Hugging Face
hf_token = os.getenv("HF_TOKEN")
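# Fail fast if the token is missing (a hedged addition: without it, login()
# would fall back to an interactive prompt, which is unavailable in a Space).
if not hf_token:
    raise RuntimeError("HF_TOKEN is not set; add it as a secret so the private model can be downloaded.")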
login(token=hf_token)
# Step 2: Load the private model and processor
# Note: the BLIP classes below expect a BLIP-compatible checkpoint; if the repo
# is a PaliGemma fine-tune, swap in the matching PaliGemma processor/model classes.
model_name = "anushettypsl/paligemma_vqav2"  # Replace with the actual model repo ID
processor = BlipProcessor.from_pretrained(model_name)
base_model = BlipForConditionalGeneration.from_pretrained(model_name)
# Step 3: Set up PEFT configuration (if needed)
# PEFT's TaskType has no visual-question-answering entry, so the task type is
# omitted here and LoRA is pointed at named attention projections instead.
lora_config = LoraConfig(
    r=16,              # Rank of the low-rank update matrices
    lora_alpha=32,     # Scaling factor
    lora_dropout=0.1,  # Dropout applied to the LoRA layers
    target_modules=["query", "value"],  # Assumption: matches BLIP's text-side attention layer names
)
# Step 4: Wrap the base model with PEFT
# Note: this attaches freshly initialized (untrained) LoRA weights; for pure
# inference you could skip this step and use base_model directly.
peft_model = get_peft_model(base_model, lora_config)
peft_model.eval()
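# Alternative (a hedged sketch): if the repo already ships trained LoRA
# adapters, load them instead of attaching fresh ones, e.g.
#     from peft import PeftModel
#     peft_model = PeftModel.from_pretrained(base_model, model_name)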
# Step 5: Define the prediction function
def predict(image):
    # Preprocess the PIL image into a batch of pixel values
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # Generate output tokens without tracking gradients
    with torch.no_grad():
        output = peft_model.generate(pixel_values=pixel_values)
    # Decode the generated token IDs to text
    generated_text = processor.decode(output[0], skip_special_tokens=True)
    return generated_text
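# Example (hypothetical local smoke test, assuming an "example.jpg" exists):
#     print(predict(Image.open("example.jpg")))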
# Step 6: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input as a PIL.Image
    outputs="text",               # Generated caption as plain text
    title="Image-to-Text Model",
    description="Upload an image to generate a descriptive text.",
)
# Step 7: Launch the Gradio app
interface.launch()