import os
import torch
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from peft import get_peft_model, LoraConfig
from huggingface_hub import login
# Step 1: Log in to Hugging Face using a token from the environment
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)
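# In a Hugging Face Space, HF_TOKEN is typically stored as a repository secret;
# when running locally, export it first (shell example):
#   export HF_TOKEN=<your-token>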
# Step 2: Load the private model and processor
model_name = "anushettypsl/paligemma_vqav2"  # Replace with the actual model ID
# NOTE: BlipProcessor/BlipForConditionalGeneration expect a BLIP-style checkpoint;
# make sure these classes match the architecture of the checkpoint you load.
processor = BlipProcessor.from_pretrained(model_name)
base_model = BlipForConditionalGeneration.from_pretrained(model_name)
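# Optional sketch (an assumption, not part of the original app): move the model to
# a GPU when one is available; pixel values inside predict() must then follow it.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# base_model.to(device)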
# Step 3: Set up PEFT configuration (if needed) | |
lora_config = LoraConfig( | |
r=16, # Rank | |
lora_alpha=32, # Scaling factor | |
lora_dropout=0.1, # Dropout | |
task_type=TaskType.VISUAL_QUESTION_ANSWERING, # Adjust according to your model's task | |
) | |
# Step 4: Wrap the base model with the PEFT adapter
peft_model = get_peft_model(base_model, lora_config)
peft_model.eval()  # Inference only: disable dropout in the LoRA layers
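# Sanity check via PEFT's built-in helper: report how many parameters LoRA adapts.
peft_model.print_trainable_parameters()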
# Step 5: Define the prediction function
def predict(image):
    # Preprocess the image into pixel values
    inputs = processor(images=image, return_tensors="pt")
    # Generate output tokens without tracking gradients
    with torch.no_grad():
        output = peft_model.generate(pixel_values=inputs.pixel_values)
    # Decode the generated token IDs to text
    generated_text = processor.decode(output[0], skip_special_tokens=True)
    return generated_text
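# Quick local test outside Gradio (a sketch; assumes an "example.jpg" next to the script):
# print(predict(Image.open("example.jpg")))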
# Step 6: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",               # Text output
    title="Image-to-Text Model",
    description="Upload an image to generate a descriptive text.",
)
# Step 7: Launch the Gradio app
interface.launch()
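# When running locally rather than in a Space, share=True serves a temporary public URL:
# interface.launch(share=True)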