anmoldograpsl committed on
Commit dab33d1 · verified · 1 Parent(s): 3605bde

Update app.py

Files changed (1)
  1. app.py +31 -13
app.py CHANGED
@@ -1,32 +1,50 @@
 
import os
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from huggingface_hub import login
- from PIL import Image

- # Step 1: Authenticate with Hugging Face using your token
hf_token = os.getenv("HF_TOKEN")
- login(token=hf_token)  # Paste your token here

- # Step 2: Load the processor and the private model
- model_name = "anushettypsl/paligemma_vqav2"  # Replace with actual model link
processor = BlipProcessor.from_pretrained(model_name)
- model = BlipForConditionalGeneration.from_pretrained(model_name)

- # Step 3: Define the prediction function
def predict(image):
-     inputs = processor(image, return_tensors="pt")
-     outputs = model.generate(**inputs)
-     generated_text = processor.decode(outputs[0], skip_special_tokens=True)
    return generated_text

- # Step 4: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",  # Text output
-     title="Image-to-Text Model"
)

- # Step 5: Launch the app
interface.launch()
 
import os
import gradio as gr
+ from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
+ import torch  # needed for torch.no_grad() in predict()
+ from peft import get_peft_model, LoraConfig
from huggingface_hub import login

+ # Step 1: Log in to Hugging Face
hf_token = os.getenv("HF_TOKEN")
+ login(token=hf_token)

+ # Step 2: Load the private model and processor
+ model_name = "your-friend/private-model-link"  # Replace with the actual model link
processor = BlipProcessor.from_pretrained(model_name)
+ base_model = BlipForConditionalGeneration.from_pretrained(model_name)
+
+ # Step 3: Set up the PEFT configuration (if needed)
+ # Note: peft's TaskType enum has no visual-question-answering entry, so the
+ # LoRA target modules are named explicitly instead (the query/value
+ # projections of BLIP's text attention layers)
+ lora_config = LoraConfig(
+     r=16,  # Rank
+     lora_alpha=32,  # Scaling factor
+     lora_dropout=0.1,  # Dropout
+     target_modules=["query", "value"],  # Modules to wrap with LoRA adapters
+ )
+
+ # Step 4: Wrap the base model with the PEFT adapters
+ peft_model = get_peft_model(base_model, lora_config)

+ # Step 5: Define the prediction function
def predict(image):
+     # Preprocess the image into pixel values
+     pixel_values = processor(image, return_tensors="pt").pixel_values
+     # Generate output using the model (no gradients needed at inference)
+     with torch.no_grad():
+         output = peft_model.generate(pixel_values=pixel_values)
+     # Decode the output tokens to text
+     generated_text = processor.decode(output[0], skip_special_tokens=True)
    return generated_text

+ # Step 6: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",  # Text output
+     title="Image-to-Text Model",
+     description="Upload an image to generate a descriptive text."
)

+ # Step 7: Launch the Gradio app
interface.launch()
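
One caveat with the updated script: get_peft_model attaches freshly initialized LoRA adapters, which start out as a no-op, so generation will match the plain base model until the adapters are trained. If trained adapter weights already exist on the Hub, loading them with peft's PeftModel.from_pretrained is the usual route. A minimal sketch, assuming a public BLIP checkpoint as the base and a hypothetical adapter repo id ("your-friend/lora-adapter") plus a local test image:

import torch
from PIL import Image
from peft import PeftModel
from transformers import BlipProcessor, BlipForConditionalGeneration

base_id = "Salesforce/blip-image-captioning-base"  # assumption: a public BLIP checkpoint
processor = BlipProcessor.from_pretrained(base_id)
base_model = BlipForConditionalGeneration.from_pretrained(base_id)

# Load trained LoRA weights on top of the base model
# ("your-friend/lora-adapter" is a hypothetical repo id)
model = PeftModel.from_pretrained(base_model, "your-friend/lora-adapter")
model.eval()

# Quick local smoke test of the captioning path ("example.jpg" is a placeholder)
pixel_values = processor(Image.open("example.jpg"), return_tensors="pt").pixel_values
with torch.no_grad():
    output = model.generate(pixel_values=pixel_values)
print(processor.decode(output[0], skip_special_tokens=True))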