anmoldograpsl committed on
Commit dab33d1 · verified · 1 Parent(s): 3605bde

Update app.py

Files changed (1)
  1. app.py +31 -13
app.py CHANGED
@@ -1,32 +1,50 @@
 
import os
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from huggingface_hub import login
- from PIL import Image

- # Step 1: Authenticate with Hugging Face using your token
hf_token = os.getenv("HF_TOKEN")
- login(token=hf_token)  # Paste your token here

- # Step 2: Load the processor and the private model
- model_name = "anushettypsl/paligemma_vqav2"  # Replace with actual model link
processor = BlipProcessor.from_pretrained(model_name)
- model = BlipForConditionalGeneration.from_pretrained(model_name)

- # Step 3: Define the prediction function
def predict(image):
-     inputs = processor(image, return_tensors="pt")
-     outputs = model.generate(**inputs)
-     generated_text = processor.decode(outputs[0], skip_special_tokens=True)
    return generated_text

- # Step 4: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",  # Text output
-     title="Image-to-Text Model"
)

- # Step 5: Launch the app
interface.launch()
 
import os
import gradio as gr
+ from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
+ import torch  # needed for torch.no_grad() in predict()
+ from peft import get_peft_model, LoraConfig
from huggingface_hub import login

+ # Step 1: Log in to Hugging Face
hf_token = os.getenv("HF_TOKEN")
+ login(token=hf_token)

+ # Step 2: Load the private model and processor
+ model_name = "your-friend/private-model-link"  # Replace with the actual model link
processor = BlipProcessor.from_pretrained(model_name)
+ base_model = BlipForConditionalGeneration.from_pretrained(model_name)
+
+ # Step 3: Set up the PEFT configuration (if needed)
+ # Note: peft's TaskType enum has no visual-question-answering entry, so the
+ # LoRA target modules are named explicitly instead (the query/value
+ # projections of BLIP's text attention layers)
+ lora_config = LoraConfig(
+     r=16,  # Rank
+     lora_alpha=32,  # Scaling factor
+     lora_dropout=0.1,  # Dropout
+     target_modules=["query", "value"],  # Modules to wrap with LoRA adapters
+ )
+
+ # Step 4: Wrap the base model with the PEFT adapters
+ peft_model = get_peft_model(base_model, lora_config)

+ # Step 5: Define the prediction function
def predict(image):
+     # Preprocess the image into pixel values
+     pixel_values = processor(image, return_tensors="pt").pixel_values
+     # Generate output using the model (no gradients needed at inference)
+     with torch.no_grad():
+         output = peft_model.generate(pixel_values=pixel_values)
+     # Decode the output tokens to text
+     generated_text = processor.decode(output[0], skip_special_tokens=True)
    return generated_text

+ # Step 6: Create the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",  # Text output
+     title="Image-to-Text Model",
+     description="Upload an image to generate a descriptive text."
)

+ # Step 7: Launch the Gradio app
interface.launch()
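
One caveat with the updated script: get_peft_model attaches freshly initialized LoRA adapters, which start out as a no-op, so generation will match the plain base model until the adapters are trained. If trained adapter weights already exist on the Hub, loading them with peft's PeftModel.from_pretrained is the usual route. A minimal sketch, assuming a public BLIP checkpoint as the base and a hypothetical adapter repo id ("your-friend/lora-adapter") plus a local test image:

import torch
from PIL import Image
from peft import PeftModel
from transformers import BlipProcessor, BlipForConditionalGeneration

base_id = "Salesforce/blip-image-captioning-base"  # assumption: a public BLIP checkpoint
processor = BlipProcessor.from_pretrained(base_id)
base_model = BlipForConditionalGeneration.from_pretrained(base_id)

# Load trained LoRA weights on top of the base model
# ("your-friend/lora-adapter" is a hypothetical repo id)
model = PeftModel.from_pretrained(base_model, "your-friend/lora-adapter")
model.eval()

# Quick local smoke test of the captioning path ("example.jpg" is a placeholder)
pixel_values = processor(Image.open("example.jpg"), return_tensors="pt").pixel_values
with torch.no_grad():
    output = model.generate(pixel_values=pixel_values)
print(processor.decode(output[0], skip_special_tokens=True))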