import html
import io

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AutoProcessor
from transformers import BlipForQuestionAnswering
from transformers.utils import logging
# Restrict the transformers library's log output to errors.
logging.set_verbosity_error()

# The model weights and the matching processor come from the same checkpoint,
# so the identifier is spelled out once and shared by both loaders.
MODEL_ID = "Salesforce/blip-vqa-base"
model = BlipForQuestionAnswering.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)
def process_image(input_type, image_url, image_upload, question):
    """Answer ``question`` about an image with the BLIP VQA model.

    Args:
        input_type: ``"URL"`` to download the image from ``image_url``; any
            other value uses ``image_upload`` instead.
        image_url: Address of the image. Only read when ``input_type`` is
            ``"URL"``.
        image_upload: Image object handed over by the upload widget, or
            ``None`` when nothing has been uploaded.
        question: Free-text question about the image.

    Returns:
        A string for the HTML output component: a heading followed by the
        capitalized, HTML-escaped answer, or a short message explaining
        which input is missing or that the image could not be loaded.
    """
    # Validate everything up front so an incomplete form yields a readable
    # message instead of an exception raised inside PIL or the processor.
    if not question or not question.strip():
        return "Please type a question about the image."

    if input_type == "URL":
        url = (image_url or "").strip()
        if not url:
            return "Please enter an image URL first."
        try:
            # A timeout keeps an unresponsive host from blocking the handler
            # indefinitely; raise_for_status() rejects HTTP error pages
            # before they are handed to PIL.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            # Reading .content lets requests decode/close the response;
            # PIL then parses the bytes from memory.
            raw_image = Image.open(io.BytesIO(response.content)).convert('RGB')
        except (requests.RequestException, OSError) as exc:
            # OSError covers PIL failing to identify or decode the payload.
            print(f"Image download failed: {exc}")
            return (
                "Could not load an image from that URL. "
                "Please check the address and try again."
            )
    else:
        if image_upload is None:
            return "Please upload an image first."
        raw_image = image_upload

    inputs = processor(raw_image, text=question, return_tensors="pt")
    # generate() returns a batch of sequences; only one image was submitted.
    out = model.generate(**inputs)[0]
    answer = str(processor.decode(out, skip_special_tokens=True)).capitalize()
    print(answer)

    # The result is rendered by an HTML component, so escape the model text
    # to keep characters such as "<" or "&" from being read as markup.
    return f"\nAnswer to your question about the image\n{html.escape(answer)}\n"
def display_image_from_url(image_url):
    """Download the image at ``image_url`` for the preview component.

    Args:
        image_url: Text currently in the URL box; may be empty, ``None`` or
            an incomplete address.

    Returns:
        The downloaded image converted to RGB, or ``None`` when the URL is
        blank or the image cannot be fetched or decoded.
    """
    url = (image_url or "").strip()
    if not url:
        return None

    try:
        # Bounded wait so a slow or dead host cannot hang the preview.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Parse from the fully-read body so the connection is released and
        # any content encoding has already been undone by requests.
        return Image.open(io.BytesIO(response.content)).convert('RGB')
    except (requests.RequestException, OSError):
        # The preview is best-effort: this handler is bound to the URL
        # textbox's change event, so partially typed or invalid addresses
        # are expected. Show no preview rather than raising on each edit.
        return None
def toggle_inputs(input_type):
    """Build the visibility updates for the selected input mode.

    Args:
        input_type: The selected mode; ``"URL"`` selects the URL widgets,
            any other value selects the upload widget.

    Returns:
        A 4-tuple of ``gr.update`` objects. The first two are visible only
        in URL mode, the third only outside URL mode, and the fourth is
        always visible.
    """
    url_mode = input_type == "URL"
    return (
        gr.update(visible=url_mode),
        gr.update(visible=url_mode),
        gr.update(visible=not url_mode),
        gr.update(visible=True),
    )
# Assemble the page: a mode selector, the mode-specific image inputs, the
# question box, and a submit button that feeds everything to process_image.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "# Question the Image - test & demo app by Srinivas.V..\n"
        "Paste either URL of an image or upload the image, "
        "type-in your question about the image and submit.\n"
    )

    # No mode is preselected, so every input below starts hidden and is
    # revealed by toggle_inputs once the user picks one.
    input_type = gr.Radio(label="Input Type", choices=["URL", "Upload"])
    image_url = gr.Textbox(visible=False, label="Image URL")
    url_image = gr.Image(visible=False, label="URL Image", type="pil")
    image_upload = gr.Image(visible=False, label="Upload Image", type="pil")
    question = gr.Textbox(visible=False, label="Type the question", lines=2)

    # Switching mode shows/hides the four widgets above, in this order.
    toggled_widgets = [image_url, url_image, image_upload, question]
    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=toggled_widgets,
    )

    # Editing the URL refreshes the preview image.
    image_url.change(
        fn=display_image_from_url,
        inputs=image_url,
        outputs=url_image,
    )

    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="Answer to your question about the image")

    # The input order here must match process_image's parameter order.
    submit_inputs = [input_type, image_url, image_upload, question]
    submit_btn.click(
        fn=process_image,
        inputs=submit_inputs,
        outputs=processed_image,
    )

# Start the server with debug output and a public share link.
demo.launch(share=True, debug=True)