"""Gradio demo: ask a free-form question about an image with BLIP VQA.

The image comes either from a URL or from a file upload. The question and the
image are passed to the ``Salesforce/blip-vqa-base`` model and the decoded
answer is shown in an HTML component.
"""
import html
import io

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AutoProcessor
from transformers import BlipForQuestionAnswering
from transformers.utils import logging

logging.set_verbosity_error()

MODEL_ID = "Salesforce/blip-vqa-base"

# Upper bound (seconds) on how long a remote image download may take, so a
# stalled server cannot block a request handler indefinitely.
REQUEST_TIMEOUT_S = 10

# Markup placed in the gr.HTML output; ``{answer}`` receives the escaped text.
ANSWER_TEMPLATE = "\n\nAnswer to your question about the image\n\n\n{answer}\n\n"

model = BlipForQuestionAnswering.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)


def _fetch_image(image_url):
    """Download ``image_url`` and return it as an RGB PIL image.

    Raises:
        requests.RequestException: the URL is malformed, the request failed,
            timed out, or the server answered with an HTTP error status.
        OSError: the downloaded bytes are not an image PIL can decode.
    """
    response = requests.get(image_url, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    # Decode from the fully-read body rather than the raw socket stream: the
    # connection is released immediately and content decoding is applied.
    return Image.open(io.BytesIO(response.content)).convert("RGB")


def process_image(input_type, image_url, image_upload, question):
    """Answer ``question`` about the selected image.

    Args:
        input_type: ``"URL"`` to download the image from ``image_url``; any
            other value uses ``image_upload``.
        image_url: Address of the image, used when ``input_type`` is "URL".
        image_upload: PIL image from the upload component (``None`` when
            nothing has been uploaded).
        question: The question text typed by the user.

    Returns:
        The string shown in the answer component, containing the
        capitalized, HTML-escaped model answer.

    Raises:
        gr.Error: the question or the image is missing, or the image could
            not be downloaded/decoded.
    """
    if not question or not question.strip():
        raise gr.Error("Please type a question about the image.")

    if input_type == "URL":
        if not image_url or not image_url.strip():
            raise gr.Error("Please paste the URL of an image.")
        try:
            raw_image = _fetch_image(image_url.strip())
        except (requests.RequestException, OSError) as err:
            raise gr.Error(
                f"Could not load an image from that URL: {err}"
            ) from err
    else:
        if image_upload is None:
            raise gr.Error("Please choose an input type and provide an image.")
        # Same normalization as the URL path (uploads may be RGBA, L, ...).
        raw_image = image_upload.convert("RGB")

    inputs = processor(raw_image, text=question, return_tensors="pt")
    out = model.generate(**inputs)[0]
    answer = str(processor.decode(out, skip_special_tokens=True)).capitalize()
    print(answer)

    # The result is rendered by gr.HTML, so the model text must not be able
    # to inject markup.
    return ANSWER_TEMPLATE.format(answer=html.escape(answer))


def display_image_from_url(image_url):
    """Return a preview image for ``image_url``, or ``None`` if unavailable.

    This is wired to the URL textbox's ``change`` event, so it can be called
    with incomplete or invalid URLs. Failures are therefore swallowed here
    and simply clear the preview; ``process_image`` reports them on submit.
    """
    if not image_url or not image_url.strip():
        return None
    try:
        return _fetch_image(image_url.strip())
    except (requests.RequestException, OSError):
        return None


def toggle_inputs(input_type):
    """Show the widgets that belong to the chosen input type.

    Returns:
        Visibility updates for, in order: the URL textbox, the URL preview
        image, the upload image, and the question textbox. The question box
        is made visible for either choice.
    """
    use_url = input_type == "URL"
    return (
        gr.update(visible=use_url),
        gr.update(visible=use_url),
        gr.update(visible=not use_url),
        gr.update(visible=True),
    )


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Question the Image - test & demo app by Srinivas.V..
        Paste either URL of an image or upload the image, type-in your question about the image and submit.
        """
    )

    input_type = gr.Radio(choices=["URL", "Upload"], label="Input Type")
    image_url = gr.Textbox(label="Image URL", visible=False)
    url_image = gr.Image(type="pil", label="URL Image", visible=False)
    image_upload = gr.Image(type="pil", label="Upload Image", visible=False)
    question = gr.Textbox(label="Type the question", visible=False, lines=2)

    # Everything but the radio starts hidden; picking an input type reveals
    # the matching widgets plus the question box.
    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=[image_url, url_image, image_upload, question],
    )
    image_url.change(
        fn=display_image_from_url,
        inputs=image_url,
        outputs=url_image,
    )

    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="Answer to your question about the image")
    submit_btn.click(
        fn=process_image,
        inputs=[input_type, image_url, image_upload, question],
        outputs=processed_image,
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=True)