"""Gradio demo that answers questions about chest X-ray images with PaliGemma-CXR."""

import os

import gradio as gr
from huggingface_hub import login
from transformers import AutoProcessor
from transformers import PaliGemmaForConditionalGeneration

# Authenticate with the Hugging Face Hub only when a token is configured.
# NOTE(review): presumably needed because one of the repos below is gated;
# confirm against the deployment configuration.
hf_token = os.getenv("HuggingFaceApiKey")
if hf_token:
    login(token=hf_token)

# Upper bound on the number of tokens generated per answer.
MAX_NEW_TOKENS = 50

# Load Processor
# The processor comes from the base PaliGemma checkpoint, while the weights
# come from the CXR checkpoint loaded below.
processor_id = "google/paligemma-3b-pt-224"
processor = AutoProcessor.from_pretrained(processor_id)

# Load Model
model_id = "dmusingu/PaliGemma-CXR"
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)


def answer_question(image, question: str) -> str:
    """Generate the model's answer to *question* about *image*.

    Args:
        image: The uploaded image; the Gradio input below delivers it as a
            PIL image, or ``None`` when nothing has been uploaded.
        question: The prompt text entered by the user.

    Returns:
        The decoded text of the newly generated tokens (the prompt tokens
        are stripped), with special tokens removed.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Fail with a readable message instead of an opaque processor error.
        raise gr.Error("Please upload a chest X-ray image before asking a question.")

    # Process the image and question
    inputs = processor(images=image, text=question, return_tensors="pt", padding=True)

    # Perform the inference; a single example is submitted, so keep sequence 0.
    generated = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)[0]

    # The generated sequence starts with the prompt tokens; drop them so only
    # the answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    return processor.decode(generated[prompt_length:], skip_special_tokens=True)


# Define the Gradio interface
iface = gr.Interface(
    fn=answer_question,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    title="PaliGemma-CXR: Report Generation, VQA, Object detection, Segmentation, Classification",
    description="Upload an image of a chest X-ray and ask a question and the model will answer.",
)

iface.launch()