Spaces:
Sleeping
Sleeping
File size: 3,113 Bytes
1575964 0ea0ad2 8739f35 1575964 8739f35 0ea0ad2 8739f35 1575964 8739f35 1575964 8739f35 1575964 8739f35 1575964 8739f35 0ea0ad2 1575964 8739f35 1575964 8739f35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import streamlit as st
from transformers import pipeline
from PIL import Image, ImageDraw
import io
# -------------------------
# Load models once (cached)
# -------------------------
@st.cache_resource
def load_pipelines():
    """Create and cache the three Hugging Face pipelines.

    Returns:
        tuple: (captioner, detector, vqa) pipelines for image captioning,
        object detection, and visual question answering respectively.

    ``@st.cache_resource`` ensures the models are downloaded/loaded only
    once per Streamlit server process instead of on every script rerun.
    """
    # NOTE: indentation restored — the pasted source had the function body
    # at column 0, which is a SyntaxError.
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    detector = pipeline("object-detection", model="facebook/detr-resnet-50")
    vqa = pipeline("visual-question-answering", model="Salesforce/blip-vqa-base")
    return captioner, detector, vqa


captioner, detector, vqa = load_pipelines()
# -------------------------
# Streamlit UI
# -------------------------
# NOTE(review): the emoji string literals in this file look mojibake-encoded
# (UTF-8 bytes decoded as a legacy codepage). They are kept verbatim here;
# confirm and re-save the file as UTF-8 if the icons render as garbage.
# set_page_config must be the first Streamlit call after the cached loaders.
st.set_page_config(page_title="Vision Chatbot+", page_icon="πΌοΈ")
st.title("πΌοΈ Vision Chatbot+")
st.write("Upload an image to get **captions, emojis, object detection (with boxes!), and Q&A**")
# Only raster formats the PIL loader below is known to handle.
uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "jpeg", "png"])
if uploaded_file:
    # NOTE: indentation restored throughout — the pasted source had the whole
    # `if` body at column 0, which is a SyntaxError.
    # Normalize to RGB so PNGs with alpha / palettes work with the pipelines.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # ---- Captioning ----
    with st.spinner("Generating caption..."):
        caption = captioner(image)[0]["generated_text"]
    st.subheader("π Caption")
    st.success(caption)

    # ---- Emoji Mode ----
    # Word -> emoji substitutions applied to the generated caption.
    emoji_map = {
        "dog": "πΆ", "cat": "π±", "ball": "β½", "frisbee": "π₯",
        "man": "π¨", "woman": "π©", "child": "π§",
        "car": "π", "bicycle": "π²", "horse": "π", "bird": "π¦",
        "food": "π", "drink": "π₯€", "tree": "π³"
    }

    def _emojify_word(word):
        """Map a caption word to its emoji, tolerating punctuation.

        Fix: the original looked up the raw token, so "dog." or "dog,"
        never matched the "dog" key. Strip common punctuation for the
        lookup; keep the original word unchanged when there is no match.
        """
        key = word.strip(".,!?;:").lower()
        return emoji_map.get(key, word)

    emoji_caption = " ".join(_emojify_word(word) for word in caption.split())
    st.subheader("π Emoji Mode")
    st.info(emoji_caption)

    # ---- Object Detection ----
    with st.spinner("Detecting objects..."):
        detections = detector(image)
    st.subheader("π Objects Detected")

    # Draw bounding boxes on a copy so the original image stays clean.
    draw_img = image.copy()
    draw = ImageDraw.Draw(draw_img)
    for obj in detections:
        box = obj["box"]
        label = f"{obj['label']} ({obj['score']:.2f})"
        # Draw rectangle around the detected object.
        draw.rectangle(
            [(box["xmin"], box["ymin"]), (box["xmax"], box["ymax"])],
            outline="red", width=3
        )
        # Fix: clamp the label y-coordinate — for boxes touching the top
        # edge, ymin - 10 is negative and the label was drawn off-image.
        draw.text((box["xmin"], max(0, box["ymin"] - 10)), label, fill="red")
        st.write(f"- {label}")
    st.image(draw_img, caption="Objects with bounding boxes", use_container_width=True)

    # ---- Download Button ----
    # Serialize the annotated copy to an in-memory PNG for download.
    buf = io.BytesIO()
    draw_img.save(buf, format="PNG")
    byte_im = buf.getvalue()
    st.download_button(
        label="π₯ Download Annotated Image",
        data=byte_im,
        file_name="annotated_image.png",
        mime="image/png"
    )

    # ---- Visual Question Answering ----
    st.subheader("β Ask a Question About the Image")
    user_q = st.text_input("Type your question (e.g., 'What is the dog doing?')")
    if user_q:
        with st.spinner("Thinking..."):
            # The VQA pipeline accepts a {"question", "image"} dict and
            # returns a list of {"answer", "score"} dicts, best first.
            answer = vqa({"question": user_q, "image": image})
        st.success(f"**Answer:** {answer[0]['answer']}")
|