File size: 6,701 Bytes
23c2ce9
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
6248b37
c100878
23c2ce9
 
 
 
 
 
 
 
eacf903
23c2ce9
da02406
23c2ce9
 
da02406
23c2ce9
 
 
 
 
 
6248b37
 
 
 
 
 
 
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
6248b37
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
 
 
 
23c2ce9
 
 
 
 
 
 
 
6248b37
 
23c2ce9
 
6248b37
23c2ce9
6248b37
23c2ce9
6248b37
23c2ce9
6248b37
 
 
 
 
 
23c2ce9
 
 
 
 
6248b37
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
 
 
6248b37
 
 
 
 
 
 
 
 
 
 
 
23c2ce9
 
 
6248b37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import os
import json
import streamlit as st
from PIL import Image, UnidentifiedImageError, ExifTags
import requests
from io import BytesIO
import wikipedia
from BharatCaptioner import identify_landmark
from groq import Groq
import hashlib
import time  # To simulate character-by-character display

# Initialize the Groq API client.
# SECURITY: never hard-code API keys in source control — the previous revision
# shipped a literal key, which must be treated as leaked and rotated. Read the
# key from the environment (set it in the shell or via Streamlit secrets).
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    st.error("GROQ_API_KEY is not set. Export it or configure it in Streamlit secrets.")
    st.stop()  # halt the script run; nothing below works without a client
client = Groq(api_key=_groq_api_key)

st.title("BharatCaptioner with Conversational Chatbot")
st.write(
    "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
)

# Sidebar details: author credit and external links.
st.sidebar.title("Developed by Harsh Sanga")
st.sidebar.write(
    "**For the Code**: [GitHub Repo](https://github.com/h-sanga)"
)
st.sidebar.write(
    "**Connect with me**: [LinkedIn](https://www.linkedin.com/in/harsh-sanga-2375a9272/)"
)

# Image upload or URL input. Either source is accepted; the upload takes
# precedence over the URL (see reset_chat_if_new_image below).
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
url = st.text_input("Or enter a valid image URL...")

# Initialize session state variables (persist across Streamlit reruns):
#   image_hash      - MD5 fingerprint of the currently loaded image, used to
#                     detect when a different image is supplied.
#   chat_history    - list of {"role", "content"} dicts fed to the LLM.
#   chatbot_started - whether the assistant's intro message was already added.
if "image_hash" not in st.session_state:
    st.session_state["image_hash"] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
if "chatbot_started" not in st.session_state:
    st.session_state["chatbot_started"] = False

# Module-level sentinels; populated later in the script once an image loads.
image = None
error_message = None
landmark = None
summary = None
caption = None

# Function to correct image orientation
def correct_image_orientation(img):
    try:
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == "Orientation":
                break
        exif = img._getexif()
        if exif is not None:
            orientation = exif[orientation]
            if orientation == 3:
                img = img.rotate(180, expand=True)
            elif orientation == 6:
                img = img.rotate(270, expand=True)
            elif orientation == 8:
                img = img.rotate(90, expand=True)
    except (AttributeError, KeyError, IndexError):
        pass
    return img

# Function to get a unique hash for the image
def get_image_hash(image):
    img_bytes = image.tobytes()
    return hashlib.md5(img_bytes).hexdigest()

# Check if new image or URL is uploaded and reset the chat history if necessary
def reset_chat_if_new_image():
    global image, landmark, summary, caption
    new_image_hash = None

    # Process the new image or URL
    if uploaded_file:
        image = Image.open(uploaded_file)
        image = correct_image_orientation(image)
        new_image_hash = get_image_hash(image)
    elif url:
        try:
            response = requests.get(url)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            image = correct_image_orientation(image)
            new_image_hash = get_image_hash(image)
        except (requests.exceptions.RequestException, UnidentifiedImageError):
            image = None
            new_image_hash = None
            error_message = (
                "Error: The provided URL is invalid or the image could not be loaded."
            )
            st.error(error_message)
    else:
        image = None

    # If the image is new, reset the chat and session state
    if new_image_hash and new_image_hash != st.session_state["image_hash"]:
        st.session_state["image_hash"] = new_image_hash
        st.session_state["chat_history"] = []
        st.session_state["chatbot_started"] = False  # Reset chatbot status

    return image

# Call the reset function to check for new images or URL
image = reset_chat_if_new_image()

# If an image is provided, run landmark identification and the chat UI.
if image is not None:
    # Keep the original image size for processing
    original_image = image.copy()  # Create a copy for identification

    # Identify the landmark using BharatCaptioner
    landmark, prob = identify_landmark(original_image)
    # NOTE(review): wikipedia.summary can raise DisambiguationError/PageError
    # for ambiguous or missing page titles — currently unhandled; verify
    # identify_landmark always returns a resolvable page name.
    summary = wikipedia.summary(landmark, sentences=3)  # Shortened summary
    st.write(f"**Landmark Identified:** {landmark}")

    # Display a smaller version of the image in the sidebar
    with st.sidebar:
        small_image = original_image.resize((128, 128))  # Resize for display
        st.image(small_image, caption=f"Landmark: {landmark}", use_column_width=True)
        # st.write(f"**Landmark:** {landmark}")

    # Display the original image before the conversation
    st.image(original_image, caption=f"Image of {landmark}", use_column_width=True)

    # Chatbot functionality: the caption is injected into the system prompt
    # so the LLM has grounded context about the image.
    st.write("### Chat with the Chatbot about the Image")
    caption = f"The landmark in the image is {landmark}. {summary}"

    # Chatbot introduction message — appended only once per image
    # (chatbot_started is reset whenever a new image is detected).
    if not st.session_state["chatbot_started"]:
        chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": chatbot_intro}
        )
        st.session_state["chatbot_started"] = True

    # Display chat history (replayed on every Streamlit rerun)
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input
    user_prompt = st.chat_input("Ask the Chatbot about the image...")

    if user_prompt:
        st.session_state["chat_history"].append({"role": "user", "content": user_prompt})
        st.chat_message("user").markdown(user_prompt)

        # Send the user's message to the chatbot: system prompt with the
        # image caption, followed by the full accumulated conversation.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful image conversational assistant, specialized in explaining about the monuments/landmarks of india. Give answer in points and in detail but dont hallucinate."
                + f"The caption of the image is: {caption}",
            },
            *st.session_state["chat_history"],
        ]

        # Simulate character-by-character response
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant", messages=messages
        )
        assistant_response = response.choices[0].message.content

        # Character-by-character output simulation (typewriter effect)
        with st.chat_message("assistant"):
            response_container = st.empty()  # Placeholder for response
            response_text = ""
            for char in assistant_response:
                response_text += char
                time.sleep(0.005)  # Adjust speed of character display
                response_container.markdown(response_text)

        # Append full response after display so the next rerun replays it
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": assistant_response}
        )