"""BharatCaptioner — Streamlit app.

Identifies Indian landmarks in an uploaded or URL-linked image via the
BharatCaptioner model, shows a short Wikipedia summary, and lets the user
chat about the image through a Groq-hosted LLM.
"""

import os
import json
import streamlit as st
from PIL import Image, UnidentifiedImageError, ExifTags
import requests
from io import BytesIO
import wikipedia
from BharatCaptioner import identify_landmark
from groq import Groq
import hashlib
import time  # used to simulate character-by-character display

# --- Groq client ------------------------------------------------------------
# SECURITY: an API key was previously hardcoded here. A key committed to
# source control must be considered leaked and revoked. The key is now taken
# from Streamlit secrets (if configured) or the GROQ_API_KEY env variable.
try:
    os.environ.setdefault("GROQ_API_KEY", st.secrets["GROQ_API_KEY"])
except Exception:
    pass  # no secrets file: rely on an already-set environment variable
client = Groq()

# --- Page chrome ------------------------------------------------------------
st.title("BharatCaptioner with Conversational Chatbot")
st.write(
    "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
)

st.sidebar.title("Developed by Harsh Sanga")
st.sidebar.write("**For the Code**: [GitHub Repo](https://github.com/h-sanga)")
st.sidebar.write(
    "**Connect with me**: [LinkedIn](https://www.linkedin.com/in/harsh-sanga-2375a9272/)"
)

# --- Inputs -----------------------------------------------------------------
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
url = st.text_input("Or enter a valid image URL...")

# --- Session state ----------------------------------------------------------
# image_hash: pixel hash of the current image, used to detect image changes.
# chat_history: list of {"role", "content"} dicts for the chat UI.
# chatbot_started: whether the intro message has already been emitted.
if "image_hash" not in st.session_state:
    st.session_state["image_hash"] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
if "chatbot_started" not in st.session_state:
    st.session_state["chatbot_started"] = False


def correct_image_orientation(img):
    """Rotate *img* according to its EXIF Orientation tag, if present.

    Cameras store the intended rotation in EXIF tag 274 ("Orientation");
    without this fix, portrait photos would display sideways.
    Returns the (possibly rotated) image.
    """
    ORIENTATION_TAG = 274  # EXIF "Orientation" (ExifTags.TAGS[274])
    try:
        exif = img._getexif()
        if exif is not None:
            orientation = exif.get(ORIENTATION_TAG)
            if orientation == 3:
                img = img.rotate(180, expand=True)
            elif orientation == 6:
                img = img.rotate(270, expand=True)
            elif orientation == 8:
                img = img.rotate(90, expand=True)
    except (AttributeError, KeyError, IndexError):
        # No EXIF data (e.g. PNG) — leave the image untouched.
        pass
    return img


def get_image_hash(image):
    """Return an MD5 hex digest of the raw pixel data.

    Non-cryptographic use: the hash only serves to detect whether a
    *different* image was supplied between reruns.
    """
    return hashlib.md5(image.tobytes()).hexdigest()


def reset_chat_if_new_image():
    """Load the image from the uploader or the URL field.

    If the loaded image differs from the one in the session (compared by
    pixel hash), the chat history is cleared so the conversation restarts
    for the new picture.

    Returns:
        The loaded PIL image, or None when no valid image was supplied.
    """
    image = None
    new_image_hash = None

    if uploaded_file:
        image = correct_image_orientation(Image.open(uploaded_file))
        new_image_hash = get_image_hash(image)
    elif url:
        try:
            # timeout keeps the app from hanging forever on a dead URL
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            image = correct_image_orientation(Image.open(BytesIO(response.content)))
            new_image_hash = get_image_hash(image)
        except (requests.exceptions.RequestException, UnidentifiedImageError):
            image = None
            st.error(
                "Error: The provided URL is invalid or the image could not be loaded."
            )

    # New image: reset the conversation state.
    if new_image_hash and new_image_hash != st.session_state["image_hash"]:
        st.session_state["image_hash"] = new_image_hash
        st.session_state["chat_history"] = []
        st.session_state["chatbot_started"] = False

    return image


image = reset_chat_if_new_image()

if image is not None:
    # Keep an unscaled copy for identification and full-size display.
    original_image = image.copy()

    # Identify the landmark using BharatCaptioner.
    landmark, prob = identify_landmark(original_image)

    # Wikipedia lookups can fail (ambiguous titles, missing pages, network
    # errors); fall back to an empty summary instead of crashing the app.
    try:
        summary = wikipedia.summary(landmark, sentences=3)  # shortened summary
    except Exception:
        summary = ""

    st.write(f"**Landmark Identified:** {landmark}")

    # Small preview in the sidebar; thumbnail() preserves the aspect ratio
    # (resize((128, 128)) would distort non-square photos).
    with st.sidebar:
        small_image = original_image.copy()
        small_image.thumbnail((128, 128))
        st.image(small_image, caption=f"Landmark: {landmark}", use_column_width=True)

    # Full image above the conversation.
    st.image(original_image, caption=f"Image of {landmark}", use_column_width=True)

    # --- Chatbot -------------------------------------------------------------
    st.write("### Chat with the Chatbot about the Image")
    caption = f"The landmark in the image is {landmark}. {summary}"

    # One-time introduction message for the current image.
    if not st.session_state["chatbot_started"]:
        chatbot_intro = (
            f"Hello! I see the image is of **{landmark}**. {summary} "
            f"**Would you like to know more** about this landmark?"
        )
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": chatbot_intro}
        )
        st.session_state["chatbot_started"] = True

    # Replay the stored conversation.
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    user_prompt = st.chat_input("Ask the Chatbot about the image...")

    if user_prompt:
        st.session_state["chat_history"].append(
            {"role": "user", "content": user_prompt}
        )
        st.chat_message("user").markdown(user_prompt)

        # System prompt anchors the model to the identified landmark.
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful image conversational assistant, "
                    "specialized in explaining about the monuments/landmarks "
                    "of india. Give answer in points and in detail but dont "
                    "hallucinate. "
                    f"The caption of the image is: {caption}"
                ),
            },
            *st.session_state["chat_history"],
        ]

        response = client.chat.completions.create(
            model="llama-3.1-8b-instant", messages=messages
        )
        assistant_response = response.choices[0].message.content

        # Simulate character-by-character streaming of the response.
        with st.chat_message("assistant"):
            response_container = st.empty()  # placeholder that gets rewritten
            response_text = ""
            for char in assistant_response:
                response_text += char
                time.sleep(0.005)  # adjust speed of character display
                response_container.markdown(response_text)

        # Store the full response so it replays on the next rerun.
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": assistant_response}
        )