File size: 6,701 Bytes
23c2ce9
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
6248b37
c100878
23c2ce9
 
 
 
 
 
 
 
eacf903
23c2ce9
da02406
23c2ce9
 
da02406
23c2ce9
 
 
 
 
 
6248b37
 
 
 
 
 
 
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
6248b37
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
 
 
 
23c2ce9
 
 
 
 
 
 
 
6248b37
 
23c2ce9
 
6248b37
23c2ce9
6248b37
23c2ce9
6248b37
23c2ce9
6248b37
 
 
 
 
 
23c2ce9
 
 
 
 
6248b37
 
23c2ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
6248b37
23c2ce9
 
 
 
6248b37
 
 
 
 
 
 
 
 
 
 
 
23c2ce9
 
 
6248b37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import os
import json
import streamlit as st
from PIL import Image, UnidentifiedImageError, ExifTags
import requests
from io import BytesIO
import wikipedia
from BharatCaptioner import identify_landmark
from groq import Groq
import hashlib
import time  # To simulate character-by-character display

# Initialize the Groq API client.
# SECURITY: never hard-code API keys in source control — the previous revision
# shipped a literal key, which must be treated as leaked and rotated. Read the
# key from the environment (set it in the shell or via Streamlit secrets).
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    st.error("GROQ_API_KEY is not set. Export it or configure it in Streamlit secrets.")
    st.stop()  # halt the script run; nothing below works without a client
client = Groq(api_key=_groq_api_key)

st.title("BharatCaptioner with Conversational Chatbot")
st.write(
    "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
)

# Sidebar details: author credit and external links.
st.sidebar.title("Developed by Harsh Sanga")
st.sidebar.write(
    "**For the Code**: [GitHub Repo](https://github.com/h-sanga)"
)
st.sidebar.write(
    "**Connect with me**: [LinkedIn](https://www.linkedin.com/in/harsh-sanga-2375a9272/)"
)

# Image upload or URL input. Either source is accepted; the upload takes
# precedence over the URL (see reset_chat_if_new_image below).
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
url = st.text_input("Or enter a valid image URL...")

# Initialize session state variables (persist across Streamlit reruns):
#   image_hash      - MD5 fingerprint of the currently loaded image, used to
#                     detect when a different image is supplied.
#   chat_history    - list of {"role", "content"} dicts fed to the LLM.
#   chatbot_started - whether the assistant's intro message was already added.
if "image_hash" not in st.session_state:
    st.session_state["image_hash"] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
if "chatbot_started" not in st.session_state:
    st.session_state["chatbot_started"] = False

# Module-level sentinels; populated later in the script once an image loads.
image = None
error_message = None
landmark = None
summary = None
caption = None

# Function to correct image orientation
def correct_image_orientation(img):
    try:
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == "Orientation":
                break
        exif = img._getexif()
        if exif is not None:
            orientation = exif[orientation]
            if orientation == 3:
                img = img.rotate(180, expand=True)
            elif orientation == 6:
                img = img.rotate(270, expand=True)
            elif orientation == 8:
                img = img.rotate(90, expand=True)
    except (AttributeError, KeyError, IndexError):
        pass
    return img

# Function to get a unique hash for the image
def get_image_hash(image):
    img_bytes = image.tobytes()
    return hashlib.md5(img_bytes).hexdigest()

# Check if new image or URL is uploaded and reset the chat history if necessary
def reset_chat_if_new_image():
    global image, landmark, summary, caption
    new_image_hash = None

    # Process the new image or URL
    if uploaded_file:
        image = Image.open(uploaded_file)
        image = correct_image_orientation(image)
        new_image_hash = get_image_hash(image)
    elif url:
        try:
            response = requests.get(url)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            image = correct_image_orientation(image)
            new_image_hash = get_image_hash(image)
        except (requests.exceptions.RequestException, UnidentifiedImageError):
            image = None
            new_image_hash = None
            error_message = (
                "Error: The provided URL is invalid or the image could not be loaded."
            )
            st.error(error_message)
    else:
        image = None

    # If the image is new, reset the chat and session state
    if new_image_hash and new_image_hash != st.session_state["image_hash"]:
        st.session_state["image_hash"] = new_image_hash
        st.session_state["chat_history"] = []
        st.session_state["chatbot_started"] = False  # Reset chatbot status

    return image

# Call the reset function to check for new images or URL
image = reset_chat_if_new_image()

# If an image is provided, run landmark identification and the chat UI.
if image is not None:
    # Keep the original image size for processing
    original_image = image.copy()  # Create a copy for identification

    # Identify the landmark using BharatCaptioner
    landmark, prob = identify_landmark(original_image)
    # NOTE(review): wikipedia.summary can raise DisambiguationError/PageError
    # for ambiguous or missing page titles — currently unhandled; verify
    # identify_landmark always returns a resolvable page name.
    summary = wikipedia.summary(landmark, sentences=3)  # Shortened summary
    st.write(f"**Landmark Identified:** {landmark}")

    # Display a smaller version of the image in the sidebar
    with st.sidebar:
        small_image = original_image.resize((128, 128))  # Resize for display
        st.image(small_image, caption=f"Landmark: {landmark}", use_column_width=True)
        # st.write(f"**Landmark:** {landmark}")

    # Display the original image before the conversation
    st.image(original_image, caption=f"Image of {landmark}", use_column_width=True)

    # Chatbot functionality: the caption is injected into the system prompt
    # so the LLM has grounded context about the image.
    st.write("### Chat with the Chatbot about the Image")
    caption = f"The landmark in the image is {landmark}. {summary}"

    # Chatbot introduction message — appended only once per image
    # (chatbot_started is reset whenever a new image is detected).
    if not st.session_state["chatbot_started"]:
        chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": chatbot_intro}
        )
        st.session_state["chatbot_started"] = True

    # Display chat history (replayed on every Streamlit rerun)
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input
    user_prompt = st.chat_input("Ask the Chatbot about the image...")

    if user_prompt:
        st.session_state["chat_history"].append({"role": "user", "content": user_prompt})
        st.chat_message("user").markdown(user_prompt)

        # Send the user's message to the chatbot: system prompt with the
        # image caption, followed by the full accumulated conversation.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful image conversational assistant, specialized in explaining about the monuments/landmarks of india. Give answer in points and in detail but dont hallucinate."
                + f"The caption of the image is: {caption}",
            },
            *st.session_state["chat_history"],
        ]

        # Simulate character-by-character response
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant", messages=messages
        )
        assistant_response = response.choices[0].message.content

        # Character-by-character output simulation (typewriter effect)
        with st.chat_message("assistant"):
            response_container = st.empty()  # Placeholder for response
            response_text = ""
            for char in assistant_response:
                response_text += char
                time.sleep(0.005)  # Adjust speed of character display
                response_container.markdown(response_text)

        # Append full response after display so the next rerun replays it
        st.session_state["chat_history"].append(
            {"role": "assistant", "content": assistant_response}
        )