Spaces:

ZunairaHawwar
/

IcodeGuru_Chatbot

Running

App Files Files Community

IcodeGuru_Chatbot / app.py

ZunairaHawwar

Update app.py

ce5efcc verified 4 days ago

raw

history blame contribute delete

25.8 kB

	# app.py - Complete Enhanced ICodeGuru Chatbot
	import base64
	import datetime
	import json
	import logging
	import os
	import time
	import uuid
	from dataclasses import dataclass, asdict
	from pathlib import Path
	from typing import List, Optional, Dict, Any

	import nest_asyncio
	import streamlit as st
	import streamlit.components.v1 as components

	# LangChain imports (your teammate's backend)
	from langchain.vectorstores import Chroma
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.document_loaders import JSONLoader, DirectoryLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_groq import ChatGroq
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain.schema import Document
	from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
	from langchain.memory import ConversationBufferMemory
	from langchain.chains import ConversationalRetrievalChain

	# Enhanced components
	from components import render_response_box, render_enhanced_response_box
	from user_manager import UserManager, UserProfile
	from chat_manager import ChatManager, ChatSession

	# Apply asyncio patch for Streamlit compatibility
	nest_asyncio.apply()

	# ========== Configuration ==========
	GROQ_API_KEY = os.getenv("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    # The LLM client cannot be built without a key, so report it and end this script run.
	    st.error("⚠️ GROQ_API_KEY environment variable is not set!")
	    st.stop()

	# Model identifiers.
	GROQ_MODEL = "llama3-8b-8192"
	EMBEDDING_MODEL = "all-MiniLM-L6-v2"

	# On-disk locations used by the vector store, the document loader and the managers.
	CHROMA_PERSIST_DIR = "./chroma_db"
	DOCS_DIR = "./docs"
	USER_DATA_DIR = "./user_data"
	CHAT_DATA_DIR = "./chat_data"

	# Create the data directories up front; an already existing directory is not an error.
	for directory in (USER_DATA_DIR, CHAT_DATA_DIR, DOCS_DIR):
	    Path(directory).mkdir(exist_ok=True)

	# ========== Page Configuration ==========
	st.set_page_config(
	    page_title="ICodeGuru AI Assistant",
	    page_icon="🤖",
	    layout="centered",
	    initial_sidebar_state="expanded",
	)

	# Inject the project stylesheet when it is present; otherwise warn and keep
	# Streamlit's default look.
	try:
	    with open("style.css") as css_file:
	        stylesheet = css_file.read()
	except FileNotFoundError:
	    st.warning("style.css file not found. Using default styling.")
	else:
	    st.markdown(f"<style>{stylesheet}</style>", unsafe_allow_html=True)

	# ========== Initialize Managers ==========
	@st.cache_resource
	def get_user_manager():
	    """Return the UserManager rooted at USER_DATA_DIR (cached via st.cache_resource)."""
	    manager = UserManager(USER_DATA_DIR)
	    return manager


	@st.cache_resource
	def get_chat_manager():
	    """Return the ChatManager rooted at CHAT_DATA_DIR (cached via st.cache_resource)."""
	    manager = ChatManager(CHAT_DATA_DIR)
	    return manager


	# Module-level handles used by the rendering functions below.
	user_manager, chat_manager = get_user_manager(), get_chat_manager()

	# ========== Logo Function ==========
	def get_base64_image(image_path):
	    """Return the image at *image_path* as a base64 ``data:`` URL.

	    Args:
	        image_path: Path of the image file to embed. The content is always
	            labelled ``image/jpeg``, whatever the file actually contains.

	    Returns:
	        ``data:image/jpeg;base64,...`` for the file's bytes, or, when the file
	        does not exist, a ``data:image/svg+xml;base64,...`` URL holding a small
	        placeholder badge (a filled circle with the letters "IC").
	    """
	    try:
	        with open(image_path, "rb") as img_file:
	            encoded = base64.b64encode(img_file.read()).decode("ascii")
	    except FileNotFoundError:
	        # The previous hard-coded placeholder decoded to markup with mismatched
	        # tags (<type ...>IC</text>), which is not well-formed XML. Build the
	        # placeholder from readable, valid SVG source instead.
	        placeholder_svg = (
	            '<svg width="60" height="60" viewBox="0 0 60 60" fill="none" '
	            'xmlns="http://www.w3.org/2000/svg">'
	            '<circle cx="30" cy="30" r="30" fill="#667eea"/>'
	            '<text x="30" y="37" text-anchor="middle" font-family="sans-serif" '
	            'font-size="20" fill="#ffffff">IC</text>'
	            '</svg>'
	        )
	        encoded = base64.b64encode(placeholder_svg.encode("utf-8")).decode("ascii")
	        return f"data:image/svg+xml;base64,{encoded}"
	    return f"data:image/jpeg;base64,{encoded}"

	# ========== User Authentication ==========
	def render_user_auth():
	    """Render the sidebar login / profile-creation / logout controls.

	    Reads and writes ``st.session_state.user_id`` and
	    ``st.session_state.current_user``. A successful login, profile creation or
	    logout triggers ``st.rerun()``.
	    """
	    if 'user_id' not in st.session_state:
	        st.session_state.user_id = None

	    # ----- Logged-in view -----
	    if st.session_state.user_id:
	        user = st.session_state.get('current_user')
	        if user:
	            st.sidebar.markdown(f"### 👋 Welcome, {user.display_name}!")
	            st.sidebar.markdown(f"Level: {user.expertise_level}")

	        if st.sidebar.button("Logout"):
	            st.session_state.user_id = None
	            st.session_state.current_user = None
	            # Drop the active chat so the next login starts from a clean session.
	            if 'current_session_id' in st.session_state:
	                del st.session_state.current_session_id
	            st.rerun()
	        return

	    # ----- Logged-out view -----
	    st.sidebar.markdown("### 👤 User Profile")
	    auth_option = st.sidebar.radio("Choose option:", ["Login", "Create New Profile"])

	    if auth_option == "Create New Profile":
	        with st.sidebar.form("create_profile"):
	            username = st.text_input("Username", placeholder="Enter username")
	            display_name = st.text_input("Display Name", placeholder="Your display name")
	            expertise_level = st.selectbox(
	                "Programming Experience",
	                ["Beginner", "Intermediate", "Advanced", "Expert"],
	            )
	            preferred_languages = st.multiselect(
	                "Preferred Languages",
	                ["Python", "JavaScript", "Java", "C++", "C#", "Go", "Rust", "PHP", "Ruby"],
	            )
	            learning_goals = st.text_area(
	                "Learning Goals",
	                placeholder="What do you want to learn?",
	            )

	            if st.form_submit_button("Create Profile"):
	                # Strip first so whitespace-only input counts as missing.
	                username = username.strip()
	                display_name = display_name.strip()

	                if not (username and display_name):
	                    st.error("Username and Display Name are required!")
	                else:
	                    try:
	                        profile = UserProfile(
	                            user_id=str(uuid.uuid4()),
	                            username=username,
	                            display_name=display_name,
	                            expertise_level=expertise_level,
	                            preferred_languages=preferred_languages,
	                            learning_goals=learning_goals,
	                        )
	                        user_manager.create_user(profile)
	                    except Exception as e:
	                        st.error(f"Error creating profile: {e}")
	                    else:
	                        # Keep the rerun outside the try body so only profile
	                        # construction/storage errors are reported as such.
	                        st.session_state.user_id = profile.user_id
	                        st.session_state.current_user = profile
	                        st.rerun()
	        return

	    # Login
	    existing_users = user_manager.get_all_usernames()
	    if not existing_users:
	        st.sidebar.info("No existing profiles. Create a new one!")
	        return

	    selected_username = st.sidebar.selectbox("Select Username", existing_users)
	    if st.sidebar.button("Login"):
	        profile = user_manager.get_user_by_username(selected_username)
	        if profile:
	            st.session_state.user_id = profile.user_id
	            st.session_state.current_user = profile
	            st.rerun()

	# ========== Enhanced LangChain RAG System ==========
	class EnhancedLangChainRAGSystem:
	    """Retrieval-augmented chat backend.

	    Wires together HuggingFace embeddings, a persistent Chroma collection, a
	    Groq chat model and a ``ConversationalRetrievalChain`` with buffer memory.
	    Construction immediately builds all components (see ``setup_components``).
	    """

	    COLLECTION_NAME = "icodeguru_knowledge"
	    _log = logging.getLogger(__name__)

	    def __init__(self):
	        self.embeddings = None
	        self.vectorstore = None
	        self.llm = None
	        self.retrieval_chain = None
	        self.memory = ConversationBufferMemory(
	            memory_key="chat_history",
	            return_messages=True,
	            output_key="answer"
	        )
	        self.setup_components()

	    def setup_components(self):
	        """Setup all LangChain components: embeddings, LLM, vector store and chain."""
	        self.embeddings = HuggingFaceEmbeddings(
	            model_name=EMBEDDING_MODEL,
	            model_kwargs={'device': 'cpu'},
	            encode_kwargs={'normalize_embeddings': True}
	        )

	        self.llm = ChatGroq(
	            groq_api_key=GROQ_API_KEY,
	            model_name=GROQ_MODEL,
	            temperature=0.1,
	            max_tokens=1024
	        )

	        self.load_vectorstore()
	        self.setup_retrieval_chain()

	    def _open_vectorstore(self):
	        """Open (or create) the Chroma collection under CHROMA_PERSIST_DIR."""
	        return Chroma(
	            persist_directory=CHROMA_PERSIST_DIR,
	            embedding_function=self.embeddings,
	            collection_name=self.COLLECTION_NAME
	        )

	    def load_vectorstore(self):
	        """Load existing vectorstore or create empty one.

	        A failed first attempt is logged and retried once; a second failure
	        propagates to the caller.
	        """
	        try:
	            self.vectorstore = self._open_vectorstore()
	        except Exception:
	            self._log.exception("Opening the Chroma collection failed; retrying once")
	            self.vectorstore = self._open_vectorstore()

	    @staticmethod
	    def _escape_braces(value) -> str:
	        """Return ``str(value)`` with ``{``/``}`` doubled so it is literal text in a format template."""
	        return str(value).replace("{", "{{").replace("}", "}}")

	    def _build_prompt_template(self) -> str:
	        """Build the prompt template text, personalised from ``st.session_state['current_user']``.

	        Profile fields are user-supplied free text, so braces in them are
	        escaped; otherwise they would be parsed as template variables.
	        """
	        user = st.session_state.get('current_user')
	        if user:
	            esc = self._escape_braces
	            languages = ', '.join(user.preferred_languages) if user.preferred_languages else 'None specified'
	            user_context = f"""
	User Profile Context:
	- Name: {esc(user.display_name)}
	- Experience Level: {esc(user.expertise_level)}
	- Preferred Languages: {esc(languages)}
	- Learning Goals: {esc(user.learning_goals or 'None specified')}

	Please tailor your response to match the user's experience level and preferences.
	"""
	        else:
	            user_context = "User profile not available. Provide general guidance."

	        return f"""You are an expert assistant for iCodeGuru, a programming education platform.
	{user_context}

	Use the following context to answer the user's question comprehensively and accurately.
	Always provide relevant video links, website links, or resources when available in the context.
	Refer strictly to the provided context. If the answer isn't found in the context, explicitly say: "The provided knowledge base doesn't contain this information."

	Context: {{context}}
	Chat History: {{chat_history}}
	Human: {{question}}"""

	    def setup_retrieval_chain(self):
	        """Setup the conversational retrieval chain with personalization.

	        On failure the error is logged and ``self.retrieval_chain`` is set to
	        ``None``, which ``get_answer`` reports as "initializing".
	        """
	        # NOTE(review): the personalised prompt is fixed when this method runs
	        # (construction and after ingest_documents), not per question. If the
	        # instance is cached and reused across users, the profile baked in here
	        # may not be the current user's - confirm this is intended.
	        prompt = PromptTemplate(
	            template=self._build_prompt_template(),
	            input_variables=["context", "chat_history", "question"]
	        )

	        try:
	            retriever = self.vectorstore.as_retriever(
	                search_type="similarity",
	                search_kwargs={"k": 4}
	            )

	            self.retrieval_chain = ConversationalRetrievalChain.from_llm(
	                llm=self.llm,
	                retriever=retriever,
	                memory=self.memory,
	                combine_docs_chain_kwargs={"prompt": prompt},
	                return_source_documents=True,
	                verbose=False
	            )
	        except Exception:
	            self._log.exception("Building the retrieval chain failed")
	            self.retrieval_chain = None

	    def load_and_process_documents(self) -> List[Document]:
	        """Load and process JSON documents from the docs directory.

	        Returns:
	            All documents loaded from ``*.json`` files in DOCS_DIR, each tagged
	            with ``source_file`` and ``file_path`` metadata. Files that fail to
	            load are logged and skipped. Empty when the directory is missing or
	            holds no JSON files.
	        """
	        documents = []

	        if not os.path.exists(DOCS_DIR):
	            return documents

	        json_files = [f for f in os.listdir(DOCS_DIR) if f.endswith('.json')]

	        for filename in json_files:
	            file_path = os.path.join(DOCS_DIR, filename)
	            try:
	                loader = JSONLoader(
	                    file_path=file_path,
	                    jq_schema='.[]',
	                    text_content=False
	                )
	                file_docs = loader.load()
	            except Exception:
	                # Best effort: one bad file must not block the rest, but leave a trace.
	                self._log.exception("Skipping %s: could not load it", file_path)
	                continue

	            for doc in file_docs:
	                doc.metadata['source_file'] = filename
	                doc.metadata['file_path'] = file_path

	            documents.extend(file_docs)

	        return documents

	    def split_documents(self, documents: List[Document]) -> List[Document]:
	        """Split documents into smaller chunks (800 characters, 100 overlap)."""
	        text_splitter = RecursiveCharacterTextSplitter(
	            chunk_size=800,
	            chunk_overlap=100,
	            length_function=len,
	            separators=["\n\n", "\n", " ", ""]
	        )
	        return text_splitter.split_documents(documents)

	    def clear_knowledge_base(self):
	        """Clear the existing knowledge base and reopen an empty collection.

	        Failures are logged and swallowed (best effort).
	        """
	        try:
	            if self.vectorstore:
	                self.vectorstore.delete_collection()
	            self.vectorstore = self._open_vectorstore()
	        except Exception:
	            self._log.exception("Clearing the knowledge base failed")

	    def ingest_documents(self):
	        """Complete document ingestion pipeline.

	        Returns:
	            True when documents were loaded, chunked, stored and the retrieval
	            chain rebuilt; False when nothing was found or storing failed.
	        """
	        documents = self.load_and_process_documents()
	        if not documents:
	            return False

	        chunks = self.split_documents(documents)
	        if not chunks:
	            return False

	        try:
	            self.clear_knowledge_base()
	            self.vectorstore.add_documents(chunks)
	            self.vectorstore.persist()
	            # Rebuild the chain so its retriever points at the fresh collection.
	            self.setup_retrieval_chain()
	            return True
	        except Exception:
	            self._log.exception("Document ingestion failed")
	            return False

	    def _document_count(self) -> int:
	        """Return the number of stored documents, or 0 when it cannot be determined."""
	        try:
	            return self.vectorstore._collection.count()
	        except Exception:
	            # Private attribute unavailable or failing; probe with a search instead.
	            pass

	        try:
	            probe = self.vectorstore.similarity_search("test", k=1)
	            return len(probe) if probe else 0
	        except Exception:
	            return 0

	    def get_answer(self, question: str) -> dict:
	        """Get answer for a user question.

	        Returns:
	            A dict with at least ``"answer"`` and ``"source_documents"``. When
	            the chain is unavailable, the store is empty, or the chain raises,
	            a canned message with an empty source list is returned instead.
	        """
	        if not self.retrieval_chain:
	            return {
	                "answer": "⚠️ Knowledge base is initializing. Please try again in a moment.",
	                "source_documents": []
	            }

	        try:
	            if self._document_count() == 0:
	                return {
	                    "answer": "I'm ready to help! However, I don't have any specific documents loaded in my knowledge base right now. I can still answer general programming questions based on my training. Feel free to ask anything!",
	                    "source_documents": []
	                }

	            return self.retrieval_chain({"question": question})

	        except Exception:
	            self._log.exception("Answering the question failed")
	            return {
	                "answer": "I apologize, but I encountered an issue processing your question. Could you please try rephrasing it?",
	                "source_documents": []
	            }

	    def reset_conversation(self):
	        """Reset the conversation memory."""
	        self.memory.clear()

	# Initialize the RAG system
	@st.cache_resource
	def get_rag_system():
	    """Return the EnhancedLangChainRAGSystem, building it on first use.

	    Decorated with st.cache_resource so later calls reuse the existing
	    instance rather than constructing the embeddings, LLM and chain again.
	    """
	    rag_system = EnhancedLangChainRAGSystem()
	    return rag_system

	# ========== Session Management ==========
	def initialize_chat_session():
	    """Initialize or load the chat session into ``st.session_state``.

	    If ``current_session_id`` is set and the chat manager still knows that
	    session, its stored messages are copied into ``st.session_state.messages``.
	    Otherwise a stale id is discarded, a new session is created for the
	    logged-in user (if any), and ``messages`` is reset to an empty list, so
	    ``messages`` always exists when this function returns.
	    """
	    if 'current_session_id' in st.session_state:
	        session = chat_manager.get_session(st.session_state.current_session_id)
	        if session:
	            st.session_state.messages = [
	                {
	                    "role": msg.role,
	                    "content": msg.content,
	                    "message_id": msg.message_id,
	                    "rating": msg.rating,
	                    "is_bookmarked": msg.is_bookmarked,
	                    "source_documents": msg.source_documents,
	                }
	                for msg in session.messages
	            ]
	            return

	        # The stored id no longer resolves to a session; forget it and fall
	        # through to start a fresh one rather than leaving messages
	        # unset or showing another session's transcript.
	        del st.session_state.current_session_id

	    user_id = st.session_state.get('user_id')
	    if user_id:
	        st.session_state.current_session_id = chat_manager.create_session(user_id)
	    st.session_state.messages = []

	# ========== Chat History Management ==========
	def render_chat_history_sidebar():
	    """Render the logged-in user's chat sessions in the sidebar.

	    Shows up to the first 10 sessions returned by the chat manager, each with
	    a button to open it and a button to delete it. Both actions trigger
	    ``st.rerun()``.
	    """
	    user_id = st.session_state.get('user_id')
	    if not user_id:
	        return

	    user_sessions = chat_manager.get_user_sessions(user_id)
	    if not user_sessions:
	        return

	    st.sidebar.markdown("### 💬 Chat History")

	    for session in user_sessions[:10]:  # Show last 10 sessions
	        session_title = session.title[:30] + "..." if len(session.title) > 30 else session.title

	        col1, col2 = st.sidebar.columns([3, 1])

	        with col1:
	            if st.button(session_title, key=f"session_{session.session_id}"):
	                st.session_state.current_session_id = session.session_id
	                initialize_chat_session()
	                # Same as "New Chat": do not carry the previous chat's turns
	                # into the LLM memory for the session being opened.
	                get_rag_system().reset_conversation()
	                st.rerun()

	        with col2:
	            if st.button("🗑️", key=f"delete_{session.session_id}", help="Delete session"):
	                chat_manager.delete_session(session.session_id)
	                if st.session_state.get('current_session_id') == session.session_id:
	                    del st.session_state.current_session_id
	                    # The active chat is gone, so its LLM memory goes too.
	                    get_rag_system().reset_conversation()
	                st.rerun()

	# ========== Enhanced Sidebar Features ==========
	def render_enhanced_sidebar():
	    """Render enhanced sidebar with all features.

	    Always renders the authentication controls. For a logged-in user it also
	    renders chat history, a "New Chat" button, model selection, knowledge-base
	    refresh, chat export, usage statistics and bookmarked responses.
	    """
	    global GROQ_MODEL
	    # User Authentication
	    render_user_auth()

	    if not st.session_state.get('user_id'):
	        return

	    # Chat History
	    render_chat_history_sidebar()

	    st.sidebar.markdown("---")

	    # New Chat Button
	    if st.sidebar.button("🆕 New Chat", type="primary"):
	        user_id = st.session_state.user_id
	        session_id = chat_manager.create_session(user_id)
	        st.session_state.current_session_id = session_id
	        st.session_state.messages = []
	        get_rag_system().reset_conversation()
	        st.rerun()

	    # Model Selection
	    st.sidebar.markdown("### 🧠 AI Settings")
	    model_options = ["llama3-8b-8192", "llama3-70b-8192"]
	    selected_model = st.sidebar.selectbox("Choose LLM Model", model_options, index=0)

	    # Compare against the model the cached LLM actually has, not the module
	    # constant: the constant is re-assigned to its default on every script run,
	    # so selecting the default again would otherwise never be applied.
	    GROQ_MODEL = selected_model
	    rag_system = get_rag_system()
	    if getattr(rag_system.llm, "model_name", None) != selected_model:
	        rag_system.llm.model_name = selected_model

	    # Knowledge Base Management
	    st.sidebar.markdown("### 📚 Knowledge Base")
	    if st.sidebar.button("🔄 Refresh Knowledge Base"):
	        with st.spinner("Refreshing knowledge base..."):
	            success = rag_system.ingest_documents()
	        if success:
	            st.sidebar.success("✅ Knowledge base refreshed!")
	        else:
	            st.sidebar.warning("⚠️ No documents found to load")

	    # Export Chat History
	    st.sidebar.markdown("### 📤 Export")
	    if st.sidebar.button("📄 Export Chat History"):
	        if st.session_state.get('current_session_id'):
	            export_data = chat_manager.export_chat_history(
	                st.session_state.user_id,
	                st.session_state.current_session_id
	            )
	            if export_data:
	                st.sidebar.download_button(
	                    label="⬇️ Download JSON",
	                    data=json.dumps(export_data, indent=2),
	                    file_name=f"chat_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
	                    mime="application/json"
	                )

	    # User Statistics
	    st.sidebar.markdown("### 📊 Your Stats")
	    # NOTE(review): user_stats is fetched but not displayed; kept in case the
	    # call has side effects in UserManager - confirm and remove if not.
	    user_stats = user_manager.get_user_stats(st.session_state.user_id)
	    chat_stats = chat_manager.get_chat_statistics(st.session_state.user_id)

	    col1, col2 = st.sidebar.columns(2)
	    with col1:
	        st.metric("Total Chats", chat_stats.get('total_sessions', 0))
	    with col2:
	        st.metric("Messages", chat_stats.get('total_messages', 0))

	    st.sidebar.metric("Bookmarks", chat_stats.get('bookmarked_messages', 0))

	    # Bookmarked Messages
	    bookmarked = chat_manager.get_bookmarked_messages(st.session_state.user_id)
	    if bookmarked:
	        st.sidebar.markdown("### 🔖 Bookmarked Responses")
	        for bookmark in bookmarked[:5]:  # Show 5 most recent
	            content = bookmark['message']['content']
	            # Only add the ellipsis when the text was actually cut, matching
	            # how session titles are shortened elsewhere in this file.
	            message_preview = content[:50] + "..." if len(content) > 50 else content
	            if st.sidebar.button(message_preview, key=f"bookmark_{bookmark['message']['message_id']}"):
	                # Show full bookmarked message
	                st.sidebar.write(content)

	# ========== Message Rating Handler ==========
	def handle_component_value():
	    """Apply a pending rating/bookmark action stored in ``st.session_state.component_value``.

	    Does nothing when no truthy value is pending. After dispatching (or when
	    the action is unrecognised) the pending value is reset to ``None``.
	    """
	    payload = st.session_state.get('component_value')
	    if not payload:
	        return

	    action = payload.get('action')
	    if action == 'rate_message':
	        chat_manager.rate_message(
	            payload['session_id'],
	            payload['message_id'],
	            payload['rating'],
	        )
	    elif action == 'bookmark_message':
	        chat_manager.bookmark_message(
	            payload['session_id'],
	            payload['message_id'],
	            payload['is_bookmarked'],
	        )

	    # Consume the value so the same action is not applied again on the next run.
	    st.session_state.component_value = None

	# ========== Main App Logic ==========
	def main():
	    """Main application logic.

	    Processes pending component actions, draws the header and sidebar, and,
	    for a logged-in user, replays the current chat and handles one new prompt
	    per script run (store user message, generate answer, store and render it).
	    """

	    # Handle component interactions
	    handle_component_value()

	    # Display logo and header
	    image_data_url = get_base64_image("10001.jpeg")
	    st.markdown(f"""
	<div class="custom-header">
	<h1><img src="{image_data_url}" class="chatbot-logo" alt="Bot" /> ICodeGuru AI Assistant</h1>
	</div>
	""", unsafe_allow_html=True)

	    # Render enhanced sidebar
	    render_enhanced_sidebar()

	    # Initialize RAG system
	    rag_system = get_rag_system()

	    # Check if user is logged in
	    if not st.session_state.get('user_id'):
	        st.info("👈 Please login or create a profile to start chatting!")
	        return

	    # Initialize chat session
	    initialize_chat_session()

	    def generate_response(user_query):
	        """Generate AI response using LangChain system.

	        Returns:
	            A ``(answer_text, source_files)`` tuple on every path, so the
	            caller can always unpack two values.
	        """
	        if not user_query or not user_query.strip():
	            # Previously a bare string was returned here, which broke the
	            # caller's two-value unpacking for whitespace-only input.
	            return "Please provide a valid question.", []

	        try:
	            response = rag_system.get_answer(user_query)
	            answer = response.get("answer", "I apologize, but I couldn't generate a response. Please try again.")

	            source_docs = response.get("source_documents", [])
	            if source_docs:
	                sources_text = "\n\n📚 Sources:\n"
	                # Only the first two sources are previewed in the answer text.
	                for i, doc in enumerate(source_docs[:2], 1):
	                    source_file = doc.metadata.get('source_file', 'Unknown')
	                    content_preview = doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
	                    sources_text += f"{i}. {source_file}: {content_preview}\n"

	                answer += sources_text

	            return answer, [doc.metadata.get('source_file', '') for doc in source_docs]

	        except Exception:
	            return "I apologize, but I encountered an issue processing your question. Could you please try again.", []

	    # Display chat messages
	    for i, msg in enumerate(st.session_state.messages):
	        with st.chat_message(msg["role"]):
	            if msg["role"] == "assistant":
	                message_id = msg.get("message_id", f"msg-{i}")
	                session_id = st.session_state.get("current_session_id", "")

	                render_enhanced_response_box(
	                    msg["content"],
	                    message_id,
	                    session_id,
	                    is_bookmarked=msg.get("is_bookmarked", False),
	                    rating=msg.get("rating"),
	                    show_actions=True
	                )
	            else:
	                st.markdown(msg["content"])

	    # Chat input
	    prompt = st.chat_input("Type your message...")

	    if prompt:
	        # Add user message to session
	        user_message_id = chat_manager.add_message(
	            st.session_state.current_session_id,
	            "user",
	            prompt
	        )

	        # Add to session state
	        st.session_state.messages.append({
	            "role": "user",
	            "content": prompt,
	            "message_id": user_message_id
	        })

	        with st.chat_message("user"):
	            st.markdown(prompt)

	        # Generate and display assistant response
	        with st.chat_message("assistant"):
	            with st.spinner("Thinking..."):
	                full_response, source_docs = generate_response(prompt)

	            # Add assistant message to session
	            assistant_message_id = chat_manager.add_message(
	                st.session_state.current_session_id,
	                "assistant",
	                full_response,
	                source_docs
	            )

	            # Display response with enhanced box
	            render_enhanced_response_box(
	                full_response,
	                assistant_message_id,
	                st.session_state.current_session_id,
	                is_bookmarked=False,
	                rating=None,
	                show_actions=True
	            )

	        # Add to session state
	        st.session_state.messages.append({
	            "role": "assistant",
	            "content": full_response,
	            "message_id": assistant_message_id,
	            "rating": None,
	            "is_bookmarked": False,
	            "source_documents": source_docs
	        })

	        # Update user chat count
	        user_manager.increment_chat_count(st.session_state.user_id)

	if __name__ == "__main__":
	    main()