import streamlit as st
import importlib
import os
import json
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Set page title and layout.
st.set_page_config(page_title="Data Science Tutor", layout="wide")

# Hide Streamlit's default multipage navigation so only the custom sidebar
# radio below is shown.  Bug fix: the original st.markdown call carried an
# empty string, so the CSS payload it was meant to inject (per the comment
# above it) was missing and the default nav stayed visible.
st.markdown(
    """
    <style>
    [data-testid="stSidebarNav"] {display: none;}
    </style>
    """,
    unsafe_allow_html=True,
)

# Sidebar image shown above the CRISP-DM step selector.
st.sidebar.image(
    "data2.jpeg",  # NOTE(review): hard-coded local path — confirm the asset ships with the app
    use_container_width=True,
)
# Sidebar navigation: map each human-readable CRISP-DM step label to the
# module name inside the ``pages`` package (None means "render the intro
# page inline instead of importing a module").
st.sidebar.title("CRISP-DM Steps")

_SECTION_PAGES = [
    ("Main Page", None),
    ("1. Business Understanding", "1_Business_understanding"),
    ("2. Data understanding", "2_Data_understanding"),
    ("3. Data Preparation", "3_Data_preparation"),
    ("4. Feature Engineering", "4_Feature_engineering"),
    ("5. Modeling", "5_Modeling"),
    ("6. Evaluation", "6_Evaluation"),
    ("7. Deployment & Testing", "7_Deployment"),
    ("8. ML, Deep Learning & Transformers", "8_Models"),
]
sections = dict(_SECTION_PAGES)

# The first entry ("Main Page") is pre-selected by default.
selected_section = st.sidebar.radio(
    "Select a topic:",
    [label for label, _ in _SECTION_PAGES],
    index=0,
)
# If the user selects "Main Page", show the introduction content inline;
# otherwise dynamically import and run the matching module from pages/.
# Bug fix: the title and the markdown below contained mojibake ("π", "β")
# from an earlier encoding corruption; the list-item separators are
# restored as em dashes and the unreconstructable title glyph is dropped.
if sections[selected_section] is None:
    st.title("Welcome to the Data Science Tutor!")
    st.markdown(
        """
        About This App
        This application is designed to guide you through the CRISP-DM process
        for data science projects. Each section in the sidebar highlights a
        different step in the process, providing structured lessons, best
        practices, and hands-on examples.
        App Sections
        - 1. Business Understanding — Clarify project objectives, requirements, and success criteria.
        - 2. Data Understanding — Explore data sources, structures, and initial insights.
        - 3. Data Preparation — Clean, integrate, and transform the data for modeling.
        - 4. Feature Engineering — Engineer and select relevant features for better models.
        - 5. Modeling — Develop, train, and tune predictive models.
        - 6. Evaluation — Assess performance metrics and refine models.
        - 7. Deployment & Testing — Deploy models into production environments and validate.
        - 8. ML, Deep Learning & Transformers — Delve deeper into advanced methods and architectures.
        """,
        unsafe_allow_html=True,
    )
else:
    # Load the selected module from the pages folder and hand off rendering.
    module_name = f"pages.{sections[selected_section]}"
    module = importlib.import_module(module_name)
    # Each page module is expected to expose a run() entry point.
    # NOTE(review): grounded only by this call site — verify against pages/.
    module.run()
# --- OpenAI client setup -------------------------------------------------
# Prefer the key from the environment (loaded from .env above); fall back
# to asking the user for one in the sidebar.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.sidebar.title("Ask AI")
    api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")

# Bug fix: the original called OpenAI() with no arguments, so a key typed
# into the sidebar was silently ignored.  Pass it explicitly when present;
# with no key, fall back to the SDK's own environment-variable lookup.
client = OpenAI(api_key=api_key) if api_key else OpenAI()
# Sidebar checkboxes that let the user steer the assistant's answers
# toward particular themes.
st.sidebar.title("Focus Areas")

focus_areas = [
    "Compare Models",
    "Delivery Speed vs Costs",
    "Ethical Considerations in AI",
    "Interpretability & Explainability",
    "Business Impact & ROI",
    "Scalability & Performance",
    "Data Privacy & Security",
]

# Each checkbox returns True when ticked, so filtering on it keeps
# exactly the areas the user enabled (in display order).
selected_focus_areas = list(filter(st.sidebar.checkbox, focus_areas))
# --- Main chat area ------------------------------------------------------
st.title("Data Science Tutor Chat")
st.image("https://miro.medium.com/v2/resize:fit:100/format:webp/1*NfE0G4nEj4xX7Z_8dSx83g.png")

# Seed the conversation history and the one-shot context flag on first
# load; setdefault leaves existing values untouched across reruns.
st.session_state.setdefault(
    "messages",
    [{"role": "assistant", "content": "How can I assist you with Data Science today?"}],
)
st.session_state.setdefault("context_prompt_added", False)

st.write("---")
st.subheader("Chat")

# Replay the full conversation so far.
for entry in st.session_state["messages"]:
    st.chat_message(entry["role"]).write(entry["content"])
if prompt := st.chat_input("Enter your question here:"):
    # Extra steering context derived from the sidebar toggles, if any.
    focus_context = ""
    if selected_focus_areas:
        focus_context = f"Focus on {', '.join(selected_focus_areas)} in your response."

    # Tutoring guardrails, injected once at the start of the conversation.
    prompting_instructions = """
    You are a Data Science tutor. Your responses should be focused on Data Science related subjects.
    If a question is not related to Data Science, respond with: "Sorry, I can only assist with Data Science related questions."
    Ensure that your responses are not harmful, denigratory, or discriminatory for security and privacy reasons.
    """

    # Tell the model which page the user is currently viewing.
    section_context = f"The user is currently viewing the {selected_section} section. "

    # Inject the full tutoring context only on the first message; after
    # that, send just the section context plus the raw question.
    if not st.session_state["context_prompt_added"]:
        st.session_state["messages"].append(
            {
                "role": "user",
                "content": f"{section_context}{prompt}\n{focus_context}\n{prompting_instructions}",
            }
        )
        st.session_state["context_prompt_added"] = True
    else:
        st.session_state["messages"].append(
            {"role": "user", "content": f"{section_context}{prompt}"}
        )

    # Bug fix: show only the user's own question in the transcript.  The
    # original echoed messages[-1]["content"], which exposed the injected
    # prompting instructions and section context to the user.
    # NOTE(review): the stored message still contains the injected context,
    # so the replay loop above will show it on rerun — consider storing a
    # separate display text if that matters.
    st.chat_message("user").write(prompt)

    # Call the model with the entire conversation history.
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=st.session_state["messages"],
    )
    # Guard: the SDK types message.content as Optional — avoid .strip() on None.
    response_text = (completion.choices[0].message.content or "").strip()
    st.session_state["messages"].append({"role": "assistant", "content": response_text})
    st.chat_message("assistant").write(response_text)

    # --- Conversation logging (append-only JSON list on disk) ------------
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "user_query": prompt,
        "assistant_response": response_text,
        "focus_areas": selected_focus_areas,
        "selected_section": selected_section,
    }
    log_file_path = os.path.join("logs", "conversation_logs.json")
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Robustness fix: an empty or corrupt log file previously crashed the
    # app with json.JSONDecodeError; start a fresh log in that case.
    logs = []
    if os.path.exists(log_file_path):
        try:
            with open(log_file_path, "r", encoding="utf-8") as log_file:
                logs = json.load(log_file)
        except (json.JSONDecodeError, OSError):
            logs = []
        if not isinstance(logs, list):
            logs = []

    logs.append(log_entry)
    with open(log_file_path, "w", encoding="utf-8") as log_file:
        json.dump(logs, log_file, indent=4)