import streamlit as st
import importlib
import os
import json
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Set page title and layout.
st.set_page_config(page_title="Data Science Tutor", layout="wide")

# Hide Streamlit's default multipage navigation so only the custom sidebar
# radio below is shown.  Bug fix: the original st.markdown call carried an
# empty string, so the CSS payload it was meant to inject (per the comment
# above it) was missing and the default nav stayed visible.
st.markdown(
    """
    <style>
    [data-testid="stSidebarNav"] {display: none;}
    </style>
    """,
    unsafe_allow_html=True,
)

# Sidebar image shown above the CRISP-DM step selector.
st.sidebar.image(
    "data2.jpeg",  # NOTE(review): hard-coded local path — confirm the asset ships with the app
    use_container_width=True,
)
# Sidebar navigation: map each human-readable CRISP-DM step label to the
# module name inside the ``pages`` package (None means "render the intro
# page inline instead of importing a module").
st.sidebar.title("CRISP-DM Steps")

_SECTION_PAGES = [
    ("Main Page", None),
    ("1. Business Understanding", "1_Business_understanding"),
    ("2. Data understanding", "2_Data_understanding"),
    ("3. Data Preparation", "3_Data_preparation"),
    ("4. Feature Engineering", "4_Feature_engineering"),
    ("5. Modeling", "5_Modeling"),
    ("6. Evaluation", "6_Evaluation"),
    ("7. Deployment & Testing", "7_Deployment"),
    ("8. ML, Deep Learning & Transformers", "8_Models"),
]
sections = dict(_SECTION_PAGES)

# The first entry ("Main Page") is pre-selected by default.
selected_section = st.sidebar.radio(
    "Select a topic:",
    [label for label, _ in _SECTION_PAGES],
    index=0,
)
# If the user selects "Main Page", show the introduction content inline;
# otherwise dynamically import and run the matching module from pages/.
# Bug fix: the title and the markdown below contained mojibake ("π", "β")
# from an earlier encoding corruption; the list-item separators are
# restored as em dashes and the unreconstructable title glyph is dropped.
if sections[selected_section] is None:
    st.title("Welcome to the Data Science Tutor!")
    st.markdown(
        """
        About This App
        This application is designed to guide you through the CRISP-DM process
        for data science projects. Each section in the sidebar highlights a
        different step in the process, providing structured lessons, best
        practices, and hands-on examples.
        App Sections
        - 1. Business Understanding — Clarify project objectives, requirements, and success criteria.
        - 2. Data Understanding — Explore data sources, structures, and initial insights.
        - 3. Data Preparation — Clean, integrate, and transform the data for modeling.
        - 4. Feature Engineering — Engineer and select relevant features for better models.
        - 5. Modeling — Develop, train, and tune predictive models.
        - 6. Evaluation — Assess performance metrics and refine models.
        - 7. Deployment & Testing — Deploy models into production environments and validate.
        - 8. ML, Deep Learning & Transformers — Delve deeper into advanced methods and architectures.
        """,
        unsafe_allow_html=True,
    )
else:
    # Load the selected module from the pages folder and hand off rendering.
    module_name = f"pages.{sections[selected_section]}"
    module = importlib.import_module(module_name)
    # Each page module is expected to expose a run() entry point.
    # NOTE(review): grounded only by this call site — verify against pages/.
    module.run()
# --- OpenAI client setup -------------------------------------------------
# Prefer the key from the environment (loaded from .env above); fall back
# to asking the user for one in the sidebar.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.sidebar.title("Ask AI")
    api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")

# Bug fix: the original called OpenAI() with no arguments, so a key typed
# into the sidebar was silently ignored.  Pass it explicitly when present;
# with no key, fall back to the SDK's own environment-variable lookup.
client = OpenAI(api_key=api_key) if api_key else OpenAI()
# Sidebar checkboxes that let the user steer the assistant's answers
# toward particular themes.
st.sidebar.title("Focus Areas")

focus_areas = [
    "Compare Models",
    "Delivery Speed vs Costs",
    "Ethical Considerations in AI",
    "Interpretability & Explainability",
    "Business Impact & ROI",
    "Scalability & Performance",
    "Data Privacy & Security",
]

# Each checkbox returns True when ticked, so filtering on it keeps
# exactly the areas the user enabled (in display order).
selected_focus_areas = list(filter(st.sidebar.checkbox, focus_areas))
# --- Main chat area ------------------------------------------------------
st.title("Data Science Tutor Chat")
st.image("https://miro.medium.com/v2/resize:fit:100/format:webp/1*NfE0G4nEj4xX7Z_8dSx83g.png")

# Seed the conversation history and the one-shot context flag on first
# load; setdefault leaves existing values untouched across reruns.
st.session_state.setdefault(
    "messages",
    [{"role": "assistant", "content": "How can I assist you with Data Science today?"}],
)
st.session_state.setdefault("context_prompt_added", False)

st.write("---")
st.subheader("Chat")

# Replay the full conversation so far.
for entry in st.session_state["messages"]:
    st.chat_message(entry["role"]).write(entry["content"])
if prompt := st.chat_input("Enter your question here:"):
    # Extra steering context derived from the sidebar toggles, if any.
    focus_context = ""
    if selected_focus_areas:
        focus_context = f"Focus on {', '.join(selected_focus_areas)} in your response."

    # Tutoring guardrails, injected once at the start of the conversation.
    prompting_instructions = """
    You are a Data Science tutor. Your responses should be focused on Data Science related subjects.
    If a question is not related to Data Science, respond with: "Sorry, I can only assist with Data Science related questions."
    Ensure that your responses are not harmful, denigratory, or discriminatory for security and privacy reasons.
    """

    # Tell the model which page the user is currently viewing.
    section_context = f"The user is currently viewing the {selected_section} section. "

    # Inject the full tutoring context only on the first message; after
    # that, send just the section context plus the raw question.
    if not st.session_state["context_prompt_added"]:
        st.session_state["messages"].append(
            {
                "role": "user",
                "content": f"{section_context}{prompt}\n{focus_context}\n{prompting_instructions}",
            }
        )
        st.session_state["context_prompt_added"] = True
    else:
        st.session_state["messages"].append(
            {"role": "user", "content": f"{section_context}{prompt}"}
        )

    # Bug fix: show only the user's own question in the transcript.  The
    # original echoed messages[-1]["content"], which exposed the injected
    # prompting instructions and section context to the user.
    # NOTE(review): the stored message still contains the injected context,
    # so the replay loop above will show it on rerun — consider storing a
    # separate display text if that matters.
    st.chat_message("user").write(prompt)

    # Call the model with the entire conversation history.
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=st.session_state["messages"],
    )
    # Guard: the SDK types message.content as Optional — avoid .strip() on None.
    response_text = (completion.choices[0].message.content or "").strip()
    st.session_state["messages"].append({"role": "assistant", "content": response_text})
    st.chat_message("assistant").write(response_text)

    # --- Conversation logging (append-only JSON list on disk) ------------
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "user_query": prompt,
        "assistant_response": response_text,
        "focus_areas": selected_focus_areas,
        "selected_section": selected_section,
    }
    log_file_path = os.path.join("logs", "conversation_logs.json")
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Robustness fix: an empty or corrupt log file previously crashed the
    # app with json.JSONDecodeError; start a fresh log in that case.
    logs = []
    if os.path.exists(log_file_path):
        try:
            with open(log_file_path, "r", encoding="utf-8") as log_file:
                logs = json.load(log_file)
        except (json.JSONDecodeError, OSError):
            logs = []
        if not isinstance(logs, list):
            logs = []

    logs.append(log_entry)
    with open(log_file_path, "w", encoding="utf-8") as log_file:
        json.dump(logs, log_file, indent=4)