import importlib
import json
import os
from datetime import datetime

import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI, OpenAIError

# Load variables from a local .env file into the process environment.
load_dotenv()

# Page-wide Streamlit settings: browser tab title and wide layout.
st.set_page_config(page_title="Data Science Tutor", layout="wide")

# CSS injected into the page so that the sidebar navigation element
# (data-testid="stSidebarNav") is not displayed.
_HIDE_SIDEBAR_NAV_CSS = """
<style>
/* Hide the default "Pages" menu in the top-left sidebar */
[data-testid="stSidebarNav"] {
    display: none;
}
</style>
"""
st.markdown(_HIDE_SIDEBAR_NAV_CSS, unsafe_allow_html=True)

# Image shown in the sidebar, stretched to the sidebar's width.
st.sidebar.image("data2.jpeg", use_container_width=True)

st.sidebar.title("CRISP-DM Steps")

# (sidebar label, module name) pairs. The module name is later imported as
# ``pages.<module name>``; ``None`` marks the landing page, which has no module.
_SECTION_ENTRIES = (
    ("Main Page", None),
    ("1. Business Understanding", "1_Business_understanding"),
    ("2. Data understanding", "2_Data_understanding"),
    ("3. Data Preparation", "3_Data_preparation"),
    ("4. Feature Engineering", "4_Feature_engineering"),
    ("5. Modeling", "5_Modeling"),
    ("6. Evaluation", "6_Evaluation"),
    ("7. Deployment & Testing", "7_Deployment"),
    ("8. ML, Deep Learning & Transformers", "8_Models"),
)
sections = dict(_SECTION_ENTRIES)

# Radio selector over the labels, in declaration order; the first entry
# ("Main Page") is preselected.
_section_labels = list(sections)
selected_section = st.sidebar.radio("Select a topic:", _section_labels, index=0)

# Render either the landing page or the lesson module chosen in the sidebar.
if sections[selected_section] is None:
    # NOTE(review): the title's leading emoji was lost to an encoding
    # round-trip (only a stray "π" survived). The chart emoji below is a
    # stand-in -- confirm the intended glyph.
    st.title("📊 Welcome to the Data Science Tutor!")

    # The list separators were mis-decoded em dashes ("β"); they are restored
    # to "—" here. The HTML is kept free of blank lines so it stays a single
    # raw-HTML block when passed through the Markdown renderer.
    st.markdown(
        """
<div style="color: #2FA4E7; margin-top: 1rem;">
  <h2>About This App</h2>
  <p>
    This application is designed to guide you through the CRISP-DM process
    for data science projects. Each section in the sidebar highlights a
    different step in the process, providing structured lessons, best
    practices, and hands-on examples.
  </p>
  <h3>App Sections</h3>
  <ul>
    <li><strong>1. Business Understanding</strong> — Clarify project objectives, requirements, and success criteria.</li>
    <li><strong>2. Data Understanding</strong> — Explore data sources, structures, and initial insights.</li>
    <li><strong>3. Data Preparation</strong> — Clean, integrate, and transform the data for modeling.</li>
    <li><strong>4. Feature Engineering</strong> — Engineer and select relevant features for better models.</li>
    <li><strong>5. Modeling</strong> — Develop, train, and tune predictive models.</li>
    <li><strong>6. Evaluation</strong> — Assess performance metrics and refine models.</li>
    <li><strong>7. Deployment & Testing</strong> — Deploy models into production environments and validate.</li>
    <li><strong>8. ML, Deep Learning & Transformers</strong> — Delve deeper into advanced methods and architectures.</li>
  </ul>
</div>
""",
        unsafe_allow_html=True,
    )
else:
    # Import ``pages.<module name>`` for the selected section and call its
    # ``run()`` entry point to draw the lesson.
    module_name = f"pages.{sections[selected_section]}"
    module = importlib.import_module(module_name)
    module.run()

# Resolve the OpenAI API key: prefer the environment, otherwise ask for it
# in the sidebar.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.sidebar.title("Ask AI")
    api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")

# Without a key the client cannot be constructed (the OpenAI constructor
# raises when no key is available), so stop this script run here with a hint
# instead of crashing. Everything rendered above stays on screen.
if not api_key:
    st.sidebar.info("Enter an OpenAI API key to enable the tutor chat.")
    st.stop()

# Pass the resolved key explicitly so a key typed into the sidebar is
# actually used, not only one found in the environment.
client = OpenAI(api_key=api_key)

st.sidebar.title("Focus Areas")

# Optional emphasis topics; each one is offered as a sidebar checkbox.
focus_areas = [
    "Compare Models",
    "Delivery Speed vs Costs",
    "Ethical Considerations in AI",
    "Interpretability & Explainability",
    "Business Impact & ROI",
    "Scalability & Performance",
    "Data Privacy & Security",
]

# Draw one checkbox per topic, in order, and keep the ticked ones.
selected_focus_areas = []
for area in focus_areas:
    if st.sidebar.checkbox(area):
        selected_focus_areas.append(area)

st.title("Data Science Tutor Chat")
st.image("https://miro.medium.com/v2/resize:fit:100/format:webp/1*NfE0G4nEj4xX7Z_8dSx83g.png")

# Seed per-session state the first time the script runs for this session:
# the conversation (opened by an assistant greeting) and the flag recording
# whether the one-off context prompt has been sent yet.
_session_defaults = {
    "messages": [
        {"role": "assistant", "content": "How can I assist you with Data Science today?"}
    ],
    "context_prompt_added": False,
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

st.write("---")
st.subheader("Chat")

# Replay the stored conversation, one chat bubble per message.
for msg in st.session_state["messages"]:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

def _append_conversation_log(entry: dict, path: str) -> None:
    """Append *entry* to the JSON array stored at *path*.

    The parent directory and the file are created when missing. A file that
    cannot be parsed as JSON is moved aside to ``<path>.corrupt`` and a fresh
    log is started, so one bad write cannot break every later chat turn. If
    the file holds a single JSON value that is not a list, that value is kept
    as the first element of the new list.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    try:
        with open(path, "r", encoding="utf-8") as log_file:
            logs = json.load(log_file)
    except FileNotFoundError:
        logs = []
    except json.JSONDecodeError:
        # Preserve the unreadable file for inspection rather than overwrite it.
        os.replace(path, f"{path}.corrupt")
        logs = []

    if not isinstance(logs, list):
        logs = [logs]
    logs.append(entry)

    with open(path, "w", encoding="utf-8") as log_file:
        json.dump(logs, log_file, indent=4)


# Handle a newly submitted question: send it to the model together with the
# tutor instructions, show the answer, and log the exchange.
if prompt := st.chat_input("Enter your question here:"):
    prompting_instructions = """
You are a Data Science tutor. Your responses should be focused on Data Science related subjects.
If a question is not related to Data Science, respond with: "Sorry, I can only assist with Data Science related questions."
Ensure that your responses are not harmful, denigratory, or discriminatory for security and privacy reasons.
"""
    section_context = f"The user is currently viewing the {selected_section} section. "
    focus_context = ""
    if selected_focus_areas:
        focus_context = f"Focus on {', '.join(selected_focus_areas)} in your response."

    # Instructions and context travel as a system message that is rebuilt on
    # every request. They therefore never appear in the visible chat
    # transcript, and the section / focus areas currently selected in the
    # sidebar always apply (previously only those chosen before the first
    # question were sent).
    system_parts = [prompting_instructions.strip(), section_context.strip()]
    if focus_context:
        system_parts.append(focus_context)
    system_message = {"role": "system", "content": "\n".join(system_parts)}

    # Store and display exactly what the user typed.
    st.session_state["messages"].append({"role": "user", "content": prompt})
    # This flag is initialised earlier in the script; it is still set here in
    # case something else reads it -- NOTE(review): confirm, otherwise drop it.
    st.session_state["context_prompt_added"] = True
    st.chat_message("user").write(prompt)

    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[system_message, *st.session_state["messages"]],
        )
    except OpenAIError as exc:
        # Drop the unanswered question so the history stays consistent and
        # the user can simply retry.
        st.session_state["messages"].pop()
        st.error(f"The AI request failed: {exc}")
    else:
        # ``content`` can be None (e.g. an empty completion); treat as "".
        response_text = (completion.choices[0].message.content or "").strip()

        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # Persist the exchange to logs/conversation_logs.json.
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "user_query": prompt,
            "assistant_response": response_text,
            "focus_areas": selected_focus_areas,
            "selected_section": selected_section,
        }
        log_file_path = os.path.join("logs", "conversation_logs.json")
        _append_conversation_log(log_entry, log_file_path)