import os import json import re import time from typing import Dict, Any, List, Optional, Annotated from chatbot_model import ( UserMemory, ChatbotState, ProfileAnalysisModel, JobFitModel, ContentGenerationModel, ) from llm_utils import call_llm_and_parse from profile_preprocessing import ( preprocess_profile, initialize_state, normalize_url ) import os from openai import OpenAI import streamlit as st import hashlib from dotenv import load_dotenv from pydantic import BaseModel, Field,ValidationError # import pdb; pdb.set_trace() from scraping_profile import scrape_linkedin_profile from langchain_openai import ChatOpenAI from langchain_core.messages import SystemMessage, HumanMessage, AIMessage,BaseMessage,ToolMessage from langchain_core.tools import tool from langgraph.graph import StateGraph, END,START from langgraph.checkpoint.memory import MemorySaver from langgraph.graph import add_messages # if your framework exposes this from langgraph.prebuilt import ToolNode,tools_condition,InjectedState import dirtyjson import sqlite3 from langgraph.checkpoint.sqlite import SqliteSaver # ========== 1. ENVIRONMENT & LLM SETUP ========== load_dotenv() groq_key = os.getenv("GROQ_API_KEY") assert groq_key, "GROQ_API_KEY not found in environment!" groq_client=OpenAI( api_key=os.getenv("GROQ_API_KEY"), base_url="https://api.groq.com/openai/v1" ) def normalize_url(url): return url.strip().rstrip('/') def validate_state(state: dict) -> None: """ Validate given state dict against ChatbotState schema. Displays result in Streamlit instead of printing. """ # st.write("=== Validating chatbot state ===") try: ChatbotState.model_validate(state) # st.success("βœ… State is valid!") except ValidationError as e: st.error("❌ Validation failed!") errors_list = [] for error in e.errors(): loc = " β†’ ".join(str(item) for item in error['loc']) msg = error['msg'] errors_list.append(f"- At: {loc}\n Error: {msg}") st.write("\n".join(errors_list)) # Optionally show raw validation error too: st.expander("See raw validation error").write(str(e)) st.stop() user_memory = UserMemory() # ========== 7. AGENT FUNCTIONS ========== def profile_analysis_prompt(profile: Dict[str, str]) -> str: return f""" You are a top-tier LinkedIn career coach and AI analyst. Analyze the following candidate profile carefully. Candidate profile data: FullName: {profile.get("FullName", "")} Headline: {profile.get("Headline", "")} JobTitle: {profile.get("JobTitle", "")} CompanyName: {profile.get("CompanyName", "")} CompanyIndustry: {profile.get("CompanyIndustry", "")} CurrentJobDuration: {profile.get("CurrentJobDuration", "")} About: {profile.get("About", "")} Experiences: {profile.get("Experiences", "")} Skills: {profile.get("Skills", "")} Educations: {profile.get("Educations", "")} Certifications: {profile.get("Certifications", "")} HonorsAndAwards: {profile.get("HonorsAndAwards", "")} Verifications: {profile.get("Verifications", "")} Highlights: {profile.get("Highlights", "")} Projects: {profile.get("Projects", "")} Publications: {profile.get("Publications", "")} Patents: {profile.get("Patents", "")} Courses: {profile.get("Courses", "")} TestScores: {profile.get("TestScores", "")} Identify and summarize: 1. strengths: - technical strengths (skills, tools, frameworks) - project strengths (impactful projects, innovation) - educational strengths (degrees, certifications, awards) - soft skills and personality traits (teamwork, leadership) 2. weaknesses: - missing or weak technical skills - gaps in projects, experience, or education - unclear profile sections or missing context 3. actionable suggestions: - concrete ways to improve profile headline, about section, or add projects - suggestions to learn or highlight new skills - ideas to make the profile more attractive for recruiters Important instructions: - Respond ONLY with valid JSON. - Do NOT include text before or after JSON. - Be concise but detailed. Example JSON format: {{ "strengths": {{ "technical": ["...", "..."], "projects": ["...", "..."], "education": ["...", "..."], "soft_skills": ["...", "..."] }}, "weaknesses": {{ "technical_gaps": ["...", "..."], "project_or_experience_gaps": ["...", "..."], "missing_context": ["...", "..."] }}, "suggestions": [ "...", "...", "..." ] }} """.strip() def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str: return f""" You are an expert career coach and recruiter. Compare the following candidate profile against the typical requirements for the role of "{target_role}". Candidate Profile: - Headline: {sections.get('headline', '')} - About: {sections.get('about', '')} - Job Title: {sections.get('job_title', '')} - Company: {sections.get('company_name', '')} - Industry: {sections.get('company_industry', '')} - Current Job Duration: {sections.get('current_job_duration', '')} - Skills: {sections.get('skills', '')} - Projects: {sections.get('projects', '')} - Educations: {sections.get('educations', '')} - Certifications: {sections.get('certifications', '')} - Honors & Awards: {sections.get('honors_and_awards', '')} - Experiences: {sections.get('experiences', '')} **Instructions:** - Respond ONLY with valid JSON. - Your JSON must exactly match the following schema: {{ "match_score": 85, "missing_skills": ["Skill1", "Skill2"], "suggestions": ["...", "...", "..."] }} - "match_score": integer from 0–100 estimating how well the profile fits the target role. - "missing_skills": key missing or weakly mentioned skills. - "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline). Do NOT include explanations, text outside JSON, or markdown. Start with '{{' and end with '}}'. The JSON must be directly parseable. """.strip() # --- Tool: Profile Analyzer --- @tool def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict: """ Tool: Analyze the overall full user's profile to give strengths, weaknesses, suggestions. This is needed only if full analysis of profile is needed. Returns the full analysis in the form of a json. - It takes no arguments """ # Get summarized profile (dictionary of strings) profile = getattr(state, "profile", {}) or {} # Build prompt prompt = profile_analysis_prompt(profile) # Call the LLM & parse structured result analysis_model = call_llm_and_parse(groq_client,prompt, ProfileAnalysisModel) analysis_dict = analysis_model.model_dump() # Save to state and user memory state.profile_analysis = analysis_dict user_memory.save("profile_analysis", analysis_dict) print("πŸ’Ύ [DEBUG] Saved analysis to user memory.") print("πŸ“¦ [DEBUG] Updated state.profile_analysis with analysis.") return analysis_dict # --- Tool: Job Matcher --- @tool def job_matcher( state: Annotated[ChatbotState, InjectedState], target_role: str = None ) -> dict: """ Tool: Analyze how well the user's profile fits the target role. - If user is asking if he is a good fit for a certain role, or needs to see if his profile is compatible with a certain role, call this. - Takes target_role as an argument. - this tool is needed when match score, missing skills, suggestions are needed based on a job name given. """ print(f"target role is {target_role}") # Update state.target_role if provided sections = getattr(state, "sections", {}) # Build prompt prompt = job_fit_prompt(sections, target_role) # Call LLM and parse try: job_fit_model = call_llm_and_parse(groq_client,prompt, JobFitModel) job_fit_dict = job_fit_model.model_dump() job_fit_dict["target_role"] = target_role except Exception as e: job_fit_dict = { "target_role":target_role, "match_score": 0, "missing_skills": [], "suggestions": ["Parsing failed or incomplete response."] } # Save to state and user memory state.job_fit = job_fit_dict user_memory.save("job_fit", job_fit_dict) return job_fit_dict @tool def extract_from_state_tool( state: Annotated[ChatbotState, InjectedState], key: str ) -> dict: """ This tool is used if user wants to ask about any particular part of this profile. Use this if a singe section is targeted. It expects key as an arguement, that represents what the user is wanting to look at, from his profile. Argument: key: only pass one from the below list, identify one thing the user wants to look into and choose that: "sections.about", "sections.headline", "sections.skills", "sections.projects", "sections.educations", "sections.certifications", "sections.honors_and_awards", "sections.experiences", "sections.publications", "sections.patents", "sections.courses", "sections.test_scores", "sections.verifications", "sections.highlights", "sections.job_title", "sections.company_name", "sections.company_industry", "sections.current_job_duration", "sections.full_name", "enhanced_content,"profile_analysis", "job_fit", "target_role", "editing_section" """ value = state try: for part in key.split('.'): # Support both dict and Pydantic model if isinstance(value, dict): value = value.get(part) elif hasattr(value, part): value = getattr(value, part) else: value = None if value is None: break except Exception: value = None return {"result": value} tools = [ profile_analyzer, job_matcher, extract_from_state_tool ] llm = ChatOpenAI( api_key=groq_key, base_url="https://api.groq.com/openai/v1", model="llama3-8b-8192", temperature=0 ) llm_with_tools = llm.bind_tools(tools) # ========== 8. LANGGRAPH PIPELINE ========== def chatbot_node(state: ChatbotState) -> ChatbotState: validate_state(state) messages = state.get("messages", []) system_prompt = """ You are a helpful AI assistant specialized in LinkedIn profile coaching. Guidelines: - Greet the user if they greet you, and explain you can help analyze, enhance, and improve their LinkedIn profile. - Prefer using tools instead of answering directly whenever this can give better, data-backed answers. - Call only one tool at a time. Never call multiple tools together. When to use tools: - If the user asks to show a section (like About, Projects, etc.): call extract_from_state_tool, unless you already have that section stored. - If the user asks to enhance a section: use extract_from_state_tool first if you don’t already have that section, then enhance it. - If the user requests a full profile analysis: use profile_analyzer. - If the user wants to know how well they fit a target job role: use job_matcher with the given role. - Use tools to check strengths, weaknesses, missing skills, or improvement suggestions. - If the tool was just called recently and info is still fresh, you may answer directly. Important: - Never describe or print JSON of a tool call. - Never say "I'm about to call a tool" β€” just call the tool properly. - Keep answers clear, helpful, and actionable. Your goal: help the user see, improve, and analyze their LinkedIn profile. """ # Build messages & invoke LLM messages = [SystemMessage(content=system_prompt)] + messages[-2:] # messages = [SystemMessage(content=system_prompt)] response = llm_with_tools.invoke(messages) if hasattr(response, "tool_calls") and response.tool_calls: first_tool = response.tool_calls[0] tool_name = first_tool.get("name") if isinstance(first_tool, dict) else getattr(first_tool, "name", None) tool_args = first_tool.get("args") if isinstance(first_tool, dict) else getattr(first_tool, "args", {}) print(f"[DEBBBBUUUUGGG] using tool {tool_name}") # DEBUG print("[DEBUG] LLM response:", response) state.setdefault("messages", []).append(response) return state # --- Graph definition --- graph = StateGraph(state_schema=ChatbotState) graph.add_node("chatbot", chatbot_node) graph.add_node("tools", ToolNode(tools)) graph.add_edge(START, "chatbot") graph.add_conditional_edges("chatbot", tools_condition) graph.add_edge("tools","chatbot") graph.set_entry_point("chatbot") # --- Streamlit UI --- st.set_page_config(page_title="πŸ’Ό LinkedIn AI Career Assistant", page_icon="πŸ€–", layout="wide") st.title("πŸ§‘β€πŸ’Ό LinkedIn AI Career Assistant") # --- Checkpointer and graph initialization --- if "checkpointer" not in st.session_state: print("Current working directory:", os.getcwd()) print("Files in working directory:", os.listdir(".")) conn = sqlite3.connect("checkpoints1.db", check_same_thread=False) st.session_state["checkpointer"] = SqliteSaver(conn) checkpointer = st.session_state["checkpointer"] if "app_graph" not in st.session_state: st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer) app_graph = st.session_state["app_graph"] # Find or create thread def find_thread_id_for_url(checkpointer, url, max_threads=100): search_url = normalize_url(url) for tid in range(max_threads): config = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}} state = checkpointer.get(config) if state and "channel_values" in state: user_state = state["channel_values"] stored_url = normalize_url(user_state.get("profile_url", "")) if stored_url == search_url: return str(tid), user_state return None, None def delete_thread_checkpoint(checkpointer, thread_id): # For SqliteSaver, use the delete_thread method if available if hasattr(checkpointer, "delete_thread"): checkpointer.delete_thread(thread_id) else: # For in-memory or custom checkpointers, implement as needed pass def get_next_thread_id(checkpointer, max_threads=100): used = set() for tid in range(max_threads): config = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}} if checkpointer.get(config): used.add(tid) for tid in range(max_threads): if tid not in used: return str(tid) raise RuntimeError("No available thread_id") # --- Session selection and state initialization --- if "chat_mode" not in st.session_state: profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)") if not profile_url: st.info("Please enter a valid LinkedIn profile URL above to start.") st.stop() valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$" if not re.match(valid_pattern, profile_url.strip()): st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.") st.stop() url = profile_url.strip() existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url) # Defensive: ensure required fields required_fields = ["profile", "sections"] if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields): st.warning("Previous session is missing required data. Please start a new chat.") previous_state = None if previous_state: st.info("A previous session found. Choose:") col1, col2 = st.columns(2) if col1.button("Continue previous chat"): st.session_state["chat_mode"] = "continue" st.session_state["thread_id"] = existing_thread_id st.session_state.state = previous_state st.rerun() elif col2.button("Start new chat"): delete_thread_checkpoint(checkpointer, existing_thread_id) with st.spinner("Fetching and processing profile... ⏳"): raw=scrape_linkedin_profile(url) thread_id = existing_thread_id st.session_state["chat_mode"] = "new" st.session_state["thread_id"] = thread_id st.session_state.state = initialize_state(raw) st.session_state.state["profile_url"] = normalize_url(url) st.session_state.state["messages"] = [] st.rerun() st.stop() else: with st.spinner("Fetching and processing profile... ⏳"): raw=scrape_linkedin_profile(url) thread_id = get_next_thread_id(checkpointer) st.session_state["thread_id"] = thread_id st.session_state["chat_mode"] = "new" st.session_state.state = initialize_state(raw) st.session_state.state["profile_url"] = normalize_url(url) st.session_state.state["messages"] = [] st.rerun() # --- Main chat UI (only after chat_mode is set) --- state = st.session_state.state thread_id = st.session_state.get("thread_id") st.subheader("πŸ’¬ Chat with your AI Assistant") messages = state.get("messages", []) chat_container = st.container() with chat_container: st.markdown( """ """, unsafe_allow_html=True, ) job_fit = state.get("job_fit") for msg in messages: if isinstance(msg, HumanMessage): st.markdown( f"""
πŸ§‘β€πŸ’» You {msg.content}
""", unsafe_allow_html=True, ) elif isinstance(msg, AIMessage): if not msg.content or not msg.content.strip(): continue st.markdown( f"""
AI
πŸ€– AI {msg.content}
""", unsafe_allow_html=True, ) elif isinstance(msg, ToolMessage): raw_content = msg.content or "(no content)" try: parsed = json.loads(raw_content) except Exception: parsed = None if parsed and isinstance(parsed, dict): # --- Profile analysis format --- if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")): strengths = parsed["strengths"] weaknesses = parsed["weaknesses"] suggestions = parsed["suggestions"] formatted_html = f"""

πŸ’ͺ Strengths

⚠️ Weaknesses

πŸ›  Suggestions to improve

""" st.markdown(f"""
Tool
πŸ“Š Profile Analysis {formatted_html}
""", unsafe_allow_html=True) # --- Job fit format --- elif "match_score" in parsed: percent = parsed["match_score"] suggestions = parsed.get("suggestions", []) missing = parsed.get("missing_skills", []) target_role = parsed.get('target_role', 'unspecified') state["target_role"]=target_role suggestions_html = "
".join(f"β€’ {s}" for s in suggestions) missing_html = "
".join(f"β€’ {s}" for s in missing) st.markdown(f"""
Tool
πŸ“Š Job Fit 🎯 Target Role: {target_role}
{percent}%
Missing Skills:
{missing_html}

Suggestions:
{suggestions_html}
""", unsafe_allow_html=True) # --- Section text format --- elif "result" in parsed: text = parsed["result"] st.markdown(f"""
Tool
πŸ“„ Section Content {text}
""", unsafe_allow_html=True) else: st.markdown( f"""
⚠️ Unknown {getattr(msg, 'content', str(msg))}
""", unsafe_allow_html=True, ) st.markdown('
', unsafe_allow_html=True) st.markdown("---") user_input = st.chat_input( placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, How do I fit for AI role?, How is my about section?, Enhance my headline,etc...'" ) if user_input and user_input.strip(): state.setdefault("messages", []).append(HumanMessage(content=user_input.strip())) validate_state(state) thread_id = st.session_state.get("thread_id") config = {"configurable": {"thread_id": thread_id}} with st.spinner("Processing your request..."): st.session_state.state = app_graph.invoke(state, config) st.rerun()