from typing import List, Dict, Any
from urllib.parse import urlparse


# ========== 3. PROFILE PREPROCESSING HELPERS ==========

def _blank_if_none(value):
    """Return ``""`` when *value* is ``None``; otherwise return it unchanged."""
    return "" if value is None else value


def normalize_url(url):
    """Strip surrounding whitespace and trailing slashes from *url*.

    An empty or ``None`` url yields ``""`` instead of raising.
    """
    if not url:
        return ""
    return url.strip().rstrip('/')


def _summarize_titled(items, detail_keys) -> str:
    """Render each titled item as ``"<title> (<detail>, <detail>...)"``.

    Items without a truthy ``title`` are skipped. Detail values that are
    missing or ``None`` render as empty strings rather than ``"None"``.
    The rendered items are joined with ``", "``.
    """
    rendered = []
    for item in items or []:
        title = item.get('title')
        if not title:
            continue
        details = ', '.join(
            str(_blank_if_none(item.get(detail_key))) for detail_key in detail_keys
        )
        rendered.append(f"{title} ({details})")
    return ', '.join(rendered)


def summarize_generic(items: List[Dict], key='title') -> str:
    """Join the truthy ``key`` values of *items* with ``", "``.

    ``None`` or an empty list yields ``""``.
    """
    return ', '.join([item.get(key, '') for item in items or [] if item.get(key)])


def summarize_skills(skills: List[Dict]) -> str:
    """Join the non-empty skill titles with ``", "`` (``None`` yields ``""``)."""
    return summarize_generic(skills, key='title')


def summarize_projects(projects: List[Dict]) -> str:
    """Return one ``"<title>: <description>"`` line per project.

    The description is the space-joined ``text`` of every entry whose
    ``type`` is ``'textComponent'`` found under
    ``subComponents[*].description``. Missing or ``None`` titles,
    descriptions and texts are treated as empty. Lines are joined with
    a newline.
    """
    summaries = []
    for project in projects or []:
        texts = []
        for component in project.get('subComponents') or []:
            for part in component.get('description') or []:
                if part.get('type') == 'textComponent':
                    texts.append(part.get('text') or '')
        title = project.get('title') or ''
        summaries.append(f"{title}: {' '.join(texts).strip()}")
    return '\n'.join(summaries)


def summarize_educations(educations: List[Dict]) -> str:
    """Render educations as ``"<title> (<subtitle>, <caption>)"``, comma-joined."""
    return _summarize_titled(educations, ('subtitle', 'caption'))


def summarize_certs(certs: List[Dict]) -> str:
    """Render certificates as ``"<title> (<subtitle>, <caption>)"``, comma-joined."""
    return _summarize_titled(certs, ('subtitle', 'caption'))


def summarize_test_scores(scores: List[Dict]) -> str:
    """Render test scores as ``"<title> (<subtitle>)"``, comma-joined."""
    return _summarize_titled(scores, ('subtitle',))


# === Preprocess raw profile into summarized profile ===
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Flatten a raw profile dict into a dict of summary values.

    Text fields are copied from *raw_profile*; list fields are collapsed
    to a single string by the ``summarize_*`` helpers. A key that is
    absent, or present with a ``None`` value, yields ``""``.
    """
    def text(field: str) -> Any:
        # .get(field, "") alone would still return None for explicit nulls.
        return _blank_if_none(raw_profile.get(field))

    def items(field: str) -> List[Dict]:
        return raw_profile.get(field) or []

    job_duration = raw_profile.get("currentJobDuration")

    return {
        "FullName": text("fullName"),
        "profile_url": text("linkedinUrl"),
        "Headline": text("headline"),
        "JobTitle": text("jobTitle"),
        "CompanyName": text("companyName"),
        "CompanyIndustry": text("companyIndustry"),
        # str(None) would leak the literal "None" into the profile.
        "CurrentJobDuration": "" if job_duration is None else str(job_duration),
        "About": text("about"),
        "Experiences": summarize_generic(items("experiences"), key='title'),
        "Skills": summarize_skills(items("skills")),
        "Educations": summarize_educations(items("educations")),
        "Certifications": summarize_certs(items("licenseAndCertificates")),
        "HonorsAndAwards": summarize_generic(items("honorsAndAwards"), key='title'),
        "Verifications": summarize_generic(items("verifications"), key='title'),
        "Highlights": summarize_generic(items("highlights"), key='title'),
        "Projects": summarize_projects(items("projects")),
        "Publications": summarize_generic(items("publications"), key='title'),
        "Patents": summarize_generic(items("patents"), key='title'),
        "Courses": summarize_generic(items("courses"), key='title'),
        "TestScores": summarize_test_scores(items("testScores")),
    }


# Maps each key of state["sections"] to the processed-profile key it mirrors.
_SECTION_SOURCES = (
    ("about", "About"),
    ("headline", "Headline"),
    ("skills", "Skills"),
    ("projects", "Projects"),
    ("educations", "Educations"),
    ("certifications", "Certifications"),
    ("honors_and_awards", "HonorsAndAwards"),
    ("experiences", "Experiences"),
    ("publications", "Publications"),
    ("patents", "Patents"),
    ("courses", "Courses"),
    ("test_scores", "TestScores"),
    ("verifications", "Verifications"),
    ("highlights", "Highlights"),
    ("job_title", "JobTitle"),
    ("company_name", "CompanyName"),
    ("company_industry", "CompanyIndustry"),
    ("current_job_duration", "CurrentJobDuration"),
    ("full_name", "FullName"),
)


# === Create & fill state ===
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the initial chatbot state from a raw profile:
    - Stores the processed profile (the raw profile itself is not kept)
    - Stores the normalized profile URL
    - Copies each profile section into "sections", falling back to ""
    - Initializes placeholders for tool outputs
    - Adds an empty message list for conversation context

    Prints the un-normalized profile URL to stdout as a side effect.
    """
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    # Every section falls back to "" so downstream code never sees None.
    sections = {
        section: profile.get(source, "") or ""
        for section, source in _SECTION_SOURCES
    }

    state: Dict[str, Any] = {
        "profile": profile,  # Processed profile
        "profile_url": normalize_url(profile.get("profile_url", "") or ""),
        "sections": sections,

        # === Placeholders populated by tools ===
        "enhanced_content": {},    # Populated by ContentGenerator tool
        "profile_analysis": None,  # Can be None initially (Optional)
        "job_fit": None,           # Can be None initially (Optional)
        "target_role": None,       # Optional[str]
        "editing_section": None,   # Optional[str]

        # === Chat history ===
        # Pydantic expects list of dicts like {"role": "user", "content": "..."}
        "messages": [],
        "next_tool_name": None,
    }
    return state