# NOTE(review): page-scrape residue ("Spaces: Sleeping / Sleeping"); not part of the module.
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url: Optional[str]) -> str:
    """Return *url* without surrounding whitespace or trailing slashes.

    Args:
        url: The URL text to clean up. ``None`` and ``""`` are accepted.

    Returns:
        The cleaned URL, or ``""`` when *url* is ``None`` or empty.
    """
    # A missing URL is treated as "no URL" rather than raising
    # AttributeError on None.strip().
    if not url:
        return ""
    return url.strip().rstrip("/")
def summarize_skills(skills: Optional[List[Dict]]) -> str:
    """Join the titles of all skills into one comma-separated string.

    Args:
        skills: Skill dicts, each optionally carrying a ``'title'`` key.
            ``None`` is treated like an empty list.

    Returns:
        The non-empty titles joined with ``', '``; ``""`` when there are none.
    """
    # `skills or []` covers a field that is present but null; str() keeps
    # join() from failing on a non-string title.
    return ", ".join(
        str(skill["title"]) for skill in skills or [] if skill.get("title")
    )
def summarize_projects(projects: Optional[List[Dict]]) -> str:
    """Render each project as a ``"<title>: <description>"`` line.

    The description is the text of every ``'textComponent'`` entry found in
    the ``'description'`` lists of the project's ``'subComponents'``, joined
    with single spaces and stripped.

    Args:
        projects: Project dicts. ``None`` is treated like an empty list, and
            null ``'title'``, ``'subComponents'``, ``'description'`` or
            ``'text'`` values are treated as empty.

    Returns:
        One line per project joined with newlines; ``""`` when there are no
        projects. A project without description text yields ``"<title>: "``.
    """
    lines = []
    for project in projects or []:
        # `or ""` keeps a null title from rendering as the literal "None".
        title = project.get("title") or ""
        texts = []
        for component in project.get("subComponents") or []:
            for part in component.get("description") or []:
                if part.get("type") == "textComponent":
                    texts.append(str(part.get("text") or ""))
        # join + strip gives the same text as appending "<text> " in a loop
        # and stripping at the end.
        description = " ".join(texts).strip()
        lines.append(f"{title}: {description}")
    return "\n".join(lines)
def summarize_educations(educations: Optional[List[Dict]]) -> str:
    """Format education entries as ``"<title> (<subtitle>, <caption>)"``.

    Args:
        educations: Education dicts. Entries without a truthy ``'title'`` are
            skipped. ``None`` is treated like an empty list.

    Returns:
        The formatted entries joined with ``', '``; ``""`` when there are
        none. A missing or null subtitle/caption is rendered as empty text.
    """
    formatted = []
    for education in educations or []:
        title = education.get("title")
        if not title:
            continue
        # `or ""` keeps explicit nulls from rendering as the literal "None".
        subtitle = education.get("subtitle") or ""
        caption = education.get("caption") or ""
        formatted.append(f"{title} ({subtitle}, {caption})")
    return ", ".join(formatted)
def summarize_certs(certs: Optional[List[Dict]]) -> str:
    """Format certificate entries as ``"<title> (<subtitle>, <caption>)"``.

    Args:
        certs: Certificate dicts. Entries without a truthy ``'title'`` are
            skipped. ``None`` is treated like an empty list.

    Returns:
        The formatted entries joined with ``', '``; ``""`` when there are
        none. A missing or null subtitle/caption is rendered as empty text.
    """
    formatted = []
    for cert in certs or []:
        title = cert.get("title")
        if not title:
            continue
        # `or ""` keeps explicit nulls from rendering as the literal "None".
        subtitle = cert.get("subtitle") or ""
        caption = cert.get("caption") or ""
        formatted.append(f"{title} ({subtitle}, {caption})")
    return ", ".join(formatted)
def summarize_test_scores(scores: Optional[List[Dict]]) -> str:
    """Format test-score entries as ``"<title> (<subtitle>)"``.

    Args:
        scores: Test-score dicts. Entries without a truthy ``'title'`` are
            skipped. ``None`` is treated like an empty list.

    Returns:
        The formatted entries joined with ``', '``; ``""`` when there are
        none. A missing or null subtitle is rendered as empty text.
    """
    formatted = []
    for score in scores or []:
        title = score.get("title")
        if not title:
            continue
        # `or ""` keeps an explicit null from rendering as the literal "None".
        subtitle = score.get("subtitle") or ""
        formatted.append(f"{title} ({subtitle})")
    return ", ".join(formatted)
def summarize_generic(items: Optional[List[Dict]], key: str = 'title') -> str:
    """Join one field from every item into a comma-separated string.

    Args:
        items: Dicts to read from. ``None`` is treated like an empty list.
        key: Name of the field to collect from each item.

    Returns:
        The truthy values found under *key*, joined with ``', '``; ``""``
        when there are none.
    """
    # `items or []` covers a field that is present but null; str() keeps
    # join() from failing on a non-string value.
    return ", ".join(str(item[key]) for item in items or [] if item.get(key))
# === Preprocess raw profile into summarized profile ===
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Flatten a raw profile dict into a dict of plain-text fields.

    Scalar fields are copied as strings, and list-valued fields are
    condensed to a single string by the ``summarize_*`` helpers.

    Args:
        raw_profile: The raw profile, keyed by camelCase names such as
            ``'fullName'``, ``'linkedinUrl'`` and ``'skills'``.

    Returns:
        A dict whose values are all strings. A key that is missing or
        explicitly null in *raw_profile* yields ``""``.
    """

    def text(field: str) -> str:
        # A key that is present but null must become "", not None and not
        # the literal "None" that str(None) would produce.
        value = raw_profile.get(field)
        return "" if value is None else str(value)

    def entries(field: str) -> List[Dict]:
        # .get(field, []) still returns None for an explicit null, which the
        # summarizers cannot iterate.
        return raw_profile.get(field) or []

    return {
        "FullName": text("fullName"),
        "profile_url": text("linkedinUrl"),
        "Headline": text("headline"),
        "JobTitle": text("jobTitle"),
        "CompanyName": text("companyName"),
        "CompanyIndustry": text("companyIndustry"),
        "CurrentJobDuration": text("currentJobDuration"),
        "About": text("about"),
        "Experiences": summarize_generic(entries("experiences"), key='title'),
        "Skills": summarize_skills(entries("skills")),
        "Educations": summarize_educations(entries("educations")),
        "Certifications": summarize_certs(entries("licenseAndCertificates")),
        "HonorsAndAwards": summarize_generic(entries("honorsAndAwards"), key='title'),
        "Verifications": summarize_generic(entries("verifications"), key='title'),
        "Highlights": summarize_generic(entries("highlights"), key='title'),
        "Projects": summarize_projects(entries("projects")),
        "Publications": summarize_generic(entries("publications"), key='title'),
        "Patents": summarize_generic(entries("patents"), key='title'),
        "Courses": summarize_generic(entries("courses"), key='title'),
        "TestScores": summarize_test_scores(entries("testScores")),
    }
# === Create & fill state ===
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str,Any]:
    """Build the initial chatbot state dict from a raw profile.

    The state holds:
    - the preprocessed profile and its normalized URL,
    - a ``sections`` dict with one string per profile section,
    - empty placeholders for values that tools fill in later,
    - an empty message history.
    """
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    profile_url = normalize_url(profile.get("profile_url","") or "")

    # Section name in the state -> key in the preprocessed profile.
    section_keys = (
        ("about", "About"),
        ("headline", "Headline"),
        ("skills", "Skills"),
        ("projects", "Projects"),
        ("educations", "Educations"),
        ("certifications", "Certifications"),
        ("honors_and_awards", "HonorsAndAwards"),
        ("experiences", "Experiences"),
        ("publications", "Publications"),
        ("patents", "Patents"),
        ("courses", "Courses"),
        ("test_scores", "TestScores"),
        ("verifications", "Verifications"),
        ("highlights", "Highlights"),
        ("job_title", "JobTitle"),
        ("company_name", "CompanyName"),
        ("company_industry", "CompanyIndustry"),
        ("current_job_duration", "CurrentJobDuration"),
        ("full_name", "FullName"),
    )
    # `or ""` guarantees every section is a string, never None.
    sections = {
        section: profile.get(profile_key, "") or ""
        for section, profile_key in section_keys
    }

    return {
        "profile": profile,
        "profile_url": profile_url,
        "sections": sections,
        # Placeholders populated by tools; the None entries are optional.
        "enhanced_content": {},
        "profile_analysis": None,
        "job_fit": None,
        "target_role": None,
        "editing_section": None,
        # Chat history: a list of dicts like {"role": "user", "content": "..."}.
        "messages": [],
        "next_tool_name": None,
    }