Spaces:
Sleeping
Sleeping
File size: 5,931 Bytes
5318b09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from typing import List, Dict, Any
from urllib.parse import urlparse
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url) -> str:
    """Return *url* without surrounding whitespace or trailing slashes.

    Args:
        url: The URL string to normalize. ``None`` or an empty string is
            accepted and treated as "no URL".

    Returns:
        The stripped URL with any trailing ``/`` characters removed, or
        ``""`` when no URL was supplied.
    """
    # A missing URL must not raise AttributeError on ``None.strip()``.
    if not url:
        return ""
    return url.strip().rstrip('/')
def summarize_skills(skills: List[Dict]) -> str:
    """Join the titles of all skills into one comma-separated string.

    Args:
        skills: Skill entries, each a dict that may carry a ``'title'`` key.
            ``None`` is tolerated and treated as an empty list, because a
            ``dict.get(key, [])`` lookup returns ``None`` when the key is
            present with a null value.

    Returns:
        The non-empty titles joined by ``', '``; ``""`` when there are none.
    """
    return ', '.join(s['title'] for s in (skills or []) if s.get('title'))
def summarize_projects(projects: List[Dict]) -> str:
    """Summarize each project as ``"<title>: <description text>"``, one per line.

    The description is assembled from every entry of type
    ``'textComponent'`` found in the ``'description'`` list of each of the
    project's ``'subComponents'``; other entry types are ignored.

    Args:
        projects: Project dicts. ``None`` is treated as an empty list, and
            null ``'title'``, ``'subComponents'``, ``'description'`` or
            ``'text'`` values are treated as empty rather than raising or
            being rendered as the literal text ``None``.

    Returns:
        One summary line per project joined with newlines; ``""`` when there
        are no projects.
    """
    summaries = []
    for project in projects or []:
        title = project.get('title') or ''
        texts = []
        for component in project.get('subComponents') or []:
            for entry in component.get('description') or []:
                if entry.get('type') == 'textComponent':
                    texts.append(entry.get('text') or '')
        # Joining with single spaces and stripping yields the same text as
        # appending "<text> " repeatedly, without quadratic concatenation.
        description = ' '.join(texts).strip()
        summaries.append(f"{title}: {description}")
    return '\n'.join(summaries)
def summarize_educations(educations: List[Dict]) -> str:
    """Format education entries as ``"<title> (<subtitle>, <caption>)"``.

    Entries without a truthy ``'title'`` are skipped.

    Args:
        educations: Education dicts. ``None`` is treated as an empty list;
            a missing or null ``'subtitle'``/``'caption'`` is rendered as an
            empty string instead of the literal text ``None``.

    Returns:
        The formatted entries joined by ``', '``; ``""`` when there are none.
    """
    return ', '.join(
        f"{e['title']} ({e.get('subtitle') or ''}, {e.get('caption') or ''})"
        for e in (educations or [])
        if e.get('title')
    )
def summarize_certs(certs: List[Dict]) -> str:
    """Format certificate entries as ``"<title> (<subtitle>, <caption>)"``.

    Entries without a truthy ``'title'`` are skipped.

    Args:
        certs: Certificate dicts. ``None`` is treated as an empty list; a
            missing or null ``'subtitle'``/``'caption'`` is rendered as an
            empty string instead of the literal text ``None``.

    Returns:
        The formatted entries joined by ``', '``; ``""`` when there are none.
    """
    return ', '.join(
        f"{c['title']} ({c.get('subtitle') or ''}, {c.get('caption') or ''})"
        for c in (certs or [])
        if c.get('title')
    )
def summarize_test_scores(scores: List[Dict]) -> str:
    """Format test-score entries as ``"<title> (<subtitle>)"``.

    Entries without a truthy ``'title'`` are skipped.

    Args:
        scores: Test-score dicts. ``None`` is treated as an empty list; a
            missing or null ``'subtitle'`` is rendered as an empty string
            instead of the literal text ``None``.

    Returns:
        The formatted entries joined by ``', '``; ``""`` when there are none.
    """
    return ', '.join(
        f"{s['title']} ({s.get('subtitle') or ''})"
        for s in (scores or [])
        if s.get('title')
    )
def summarize_generic(items: List[Dict], key='title') -> str:
    """Join the values stored under *key* across *items* with ``', '``.

    Args:
        items: Dicts to summarize. ``None`` is treated as an empty list.
        key: The dict key whose value is collected from each item; items
            where it is missing or falsy are skipped.

    Returns:
        The collected values joined by ``', '``; ``""`` when there are none.
    """
    return ', '.join(item[key] for item in (items or []) if item.get(key))
# === Preprocess raw profile into summarized profile ===
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Flatten a raw scraped profile into a dict of plain summary strings.

    Scalar fields are copied over under their summary names, and list
    fields are condensed to one string each by the ``summarize_*`` helpers.

    Args:
        raw_profile: The raw profile mapping, read with camelCase keys such
            as ``"fullName"``, ``"linkedinUrl"`` and ``"skills"``.

    Returns:
        A dict whose values are all strings. A field that is missing or
        null in *raw_profile* becomes ``""``.
    """

    def text(key: str) -> str:
        # A null value must become "" rather than the literal text "None"
        # (which is what str(None) yields); it would otherwise survive a
        # later ``value or ""`` check as a truthy string.
        value = raw_profile.get(key)
        return "" if value is None else str(value)

    def entries(key: str) -> List[Dict]:
        # ``.get(key, [])`` still returns None when the key is present with
        # a null value, so fall back to an empty list explicitly.
        return raw_profile.get(key) or []

    return {
        "FullName": text("fullName"),
        "profile_url": text("linkedinUrl"),
        "Headline": text("headline"),
        "JobTitle": text("jobTitle"),
        "CompanyName": text("companyName"),
        "CompanyIndustry": text("companyIndustry"),
        "CurrentJobDuration": text("currentJobDuration"),
        "About": text("about"),
        "Experiences": summarize_generic(entries("experiences"), key='title'),
        "Skills": summarize_skills(entries("skills")),
        "Educations": summarize_educations(entries("educations")),
        "Certifications": summarize_certs(entries("licenseAndCertificates")),
        "HonorsAndAwards": summarize_generic(entries("honorsAndAwards"), key='title'),
        "Verifications": summarize_generic(entries("verifications"), key='title'),
        "Highlights": summarize_generic(entries("highlights"), key='title'),
        "Projects": summarize_projects(entries("projects")),
        "Publications": summarize_generic(entries("publications"), key='title'),
        "Patents": summarize_generic(entries("patents"), key='title'),
        "Courses": summarize_generic(entries("courses"), key='title'),
        "TestScores": summarize_test_scores(entries("testScores")),
    }
# === Create & fill state ===
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str,Any]:
    """
    Build the initial chatbot state (used in LangGraph) from a raw profile.

    The returned dict holds:
    - "profile": the summarized profile produced by preprocess_profile
    - "profile_url": the profile URL passed through normalize_url
    - "sections": individual profile fields under snake_case names, each
      guaranteed to be a string (never None)
    - empty placeholders for tool outputs, the chat history and routing
    """
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    # (section name in the state, key in the summarized profile)
    section_sources = (
        ("about", "About"),
        ("headline", "Headline"),
        ("skills", "Skills"),
        ("projects", "Projects"),
        ("educations", "Educations"),
        ("certifications", "Certifications"),
        ("honors_and_awards", "HonorsAndAwards"),
        ("experiences", "Experiences"),
        ("publications", "Publications"),
        ("patents", "Patents"),
        ("courses", "Courses"),
        ("test_scores", "TestScores"),
        ("verifications", "Verifications"),
        ("highlights", "Highlights"),
        ("job_title", "JobTitle"),
        ("company_name", "CompanyName"),
        ("company_industry", "CompanyIndustry"),
        ("current_job_duration", "CurrentJobDuration"),
        ("full_name", "FullName"),
    )
    # ``or ""`` turns a None value into an empty string.
    sections = {
        name: profile.get(source, "") or ""
        for name, source in section_sources
    }

    return {
        "profile": profile,
        "profile_url": normalize_url(profile.get("profile_url","") or ""),
        "sections": sections,
        # Tool-populated placeholders
        "enhanced_content": {},     # filled by the ContentGenerator tool
        "profile_analysis": None,   # optional, unset at start
        "job_fit": None,            # optional, unset at start
        "target_role": None,        # optional string
        "editing_section": None,    # optional string
        # Chat history: list of dicts like {"role": "user", "content": "..."}
        "messages": [],
        "next_tool_name": None,
    }
|