File size: 5,931 Bytes
5318b09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from typing import List, Dict, Any
from urllib.parse import urlparse
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url):
    """Return *url* without surrounding whitespace or trailing slashes.

    Args:
        url: URL text to normalize. ``None`` is accepted and treated as an
            empty string.

    Returns:
        The URL with leading/trailing whitespace stripped and every
        trailing ``/`` removed; ``""`` when *url* is ``None``.
    """
    if url is None:
        # A missing URL normalizes to "" instead of raising AttributeError.
        return ""
    return url.strip().rstrip('/')

def summarize_skills(skills: List[Dict]) -> str:
    """Join the non-empty ``title`` of every skill entry with ``", "``.

    Args:
        skills: Skill dictionaries; entries without a truthy ``title`` are
            skipped. ``None`` is treated as an empty list.

    Returns:
        A comma-separated string of skill titles, or ``""`` when there are
        none.
    """
    # `or []` guards against a null list, which would otherwise raise
    # TypeError when iterated.
    titles = (skill.get('title') for skill in (skills or []))
    return ', '.join(title for title in titles if title)

def summarize_projects(projects: List[Dict]) -> str:
    """Render each project as ``"<title>: <description text>"``, one per line.

    The description is assembled from the ``text`` of every entry whose
    ``type`` is ``'textComponent'`` found under
    ``project['subComponents'][*]['description']``; entries of any other
    type are ignored.

    Args:
        projects: Project dictionaries. ``None`` is treated as an empty
            list, and a missing or null ``title``, ``subComponents``,
            ``description`` or ``text`` is treated as empty.

    Returns:
        The per-project summaries joined with newlines; ``""`` when there
        are no projects.
    """
    summaries = []
    for project in projects or []:
        texts = []
        for component in project.get('subComponents') or []:
            for entry in component.get('description') or []:
                if entry.get('type') == 'textComponent':
                    texts.append(entry.get('text') or '')
        title = project.get('title') or ''
        # Space-join then strip: same result as appending "text " per entry
        # and stripping, without repeated string concatenation.
        description = ' '.join(texts).strip()
        summaries.append(f"{title}: {description}")
    return '\n'.join(summaries)

def summarize_educations(educations: List[Dict]) -> str:
    """Format education entries as ``"<title> (<subtitle>, <caption>)"``.

    Args:
        educations: Education dictionaries; entries without a truthy
            ``title`` are skipped. ``None`` is treated as an empty list.

    Returns:
        The formatted entries joined with ``", "``; ``""`` when there are
        none. A missing or null ``subtitle``/``caption`` is rendered as an
        empty string rather than the text ``"None"``.
    """
    entries = []
    for education in educations or []:
        title = education.get('title')
        if not title:
            continue
        subtitle = education.get('subtitle')
        caption = education.get('caption')
        # Explicit None checks keep null values out of the f-string, where
        # they would otherwise appear as the literal text "None".
        if subtitle is None:
            subtitle = ''
        if caption is None:
            caption = ''
        entries.append(f"{title} ({subtitle}, {caption})")
    return ', '.join(entries)

def summarize_certs(certs: List[Dict]) -> str:
    """Format certificate entries as ``"<title> (<subtitle>, <caption>)"``.

    Args:
        certs: Certificate dictionaries; entries without a truthy ``title``
            are skipped. ``None`` is treated as an empty list.

    Returns:
        The formatted entries joined with ``", "``; ``""`` when there are
        none. A missing or null ``subtitle``/``caption`` is rendered as an
        empty string rather than the text ``"None"``.
    """
    entries = []
    for cert in certs or []:
        title = cert.get('title')
        if not title:
            continue
        subtitle = cert.get('subtitle')
        caption = cert.get('caption')
        # Explicit None checks keep null values out of the f-string, where
        # they would otherwise appear as the literal text "None".
        if subtitle is None:
            subtitle = ''
        if caption is None:
            caption = ''
        entries.append(f"{title} ({subtitle}, {caption})")
    return ', '.join(entries)

def summarize_test_scores(scores: List[Dict]) -> str:
    """Format test-score entries as ``"<title> (<subtitle>)"``.

    Args:
        scores: Test-score dictionaries; entries without a truthy ``title``
            are skipped. ``None`` is treated as an empty list.

    Returns:
        The formatted entries joined with ``", "``; ``""`` when there are
        none. A missing or null ``subtitle`` is rendered as an empty string
        rather than the text ``"None"``.
    """
    entries = []
    for score in scores or []:
        title = score.get('title')
        if not title:
            continue
        subtitle = score.get('subtitle')
        # Only None is replaced, so a falsy but real value such as 0 is
        # still shown.
        if subtitle is None:
            subtitle = ''
        entries.append(f"{title} ({subtitle})")
    return ', '.join(entries)

def summarize_generic(items: List[Dict], key='title') -> str:
    """Join the non-empty value stored under *key* in each item with ``", "``.

    Args:
        items: Dictionaries to summarize; entries whose *key* value is
            missing or falsy are skipped. ``None`` is treated as an empty
            list.
        key: Name of the field to collect from each item.

    Returns:
        A comma-separated string of the collected values, or ``""`` when
        there are none.
    """
    # `or []` guards against a null list, which would otherwise raise
    # TypeError when iterated.
    values = (item.get(key) for item in (items or []))
    return ', '.join(value for value in values if value)


# === Preprocess raw profile into summarized profile ===
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Flatten a raw profile dictionary into a dictionary of summary fields.

    Scalar fields are copied under new keys, and list-valued fields are
    collapsed to strings by the ``summarize_*`` helpers.

    Args:
        raw_profile: Raw profile data keyed by names such as ``fullName``,
            ``linkedinUrl``, ``skills`` and ``projects``. Any key may be
            absent or hold ``None``.

    Returns:
        A dictionary with the keys ``FullName``, ``profile_url``,
        ``Headline``, ``JobTitle``, ``CompanyName``, ``CompanyIndustry``,
        ``CurrentJobDuration``, ``About``, ``Experiences``, ``Skills``,
        ``Educations``, ``Certifications``, ``HonorsAndAwards``,
        ``Verifications``, ``Highlights``, ``Projects``, ``Publications``,
        ``Patents``, ``Courses`` and ``TestScores``.
    """
    def text(field: str) -> Any:
        # dict.get's default only covers a missing key; an explicit null
        # must also become "" so None never leaks into the result.
        value = raw_profile.get(field)
        return "" if value is None else value

    def entries(field: str) -> List[Dict]:
        # A null list is replaced with [] so the summarizers can iterate it.
        return raw_profile.get(field) or []

    return {
        "FullName": text("fullName"),
        "profile_url": text("linkedinUrl"),
        "Headline": text("headline"),
        "JobTitle": text("jobTitle"),
        "CompanyName": text("companyName"),
        "CompanyIndustry": text("companyIndustry"),
        # str() of a None duration would produce the literal "None";
        # text() maps it to "" first.
        "CurrentJobDuration": str(text("currentJobDuration")),
        "About": text("about"),
        "Experiences": summarize_generic(entries("experiences"), key='title'),
        "Skills": summarize_skills(entries("skills")),
        "Educations": summarize_educations(entries("educations")),
        "Certifications": summarize_certs(entries("licenseAndCertificates")),
        "HonorsAndAwards": summarize_generic(entries("honorsAndAwards"), key='title'),
        "Verifications": summarize_generic(entries("verifications"), key='title'),
        "Highlights": summarize_generic(entries("highlights"), key='title'),
        "Projects": summarize_projects(entries("projects")),
        "Publications": summarize_generic(entries("publications"), key='title'),
        "Patents": summarize_generic(entries("patents"), key='title'),
        "Courses": summarize_generic(entries("courses"), key='title'),
        "TestScores": summarize_test_scores(entries("testScores")),
    }

# === Create & fill state ===


def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str,Any]:
    """Build the initial chatbot state dictionary from a raw profile.

    The state holds the preprocessed profile, its normalized URL, a
    ``sections`` mapping that exposes each profile field under a
    snake_case name (always a string, never ``None``), empty placeholders
    for tool outputs, and an empty message history.

    Args:
        raw_profile: Raw profile data, passed to ``preprocess_profile``.

    Returns:
        The freshly created state dictionary.
    """
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    # (section name in the state, key in the preprocessed profile)
    section_sources = (
        ("about", "About"),
        ("headline", "Headline"),
        ("skills", "Skills"),
        ("projects", "Projects"),
        ("educations", "Educations"),
        ("certifications", "Certifications"),
        ("honors_and_awards", "HonorsAndAwards"),
        ("experiences", "Experiences"),
        ("publications", "Publications"),
        ("patents", "Patents"),
        ("courses", "Courses"),
        ("test_scores", "TestScores"),
        ("verifications", "Verifications"),
        ("highlights", "Highlights"),
        ("job_title", "JobTitle"),
        ("company_name", "CompanyName"),
        ("company_industry", "CompanyIndustry"),
        ("current_job_duration", "CurrentJobDuration"),
        ("full_name", "FullName"),
    )

    state: Dict[str, Any] = {}

    # Preprocessed profile and its normalized URL.
    state["profile"] = profile
    state["profile_url"] = normalize_url(profile.get("profile_url","") or "")

    # Per-section strings; `or ""` turns any falsy value (including None)
    # into an empty string.
    state["sections"] = {
        section: profile.get(source, "") or ""
        for section, source in section_sources
    }

    # Placeholders populated later by tools.
    state["enhanced_content"] = {}      # Populated by ContentGenerator tool
    state["profile_analysis"] = None    # Optional, unset initially
    state["job_fit"] = None             # Optional, unset initially
    state["target_role"] = None         # Optional[str]
    state["editing_section"] = None     # Optional[str]

    # Chat history: list of dicts like {"role": "user", "content": "..."}
    state["messages"] = []
    state["next_tool_name"] = None

    return state