from typing import List, Dict, Any
from urllib.parse import urlparse
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url) -> str:
    """Return *url* without surrounding whitespace or trailing slashes.

    Whitespace is stripped first so that a slash followed by trailing
    whitespace is still removed. A missing URL (``None``) is normalized to
    the empty string instead of raising ``AttributeError``.
    """
    if url is None:
        return ""
    return url.strip().rstrip('/')

def summarize_skills(skills: List[Dict]) -> str:
    """Return the skill titles joined into one comma-separated string.

    Entries whose ``title`` is missing or falsy are skipped. ``None`` (for
    example a JSON ``null`` in the scraped data) is treated as an empty list
    rather than raising ``TypeError``.
    """
    return ', '.join(s['title'] for s in (skills or []) if s.get('title'))

def summarize_projects(projects: List[Dict]) -> str:
    """Return one ``"<title>: <description>"`` line per project.

    The description is built from every ``text`` of the entries in
    ``subComponents[*]['description']`` whose ``type`` is ``'textComponent'``,
    joined with single spaces and stripped of surrounding whitespace.

    ``None`` values for the project list, ``subComponents``, ``description``,
    ``text`` or ``title`` are treated as empty instead of raising or being
    rendered as the literal string ``"None"``.
    """
    summaries = []
    for project in projects or []:
        title = project.get('title') or ''
        # Collect fragments and join once instead of growing a string with +=.
        texts = []
        for component in project.get('subComponents') or []:
            for part in component.get('description') or []:
                if part.get('type') == 'textComponent':
                    texts.append(part.get('text') or '')
        description = ' '.join(texts).strip()
        summaries.append(f"{title}: {description}")
    return '\n'.join(summaries)

def summarize_educations(educations: List[Dict]) -> str:
    """Return educations as ``"<title> (<subtitle>, <caption>)"`` entries.

    Entries are joined with ``", "``; entries without a truthy ``title`` are
    skipped. A ``None`` list is treated as empty, and a missing or null
    ``subtitle`` / ``caption`` renders as an empty string rather than the
    literal ``"None"``.
    """
    entries = []
    for education in educations or []:
        title = education.get('title')
        if not title:
            continue
        subtitle = education.get('subtitle') or ''
        caption = education.get('caption') or ''
        entries.append(f"{title} ({subtitle}, {caption})")
    return ', '.join(entries)

def summarize_certs(certs: List[Dict]) -> str:
    """Return certificates as ``"<title> (<subtitle>, <caption>)"`` entries.

    Entries are joined with ``", "``; entries without a truthy ``title`` are
    skipped. A ``None`` list is treated as empty, and a missing or null
    ``subtitle`` / ``caption`` renders as an empty string rather than the
    literal ``"None"``.
    """
    entries = []
    for cert in certs or []:
        title = cert.get('title')
        if not title:
            continue
        subtitle = cert.get('subtitle') or ''
        caption = cert.get('caption') or ''
        entries.append(f"{title} ({subtitle}, {caption})")
    return ', '.join(entries)

def summarize_test_scores(scores: List[Dict]) -> str:
    """Return test scores as ``"<title> (<subtitle>)"`` entries.

    Entries are joined with ``", "``; entries without a truthy ``title`` are
    skipped. A ``None`` list is treated as empty, and a missing or null
    ``subtitle`` renders as an empty string rather than the literal
    ``"None"``.
    """
    entries = []
    for score in scores or []:
        title = score.get('title')
        if not title:
            continue
        subtitle = score.get('subtitle') or ''
        entries.append(f"{title} ({subtitle})")
    return ', '.join(entries)

def summarize_generic(items: List[Dict], key='title') -> str:
    """Return the values stored under *key* joined with ``", "``.

    Items whose value for *key* is missing or falsy are skipped. ``None``
    (for example a JSON ``null`` in the scraped data) is treated as an empty
    list rather than raising ``TypeError``.
    """
    return ', '.join(item[key] for item in (items or []) if item.get(key))


# === Preprocess raw profile into summarized profile ===
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Condense a raw scraped profile into a flat dict of summary values.

    Scalar fields are copied from *raw_profile*; list-valued sections are
    reduced to strings by the ``summarize_*`` helpers.

    ``dict.get(key, default)`` only falls back to the default when the key is
    absent, so an explicit ``None`` (JSON ``null``) used to leak through: list
    sections crashed the summarizers and ``CurrentJobDuration`` became the
    string ``"None"``. Here both missing and ``None`` values are normalized
    to ``""`` (scalars) or ``[]`` (lists).
    """
    def text(key: str) -> Any:
        # Missing or null scalar -> ""; any other value is passed through.
        value = raw_profile.get(key)
        return "" if value is None else value

    def items(key: str) -> List[Dict]:
        # Missing or null list section -> empty list.
        return raw_profile.get(key) or []

    return {
        "FullName": text("fullName"),
        "profile_url": text("linkedinUrl"),
        "Headline": text("headline"),
        "JobTitle": text("jobTitle"),
        "CompanyName": text("companyName"),
        "CompanyIndustry": text("companyIndustry"),
        # str() keeps numeric durations (including 0) as text.
        "CurrentJobDuration": str(text("currentJobDuration")),
        "About": text("about"),
        "Experiences": summarize_generic(items("experiences"), key='title'),
        "Skills": summarize_skills(items("skills")),
        "Educations": summarize_educations(items("educations")),
        "Certifications": summarize_certs(items("licenseAndCertificates")),
        "HonorsAndAwards": summarize_generic(items("honorsAndAwards"), key='title'),
        "Verifications": summarize_generic(items("verifications"), key='title'),
        "Highlights": summarize_generic(items("highlights"), key='title'),
        "Projects": summarize_projects(items("projects")),
        "Publications": summarize_generic(items("publications"), key='title'),
        "Patents": summarize_generic(items("patents"), key='title'),
        "Courses": summarize_generic(items("courses"), key='title'),
        "TestScores": summarize_test_scores(items("testScores")),
    }

# === Create & fill state ===


def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str,Any]:
    """
    Build the initial chatbot state (used in LangGraph) from a raw profile.

    The returned dict contains:
    - "profile": the output of preprocess_profile(raw_profile)
    - "profile_url": the profile URL passed through normalize_url
    - "sections": per-section strings copied from the processed profile,
      with falsy values replaced by ""
    - empty placeholders for tool outputs, the chat history and the next
      tool name

    Prints the un-normalized profile URL as a side effect.
    """
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    url = normalize_url(profile.get("profile_url","") or "")

    # (section name in the state, key in the processed profile), in the
    # order the sections appear in the resulting dict.
    section_sources = (
        ("about", "About"),
        ("headline", "Headline"),
        ("skills", "Skills"),
        ("projects", "Projects"),
        ("educations", "Educations"),
        ("certifications", "Certifications"),
        ("honors_and_awards", "HonorsAndAwards"),
        ("experiences", "Experiences"),
        ("publications", "Publications"),
        ("patents", "Patents"),
        ("courses", "Courses"),
        ("test_scores", "TestScores"),
        ("verifications", "Verifications"),
        ("highlights", "Highlights"),
        ("job_title", "JobTitle"),
        ("company_name", "CompanyName"),
        ("company_industry", "CompanyIndustry"),
        ("current_job_duration", "CurrentJobDuration"),
        ("full_name", "FullName"),
    )
    # Every section must be a string, never None.
    sections = {
        name: profile.get(source, "") or ""
        for name, source in section_sources
    }

    state: Dict[str, Any] = {}
    state["profile"] = profile
    state["profile_url"] = url
    state["sections"] = sections

    # Placeholders populated later by tools.
    state["enhanced_content"] = {}      # Populated by ContentGenerator tool
    state["profile_analysis"] = None    # Optional, unset initially
    state["job_fit"] = None             # Optional, unset initially
    state["target_role"] = None         # Optional[str]
    state["editing_section"] = None     # Optional[str]

    # Chat history: Pydantic expects a list of dicts like
    # {"role": "user", "content": "..."}
    state["messages"] = []
    state["next_tool_name"] = None

    return state