File size: 3,498 Bytes
8e7f687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Message router extracted from app.py

import json
import re
from config.prompts import (
    ROUTER_SCHEMA, ROUTER_SYSTEM_PROMPT, WHY_HIRE_REGEX, 
    canonical_why_hire_pitch, CONTACT_COLLECTION_PROMPT
)
from config.settings import USE_CANONICAL_WHY_HIRE


class MessageRouter:
    """Handles message classification and routing logic"""
    
    def __init__(self, openai_client):
        self.openai = openai_client
    
    def classify(self, message: str) -> dict:
        """Classify user message using AI with regex fallback for email detection"""
        messages = [{"role": "system", "content": ROUTER_SYSTEM_PROMPT}]
        # Optionally prepend few-shots for stability:
        # messages = [{"role": "system", "content": system}, *fewshots]
        messages.append({"role": "user", "content": message})

        resp = self.openai.chat.completions.create(
            model="gemini-2.5-flash",
            messages=messages,
            response_format={
                "type": "json_schema",
                "json_schema": {"name": "router", "schema": ROUTER_SCHEMA}
            },
            temperature=0.0,
            top_p=1.0,
            max_tokens=200
        )

        try:
            parsed = json.loads(resp.choices[0].message.content)
            # Minimal defensive checks
            if not isinstance(parsed, dict) or "intent" not in parsed:
                raise ValueError("schema mismatch")
            
            # Hybrid approach: If AI missed email, catch with regex
            if parsed["intent"] != "contact_exchange":
                email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
                if re.search(email_pattern, message):
                    parsed["intent"] = "contact_exchange"
                    parsed["requires_contact"] = False
                    parsed["matched_phrases"].append("email_detected_by_regex")
            
            return parsed
        except Exception:
            # Safe, schema-conformant fallback
            return {
                "intent": "career",
                "why_hire": False,
                "requires_contact": False,
                "confidence": 0.0,
                "matched_phrases": []
            }
    
    def should_use_canonical_why_hire(self, message: str, why_hire_flag: bool, mode: str) -> bool:
        """Check if canonical pitch should be used"""
        if mode != "career":
            return False
        if WHY_HIRE_REGEX.search(message):
            return True
        if why_hire_flag:
            return True
        return False
    
    def get_response_for_route(self, message: str, route: dict, mode: str) -> str | None:
        """Get immediate response based on routing, or None to continue to chat"""
        intent = route.get("intent", "career")
        why_hire_flag = bool(route.get("why_hire"))
        requires_contact_flag = bool(route.get("requires_contact"))
        
        # Handle boundary cases
        if intent == "other":
            from config.settings import BOUNDARY_REPLY
            return BOUNDARY_REPLY
        
        # Handle contact collection for interested users
        if requires_contact_flag:
            return CONTACT_COLLECTION_PROMPT
        
        # Handle canonical "why hire" pitch
        if USE_CANONICAL_WHY_HIRE and self.should_use_canonical_why_hire(message, why_hire_flag, mode):
            return canonical_why_hire_pitch()
        
        # Continue to regular chat
        return None