|
|
import openai |
|
|
import numpy as np |
|
|
import re |
|
|
from typing import List, Tuple |
|
|
from config import EMBED_MODEL |
|
|
|
|
|
def get_embedding(text: str) -> List[float]:
    """Return the embedding vector for ``text``.

    Newlines are replaced with spaces and surrounding whitespace is trimmed
    before the text is sent to the embeddings endpoint using the model named
    by ``EMBED_MODEL``.

    Args:
        text: Raw text to embed.

    Returns:
        The ``embedding`` of the first item in the API response.
    """
    # Same effect as replacing every "\n" with a single space.
    cleaned = " ".join(text.split("\n")).strip()
    result = openai.embeddings.create(model=EMBED_MODEL, input=[cleaned])
    first_item = result.data[0]
    return first_item.embedding
|
|
|
|
|
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Calculate the cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a builtin ``float``, or ``0.0`` when
        either vector has zero norm (the ratio is undefined there).
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    # Compute each norm once; they are needed for both the zero check and
    # the denominator.
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    if norm_a == 0 or norm_b == 0:
        return 0.0

    # Convert from the NumPy scalar so the return type matches the annotation.
    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))
|
|
|
|
|
def clean_time(time_str: str) -> str:
    """Normalize a time string to ``H:MM AM/PM`` when a clock time is present.

    Args:
        time_str: Free-form text that may contain a time such as ``7pm`` or
            ``10:30 am``.

    Returns:
        ``""`` for a falsy input; ``"<hour>:<minute> <AM|PM>"`` built from the
        first match (minutes default to ``00`` when absent); otherwise the
        input with surrounding whitespace stripped.
    """
    if not time_str:
        return ""

    found = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
    if found is None:
        # Nothing that looks like a 12-hour clock time: hand back trimmed text.
        return time_str.strip()

    hour, minute, meridiem = found.groups()
    if not minute:
        minute = "00"
    return f"{hour}:{minute} {meridiem.upper()}"
|
|
|
|
|
def find_top_k_matches(user_embedding, dataset, k=3):
    """Find the top ``k`` entries of ``dataset`` most similar to an embedding.

    Args:
        user_embedding: Query vector passed to ``cosine_similarity``.
        dataset: Iterable of ``(entry_id, text, embedding)`` triples.
        k: Maximum number of results to return.

    Returns:
        A list of at most ``k`` ``(score, entry_id, text)`` tuples ordered by
        descending score. Entries with equal scores keep their dataset order.
    """
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    # Sort on the score alone. Comparing whole tuples would fall through to
    # entry_id/text on tied scores and raise TypeError for non-orderable ids.
    scored.sort(key=lambda item: item[0], reverse=True)
    return scored[:k]
|
|
|
|
|
def classify_intent(question: str) -> str:
    """Classify a user question as ``"Mode A"`` or ``"Mode B"``.

    Mode A is Recommendation Mode (workshops, dates, availability,
    recommendations); Mode B is Front Desk Mode and is the default for
    everything else.

    Args:
        question: The user's question, interpolated into the prompt.

    Returns:
        ``"Mode A"`` when the model's reply contains that text; ``"Mode B"``
        otherwise, including when the API call raises.
    """
    prompt = f"""Classify the following user question into one of two modes:
1. "Mode A - Recommendation Mode": Use this if the user is asking about workshops, specific dates, what's available this month, asking for recommendations, or career goals (like getting an agent).
2. "Mode B - Front Desk Mode": Use this for broad introductory questions, kids classes, signing up, summit, instructor roles, auditing, online vs in-studio, general policies, or specific questions about existing classes.

User Question: "{question}"

Response must be exactly "Mode A" or "Mode B"."""

    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            temperature=0,
            max_tokens=5,
        )
        answer = completion.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort: any failure falls back to the default mode.
        print(f"Error in intent classification: {e}")
        return "Mode B"

    return "Mode A" if "Mode A" in answer else "Mode B"
|
|
|
|
|
def should_include_email(question: str) -> bool:
    """Decide whether the contact email should be shown for this question.

    The question is lower-cased and checked for any entry of
    ``EMAIL_ONLY_KEYWORDS`` appearing as a whole word.
    NOTE(review): the keyword list lives in ``config``; per the original
    docstring it is meant to cover payments, refunds, attendance issues and
    account problems -- confirm against config.

    Args:
        question: The user's question.

    Returns:
        ``True`` if at least one keyword matches on word boundaries,
        ``False`` otherwise.
    """
    from config import EMAIL_ONLY_KEYWORDS

    lowered = question.lower()
    return any(
        re.search(rf'\b{re.escape(keyword)}\b', lowered) is not None
        for keyword in EMAIL_ONLY_KEYWORDS
    )
|
|
|
|
|
def classify_user_type(question: str, history: List[dict] = None) -> str:
    """Classify the user into one of a fixed set of user types.

    The possible results are ``new_actor``, ``experienced_actor``,
    ``parent``, ``current_student`` and ``unknown``.

    Args:
        question: The user's latest question.
        history: Optional list of chat messages (dicts with ``role`` and
            ``content``). Only the last three are used, each truncated to
            100 characters, as extra context in the prompt.

    Returns:
        The first valid type found in the model's reply, or ``"unknown"``
        when none is found or the API call raises.
    """
    history_str = ""
    if history:
        context_lines = []
        for m in history[-3:]:
            # This runs outside the try below, so read keys defensively:
            # a message may lack a key or carry non-text content
            # (e.g. ``content`` set to None).
            role = m.get("role", "unknown")
            content = m.get("content") or ""
            if not isinstance(content, str):
                content = str(content)
            context_lines.append(f"{role}: {content[:100]}...")
        history_str = "\nConversation context:\n" + "\n".join(context_lines)

    prompt = f"""Classify the user into exactly one of these categories based on their question and context:
1. "new_actor": Just starting out, has no experience, or is asking how to begin.
2. "experienced_actor": Already has credits, mentions agents, looking for advanced workshops, or refers to their career progress.
3. "parent": Asking on behalf of their child, mentions "my kid", "my son", "my daughter", "teens".
4. "current_student": Refers to past/current classes at Get Scene, mentions a specific GSP membership, or asks about recurring student workshops.
5. "unknown": Not enough information yet.

User Question: "{question}"{history_str}

Response must be exactly one of: new_actor, experienced_actor, parent, current_student, unknown."""

    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=10,
        )
        prediction = response.choices[0].message.content.strip().lower()
        valid_types = ("new_actor", "experienced_actor", "parent", "current_student", "unknown")
        # Substring match tolerates quotes or punctuation around the label.
        for t in valid_types:
            if t in prediction:
                return t
        return "unknown"
    except Exception as e:
        # Best-effort: any failure falls back to "unknown".
        print(f"Error in user type classification: {e}")
        return "unknown"