import re from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig from langchain.memory import ConversationBufferMemory import torch, unsloth, triton # ─── LOAD MODEL & TOKENIZER ───────────────────────────────────────────── # Adjust paths or HF repo as needed FINETUNED_DIR = "/content/drive/MyDrive/bitext-qlora-tinyllama" bnb_cfg = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16 ) tokenizer = AutoTokenizer.from_pretrained(FINETUNED_DIR, use_fast=False) tokenizer.pad_token_id = tokenizer.eos_token_id tokenizer.padding_side = "left" tokenizer.truncation_side = "right" model = AutoModelForCausalLM.from_pretrained( FINETUNED_DIR, quantization_config=bnb_cfg, device_map="auto", trust_remote_code=True ) # ─── MEMORY & PIPELINE ───────────────────────────────────────────────── memory = ConversationBufferMemory(memory_key="user_lines", human_prefix="User", ai_prefix="Assistant", return_messages=False) stored_order = None pending_intent = None chat_pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, trust_remote_code=True, return_full_text=False ) # ─── HELPERS & HANDLERS ──────────────────────────────────────────────── order_re = re.compile(r"#(\\d{1,10})") def extract_order(text): m = order_re.search(text) return m.group(1) if m else None def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days." def handle_eta(o): return (f"Delivery for order #{o} typically takes 3–5 days; " f"you can track it at https://track.example.com/{o}") def handle_track(o): return f"Track order #{o} here: https://track.example.com/{o}" def handle_link(o): return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}" def handle_return_policy(_=None): return ("Our return policy allows returns of unused items in their original packaging " "within 30 days of receipt. Would you like me to connect you with a human agent?") def handle_gratitude(_=None): return "You’re welcome! Is there anything else I can help with?" def handle_escalation(_=None): return "I’m sorry, I don’t have that information. Would you like me to connect you with a human agent?" # ─── MAIN CHAT FUNCTION ──────────────────────────────────────────────── def chat_with_memory(user_input: str) -> str: global stored_order, pending_intent memory.save_context({"input": user_input}, {"output": ""}) new_o = extract_order(user_input) if new_o: stored_order = new_o if pending_intent in ("status","eta","track","link"): fn = {"status":handle_status,"eta":handle_eta, "track":handle_track,"link":handle_link}[pending_intent] reply = fn(stored_order) pending_intent = None memory.save_context({"input": user_input}, {"output": reply}) return reply ui = user_input.lower().strip() if any(tok in ui for tok in ["thank you","thanks","thx"]): reply = handle_gratitude() elif "return" in ui: reply = handle_return_policy() elif any(k in ui for k in ["status","where is my order","check status"]): intent = "status" elif any(k in ui for k in ["how long","eta","delivery time"]): intent = "eta" elif any(k in ui for k in ["how can i track","track my order","where is my package"]): intent = "track" elif "tracking link" in ui or "resend" in ui: intent = "link" else: intent = "fallback" if intent in ("status","eta","track","link"): if not stored_order: pending_intent = intent reply = "Sure—what’s your order number (e.g., #12345)?" else: reply = {"status":handle_status,"eta":handle_eta, "track":handle_track,"link":handle_link}[intent](stored_order) else: reply = handle_escalation() memory.save_context({"input": user_input}, {"output": reply}) return reply