# WhatsApp chat RAG chatbot: Gradio UI + ChromaDB retrieval + flan-t5-base generation.
import gradio as gr
import chromadb
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import re
# --- 1. Load Models ---
print("Loading sentence-transformer model for retrieval...")
retriever_model = SentenceTransformer('all-MiniLM-L6-v2')
print("Retriever model loaded.")

# --- THIS IS THE UPDATED LINE ---
print("Loading generative model for answering (google/flan-t5-base)...")
# Using the balanced 'base' model for better performance and reliability.
# device=-1 forces CPU inference.
generator_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
print("Generative model loaded.")
# --- END OF UPDATE ---

# --- 2. Setup ChromaDB ---
client = chromadb.Client()
try:
    # create_collection raises ValueError if the collection already exists;
    # the except branch below reuses it instead of re-ingesting everything
    # (e.g. on a Gradio hot-reload within the same process).
    collection = client.create_collection("whatsapp_chat_v2")
    print("ChromaDB collection created.")

    # --- Data Loading and Cleaning ---
    try:
        print("Loading data from my_data.txt...")
        with open('my_data.txt', 'r', encoding='utf-8') as f:
            lines = [line.strip() for line in f if line.strip()]

        # Strip the "[timestamp] sender:" prefix of a WhatsApp export line,
        # keeping only the message text (group 1).
        message_pattern = re.compile(r'^\[?.*?\]?\s*.*?:\s*(.*)')
        cleaned_documents = [
            m.group(1).strip()
            for line in lines
            if (m := message_pattern.match(line)) and m.group(1)
        ]

        if not cleaned_documents:
            print("ERROR: Could not extract any valid messages from my_data.txt.")
            # Sentinel document: chatbot_response detects the "Error:" prefix
            # in retrieved results and apologises instead of hallucinating.
            cleaned_documents = ["Error: The data file 'my_data.txt' could not be processed."]
        else:
            print(f"Successfully loaded and cleaned {len(cleaned_documents)} messages.")
        # Assigned on both paths so the batch loop below always has input.
        documents = cleaned_documents
    except FileNotFoundError:
        print("Error: my_data.txt not found.")
        documents = ["Error: my_data.txt not found. Please make sure the file is uploaded."]

    # --- Batch Processing ---
    # Embed and insert in chunks to keep memory bounded on large exports.
    batch_size = 5000
    print("Starting to process and add documents in batches...")
    for i in range(0, len(documents), batch_size):
        batch_docs = documents[i:i + batch_size]  # slice自动 clamps at the end
        print(f"Processing batch of {len(batch_docs)} documents...")
        batch_embeddings = retriever_model.encode(batch_docs)
        # Stable ids keyed by absolute document position.
        batch_ids = [f"id_{j}" for j in range(i, i + len(batch_docs))]
        collection.add(
            embeddings=batch_embeddings.tolist(),
            documents=batch_docs,
            ids=batch_ids,
        )
    print("All documents have been successfully added to ChromaDB.")
except ValueError:
    # Collection already exists -- reuse the previously ingested data.
    collection = client.get_collection("whatsapp_chat_v2")
    print("ChromaDB collection loaded.")
# --- 3. Define Chatbot Logic ---
def chatbot_response(message, history):
    """Answer *message* with RAG over the indexed WhatsApp chat.

    Args:
        message: The user's question (plain text).
        history: Prior chat turns supplied by gr.ChatInterface (unused).

    Returns:
        The generated answer string, or an apology when nothing relevant
        could be retrieved.
    """
    # Embed the query with the same model used at ingest time.
    query_embedding = retriever_model.encode([message]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=5  # Using 5 results is a good balance for the base model
    )
    # Guard against a missing/empty 'documents' key so an unexpected query
    # result degrades to the apology path instead of raising.
    documents_lists = results.get('documents') or [[]]
    retrieved_documents = documents_lists[0]
    # The ingest step stores "Error: ..." sentinel documents when the data
    # file was missing or unparseable -- treat those as "nothing found".
    if not retrieved_documents or "Error:" in retrieved_documents[0]:
        return "I'm sorry, I couldn't find any relevant information in the chat history. 🤔"

    context = "\n- ".join(retrieved_documents)
    prompt = f"""
Based on the following excerpts from a WhatsApp chat, provide a helpful and accurate answer to the user's question.
Chat Context:
- {context}
Question:
{message}
Answer:
"""
    # Beam search with early stopping keeps answers concise and coherent.
    generated_text = generator_pipe(prompt, max_length=150, num_beams=5, early_stopping=True)
    response = generated_text[0]['generated_text']
    return response
# --- 4. Create the Gradio Interface ---
# Canned questions shown under the input box as clickable examples.
example_questions = [
    "What was the final decision on the project deadline?",
    "Summarize the conversation about the event.",
]

# Wire the RAG pipeline into a chat UI.
iface = gr.ChatInterface(
    fn=chatbot_response,
    examples=example_questions,
    cache_examples=False,
    theme="soft",
    title="WhatsApp Chat Bot ⚡️",
    description="Ask me anything about this WhatsApp chat history. (Powered by flan-t5-base)",
)

# Launch the app
iface.launch()