# Hugging Face Space: Flask chatbot backend.
# (Removed non-Python scrape residue — "Spaces:" / "Sleeping" status banner lines.)
import os | |
import pandas as pd | |
import requests | |
from flask import Flask, request, jsonify | |
from flask_cors import CORS | |
from fuzzywuzzy import fuzz, process | |
app = Flask(__name__)
# Allow cross-origin requests so a separately hosted frontend can call this API.
CORS(app)
# --- Configuration ---
# Get the Hugging Face API token from the Space's secrets
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
# The URL for the Sentence Similarity Inference API
API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/paraphrase-MiniLM-L6-v2"
# CSV knowledge base; loader below expects 'questions' and 'answers' columns.
KNOWLEDGE_BASE_PATH = 'dataset.csv'
# --- Load Data Once on Startup ---
# This is lightweight and will not cause a crash: any load failure leaves
# df = None, which chat() translates into a 503 instead of killing the app.
try:
    df = pd.read_csv(KNOWLEDGE_BASE_PATH)
    df.dropna(subset=['questions', 'answers'], inplace=True)
    QUESTIONS_LIST = df['questions'].tolist()
    print("--- Knowledge base loaded successfully. ---")
except FileNotFoundError:
    df = None
    QUESTIONS_LIST = []
    print(f"FATAL: Knowledge base file not found at {KNOWLEDGE_BASE_PATH}")
except Exception as e:
    # e.g. malformed CSV, or the 'questions'/'answers' columns are missing
    # (dropna raises KeyError).  Previously these crashed the whole service
    # at import time; degrade to the same "unavailable" state instead.
    df = None
    QUESTIONS_LIST = []
    print(f"FATAL: Failed to load knowledge base from {KNOWLEDGE_BASE_PATH}: {e}")
def get_greeting_response(text):
    """Return a canned reply if *text* looks like a greeting/small talk, else None.

    Single-word greetings are fuzzy-matched against individual words of the
    input (tolerates typos like "helo"); multi-word phrases keep the fuzzy
    substring match.  The previous implementation used partial_ratio for
    everything, so short greetings like "hi" scored 100 inside ordinary words
    ("which", "this") and hijacked real questions.
    """
    greetings = {
        "hello": "Hello! π How can I help you today?", "hi": "Hi there! How can I assist you?",
        "hey": "Hey! What can I do for you?", "how are you": "I'm just a bot, but I'm ready to help! π",
        "thank you": "You're welcome! Is there anything else I can help with?", "thanks": "You're welcome!",
        "bye": "Thank you for chatting. Goodbye! π", "goodbye": "Goodbye! Have a great day. π"
    }
    text_lower = text.lower()
    # Strip common punctuation so "hi!" still matches "hi".
    words = [w.strip(".,!?;:'\"") for w in text_lower.split()]
    for greet, response in greetings.items():
        if " " in greet:
            # Multi-word phrase: fuzzy substring match over the whole input.
            score = fuzz.partial_ratio(greet, text_lower)
        else:
            # Single word: best fuzzy match against each input token, so
            # "hi" can no longer partial-match inside "which".
            score = max((fuzz.ratio(greet, w) for w in words), default=0)
        if score > 85:
            return response
    return None
def get_bot_response(user_query):
    """Resolve *user_query* to a user-facing answer string.

    Resolution order:
      1. canned greeting replies (cheap, local),
      2. close fuzzy match against the knowledge-base questions,
      3. Sentence Similarity Inference API (semantic match).
    Always returns a string; network failures are caught and reported.
    """
    # First, check for simple greetings
    greeting = get_greeting_response(user_query)
    if greeting:
        return greeting
    # Guard: with an empty knowledge base process.extractOne returns None,
    # and the tuple unpacking below raised TypeError.
    if not QUESTIONS_LIST:
        return "I'm sorry, I don't have information on that topic. Could you try asking in a different way?"
    # Then, check for a very close fuzzy match to avoid unnecessary API calls
    best_match, score = process.extractOne(user_query, QUESTIONS_LIST, scorer=fuzz.token_sort_ratio)
    if score > 85:
        # QUESTIONS_LIST was built with tolist() after dropna, so positions
        # line up with df's rows and iloc (positional) is the right accessor.
        idx = QUESTIONS_LIST.index(best_match)
        return df.iloc[idx]['answers']
    # If no close match, use the powerful Sentence Similarity API
    if not HF_API_TOKEN:
        return "Error: Hugging Face API token is not configured."
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": {
            "source_sentence": user_query,
            "sentences": QUESTIONS_LIST
        }
    }
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        scores = response.json()
        # The API returns a list of floats on success; on error it may return
        # a dict (e.g. {"error": ...}), which the isinstance check filters out.
        if scores and isinstance(scores, list):
            # Find the index of the highest score
            best_score_index = scores.index(max(scores))
            # Only answer when the model is reasonably confident.
            if scores[best_score_index] > 0.5:
                return df.iloc[best_score_index]['answers']
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body from .json() on older requests
        # versions where JSONDecodeError is not a RequestException subclass.
        print(f"API Request Error: {e}")
        return "Sorry, I'm having trouble reaching my knowledge circuits right now."
    return "I'm sorry, I don't have information on that topic. Could you try asking in a different way?"
# Route path taken from the error log message below — confirm against the
# frontend.  Without this decorator the handler was never registered and the
# endpoint was unreachable.
@app.route('/api/chat', methods=['POST'])
def chat():
    """POST /api/chat with body {'message': str}; returns {'response': str}.

    Responses: 503 when the knowledge base failed to load, 400 for a
    missing/invalid JSON body or empty message, 500 on unexpected errors.
    """
    if df is None:
        return jsonify({'error': 'Service is unavailable due to a missing knowledge base.'}), 503
    try:
        # silent=True returns None for a missing or malformed JSON body
        # instead of raising, so the client gets a 400 rather than the
        # generic 500 the bare except produced before.
        data = request.get_json(silent=True)
        user_message = data.get('message') if data else None
        if not user_message:
            return jsonify({'error': 'No message provided'}), 400
        bot_response = get_bot_response(user_message)
        return jsonify({'response': bot_response})
    except Exception as e:
        # Top-level boundary: log and return a generic error to the client.
        print(f"Error in /api/chat: {e}")
        return jsonify({'error': 'An internal server error occurred'}), 500