import os
import re

import pandas as pd
import requests
from flask import Flask, request, jsonify
from flask_cors import CORS
from fuzzywuzzy import fuzz, process

app = Flask(__name__)
CORS(app)

# --- Configuration ---
# Get the Hugging Face API token from the Space's secrets
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
# The URL for the Sentence Similarity Inference API
API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/paraphrase-MiniLM-L6-v2"
KNOWLEDGE_BASE_PATH = 'dataset.csv'

# --- Load Data Once on Startup ---
# Only a missing file is tolerated here: the app still starts and the chat
# route reports 503. Any other load error propagates at import time.
try:
    df = pd.read_csv(KNOWLEDGE_BASE_PATH)
    df.dropna(subset=['questions', 'answers'], inplace=True)
    # Built after dropna, so position i in this list is row i of df (iloc).
    QUESTIONS_LIST = df['questions'].tolist()
    print("--- Knowledge base loaded successfully. ---")
except FileNotFoundError:
    df = None
    QUESTIONS_LIST = []
    print(f"FATAL: Knowledge base file not found at {KNOWLEDGE_BASE_PATH}")

# Canned small-talk replies, checked in insertion order (first match wins).
_GREETINGS = {
    "hello": "Hello! 👋 How can I help you today?",
    "hi": "Hi there! How can I assist you?",
    "hey": "Hey! What can I do for you?",
    "how are you": "I'm just a bot, but I'm ready to help! 😊",
    "thank you": "You're welcome! Is there anything else I can help with?",
    "thanks": "You're welcome!",
    "bye": "Thank you for chatting. Goodbye! 👋",
    "goodbye": "Goodbye! Have a great day. 👋",
}

# Greetings this short must match a word exactly; fuzzy scores on two- or
# three-letter words are too noisy (e.g. "hey" vs "they").
_EXACT_ONLY_MAX_LEN = 3
_GREETING_FUZZY_THRESHOLD = 85


def get_greeting_response(text):
    """Return a canned reply if *text* contains a greeting, else ``None``.

    Matching is done on whole words: the message is split into lowercase
    words and every run of consecutive words with the same word count as a
    greeting is compared against it. Short greetings require an exact match;
    longer ones also accept a close fuzzy match to tolerate typos. Comparing
    whole words (rather than arbitrary substrings) keeps "this" or
    "shipping" from being read as "hi".

    Args:
        text: The raw user message.

    Returns:
        The reply string for the first matching greeting, or ``None``.
    """
    words = re.findall(r"[a-z']+", text.lower())
    for greet, response in _GREETINGS.items():
        window = len(greet.split())
        allow_fuzzy = len(greet) > _EXACT_ONLY_MAX_LEN
        for start in range(len(words) - window + 1):
            candidate = " ".join(words[start:start + window])
            if candidate == greet:
                return response
            if allow_fuzzy and fuzz.ratio(greet, candidate) > _GREETING_FUZZY_THRESHOLD:
                return response
    return None


def get_bot_response(user_query):
    """Produce the bot's reply for *user_query*.

    Resolution order:
      1. Greeting / small talk (``get_greeting_response``).
      2. Close fuzzy match against the knowledge-base questions.
      3. Sentence-similarity scores from the Hugging Face Inference API.

    Args:
        user_query: The user's message text.

    Returns:
        The reply string: a greeting, a knowledge-base answer, a
        configuration/connectivity error message, or a generic
        "no information" fallback.
    """
    no_answer = "I'm sorry, I don't have information on that topic. Could you try asking in a different way?"

    # First, check for simple greetings
    greeting = get_greeting_response(user_query)
    if greeting:
        return greeting

    # Nothing to search: extractOne would return None and the API call
    # would be pointless.
    if df is None or not QUESTIONS_LIST:
        return no_answer

    # Then, check for a very close fuzzy match to avoid unnecessary API calls
    match = process.extractOne(user_query, QUESTIONS_LIST, scorer=fuzz.token_sort_ratio)
    if match is not None:
        best_match, score = match[0], match[1]
        if score > 85:
            idx = QUESTIONS_LIST.index(best_match)
            return df.iloc[idx]['answers']

    # If no close match, use the powerful Sentence Similarity API
    if not HF_API_TOKEN:
        return "Error: Hugging Face API token is not configured."

    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": {
            "source_sentence": user_query,
            "sentences": QUESTIONS_LIST,
        }
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        scores = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"API Request Error: {e}")
        return "Sorry, I'm having trouble reaching my knowledge circuits right now."

    # The scores are used positionally against df, so only trust a list
    # with exactly one entry per question.
    if isinstance(scores, list) and len(scores) == len(QUESTIONS_LIST):
        best_score_index = max(range(len(scores)), key=scores.__getitem__)
        # If the best score is above a certain threshold, return the answer
        if scores[best_score_index] > 0.5:
            return df.iloc[best_score_index]['answers']

    return no_answer
@app.route('/api/chat', methods=['POST'])
def chat():
    """Handle ``POST /api/chat``.

    Expects a JSON object body with a non-empty string ``message`` field.

    Returns:
        200 ``{"response": <reply>}`` on success;
        400 ``{"error": ...}`` when the body is not a JSON object or
        ``message`` is missing, empty, or not a string;
        503 ``{"error": ...}`` when the knowledge base was not loaded;
        500 ``{"error": ...}`` when producing the reply fails unexpectedly.
    """
    if df is None:
        return jsonify({'error': 'Service is unavailable due to a missing knowledge base.'}), 503

    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so client mistakes are answered with 400 rather than 500.
    data = request.get_json(silent=True)
    user_message = data.get('message') if isinstance(data, dict) else None
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({'error': 'No message provided'}), 400

    try:
        bot_response = get_bot_response(user_message)
        return jsonify({'response': bot_response})
    except Exception as e:
        # Top-level boundary: log and hide internals from the client.
        print(f"Error in /api/chat: {e}")
        return jsonify({'error': 'An internal server error occurred'}), 500