import time

import gradio as gr
import torch
from datasets import load_dataset, Dataset
from sentence_transformers import SentenceTransformer, util
# ---------------------------
# CONFIGURATION
# ---------------------------
HF_TOKEN = "<YOUR_HF_TOKEN>"      # set your Hugging Face token
DATASET_NAME = "guardian-ai-qna"  # Hub dataset repo used as the Q&A store
MAX_QUESTIONS = 5                 # max questions per TIME_WINDOW
TIME_WINDOW = 3600                # window length in seconds (1 hour)
EMBED_MODEL = "all-MiniLM-L6-v2"  # small but effective embedding model
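# A safer alternative (assumption: running on a Space with an HF_TOKEN secret
# configured): read the token from the environment instead of hardcoding it.
#
#   import os
#   HF_TOKEN = os.environ.get("HF_TOKEN")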
# ---------------------------
# LOAD OR CREATE DATASET
# ---------------------------
try:
    dataset = load_dataset(DATASET_NAME, token=HF_TOKEN)["train"]
except Exception:
    # First run: the dataset repo does not exist yet, so start empty
    dataset = Dataset.from_dict({"question": [], "answer": []})
# ---------------------------
# EMBEDDING MODEL
# ---------------------------
embedder = SentenceTransformer(EMBED_MODEL)

# Precompute embeddings for the existing questions
if len(dataset) > 0:
    dataset_embeddings = embedder.encode(dataset["question"], convert_to_tensor=True)
else:
    dataset_embeddings = torch.empty((0, embedder.get_sentence_embedding_dimension()))
# ---------------------------
# USER RATE LIMITING
# ---------------------------
user_limits = {}  # session_id -> {"count": int, "start_time": float}

def check_rate_limit(session_id):
    """Allow at most MAX_QUESTIONS questions per TIME_WINDOW per session."""
    current_time = time.time()
    if session_id not in user_limits:
        user_limits[session_id] = {"count": 0, "start_time": current_time}
    user_data = user_limits[session_id]
    # Reset the counter once the window has elapsed
    if current_time - user_data["start_time"] > TIME_WINDOW:
        user_data["count"] = 0
        user_data["start_time"] = current_time
    if user_data["count"] >= MAX_QUESTIONS:
        return False, f"You have reached the max of {MAX_QUESTIONS} questions. Please wait before asking more."
    user_data["count"] += 1
    return True, None
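# Quick sanity sketch for the limiter (hypothetical session id "demo"):
#
#   for _ in range(MAX_QUESTIONS):
#       assert check_rate_limit("demo")[0]        # first MAX_QUESTIONS calls pass
#   assert not check_rate_limit("demo")[0]        # the next call within the window is refused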
# ---------------------------
# HELPER FUNCTIONS
# ---------------------------
def find_similar_answer(user_input):
    """Return the stored answer whose question is most similar, or None."""
    if len(dataset) == 0:
        return None
    query_emb = embedder.encode(user_input, convert_to_tensor=True)
    scores = util.cos_sim(query_emb, dataset_embeddings)  # shape (1, N)
    top_idx = int(torch.argmax(scores))
    top_score = scores[0][top_idx].item()
    if top_score > 0.6:  # similarity threshold; tune for your data
        return dataset["answer"][top_idx]
    return None
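# Illustrative expectation (not measured): with all-MiniLM-L6-v2, a stored
# question "What is Guardian AI?" and a query "Tell me about Guardian AI"
# typically score well above 0.6 cosine similarity, while unrelated queries
# fall below it; adjust the threshold if matches are too loose or too strict.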
def save_qna(question, answer):
    """Append one Q&A pair, update the embedding cache, and push to the Hub."""
    global dataset, dataset_embeddings
    dataset = Dataset.from_dict({
        "question": dataset["question"] + [question],
        "answer": dataset["answer"] + [answer],
    })
    # Update embeddings incrementally instead of re-encoding everything
    new_emb = embedder.encode([question], convert_to_tensor=True)
    if len(dataset_embeddings) == 0:
        dataset_embeddings = new_emb
    else:
        dataset_embeddings = torch.vstack([dataset_embeddings, new_emb])
    # Persist to the Hugging Face Hub
    dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
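# Note: push_to_hub() uploads the dataset on every question. For a busy Space,
# batching commits (for example with huggingface_hub.CommitScheduler) would cut
# Hub traffic; the per-message push is kept here for simplicity.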
# ---------------------------
# MAIN CHAT FUNCTION
# ---------------------------
def chat(history, user_input, session_id="default"):
    # Rate limit check
    allowed, message = check_rate_limit(session_id)
    if not allowed:
        history.append((user_input, message))
        return history, history
    # Look for a sufficiently similar stored question
    response = find_similar_answer(user_input)
    if not response:
        # Fallback response; the pair is still stored so the question is captured
        response = "Guardian AI: Sorry, I don’t know the answer yet. I’m learning!"
    # Save the new Q&A pair for incremental learning
    save_qna(user_input, response)
    # Update chat history
    history.append((user_input, response))
    return history, history
# ---------------------------
# GRADIO INTERFACE
# ---------------------------
with gr.Blocks() as app:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    session_state = gr.State("default")  # single shared session id (see note below)

    def user_submit(message, history, session_id):
        new_history, _ = chat(history, message, session_id)
        return new_history, ""  # clear the textbox after each submit

    msg.submit(user_submit, inputs=[msg, chatbot, session_state], outputs=[chatbot, msg])
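# Note (assumption): gr.State("default") gives every visitor the same session id,
# so the rate limit above is effectively global. To limit per browser session,
# a fresh id could be assigned when the page loads:
#
#   import uuid
#   session_state = gr.State()
#   app.load(lambda: uuid.uuid4().hex, inputs=None, outputs=session_state)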
# Launch app (share=True is not supported inside a HF Space; it only matters locally)
app.launch(server_name="0.0.0.0", server_port=7860, share=True)