Spaces:

pradeepsengarr
/

Custom_Rag_Bot

Running

App Files Community

pradeepsengarr committed on Jun 3

Commit

14b7206

verified ·

1 Parent(s): 2b518ec

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -33

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import gradio as gr
-import fitz  # PyMuPDF
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
@@ -8,31 +8,21 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from huggingface_hub import login
-# Load Hugging Face Token from environment
 hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 if not hf_token:
-    raise ValueError("⚠️ Please set the HUGGINGFACE_TOKEN environment variable.")
 login(token=hf_token)
-# Load embedding model
 embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
-# Load small, fast LLM (great for CPU)
 model_id = "tiiuae/falcon-rw-1b"
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map={"": "cpu"},
-    torch_dtype="auto",
-    token=hf_token
-)
 llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# Globals
 index = None
 doc_texts = []
-# Extract text from PDF or TXT (handle Hugging Face Spaces file upload)
 def extract_text(file):
     text = ""
     file_path = file.name if hasattr(file, 'name') else file
@@ -44,14 +34,13 @@ def extract_text(file):
         with open(file_path, "r", encoding="utf-8") as f:
             text = f.read()
     else:
-        return "❌ Unsupported file type."
     return text
-# Process file and build FAISS index
 def process_file(file):
     global index, doc_texts
     text = extract_text(file)
-    if text.startswith("❌"):
         return text
     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
@@ -62,40 +51,39 @@ def process_file(file):
     index = faiss.IndexFlatL2(dim)
     index.add(embeddings)
-    return "✅ File processed successfully! Ask your question below."
-# Generate answer
 def generate_answer(question):
     global index, doc_texts
     if index is None or not doc_texts:
-        return "⚠️ Please upload and process a document first."
     question_emb = embed_model.encode([question], convert_to_numpy=True)
     _, I = index.search(question_emb, k=3)
     context = "\n".join([doc_texts[i] for i in I[0]])
-    prompt = f"""[System: You are a helpful assistant. Answer based on the context.]
-Context:
-{context}
-Question: {question}
-Answer:"""
     result = llm(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
     return result[0]["generated_text"].split("Answer:")[-1].strip()
-# Gradio UI
-with gr.Blocks(title="RAG Chatbot (CPU-Optimized)") as demo:
-    gr.Markdown("## 📚 Upload PDF/TXT and Ask Questions (Fast CPU RAG Bot)")
     with gr.Row():
-        file_input = gr.File(label="📁 Upload PDF or TXT", file_types=[".pdf", ".txt"])
-        upload_output = gr.Textbox(label="Upload Status", interactive=False)
     with gr.Row():
-        question_input = gr.Textbox(label="❓ Ask a Question", placeholder="E.g. What is the document about?")
-        answer_output = gr.Textbox(label="💬 Answer", interactive=False)
     file_input.change(fn=process_file, inputs=file_input, outputs=upload_output)
     question_input.submit(fn=generate_answer, inputs=question_input, outputs=answer_output)

 import os
 import gradio as gr
+import fitz
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from huggingface_hub import login
 hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 if not hf_token:
+    raise ValueError("Hugging Face token not found.")
 login(token=hf_token)
 embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
 model_id = "tiiuae/falcon-rw-1b"
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"": "cpu"}, torch_dtype="auto", token=hf_token)
 llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
 index = None
 doc_texts = []
 def extract_text(file):
     text = ""
     file_path = file.name if hasattr(file, 'name') else file
         with open(file_path, "r", encoding="utf-8") as f:
             text = f.read()
     else:
+        return "Unsupported file type."
     return text
 def process_file(file):
     global index, doc_texts
     text = extract_text(file)
+    if text.startswith("Unsupported"):
         return text
     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
     index = faiss.IndexFlatL2(dim)
     index.add(embeddings)
+    return "Document processed successfully. You can now ask questions."
 def generate_answer(question):
     global index, doc_texts
     if index is None or not doc_texts:
+        return "Please upload and process a document first."
     question_emb = embed_model.encode([question], convert_to_numpy=True)
     _, I = index.search(question_emb, k=3)
     context = "\n".join([doc_texts[i] for i in I[0]])
+    prompt = (
+        f"You are an intelligent assistant. Use the context below to answer the user's question clearly, "
+        f"politely, and completely. Do not just extract text — give a helpful response.\n\n"
+        f"Context:\n{context}\n\n"
+        f"User's Question: {question}\n\n"
+        f"Answer:"
+    )
     result = llm(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
     return result[0]["generated_text"].split("Answer:")[-1].strip()
+with gr.Blocks(title="Document Q&A Assistant") as demo:
+    gr.Markdown("<h1 style='text-align: center;'>📄 Document AI Assistant</h1>")
+    gr.Markdown("Upload a PDF or TXT file, and ask questions about its content. The assistant will provide answers using the document as context.")
     with gr.Row():
+        file_input = gr.File(label="Upload PDF or TXT", file_types=[".pdf", ".txt"])
+        upload_output = gr.Textbox(label="Upload Status")
     with gr.Row():
+        question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
+        answer_output = gr.Textbox(label="Answer")
     file_input.change(fn=process_file, inputs=file_input, outputs=upload_output)
     question_input.submit(fn=generate_answer, inputs=question_input, outputs=answer_output)