pradeepsengarr committed on
Commit
9b56ad1
·
verified ·
1 Parent(s): d0bb60c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ import faiss
5
+ import numpy as np
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ import fitz # PyMuPDF
8
+
9
# ---------------------------------------------------------------------------
# Retrieval state shared by process_file() and generate_answer().
# `index` stays None until a document has been processed; `doc_texts` holds
# the text chunks in the same order as the vectors stored in the index.
# ---------------------------------------------------------------------------
index = None
doc_texts = []

# ---------------------------------------------------------------------------
# Models (loaded once, at import time).
# ---------------------------------------------------------------------------
# Sentence-embedding model used for both document chunks and questions.
embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")

# Causal LM used to write the answer; weights are requested in 4-bit form
# and device placement is left to the library (device_map="auto").
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)

# Text-generation pipeline wrapping the model/tokenizer pair above.
llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
24
+
25
+ # PDF/Text extraction
26
def extract_text(file):
    """Return the plain text of an uploaded ``.pdf`` or ``.txt`` file.

    Args:
        file: Either a filesystem path, or an object that exposes the path
            of the uploaded file as ``.name``.

    Returns:
        The extracted text. If the extension is neither ``.pdf`` nor
        ``.txt`` the sentinel string ``"❌ Invalid file type."`` is returned
        instead (callers detect it via the leading ``❌``).
    """
    # Accept both an upload wrapper (has ``.name``) and a bare path string.
    path = str(getattr(file, "name", file))
    lowered = path.lower()  # make the extension check case-insensitive

    if lowered.endswith(".pdf"):
        # The context manager closes the document even if a page fails to
        # parse; join() avoids repeated string concatenation per page.
        with fitz.open(path) as doc:
            return "".join(page.get_text() for page in doc)

    if lowered.endswith(".txt"):
        # Read from disk by path instead of calling file.read(): that relies
        # on the wrapper still being open and positioned at offset 0, and
        # does not exist at all when the upload is delivered as a path.
        with open(path, "rb") as handle:
            # Undecodable bytes become U+FFFD rather than aborting the upload.
            return handle.read().decode("utf-8", errors="replace")

    return "❌ Invalid file type."
37
+
38
+ # File processing
39
def process_file(file):
    """Chunk and embed an uploaded file, then rebuild the search index.

    On success the module-level ``index`` and ``doc_texts`` are replaced
    together, so they never describe different documents. On any early
    return the previous state is left untouched.

    Args:
        file: The uploaded file as accepted by ``extract_text``; may be
            ``None`` (e.g. when the upload widget is cleared).

    Returns:
        A status message for the UI: a success message, the error string
        produced by ``extract_text``, or a warning when there is nothing
        to index.
    """
    global index, doc_texts

    if file is None:
        return "⚠️ Please upload a .pdf or .txt file."

    text = extract_text(file)
    if text.startswith("❌"):
        # extract_text signals failure with a message starting with ❌.
        return text

    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_text(text)
    if not chunks:
        # An empty or image-only document yields no chunks; without this
        # guard the embedding array has no second dimension to read.
        return "❌ No readable text found in the file."

    # FAISS expects C-contiguous float32 vectors.
    vectors = np.ascontiguousarray(embed_model.encode(chunks), dtype="float32")

    new_index = faiss.IndexFlatL2(vectors.shape[1])
    new_index.add(vectors)

    # Publish both pieces of state only after everything succeeded.
    index, doc_texts = new_index, chunks

    return "✅ File processed successfully. You can now ask questions!"
54
+
55
+ # Context + LLM response
56
def generate_answer(question):
    """Answer a question using the most similar chunks of the indexed file.

    The question is embedded, the nearest chunks (at most three) are looked
    up in the module-level ``index``, and the language model is prompted
    with those chunks as context.

    Args:
        question: The user's question as plain text.

    Returns:
        The generated answer, or a warning message when no document has
        been indexed yet or the question is blank.
    """
    if index is None or not doc_texts:
        return "⚠️ Please upload and process a file first."
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    # FAISS expects C-contiguous float32 vectors.
    query_vec = np.ascontiguousarray(embed_model.encode([question]), dtype="float32")

    # Never ask for more neighbours than there are chunks: FAISS pads
    # missing results with -1, which would silently index doc_texts[-1].
    top_k = min(3, len(doc_texts))
    _, hits = index.search(query_vec, k=top_k)
    context = "\n".join(
        doc_texts[i] for i in hits[0] if 0 <= i < len(doc_texts)
    )

    prompt = (
        "[System: You are a helpful assistant. Answer strictly based on the context.]\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {question}\n"
        "Answer:"
    )

    # return_full_text=False makes the pipeline return only the completion,
    # so the answer does not have to be carved out of the echoed prompt by
    # splitting on "Answer:" (which breaks if the answer contains that text).
    result = llm(
        prompt,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()
74
+
75
+ # Gradio UI
76
# Two-row layout: upload + status on top, question + answer below.
# Emoji in the labels are restored from their mis-decoded form.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("## 📚 RAG Chatbot - Upload PDF/TXT and Ask Questions")

    with gr.Row():
        file_input = gr.File(label="📁 Upload .pdf or .txt", file_types=[".pdf", ".txt"])
        upload_status = gr.Textbox(label="📥 Upload Status", interactive=False)

    with gr.Row():
        question_box = gr.Textbox(label="❓ Ask a Question", placeholder="Type your question here...")
        answer_box = gr.Textbox(label="💬 Answer", interactive=False)

    # Event wiring is kept inside the Blocks context with the components.
    # A new upload rebuilds the index; pressing Enter in the question box
    # runs retrieval + generation.
    file_input.change(fn=process_file, inputs=file_input, outputs=upload_status)
    question_box.submit(fn=generate_answer, inputs=question_box, outputs=answer_box)

demo.launch()