pradeepsengarr committed on
Commit d64804c · verified · 1 Parent(s): adc1d58

Update app.py

Files changed (1)
  1. app.py +285 -167
app.py CHANGED
@@ -1,245 +1,363 @@
  import re
  import faiss
  import numpy as np
- from typing import List
  from sentence_transformers import SentenceTransformer
  from transformers import pipeline
- import gradio as gr

- # Helper: clean and normalize text
- def clean_text(text: str) -> str:
-     text = re.sub(r'\s+', ' ', text)
-     text = text.strip()
-     return text

- # Main class for Document Retrieval & Q&A
  class SmartDocumentRAG:
-     def __init__(self,
-                  embedder_model='sentence-transformers/all-MiniLM-L6-v2',
-                  qa_model='distilbert-base-cased-distilled-squad',
-                  summarization_model='facebook/bart-large-cnn'):
-
-         print("Loading models... this may take a moment.")
-
-         # Embedding model for semantic search
          self.embedder = SentenceTransformer(embedder_model)

-         # Q&A pipeline for answering questions
          self.qa_pipeline = pipeline('question-answering', model=qa_model, tokenizer=qa_model)

-         # Summarization pipeline for document summaries
-         self.summarizer = pipeline('summarization', model=summarization_model, tokenizer=summarization_model)
-
-         # Initialize document storage and index
-         self.documents: List[str] = []
          self.index = None
          self.is_indexed = False
-         self.document_summary = ""
-         self.raw_text = ""

-     # --- Document processing ---

-     def chunk_text(self, text: str, max_len: int = 250) -> List[str]:
-         # Split text into chunks of roughly max_len words each
-         words = text.split()
          chunks = []
-         for i in range(0, len(words), max_len):
-             chunk = ' '.join(words[i:i+max_len])
-             chunks.append(clean_text(chunk))
          return chunks
-
      def process_documents(self, files) -> str:
          if not files:
              return "❌ No files uploaded!"
-
-         all_text = ""
          try:
-             for file_obj in files:
-                 filename = file_obj.name
-                 file_bytes = file_obj.read()
-                 ext = filename.split('.')[-1].lower()
-
-                 text = ""
-                 if ext == 'pdf':
-                     import fitz  # PyMuPDF
-                     doc = fitz.open(stream=file_bytes, filetype="pdf")
-                     for page in doc:
-                         text += page.get_text()
-                     doc.close()
-                 elif ext == 'docx':
-                     import docx2txt
-                     import io
-                     # docx2txt accepts a path or a file-like object; use BytesIO
-                     text = docx2txt.process(io.BytesIO(file_bytes))
-                 elif ext in ['txt', 'text']:
-                     text = file_bytes.decode('utf-8', errors='ignore')
                  else:
-                     return f"❌ Unsupported file type: {ext}"
-
-                 all_text += "\n\n" + text
-
-             all_text = clean_text(all_text)
-             self.raw_text = all_text
-             # Chunk documents
-             self.documents = self.chunk_text(all_text)
-
-             if not self.documents:
-                 return "❌ No text extracted from documents."
-
-             # Build FAISS index
-             embeddings = self.embedder.encode(self.documents, convert_to_numpy=True, show_progress_bar=True)
-             embeddings = embeddings.astype('float32')
-
              dimension = embeddings.shape[1]
              self.index = faiss.IndexFlatIP(dimension)
              faiss.normalize_L2(embeddings)
-             self.index.add(embeddings)
-
              self.is_indexed = True
-
-             # Create summary
-             self.document_summary = self.create_document_summary(all_text)
-
-             return f"✅ Processed {len(self.documents)} text chunks from documents. Summary generated."
-
          except Exception as e:
-             return f"❌ Error processing documents: {str(e)}"
-
-     # --- Semantic search ---
      def find_relevant_content(self, query: str, top_k: int = 3) -> str:
-         if not self.is_indexed or not self.index:
              return ""
          try:
              query_embedding = self.embedder.encode([query], convert_to_numpy=True)
              faiss.normalize_L2(query_embedding)
-             scores, indices = self.index.search(query_embedding.astype('float32'), top_k)
-
              relevant_chunks = []
              for score, idx in zip(scores[0], indices[0]):
-                 if idx < len(self.documents) and score > 0.15:  # threshold tuned to reduce noise
                      relevant_chunks.append(self.documents[idx])
-
-             if not relevant_chunks:
-                 return ""
-
-             return ' '.join(relevant_chunks)
          except Exception as e:
-             print(f"Error in semantic search: {e}")
              return ""

-     # --- Summarization ---
-     def create_document_summary(self, text: str) -> str:
-         try:
-             # Limit the summarizer input to ~1000 characters to avoid issues
-             max_input_length = 1000
-             input_text = text[:max_input_length] + ('...' if len(text) > max_input_length else '')
-             summary_output = self.summarizer(input_text, max_length=150, min_length=40, do_sample=False)
-             summary = summary_output[0]['summary_text']
-             return summary
-         except Exception as e:
-             # Fallback: simple heuristic summary
-             sentences = re.split(r'(?<=[.!?]) +', text)
-             return sentences[0][:300] + ('...' if len(sentences[0]) > 300 else '')

-     # --- Question answering ---
-     def answer_question(self, query: str) -> str:
          if not query.strip():
-             return "❓ Please ask a question!"
          if not self.is_indexed:
-             return "📝 Please upload and process documents first!"
-
          query_lower = query.lower()
-         # Summary shortcut
          if any(word in query_lower for word in ['summary', 'summarize', 'overview', 'about']):
-             return f"📄 Document Summary:\n\n{self.document_summary}"
-
-         # Get relevant context
          context = self.find_relevant_content(query, top_k=3)
          if not context:
-             return "🔍 No relevant information found for your question."
-
          try:
-             # The Q&A pipeline expects the question and context separately
              result = self.qa_pipeline(question=query, context=context)
-
              answer = result.get('answer', '').strip()
              score = result.get('score', 0.0)
-
-             # Confidence thresholding & hallucination check
              if score < 0.20 or not answer or answer.lower() in ['no answer', '']:
-                 return "I don't know based on the provided documents."
-
-             # Optional heuristic: if the answer is too short or unrelated to the question, fall back
              if len(answer) < 3 or (query_lower not in answer.lower() and score < 0.35):
-                 return "I don't know based on the provided documents."
-
-             # Return the answer plus a context snippet for transparency
-             return f"**Answer:** {answer}\n\n*Context snippet:* {context[:300]}..."
          except Exception as e:
-             return f"❌ Error answering question: {str(e)}"

- # --- Gradio UI ---

  def create_interface():
      rag_system = SmartDocumentRAG()
-
-     with gr.Blocks(title="🧠 Enhanced Document Q&A System", theme=gr.themes.Soft()) as demo:
          gr.Markdown("""
          # 🧠 Enhanced Document Q&A System

          **Features:**
-         - 🎯 DistilBERT for Q&A with confidence checks
-         - ⚡ Sentence-BERT + FAISS semantic search
-         - 📊 Strong summarization with BART-large-CNN
-         - 🔍 Transparent answers with context snippets
          """)
-
          with gr.Tab("📤 Upload & Process"):
              with gr.Row():
                  with gr.Column():
-                     file_upload = gr.File(
-                         label="📁 Upload Documents (PDF, DOCX, TXT)",
-                         file_count="multiple",
-                         file_types=[".pdf", ".docx", ".txt"],
-                         height=150
-                     )
-                     process_btn = gr.Button("🔄 Process Documents", variant="primary", size="lg")
                  with gr.Column():
-                     process_status = gr.Textbox(label="📋 Processing Status", lines=10, interactive=False)
-
-             process_btn.click(
-                 fn=rag_system.process_documents,
-                 inputs=[file_upload],
-                 outputs=[process_status]
-             )
-
          with gr.Tab("❓ Q&A"):
              with gr.Row():
                  with gr.Column():
-                     question_input = gr.Textbox(
-                         label="🤔 Ask Your Question",
-                         placeholder="e.g., What is the person's name? How many years of experience? What skills do they have?",
-                         lines=3
-                     )
                      with gr.Row():
                          ask_btn = gr.Button("🧠 Get Answer", variant="primary")
                          summary_btn = gr.Button("📊 Get Summary", variant="secondary")
                  with gr.Column():
                      answer_output = gr.Textbox(label="💡 Answer", lines=8, interactive=False)
-
-             ask_btn.click(
-                 fn=rag_system.answer_question,
-                 inputs=[question_input],
-                 outputs=[answer_output]
-             )
-
-             summary_btn.click(
-                 fn=lambda: rag_system.answer_question("summary"),
-                 inputs=[],
-                 outputs=[answer_output]
-             )
-
      return demo
 
+ import os
  import re
  import faiss
+ import docx
+ import PyPDF2
+ import gradio as gr
  import numpy as np
+ from typing import List, Dict
  from sentence_transformers import SentenceTransformer
  from transformers import pipeline


  class SmartDocumentRAG:
+     def __init__(self, embedder_model='sentence-transformers/all-MiniLM-L6-v2', qa_model='distilbert-base-cased-distilled-squad'):
+         # Load the sentence embedding model
          self.embedder = SentenceTransformer(embedder_model)

+         # Load the Q&A pipeline model
          self.qa_pipeline = pipeline('question-answering', model=qa_model, tokenizer=qa_model)

+         # Document and index initialization
+         self.documents = []
+         self.document_metadata = []
+         self.raw_text = ""
+         self.document_summary = ""
+         self.document_type = ""
          self.index = None
          self.is_indexed = False
+         self.model_type = "distilbert-qa"  # Can add flan-t5 or others as needed
+
+     ####################
+     # Text Extraction
+     ####################
+     def extract_text_from_file(self, file_path: str) -> str:
+         ext = os.path.splitext(file_path)[1].lower()
+         try:
+             if ext == '.pdf':
+                 return self.extract_from_pdf(file_path)
+             elif ext == '.docx':
+                 return self.extract_from_docx(file_path)
+             elif ext == '.txt':
+                 return self.extract_from_txt(file_path)
+             else:
+                 return f"Unsupported file type: {ext}"
+         except Exception as e:
+             return f"Error reading file: {e}"
+
+     def extract_from_pdf(self, file_path: str) -> str:
+         text = ""
+         try:
+             with open(file_path, 'rb') as f:
+                 reader = PyPDF2.PdfReader(f)
+                 for page in reader.pages:
+                     txt = page.extract_text() or ""
+                     cleaned = self.clean_text(txt)
+                     text += cleaned + "\n"
+             return text.strip()
+         except Exception as e:
+             return f"Error reading PDF: {e}"
+
+     def extract_from_docx(self, file_path: str) -> str:
+         try:
+             doc = docx.Document(file_path)
+             paragraphs = [self.clean_text(p.text) for p in doc.paragraphs if p.text.strip()]
+             return "\n".join(paragraphs)
+         except Exception as e:
+             return f"Error reading DOCX: {e}"
+
+     def extract_from_txt(self, file_path: str) -> str:
+         encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
+         for enc in encodings:
+             try:
+                 with open(file_path, 'r', encoding=enc) as f:
+                     return self.clean_text(f.read())
+             except UnicodeDecodeError:
+                 continue
+             except Exception as e:
+                 return f"Error reading TXT: {e}"
+         return "Could not decode TXT file."
+
+     def clean_text(self, text: str) -> str:
+         # Normalize whitespace, fix merged words, strip stray characters
+         text = re.sub(r'\s+', ' ', text)
+         text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)  # Split camel-case merges
+         text = text.strip()
+         return text

+     ####################
+     # Document Type Detection & Summary
+     ####################
+     def detect_document_type(self, text: str) -> str:
+         lower_text = text.lower()
+         if any(k in lower_text for k in ['abstract', 'study', 'research', 'methodology']):
+             return 'research'
+         elif any(k in lower_text for k in ['company', 'business', 'organization', 'financial']):
+             return 'business'
+         else:
+             return 'general'
+
+     def create_document_summary(self, text: str) -> str:
+         sentences = re.split(r'(?<=[.!?]) +', text)
+         sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
+
+         if self.document_type == 'research':
+             return self.extract_research_summary(sentences)
+         elif self.document_type == 'business':
+             return self.extract_business_summary(sentences)
+         else:
+             return self.extract_general_summary(sentences)
+
+     def extract_research_summary(self, sentences: List[str]) -> str:
+         for s in sentences[:7]:
+             if any(w in s.lower() for w in ['abstract', 'study', 'research']):
+                 return s[:300] + ('...' if len(s) > 300 else '')
+         return sentences[0][:300] if sentences else "Research document."
+
+     def extract_business_summary(self, sentences: List[str]) -> str:
+         for s in sentences[:5]:
+             if any(w in s.lower() for w in ['company', 'business', 'organization']):
+                 return s[:300] + ('...' if len(s) > 300 else '')
+         return sentences[0][:300] if sentences else "Business document."
+
+     def extract_general_summary(self, sentences: List[str]) -> str:
+         return sentences[0][:300] + ('...' if len(sentences[0]) > 300 else '') if sentences else "General document."
+
+     ####################
+     # Chunking
+     ####################
+     def enhanced_chunk_text(self, text: str, chunk_size: int = 3, overlap: int = 1) -> List[Dict]:
+         if not text.strip():
+             return []
+
+         sentences = re.split(r'(?<=[.!?]) +', text)
+         sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
          chunks = []
+         for i in range(0, len(sentences), chunk_size - overlap):
+             chunk_sents = sentences[i:i + chunk_size]
+             if chunk_sents:
+                 chunk_text = " ".join(chunk_sents)
+                 chunks.append({
+                     "text": chunk_text,
+                     "sentence_indices": list(range(i, min(i + chunk_size, len(sentences)))),
+                     "doc_type": self.document_type
+                 })
          return chunks
+
+     ####################
+     # Processing uploaded files
+     ####################
      def process_documents(self, files) -> str:
          if not files:
              return "❌ No files uploaded!"
+
          try:
+             all_text = ""
+             processed_files = []
+
+             for file in files:
+                 if file is None:
+                     continue
+                 file_text = self.extract_text_from_file(file.name)
+                 if not file_text.startswith("Error") and not file_text.startswith("Unsupported"):
+                     all_text += " " + file_text
+                     processed_files.append(os.path.basename(file.name))
                  else:
+                     return f"❌ {file_text}"
+
+             if not all_text.strip():
+                 return "❌ No text extracted from files!"
+
+             self.raw_text = all_text.strip()
+             self.document_type = self.detect_document_type(self.raw_text)
+             self.document_summary = self.create_document_summary(self.raw_text)
+
+             chunks = self.enhanced_chunk_text(self.raw_text)
+             if not chunks:
+                 return "❌ No valid chunks created!"
+
+             self.documents = [c["text"] for c in chunks]
+             self.document_metadata = chunks
+
+             embeddings = self.embedder.encode(self.documents, show_progress_bar=False, convert_to_numpy=True)
              dimension = embeddings.shape[1]
+
              self.index = faiss.IndexFlatIP(dimension)
              faiss.normalize_L2(embeddings)
+             self.index.add(embeddings.astype('float32'))
+
              self.is_indexed = True
+
+             return (f"✅ Processed {len(processed_files)} files: {', '.join(processed_files)}\n"
+                     f"📄 Document Type: {self.document_type.title()}\n"
+                     f"🔍 Created {len(self.documents)} chunks\n"
+                     f"📝 Summary: {self.document_summary}\n"
+                     f"🚀 Ready for Q&A!")
+
          except Exception as e:
+             return f"❌ Error processing documents: {e}"
+
+     ####################
+     # Search & Answer
+     ####################
      def find_relevant_content(self, query: str, top_k: int = 3) -> str:
+         if not self.is_indexed:
              return ""
+
          try:
              query_embedding = self.embedder.encode([query], convert_to_numpy=True)
              faiss.normalize_L2(query_embedding)
+
+             k = min(top_k, len(self.documents))
+             scores, indices = self.index.search(query_embedding.astype('float32'), k)
+
              relevant_chunks = []
              for score, idx in zip(scores[0], indices[0]):
+                 if idx < len(self.documents) and score > 0.15:
                      relevant_chunks.append(self.documents[idx])
+
+             return " ".join(relevant_chunks)
+
          except Exception as e:
+             print(f"Search error: {e}")
              return ""
+
+     def answer_question(self, query: str) -> str:
+         """
+         Answer the user's question based on the processed documents.

+         Features:
+         - Returns the document summary if the query asks for one.
+         - Uses semantic search to find relevant context.
+         - Runs the extractive QA pipeline on the retrieved context.
+         - Applies a confidence threshold to reduce hallucinations.
+         - Returns a fallback message if the answer is unreliable.
+         """
          if not query.strip():
+             return "❓ Please ask a valid question."
+
          if not self.is_indexed:
+             return "📝 Please upload and process documents before asking questions."
+
          query_lower = query.lower()
+
+         # Handle summary requests
          if any(word in query_lower for word in ['summary', 'summarize', 'overview', 'about']):
+             if self.document_summary:
+                 return f"📄 Document Summary:\n\n{self.document_summary}"
+             else:
+                 return "⚠️ Summary not available. Please process documents first."
+
+         # Find relevant chunks for context
          context = self.find_relevant_content(query, top_k=3)
          if not context:
+             return "🔍 Sorry, no relevant information was found for your question. Try rephrasing."
+
          try:
+             # The DistilBERT QA pipeline takes the question and context as separate arguments
              result = self.qa_pipeline(question=query, context=context)
+
              answer = result.get('answer', '').strip()
              score = result.get('score', 0.0)
+
+             # Confidence threshold to prevent hallucination
              if score < 0.20 or not answer or answer.lower() in ['no answer', '']:
+                 return "🤔 I couldn't find a confident answer to your question based on the documents."
+
+             # Optional heuristic: reject answers that are too short or unrelated
              if len(answer) < 3 or (query_lower not in answer.lower() and score < 0.35):
+                 return "🤔 I couldn't find a confident answer to your question based on the documents."
+
+             # Return the answer with a snippet of context for transparency
+             snippet = context[:300].strip()
+             if len(context) > 300:
+                 snippet += "..."
+
+             return f"**Answer:** {answer}\n\n*Context snippet:* {snippet}"
+
          except Exception as e:
+             # If the model fails, fall back to a simple error message
+             return f"❌ An error occurred while answering your question: {str(e)}"
+
+     def extract_direct_answer(self, query: str, context: str) -> str:
+         lower_query = query.lower()
+
+         # Extract names (simple heuristic)
+         if any(k in lower_query for k in ['name', 'who is', 'who']):
+             names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
+             if names:
+                 return f"**Name:** {names[0]}"
+
+         # Extract years of experience
+         if any(k in lower_query for k in ['experience', 'years']):
+             exp = re.findall(r'(\d+)[\+\-\s]*(?:years?|yrs?)', context.lower())
+             if exp:
+                 return f"**Experience:** {exp[0]} years"
+
+         # Extract skills
+         if any(k in lower_query for k in ['skill', 'technology', 'tech']):
+             skills_regex = r'\b(Python|Java|JavaScript|React|Node|SQL|AWS|Docker|Kubernetes|Git|HTML|CSS|Angular|Vue|Spring|Django|Flask|MongoDB|PostgreSQL)\b'
+             skills_found = list(set(re.findall(skills_regex, context, re.I)))
+             if skills_found:
+                 return f"**Skills mentioned:** {', '.join(skills_found)}"
+
+         # Extract education
+         if any(k in lower_query for k in ['education', 'degree', 'university']):
+             edu = re.findall(r'(?:Bachelor|Master|PhD|B\.?S\.?|M\.?S\.?|B\.?A\.?|M\.?A\.?).*?(?:in|of)\s+([^.]+)', context, re.I)
+             if edu:
+                 return f"**Education:** {edu[0]}"
+
+         # Fallback: first sentence of the context
+         sentences = re.split(r'(?<=[.!?]) +', context)
+         if sentences:
+             return f"**Answer:** {sentences[0]}"

+         return "I found relevant information but could not extract a precise answer."


+ # Gradio interface creation
  def create_interface():
      rag_system = SmartDocumentRAG()
+
+     with gr.Blocks(title="🧠 Enhanced Document Q&A", theme=gr.themes.Soft()) as demo:
          gr.Markdown("""
          # 🧠 Enhanced Document Q&A System

+         **Optimized with Better Chunking, Summaries, and Reduced Hallucination**
+
          **Features:**
+         - 🎯 DistilBERT Q&A pipeline for accurate answers
+         - ⚡ SentenceTransformer embeddings + FAISS semantic search
+         - 📊 Improved document summaries & chunking
+         - 🔍 Direct-answer fallback for fact extraction
          """)
+
          with gr.Tab("📤 Upload & Process"):
              with gr.Row():
                  with gr.Column():
+                     file_upload = gr.File(label="📁 Upload Documents", file_types=[".pdf", ".docx", ".txt"], file_count="multiple", interactive=True)
+                     process_btn = gr.Button("🔄 Process Documents", variant="primary")
                  with gr.Column():
+                     process_status = gr.Textbox(label="📋 Processing Status", lines=8, interactive=False)
+
+             process_btn.click(fn=rag_system.process_documents, inputs=[file_upload], outputs=[process_status])
+
          with gr.Tab("❓ Q&A"):
              with gr.Row():
                  with gr.Column():
+                     question_input = gr.Textbox(label="🤔 Ask Your Question", placeholder="Enter your question here...", lines=3)
                      with gr.Row():
                          ask_btn = gr.Button("🧠 Get Answer", variant="primary")
                          summary_btn = gr.Button("📊 Get Summary", variant="secondary")
                  with gr.Column():
                      answer_output = gr.Textbox(label="💡 Answer", lines=8, interactive=False)
+
+             ask_btn.click(fn=rag_system.answer_question, inputs=[question_input], outputs=[answer_output])
+             summary_btn.click(fn=lambda: rag_system.answer_question("summary"), inputs=[], outputs=[answer_output])
+
      return demo
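Note: this hunk ends at `return demo`, and no `demo.launch()` call is visible in the diff (the file's final lines appear blank in this view). If the launch call is in fact absent, a minimal entry point for running the app locally would look like the following sketch; the `__main__` guard is an assumption, not part of this commit:

# Hypothetical entry point (not shown in this diff)
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()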
362
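Note on the new chunking: `enhanced_chunk_text` walks the sentence list with a stride of `chunk_size - overlap`, so the defaults (chunk_size=3, overlap=1) produce three-sentence chunks where consecutive chunks share one sentence. A standalone sketch of the same windowing arithmetic:

# Stride used by enhanced_chunk_text: chunk_size - overlap = 2
sentences = ["S0.", "S1.", "S2.", "S3.", "S4."]
chunk_size, overlap = 3, 1
windows = [sentences[i:i + chunk_size] for i in range(0, len(sentences), chunk_size - overlap)]
print(windows)  # [['S0.', 'S1.', 'S2.'], ['S2.', 'S3.', 'S4.'], ['S4.']]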
 
363
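Note on the index: both the chunk embeddings and the query embedding are passed through `faiss.normalize_L2` before the `IndexFlatIP` search, so the inner-product scores are cosine similarities and the `score > 0.15` filter in `find_relevant_content` is effectively a cosine cutoff. A minimal sketch of that equivalence (assumes faiss and numpy are installed):

import faiss
import numpy as np

vecs = np.random.rand(4, 8).astype('float32')
faiss.normalize_L2(vecs)          # in-place L2 normalization
index = faiss.IndexFlatIP(8)      # inner product on unit vectors == cosine similarity
index.add(vecs)
scores, ids = index.search(vecs[:1].copy(), 3)
print(scores[0][0])               # ~1.0: the self-match has cosine similarity 1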