RanaZaeem12 committed on
Commit
c8b4723
·
verified ·
1 Parent(s): 3c64a05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -3,16 +3,24 @@ from PyPDF2 import PdfReader
3
  import docx
4
  from pptx import Presentation
5
  from transformers import pipeline
6
- import os
7
 
8
- st.title("Multi-Document Q&A App 📄💬")
 
9
 
 
10
  qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
11
 
12
- uploaded_files = st.file_uploader("Upload PDF, Word, or PPT files", type=["pdf", "docx", "pptx"], accept_multiple_files=True)
 
 
 
 
 
13
 
 
14
  all_text = ""
15
 
 
16
  def extract_text_from_pdf(file):
17
  reader = PdfReader(file)
18
  return "\n".join([page.extract_text() or "" for page in reader.pages])
@@ -30,6 +38,7 @@ def extract_text_from_pptx(file):
30
  text.append(shape.text)
31
  return "\n".join(text)
32
 
 
33
  for file in uploaded_files:
34
  file_type = file.name.split('.')[-1].lower()
35
  if file_type == "pdf":
@@ -39,10 +48,14 @@ for file in uploaded_files:
39
  elif file_type == "pptx":
40
  all_text += extract_text_from_pptx(file) + "\n"
41
 
 
42
  if all_text:
43
- st.success("Files processed. You can now ask questions.")
44
- question = st.text_input("Ask a question based on your uploaded files:")
45
 
46
  if question:
47
  result = qa_pipeline(question=question, context=all_text)
48
- st.write("**Answer:**", result['answer'])
 
 
 
 
3
  import docx
4
  from pptx import Presentation
5
  from transformers import pipeline
 
6
 
7
+ # Title of the app
8
+ st.title("📚 Multi-Document Q&A App")
9
 
10
+ # Load question-answering pipeline from Hugging Face
11
  qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
12
 
13
+ # File uploader for multiple file types
14
+ uploaded_files = st.file_uploader(
15
+ "Upload PDF, Word (.docx), or PPT (.pptx) files",
16
+ type=["pdf", "docx", "pptx"],
17
+ accept_multiple_files=True
18
+ )
19
 
20
+ # Combine text from all files
21
  all_text = ""
22
 
23
+ # File processing functions
24
  def extract_text_from_pdf(file):
25
  reader = PdfReader(file)
26
  return "\n".join([page.extract_text() or "" for page in reader.pages])
 
38
  text.append(shape.text)
39
  return "\n".join(text)
40
 
41
+ # Extract text from uploaded files
42
  for file in uploaded_files:
43
  file_type = file.name.split('.')[-1].lower()
44
  if file_type == "pdf":
 
48
  elif file_type == "pptx":
49
  all_text += extract_text_from_pptx(file) + "\n"
50
 
51
+ # Show input for question if files were processed
52
  if all_text:
53
+ st.success("✅ Files processed. Ask your question below.")
54
+ question = st.text_input("❓ Ask a question:")
55
 
56
  if question:
57
  result = qa_pipeline(question=question, context=all_text)
58
+ st.write("📌 **Answer:**", result['answer'])
59
+ else:
60
+ st.info("Upload some files to begin...")
61
+