Spaces:

RanaZaeem12
/

ExtractAnswer

Sleeping

App Files Community

RanaZaeem12 committed on May 11

Commit

3c64a05

verified ·

1 Parent(s): 3672982

Create app.py

Browse files

Files changed (1) hide show

app.py +48 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import streamlit as st
+from PyPDF2 import PdfReader
+import docx
+from pptx import Presentation
+from transformers import pipeline
+import os
+st.title("Multi-Document Q&A App 📄💬")
+qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+uploaded_files = st.file_uploader("Upload PDF, Word, or PPT files", type=["pdf", "docx", "pptx"], accept_multiple_files=True)
+all_text = ""
+def extract_text_from_pdf(file):
+    reader = PdfReader(file)
+    return "\n".join([page.extract_text() or "" for page in reader.pages])
+def extract_text_from_docx(file):
+    doc = docx.Document(file)
+    return "\n".join([para.text for para in doc.paragraphs])
+def extract_text_from_pptx(file):
+    prs = Presentation(file)
+    text = []
+    for slide in prs.slides:
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                text.append(shape.text)
+    return "\n".join(text)
+for file in uploaded_files:
+    file_type = file.name.split('.')[-1].lower()
+    if file_type == "pdf":
+        all_text += extract_text_from_pdf(file) + "\n"
+    elif file_type == "docx":
+        all_text += extract_text_from_docx(file) + "\n"
+    elif file_type == "pptx":
+        all_text += extract_text_from_pptx(file) + "\n"
+if all_text:
+    st.success("Files processed. You can now ask questions.")
+    question = st.text_input("Ask a question based on your uploaded files:")
+    if question:
+        result = qa_pipeline(question=question, context=all_text)
+        st.write("**Answer:**", result['answer'])