File size: 1,856 Bytes
3c64a05
 
 
 
 
 
c8b4723
 
3c64a05
c8b4723
3c64a05
 
c8b4723
 
 
 
 
 
3c64a05
c8b4723
3c64a05
 
c8b4723
3c64a05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8b4723
3c64a05
 
 
 
 
 
 
 
 
c8b4723
3c64a05
c8b4723
 
3c64a05
 
 
c8b4723
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
from PyPDF2 import PdfReader
import docx
from pptx import Presentation
from transformers import pipeline

# Title of the app
st.title("📚 Multi-Document Q&A App")


# Load question-answering pipeline from Hugging Face.
# Streamlit re-executes the whole script on every widget interaction, so the
# model is built inside a cached factory: it is constructed once and the same
# pipeline object is reused on later reruns instead of being reloaded.
@st.cache_resource
def _load_qa_pipeline():
    """Build the DistilBERT (SQuAD-distilled) question-answering pipeline."""
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
    )


qa_pipeline = _load_qa_pipeline()

# File uploader for multiple file types; the widget only accepts the three
# extensions that have an extractor defined in this script.
uploaded_files = st.file_uploader(
    "Upload PDF, Word (.docx), or PPT (.pptx) files",
    type=["pdf", "docx", "pptx"],
    accept_multiple_files=True
)

# Accumulator for the text extracted from all uploaded files
all_text = ""

# File processing functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Anything accepted by ``PdfReader`` (here, an uploaded file).

    Returns:
        A single string with one entry per page; a page whose
        ``extract_text()`` result is falsy contributes an empty string.
    """
    page_texts = []
    for page in PdfReader(file).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)

def extract_text_from_docx(file):
    """Return the text of every paragraph of a Word document, newline-joined.

    Args:
        file: Anything accepted by ``docx.Document`` (here, an uploaded file).

    Returns:
        A single string built from ``paragraph.text`` of each entry in the
        document's ``paragraphs`` collection.
    """
    document = docx.Document(file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

def extract_text_from_pptx(file):
    """Return the text of every text-bearing shape in a presentation.

    Args:
        file: Anything accepted by ``Presentation`` (here, an uploaded file).

    Returns:
        A single newline-joined string, in slide order and then shape order.
        Shapes that expose no ``text`` attribute are skipped.
    """
    presentation = Presentation(file)
    shape_texts = [
        shape.text
        for slide in presentation.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts)

# Extract text from uploaded files.
# Each supported extension maps to its extractor, so supporting another
# format only needs one new entry here.
_EXTRACTORS = {
    "pdf": extract_text_from_pdf,
    "docx": extract_text_from_docx,
    "pptx": extract_text_from_pptx,
}

# Collect the per-file text and join once at the end rather than growing a
# string with repeated "+=".
_extracted_parts = []
for file in uploaded_files:
    file_type = file.name.split('.')[-1].lower()
    extractor = _EXTRACTORS.get(file_type)
    if extractor is not None:
        _extracted_parts.append(extractor(file))

all_text = "\n".join(_extracted_parts)

# Show input for question if files were processed.
# The check uses strip() because files with no extractable text (for example
# image-only PDFs) yield only whitespace, which must not be treated as usable
# context for the model.
if all_text.strip():
    st.success("✅ Files processed. Ask your question below.")
    question = st.text_input("❓ Ask a question:")

    if question:
        result = qa_pipeline(question=question, context=all_text)
        st.write("📌 **Answer:**", result['answer'])
elif uploaded_files:
    # Files were uploaded but nothing readable came out of them.
    st.warning("No readable text was found in the uploaded files.")
else:
    st.info("Upload some files to begin...")