# Streamlit app: upload PDF, Word (.docx) or PowerPoint (.pptx) files, pool
# their extracted text, and answer a free-form question about it with an
# extractive question-answering model from Hugging Face.

import streamlit as st
from PyPDF2 import PdfReader
import docx
from pptx import Presentation
from transformers import pipeline

# Title of the app
st.title("📚 Multi-Document Q&A App")


def _load_qa_pipeline():
    """Build the Hugging Face question-answering pipeline."""
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
    )


# Streamlit re-executes this script on every widget interaction. Cache the
# model so it is loaded once per server process rather than once per rerun.
# Streamlit versions without `cache_resource` fall back to the uncached
# loader, which matches the previous behavior.
if hasattr(st, "cache_resource"):
    _load_qa_pipeline = st.cache_resource(_load_qa_pipeline)

# Load question-answering pipeline from Hugging Face
qa_pipeline = _load_qa_pipeline()

# File uploader for multiple file types
uploaded_files = st.file_uploader(
    "Upload PDF, Word (.docx), or PPT (.pptx) files",
    type=["pdf", "docx", "pptx"],
    accept_multiple_files=True,
)


# File processing functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line group.

    Pages for which ``extract_text()`` returns a falsy value contribute an
    empty string.
    """
    reader = PdfReader(file)
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def extract_text_from_docx(file):
    """Return the text of all paragraphs in a .docx file, newline-separated."""
    document = docx.Document(file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def extract_text_from_pptx(file):
    """Return the text of every shape exposing ``.text`` in a .pptx file."""
    presentation = Presentation(file)
    return "\n".join(
        shape.text
        for slide in presentation.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


# Maps a lower-cased file extension to the function that extracts its text.
_EXTRACTORS = {
    "pdf": extract_text_from_pdf,
    "docx": extract_text_from_docx,
    "pptx": extract_text_from_pptx,
}

# Extract text from uploaded files
_extracted_parts = []
for file in uploaded_files or []:
    file_type = file.name.split(".")[-1].lower()
    extractor = _EXTRACTORS.get(file_type)
    if extractor is None:
        continue
    try:
        _extracted_parts.append(extractor(file))
    except Exception as exc:
        # Top-level UI boundary: report the unreadable file and keep going so
        # one bad upload does not prevent questions about the others.
        st.error(f"Could not read {file.name}: {exc}")

# Combine text from all files (each file's text is followed by a newline)
all_text = "".join(part + "\n" for part in _extracted_parts)

# Show input for question if files were processed.
# Test the stripped text: every file contributes at least "\n", so a plain
# truthiness check would pass even when no text was extracted.
if all_text.strip():
    st.success("✅ Files processed. Ask your question below.")
    question = st.text_input("❓ Ask a question:")
    # Ignore empty or whitespace-only questions instead of sending them to
    # the model.
    if question and question.strip():
        result = qa_pipeline(question=question.strip(), context=all_text)
        st.write("📌 **Answer:**", result["answer"])
elif uploaded_files:
    st.warning("No extractable text was found in the uploaded files.")
else:
    st.info("Upload some files to begin...")