Spaces:
Sleeping
Sleeping
import PyPDF2 | |
from PyPDF2 import PdfReader | |
import docx | |
import os | |
import logging | |
from textwrap import dedent | |
import gradio as gr | |
def extract_text_from_file(file): | |
if file is None: | |
return "No file uploaded!" | |
# Determine file type | |
file_type = file.name.split('.')[-1].lower() | |
text = "" | |
try: | |
if file_type == "pdf": | |
# Extract text from PDF | |
reader = PdfReader(file) | |
for page in reader.pages: | |
text += page.extract_text() | |
elif file_type == "docx": | |
# Extract text from DOCX | |
doc = docx.Document(file) | |
for paragraph in doc.paragraphs: | |
text += paragraph.text + "\n" | |
elif file_type == "txt": | |
# Extract text from TXT | |
text = file.read().decode("utf-8") | |
else: | |
return "Unsupported file type! Please upload a PDF, DOCX, or TXT file." | |
except Exception as e: | |
return f"Error reading file: {str(e)}" | |
return text.strip() | |