Spaces:
Runtime error
Runtime error
File size: 816 Bytes
4ad299d 9cb30e2 4ad299d 9cb30e2 4ad299d 9cb30e2 4ad299d 9cb30e2 4ad299d 9cb30e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import fitz # PyMuPDF
import pandas as pd
from pptx import Presentation
def extract_text_from_file(v_file_path: str) -> str:
    """Extract the textual content of a PDF, PPTX, or CSV file.

    The file type is chosen from the path's extension, compared
    case-insensitively.

    Args:
        v_file_path: Path to the file to read.

    Returns:
        For ``.pdf``: the text of every page, concatenated in page order.
        For ``.pptx``: the text of every shape that has a text frame, each
        followed by a newline, in slide/shape order.
        For ``.csv``: the ``DataFrame.to_string()`` rendering of the parsed
        file.
        For any other extension: an empty string (no error is raised).
    """
    # Normalise once instead of re-lowering the path for every branch.
    v_lowered_path = v_file_path.lower()

    if v_lowered_path.endswith('.pdf'):
        # The context manager closes the document even if a page fails to
        # render; the previous explicit close() was skipped on exceptions.
        with fitz.open(v_file_path) as obj_pdf:
            return "".join(obj_page.get_text() for obj_page in obj_pdf)

    if v_lowered_path.endswith('.pptx'):
        obj_ppt = Presentation(v_file_path)
        # Only shapes that carry a text frame contribute; each one ends
        # with a newline so adjacent shapes do not run together.
        return "".join(
            obj_shape.text_frame.text + "\n"
            for obj_slide in obj_ppt.slides
            for obj_shape in obj_slide.shapes
            if obj_shape.has_text_frame
        )

    if v_lowered_path.endswith('.csv'):
        return pd.read_csv(v_file_path).to_string()

    # Unsupported extension: callers get an empty string, as before.
    return ""
|