Spaces:

bstraehle
/

grady

Running

App Files Files Community

grady / utils.py

bstraehle

Update utils.py

d0d0aee verified 3 months ago

raw

history blame contribute delete

2.1 kB

	import os
	import pandas as pd
	from docx import Document
	from pptx import Presentation

	def get_questions(file_path, level):
	df = pd.read_json(file_path, lines=True)

	if level > 0:
	df = df[df["Level"] == level]

	result=[]

	for index, row in df.iterrows():
	result.append([row["Question"], row["Level"], row["Final answer"], row["file_name"]])

	return result

	def is_ext(file_path, ext):
	return os.path.splitext(file_path)[1].lower() == ext.lower()

	def read_file_json(file_path):
	df = None

	if is_ext(file_path, ".csv"):
	df = pd.read_csv(file_path)
	elif is_ext(file_path, ".xls") or is_ext(file_path, ".xlsx"):
	df = pd.read_excel(file_path)
	elif is_ext(file_path, ".json") or is_ext(file_path, ".jsonl"):
	df = pd.read_json(file_path)

	return "" if df is None else df.to_json()

	def read_docx_text(file_path):
	doc = Document(file_path)

	text = []

	for block in doc.element.body:
	if block.tag.endswith("p"):
	for paragraph in doc.paragraphs:
	if paragraph._element == block:
	if paragraph.style.name.startswith("Heading"):
	text.append("\n" + paragraph.text + "\n")
	elif paragraph.text:
	text.append(paragraph.text)
	elif block.tag.endswith("tbl"):
	for table in doc.tables:
	if table._element == block:
	for row in table.rows:
	row_text = []
	for cell in row.cells:
	row_text.append(cell.text.strip())
	text.append(" \| ".join(row_text))

	return "\n".join(text)

	def read_pptx_text(file_path):
	prs = Presentation(file_path)

	text = []

	for slide in prs.slides:
	slide_text = []
	for shape in slide.shapes:
	if hasattr(shape, "text"):
	slide_text.append(shape.text)
	text.append("\n".join(slide_text))

	return "\n\n".join(text)