Spaces:
Starting
Starting
File size: 1,125 Bytes
488dc3e 1bbca12 488dc3e 1bbca12 488dc3e 1bbca12 488dc3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import asyncio
import logging
import os

import pandas as pd
import PyPDF2
from langchain_core.tools import tool
logger = logging.getLogger(__name__)
def _parse_file_sync(task_id: str, file_type: str) -> str:
    """Read ``temp_{task_id}.{file_type}`` and return its contents as text.

    Blocking helper for ``file_parser_tool``; it performs all filesystem
    access and parsing so the coroutine can run it off the event loop.

    Args:
        task_id: Identifier interpolated into the file name.
        file_type: File extension; ``"csv"``, ``"txt"`` and ``"pdf"`` are
            the only values that are parsed.

    Returns:
        The file's text on success. Failures are reported as strings rather
        than raised: ``"File not found"``, ``"Unsupported file type: ..."``
        or ``"Error: ..."``.
    """
    try:
        # NOTE(review): task_id/file_type are interpolated into the path
        # without validation -- confirm callers only pass trusted values.
        file_path = f"temp_{task_id}.{file_type}"
        if not os.path.exists(file_path):
            logger.warning("File not found: %s", file_path)
            return "File not found"

        if file_type == "csv":
            return pd.read_csv(file_path).to_string()

        if file_type == "txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()

        if file_type == "pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # Coerce a falsy extraction result to "" so str.join never
                # receives a non-string item.
                return "".join(
                    (page.extract_text() or "") for page in reader.pages
                )

        return f"Unsupported file type: {file_type}"
    except Exception as e:
        # Deliberate catch-all: the caller expects an error string, never an
        # exception. logger.exception keeps the traceback in the log.
        logger.exception("Error parsing file for task %s: %s", task_id, e)
        return f"Error: {str(e)}"


# The docstring below is intentionally left as the original one-liner:
# presumably the @tool decorator uses it as the tool description at runtime
# (verify against the langchain_core version in use), so it is treated as
# program text rather than as a comment.
@tool
async def file_parser_tool(task_id: str, file_type: str) -> str:
    """Parse a file based on task_id and file_type"""
    # File reads and CSV/PDF parsing are blocking; run them in the default
    # executor so the event loop stays responsive while a file is parsed.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, _parse_file_sync, task_id, file_type
    )