Spaces:
Runtime error
Runtime error
from typing import Any, Optional, Dict | |
from smolagents.tools import Tool | |
import os | |
import tempfile | |
import requests | |
from urllib.parse import urlparse | |
import uuid | |
import pandas as pd | |
class DocumentProcessingTool(Tool): | |
name = "document_processing" | |
description = "Process various document types including saving files, downloading files, and analyzing CSV/Excel files." | |
inputs = { | |
'action': {'type': 'string', 'description': 'The action to perform: "save", "download", "analyze_csv", "analyze_excel"'}, | |
'content': {'type': 'string', 'description': 'The content to process: text content for save, URL for download, filepath for analysis'} | |
} | |
output_type = "string" | |
def __init__(self): | |
"""Initialize the document processing tool""" | |
super().__init__() | |
try: | |
import pandas as pd | |
except ImportError as e: | |
raise ImportError("You must install required packages: pip install pandas openpyxl") from e | |
self.is_initialized = True | |
def _save_and_read_file(self, content: str) -> str: | |
""" | |
Save content to a file and return the path. | |
Args: | |
content (str): the content to save to the file | |
""" | |
temp_dir = tempfile.gettempdir() | |
random_filename = f"file_{uuid.uuid4().hex[:8]}.txt" | |
filepath = os.path.join(temp_dir, random_filename) | |
with open(filepath, "w") as f: | |
f.write(content) | |
return f"File saved to {filepath}. You can read this file to process its contents." | |
def _download_file_from_url(self, url: str) -> str: | |
""" | |
Download a file from a URL and save it to a temporary location. | |
Args: | |
url (str): the URL of the file to download. | |
""" | |
try: | |
# Generate random filename with original extension if available | |
path = urlparse(url).path | |
ext = os.path.splitext(path)[1] or '.tmp' | |
random_filename = f"downloaded_{uuid.uuid4().hex[:8]}{ext}" | |
# Create temporary file | |
temp_dir = tempfile.gettempdir() | |
filepath = os.path.join(temp_dir, random_filename) | |
# Download the file | |
response = requests.get(url, stream=True) | |
response.raise_for_status() | |
# Save the file | |
with open(filepath, "wb") as f: | |
for chunk in response.iter_content(chunk_size=8192): | |
f.write(chunk) | |
return f"File downloaded to {filepath}. You can read this file to process its contents." | |
except Exception as e: | |
return f"Error downloading file: {str(e)}" | |
def _analyze_csv_file(self, file_path: str) -> str: | |
""" | |
Analyze a CSV file using pandas. | |
Args: | |
file_path (str): the path to the CSV file. | |
""" | |
try: | |
# Read the CSV file | |
df = pd.read_csv(file_path) | |
# Run various analyses | |
result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n" | |
result += f"Columns: {', '.join(df.columns)}\n\n" | |
# Add summary statistics | |
result += "Summary statistics:\n" | |
result += str(df.describe()) | |
result += "\n\nTop 50 rows content:\n" | |
result += str(df.head(50)) | |
return result | |
except Exception as e: | |
return f"Error analyzing CSV file: {str(e)}" | |
def _analyze_excel_file(self, file_path: str) -> str: | |
""" | |
Analyze an Excel file using pandas. | |
Args: | |
file_path (str): the path to the Excel file. | |
""" | |
try: | |
# Read the Excel file | |
df = pd.read_excel(file_path) | |
# Run various analyses | |
result = f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n" | |
result += f"Columns: {', '.join(df.columns)}\n\n" | |
# Add summary statistics | |
result += "Summary statistics:\n" | |
result += str(df.describe()) | |
result += "\n\nTop 50 rows content:\n" | |
result += str(df.head(50)) | |
return result | |
except Exception as e: | |
return f"Error analyzing Excel file: {str(e)}" | |
def forward(self, action: str, content: str) -> str: | |
""" | |
Process documents based on the specified action. | |
Args: | |
action (str): The action to perform (save, download, analyze_csv, analyze_excel) | |
content (str): The content to process | |
Returns: | |
str: Result of the operation | |
""" | |
try: | |
if action == "save": | |
return self._save_and_read_file(content) | |
elif action == "download": | |
return self._download_file_from_url(content) | |
elif action == "analyze_csv": | |
return self._analyze_csv_file(content) | |
elif action == "analyze_excel": | |
return self._analyze_excel_file(content) | |
else: | |
if ((content.find(".xls") != -1) or (content.find(".xlsx") != -1)): | |
return self._analyze_excel_file(content) | |
elif (content.find("csv") != -1): | |
return self._analyze_csv_file(content) | |
else: | |
return f"Error: Invalid action '{action}'. Valid actions are: save, download, analyze_csv, analyze_excel" | |
except Exception as e: | |
return f"Error performing {action} operation: {str(e)}" | |