Spaces:
Runtime error
Runtime error
File size: 5,700 Bytes
664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa b617c72 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b 16a27fa 664c17b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
from typing import Any, Optional, Dict
from smolagents.tools import Tool
import os
import tempfile
import requests
from urllib.parse import urlparse
import uuid
import pandas as pd
class DocumentProcessingTool(Tool):
    """Tool that saves text, downloads files, and summarizes CSV/Excel files.

    The tool is driven through ``forward(action, content)``. Every operation
    reports its outcome as a human-readable string; failures are returned as
    ``"Error ..."`` strings instead of being raised.
    """

    name = "document_processing"
    description = "Process various document types including saving files, downloading files, and analyzing CSV/Excel files."
    inputs = {
        'action': {'type': 'string', 'description': 'The action to perform: "save", "download", "analyze_csv", "analyze_excel"'},
        'content': {'type': 'string', 'description': 'The content to process: text content for save, URL for download, filepath for analysis'},
    }
    output_type = "string"

    def __init__(self):
        """Initialize the document processing tool.

        Raises:
            ImportError: if pandas cannot be imported.
        """
        super().__init__()
        try:
            # Imported only to fail early with an actionable install hint.
            import pandas as pd  # noqa: F401
        except ImportError as e:
            raise ImportError("You must install required packages: pip install pandas openpyxl") from e
        self.is_initialized = True

    def _save_and_read_file(self, content: str) -> str:
        """
        Save content to a randomly named .txt file in the system temp directory.
        Args:
            content (str): the content to save to the file
        Returns:
            str: a message containing the path of the written file
        """
        temp_dir = tempfile.gettempdir()
        random_filename = f"file_{uuid.uuid4().hex[:8]}.txt"
        filepath = os.path.join(temp_dir, random_filename)
        # Explicit UTF-8 so non-ASCII text is written identically on every
        # platform instead of depending on the locale's default encoding.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
        return f"File saved to {filepath}. You can read this file to process its contents."

    def _download_file_from_url(self, url: str, timeout: float = 30.0) -> str:
        """
        Download a file from a URL and save it to a temporary location.
        Args:
            url (str): the URL of the file to download.
            timeout (float): seconds to wait for the server to connect/send
                data before giving up (passed to ``requests.get``).
        Returns:
            str: a message with the downloaded file's path, or an
                "Error downloading file: ..." message on failure
        """
        filepath = None
        try:
            # Generate random filename with original extension if available
            path = urlparse(url).path
            ext = os.path.splitext(path)[1] or '.tmp'
            random_filename = f"downloaded_{uuid.uuid4().hex[:8]}{ext}"
            filepath = os.path.join(tempfile.gettempdir(), random_filename)
            # The context manager releases the connection even if writing
            # fails; the timeout keeps a stalled server from blocking forever.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip empty keep-alive chunks
                            f.write(chunk)
            return f"File downloaded to {filepath}. You can read this file to process its contents."
        except Exception as e:
            # Best-effort removal of a partially written file so callers never
            # pick up truncated data; a cleanup failure must not mask `e`.
            if filepath is not None and os.path.exists(filepath):
                try:
                    os.remove(filepath)
                except OSError:
                    pass
            return f"Error downloading file: {str(e)}"

    def _summarize_dataframe(self, df, label: str) -> str:
        """
        Build the text report shared by the CSV and Excel analyses.
        Args:
            df: the loaded pandas DataFrame.
            label (str): file kind used in the first line ("CSV" or "Excel").
        Returns:
            str: row/column counts, column names, ``describe()`` output and
                the first 50 rows
        """
        # Column labels may be non-strings (e.g. integer headers), so convert
        # them before joining.
        column_names = ', '.join(map(str, df.columns))
        return "".join([
            f"{label} file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
            "\n\nTop 50 rows content:\n",
            str(df.head(50)),
        ])

    def _analyze_csv_file(self, file_path: str) -> str:
        """
        Analyze a CSV file using pandas.
        Args:
            file_path (str): the path to the CSV file.
        Returns:
            str: the summary report, or an "Error analyzing CSV file: ..."
                message on failure
        """
        try:
            df = pd.read_csv(file_path)
            return self._summarize_dataframe(df, "CSV")
        except Exception as e:
            return f"Error analyzing CSV file: {str(e)}"

    def _analyze_excel_file(self, file_path: str) -> str:
        """
        Analyze an Excel file using pandas.
        Args:
            file_path (str): the path to the Excel file.
        Returns:
            str: the summary report, or an "Error analyzing Excel file: ..."
                message on failure
        """
        try:
            # read_excel is called with its defaults, so only the default
            # sheet is loaded.
            df = pd.read_excel(file_path)
            return self._summarize_dataframe(df, "Excel")
        except Exception as e:
            return f"Error analyzing Excel file: {str(e)}"

    def forward(self, action: str, content: str) -> str:
        """
        Process documents based on the specified action.
        Args:
            action (str): The action to perform (save, download, analyze_csv, analyze_excel);
                matched case-insensitively, ignoring surrounding whitespace
            content (str): The content to process
        Returns:
            str: Result of the operation
        """
        try:
            handlers = {
                "save": self._save_and_read_file,
                "download": self._download_file_from_url,
                "analyze_csv": self._analyze_csv_file,
                "analyze_excel": self._analyze_excel_file,
            }
            handler = handlers.get(str(action).strip().lower())
            if handler is not None:
                return handler(content)

            # Unknown action: fall back to guessing the file kind from the
            # content string. ".xls" also covers ".xlsx".
            hint = content.lower()
            if ".xls" in hint:
                return self._analyze_excel_file(content)
            if "csv" in hint:
                return self._analyze_csv_file(content)
            return f"Error: Invalid action '{action}'. Valid actions are: save, download, analyze_csv, analyze_excel"
        except Exception as e:
            return f"Error performing {action} operation: {str(e)}"
|