File size: 5,700 Bytes
664c17b
 
 
 
 
16a27fa
 
664c17b
 
 
 
16a27fa
664c17b
16a27fa
 
664c17b
 
 
16a27fa
 
664c17b
 
 
 
16a27fa
664c17b
 
16a27fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
664c17b
16a27fa
 
 
 
 
 
 
 
 
 
664c17b
 
16a27fa
 
 
664c17b
 
16a27fa
 
664c17b
 
 
16a27fa
 
 
 
 
 
664c17b
16a27fa
 
664c17b
16a27fa
 
 
664c17b
16a27fa
 
 
 
 
664c17b
16a27fa
664c17b
16a27fa
 
 
 
 
 
 
 
664c17b
16a27fa
 
b617c72
16a27fa
 
 
 
 
 
 
 
 
 
 
664c17b
16a27fa
664c17b
16a27fa
 
 
664c17b
16a27fa
 
 
 
 
 
 
664c17b
16a27fa
 
664c17b
16a27fa
 
664c17b
 
16a27fa
664c17b
16a27fa
 
664c17b
 
16a27fa
 
 
 
 
 
664c17b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from typing import Any, Optional, Dict
from smolagents.tools import Tool
import os
import tempfile
import requests
from urllib.parse import urlparse
import uuid
import pandas as pd

class DocumentProcessingTool(Tool):
    """Tool that saves text, downloads files, and summarizes CSV/Excel files.

    Every operation reports its outcome as a human-readable string (including
    failures, which are returned as ``"Error ..."`` messages rather than
    raised), so the calling agent always receives text it can act on.
    """

    name = "document_processing"
    description = "Process various document types including saving files, downloading files, and analyzing CSV/Excel files."
    inputs = {
        'action': {'type': 'string', 'description': 'The action to perform: "save", "download", "analyze_csv", "analyze_excel"'},
        'content': {'type': 'string', 'description': 'The content to process: text content for save, URL for download, filepath for analysis'}
    }
    output_type = "string"

    # Seconds to wait for the remote server (connect and between reads) before
    # a download is abandoned; without a timeout `requests` can block forever.
    download_timeout = 30

    def __init__(self):
        """Initialize the document processing tool.

        Raises:
            ImportError: if pandas cannot be imported.
        """
        super().__init__()
        try:
            # Defensive availability check; the analysis methods rely on pandas.
            import pandas  # noqa: F401
        except ImportError as e:
            raise ImportError("You must install required packages: pip install pandas openpyxl") from e

        self.is_initialized = True

    def _save_and_read_file(self, content: str) -> str:
        """
        Save content to a randomly named .txt file in the system temp directory.

        Args:
            content (str): the content to save to the file

        Returns:
            str: a message containing the path of the written file.
        """
        temp_dir = tempfile.gettempdir()
        random_filename = f"file_{uuid.uuid4().hex[:8]}.txt"
        filepath = os.path.join(temp_dir, random_filename)

        # Explicit UTF-8 so non-ASCII text does not depend on the platform's
        # default encoding (which can raise UnicodeEncodeError).
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

        return f"File saved to {filepath}. You can read this file to process its contents."

    def _download_file_from_url(self, url: str) -> str:
        """
        Download a file from a URL and save it to a temporary location.

        The file keeps the extension found in the URL path ('.tmp' if none).

        Args:
            url (str): the URL of the file to download.

        Returns:
            str: a message with the saved path, or an "Error downloading file"
            message if anything goes wrong.
        """
        filepath = None
        try:
            # Generate random filename with original extension if available
            path = urlparse(url).path
            ext = os.path.splitext(path)[1] or '.tmp'
            random_filename = f"downloaded_{uuid.uuid4().hex[:8]}{ext}"
            filepath = os.path.join(tempfile.gettempdir(), random_filename)

            # The context manager releases the streamed connection even when
            # the transfer fails part-way through.
            with requests.get(url, stream=True, timeout=self.download_timeout) as response:
                response.raise_for_status()

                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)

            return f"File downloaded to {filepath}. You can read this file to process its contents."
        except Exception as e:
            # Do not leave a truncated file behind after a failed transfer.
            if filepath is not None and os.path.exists(filepath):
                try:
                    os.remove(filepath)
                except OSError:
                    pass
            return f"Error downloading file: {str(e)}"

    def _summarize_dataframe(self, df: "pd.DataFrame", label: str) -> str:
        """
        Build the text report shared by the CSV and Excel analyses.

        Args:
            df: the loaded table.
            label (str): file kind used in the first line ("CSV" or "Excel").

        Returns:
            str: row/column counts, column names, `describe()` output and the
            first 50 rows.
        """
        # Column labels may be non-strings (e.g. integer headers), so convert
        # each one before joining.
        column_names = ', '.join(map(str, df.columns))
        sections = [
            f"{label} file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
            "\n\nTop 50 rows content:\n",
            str(df.head(50)),
        ]
        return "".join(sections)

    def _analyze_csv_file(self, file_path: str) -> str:
        """
        Analyze a CSV file using pandas.

        Args:
            file_path (str): the path to the CSV file.

        Returns:
            str: the analysis report, or an "Error analyzing CSV file" message.
        """
        try:
            df = pd.read_csv(file_path)
            return self._summarize_dataframe(df, "CSV")
        except Exception as e:
            return f"Error analyzing CSV file: {str(e)}"

    def _analyze_excel_file(self, file_path: str) -> str:
        """
        Analyze an Excel file using pandas.

        Args:
            file_path (str): the path to the Excel file.

        Returns:
            str: the analysis report, or an "Error analyzing Excel file" message.
        """
        try:
            df = pd.read_excel(file_path)
            return self._summarize_dataframe(df, "Excel")
        except Exception as e:
            return f"Error analyzing Excel file: {str(e)}"

    def forward(self, action: str, content: str) -> str:
        """
        Process documents based on the specified action.

        The action is matched ignoring surrounding whitespace and letter case.
        When it is not one of the known actions, the content is inspected: if
        it mentions an Excel extension it is analyzed as Excel, if it mentions
        "csv" it is analyzed as CSV, otherwise an error message is returned.

        Args:
            action (str): The action to perform (save, download, analyze_csv, analyze_excel)
            content (str): The content to process

        Returns:
            str: Result of the operation
        """
        handlers = {
            "save": self._save_and_read_file,
            "download": self._download_file_from_url,
            "analyze_csv": self._analyze_csv_file,
            "analyze_excel": self._analyze_excel_file,
        }
        try:
            normalized = action.strip().lower() if isinstance(action, str) else action
            handler = handlers.get(normalized)
            if handler is not None:
                return handler(content)

            # Unknown action: guess from the content, case-insensitively.
            # ".xls" also matches ".xlsx", so one check covers both.
            lowered = content.lower()
            if ".xls" in lowered:
                return self._analyze_excel_file(content)
            if "csv" in lowered:
                return self._analyze_csv_file(content)
            return f"Error: Invalid action '{action}'. Valid actions are: save, download, analyze_csv, analyze_excel"

        except Exception as e:
            return f"Error performing {action} operation: {str(e)}"