#!/usr/bin/env python3 # Testing the spreadsheet tool with a downloaded Excel file import os import sys # Add the parent directory to sys.path to find the src module sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from src.spreadsheet_tool import SpreadsheetTool def main(): # Initialize the spreadsheet tool spreadsheet_tool = SpreadsheetTool() # Path to the downloaded Excel file # Need to navigate up one level and then to downloaded_files project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx") print(f"Testing SpreadsheetTool with file: {excel_file_path}") print(f"File exists: {os.path.exists(excel_file_path)}") # Parse the spreadsheet print("\n--- PARSING SPREADSHEET ---") parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path) if parsed_data.get("error"): print(f"Error: {parsed_data['error']}") return # Display basic information about the spreadsheet print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:") print(f"Sheet names: {parsed_data['sheet_names']}") # Display a summary of each sheet print("\n--- SHEET SUMMARIES ---") for sheet_name, info in parsed_data["summary"].items(): print(f"\nSheet: {sheet_name}") print(f" Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns") print(f" Column names: {info['columns']}") print(f" Numeric columns: {info['numeric_columns']}") print(f" Text columns: {info['text_columns']}") print(f" Contains null values: {info['has_nulls']}") # Display a sample of the first 3 rows print(f"\n Sample data (first 3 rows):") for i, row in enumerate(info['first_few_rows']): print(f" Row {i+1}: {row}") # Test the query_data method for numeric operations print("\n--- TESTING QUERY OPERATIONS ---") for query in ["sum", "average", "count"]: print(f"\nTesting '{query}' operation:") query_result = spreadsheet_tool.query_data(parsed_data, query) if query_result.get("error"): print(f" Error: {query_result['error']}") else: # Remove data_structure from output to keep it cleaner if "data_structure" in query_result: del query_result["data_structure"] print(f" Result: {query_result}") # Test extracting specific data print("\n--- TESTING DATA EXTRACTION ---") # We'll extract data from the first sheet first_sheet = parsed_data["sheet_names"][0] all_columns = parsed_data["summary"][first_sheet]["columns"] # Extract first two columns from the first sheet if len(all_columns) >= 2: extract_columns = all_columns[:2] print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':") extract_result = spreadsheet_tool.extract_specific_data( parsed_data, sheet_name=first_sheet, column_names=extract_columns ) if extract_result.get("error"): print(f" Error: {extract_result['error']}") else: print(f" Extracted data shape: {extract_result['shape']}") print(f" First few rows:") for i, row in enumerate(extract_result['data'][:3]): print(f" Row {i+1}: {row}") if __name__ == "__main__": main()