from typing import Any, Dict, List, Optional

import pandas as pd
import streamlit as st


class helpers:
    """Helper methods for date conversion and error-flag handling on DataFrames.

    Attributes:
        interface: Configuration object. The methods below read the boolean
            settings ``CONVERTIDATESTRINGHE``, ``TOLLERANZAZERO`` and
            (optionally) ``DEBUGWARNINGS`` from it.
        ERRDICT: Mapping whose keys are the names of the boolean error-flag
            columns present in the processed DataFrames.
    """

    # Output format used for every date column that is turned into a string.
    _DATE_FORMAT = "%d/%m/%Y"

    # Names of the date columns handled by default.
    _COL_BIRTH = "Data di nascita"
    _COL_START = "Data inizio contratto (o data inizio assistenza se diversa)"
    _COL_END = "Data fine contratto (o data fine assistenza se diversa)"

    # Error flag that is deliberately ignored when selecting "error" rows.
    _IGNORED_ERROR_FLAG = "errMassimo543"

    def __init__(self, interface: Any = None, errdict: Optional[Dict[str, Any]] = None):
        """Store the configuration object and the error dictionary.

        Args:
            interface: Object exposing the configuration settings.
            errdict: Dictionary keyed by error-flag column name.
        """
        self.interface = interface
        self.ERRDICT = errdict

    def get_version(self) -> str:
        """Return the version string of this helper collection."""
        return "Helpertools 1.0.0"

    def _halt_if_zero_tolerance(self, message: str) -> None:
        """Show *message* and stop the Streamlit script when TOLLERANZAZERO is set."""
        if self.interface.TOLLERANZAZERO:
            st.error(message)
            st.stop()

    def convert_dates_if_needed(
        self, df: pd.DataFrame, date_cols: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """Convert date columns of *df* to ``dd/mm/YYYY`` strings, in place.

        Two modes, selected by ``interface.CONVERTIDATESTRINGHE``:

        * truthy: the three default date columns must exist and already have
          a datetime dtype; they are formatted with ``.dt.strftime``.
          ``date_cols`` is ignored in this mode.
        * falsy: every column in ``date_cols`` (or the three default columns
          when ``date_cols`` is empty/None) that exists in *df* is parsed
          with ``pd.to_datetime(..., errors="coerce")`` and then formatted.
          Missing columns are skipped.

        Any exception is reported with ``st.error``. If
        ``interface.TOLLERANZAZERO`` is truthy the script is then halted via
        ``st.stop()``; otherwise the (possibly partially converted) frame is
        returned.

        Args:
            df: DataFrame whose date columns are converted. It is modified
                in place and also returned.
            date_cols: Column names to convert in the lenient mode.

        Returns:
            The same DataFrame object, with the converted columns.
        """
        if self.interface.CONVERTIDATESTRINGHE:
            try:
                # Order kept as birth, end, start so that a failure midway
                # leaves the same columns converted as before.
                for col in (self._COL_BIRTH, self._COL_END, self._COL_START):
                    df[col] = df[col].dt.strftime(self._DATE_FORMAT)
            except Exception as e:
                st.error(f"Errore durante conversione date in stringhe; errore: {e}")
                self._halt_if_zero_tolerance("Elaborazione terminata")
            return df

        if not date_cols:
            date_cols = [self._COL_BIRTH, self._COL_START, self._COL_END]
        try:
            for col in date_cols:
                if col in df.columns:
                    # Unparseable values become NaT (coerce) and therefore
                    # NaN after formatting.
                    parsed = pd.to_datetime(df[col], errors="coerce")
                    df[col] = parsed.dt.strftime(self._DATE_FORMAT)
        except Exception as e:
            st.error(f"Errore durante conversione date, errore: {e}")
            self._halt_if_zero_tolerance("Elaborazione terminata")
        return df

    def make_df_solo_errori(self, df) -> pd.DataFrame:
        """Return only the rows of *df* that have at least one error flag set.

        The flag columns are the keys of ``self.ERRDICT``, except
        ``errMassimo543`` which is skipped on purpose (it would select too
        many rows). A row is kept when any of the remaining flag columns
        equals ``True``.

        Args:
            df: DataFrame containing one column per error flag.

        Returns:
            A filtered DataFrame. It is empty when no flag column is
            configured.

        Raises:
            KeyError: If a configured flag column is missing from *df*.
        """
        flag_columns = [
            name for name in (self.ERRDICT or ()) if name != self._IGNORED_ERROR_FLAG
        ]

        # Start from "no row has an error" and OR-in each flag column. Each
        # comparison is evaluated on its own before being combined, so
        # operator precedence between `|` and `==` cannot bite.
        has_error = pd.Series(False, index=df.index)
        for name in flag_columns:
            has_error = has_error | df[name].eq(True)

        return df[has_error]

    def drop_columns(self, df: pd.DataFrame, errordict) -> pd.DataFrame:
        """Return *df* without the columns named by the keys of *errordict*.

        A missing column is reported with ``st.warning``. If the interface
        has ``DEBUGWARNINGS`` or ``TOLLERANZAZERO`` set to a truthy value,
        the script is then halted via ``st.stop()``; otherwise the column is
        skipped and processing continues.

        Args:
            df: DataFrame to remove the columns from (not modified in place).
            errordict: Mapping whose keys are the column names to drop.

        Returns:
            A DataFrame without the listed columns.
        """
        for k in errordict:
            try:
                df = df.drop([k], axis=1)
            except KeyError as e:
                st.warning(
                    f"Non è stato possibile trovare la colonna {k}; errore: {e}"
                )
                # The flags live on the interface object, not on this class;
                # default to False when the interface does not define them.
                must_stop = getattr(self.interface, "DEBUGWARNINGS", False) or getattr(
                    self.interface, "TOLLERANZAZERO", False
                )
                if must_stop:
                    st.error("Esecuzione terminata")
                    st.stop()
        return df