|
import pandas as pd |
|
from typing import Optional, List, Dict, Any |
|
import streamlit as st |
|
|
|
class helpers:
    """
    Utility methods for preparing validation results for reporting.

    The class formats date columns as ``dd/mm/YYYY`` strings, filters a
    DataFrame down to the rows flagged with at least one error, and removes
    error-flag columns. Problems are reported through Streamlit
    (``st.error`` / ``st.warning``); when a strict-mode flag is enabled the
    running script is halted with ``st.stop()``.

    Attributes:
        interface: Configuration object. The flags read from it are
            ``CONVERTIDATESTRINGHE``, ``TOLLERANZAZERO`` and ``DEBUGWARNINGS``.
            May be ``None``, in which case every flag counts as disabled.
        ERRDICT: Collection (iterated for its keys) naming the error-flag
            columns used by ``make_df_solo_errori``.
    """

    # Output format shared by both date-conversion strategies.
    _DATE_FORMAT = "%d/%m/%Y"

    _COL_BIRTH = "Data di nascita"
    _COL_START = "Data inizio contratto (o data inizio assistenza se diversa)"
    _COL_END = "Data fine contratto (o data fine assistenza se diversa)"

    # Error flag that never qualifies a row as "having errors".
    _IGNORED_ERROR_FLAG = "errMassimo543"

    def __init__(self, interface=None, errdict=None):
        """
        Store the configuration object and the error dictionary.

        Args:
            interface: Object exposing the configuration flags (see class
                docstring). Optional.
            errdict: Dictionary keyed by error-flag column name. Optional.
        """
        self.interface = interface
        self.ERRDICT = errdict

    def get_version(self) -> str:
        """Return the version label of this helper module."""
        return "Helpertools 1.0.0"

    def _flag_enabled(self, name: str) -> bool:
        """
        Return whether configuration flag ``name`` is set to a truthy value.

        The flag is looked up on ``self.interface`` first and then on the
        helper instance itself, so a flag assigned directly on the instance
        is still honoured. A flag defined in neither place is disabled.
        """
        for holder in (self.interface, self):
            if hasattr(holder, name):
                return bool(getattr(holder, name))
        return False

    def _stop_if_flagged(self, message: str, *flags: str) -> None:
        """Show ``message`` and halt the Streamlit script if any flag is on."""
        if any(self._flag_enabled(flag) for flag in flags):
            st.error(message)
            st.stop()

    def convert_dates_if_needed(self, df: pd.DataFrame, date_cols: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Render date columns of ``df`` as ``dd/mm/YYYY`` strings, in place.

        Two strategies exist, selected by the ``CONVERTIDATESTRINGHE`` flag:

        * flag enabled: the three predefined date columns are formatted
          directly through the ``.dt`` accessor, so they must exist and
          already be datetime-like; ``date_cols`` is not consulted.
        * flag disabled: each column in ``date_cols`` (or the three
          predefined columns when ``date_cols`` is empty/``None``) that is
          present in ``df`` is parsed with ``pd.to_datetime(...,
          errors='coerce')`` and then formatted. Unparseable values become
          missing values; absent columns are skipped.

        Any exception is reported with ``st.error``. If ``TOLLERANZAZERO`` is
        enabled the script is then stopped with ``st.stop()``; otherwise the
        DataFrame is returned as far as it was converted.

        Args:
            df: DataFrame whose date columns are converted (mutated in place).
            date_cols: Column names to convert in the parsing strategy.

        Returns:
            The same DataFrame object, with converted columns.
        """
        if self._flag_enabled("CONVERTIDATESTRINGHE"):
            try:
                # Order kept as birth, end, start so that a failure part-way
                # through leaves the same columns converted as before.
                for col in (self._COL_BIRTH, self._COL_END, self._COL_START):
                    df[col] = df[col].dt.strftime(self._DATE_FORMAT)
            except Exception as e:  # UI boundary: report whatever went wrong
                st.error(f"Errore durante conversione date in stringhe; errore: {e}")
                self._stop_if_flagged("Elaborazione terminata", "TOLLERANZAZERO")
            return df

        columns = date_cols or [self._COL_BIRTH, self._COL_START, self._COL_END]
        try:
            for col in columns:
                if col in df.columns:
                    parsed = pd.to_datetime(df[col], errors='coerce')
                    df[col] = parsed.dt.strftime(self._DATE_FORMAT)
        except Exception as e:  # UI boundary: report whatever went wrong
            st.error(f"Errore durante conversione date, errore: {e}")
            self._stop_if_flagged("Elaborazione terminata", "TOLLERANZAZERO")
        return df

    def make_df_solo_errori(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return only the rows of ``df`` that carry at least one error flag.

        Every key of ``self.ERRDICT`` except ``'errMassimo543'`` is treated as
        the name of a flag column; a row is kept when any of those columns
        equals ``True``.

        Args:
            df: DataFrame containing one column per error flag.

        Returns:
            The filtered DataFrame. When there is no flag column to inspect
            (``ERRDICT`` empty, ``None``, or holding only the ignored flag)
            an empty DataFrame with the same columns is returned.

        Raises:
            KeyError: If a flag named in ``ERRDICT`` is not a column of ``df``.
        """
        flag_columns = [
            name for name in (self.ERRDICT or {})
            if name != self._IGNORED_ERROR_FLAG
        ]
        if not flag_columns:
            # No flag to test means no row can be in error.
            return df.iloc[0:0]

        # Compare each flag column to True first, then OR across columns.
        has_error = df[flag_columns].eq(True).any(axis=1)
        return df[has_error]

    def drop_columns(self, df: pd.DataFrame, errordict: Dict[str, Any]) -> pd.DataFrame:
        """
        Remove from ``df`` every column named by a key of ``errordict``.

        Columns are dropped one at a time so that each missing column gets
        its own ``st.warning``. After a warning, if ``DEBUGWARNINGS`` or
        ``TOLLERANZAZERO`` is enabled, the script is stopped with
        ``st.stop()``; otherwise processing continues with the next key.

        Args:
            df: DataFrame to strip; the caller's object is not modified.
            errordict: Dictionary whose keys are the column names to drop.

        Returns:
            A DataFrame without the columns that could be removed.
        """
        for k in errordict:
            try:
                df = df.drop([k], axis=1)
            except KeyError as e:
                st.warning(
                    f"Non è stato possibile trovare la colonna {k}; errore: {e}"
                )
                self._stop_if_flagged(
                    "Esecuzione terminata", "DEBUGWARNINGS", "TOLLERANZAZERO"
                )
        return df