File size: 6,253 Bytes
b073d38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import pandas as pd
from typing import Optional, List, Dict, Any
import streamlit as st
class helpers:
    """
    Utility helpers for date conversion, error filtering and column cleanup.

    Attributes:
        interface: Configuration object. The methods read the flags
            CONVERTIDATESTRINGHE, TOLLERANZAZERO and DEBUGWARNINGS from it;
            a flag that is missing (or an interface of None) counts as False.
        ERRDICT: Mapping whose keys name the error-flag columns of the
            DataFrames handled by make_df_solo_errori.
    """

    # Columns converted by the lenient path when the caller passes no list.
    _DEFAULT_DATE_COLS = (
        "Data di nascita",
        "Data inizio contratto (o data inizio assistenza se diversa)",
        "Data fine contratto (o data fine assistenza se diversa)",
    )

    # Columns converted by the strict path, in the order they are processed.
    _STRICT_DATE_COLS = (
        "Data di nascita",
        "Data fine contratto (o data fine assistenza se diversa)",
        "Data inizio contratto (o data inizio assistenza se diversa)",
    )

    # Error column deliberately ignored when selecting the rows with errors.
    _EXCLUDED_ERROR = "errMassimo543"

    def __init__(self, interface=None, errdict=None):
        """
        Store the configuration object and the error dictionary.

        Args:
            interface: Object exposing the configuration flags as attributes.
            errdict: Mapping keyed by error-flag column name.
        """
        self.interface = interface
        self.ERRDICT = errdict

    def _flag(self, name: str) -> bool:
        """Return the truthiness of a configuration flag, False when absent."""
        return bool(getattr(self.interface, name, False))

    def get_version(self) -> str:
        """Return the version string of this helper collection."""
        return "Helpertools 1.0.0"

    def convert_dates_if_needed(
        self, df: pd.DataFrame, date_cols: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """
        Rewrite the date columns of ``df`` as 'dd/mm/YYYY' strings.

        Two paths exist, selected by the CONVERTIDATESTRINGHE flag:

        * flag on: the three standard date columns are formatted directly with
          ``.dt.strftime``; they must already be datetime columns and all be
          present. ``date_cols`` is not used on this path.
        * flag off: each column in ``date_cols`` (or the three standard columns
          when it is empty/None) that exists in ``df`` is parsed with
          ``pd.to_datetime(errors='coerce')`` and then formatted; unparseable
          values become missing values.

        The columns are assigned on ``df`` itself, so the caller's DataFrame is
        modified.

        Args:
            df: DataFrame holding the date columns.
            date_cols: Column names for the lenient path.

        Returns:
            The same DataFrame object, with the converted columns.

        Side effects:
            On failure an error is shown through ``st.error``; when
            TOLLERANZAZERO is set, ``st.stop()`` is called as well.
        """
        # NOTE(review): the lenient path runs whenever CONVERTIDATESTRINGHE is
        # off, so dates end up as strings on both paths - confirm this is the
        # intended meaning of the flag.
        if self._flag("CONVERTIDATESTRINGHE"):
            try:
                for col in self._STRICT_DATE_COLS:
                    df[col] = df[col].dt.strftime('%d/%m/%Y')
            except Exception as e:
                st.error(f"Errore durante conversione date in stringhe; errore: {e}")
                if self._flag("TOLLERANZAZERO"):
                    st.error("Elaborazione terminata")
                    st.stop()
            return df

        if not date_cols:
            date_cols = list(self._DEFAULT_DATE_COLS)
        try:
            for col in date_cols:
                if col in df.columns:
                    # Coerce first so that bad values become missing instead of raising.
                    parsed = pd.to_datetime(df[col], errors='coerce')
                    df[col] = parsed.dt.strftime('%d/%m/%Y')
        except Exception as e:
            st.error(f"Errore durante conversione date, errore: {e}")
            if self._flag("TOLLERANZAZERO"):
                st.error("Elaborazione terminata")
                st.stop()
        return df

    def make_df_solo_errori(self, df) -> pd.DataFrame:
        """
        Return only the rows of ``df`` that have at least one error flag set.

        A row is kept when any column named by a key of ``ERRDICT`` (except
        'errMassimo543') compares equal to True. When there is no error column
        to check, no row qualifies and an empty DataFrame with the same columns
        is returned.

        Args:
            df: DataFrame containing one column per ERRDICT key.

        Returns:
            The filtered DataFrame.

        Raises:
            KeyError: If a checked ERRDICT key is not a column of ``df``.
        """
        error_cols = [
            name for name in (self.ERRDICT or ())
            if name != self._EXCLUDED_ERROR
        ]
        # Compare each flag to True on its own and only then OR the results;
        # OR-ing raw columns before comparing gives wrong answers for
        # non-boolean flags. With no columns, any() yields an all-False mask.
        has_error = df[error_cols].eq(True).any(axis=1)
        return df[has_error]

    def drop_columns(self, df: pd.DataFrame, errordict: Dict[str, Any]) -> pd.DataFrame:
        """
        Remove from ``df`` every column named by a key of ``errordict``.

        Columns are dropped one at a time so that a missing column does not
        prevent the others from being removed. The input DataFrame is not
        modified; a new one is returned.

        Args:
            df: DataFrame to clean.
            errordict: Mapping whose keys are the column names to drop.

        Returns:
            DataFrame without the listed columns.

        Side effects:
            A column that cannot be dropped triggers ``st.warning``; when
            DEBUGWARNINGS or TOLLERANZAZERO is set on the interface,
            ``st.error`` and ``st.stop()`` are called as well.
        """
        for col in errordict:
            try:
                df = df.drop([col], axis=1)
            except Exception as e:
                st.warning(
                    f"Non è stato possibile trovare la colonna {col}; errore: {e}"
                )
                # Flags live on the interface, as in the other methods.
                if self._flag("DEBUGWARNINGS") or self._flag("TOLLERANZAZERO"):
                    st.error("Esecuzione terminata")
                    st.stop()
        return df