diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,8 +1,8 @@ import os - os.system("pip install --upgrade gradio") -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel # ConfigDict is not used by YourModel directly, but arbitrary_types_allowed is a Pydantic V2 feature. + # For Pydantic V1 it would be from pydantic.config import ConfigDict import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -15,6 +15,7 @@ import io from PIL import Image import tempfile +# This class seems to be a placeholder or for future Pydantic use. Included as per the provided code. class YourModel(BaseModel): class Config: arbitrary_types_allowed = True @@ -41,226 +42,229 @@ class BioprocessModel: @staticmethod def logistic(time, xo, xm, um): - # xo: initial biomass, xm: max biomass, um: max specific growth rate - if xm == 0 or (xo / xm == 1 and np.any(um * time > 0)): + if xm == 0 or (xo / xm >= 1 and np.any(um * time > 0)): # xm must be > xo for growth + # If xo/xm == 1, it implies X(t) = xo. If um*time > 0, exp term grows, leading to issues. + # If xo > xm, the model is not physically meaningful for growth. return np.full_like(time, np.nan) - # Add a small epsilon to prevent division by zero or log of zero in edge cases term_exp = np.exp(um * time) - denominator = (1 - (xo / xm) * (1 - term_exp)) - denominator = np.where(denominator == 0, 1e-9, denominator) # Avoid division by zero - # Ensure xo/xm is not 1 if (1-exp(um*time)) is also 0 (i.e. um*time = 0) - # This is usually handled by xo < xm constraint in fitting - return (xo * term_exp) / denominator + # Denominator: Xm - Xo + Xo * exp(um*t) + denominator = (xm - xo + xo * term_exp) + denominator = np.where(denominator == 0, 1e-9, denominator) + return (xo * xm * term_exp) / denominator + @staticmethod def gompertz(time, xm, um, lag): - # xm: max biomass, um: max specific growth rate, lag: lag time - if xm == 0: + if xm <= 0 or um <=0 : # lag can be 0 or positive return np.full_like(time, np.nan) - # Add small epsilon to prevent log(0) if exp_term becomes very large negative exp_term = (um * np.e / xm) * (lag - time) + 1 - # Clamp large negative values in exp_term to avoid overflow in np.exp(-np.exp(exp_term)) - exp_term_clipped = np.clip(exp_term, -np.inf, 700) # exp(709) is around max float + exp_term_clipped = np.clip(exp_term, -np.inf, 700) return xm * np.exp(-np.exp(exp_term_clipped)) @staticmethod def moser(time, Xm, um, Ks): # Xm: max biomass, um: max specific growth rate, Ks: Monod constant (here acting as time shift) - # This is a simplified form, not the substrate-dependent Moser. + if Xm <=0 or um <=0: return np.full_like(time, np.nan) return Xm * (1 - np.exp(-um * (time - Ks))) @staticmethod def baranyi(time, X0, Xm, um, lag): - # X0: initial biomass, Xm: max biomass, um: max specific growth rate, lag: lag time - # Ensure parameters are valid to prevent math errors - if X0 <= 0 or Xm <= X0 or um <= 0: # lag can be 0 + if X0 <= 0 or Xm <= X0 or um <= 0 or lag < 0: # lag should be non-negative return np.full_like(time, np.nan) - # Adjustment function A(t) - # Using h0 = um for simplicity in A(t) calculation - # A_t = t + (1/um) * np.log(np.exp(-um*t) + np.exp(-um*lag) - np.exp(-um*(t+lag))) - # Argument of log in A(t): - log_arg_A = np.exp(-um * t) + np.exp(-um * lag) - np.exp(-um * (t + lag)) - log_arg_A = np.where(log_arg_A <= 1e-9, 1e-9, log_arg_A) # Prevent log(0 or negative) - A_t = t + (1 / um) * np.log(log_arg_A) + h0 = um # Specific growth rate related term, often taken as um + # q0 = 1 / (np.exp(h0 * lag) -1) # if using q0 formulation + # A(t) = t + (1/h0) * np.log((np.exp(-h0 * t) + q0) / (1 + q0)) # Alternative A(t) + + # Simpler A(t) as provided in the original snippet's theory section + log_arg_A = np.exp(-um * time) + np.exp(-um * lag) - np.exp(-um * (time + lag)) + log_arg_A = np.where(log_arg_A <= 1e-9, 1e-9, log_arg_A) + A_t = time + (1 / um) * np.log(log_arg_A) - # Main Baranyi equation part exp_um_At = np.exp(um * A_t) - # Clamp large values to prevent overflow if Xm/X0 is large exp_um_At_clipped = np.clip(exp_um_At, -np.inf, 700) - numerator = (Xm / X0) * exp_um_At_clipped - denominator = (Xm / X0 - 1) + exp_um_At_clipped - denominator = np.where(denominator == 0, 1e-9, denominator) # Avoid division by zero + # Original Baranyi-Roberts form: + # X(t) = X0 * ( (Xm/X0 * exp(um*A_t)) / (Xm/X0 - 1 + exp(um*A_t)) ) is not quite right. + # It should be: X(t) = Xm - log(1 + (exp(um*A_t)-1) / exp(Xm-X0)) -- this is for log(X) + # Or using the form: log(X(t)/X0) = um*A(t) - log(1 + (exp(um*A(t))-1)/(Xm/X0)) + # So, X(t) = X0 * exp(um*A(t) - log(1 + (exp(um*A(t))-1)/(Xm/X0))) + # X(t) = X0 * exp(um*A(t)) / (1 + (exp(um*A(t))-1)/(Xm/X0)) + # X(t) = X0 * exp(um*A(t)) * (Xm/X0) / (Xm/X0 + exp(um*A(t)) - 1) + # X(t) = Xm * exp(um*A(t)) / (Xm/X0 - 1 + exp(um*A(t))) + + # Using the form from the snippet's theory section (rearranged for X(t)): + # ln X(t) = ln X0 + um A(t) - ln(1 + (e^(um A(t)) - 1) / (Xm/X0)) + # X(t) = X0 * exp( um A(t) - ln(1 + (exp(um A(t)) - 1) / (Xm/X0)) ) + # X(t) = X0 * exp(um A(t)) / (1 + (exp(um A(t)) - 1) / (Xm/X0)) + + # Let Y = exp(um * A_t_clipped) + # X(t) = X0 * Y / (1 + (Y - 1) / (Xm / X0)) + # X(t) = X0 * Y * (Xm / X0) / (Xm / X0 + Y - 1) + # X(t) = Xm * Y / (Xm / X0 - 1 + Y) - return X0 * (numerator / denominator) + numerator = Xm * exp_um_At_clipped + denominator = (Xm / X0 - 1) + exp_um_At_clipped + denominator = np.where(denominator == 0, 1e-9, denominator) + return numerator / denominator @staticmethod def logistic_diff(X, t, params): - # params for logistic_diff: [xo, xm, um] (xo is not used in diff eq, but passed for consistency) - _, xm, um = params + _, xm, um = params # xo is initial condition, not part of ODE itself other than for X(0) if xm == 0: return 0 return um * X * (1 - X / xm) @staticmethod def gompertz_diff(X, t, params): - # params for gompertz_diff: [xm, um, lag] xm, um, lag = params - if xm == 0: return 0 - # This is d(Gompertz)/dt - # Gompertz: xm * exp(-exp( (um*e/xm)*(lag-t)+1 )) + if xm == 0 or X == 0 : return 0 # Avoid log(0) or division by zero if Xm or X is zero + # Using dX/dt = um * X * log(Xm/X) (alternative form, simpler than direct derivative of complex Gompertz) + # This form is often used but implies a slightly different Gompertz model. + # For consistency with the analytical Gompertz form, the direct derivative is: # Let k = (um*e/xm) - # Let u = (k*(lag-t)+1) - # dX/dt = X * (-exp(u)) * k * (-1) = X * k * exp(u) + # Let u_val = k*(lag-t)+1 + # dX/dt = X * k * exp(u_val) k_val = um * np.e / xm u_val = k_val * (lag - t) + 1 - u_val_clipped = np.clip(u_val, -np.inf, 700) - return X * k_val * np.exp(u_val_clipped) + u_val_clipped = np.clip(u_val, -np.inf, 700) # Avoid overflow in exp + # Ensure X is positive for this to be meaningful + return X * k_val * np.exp(u_val_clipped) if X > 1e-9 else 0.0 @staticmethod def moser_diff(X, t, params): - # params for moser_diff: [Xm, um, Ks] - Xm, um, _ = params # Ks is not directly in this simplified dX/dt + Xm, um, _ = params # Ks is part of the integrated form, not directly in this simplified dX/dt return um * (Xm - X) - # No differential form for Baranyi in this version due to complexity. - def substrate(self, time, so, p, q, biomass_params_list): if self.biomass_model is None or not biomass_params_list: return np.full_like(time, np.nan) X_t = self.biomass_model(time, *biomass_params_list) - if np.any(np.isnan(X_t)): - return np.full_like(time, np.nan) + if np.any(np.isnan(X_t)): return np.full_like(time, np.nan) integral_X = np.zeros_like(X_t) if len(time) > 1: dt = np.diff(time, prepend=time[0] - (time[1]-time[0] if len(time)>1 else 1)) integral_X = np.cumsum(X_t * dt) - # Determine X0 (initial biomass) from the fitted parameters - if self.model_type == 'logistic' or self.model_type == 'baranyi': - X0 = biomass_params_list[0] # xo or X0 is the first parameter - elif self.model_type == 'gompertz': - X0 = self.gompertz(0, *biomass_params_list) - elif self.model_type == 'moser': - X0 = self.moser(0, *biomass_params_list) - else: - X0 = X_t[0] # Fallback - - X0 = X0 if not np.isnan(X0) else (biomass_params_list[0] if biomass_params_list else 0) - - + X0_calc = 0 + if self.model_type in ['logistic', 'baranyi']: X0_calc = biomass_params_list[0] + elif self.model_type == 'gompertz': X0_calc = self.gompertz(0, *biomass_params_list) + elif self.model_type == 'moser': X0_calc = self.moser(0, *biomass_params_list) + else: X0_calc = X_t[0] + X0 = X0_calc if not np.isnan(X0_calc) else (biomass_params_list[0] if biomass_params_list else 0) return so - p * (X_t - X0) - q * integral_X def product(self, time, po, alpha, beta, biomass_params_list): if self.biomass_model is None or not biomass_params_list: return np.full_like(time, np.nan) X_t = self.biomass_model(time, *biomass_params_list) - if np.any(np.isnan(X_t)): - return np.full_like(time, np.nan) + if np.any(np.isnan(X_t)): return np.full_like(time, np.nan) integral_X = np.zeros_like(X_t) if len(time) > 1: dt = np.diff(time, prepend=time[0] - (time[1]-time[0] if len(time)>1 else 1)) integral_X = np.cumsum(X_t * dt) - if self.model_type == 'logistic' or self.model_type == 'baranyi': - X0 = biomass_params_list[0] - elif self.model_type == 'gompertz': - X0 = self.gompertz(0, *biomass_params_list) - elif self.model_type == 'moser': - X0 = self.moser(0, *biomass_params_list) - else: - X0 = X_t[0] - - X0 = X0 if not np.isnan(X0) else (biomass_params_list[0] if biomass_params_list else 0) - + X0_calc = 0 + if self.model_type in ['logistic', 'baranyi']: X0_calc = biomass_params_list[0] + elif self.model_type == 'gompertz': X0_calc = self.gompertz(0, *biomass_params_list) + elif self.model_type == 'moser': X0_calc = self.moser(0, *biomass_params_list) + else: X0_calc = X_t[0] + X0 = X0_calc if not np.isnan(X0_calc) else (biomass_params_list[0] if biomass_params_list else 0) return po + alpha * (X_t - X0) + beta * integral_X def process_data(self, df): - # ... (same as before) biomass_cols = [col for col in df.columns if col[1] == 'Biomasa'] substrate_cols = [col for col in df.columns if col[1] == 'Sustrato'] product_cols = [col for col in df.columns if col[1] == 'Producto'] if not any(col[1] == 'Tiempo' for col in df.columns): raise ValueError("La columna 'Tiempo' no se encuentra en el DataFrame.") - time_col = [col for col in df.columns if col[1] == 'Tiempo'][0] - time = df[time_col].dropna().values # Ensure no NaNs in time - - if len(biomass_cols) > 0: - data_biomass = [df[col].dropna().values for col in biomass_cols] # dropna for each replicate - # Ensure all replicates have same length as time after dropna - min_len = len(time) - data_biomass_aligned = [] - for rep_data in data_biomass: - if len(rep_data) == min_len: - data_biomass_aligned.append(rep_data) - # else: print warning or handle misaligned data - - if data_biomass_aligned: - data_biomass_np = np.array(data_biomass_aligned) - self.datax.append(data_biomass_np) - self.dataxp.append(np.mean(data_biomass_np, axis=0)) - self.datax_std.append(np.std(data_biomass_np, axis=0, ddof=1)) - else: # If no valid replicates after alignment - self.datax.append(np.array([])) - self.dataxp.append(np.array([])) - self.datax_std.append(np.array([])) + time_col_tuple = [col for col in df.columns if col[1] == 'Tiempo'][0] + # df[time_col_tuple] might be a DataFrame if multiple top-level columns have 'Tiempo' + # Assuming 'Tiempo' is unique or we take the first one. + if isinstance(df[time_col_tuple], pd.DataFrame): # Multi-level access + time = df[time_col_tuple].iloc[:,0].dropna().values # Take first column if it's a df + else: # Single series + time = df[time_col_tuple].dropna().values + + + def process_component(cols, data_list, data_p_list, data_std_list): + if len(cols) > 0: + # For each top-level experiment group that has this component + # We need to average replicates *within* each experiment group if they exist + # The current structure of cols is [(ExpA, Biomasa, Rep1), (ExpA, Biomasa, Rep2), (ExpB, Biomasa, Rep1)] + # This process_data is called per sheet, which usually means one experiment group or averages. + # Let's assume df passed here is for a single "experiment" or "sheet average" context. + + # If cols are like [('Exp1', 'Biomasa', 'R1'), ('Exp1', 'Biomasa', 'R2')] + # df[cols] will give a DataFrame with these columns. + component_df = df[cols] + + # Drop rows where all replicates are NaN for that time point + component_df_cleaned = component_df.dropna(how='all') + # Align with time: reindex and then drop NaNs again if necessary + # This requires time to be processed first and available. + # For simplicity, we'll assume lengths are managed by prior dropna on time and individual series. + + data_replicates = [component_df[col].dropna().values for col in component_df.columns] + + # Align replicates to the minimum common length after individual dropna + if not data_replicates: # No data at all + data_list.append(np.array([])); data_p_list.append(np.array([])); data_std_list.append(np.array([])) + return - else: - self.datax.append(np.array([])) - self.dataxp.append(np.array([])) - self.datax_std.append(np.array([])) - - - if len(substrate_cols) > 0: - data_substrate = [df[col].dropna().values for col in substrate_cols] - min_len = len(time) - data_substrate_aligned = [] - for rep_data in data_substrate: - if len(rep_data) == min_len: - data_substrate_aligned.append(rep_data) - - if data_substrate_aligned: - data_substrate_np = np.array(data_substrate_aligned) - self.datas.append(data_substrate_np) - self.datasp.append(np.mean(data_substrate_np, axis=0)) - self.datas_std.append(np.std(data_substrate_np, axis=0, ddof=1)) - else: - self.datas.append(np.array([])) - self.datasp.append(np.array([])) - self.datas_std.append(np.array([])) - else: - self.datas.append(np.array([])) - self.datasp.append(np.array([])) - self.datas_std.append(np.array([])) - - if len(product_cols) > 0: - data_product = [df[col].dropna().values for col in product_cols] - min_len = len(time) - data_product_aligned = [] - for rep_data in data_product: - if len(rep_data) == min_len: - data_product_aligned.append(rep_data) - - if data_product_aligned: - data_product_np = np.array(data_product_aligned) - self.datap.append(data_product_np) - self.datapp.append(np.mean(data_product_np, axis=0)) - self.datap_std.append(np.std(data_product_np, axis=0, ddof=1)) + min_len_rep = min(len(r) for r in data_replicates) if data_replicates else 0 + + # Ensure min_len_rep is not longer than the main time vector for this sheet + min_len = min(min_len_rep, len(time)) + + aligned_replicates = [rep[:min_len] for rep in data_replicates if len(rep) >= min_len] + + + if aligned_replicates: + data_np = np.array(aligned_replicates) + data_list.append(data_np) + data_p_list.append(np.mean(data_np, axis=0)) + data_std_list.append(np.std(data_np, axis=0, ddof=1 if data_np.shape[0] > 1 else 0)) + else: + data_list.append(np.array([])); data_p_list.append(np.array([])); data_std_list.append(np.array([])) else: - self.datap.append(np.array([])) - self.datapp.append(np.array([])) - self.datap_std.append(np.array([])) - else: - self.datap.append(np.array([])) - self.datapp.append(np.array([])) - self.datap_std.append(np.array([])) + data_list.append(np.array([])); data_p_list.append(np.array([])); data_std_list.append(np.array([])) + + # Adjust time vector based on components with fewest data points after NaNs + # This is tricky because process_component aligns to its own min_len and then to global time. + # A better approach might be to find the overall minimum length across T, X, S, P for the sheet. + + # For now, time is taken as is, and components align to it. + # This means if a component has fewer points than time, it will be truncated. + # If time has NaNs, it's already handled. + + process_component(biomass_cols, self.datax, self.dataxp, self.datax_std) + process_component(substrate_cols, self.datas, self.datasp, self.datas_std) + process_component(product_cols, self.datap, self.datapp, self.datap_std) + + # Ensure all processed data (xp, sp, pp) are aligned to the shortest one or to time. + # Current process_component aligns each to 'time'. + # If, for example, self.dataxp[-1] is shorter than time due to more NaNs in biomass, + # then time should be truncated for fitting. + + min_valid_len = len(time) + if self.dataxp and len(self.dataxp[-1]) > 0: min_valid_len = min(min_valid_len, len(self.dataxp[-1])) + if self.datasp and len(self.datasp[-1]) > 0: min_valid_len = min(min_valid_len, len(self.datasp[-1])) + if self.datapp and len(self.datapp[-1]) > 0: min_valid_len = min(min_valid_len, len(self.datapp[-1])) + + self.time = time[:min_valid_len] + if self.dataxp and len(self.dataxp[-1]) > 0: self.dataxp[-1] = self.dataxp[-1][:min_valid_len] + if self.datax_std and len(self.datax_std[-1]) > 0: self.datax_std[-1] = self.datax_std[-1][:min_valid_len] + + if self.datasp and len(self.datasp[-1]) > 0: self.datasp[-1] = self.datasp[-1][:min_valid_len] + if self.datas_std and len(self.datas_std[-1]) > 0: self.datas_std[-1] = self.datas_std[-1][:min_valid_len] - self.time = time + if self.datapp and len(self.datapp[-1]) > 0: self.datapp[-1] = self.datapp[-1][:min_valid_len] + if self.datap_std and len(self.datap_std[-1]) > 0: self.datap_std[-1] = self.datap_std[-1][:min_valid_len] def fit_model(self): @@ -275,43 +279,60 @@ class BioprocessModel: self.biomass_diff = self.moser_diff elif self.model_type == 'baranyi': self.biomass_model = self.baranyi - self.biomass_diff = None # No ODE form for Baranyi in this version + self.biomass_diff = None else: raise ValueError(f"Modelo de biomasa desconocido: {self.model_type}") - def fit_biomass(self, time, biomass): - # Ensure time and biomass are 1D arrays of the same length and numeric time = np.asarray(time, dtype=float) biomass = np.asarray(biomass, dtype=float) + if len(time) != len(biomass): - print("Error: Tiempo y biomasa deben tener la misma longitud.") + print(f"Error: Tiempo ({len(time)}) y biomasa ({len(biomass)}) deben tener la misma longitud para {self.model_type}.") + # Attempt to reconcile if one is prefix of other, or take min length + min_len = min(len(time), len(biomass)) + time = time[:min_len] + biomass = biomass[:min_len] + if min_len < 3 : # Need at least 3 points for 3-param models + print("No hay suficientes datos válidos después de alinear.") + self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan + return None + + + valid_indices = ~np.isnan(time) & ~np.isnan(biomass) + time = time[valid_indices] + biomass = biomass[valid_indices] + if len(time) < (3 if self.model_type != 'baranyi' else 4): # Baranyi needs 4 params + print(f"No hay suficientes datos válidos ({len(time)}) después de remover NaNs para {self.model_type}.") + self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan return None - if np.any(np.isnan(time)) or np.any(np.isnan(biomass)): - print("Error: Tiempo o biomasa contienen NaNs.") - # Attempt to remove NaNs consistently - valid_indices = ~np.isnan(time) & ~np.isnan(biomass) - time = time[valid_indices] - biomass = biomass[valid_indices] - if len(time) < 3: # Need at least 3 points for 3-param models - print("No hay suficientes datos válidos después de remover NaNs.") - return None + if len(biomass) == 0 or biomass[0] <= 0: # Initial biomass must be positive + print(f"Biomasa inicial no válida (<=0) o datos de biomasa vacíos para {self.model_type}.") + self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan + return None + try: if len(np.unique(biomass)) < 2 : - print(f"Biomasa constante para {self.model_type}, no se puede ajustar el modelo.") - self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan - return None + print(f"Biomasa constante para {self.model_type}, no se puede ajustar el modelo significativamente.") + # Still, can try to fit, might result in um=0 or similar. + # self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan + # return None # Or let it try and see + + popt = None + y_pred = None - popt = None # Initialize popt if self.model_type == 'logistic': xo_guess = biomass[0] if biomass[0] > 1e-6 else 1e-3 xm_guess = max(biomass) * 1.1 if max(biomass) > xo_guess else xo_guess * 2 - if xm_guess <= xo_guess: xm_guess = xo_guess + 1e-3 - p0 = [xo_guess, xm_guess, 0.1] - bounds = ([1e-9, biomass[0] if biomass[0]>1e-9 else 1e-9, 1e-9], [max(biomass)*0.99 if max(biomass)>0 else 1, np.inf, np.inf]) - # Ensure xo_guess is within bounds[0][0] and bounds[1][0] - p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0]) + if xm_guess <= xo_guess: xm_guess = xo_guess + 1e-3 # Ensure Xm > Xo + p0 = [xo_guess, xm_guess, 0.1] # Xo, Xm, um + # Bounds: Xo > 0, Xm > Xo, um > 0 + bounds = ([1e-9, biomass[0] + 1e-9 if biomass[0]>1e-9 else 1e-9, 1e-9], + [max(biomass)*0.999 if max(biomass)>0 else 1, np.inf, np.inf]) + p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0]) # Ensure Xo_guess is within its bounds + p0[1] = np.clip(p0[1], max(bounds[0][1], p0[0] + 1e-9), bounds[1][1]) # Ensure Xm_guess > Xo_guess and within bounds + popt, _ = curve_fit(self.logistic, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9) if popt[1] <= popt[0]: # xm <= xo print(f"Advertencia: En modelo logístico, Xm ({popt[1]:.2f}) no es mayor que Xo ({popt[0]:.2f}). Ajuste puede no ser válido.") @@ -321,15 +342,15 @@ class BioprocessModel: elif self.model_type == 'gompertz': xm_guess = max(biomass) if max(biomass) > 0 else 1.0 um_guess = 0.1 - # A simple lag estimate: time until biomass reaches, say, 10% of (max-min) min_bio = min(biomass) lag_thresh = min_bio + 0.1 * (max(biomass) - min_bio) lag_indices = np.where(biomass > lag_thresh)[0] - lag_guess = time[lag_indices[0]] if len(lag_indices) > 0 else time[0] + lag_guess = time[lag_indices[0]] if len(lag_indices) > 0 and time[lag_indices[0]] >=0 else (time[0] if time[0] >=0 else 0) - p0 = [xm_guess, um_guess, lag_guess] + p0 = [xm_guess, um_guess, lag_guess] # Xm, um, lag + # Bounds: Xm > 0, um > 0, lag >= 0 bounds = ([min(biomass) if min(biomass)>1e-9 else 1e-9, 1e-9, 0], - [np.inf, np.inf, max(time) if len(time)>0 else 100]) + [np.inf, np.inf, max(time)*1.1 if len(time)>0 and max(time)>0 else 100]) popt, _ = curve_fit(self.gompertz, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9) self.params['biomass'] = {'Xm': popt[0], 'um': popt[1], 'lag': popt[2]} y_pred = self.gompertz(time, *popt) @@ -337,10 +358,11 @@ class BioprocessModel: elif self.model_type == 'moser': Xm_guess = max(biomass) if max(biomass) > 0 else 1.0 um_guess = 0.1 - Ks_guess = time[0] - p0 = [Xm_guess, um_guess, Ks_guess] - bounds = ([min(biomass) if min(biomass)>1e-9 else 1e-9, 1e-9, -max(time) if len(time)>0 else -100], # Ks can be negative - [np.inf, np.inf, max(time) if len(time)>0 else 100]) + Ks_guess = time[0] # Ks is a time-like parameter here + p0 = [Xm_guess, um_guess, Ks_guess] # Xm, um, Ks + # Bounds: Xm > 0, um > 0, Ks can be around t_initial + bounds = ([min(biomass) if min(biomass)>1e-9 else 1e-9, 1e-9, -max(abs(time))*0.5 if len(time)>0 else -100], + [np.inf, np.inf, max(abs(time))*1.5 if len(time)>0 else 100]) popt, _ = curve_fit(self.moser, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9) self.params['biomass'] = {'Xm': popt[0], 'um': popt[1], 'Ks': popt[2]} y_pred = self.moser(time, *popt) @@ -348,43 +370,42 @@ class BioprocessModel: elif self.model_type == 'baranyi': X0_guess = biomass[0] if biomass[0] > 1e-6 else 1e-3 Xm_guess = max(biomass) if max(biomass) > X0_guess else X0_guess * 2 - if Xm_guess <= X0_guess: Xm_guess = X0_guess + 1e-3 # Ensure Xm > X0 + if Xm_guess <= X0_guess: Xm_guess = X0_guess + 1e-3 um_guess = 0.1 - min_bio = X0_guess - lag_thresh = min_bio + 0.1 * (Xm_guess - min_bio) - lag_indices = np.where(biomass > lag_thresh)[0] - lag_guess = time[lag_indices[0]] if len(lag_indices) > 0 and time[lag_indices[0]] > 0 else (time[0] if time[0] > 1e-9 else 1e-9) # lag must be >0 for some A(t) forms - if lag_guess <= 0: lag_guess = 1e-9 # Ensure lag is positive for Baranyi A(t) log - - p0 = [X0_guess, Xm_guess, um_guess, lag_guess] + min_bio_b = X0_guess + lag_thresh_b = min_bio_b + 0.1 * (Xm_guess - min_bio_b) + lag_indices_b = np.where(biomass > lag_thresh_b)[0] + lag_guess_b = time[lag_indices_b[0]] if len(lag_indices_b) > 0 and time[lag_indices_b[0]] >= 0 else (time[0] if time[0] >=0 else 0) + + p0 = [X0_guess, Xm_guess, um_guess, lag_guess_b] # X0, Xm, um, lag + # Bounds: X0>0, Xm>X0, um>0, lag>=0 bounds = ( - [1e-9, biomass[0] if biomass[0]>1e-9 else 1e-9, 1e-9, 1e-9], # X0, Xm, um, lag > 0 - [max(biomass)*0.99 if max(biomass)>0 else 1, np.inf, np.inf, max(time) if len(time)>0 else 100] + [1e-9, biomass[0] + 1e-9 if biomass[0]>1e-9 else 1e-9, 1e-9, 0], + [max(biomass)*0.999 if max(biomass)>0 else 1, np.inf, np.inf, max(time)*1.1 if len(time)>0 and max(time)>0 else 100] ) - p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0]) # Clip X0_guess - p0[3] = np.clip(p0[3], bounds[0][3], bounds[1][3]) # Clip lag_guess + p0[0] = np.clip(p0[0], bounds[0][0], bounds[1][0]) + p0[1] = np.clip(p0[1], max(bounds[0][1], p0[0] + 1e-9), bounds[1][1]) + p0[3] = np.clip(p0[3], bounds[0][3], bounds[1][3]) popt, _ = curve_fit(self.baranyi, time, biomass, p0=p0, maxfev=self.maxfev, bounds=bounds, ftol=1e-9, xtol=1e-9) - if popt[1] <= popt[0]: # Xm <= X0 + if popt[1] <= popt[0]: print(f"Advertencia: En modelo Baranyi, Xm ({popt[1]:.2f}) no es mayor que X0 ({popt[0]:.2f}). Ajuste puede no ser válido.") self.params['biomass'] = {'X0': popt[0], 'Xm': popt[1], 'um': popt[2], 'lag': popt[3]} y_pred = self.baranyi(time, *popt) - else: print(f"Modelo {self.model_type} no implementado para ajuste de biomasa.") return None - if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)): + if y_pred is None or np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)): print(f"Predicción de biomasa contiene NaN/Inf para {self.model_type}. Ajuste fallido.") self.r2['biomass'] = np.nan; self.rmse['biomass'] = np.nan + self.params['biomass'] = {} # Clear params if fit is bad return None ss_res = np.sum((biomass - y_pred) ** 2) ss_tot = np.sum((biomass - np.mean(biomass)) ** 2) - if ss_tot == 0: - self.r2['biomass'] = 1.0 if ss_res < 1e-9 else 0.0 # Perfect fit if residuals are also ~0 - else: - self.r2['biomass'] = 1 - (ss_res / ss_tot) + if ss_tot == 0: self.r2['biomass'] = 1.0 if ss_res < 1e-9 else 0.0 + else: self.r2['biomass'] = 1 - (ss_res / ss_tot) self.rmse['biomass'] = np.sqrt(mean_squared_error(biomass, y_pred)) return y_pred @@ -400,31 +421,30 @@ class BioprocessModel: return None def fit_substrate(self, time, substrate, biomass_params_dict): - if not biomass_params_dict: - print(f"Error en fit_substrate_{self.model_type}: Parámetros de biomasa no disponibles.") + if not biomass_params_dict or not self.params.get('biomass'): # Check if biomass fit was successful + print(f"Error en fit_substrate_{self.model_type}: Parámetros de biomasa no disponibles o ajuste fallido.") + self.r2['substrate'] = np.nan; self.rmse['substrate'] = np.nan return None + + # Ensure time and substrate are aligned and valid + time = np.asarray(time, dtype=float) + substrate = np.asarray(substrate, dtype=float) + valid_indices = ~np.isnan(time) & ~np.isnan(substrate) + time = time[valid_indices] + substrate = substrate[valid_indices] + if len(time) < 3: # Need at least 3 points for 3-param Luedeking-Piret + print(f"No hay suficientes datos válidos de sustrato ({len(time)}) para {self.model_type}.") + self.r2['substrate'] = np.nan; self.rmse['substrate'] = np.nan + return None + try: - # Extract parameters based on model type into a list for self.substrate - if self.model_type == 'logistic': - # Expected by self.logistic: xo, xm, um - biomass_params_values = [biomass_params_dict['Xo'], biomass_params_dict['Xm'], biomass_params_dict['um']] - elif self.model_type == 'gompertz': - # Expected by self.gompertz: xm, um, lag - biomass_params_values = [biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['lag']] - elif self.model_type == 'moser': - # Expected by self.moser: Xm, um, Ks - biomass_params_values = [biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['Ks']] - elif self.model_type == 'baranyi': - # Expected by self.baranyi: X0, Xm, um, lag - biomass_params_values = [biomass_params_dict['X0'], biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['lag']] - else: - return None + biomass_params_values = list(biomass_params_dict.values()) so_guess = substrate[0] if len(substrate) > 0 else 1.0 p_guess = 0.1 q_guess = 0.01 p0 = [so_guess, p_guess, q_guess] - bounds = ([0, 0, 0], [np.inf, np.inf, np.inf]) + bounds = ([0, 0, -np.inf], [np.inf, np.inf, np.inf]) # q can sometimes be negative if there's release popt, _ = curve_fit( lambda t, so, p, q: self.substrate(t, so, p, q, biomass_params_values), @@ -436,6 +456,7 @@ class BioprocessModel: if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)): print(f"Predicción de sustrato contiene NaN/Inf para {self.model_type}. Ajuste fallido.") self.r2['substrate'] = np.nan; self.rmse['substrate'] = np.nan + self.params['substrate'] = {} return None ss_res = np.sum((substrate - y_pred) ** 2) @@ -454,26 +475,29 @@ class BioprocessModel: return None def fit_product(self, time, product, biomass_params_dict): - if not biomass_params_dict: - print(f"Error en fit_product_{self.model_type}: Parámetros de biomasa no disponibles.") + if not biomass_params_dict or not self.params.get('biomass'): + print(f"Error en fit_product_{self.model_type}: Parámetros de biomasa no disponibles o ajuste fallido.") + self.r2['product'] = np.nan; self.rmse['product'] = np.nan return None + + time = np.asarray(time, dtype=float) + product = np.asarray(product, dtype=float) + valid_indices = ~np.isnan(time) & ~np.isnan(product) + time = time[valid_indices] + product = product[valid_indices] + if len(time) < 3: + print(f"No hay suficientes datos válidos de producto ({len(time)}) para {self.model_type}.") + self.r2['product'] = np.nan; self.rmse['product'] = np.nan + return None + try: - if self.model_type == 'logistic': - biomass_params_values = [biomass_params_dict['Xo'], biomass_params_dict['Xm'], biomass_params_dict['um']] - elif self.model_type == 'gompertz': - biomass_params_values = [biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['lag']] - elif self.model_type == 'moser': - biomass_params_values = [biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['Ks']] - elif self.model_type == 'baranyi': - biomass_params_values = [biomass_params_dict['X0'], biomass_params_dict['Xm'], biomass_params_dict['um'], biomass_params_dict['lag']] - else: - return None + biomass_params_values = list(biomass_params_dict.values()) po_guess = product[0] if len(product) > 0 else 0.0 alpha_guess = 0.1 beta_guess = 0.01 p0 = [po_guess, alpha_guess, beta_guess] - bounds = ([0, 0, 0], [np.inf, np.inf, np.inf]) + bounds = ([0, 0, -np.inf], [np.inf, np.inf, np.inf]) # beta can be negative (product degradation) popt, _ = curve_fit( lambda t, po, alpha, beta: self.product(t, po, alpha, beta, biomass_params_values), @@ -485,6 +509,7 @@ class BioprocessModel: if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)): print(f"Predicción de producto contiene NaN/Inf para {self.model_type}. Ajuste fallido.") self.r2['product'] = np.nan; self.rmse['product'] = np.nan + self.params['product'] = {} return None ss_res = np.sum((product - y_pred) ** 2) @@ -503,39 +528,36 @@ class BioprocessModel: return None def generate_fine_time_grid(self, time): - # ... (same as before) - if time is None or len(time) < 2: # Need at least 2 points to define a range + if time is None or len(time) < 2: return np.array([0]) if (time is None or len(time)==0) else np.array(time) time_min, time_max = np.min(time), np.max(time) - if time_min == time_max: # If all time points are the same - return np.array([time_min]) + if time_min == time_max: return np.array([time_min]) time_fine = np.linspace(time_min, time_max, 500) return time_fine - - def system(self, y, t, biomass_params_list, substrate_params_list, product_params_list, model_type_for_ode): - # model_type_for_ode is passed to ensure we use the correct diff eq + def system(self, y, t, biomass_params_list_ode, substrate_params_list, product_params_list, model_type_for_ode): X, S, P = y dXdt = 0.0 + # Ensure X is non-negative for diff eqs + X = max(X, 0) + if model_type_for_ode == 'logistic': - # biomass_params_list for logistic: [Xo, Xm, um] - dXdt = self.logistic_diff(X, t, biomass_params_list) + dXdt = self.logistic_diff(X, t, biomass_params_list_ode) elif model_type_for_ode == 'gompertz': - # biomass_params_list for gompertz: [Xm, um, lag] - dXdt = self.gompertz_diff(X, t, biomass_params_list) + dXdt = self.gompertz_diff(X, t, biomass_params_list_ode) elif model_type_for_ode == 'moser': - # biomass_params_list for moser: [Xm, um, Ks] - dXdt = self.moser_diff(X, t, biomass_params_list) - # No ODE for Baranyi in this version + dXdt = self.moser_diff(X, t, biomass_params_list_ode) else: - # This case should ideally be prevented before calling system if model has no diff eq - print(f"Advertencia: Ecuación diferencial no definida para el modelo {model_type_for_ode} en la función 'system'. dXdt=0.") + print(f"Advertencia: Ecuación diferencial no definida para {model_type_for_ode}. dXdt=0.") dXdt = 0.0 p_val = substrate_params_list[1] if len(substrate_params_list) > 1 else 0 q_val = substrate_params_list[2] if len(substrate_params_list) > 2 else 0 + # dSdt = -p_val * dXdt - q_val * X # Original + # Ensure S does not go below zero if q_val is positive dSdt = -p_val * dXdt - q_val * X + if S <= 0 and dSdt < 0 : dSdt = 0 # Stop consumption if S is zero or negative alpha_val = product_params_list[1] if len(product_params_list) > 1 else 0 beta_val = product_params_list[2] if len(product_params_list) > 2 else 0 @@ -543,41 +565,40 @@ class BioprocessModel: return [dXdt, dSdt, dPdt] def get_initial_conditions(self, time, biomass, substrate, product): - X0_exp = biomass[0] if biomass is not None and len(biomass) > 0 else 0 - S0_exp = substrate[0] if substrate is not None and len(substrate) > 0 else 0 - P0_exp = product[0] if product is not None and len(product) > 0 else 0 + # Use experimental data for initial conditions if available and valid + X0_exp = biomass[0] if biomass is not None and len(biomass) > 0 and np.isfinite(biomass[0]) else 0.0 + S0_exp = substrate[0] if substrate is not None and len(substrate) > 0 and np.isfinite(substrate[0]) else 0.0 + P0_exp = product[0] if product is not None and len(product) > 0 and np.isfinite(product[0]) else 0.0 X0 = X0_exp + # Override with fitted X0 if available and model supports it directly if 'biomass' in self.params and self.params['biomass']: - if self.model_type == 'logistic': - X0 = self.params['biomass'].get('Xo', X0_exp) - elif self.model_type == 'baranyi': # Baranyi also has X0 as a direct parameter - X0 = self.params['biomass'].get('X0', X0_exp) + if self.model_type in ['logistic', 'baranyi']: + X0 = self.params['biomass'].get('Xo', self.params['biomass'].get('X0', X0_exp)) # Xo or X0 + # For Gompertz and Moser, X(t=0) is calculated from their parameters elif self.model_type == 'gompertz' and self.biomass_model: - # For Gompertz, X(t=0) needs to be calculated from its parameters - # Parameters: Xm, um, lag params_list = [self.params['biomass'].get('Xm',1), self.params['biomass'].get('um',0.1), self.params['biomass'].get('lag',0)] - X0_calc = self.biomass_model(0, *params_list) - X0 = X0_calc if not np.isnan(X0_calc) else X0_exp + if all(k in self.params['biomass'] for k in ['Xm', 'um', 'lag']): + X0_calc = self.biomass_model(0, *params_list) + X0 = X0_calc if np.isfinite(X0_calc) else X0_exp elif self.model_type == 'moser' and self.biomass_model: - # For Moser, X(t=0) needs to be calculated - # Parameters: Xm, um, Ks params_list = [self.params['biomass'].get('Xm',1), self.params['biomass'].get('um',0.1), self.params['biomass'].get('Ks',0)] - X0_calc = self.biomass_model(0, *params_list) - X0 = X0_calc if not np.isnan(X0_calc) else X0_exp + if all(k in self.params['biomass'] for k in ['Xm', 'um', 'Ks']): + X0_calc = self.biomass_model(0, *params_list) + X0 = X0_calc if np.isfinite(X0_calc) else X0_exp S0 = self.params.get('substrate', {}).get('so', S0_exp) P0 = self.params.get('product', {}).get('po', P0_exp) - X0 = X0 if not np.isnan(X0) else 0.0 - S0 = S0 if not np.isnan(S0) else 0.0 - P0 = P0 if not np.isnan(P0) else 0.0 + X0 = max(X0 if np.isfinite(X0) else 0.0, 1e-9) # Ensure X0 is small positive if zero + S0 = S0 if np.isfinite(S0) else 0.0 + P0 = P0 if np.isfinite(P0) else 0.0 return [X0, S0, P0] def solve_differential_equations(self, time, biomass, substrate, product): - if self.biomass_diff is None: # Check if a differential equation is defined for this model + if self.biomass_diff is None: print(f"ODE solving no está soportado para el modelo {self.model_type}. Se usarán resultados de curve_fit.") - return None, None, None, time # Return None for solutions, original time + return None, None, None, time if 'biomass' not in self.params or not self.params['biomass']: print("No hay parámetros de biomasa, no se pueden resolver las EDO.") @@ -586,15 +607,15 @@ class BioprocessModel: print("Tiempo no válido para resolver EDOs.") return None, None, None, np.array([]) - # Prepare biomass_params_list for ODE system based on self.model_type - # This list should match what the respective _diff function expects + biomass_params_list_ode = [] if self.model_type == 'logistic': + # For logistic_diff: [xo_not_used_in_ode, xm, um] + # xo is an initial condition for odeint, xm and um are parameters for the diff eq biomass_params_list_ode = [self.params['biomass']['Xo'], self.params['biomass']['Xm'], self.params['biomass']['um']] elif self.model_type == 'gompertz': biomass_params_list_ode = [self.params['biomass']['Xm'], self.params['biomass']['um'], self.params['biomass']['lag']] elif self.model_type == 'moser': biomass_params_list_ode = [self.params['biomass']['Xm'], self.params['biomass']['um'], self.params['biomass']['Ks']] - # Baranyi does not have biomass_diff implemented here, so it's caught by self.biomass_diff is None else: print(f"Tipo de modelo de biomasa desconocido para EDO: {self.model_type}") return None, None, None, time @@ -618,15 +639,15 @@ class BioprocessModel: try: sol = odeint(self.system, initial_conditions, time_fine, - args=(biomass_params_list_ode, substrate_params_list, product_params_list, self.model_type), # Pass self.model_type for routing in self.system - rtol=1e-6, atol=1e-6) + args=(biomass_params_list_ode, substrate_params_list, product_params_list, self.model_type), + rtol=1e-6, atol=1e-6, hmax= (time_fine[-1]-time_fine[0])/100.0 if len(time_fine)>1 else 0.0) # Add hmax except Exception as e: print(f"Error al resolver EDOs con odeint: {e}") try: print("Intentando con método 'lsoda'...") sol = odeint(self.system, initial_conditions, time_fine, args=(biomass_params_list_ode, substrate_params_list, product_params_list, self.model_type), - rtol=1e-6, atol=1e-6, method='lsoda') + rtol=1e-6, atol=1e-6, method='lsoda', hmax= (time_fine[-1]-time_fine[0])/100.0 if len(time_fine)>1 else 0.0) except Exception as e_lsoda: print(f"Error al resolver EDOs con odeint (método lsoda): {e_lsoda}") return None, None, None, time_fine @@ -637,7 +658,7 @@ class BioprocessModel: return X, S, P, time_fine def plot_results(self, time, biomass, substrate, product, - y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit, # Renamed to avoid confusion + y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit, biomass_std=None, substrate_std=None, product_std=None, experiment_name='', legend_position='best', params_position='upper right', show_legend=True, show_params=True, @@ -646,168 +667,137 @@ class BioprocessModel: use_differential=False, axis_labels=None, show_error_bars=True, error_cap_size=3, error_line_width=1): - # Initialize predictions with curve_fit results y_pred_biomass, y_pred_substrate, y_pred_product = y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit + time_to_plot_curves = self.generate_fine_time_grid(time) # Default fine grid for curves - if y_pred_biomass is None and not (use_differential and self.biomass_diff is not None): + if y_pred_biomass_fit is None and not (use_differential and self.biomass_diff is not None and 'biomass' in self.params and self.params['biomass']): print(f"No se pudo ajustar biomasa para {experiment_name} con {self.model_type} y no se usan EDO. Omitiendo figura.") return None - # Check if ODE should be used and is supported can_use_ode = use_differential and self.biomass_diff is not None and 'biomass' in self.params and self.params['biomass'] if use_differential and self.biomass_diff is None: - print(f"Modelo {self.model_type} no soporta EDOs. Usando ajuste directo.") + print(f"Modelo {self.model_type} no soporta EDOs. Usando ajuste directo si está disponible.") if axis_labels is None: axis_labels = {'x_label': 'Tiempo', 'biomass_label': 'Biomasa', 'substrate_label': 'Sustrato', 'product_label': 'Producto'} sns.set_style(style) - time_to_plot = time - + if can_use_ode: X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product) if X_ode is not None: y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode - time_to_plot = time_fine_ode - else: + time_to_plot_curves = time_fine_ode # Use ODE time grid + else: # ODE failed, fall back to curve_fit results on fine grid print(f"Fallo al resolver EDOs para {experiment_name}, usando resultados de curve_fit si existen.") - time_to_plot = self.generate_fine_time_grid(time) # Use fine grid for curve_fit if ODE fails - # Re-evaluate curve_fit results on fine grid if they exist if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']: biomass_params_values = list(self.params['biomass'].values()) - y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values) - if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']: + y_pred_biomass = self.biomass_model(time_to_plot_curves, *biomass_params_values) + if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params.get('substrate'): substrate_params_values = list(self.params['substrate'].values()) - y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values) - if y_pred_product_fit is not None and 'product' in self.params and self.params['product']: + y_pred_substrate = self.substrate(time_to_plot_curves, *substrate_params_values, biomass_params_values) + else: y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + if y_pred_product_fit is not None and 'product' in self.params and self.params.get('product'): product_params_values = list(self.params['product'].values()) - y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values) - - else: # Not using ODE or ODE not supported, use curve_fit results on a fine grid - time_to_plot = self.generate_fine_time_grid(time) + y_pred_product = self.product(time_to_plot_curves, *product_params_values, biomass_params_values) + else: y_pred_product = np.full_like(time_to_plot_curves, np.nan) + else: # Biomass fit also failed + y_pred_biomass = np.full_like(time_to_plot_curves, np.nan) + y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + y_pred_product = np.full_like(time_to_plot_curves, np.nan) + else: # Not using ODE or ODE not supported, use curve_fit results on fine grid if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']: - biomass_params_values = list(self.params['biomass'].values()) # Get latest params - y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values) - if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']: + biomass_params_values = list(self.params['biomass'].values()) + y_pred_biomass = self.biomass_model(time_to_plot_curves, *biomass_params_values) + if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params.get('substrate'): substrate_params_values = list(self.params['substrate'].values()) - y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values) - else: # If substrate fit failed or no data, plot NaNs - y_pred_substrate = np.full_like(time_to_plot, np.nan) - if y_pred_product_fit is not None and 'product' in self.params and self.params['product']: + y_pred_substrate = self.substrate(time_to_plot_curves, *substrate_params_values, biomass_params_values) + else: y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + if y_pred_product_fit is not None and 'product' in self.params and self.params.get('product'): product_params_values = list(self.params['product'].values()) - y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values) - else: # If product fit failed or no data, plot NaNs - y_pred_product = np.full_like(time_to_plot, np.nan) - else: # Biomass fit failed - y_pred_biomass = np.full_like(time_to_plot, np.nan) - y_pred_substrate = np.full_like(time_to_plot, np.nan) - y_pred_product = np.full_like(time_to_plot, np.nan) - + y_pred_product = self.product(time_to_plot_curves, *product_params_values, biomass_params_values) + else: y_pred_product = np.full_like(time_to_plot_curves, np.nan) + else: + y_pred_biomass = np.full_like(time_to_plot_curves, np.nan) + y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + y_pred_product = np.full_like(time_to_plot_curves, np.nan) - fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 15)) - fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()})', fontsize=16) + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 15), sharex=True) + fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()}){" - EDO" if can_use_ode and X_ode is not None else ""}', fontsize=16) plots_config = [ - (ax1, biomass, y_pred_biomass, biomass_std, axis_labels['biomass_label'], 'Modelo', self.params.get('biomass', {}), + (ax1, biomass, y_pred_biomass, biomass_std, axis_labels['biomass_label'], 'Modelo Biomasa', self.params.get('biomass', {}), self.r2.get('biomass', np.nan), self.rmse.get('biomass', np.nan)), - (ax2, substrate, y_pred_substrate, substrate_std, axis_labels['substrate_label'], 'Modelo', self.params.get('substrate', {}), + (ax2, substrate, y_pred_substrate, substrate_std, axis_labels['substrate_label'], 'Modelo Sustrato', self.params.get('substrate', {}), self.r2.get('substrate', np.nan), self.rmse.get('substrate', np.nan)), - (ax3, product, y_pred_product, product_std, axis_labels['product_label'], 'Modelo', self.params.get('product', {}), + (ax3, product, y_pred_product, product_std, axis_labels['product_label'], 'Modelo Producto', self.params.get('product', {}), self.r2.get('product', np.nan), self.rmse.get('product', np.nan)) ] - # ... (rest of plot_results is the same as your provided code, using the new y_pred variables) - for idx, (ax, data_exp, y_pred_model, data_std_exp, ylabel, model_name_legend, params_dict, r2_val, rmse_val) in enumerate(plots_config): + + for idx, (ax, data_exp, y_pred_model_curve, data_std_exp, ylabel, model_name_legend, params_dict, r2_val, rmse_val) in enumerate(plots_config): + # Plot experimental data if data_exp is not None and len(data_exp) > 0 and not np.all(np.isnan(data_exp)): if show_error_bars and data_std_exp is not None and len(data_std_exp) == len(data_exp) and not np.all(np.isnan(data_std_exp)): - ax.errorbar( - time, data_exp, yerr=data_std_exp, - fmt=marker_style, color=point_color, - label='Datos experimentales', - capsize=error_cap_size, - elinewidth=error_line_width, - markeredgewidth=1 - ) + ax.errorbar(time, data_exp, yerr=data_std_exp, fmt=marker_style, color=point_color, + label='Datos experimentales', capsize=error_cap_size, elinewidth=error_line_width, markeredgewidth=1, markersize=5) else: - ax.plot(time, data_exp, marker=marker_style, linestyle='', color=point_color, - label='Datos experimentales') + ax.plot(time, data_exp, marker=marker_style, linestyle='', color=point_color, label='Datos experimentales', markersize=5) else: - ax.text(0.5, 0.5, 'No hay datos experimentales para mostrar.', - horizontalalignment='center', verticalalignment='center', - transform=ax.transAxes, fontsize=10, color='gray') - - if y_pred_model is not None and len(y_pred_model) > 0 and not np.all(np.isnan(y_pred_model)): - ax.plot(time_to_plot, y_pred_model, linestyle=line_style, color=line_color, label=model_name_legend) - # ... (rest of messages for failed fits) - elif idx == 0 and (y_pred_biomass_fit is None and not can_use_ode): # If biomass fit failed and ODE not possible - ax.text(0.5, 0.6, 'Modelo de biomasa no ajustado.', - horizontalalignment='center', verticalalignment='center', - transform=ax.transAxes, fontsize=10, color='red') - elif (idx == 1 and y_pred_substrate_fit is None and not can_use_ode) or \ - (idx == 2 and y_pred_product_fit is None and not can_use_ode) : - if not ('biomass' in self.params and self.params['biomass']): # If biomass params are missing - ax.text(0.5, 0.4, 'Modelo no ajustado (depende de biomasa).', - horizontalalignment='center', verticalalignment='center', - transform=ax.transAxes, fontsize=10, color='orange') - elif y_pred_model is None or np.all(np.isnan(y_pred_model)): # If this specific model (S or P) failed - ax.text(0.5, 0.4, 'Modelo no ajustado.', - horizontalalignment='center', verticalalignment='center', - transform=ax.transAxes, fontsize=10, color='orange') - - - ax.set_xlabel(axis_labels['x_label']) + ax.text(0.5, 0.5, 'No hay datos experimentales.', transform=ax.transAxes, ha='center', va='center', color='gray') + + # Plot model curve + if y_pred_model_curve is not None and len(y_pred_model_curve) > 0 and not np.all(np.isnan(y_pred_model_curve)): + ax.plot(time_to_plot_curves, y_pred_model_curve, linestyle=line_style, color=line_color, label=model_name_legend) + elif idx == 0 and (y_pred_biomass_fit is None and not (can_use_ode and X_ode is not None)): # Biomass model failed + ax.text(0.5, 0.6, 'Modelo de biomasa no ajustado.', transform=ax.transAxes, ha='center', va='center', color='red') + elif idx > 0 and not (self.params.get('biomass') and y_pred_biomass_fit is not None): # S/P model depends on biomass + ax.text(0.5, 0.4, 'No ajustado (depende de biomasa).', transform=ax.transAxes, ha='center', va='center', color='orange') + elif y_pred_model_curve is None or np.all(np.isnan(y_pred_model_curve)): # Specific S/P model failed + ax.text(0.5, 0.4, 'Modelo no ajustado.', transform=ax.transAxes, ha='center', va='center', color='orange') + ax.set_ylabel(ylabel) - if show_legend: - ax.legend(loc=legend_position) + if show_legend: ax.legend(loc=legend_position) ax.set_title(f'{ylabel}') - if show_params and params_dict and any(np.isfinite(v) for v in params_dict.values()): # Show if any param is finite - param_text_list = [] - for k, v_param in params_dict.items(): - param_text_list.append(f"{k} = {v_param:.3g}" if np.isfinite(v_param) else f"{k} = N/A") + if show_params and params_dict and any(np.isfinite(v) for v in params_dict.values()): + param_text_list = [f"{k} = {v_param:.3g}" if np.isfinite(v_param) else f"{k} = N/A" for k, v_param in params_dict.items()] param_text = '\n'.join(param_text_list) - r2_display = f"{r2_val:.3f}" if np.isfinite(r2_val) else "N/A" rmse_display = f"{rmse_val:.3f}" if np.isfinite(rmse_val) else "N/A" text = f"{param_text}\nR² = {r2_display}\nRMSE = {rmse_display}" if params_position == 'outside right': - bbox_props = dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.5) - fig.subplots_adjust(right=0.75) - ax.annotate(text, xy=(1.05, 0.5), xycoords='axes fraction', - xytext=(10,0), textcoords='offset points', - verticalalignment='center', horizontalalignment='left', - bbox=bbox_props) + fig.subplots_adjust(right=0.75) # Make space + ax.annotate(text, xy=(1.05, 0.5), xycoords='axes fraction', xytext=(10,0), textcoords='offset points', + verticalalignment='center', horizontalalignment='left', bbox={'boxstyle': 'round,pad=0.3', 'facecolor':'wheat', 'alpha':0.7}) else: text_x, ha = (0.95, 'right') if 'right' in params_position else (0.05, 'left') text_y, va = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom') - ax.text(text_x, text_y, text, transform=ax.transAxes, - verticalalignment=va, horizontalalignment=ha, - bbox={'boxstyle': 'round,pad=0.3', 'facecolor':'wheat', 'alpha':0.5}) - elif show_params : # No params or all NaN - ax.text(0.5, 0.3, 'Parámetros no disponibles.', - horizontalalignment='center', verticalalignment='center', - transform=ax.transAxes, fontsize=9, color='grey') - - - plt.tight_layout(rect=[0, 0.03, 1, 0.95]) - buf = io.BytesIO() - fig.savefig(buf, format='png', bbox_inches='tight') - buf.seek(0) - image = Image.open(buf).convert("RGB") - plt.close(fig) + ax.text(text_x, text_y, text, transform=ax.transAxes, verticalalignment=va, horizontalalignment=ha, + bbox={'boxstyle': 'round,pad=0.3', 'facecolor':'wheat', 'alpha':0.7}) + elif show_params : + ax.text(0.5, 0.3, 'Parámetros no disponibles.', transform=ax.transAxes, ha='center', va='center', color='grey') + + ax3.set_xlabel(axis_labels['x_label']) # Set xlabel only for the last subplot + plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust for suptitle + if params_position == 'outside right': fig.subplots_adjust(right=0.70) # Ensure space if text is outside + + buf = io.BytesIO(); fig.savefig(buf, format='png', bbox_inches='tight'); buf.seek(0) + image = Image.open(buf).convert("RGB"); plt.close(fig) return image def plot_combined_results(self, time, biomass, substrate, product, - y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit, # Renamed + y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit, biomass_std=None, substrate_std=None, product_std=None, experiment_name='', legend_position='best', params_position='upper right', show_legend=True, show_params=True, style='whitegrid', - line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o', + line_color='#0000FF', point_color='#000000', line_style='-', marker_style='o', # These are defaults, overridden below use_differential=False, axis_labels=None, show_error_bars=True, error_cap_size=3, error_line_width=1): y_pred_biomass, y_pred_substrate, y_pred_product = y_pred_biomass_fit, y_pred_substrate_fit, y_pred_product_fit + time_to_plot_curves = self.generate_fine_time_grid(time) - if y_pred_biomass is None and not (use_differential and self.biomass_diff is not None): + if y_pred_biomass_fit is None and not (use_differential and self.biomass_diff is not None and 'biomass' in self.params and self.params['biomass']): print(f"No se pudo ajustar biomasa para {experiment_name} con {self.model_type} (combinado). Omitiendo figura.") return None @@ -817,125 +807,115 @@ class BioprocessModel: if axis_labels is None: axis_labels = {'x_label': 'Tiempo', 'biomass_label': 'Biomasa', 'substrate_label': 'Sustrato', 'product_label': 'Producto'} sns.set_style(style) - time_to_plot = time if can_use_ode: X_ode, S_ode, P_ode, time_fine_ode = self.solve_differential_equations(time, biomass, substrate, product) if X_ode is not None: y_pred_biomass, y_pred_substrate, y_pred_product = X_ode, S_ode, P_ode - time_to_plot = time_fine_ode + time_to_plot_curves = time_fine_ode else: print(f"Fallo al resolver EDOs para {experiment_name} (combinado), usando resultados de curve_fit.") - time_to_plot = self.generate_fine_time_grid(time) if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']: biomass_params_values = list(self.params['biomass'].values()) - y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values) - if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']: + y_pred_biomass = self.biomass_model(time_to_plot_curves, *biomass_params_values) + if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params.get('substrate'): substrate_params_values = list(self.params['substrate'].values()) - y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values) - if y_pred_product_fit is not None and 'product' in self.params and self.params['product']: + y_pred_substrate = self.substrate(time_to_plot_curves, *substrate_params_values, biomass_params_values) + else: y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + if y_pred_product_fit is not None and 'product' in self.params and self.params.get('product'): product_params_values = list(self.params['product'].values()) - y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values) - else: # Not using ODE or ODE not supported - time_to_plot = self.generate_fine_time_grid(time) + y_pred_product = self.product(time_to_plot_curves, *product_params_values, biomass_params_values) + else: y_pred_product = np.full_like(time_to_plot_curves, np.nan) + else: + y_pred_biomass = np.full_like(time_to_plot_curves, np.nan); y_pred_substrate = np.full_like(time_to_plot_curves, np.nan); y_pred_product = np.full_like(time_to_plot_curves, np.nan) + else: if y_pred_biomass_fit is not None and self.biomass_model and 'biomass' in self.params and self.params['biomass']: biomass_params_values = list(self.params['biomass'].values()) - y_pred_biomass = self.biomass_model(time_to_plot, *biomass_params_values) - if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params['substrate']: + y_pred_biomass = self.biomass_model(time_to_plot_curves, *biomass_params_values) + if y_pred_substrate_fit is not None and 'substrate' in self.params and self.params.get('substrate'): substrate_params_values = list(self.params['substrate'].values()) - y_pred_substrate = self.substrate(time_to_plot, *substrate_params_values, biomass_params_values) - else: y_pred_substrate = np.full_like(time_to_plot, np.nan) - if y_pred_product_fit is not None and 'product' in self.params and self.params['product']: + y_pred_substrate = self.substrate(time_to_plot_curves, *substrate_params_values, biomass_params_values) + else: y_pred_substrate = np.full_like(time_to_plot_curves, np.nan) + if y_pred_product_fit is not None and 'product' in self.params and self.params.get('product'): product_params_values = list(self.params['product'].values()) - y_pred_product = self.product(time_to_plot, *product_params_values, biomass_params_values) - else: y_pred_product = np.full_like(time_to_plot, np.nan) + y_pred_product = self.product(time_to_plot_curves, *product_params_values, biomass_params_values) + else: y_pred_product = np.full_like(time_to_plot_curves, np.nan) else: - y_pred_biomass = np.full_like(time_to_plot, np.nan) - y_pred_substrate = np.full_like(time_to_plot, np.nan) - y_pred_product = np.full_like(time_to_plot, np.nan) + y_pred_biomass = np.full_like(time_to_plot_curves, np.nan); y_pred_substrate = np.full_like(time_to_plot_curves, np.nan); y_pred_product = np.full_like(time_to_plot_curves, np.nan) - fig, ax1 = plt.subplots(figsize=(12, 7)) - fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()})', fontsize=16) - # ... (rest of plot_combined_results is the same, using new y_pred variables and error bar params) - colors = {'Biomasa': 'blue', 'Sustrato': 'green', 'Producto': 'red'} - data_colors = {'Biomasa': 'darkblue', 'Sustrato': 'darkgreen', 'Producto': 'darkred'} - model_colors = {'Biomasa': 'cornflowerblue', 'Sustrato': 'limegreen', 'Producto': 'salmon'} + fig, ax1 = plt.subplots(figsize=(12, 7)) + fig.suptitle(f'{experiment_name} ({self.model_type.capitalize()}){" - EDO" if can_use_ode and X_ode is not None else ""}', fontsize=16) + + # Define distinct colors for combined plot + # Using point_color for data points (passed from UI) and line_color for model lines (passed from UI) + # For combined plot, we need three distinct colors for lines and points. + # Let's use a palette or define them. UI provides one line_color and one point_color. + # We can derive variations or use fixed ones for combined. + # Using fixed distinct colors for clarity in combined plot: + color_palette = sns.color_palette("tab10", 3) + data_colors = {'Biomasa': color_palette[0], 'Sustrato': color_palette[1], 'Producto': color_palette[2]} + model_colors = {'Biomasa': color_palette[0], 'Sustrato': color_palette[1], 'Producto': color_palette[2]} # Same for model lines, but can be made lighter/darker ax1.set_xlabel(axis_labels['x_label']) - ax1.set_ylabel(axis_labels['biomass_label'], color=colors['Biomasa']) + ax1.set_ylabel(axis_labels['biomass_label'], color=data_colors['Biomasa']) if biomass is not None and len(biomass) > 0 and not np.all(np.isnan(biomass)): if show_error_bars and biomass_std is not None and len(biomass_std) == len(biomass) and not np.all(np.isnan(biomass_std)): - ax1.errorbar( - time, biomass, yerr=biomass_std, - fmt=marker_style, color=data_colors['Biomasa'], - label=f'{axis_labels["biomass_label"]} (Datos)', - capsize=error_cap_size, elinewidth=error_line_width, markersize=5 - ) + ax1.errorbar(time, biomass, yerr=biomass_std, fmt=marker_style, color=data_colors['Biomasa'], + label=f'{axis_labels["biomass_label"]} (Datos)', capsize=error_cap_size, elinewidth=error_line_width, markersize=5, alpha=0.7) else: - ax1.plot(time, biomass, marker=marker_style, linestyle='', color=data_colors['Biomasa'], - label=f'{axis_labels["biomass_label"]} (Datos)', markersize=5) + ax1.plot(time, biomass, marker=marker_style, linestyle='', color=data_colors['Biomasa'], label=f'{axis_labels["biomass_label"]} (Datos)', markersize=5, alpha=0.7) if y_pred_biomass is not None and len(y_pred_biomass) > 0 and not np.all(np.isnan(y_pred_biomass)): - ax1.plot(time_to_plot, y_pred_biomass, linestyle=line_style, color=model_colors['Biomasa'], - label=f'{axis_labels["biomass_label"]} (Modelo)') - ax1.tick_params(axis='y', labelcolor=colors['Biomasa']) + ax1.plot(time_to_plot_curves, y_pred_biomass, linestyle=line_style, color=model_colors['Biomasa'], label=f'{axis_labels["biomass_label"]} (Modelo)') + ax1.tick_params(axis='y', labelcolor=data_colors['Biomasa']) ax2 = ax1.twinx() - ax2.set_ylabel(axis_labels['substrate_label'], color=colors['Sustrato']) + ax2.set_ylabel(axis_labels['substrate_label'], color=data_colors['Sustrato']) if substrate is not None and len(substrate) > 0 and not np.all(np.isnan(substrate)): if show_error_bars and substrate_std is not None and len(substrate_std) == len(substrate) and not np.all(np.isnan(substrate_std)): - ax2.errorbar( - time, substrate, yerr=substrate_std, - fmt=marker_style, color=data_colors['Sustrato'], - label=f'{axis_labels["substrate_label"]} (Datos)', - capsize=error_cap_size, elinewidth=error_line_width, markersize=5 - ) + ax2.errorbar(time, substrate, yerr=substrate_std, fmt=marker_style, color=data_colors['Sustrato'], + label=f'{axis_labels["substrate_label"]} (Datos)', capsize=error_cap_size, elinewidth=error_line_width, markersize=5, alpha=0.7) else: - ax2.plot(time, substrate, marker=marker_style, linestyle='', color=data_colors['Sustrato'], - label=f'{axis_labels["substrate_label"]} (Datos)', markersize=5) + ax2.plot(time, substrate, marker=marker_style, linestyle='', color=data_colors['Sustrato'], label=f'{axis_labels["substrate_label"]} (Datos)', markersize=5, alpha=0.7) if y_pred_substrate is not None and len(y_pred_substrate) > 0 and not np.all(np.isnan(y_pred_substrate)): - ax2.plot(time_to_plot, y_pred_substrate, linestyle=line_style, color=model_colors['Sustrato'], - label=f'{axis_labels["substrate_label"]} (Modelo)') - ax2.tick_params(axis='y', labelcolor=colors['Sustrato']) + ax2.plot(time_to_plot_curves, y_pred_substrate, linestyle=line_style, color=model_colors['Sustrato'], label=f'{axis_labels["substrate_label"]} (Modelo)') + ax2.tick_params(axis='y', labelcolor=data_colors['Sustrato']) ax3 = ax1.twinx() - ax3.spines["right"].set_position(("axes", 1.15)) - ax3.set_frame_on(True); ax3.patch.set_visible(False) - ax3.set_ylabel(axis_labels['product_label'], color=colors['Producto']) + ax3.spines["right"].set_position(("axes", 1.15)) # Offset the third axis + ax3.set_frame_on(True); ax3.patch.set_visible(False) + ax3.set_ylabel(axis_labels['product_label'], color=data_colors['Producto']) if product is not None and len(product) > 0 and not np.all(np.isnan(product)): if show_error_bars and product_std is not None and len(product_std) == len(product) and not np.all(np.isnan(product_std)): - ax3.errorbar( - time, product, yerr=product_std, - fmt=marker_style, color=data_colors['Producto'], - label=f'{axis_labels["product_label"]} (Datos)', - capsize=error_cap_size, elinewidth=error_line_width, markersize=5 - ) + ax3.errorbar(time, product, yerr=product_std, fmt=marker_style, color=data_colors['Producto'], + label=f'{axis_labels["product_label"]} (Datos)', capsize=error_cap_size, elinewidth=error_line_width, markersize=5, alpha=0.7) else: - ax3.plot(time, product, marker=marker_style, linestyle='', color=data_colors['Producto'], - label=f'{axis_labels["product_label"]} (Datos)', markersize=5) + ax3.plot(time, product, marker=marker_style, linestyle='', color=data_colors['Producto'], label=f'{axis_labels["product_label"]} (Datos)', markersize=5, alpha=0.7) if y_pred_product is not None and len(y_pred_product) > 0 and not np.all(np.isnan(y_pred_product)): - ax3.plot(time_to_plot, y_pred_product, linestyle=line_style, color=model_colors['Producto'], - label=f'{axis_labels["product_label"]} (Modelo)') - ax3.tick_params(axis='y', labelcolor=colors['Producto']) - - lines_labels_collect = [] - for ax_current in [ax1, ax2, ax3]: - h, l = ax_current.get_legend_handles_labels() - if h: lines_labels_collect.append((h,l)) + ax3.plot(time_to_plot_curves, y_pred_product, linestyle=line_style, color=model_colors['Producto'], label=f'{axis_labels["product_label"]} (Modelo)') + ax3.tick_params(axis='y', labelcolor=data_colors['Producto']) + + # Collect legend handles and labels from all axes + handles, labels = [], [] + for ax_ in [ax1, ax2, ax3]: + h, l = ax_.get_legend_handles_labels() + handles.extend(h) + labels.extend(l) - if lines_labels_collect: - lines, labels = [sum(lol, []) for lol in zip(*[(h,l) for h,l in lines_labels_collect])] - unique_labels_dict = dict(zip(labels, lines)) - if show_legend: ax1.legend(unique_labels_dict.values(), unique_labels_dict.keys(), loc=legend_position) + # Create a single legend if requested + if show_legend and handles: + # Remove duplicate labels/handles + by_label = dict(zip(labels, handles)) + ax1.legend(by_label.values(), by_label.keys(), loc=legend_position) if show_params: texts_to_display = [] - param_categories = [ + param_categories_data = [ (axis_labels['biomass_label'], self.params.get('biomass', {}), self.r2.get('biomass', np.nan), self.rmse.get('biomass', np.nan)), (axis_labels['substrate_label'], self.params.get('substrate', {}), self.r2.get('substrate', np.nan), self.rmse.get('substrate', np.nan)), (axis_labels['product_label'], self.params.get('product', {}), self.r2.get('product', np.nan), self.rmse.get('product', np.nan)) ] - for label, params_dict, r2_val, rmse_val in param_categories: + for label, params_dict, r2_val, rmse_val in param_categories_data: if params_dict and any(np.isfinite(v) for v in params_dict.values()): param_text_list = [f" {k} = {v_par:.3g}" if np.isfinite(v_par) else f" {k} = N/A" for k,v_par in params_dict.items()] param_text = '\n'.join(param_text_list) @@ -944,43 +924,46 @@ class BioprocessModel: texts_to_display.append(f"{label}:\n{param_text}\n R² = {r2_display}\n RMSE = {rmse_display}") elif params_dict: texts_to_display.append(f"{label}:\n Parámetros no válidos o N/A") total_text = "\n\n".join(texts_to_display) + if total_text: if params_position == 'outside right': - fig.subplots_adjust(right=0.70) - fig.text(0.72, 0.5, total_text, transform=fig.transFigure, + fig.subplots_adjust(right=0.68) # Make more space for text + fig.text(0.70, 0.5, total_text, transform=fig.transFigure, verticalalignment='center', horizontalalignment='left', bbox=dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.7), fontsize=8) else: text_x, ha = (0.95, 'right') if 'right' in params_position else (0.05, 'left') text_y, va = (0.95, 'top') if 'upper' in params_position else (0.05, 'bottom') + # Place text relative to ax1 ax1.text(text_x, text_y, total_text, transform=ax1.transAxes, verticalalignment=va, horizontalalignment=ha, bbox=dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.7), fontsize=8) - plt.tight_layout(rect=[0, 0.03, 1, 0.95]) - if params_position == 'outside right': fig.subplots_adjust(right=0.70) + plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust for suptitle + if params_position == 'outside right': fig.subplots_adjust(right=0.68) # Final adjustment + buf = io.BytesIO(); fig.savefig(buf, format='png', bbox_inches='tight'); buf.seek(0) image = Image.open(buf).convert("RGB"); plt.close(fig) return image def process_all_data(file, legend_position, params_position, model_types_selected, experiment_names_str, - lower_bounds_str, upper_bounds_str, + lower_bounds_str, upper_bounds_str, # Note: bounds_str are not currently used in BioprocessModel mode, style, line_color, point_color, line_style, marker_style, show_legend, show_params, use_differential, maxfev_val, axis_labels_dict, show_error_bars, error_cap_size, error_line_width): - # ... (Excel reading and sheet iteration logic remains the same) - if file is None: return [], pd.DataFrame(), "Por favor, sube un archivo Excel." + if file is None: return [], pd.DataFrame(), "Por favor, sube un archivo Excel.", {} try: xls = pd.ExcelFile(file.name if hasattr(file, 'name') else file) sheet_names = xls.sheet_names - if not sheet_names: return [], pd.DataFrame(), "El archivo Excel está vacío." - except Exception as e: return [], pd.DataFrame(), f"Error al leer el archivo Excel: {e}" + if not sheet_names: return [], pd.DataFrame(), "El archivo Excel está vacío.", {} + except Exception as e: return [], pd.DataFrame(), f"Error al leer el archivo Excel: {e}", {} figures = [] comparison_data = [] + all_parameters_collected = {} # To store parameters for export experiment_names_list = experiment_names_str.strip().split('\n') if experiment_names_str.strip() else [] all_plot_messages = [] @@ -991,58 +974,92 @@ def process_all_data(file, legend_position, params_position, model_types_selecte try: df = pd.read_excel(xls, sheet_name=sheet_name, header=[0, 1]) if df.empty: all_plot_messages.append(f"Hoja '{sheet_name}' vacía."); continue - if not any(col_level2 == 'Tiempo' for _, col_level2 in df.columns): - all_plot_messages.append(f"Hoja '{sheet_name}' sin 'Tiempo'."); continue + if not any(col[1] == 'Tiempo' for col in df.columns if isinstance(col, tuple) and len(col) > 1): + all_plot_messages.append(f"Hoja '{sheet_name}' sin columna ('Experimento', 'Tiempo')."); continue except Exception as e: all_plot_messages.append(f"Error leyendo hoja '{sheet_name}': {e}."); continue - model_dummy_for_sheet = BioprocessModel() # To process sheet data once + model_dummy_for_sheet = BioprocessModel() try: model_dummy_for_sheet.process_data(df) except ValueError as e: all_plot_messages.append(f"Error procesando datos de '{sheet_name}': {e}."); continue - # Ensure dataxp, datasp, datapp are populated for average/combinado modes - # These should be populated by model_dummy_for_sheet.process_data() - # If they are empty lists, it means no valid data was found for that component. - if mode == 'independent': - # ... (independent mode logic remains largely the same) - # Ensure time_exp, biomass_exp etc. are correctly extracted and validated - grouped_cols = df.columns.get_level_values(0).unique() - for exp_idx, exp_col_name in enumerate(grouped_cols): - current_experiment_name = f"{current_experiment_name_base} - Exp {exp_idx + 1} ({exp_col_name})" - exp_df_slice = df[exp_col_name] - + # df.columns.get_level_values(0) gets the top-level column names (experiment groups) + # Example: If columns are [('ExpA', 'Tiempo'), ('ExpA', 'Biomasa'), ('ExpB', 'Tiempo'), ...] + # unique_exp_groups will be ['ExpA', 'ExpB'] + unique_exp_groups = df.columns.get_level_values(0).unique() + + for exp_group_idx, exp_group_name in enumerate(unique_exp_groups): + current_experiment_name = f"{current_experiment_name_base} - {exp_group_name}" + # Select columns for the current experiment group + exp_df_slice_multi = df[exp_group_name] # This is a DataFrame with 'Tiempo', 'Biomasa', etc. as columns + try: - time_exp = exp_df_slice['Tiempo'].dropna().astype(float).values - biomass_exp = exp_df_slice['Biomasa'].dropna().astype(float).values if 'Biomasa' in exp_df_slice else np.array([]) - substrate_exp = exp_df_slice['Sustrato'].dropna().astype(float).values if 'Sustrato' in exp_df_slice else np.array([]) - product_exp = exp_df_slice['Producto'].dropna().astype(float).values if 'Producto' in exp_df_slice else np.array([]) + # For independent mode, we expect one series per component (or average if replicates are under this exp_group_name) + # We need to re-process this slice to get averages if there are replicates within this exp_group_name + # Or, assume 'independent' means each ('ExpGroup', 'Component', 'RepX') is truly independent. + # The current process_data averages replicates. If we want truly independent replicates, + # the logic here needs to iterate through individual replicate columns. + # For now, let's assume exp_df_slice_multi is the data for ONE experiment, possibly with replicates averaged by process_data. + + # Create a temporary model to process this specific slice + temp_model_for_slice = BioprocessModel() + # Construct a DataFrame that process_data can handle (needs multi-index with one top level) + # This is a bit convoluted. If process_data expects the full sheet structure, + # then for 'independent' mode, we might need to adjust how data is fed. + # Let's simplify: assume exp_df_slice_multi has 'Tiempo', 'Biomasa', etc. as simple columns. + # And we need to handle potential replicates within this slice. + + # Re-evaluate data extraction for 'independent' + # Time should be common for this exp_group_name + time_exp_series = exp_df_slice_multi['Tiempo'] + time_exp = time_exp_series.iloc[:,0].dropna().astype(float).values if isinstance(time_exp_series, pd.DataFrame) else time_exp_series.dropna().astype(float).values + + + def get_component_data(component_name): + if component_name in exp_df_slice_multi: + comp_series_or_df = exp_df_slice_multi[component_name] + if isinstance(comp_series_or_df, pd.DataFrame): # Replicates exist + # Average replicates for this independent experiment + return comp_series_or_df.mean(axis=1).dropna().astype(float).values, \ + comp_series_or_df.std(axis=1, ddof=1).dropna().astype(float).values + else: # Single series + return comp_series_or_df.dropna().astype(float).values, None # No std for single series + return np.array([]), None + + biomass_exp, biomass_std_exp = get_component_data('Biomasa') + substrate_exp, substrate_std_exp = get_component_data('Sustrato') + product_exp, product_std_exp = get_component_data('Producto') + + # Align all data to the shortest length after NaNs + min_len = len(time_exp) + if len(biomass_exp) > 0: min_len = min(min_len, len(biomass_exp)) + if len(substrate_exp) > 0: min_len = min(min_len, len(substrate_exp)) + if len(product_exp) > 0: min_len = min(min_len, len(product_exp)) + + time_exp = time_exp[:min_len] + if len(biomass_exp) > 0: biomass_exp = biomass_exp[:min_len] + if biomass_std_exp is not None and len(biomass_std_exp) > 0: biomass_std_exp = biomass_std_exp[:min_len] + if len(substrate_exp) > 0: substrate_exp = substrate_exp[:min_len] + if substrate_std_exp is not None and len(substrate_std_exp) > 0: substrate_std_exp = substrate_std_exp[:min_len] + if len(product_exp) > 0: product_exp = product_exp[:min_len] + if product_std_exp is not None and len(product_std_exp) > 0: product_std_exp = product_std_exp[:min_len] + if len(time_exp) == 0: all_plot_messages.append(f"Sin datos de tiempo para {current_experiment_name}."); continue if len(biomass_exp) == 0: all_plot_messages.append(f"Sin datos de biomasa para {current_experiment_name}.") for mt in model_types_selected: comparison_data.append({'Experimento': current_experiment_name, 'Modelo': mt.capitalize(), 'R² Biomasa': np.nan, 'RMSE Biomasa': np.nan}) continue - # Align data if lengths differ due to NaNs (simple truncation to min length) - min_len = min(len(time_exp), len(biomass_exp) if len(biomass_exp)>0 else len(time_exp), - len(substrate_exp) if len(substrate_exp)>0 else len(time_exp), - len(product_exp) if len(product_exp)>0 else len(time_exp)) - time_exp = time_exp[:min_len] - if len(biomass_exp)>0: biomass_exp = biomass_exp[:min_len] - if len(substrate_exp)>0: substrate_exp = substrate_exp[:min_len] - if len(product_exp)>0: product_exp = product_exp[:min_len] - - except KeyError as e: all_plot_messages.append(f"Faltan columnas en '{current_experiment_name}': {e}."); continue + except KeyError as e: all_plot_messages.append(f"Faltan columnas (Tiempo, Biomasa, etc.) en '{current_experiment_name}' bajo '{exp_group_name}': {e}."); continue except Exception as e_data: all_plot_messages.append(f"Error extrayendo datos para '{current_experiment_name}': {e_data}."); continue - biomass_std_exp, substrate_std_exp, product_std_exp = None, None, None # No std for independent mode here - for model_type_iter in model_types_selected: model_instance = BioprocessModel(model_type=model_type_iter, maxfev=maxfev_val) - model_instance.fit_model() # Sets self.biomass_model, self.biomass_diff + model_instance.fit_model() y_pred_biomass = model_instance.fit_biomass(time_exp, biomass_exp) y_pred_substrate, y_pred_product = None, None if y_pred_biomass is not None and model_instance.params.get('biomass'): @@ -1050,6 +1067,7 @@ def process_all_data(file, legend_position, params_position, model_types_selecte if len(product_exp) > 0: y_pred_product = model_instance.fit_product(time_exp, product_exp, model_instance.params['biomass']) else: all_plot_messages.append(f"Ajuste biomasa falló: {current_experiment_name}, {model_type_iter}.") + all_parameters_collected.setdefault(current_experiment_name, {})[model_type_iter] = model_instance.params comparison_data.append({ 'Experimento': current_experiment_name, 'Modelo': model_type_iter.capitalize(), 'R² Biomasa': model_instance.r2.get('biomass', np.nan), 'RMSE Biomasa': model_instance.rmse.get('biomass', np.nan), @@ -1058,7 +1076,7 @@ def process_all_data(file, legend_position, params_position, model_types_selecte }) fig = model_instance.plot_results( time_exp, biomass_exp, substrate_exp, product_exp, - y_pred_biomass, y_pred_substrate, y_pred_product, # Pass curve_fit results + y_pred_biomass, y_pred_substrate, y_pred_product, biomass_std_exp, substrate_std_exp, product_std_exp, current_experiment_name, legend_position, params_position, show_legend, show_params, style, line_color, point_color, line_style, marker_style, @@ -1068,18 +1086,16 @@ def process_all_data(file, legend_position, params_position, model_types_selecte if fig: figures.append(fig) elif mode in ['average', 'combinado']: - # ... (average/combinado mode logic remains largely the same) - current_experiment_name = f"{current_experiment_name_base} - Promedio" - time_avg = model_dummy_for_sheet.time + current_experiment_name = f"{current_experiment_name_base} - Promedio Hoja" + time_avg = model_dummy_for_sheet.time # Already processed and aligned time - # Check if dataxp, datasp, datapp are available from process_data biomass_avg = model_dummy_for_sheet.dataxp[-1] if model_dummy_for_sheet.dataxp and len(model_dummy_for_sheet.dataxp[-1]) > 0 else np.array([]) substrate_avg = model_dummy_for_sheet.datasp[-1] if model_dummy_for_sheet.datasp and len(model_dummy_for_sheet.datasp[-1]) > 0 else np.array([]) product_avg = model_dummy_for_sheet.datapp[-1] if model_dummy_for_sheet.datapp and len(model_dummy_for_sheet.datapp[-1]) > 0 else np.array([]) - biomass_std_avg = model_dummy_for_sheet.datax_std[-1] if model_dummy_for_sheet.datax_std and len(model_dummy_for_sheet.datax_std[-1]) == len(biomass_avg) else None - substrate_std_avg = model_dummy_for_sheet.datas_std[-1] if model_dummy_for_sheet.datas_std and len(model_dummy_for_sheet.datas_std[-1]) == len(substrate_avg) else None - product_std_avg = model_dummy_for_sheet.datap_std[-1] if model_dummy_for_sheet.datap_std and len(model_dummy_for_sheet.datap_std[-1]) == len(product_avg) else None + biomass_std_avg = model_dummy_for_sheet.datax_std[-1] if model_dummy_for_sheet.datax_std and model_dummy_for_sheet.datax_std[-1] is not None and len(model_dummy_for_sheet.datax_std[-1]) == len(biomass_avg) else None + substrate_std_avg = model_dummy_for_sheet.datas_std[-1] if model_dummy_for_sheet.datas_std and model_dummy_for_sheet.datas_std[-1] is not None and len(model_dummy_for_sheet.datas_std[-1]) == len(substrate_avg) else None + product_std_avg = model_dummy_for_sheet.datap_std[-1] if model_dummy_for_sheet.datap_std and model_dummy_for_sheet.datap_std[-1] is not None and len(model_dummy_for_sheet.datap_std[-1]) == len(product_avg) else None if time_avg is None or len(time_avg) == 0: all_plot_messages.append(f"Sin datos de tiempo promedio para '{sheet_name}'."); continue if len(biomass_avg) == 0: @@ -1097,6 +1113,7 @@ def process_all_data(file, legend_position, params_position, model_types_selecte if len(product_avg) > 0: y_pred_product = model_instance.fit_product(time_avg, product_avg, model_instance.params['biomass']) else: all_plot_messages.append(f"Ajuste biomasa promedio falló: {current_experiment_name}, {model_type_iter}.") + all_parameters_collected.setdefault(current_experiment_name, {})[model_type_iter] = model_instance.params comparison_data.append({ 'Experimento': current_experiment_name, 'Modelo': model_type_iter.capitalize(), 'R² Biomasa': model_instance.r2.get('biomass', np.nan), 'RMSE Biomasa': model_instance.rmse.get('biomass', np.nan), @@ -1106,7 +1123,7 @@ def process_all_data(file, legend_position, params_position, model_types_selecte plot_func = model_instance.plot_combined_results if mode == 'combinado' else model_instance.plot_results fig = plot_func( time_avg, biomass_avg, substrate_avg, product_avg, - y_pred_biomass, y_pred_substrate, y_pred_product, # Pass curve_fit results + y_pred_biomass, y_pred_substrate, y_pred_product, biomass_std_avg, substrate_std_avg, product_std_avg, current_experiment_name, legend_position, params_position, show_legend, show_params, style, line_color, point_color, line_style, marker_style, @@ -1117,11 +1134,15 @@ def process_all_data(file, legend_position, params_position, model_types_selecte comparison_df = pd.DataFrame(comparison_data) if not comparison_df.empty: - for col in ['R² Biomasa', 'RMSE Biomasa', 'R² Sustrato', 'RMSE Sustrato', 'R² Producto', 'RMSE Producto']: - if col in comparison_df.columns: comparison_df[col] = pd.to_numeric(comparison_df[col], errors='coerce') + cols_to_sort = ['R² Biomasa', 'R² Sustrato', 'R² Producto', 'RMSE Biomasa', 'RMSE Sustrato', 'RMSE Producto'] + existing_cols_to_sort = [col for col in cols_to_sort if col in comparison_df.columns] + ascending_map = {'R²': False, 'RMSE': True} # R2 higher is better, RMSE lower is better + sort_ascending = [True, True] + [ascending_map[col.split(' ')[0]] for col in existing_cols_to_sort] + + comparison_df_sorted = comparison_df.sort_values( - by=['Experimento', 'Modelo', 'R² Biomasa', 'R² Sustrato', 'R² Producto', 'RMSE Biomasa', 'RMSE Sustrato', 'RMSE Producto'], - ascending=[True, True, False, False, False, True, True, True] + by=['Experimento', 'Modelo'] + existing_cols_to_sort, + ascending=sort_ascending ).reset_index(drop=True) else: comparison_df_sorted = pd.DataFrame(columns=[ @@ -1130,10 +1151,11 @@ def process_all_data(file, legend_position, params_position, model_types_selecte ]) final_message = "Procesamiento completado." - if all_plot_messages: final_message += " Mensajes:\n" + "\n".join(all_plot_messages) + if all_plot_messages: final_message += " Mensajes:\n" + "\n".join(list(set(all_plot_messages))) # Unique messages if not figures and not comparison_df_sorted.empty: final_message += "\nNo se generaron gráficos, pero hay datos en la tabla." elif not figures and comparison_df_sorted.empty: final_message += "\nNo se generaron gráficos ni datos para la tabla." - return figures, comparison_df_sorted, final_message + + return figures, comparison_df_sorted, final_message, all_parameters_collected MODEL_CHOICES = [ @@ -1141,146 +1163,171 @@ MODEL_CHOICES = [ ("Gompertz (3-parám)", "gompertz"), ("Moser (3-parám)", "moser"), ("Baranyi (4-parám)", "baranyi") - # Add more models here as ("Display Name (X-param)", "internal_model_name") ] def create_interface(): with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown("# Modelos Cinéticos de Bioprocesos") - # ... (Markdown descriptions remain the same) - gr.Markdown(r""" - Análisis y visualización de datos de bioprocesos utilizando modelos cinéticos como Logístico, Gompertz y Moser para el crecimiento de biomasa, - y el modelo de Luedeking-Piret para el consumo de sustrato y la formación de producto. - Nuevos modelos como Baranyi (4 parámetros) han sido añadidos. - - **Instrucciones:** - 1. Sube un archivo Excel. El archivo debe tener una estructura de MultiIndex en las columnas: - - Nivel 0: Nombre del experimento/tratamiento (ej: "Control", "Tratamiento A") - - Nivel 1: Tipo de dato ("Tiempo", "Biomasa", "Sustrato", "Producto") - - Si hay réplicas, deben estar como columnas separadas bajo el mismo nombre de experimento (Nivel 0) y tipo de dato (Nivel 1). - Ejemplo: (Control, Biomasa, Rep1), (Control, Biomasa, Rep2). El código promediará estas réplicas para los modos "average" y "combinado". - Para el modo "independent", se asume una sola serie de datos por (Experimento, TipoDato). - 2. Selecciona el/los tipo(s) de modelo(s) de biomasa a ajustar. Los modelos están agrupados por el número de parámetros. - 3. Elige el modo de análisis: - - `independent`: Analiza cada experimento (columna de Nivel 0) individualmente. - - `average`: Promedia los datos de todos los experimentos dentro de una hoja y ajusta los modelos a estos promedios. Se grafica en subplots separados. - - `combinado`: Similar a `average`, pero grafica Biomasa, Sustrato y Producto en un solo gráfico con múltiples ejes Y. - 4. Configura las opciones de graficación (leyenda, parámetros, estilos, colores, etc.). - 5. (Opcional) Personaliza los nombres de los experimentos y los títulos de los ejes. - 6. Haz clic en "Simular" para generar los gráficos y la tabla comparativa. - 7. Puedes exportar la tabla de resultados a Excel o CSV. - """) - gr.Markdown(r""" - ## Ecuaciones Diferenciales Utilizadas (Simplificado) - - **Biomasa:** - - - Logístico (3p: $X_0, X_m, \mu_m$): - $$ X(t) = \frac{X_0 X_m e^{\mu_m t}}{X_m - X_0 + X_0 e^{\mu_m t}} \quad \text{o} \quad \frac{dX}{dt} = \mu_m X\left(1 - \frac{X}{X_m}\right) $$ - - - Gompertz (3p: $X_m, \mu_m, \lambda$): - $$ X(t) = X_m \exp\left(-\exp\left(\frac{\mu_m e}{X_m}(\lambda-t)+1\right)\right) \quad \text{o} \quad \frac{dX}{dt} = \mu_m X \ln\left(\frac{X_m}{X}\right) \text{ (forma alternativa)} $$ - - Moser (3p: $X_m, \mu_m, K_s$ - forma simplificada): - $$ X(t)=X_m(1-e^{-\mu_m(t-K_s)}) \quad \text{o} \quad \frac{dX}{dt}=\mu_m(X_m - X) $$ - - - Baranyi (4p: $X_0, X_m, \mu_m, \lambda$): - $$ \ln X(t) = \ln X_0 + \mu_m A(t) - \ln\left(1 + \frac{e^{\mu_m A(t)}-1}{X_m/X_0}\right) $$ - $$ A(t) = t + \frac{1}{\mu_m} \ln(e^{-\mu_m t} + e^{-\mu_m \lambda} - e^{-\mu_m(t+\lambda)}) $$ - (Ecuación diferencial compleja, no usada para ODE en esta versión) - - **Sustrato y Producto (Luedeking-Piret):** - $$ \frac{dS}{dt} = -p \frac{dX}{dt} - q X \quad ; \quad \frac{dP}{dt} = \alpha \frac{dX}{dt} + \beta X $$ - Parámetros: $X_m, \mu_m, X_0, \lambda (\text{lag}), K_s, p, q, \alpha, \beta$. - """) - - - with gr.Row(): - file_input = gr.File(label="Subir archivo Excel (.xlsx)", file_types=['.xlsx']) - mode = gr.Radio(["independent", "average", "combinado"], label="Modo de Análisis", value="independent", - info="Independent: cada experimento. Average/Combinado: promedio de la hoja.") - - with gr.Accordion("Configuración de Modelos y Simulación", open=True): # Open by default - model_types_selected_ui = gr.CheckboxGroup( - choices=MODEL_CHOICES, # Use the global list of (DisplayName, value) - label="Tipo(s) de Modelo de Biomasa", - value=["logistic"] # Default selected internal value - ) - use_differential_ui = gr.Checkbox(label="Usar Ecuaciones Diferenciales para Graficar (experimental)", value=False, - info="Si se marca, las curvas se generan resolviendo las EDOs (si el modelo lo soporta). Si no, por ajuste directo.") - maxfev_input_ui = gr.Number(label="maxfev (Máx. evaluaciones para el ajuste)", value=50000, minimum=1000, step=1000) - experiment_names_str_ui = gr.Textbox( - label="Nombres de los experimentos/hojas (uno por línea, opcional)", - placeholder="Nombre para Hoja 1\nNombre para Hoja 2\n...", - lines=3, - info="Si se deja vacío, se usarán los nombres de las hojas o 'Exp X'." - ) - # ... (rest of the UI for graph settings, axis labels, error bars remains the same) - with gr.Accordion("Configuración de Gráficos", open=False): - with gr.Row(): - with gr.Column(scale=1): - legend_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "best"], label="Posición de Leyenda", value="best") - show_legend_ui = gr.Checkbox(label="Mostrar Leyenda", value=True) - with gr.Column(scale=1): - params_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "outside right"], label="Posición de Parámetros", value="upper right") - show_params_ui = gr.Checkbox(label="Mostrar Parámetros", value=True) - with gr.Row(): - style_dropdown_ui = gr.Dropdown(choices=['white', 'dark', 'whitegrid', 'darkgrid', 'ticks'], label="Estilo de Gráfico (Seaborn)", value='whitegrid') - line_color_picker_ui = gr.ColorPicker(label="Color de Línea (Modelo)", value='#0072B2') - point_color_picker_ui = gr.ColorPicker(label="Color de Puntos (Datos)", value='#D55E00') - with gr.Row(): - line_style_dropdown_ui = gr.Dropdown(choices=['-', '--', '-.', ':'], label="Estilo de Línea", value='-') - marker_style_dropdown_ui = gr.Dropdown(choices=['o', 's', '^', 'v', 'D', 'x', '+', '*'], label="Estilo de Marcador (Puntos)", value='o') - with gr.Row(): - x_axis_label_input_ui = gr.Textbox(label="Título Eje X", value="Tiempo (h)", placeholder="Tiempo (unidades)") - biomass_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Biomasa)", value="Biomasa (g/L)", placeholder="Biomasa (unidades)") - with gr.Row(): - substrate_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Sustrato)", value="Sustrato (g/L)", placeholder="Sustrato (unidades)") - product_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Producto)", value="Producto (g/L)", placeholder="Producto (unidades)") + with gr.Tab("Teoría y Uso"): + gr.Markdown(r""" + Análisis y visualización de datos de bioprocesos utilizando modelos cinéticos como Logístico, Gompertz y Moser para el crecimiento de biomasa, + y el modelo de Luedeking-Piret para el consumo de sustrato y la formación de producto. + Nuevos modelos como Baranyi (4 parámetros) han sido añadidos. + + **Instrucciones de Uso:** + 1. **Subir archivo Excel (.xlsx):** + El archivo debe tener una estructura de MultiIndex en las columnas: + - Nivel 0: Nombre del experimento/tratamiento (ej: "Control", "Tratamiento A"). Este es el identificador principal del grupo experimental. + - Nivel 1: Tipo de dato ("Tiempo", "Biomasa", "Sustrato", "Producto"). + - Nivel 2 (Opcional): Identificador de réplica (ej: "R1", "R2", "Rep1"). Si no hay réplicas explícitas, este nivel puede omitirse o ser uniforme. + + *Ejemplo de estructura de columnas para una hoja:* + ``` + | Control | Control | Control | TratamientoA | ... | + |-----------------|-----------------|-----------------|-----------------|-----| + | Tiempo | Biomasa | Biomasa | Tiempo | ... | + | (unidad/único) | R1 (unidad) | R2 (unidad) | (unidad/único) | ... | + ``` + - La columna 'Tiempo' debe estar presente para cada grupo experimental (Nivel 0). + - Si hay réplicas (Nivel 2), estas serán promediadas para los modos "average" y "combinado". + - Para el modo "independent", cada grupo de Nivel 0 (ej. "Control", "TratamientoA") se analiza por separado. Si dentro de un grupo de Nivel 0 hay réplicas de Biomasa, Sustrato o Producto, estas también se promediarán para ese análisis independiente. + + 2. **Seleccionar Modelo(s) de Biomasa:** En la pestaña "Simulación", elige uno o más modelos de la lista. + 3. **Elegir Modo de Análisis:** + - `independent`: Analiza cada grupo experimental (Nivel 0 de las columnas) de forma individual. Si hay múltiples hojas, cada grupo en cada hoja se trata independientemente. + - `average`: Para cada hoja del Excel, promedia todos los datos de Biomasa, Sustrato y Producto (a través de todos los grupos de Nivel 0 y sus réplicas) y ajusta los modelos a estos promedios de hoja. Los resultados (Biomasa, Sustrato, Producto) se grafican en subplots separados. + - `combinado`: Similar a `average`, pero grafica Biomasa, Sustrato y Producto promediados de la hoja en un solo gráfico con múltiples ejes Y. + 4. **Configurar Opciones:** Ajusta la visualización de gráficos, nombres de experimentos/hojas, y parámetros de simulación. + 5. **Ejecutar:** Haz clic en "Simular y Graficar". + 6. **Resultados:** Visualiza los gráficos y la tabla comparativa. Exporta la tabla (Excel/CSV) o los parámetros del modelo (Excel). + """) + + gr.Markdown(r""" + ## Modelos Matemáticos para Bioprocesos + + **1. Modelo Logístico (3 parámetros):** $X_0, X_m, \mu_m$ + $$ X(t) = \frac{X_0 X_m e^{\mu_m t}}{X_m - X_0 + X_0 e^{\mu_m t}} \quad \text{o} \quad \frac{dX}{dt} = \mu_m X\left(1 - \frac{X}{X_m}\right) $$ + + **2. Modelo Gompertz (3 parámetros):** $X_m, \mu_m, \lambda$ (tiempo de latencia) + $$ X(t) = X_m \exp\left(-\exp\left(\frac{\mu_m e}{X_m}(\lambda-t)+1\right)\right) \quad \text{o} \quad \frac{dX}{dt} = X \cdot \frac{\mu_m e}{X_m} \cdot \exp\left(\frac{\mu_m e}{X_m}(\lambda-t)+1\right) $$ + + **3. Modelo de Moser (simplificado, 3 parámetros):** $X_m, \mu_m, K_s$ (constante de afinidad/tiempo) + $$ X(t)=X_m(1-e^{-\mu_m(t-K_s)}) \quad \text{o} \quad \frac{dX}{dt}=\mu_m(X_m - X) $$ + + **4. Modelo de Baranyi (4 parámetros):** $X_0, X_m, \mu_m, \lambda$ + $$ \ln X(t) = \ln X_0 + \mu_m A(t) - \ln\left(1 + \frac{e^{\mu_m A(t)}-1}{X_m/X_0}\right) $$ + $$ \text{donde } A(t) = t + \frac{1}{\mu_m} \ln(e^{-\mu_m t} + e^{-\mu_m \lambda} - e^{-\mu_m(t+\lambda)}) $$ + (Ecuación diferencial compleja, no usada para solución ODE en esta versión) + + **Modelo de Luedeking-Piret (Sustrato y Producto):** + - Consumo de Sustrato ($S$): $S_0, p, q$ + $$ \frac{dS}{dt} = -p \frac{dX}{dt} - q X $$ + - Formación de Producto ($P$): $P_0, \alpha, \beta$ + $$ \frac{dP}{dt} = \alpha \frac{dX}{dt} + \beta X $$ + + **Métricas de Evaluación:** + - $R^2$: Coeficiente de determinación (0-1, mayor es mejor) + - RMSE: Raíz del error cuadrático medio (menor es mejor) + """) + + with gr.Tab("Simulación"): with gr.Row(): - show_error_bars_ui = gr.Checkbox(label="Mostrar barras de error", value=True) - error_cap_size_ui = gr.Slider(label="Tamaño de tapa de barras de error", minimum=1, maximum=10, step=1, value=3) - error_line_width_ui = gr.Slider(label="Grosor de línea de error", minimum=0.5, maximum=5, step=0.5, value=1.0) + file_input = gr.File(label="Subir archivo Excel (.xlsx)", file_types=['.xlsx']) + mode = gr.Radio(["independent", "average", "combinado"], label="Modo de Análisis", value="independent", + info="Independent: cada grupo experimental. Average/Combinado: promedio de la hoja.") + + with gr.Accordion("Configuración de Modelos y Simulación", open=True): + model_types_selected_ui = gr.CheckboxGroup( + choices=MODEL_CHOICES, label="Tipo(s) de Modelo de Biomasa", value=["logistic"] + ) + use_differential_ui = gr.Checkbox(label="Usar Ecuaciones Diferenciales para Graficar (experimental)", value=False, + info="Si se marca, las curvas se generan resolviendo EDOs (si el modelo lo soporta y el ajuste inicial es bueno). Si no, por ajuste directo de la ecuación integrada.") + maxfev_input_ui = gr.Number(label="maxfev (Máx. evaluaciones para ajuste)", value=50000, minimum=1000, step=1000) + experiment_names_str_ui = gr.Textbox( + label="Nombres de las Hojas (uno por línea, opcional)", + placeholder="Nombre para Hoja 1\nNombre para Hoja 2\n...", lines=3, + info="Si se deja vacío, se usarán los nombres de las hojas del Excel." + ) + + with gr.Accordion("Configuración de Gráficos", open=False): + with gr.Row(): + with gr.Column(scale=1): + legend_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "best"], label="Posición de Leyenda", value="best") + show_legend_ui = gr.Checkbox(label="Mostrar Leyenda", value=True) + with gr.Column(scale=1): + params_position_ui = gr.Radio(choices=["upper left", "upper right", "lower left", "lower right", "outside right"], label="Posición de Parámetros", value="upper right") + show_params_ui = gr.Checkbox(label="Mostrar Parámetros", value=True) + with gr.Row(): + style_dropdown_ui = gr.Dropdown(choices=['white', 'dark', 'whitegrid', 'darkgrid', 'ticks'], label="Estilo de Gráfico (Seaborn)", value='whitegrid') + line_color_picker_ui = gr.ColorPicker(label="Color de Línea (Modelo)", value='#0072B2') # For individual plots + point_color_picker_ui = gr.ColorPicker(label="Color de Puntos (Datos)", value='#D55E00') # For individual plots + with gr.Row(): + line_style_dropdown_ui = gr.Dropdown(choices=['-', '--', '-.', ':'], label="Estilo de Línea", value='-') + marker_style_dropdown_ui = gr.Dropdown(choices=['o', 's', '^', 'v', 'D', 'x', '+', '*'], label="Estilo de Marcador (Puntos)", value='o') + with gr.Row(): + x_axis_label_input_ui = gr.Textbox(label="Título Eje X", value="Tiempo (h)", placeholder="Tiempo (unidades)") + biomass_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Biomasa)", value="Biomasa (g/L)", placeholder="Biomasa (unidades)") + with gr.Row(): + substrate_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Sustrato)", value="Sustrato (g/L)", placeholder="Sustrato (unidades)") + product_axis_label_input_ui = gr.Textbox(label="Título Eje Y (Producto)", value="Producto (g/L)", placeholder="Producto (unidades)") + with gr.Row(): + show_error_bars_ui = gr.Checkbox(label="Mostrar barras de error (si hay std)", value=True) + error_cap_size_ui = gr.Slider(label="Tamaño de tapa (barras de error)", minimum=0, maximum=10, step=1, value=3) + error_line_width_ui = gr.Slider(label="Grosor línea (barras de error)", minimum=0.5, maximum=5, step=0.5, value=1.0) + + with gr.Accordion("Configuración Avanzada de Ajuste (No implementado aún)", open=False): # Placeholder + with gr.Row(): + lower_bounds_str_ui = gr.Textbox(label="Lower Bounds (JSON, no usado)", lines=3, placeholder='{"logistic": {"Xo": 0, ...}}') + upper_bounds_str_ui = gr.Textbox(label="Upper Bounds (JSON, no usado)", lines=3, placeholder='{"logistic": {"Xm": 100, ...}}') + + simulate_btn = gr.Button("Simular y Graficar", variant="primary") + + with gr.Tab("Resultados"): + status_message_ui = gr.Textbox(label="Estado del Procesamiento", interactive=False, lines=2) + output_gallery_ui = gr.Gallery(label="Resultados Gráficos", columns=[2,1], height=600, object_fit="contain", preview=True) + output_table_ui = gr.Dataframe( + label="Tabla Comparativa de Modelos", + headers=["Experimento", "Modelo", + "R² Biomasa", "RMSE Biomasa", + "R² Sustrato", "RMSE Sustrato", + "R² Producto", "RMSE Producto"], + interactive=False, wrap=True, height=400 + ) + state_df_ui = gr.State(pd.DataFrame()) + state_params_ui = gr.State({}) # For parameter export - with gr.Accordion("Configuración Avanzada de Ajuste (No implementado aún)", open=False): with gr.Row(): - lower_bounds_str_ui = gr.Textbox(label="Lower Bounds (no usado actualmente)", lines=3) - upper_bounds_str_ui = gr.Textbox(label="Upper Bounds (no usado actualmente)", lines=3) - - simulate_btn = gr.Button("Simular y Graficar", variant="primary") - status_message_ui = gr.Textbox(label="Estado del Procesamiento", interactive=False) - output_gallery_ui = gr.Gallery(label="Resultados Gráficos", columns=[2,1], height='auto', object_fit="contain") - output_table_ui = gr.Dataframe( - label="Tabla Comparativa de Modelos", - headers=["Experimento", "Modelo", "R² Biomasa", "RMSE Biomasa", - "R² Sustrato", "RMSE Sustrato", "R² Producto", "RMSE Producto"], - interactive=False, wrap=True - ) - state_df_ui = gr.State(pd.DataFrame()) # To store the dataframe for export + export_excel_btn = gr.Button("Exportar Tabla a Excel") + export_csv_btn = gr.Button("Exportar Tabla a CSV") + export_params_btn = gr.Button("Exportar Parámetros a Excel") + + download_file_output_ui = gr.File(label="Descargar archivo", interactive=False) + # --- Event Handlers --- def run_simulation_interface(file, legend_pos, params_pos, models_sel, analysis_mode, exp_names, - low_bounds, up_bounds, plot_style, + low_bounds_str, up_bounds_str, plot_style, line_col, point_col, line_sty, marker_sty, show_leg, show_par, use_diff, maxfev, x_label, biomass_label, substrate_label, product_label, show_error_bars_arg, error_cap_size_arg, error_line_width_arg): - if file is None: return [], pd.DataFrame(), "Error: Por favor, sube un archivo Excel.", pd.DataFrame() + if file is None: return [], pd.DataFrame(), "Error: Por favor, sube un archivo Excel.", pd.DataFrame(), {} axis_labels = { 'x_label': x_label if x_label else 'Tiempo', 'biomass_label': biomass_label if biomass_label else 'Biomasa', 'substrate_label': substrate_label if substrate_label else 'Sustrato', 'product_label': product_label if product_label else 'Producto' } - if not models_sel: return [], pd.DataFrame(), "Error: Por favor, selecciona al menos un modelo.", pd.DataFrame() + if not models_sel: return [], pd.DataFrame(), "Error: Por favor, selecciona al menos un modelo.", pd.DataFrame(), {} - figures, comparison_df, message = process_all_data( + figures, comparison_df, message, collected_params = process_all_data( file, legend_pos, params_pos, models_sel, exp_names, - low_bounds, up_bounds, analysis_mode, plot_style, + low_bounds_str, up_bounds_str, analysis_mode, plot_style, line_col, point_col, line_sty, marker_sty, show_leg, show_par, use_diff, int(maxfev), axis_labels, show_error_bars_arg, error_cap_size_arg, error_line_width_arg ) - return figures, comparison_df, message, comparison_df + return figures, comparison_df, message, comparison_df, collected_params simulate_btn.click( fn=run_simulation_interface, @@ -1292,40 +1339,95 @@ def create_interface(): x_axis_label_input_ui, biomass_axis_label_input_ui, substrate_axis_label_input_ui, product_axis_label_input_ui, show_error_bars_ui, error_cap_size_ui, error_line_width_ui ], - outputs=[output_gallery_ui, output_table_ui, status_message_ui, state_df_ui] + outputs=[output_gallery_ui, output_table_ui, status_message_ui, state_df_ui, state_params_ui] ) - with gr.Row(): - export_excel_btn = gr.Button("Exportar Tabla a Excel") - export_csv_btn = gr.Button("Exportar Tabla a CSV") - - download_file_output_ui = gr.File(label="Descargar archivo", interactive=False) - - def export_excel_interface(df_to_export): + def export_df_to_file(df_to_export, file_format="excel"): if df_to_export is None or df_to_export.empty: - with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp: tmp.write(b"No hay datos para exportar."); return tmp.name + # Create a temporary text file with a message + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as tmp: + tmp.write("No hay datos en la tabla para exportar.") + return tmp.name, "No hay datos para exportar." try: - with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False, mode='w+b') as tmp: - df_to_export.to_excel(tmp.name, index=False); return tmp.name + suffix = ".xlsx" if file_format == "excel" else ".csv" + delete_after = False # Gradio handles deletion for gr.File output + + with tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_after, mode='w+b' if file_format=="excel" else 'w') as tmp_file: + if file_format == "excel": + df_to_export.to_excel(tmp_file.name, index=False) + else: # CSV + df_to_export.to_csv(tmp_file.name, index=False, encoding='utf-8') + return tmp_file.name, f"Tabla exportada a {suffix[1:]} exitosamente." except Exception as e: - with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp: tmp.write(f"Error al exportar a Excel: {e}".encode()); return tmp.name - - export_excel_btn.click(fn=export_excel_interface, inputs=state_df_ui, outputs=download_file_output_ui) + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as tmp_err: + tmp_err.write(f"Error al exportar la tabla: {str(e)}") + return tmp_err.name, f"Error exportando tabla: {str(e)}" + + export_excel_btn.click( + fn=lambda df: export_df_to_file(df, "excel"), + inputs=state_df_ui, + outputs=[download_file_output_ui, status_message_ui] + ) + export_csv_btn.click( + fn=lambda df: export_df_to_file(df, "csv"), + inputs=state_df_ui, + outputs=[download_file_output_ui, status_message_ui] + ) - def export_csv_interface(df_to_export): - if df_to_export is None or df_to_export.empty: - with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp: tmp.write(b"No hay datos para exportar."); return tmp.name + def export_parameters_interface(params_state_dict): + if not params_state_dict: + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as tmp: + tmp.write("No hay parámetros para exportar.") + return tmp.name, "No hay parámetros para exportar." try: - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode='w', encoding='utf-8') as tmp: # CSV is text - df_to_export.to_csv(tmp.name, index=False); return tmp.name + with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp: + with pd.ExcelWriter(tmp.name) as writer: + for exp_name, models_data in params_state_dict.items(): + for model_type_name, all_params_for_model_type in models_data.items(): + for param_category, category_params in all_params_for_model_type.items(): + if category_params and isinstance(category_params, dict) and category_params: + df_params = pd.DataFrame({ + 'Parámetro': list(category_params.keys()), + 'Valor': list(category_params.values()) + }) + # Sanitize sheet name + sheet_exp_name = "".join(c if c.isalnum() else "_" for c in exp_name[:15]) + sheet_model_name = "".join(c if c.isalnum() else "_" for c in model_type_name[:10]) + sheet_cat_name = "".join(c if c.isalnum() else "_" for c in param_category[:4]) + sheet_name = f"{sheet_exp_name}_{sheet_model_name}_{sheet_cat_name}" + # Ensure sheet name is unique if too long or special chars make them same + sheet_name = sheet_name[:31] # Excel sheet name limit + + # Basic uniqueness for sheet names (can be improved) + original_sheet_name = sheet_name + count = 1 + while sheet_name in writer.sheets: + sheet_name = f"{original_sheet_name[:28]}_{count}" + count += 1 + if count > 10: # Safety break + print(f"Too many similar sheet names for {original_sheet_name}") + break + + df_params.to_excel(writer, sheet_name=sheet_name, index=False) + return tmp.name, "Parámetros exportados exitosamente a Excel." except Exception as e: - with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp: tmp.write(f"Error al exportar a CSV: {e}".encode()); return tmp.name - - export_csv_btn.click(fn=export_csv_interface, inputs=state_df_ui, outputs=download_file_output_ui) + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as tmp_err: + tmp_err.write(f"Error al exportar parámetros: {str(e)}") + return tmp_err.name, f"Error exportando parámetros: {str(e)}" + + export_params_btn.click( + fn=export_parameters_interface, + inputs=[state_params_ui], + outputs=[download_file_output_ui, status_message_ui] + ) gr.Examples( examples=[ - [None, "best", "upper right", ["logistic", "baranyi"], "independent", "Exp A\nExp B", "", "", "whitegrid", "#0072B2", "#D55E00", "-", "o", True, True, False, 50000, "Tiempo (días)", "Células (millones/mL)", "Glucosa (mM)", "Anticuerpo (mg/L)", True, 3, 1.0] + [None, "best", "upper right", ["logistic", "baranyi"], "independent", "Hoja1_Datos\nHoja2_Control", + "", "", "whitegrid", "#0072B2", "#D55E00", "-", "o", + True, True, False, 50000, + "Tiempo (días)", "Células (millones/mL)", "Glucosa (mM)", "Anticuerpo (mg/L)", + True, 3, 1.0] ], inputs=[ file_input, legend_position_ui, params_position_ui, model_types_selected_ui, mode, experiment_names_str_ui, @@ -1335,10 +1437,23 @@ def create_interface(): x_axis_label_input_ui, biomass_axis_label_input_ui, substrate_axis_label_input_ui, product_axis_label_input_ui, show_error_bars_ui, error_cap_size_ui, error_line_width_ui ], - label="Ejemplo de Configuración (subir archivo manualmente)" + label="Ejemplo de Configuración (subir archivo manualmente)", + # Outputs for examples are not strictly necessary here as they depend on file processing ) return demo if __name__ == '__main__': + # For Pydantic v1, Config was used. For Pydantic v2, it's Model.model_config + # The YourModel class uses `class Config:` which is Pydantic v1 style for model config. + # If using Pydantic v2, it should be `model_config = ConfigDict(arbitrary_types_allowed=True)` + # However, YourModel is not critical to the app's function as shown. + + # Check Pydantic version for ConfigDict if issues arise with YourModel + # import pydantic + # if pydantic.__version__.startswith("1."): + # from pydantic import ConfigDict as PydanticConfigDict # Alias for clarity if needed + # else: # Pydantic v2+ + # from pydantic import ConfigDict as PydanticConfigDict + demo_instance = create_interface() - demo_instance.launch(share=True) \ No newline at end of file + demo_instance.launch(share=False, debug=True) # share=True for ngrok link if needed \ No newline at end of file