from datetime import datetime
from typing import Any, Optional

import pandas as pd

from .config import DEFAULT_ROUNDS_DURATION


def clean_numeric_column(series: pd.Series) -> pd.Series:
    """Convert a column of loosely formatted values into numbers.

    Every value is cast to ``str``, all characters other than digits and
    ``.`` are removed, and the remainder is parsed with ``pd.to_numeric``.
    Anything that still cannot be parsed becomes ``NaN`` instead of raising.

    Note that a minus sign is stripped along with every other non-digit
    character, so the cleaned values are never negative.

    Args:
        series: Column to clean; values may be of any type.

    Returns:
        A numeric Series aligned with the input.
    """
    digits_only = series.astype(str).str.replace(r'[^0-9.]', '', regex=True)
    return pd.to_numeric(digits_only, errors='coerce')


def calculate_age(dob_str: str, fight_date_str: str) -> Optional[float]:
    """Return the age in years between a date of birth and a fight date.

    Args:
        dob_str: Date of birth formatted as ``'%b %d, %Y'`` (abbreviated
            month name).
        fight_date_str: Fight date formatted as ``'%B %d, %Y'`` (full month
            name).

    Returns:
        The elapsed days divided by 365.25, or ``None`` when the date of
        birth is missing/empty or either string fails to parse.
    """
    if pd.isna(dob_str) or not dob_str:
        return None
    try:
        # The two inputs deliberately use different month formats (%b vs %B).
        dob = datetime.strptime(dob_str, '%b %d, %Y')
        fight_date = datetime.strptime(fight_date_str, '%B %d, %Y')
    except (ValueError, TypeError):
        return None
    return (fight_date - dob).days / 365.25


def parse_round_time_to_seconds(round_str: str, time_str: str) -> int:
    """Convert the ending round and in-round clock time to total seconds.

    Args:
        round_str: Round number in which the fight ended (1-based).
        time_str: Time within that round as ``'M:SS'``.

    Returns:
        Elapsed seconds, or ``0`` when either value cannot be parsed.
    """
    try:
        rounds = int(round_str)
        minutes, seconds = map(int, time_str.split(':'))
        # Every completed round contributes DEFAULT_ROUNDS_DURATION, which is
        # added directly to a seconds count and is therefore treated as
        # seconds per round.
        return ((rounds - 1) * DEFAULT_ROUNDS_DURATION) + (minutes * 60) + seconds
    except (ValueError, TypeError, AttributeError):
        return 0


def parse_striking_stats(stat_str: str) -> tuple[int, int]:
    """Parse a string such as ``'10 of 20'`` into ``(landed, attempted)``.

    Returns:
        The two integers, or ``(0, 0)`` when the input is not a string of
        the form ``'<int> of <int>'``.
    """
    try:
        landed, attempted = map(int, stat_str.split(' of '))
        return landed, attempted
    except (ValueError, TypeError, AttributeError):
        return 0, 0


def to_int_safe(val: Any) -> int:
    """Convert a value to ``int``, returning 0 when it is missing or invalid.

    Float-typed inputs such as ``3.0``, ``numpy.float64(3.0)`` or the string
    ``'3.0'`` are accepted and truncated toward zero. pandas stores integer
    columns as floats as soon as they contain a missing value, so these show
    up routinely and must not be reported as 0.

    Args:
        val: Any scalar; strings may carry surrounding whitespace.

    Returns:
        The integer value, or ``0`` for NaN/None, empty strings, non-numeric
        text, infinities and non-scalar input.
    """
    try:
        if pd.isna(val):
            return 0
        text = str(val).strip()
        if not text:
            return 0
        try:
            return int(text)
        except ValueError:
            # Not a plain integer literal: retry through float so that
            # values like '3.0' or '1e3' convert instead of collapsing to 0.
            return int(float(text))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text, 'nan', or an ambiguous pd.isna result
        # for list-like input; OverflowError: int(float('inf')).
        return 0


def prepare_fighters_data(fighters_df: pd.DataFrame) -> pd.DataFrame:
    """Prepare fighter data for analysis by cleaning and standardizing it.

    Works on a copy of ``fighters_df``: builds ``full_name`` from
    ``first_name`` and ``last_name``, keeps only the first row for each
    ``full_name``, indexes the frame by ``full_name``, and converts the
    ``height_cm``, ``reach_in`` and ``elo`` columns (when present) to numbers.

    Args:
        fighters_df: Raw fighter table; must contain ``first_name`` and
            ``last_name`` columns.

    Returns:
        A new DataFrame indexed by ``full_name``.
    """
    fighters_prepared = fighters_df.copy()
    # NOTE(review): if either name part is missing (NaN) the concatenation is
    # presumably NaN as well, so such rows would share one key below - confirm
    # whether that is intended.
    fighters_prepared['full_name'] = (
        fighters_prepared['first_name'] + ' ' + fighters_prepared['last_name']
    )

    # Handle duplicate fighter names by keeping the first entry.
    fighters_prepared = fighters_prepared.drop_duplicates(subset=['full_name'], keep='first')
    fighters_prepared = fighters_prepared.set_index('full_name')

    for col in ['height_cm', 'reach_in', 'elo']:
        if col in fighters_prepared.columns:
            fighters_prepared[col] = clean_numeric_column(fighters_prepared[col])

    return fighters_prepared