Spaces:
Sleeping
Sleeping
import pandas as pd | |
from datetime import datetime | |
from typing import Optional, Any | |
from .config import DEFAULT_ROUNDS_DURATION | |
def clean_numeric_column(series: pd.Series) -> pd.Series:
    """Coerce a column of loosely formatted values into numbers.

    Each value is rendered as text, every character other than an ASCII
    digit or a dot is removed, and what remains is parsed as a number.
    Values that still cannot be parsed (including ones left empty after
    stripping) come back as NaN.

    Note: minus signs and exponent markers are stripped along with units
    and punctuation, so signed or scientific-notation input is not
    preserved.
    """
    as_text = series.astype(str)
    digits_and_dots = as_text.str.replace(r'[^0-9.]', '', regex=True)
    return pd.to_numeric(digits_and_dots, errors='coerce')
def calculate_age(dob_str: str, fight_date_str: str) -> Optional[float]:
    """Return a fighter's age in years on the day of a fight.

    Both dates are expected in the form ``'<month> <day>, <year>'`` where
    the month may be either abbreviated (``'Jul 19, 1987'``) or spelled
    out (``'July 19, 1987'``).

    Args:
        dob_str: Date of birth. Missing (None/NaN) or empty values yield
            ``None``.
        fight_date_str: Date of the fight.

    Returns:
        The elapsed days divided by 365.25, or ``None`` when either date
        is missing, not a string, or not in a recognised format.
    """
    if pd.isna(dob_str) or not dob_str:
        return None

    def _parse(text: str) -> datetime:
        # Accept both month spellings so a change in the upstream date
        # format does not silently turn every age into None.
        for fmt in ('%b %d, %Y', '%B %d, %Y'):
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError(f'unrecognised date format: {text!r}')

    try:
        dob = _parse(dob_str)
        fight_date = _parse(fight_date_str)
    except (ValueError, TypeError):
        # ValueError: unparseable text; TypeError: non-string input.
        return None
    return (fight_date - dob).days / 365.25
def parse_round_time_to_seconds(round_str: str, time_str: str) -> int:
    """Turn a finishing round and clock time into a total duration.

    Args:
        round_str: Round in which the fight ended, as integer text.
        time_str: Time elapsed within that round, formatted ``'M:SS'``.

    Returns:
        Completed rounds times ``DEFAULT_ROUNDS_DURATION`` plus the time
        elapsed in the final round, or 0 when either input cannot be
        parsed.
    """
    try:
        completed_rounds = int(round_str) - 1
        minutes_text, seconds_text = time_str.split(':')
        # Every earlier round is counted at the configured default length
        # (presumably seconds per five-minute round -- confirm in config).
        return (
            completed_rounds * DEFAULT_ROUNDS_DURATION
            + int(minutes_text) * 60
            + int(seconds_text)
        )
    except (ValueError, TypeError, AttributeError):
        return 0
def parse_striking_stats(stat_str: str) -> tuple[int, int]:
    """Split a ``'<landed> of <attempted>'`` string into two integers.

    Returns ``(landed, attempted)``, or ``(0, 0)`` when the input is not
    a string or does not match that shape.
    """
    try:
        landed_text, attempted_text = stat_str.split(' of ')
        return int(landed_text), int(attempted_text)
    except (ValueError, TypeError, AttributeError):
        # Covers non-string input, a missing/extra ' of ', and non-numeric parts.
        return 0, 0
def to_int_safe(val: Any) -> int:
    """Convert a value to an integer, falling back to 0.

    Args:
        val: Any scalar; typically a string or number taken from a
            DataFrame cell.

    Returns:
        The integer value. Float-like input (``3.0``, ``'3.0'``) is
        truncated toward zero. Missing values, empty or whitespace-only
        strings, non-numeric text, and infinities all yield 0.
    """
    if pd.isna(val):
        return 0

    text = str(val).strip()
    if not text:
        return 0

    try:
        return int(text)
    except ValueError:
        pass

    # Integer columns that contain NaN are read by pandas as floats, so
    # values such as 3.0 must go through float() before int().
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or 'nan'; OverflowError: infinity.
        return 0
def prepare_fighters_data(fighters_df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the fighter table indexed by full name.

    The input frame is not modified. A ``full_name`` value is built from
    ``first_name`` and ``last_name``, duplicate names are reduced to their
    first occurrence, and ``full_name`` becomes the index. The
    ``height_cm``, ``reach_in`` and ``elo`` columns, when present, are
    converted to numbers with ``clean_numeric_column``.

    Args:
        fighters_df: Fighter records with at least ``first_name`` and
            ``last_name`` columns.

    Returns:
        A new DataFrame indexed by ``full_name``.
    """
    fighters_prepared = fighters_df.copy()

    first_name = fighters_prepared['first_name']
    last_name = fighters_prepared['last_name']
    full_name = first_name + ' ' + last_name
    # A missing name part makes the concatenation missing as well, and
    # drop_duplicates would then collapse every such fighter into a single
    # row. Fall back to whichever part exists so single-name fighters are
    # kept; rows with both parts are unaffected.
    fighters_prepared['full_name'] = full_name.fillna(first_name).fillna(last_name)

    # Handle duplicate fighter names by keeping the first entry.
    fighters_prepared = fighters_prepared.drop_duplicates(subset=['full_name'], keep='first')
    fighters_prepared = fighters_prepared.set_index('full_name')

    for col in ('height_cm', 'reach_in', 'elo'):
        if col in fighters_prepared.columns:
            fighters_prepared[col] = clean_numeric_column(fighters_prepared[col])
    return fighters_prepared