File size: 2,676 Bytes
f972c61
 
eb615ca
f972c61
eb615ca
f972c61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
from datetime import datetime
from typing import Optional, Any

from .config import DEFAULT_ROUNDS_DURATION

def clean_numeric_column(series: pd.Series) -> pd.Series:
    """A helper to clean string columns into numbers, handling errors."""
    series_str = series.astype(str)
    return pd.to_numeric(series_str.str.replace(r'[^0-9.]', '', regex=True), errors='coerce')

def calculate_age(dob_str: str, fight_date_str: str) -> Optional[float]:
    """Calculates age in years from a date of birth string and fight date string."""
    if pd.isna(dob_str) or not dob_str:
        return None
    try:
        dob = datetime.strptime(dob_str, '%b %d, %Y')
        fight_date = datetime.strptime(fight_date_str, '%B %d, %Y')
        return (fight_date - dob).days / 365.25
    except (ValueError, TypeError):
        return None

def parse_round_time_to_seconds(round_str: str, time_str: str) -> int:
    """Converts fight duration from round and time to total seconds."""
    try:
        rounds = int(round_str)
        minutes, seconds = map(int, time_str.split(':'))
        # Assuming 5-minute rounds for calculation simplicity
        return ((rounds - 1) * DEFAULT_ROUNDS_DURATION) + (minutes * 60) + seconds
    except (ValueError, TypeError, AttributeError):
        return 0

def parse_striking_stats(stat_str: str) -> tuple[int, int]:
    """Parses striking stats string like '10 of 20' into (landed, attempted)."""
    try:
        landed, attempted = map(int, stat_str.split(' of '))
        return landed, attempted
    except (ValueError, TypeError, AttributeError):
        return 0, 0

def to_int_safe(val: Any) -> int:
    """Safely converts a value to an integer, returning 0 if it's invalid or empty."""
    if pd.isna(val):
        return 0
    try:
        # handle strings with whitespace or empty strings
        return int(str(val).strip() or 0)
    except (ValueError, TypeError):
        return 0

def prepare_fighters_data(fighters_df: pd.DataFrame) -> pd.DataFrame:
    """Prepares fighter data for analysis by cleaning and standardizing."""
    fighters_prepared = fighters_df.copy()
    fighters_prepared['full_name'] = fighters_prepared['first_name'] + ' ' + fighters_prepared['last_name']
    
    # Handle duplicate fighter names by keeping the first entry
    fighters_prepared = fighters_prepared.drop_duplicates(subset=['full_name'], keep='first')
    fighters_prepared = fighters_prepared.set_index('full_name')

    for col in ['height_cm', 'reach_in', 'elo']:
        if col in fighters_prepared.columns:
            fighters_prepared[col] = clean_numeric_column(fighters_prepared[col])
    
    return fighters_prepared