File size: 2,922 Bytes
ffd453e
 
9678fdb
 
2aed0aa
 
 
 
 
 
 
 
e012a04
ffd453e
 
 
 
 
 
 
 
 
 
 
 
3994c21
e012a04
 
bf7e729
 
e012a04
ffd453e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb615ca
 
 
 
 
ffd453e
e012a04
5271c2e
 
 
 
 
 
 
 
 
 
 
f972c61
ffd453e
5271c2e
ffd453e
5271c2e
 
 
bf7e729
eb615ca
 
 
 
 
bf7e729
 
 
eb615ca
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import argparse

from .pipeline import PredictionPipeline
from .models import (
    EloBaselineModel, 
    LogisticRegressionModel, 
    XGBoostModel,
    SVCModel,
    RandomForestModel,
    BernoulliNBModel,
    LGBMModel
)

# --- Define Models to Run ---
# Every class listed below is instantiated exactly once, in order, and
# handed to the pipeline for evaluation. Add/remove classes here to
# change which models get run.
MODELS_TO_RUN = [
    model_cls()
    for model_cls in (
        EloBaselineModel,
        LogisticRegressionModel,
        XGBoostModel,
        SVCModel,
        RandomForestModel,
        BernoulliNBModel,
        LGBMModel,
    )
]
# --- End of Model Definition ---

def main():
    """Run the UFC fight-prediction pipeline from the command line.

    Parses CLI flags controlling the report format, reuse of saved models,
    forced retraining, and optional 3-fold cross-validation, then executes
    the pipeline over ``MODELS_TO_RUN``. A missing-data ``FileNotFoundError``
    is reported to the console instead of raising a traceback.
    """
    parser = argparse.ArgumentParser(description="UFC Fight Prediction Pipeline")
    parser.add_argument(
        '--report',
        type=str,
        default='detailed',
        choices=['detailed', 'summary'],
        help="Type of report to generate: 'detailed' (file) or 'summary' (console)."
    )
    # NOTE(review): with action='store_true' AND default=True this flag can
    # never be False, so passing it is a no-op. It is kept so existing
    # invocations that pass --use-existing-models keep working; the opt-out
    # flag below is what actually controls behavior.
    parser.add_argument(
        '--use-existing-models',
        action='store_true',
        default=True,
        help="Use existing saved models if available and no new data (default: True)."
    )
    parser.add_argument(
        '--no-use-existing-models',
        action='store_true',
        default=False,
        help="Force retrain all models from scratch, ignoring existing saved models."
    )
    parser.add_argument(
        '--force-retrain',
        action='store_true',
        default=False,
        help="Force retrain all models even if no new data is available."
    )
    parser.add_argument(
        '--kfold',
        action='store_true',
        help='Run 3-fold CV instead of standard split.'
    )
    args = parser.parse_args()

    # args.use_existing_models is always True (see NOTE above), so the
    # opt-out flag alone decides whether saved models are reused. This is
    # behaviorally identical to the old
    # `not args.no_use_existing_models and args.use_existing_models`.
    use_existing_models = not args.no_use_existing_models
    force_retrain = args.force_retrain

    # Announce the effective mode; the opt-out flag takes priority over
    # force-retrain in the message (both effects still apply downstream).
    if args.no_use_existing_models:
        print("No-use-existing-models flag set: All models will be retrained from scratch.")
    elif force_retrain:
        print("Force-retrain flag set: All models will be retrained regardless of new data.")
    elif use_existing_models:
        print("Using existing models if available and no new data detected.")

    pipeline = PredictionPipeline(
        models=MODELS_TO_RUN,
        use_existing_models=use_existing_models,
        force_retrain=force_retrain
    )
    try:
        if args.kfold:
            # Fixed 3-fold CV with the most recent event held out.
            cv_results = pipeline.run_kfold_cv(k=3, holdout_events=1)
            print(cv_results)
        else:
            pipeline.run(detailed_report=(args.report == 'detailed'))
    except FileNotFoundError as e:
        # Missing data files are expected when the upstream scraping/ELO
        # steps have not been run; report cleanly instead of crashing.
        print(f"Error: {e}")
        print("Please ensure the required data files exist. You may need to run the scraping and ELO analysis first.")

# Script entry point: run the pipeline only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()