Spaces:

AlvaroMros
/

ufc-predictor

Sleeping

App Files Files Community

ufc-predictor / src /predict /main.py

AlvaroMros

Add k-fold cross-validation to prediction pipeline

eb615ca 7 days ago

raw

history blame contribute delete

2.92 kB

	import argparse

	from .pipeline import PredictionPipeline
	from .models import (
	EloBaselineModel,
	LogisticRegressionModel,
	XGBoostModel,
	SVCModel,
	RandomForestModel,
	BernoulliNBModel,
	LGBMModel
	)

	# --- Define Models to Run ---
	# Each class named below is instantiated once with its default arguments,
	# in the order listed. Add or remove a class to change the evaluated set.
	MODELS_TO_RUN = [
	    model_cls()
	    for model_cls in (
	        EloBaselineModel,
	        LogisticRegressionModel,
	        XGBoostModel,
	        SVCModel,
	        RandomForestModel,
	        BernoulliNBModel,
	        LGBMModel,
	    )
	]
	# --- End of Model Definition ---

	def main(argv=None) -> int:
	    """
	    Main entry point to run the prediction pipeline.

	    Parses the command-line options, builds a PredictionPipeline over
	    MODELS_TO_RUN and runs either the standard pipeline or 3-fold
	    cross-validation (when --kfold is given).

	    Args:
	        argv: Optional list of argument strings to parse instead of the
	            process command line. None (the default) makes argparse read
	            sys.argv[1:], which is the original behaviour.

	    Returns:
	        0 when the pipeline ran, 1 when a required data file was missing.
	    """
	    parser = argparse.ArgumentParser(description="UFC Fight Prediction Pipeline")
	    parser.add_argument(
	        '--report',
	        type=str,
	        default='detailed',
	        choices=['detailed', 'summary'],
	        help="Type of report to generate: 'detailed' (file) or 'summary' (console)."
	    )
	    # Kept for command-line compatibility only: with action='store_true' and
	    # default=True the parsed value is True whether or not the flag is given.
	    parser.add_argument(
	        '--use-existing-models',
	        action='store_true',
	        default=True,
	        help="Use existing saved models if available and no new data (default: True)."
	    )
	    parser.add_argument(
	        '--no-use-existing-models',
	        action='store_true',
	        default=False,
	        help="Force retrain all models from scratch, ignoring existing saved models."
	    )
	    parser.add_argument(
	        '--force-retrain',
	        action='store_true',
	        default=False,
	        help="Force retrain all models even if no new data is available."
	    )
	    parser.add_argument(
	        '--kfold',
	        action='store_true',
	        help='Run 3-fold CV instead of standard split.'
	    )
	    args = parser.parse_args(argv)

	    # args.use_existing_models can never be False (see above), so the only
	    # switch that actually disables model reuse is --no-use-existing-models.
	    use_existing_models = not args.no_use_existing_models
	    force_retrain = args.force_retrain

	    # Report the effective mode; the "no reuse" flag takes precedence in the
	    # message when several flags are combined.
	    if args.no_use_existing_models:
	        print("No-use-existing-models flag set: All models will be retrained from scratch.")
	    elif force_retrain:
	        print("Force-retrain flag set: All models will be retrained regardless of new data.")
	    elif use_existing_models:
	        print("Using existing models if available and no new data detected.")

	    # The model instances come from the module-level MODELS_TO_RUN list.
	    pipeline = PredictionPipeline(
	        models=MODELS_TO_RUN,
	        use_existing_models=use_existing_models,
	        force_retrain=force_retrain
	    )
	    try:
	        if args.kfold:
	            cv_results = pipeline.run_kfold_cv(k=3, holdout_events=1)
	            print(cv_results)
	        else:
	            pipeline.run(detailed_report=(args.report == 'detailed'))
	    except FileNotFoundError as e:
	        print(f"Error: {e}")
	        print("Please ensure the required data files exist. You may need to run the scraping and ELO analysis first.")
	        # Signal the failure to the caller instead of finishing as a success.
	        return 1
	    return 0


	if __name__ == '__main__':
	    # Propagate main()'s status so a missing-data run exits non-zero.
	    raise SystemExit(main())