|
""" |
|
Gokul Ramanan |
|
panel_creation_predictions.py |
|
8/6/2025 |
|
Description: Creates a panel dashboard to predict the number of runs some |
|
of the top young cricketers will score in 5 years for a specific format. Minimum |
|
Criteria for the prediction is 5 years of international experience in the format. |
|
""" |
|
|
|
import panel as pn |
|
from runs_api import RUNSAPI |
|
import plotly.graph_objects as go |
|
import plotly.colors as pc |
|
from statsmodels.nonparametric.smoothers_lowess import lowess |
|
import pandas as pd |
|
|
|
|
|
# Enable the Panel extensions this dashboard relies on: Plotly panes and the
# Tabulator table widget.
pn.extension("plotly", "tabulator")

# Single shared data-access object; the widgets and callbacks in this file
# read their data from it.
api = RUNSAPI()
api.load_runs("add_preds.csv")
|
|
|
|
|
# --- Search widgets: their values are passed to the plot/table callbacks ---

# Match formats to include; all three are ticked initially.
format_select = pn.widgets.CheckBoxGroup(
    name="Format",
    options=["test", "odi", "t20i"],
    value=["test", "odi", "t20i"],
)

# Options are the distinct values of the "Country" column, sorted.
country_select = pn.widgets.MultiSelect(
    name="Country",
    options=sorted(api.runs["Country"].unique().tolist()),
    size=6,
)

# Bounds are the smallest and largest "Year" present in the loaded data.
year_slider = pn.widgets.IntRangeSlider(
    name="Year Range",
    start=api.runs["Year"].min(),
    end=api.runs["Year"].max(),
    step=1,
)

top_n_slider = pn.widgets.IntSlider(
    name="Top N Players",
    start=1,
    end=12,
    value=12,
)

# Optional explicit player pick; options are the distinct "Name" values, sorted.
player_select = pn.widgets.MultiChoice(
    name="Select Players",
    options=sorted(api.runs["Name"].unique().tolist()),
    placeholder="Choose players to compare (optional)...",
)

career_length_slider = pn.widgets.IntRangeSlider(
    name="Career Length (Years)",
    start=1,
    end=25,
    step=1,
    value=(1, 25),
)

# Two independent check boxes for the "plays all three formats" filter;
# both start unticked.
all_formats_toggle = pn.widgets.Checkbox(
    name="Only Include Players in All 3 Formats",
    value=False,
)
not_formats_toggle = pn.widgets.Checkbox(
    name="Don't Include Players in All 3 Formats",
    value=False,
)
|
|
|
|
|
# --- Plot-appearance widgets ---

# Figure size in pixels.
width = pn.widgets.IntSlider(
    name="Width",
    start=250,
    end=2000,
    step=250,
    value=1500,
)
height = pn.widgets.IntSlider(
    name="Height",
    start=200,
    end=2500,
    step=100,
    value=800,
)

# Column whose value decides each trace's colour.
color_by = pn.widgets.Select(
    name="Color By",
    options=["Name", "Country", "Debut Bin"],
    value="Name",
)

# When ticked, the x axis uses "Career Year" instead of "Year".
career_align_toggle = pn.widgets.Checkbox(
    name="Align Careers to Year 0",
    value=False,
)

detailed_tooltip = pn.widgets.Checkbox(
    name="Show Detailed Hover Info",
    value=True,
)

plot_metric_select = pn.widgets.RadioButtonGroup(
    name="Plot Metric",
    options=["Cumulative Runs", "Cumulative Batting Average"],
    button_type="primary",
    value="Cumulative Runs",
)

# Short theme labels shown on the buttons -> Plotly template names.
theme_map = {
    "white": "plotly_white",
    "dark": "plotly_dark",
    "gray1": "ggplot2",
    "gray2": "seaborn",
    "white2": "simple_white",
}

theme_select = pn.widgets.RadioButtonGroup(
    name="Plot Theme",
    options=list(theme_map),
    button_type="success",
    value="dark",
)
|
|
|
|
|
# Maps each supported plot metric to
# (y-axis column, label used in hover/title/axis, ranking metric passed to the API).
_PLOT_METRICS = {
    "Cumulative Runs": ("cumulative_format_runs", "Runs", "Runs"),
    "Cumulative Batting Average": ("cumulative_format_average", "Average", "Average"),
}

# Columns attached to every trace as ``customdata`` so the hover box can show them.
_HOVER_COLUMNS = ["cumulative_innings", "cumulative_matches", "cumulative_100s", "cumulative_50s"]


def _build_hovertemplate(name, x_label, metric_label, color_by_label, color_label, detailed):
    """Return the Plotly hovertemplate string for one player's traces."""
    parts = [f"<b>{name}</b>", f"{x_label}: %{{x}}", f"{metric_label}: %{{y}}"]
    if detailed:
        # Indices refer to the column order of _HOVER_COLUMNS.
        parts += ["Innings: %{customdata[0]}", "Matches: %{customdata[1]}"]
    parts.append(f"{color_by_label}: {color_label}<extra></extra>")
    return "<br>".join(parts)


def get_plot(format_select, country_select, year_slider, top_n_slider, player_select, career_length_slider, width,
             height, theme_select_value, plot_metric_select_value, color_by_value, detailed_tooltip_value,
             align_career_value, all_formats_toggle, not_formats_toggle):
    """
    Generate a cumulative-metric line chart based on filter selections.

    One trace is drawn per player from the rows where "Is_Prediction" is False,
    plus a diamond-marker trace for that player's rows where it is True.
    When careers are aligned, a LOWESS curve over all plotted rows is added.

    Parameters:
        format_select (list): Selected match formats (e.g., ['odi', 'test']).
        country_select (list): List of selected countries.
        year_slider (tuple): (start_year, end_year) to filter data by year.
        top_n_slider (int): Number of top players to include.
        player_select (list): Explicitly chosen players; forwarded to
            api.apply_filters as player_select_value.
        career_length_slider (tuple): (min, max) career length in years;
            forwarded to api.apply_filters.
        width (int): Plot width in pixels.
        height (int): Plot height in pixels.
        theme_select_value (str): Key of theme_map selecting the Plotly template.
        plot_metric_select_value (str): "Cumulative Runs" or
            "Cumulative Batting Average".
        color_by_value (str): Column whose value decides each trace's colour
            and legend group.
        detailed_tooltip_value (bool): If True, hover text also shows
            cumulative innings and matches.
        align_career_value (bool): If True, plot against "Career Year"
            instead of "Year" and add the best-fit curve.
        all_formats_toggle (bool): Forwarded to api.apply_filters as
            only_all_formats.
        not_formats_toggle (bool): Forwarded to api.apply_filters as
            not_all_formats.

    Returns:
        panel.pane.Plotly or panel.pane.Markdown: Plotly pane if data exists,
        otherwise a message pane.

    Raises:
        ValueError: If plot_metric_select_value is not a supported metric.
    """
    # Resolve the metric first so an unsupported value fails with a clear message
    # instead of an unbound-variable error further down.
    try:
        y_col, hover, ranking_metric = _PLOT_METRICS[plot_metric_select_value]
    except KeyError:
        raise ValueError(
            f"Unsupported plot metric {plot_metric_select_value!r}; "
            f"expected one of {sorted(_PLOT_METRICS)}"
        ) from None

    df = api.apply_filters(formats=format_select, countries=country_select, year_range=year_slider,
                           top_n_players=top_n_slider, player_select_value=player_select,
                           ranking_metric=ranking_metric, career_length_slider=career_length_slider,
                           only_all_formats=all_formats_toggle, not_all_formats=not_formats_toggle)

    if df.empty:
        return pn.pane.Markdown("### No data for selected filters.", width=700)

    # Several qualitative palettes concatenated; colours are reused cyclically
    # if there are more labels than colours.
    color_palette = (
        pc.qualitative.Set3 + pc.qualitative.Set2 +
        pc.qualitative.Bold + pc.qualitative.Pastel +
        pc.qualitative.Dark2 + pc.qualitative.Safe
    )
    unique_labels = df[color_by_value].unique()
    color_map = {label: color_palette[i % len(color_palette)] for i, label in enumerate(sorted(unique_labels))}

    # The same string is both the x column and the axis / hover label.
    x_col = "Career Year" if align_career_value else "Year"

    fig = go.Figure()

    for name, group in df.groupby("Name"):
        # A player's colour comes from the first row's value of the colour-by column.
        color_label = group[color_by_value].iloc[0]
        color = color_map.get(color_label, "#000000")
        legend_group = str(color_label)  # Plotly expects a string here

        hovertemplate = _build_hovertemplate(
            name, x_col, hover, color_by_value, color_label, detailed_tooltip_value
        )

        # Explicit comparisons are kept (rather than ``~mask``) so that rows with a
        # missing flag fall into neither subset, as before.
        historical = group[group["Is_Prediction"] == False]  # noqa: E712
        predicted = group[group["Is_Prediction"] == True]  # noqa: E712

        fig.add_trace(go.Scatter(
            x=historical[x_col],
            y=historical[y_col],
            mode='lines+markers',
            name=name,
            customdata=historical[_HOVER_COLUMNS],
            line=dict(color=color),
            legendgroup=legend_group,
            hovertemplate=hovertemplate
        ))

        if not predicted.empty:
            fig.add_trace(go.Scatter(
                x=predicted[x_col],
                y=predicted[y_col],
                mode='markers',
                name=f"{name} (Predicted)",
                customdata=predicted[_HOVER_COLUMNS],
                line=dict(color=color, dash='dot'),
                marker=dict(size=10, symbol='diamond'),
                legendgroup=legend_group,
                hovertemplate=hovertemplate.replace("<b>", "<b>[Predicted] ")
            ))

    if align_career_value:
        # Smooth over every plotted row (all players together) to show the
        # overall trajectory.
        curve_df = df[[x_col, y_col]].dropna()

        if not curve_df.empty:
            smoothed = lowess(endog=curve_df[y_col], exog=curve_df[x_col], frac=0.2)

            fig.add_trace(go.Scatter(
                x=smoothed[:, 0],
                y=smoothed[:, 1],
                mode='lines',
                name="Best Fit Curve",
                line=dict(width=4, color='black', dash='dot'),
                hoverinfo='skip',
                showlegend=True
            ))

    fig.update_layout(
        title=f"Cumulative International {hover} Over Time",
        xaxis_title=x_col,
        yaxis_title=f"Cumulative {hover}",
        width=width,
        height=height,
        showlegend=True,
        template=theme_map[theme_select_value]
    )

    return pn.pane.Plotly(fig)
|
|
|
def get_catalog(format_select, country_select, year_slider, top_n_slider,player_select,career_length_slider, all_formats_toggle,
                not_formats_toggle):
    """
    Build the data table shown in the "Table" tab from the current filters.

    Parameters:
        format_select (list): Selected match formats (e.g., ['t20i']).
        country_select (list): List of selected countries.
        year_slider (tuple): (start_year, end_year) year range filter.
        top_n_slider (int): Number of top players to include.
        player_select (list): Explicitly chosen players; forwarded to
            api.apply_filters as player_select_value.
        career_length_slider (tuple): (min, max) career length in years;
            forwarded to api.apply_filters.
        all_formats_toggle (bool): Forwarded as only_all_formats.
        not_formats_toggle (bool): Forwarded as not_all_formats.

    Returns:
        panel.widgets.Tabulator: A non-selectable table of the filtered
        DataFrame, paginated locally at 20 rows per page.
    """
    filtered = api.apply_filters(
        formats=format_select,
        countries=country_select,
        year_range=year_slider,
        top_n_players=top_n_slider,
        player_select_value=player_select,
        career_length_slider=career_length_slider,
        only_all_formats=all_formats_toggle,
        not_all_formats=not_formats_toggle,
    )

    # Columns exposed in the table, in display order.
    visible_columns = [
        "Name",
        "Year",
        "Country",
        "Matches",
        "Innings",
        "Runs",
        "cumulative_matches",
        "cumulative_innings",
        "cumulative_format_runs",
        "cumulative_format_average",
        "Career Year",
        "Debut Year",
        "Debut Bin",
    ]

    return pn.widgets.Tabulator(
        filtered[visible_columns],
        selectable=False,
        pagination='local',
        page_size=20,
    )
|
|
|
|
|
# Bind the callbacks to their widgets. Arguments are positional, so the order
# here must match each function's parameter order.
plot = pn.bind(
    get_plot,
    format_select,
    country_select,
    year_slider,
    top_n_slider,
    player_select,
    career_length_slider,
    width,
    height,
    theme_select,
    plot_metric_select,
    color_by,
    detailed_tooltip,
    career_align_toggle,
    all_formats_toggle,
    not_formats_toggle,
)

catalog = pn.bind(
    get_catalog,
    format_select,
    country_select,
    year_slider,
    top_n_slider,
    player_select,
    career_length_slider,
    all_formats_toggle,
    not_formats_toggle,
)
|
|
|
|
|
|
|
# Markdown shown in the "Selection Criteria" tab: how the players were chosen
# and which formats each one is eligible for.
player_selection_md = """
### Player Selection Criteria

Developed by Gokul Ramanan (August 2025)

To ensure a balanced and representative analysis, we selected players using the following criteria:

- **Top 15 in ICC Rankings (as of July 8, 2025)** in at least **one** format.
- **Minimum 5–10 years of international batting experience** across formats.
- If a player has played **11+ years**, they must be **under 35 years of age**.
- **Exceptions (4 total)** were made for players with **exceptional potential** or realistic chances to **break all-time records**.
- **If a player hasn’t played a format in the last 5 years**, they are considered **retired** in that format. Their past runs in that format are excluded for simplicity.

---

### Players and Format Eligibility

| Player | Formats Eligible | Reason for Selection |
|---------------|------------------|----------------------|
| **Joe Root** | Test, ODI | Likely to challenge Sachin's Test run tally |
| **Shubman Gill** | Test, ODI | Next big Indian star; now Indian Test captain |
| **Suryakumar Yadav** | T20 | T20 beast; currently India T20I captain |
| **Shreyas Iyer** | ODI, T20 | Key to India's white-ball success, CT hero |
| **Travis Head** | All Three | Met all criteria requirements |
| **Rishabh Pant** | All Three | Met all criteria requirements |
| **Babar Azam** | All Three | Met all criteria requirements |
| **Kusal Mendis** | All Three | Met all criteria requirements |
| **Shai Hope** | All Three | Met all criteria requirements |
| **KL Rahul** | All Three | Met all criteria requirements |
| **Mohammad Rizwan** | All Three | Met all criteria requirements |
| **Aiden Markram** | All Three | Met all criteria requirements |

"""

selection_tab = pn.pane.Markdown(
    player_selection_md,
    width=950,
)
|
|
|
# Markdown shown in the "ML Model Walkthrough" tab: a prose description of the
# prediction pipeline behind the forecast points.
ml_walkthrough_md = """
### Machine Learning Prediction Model Walkthrough

This dashboard’s prediction pipeline begins by analyzing the **run-scoring trajectories of the top 85 batters in cricket history**.
From that foundation, a hybrid model was designed to estimate a batter’s future scoring ability by combining three perspectives:
their **recent form**, their **career average**, and their **trajectory average** (based on where they are in their career).
These features were then weighted in a custom approach that consistently outperformed standard machine learning baselines like
XGBoost when backtested across multiple eras. This provided a strong foundation for player-specific averages that realistically
capture the effect of career phases, including late-career decline.

The next stage focused on predicting **match volume and innings opportunities**, since raw scoring ability means little without
chances to bat. Using ICC’s Future Tours Programme (FTP) data and historical schedules back to 2007, the model trained an XGBoost
regressor to forecast the number of matches each country will play through 2030. These match forecasts were then combined with
historical innings-per-match ratios to build an innings prediction engine. Backtesting this module produced exceptional accuracy,
achieving **R² = 0.990** and **MAPE = 0.029**, with nearly 100% of predictions falling within a 20% error margin.

Finally, the pipeline ties the pieces together: **Predicted Runs = Predicted Innings × Predicted Batting Average**. Each
player’s projected 2030 endpoint is marked on the dashboard with a distinct diamond symbol, allowing users to compare actual
versus forecasted career totals in real time. This end-to-end approach — from career form modeling to schedule forecasting —
provides a transparent, accurate, and highly interpretable framework for projecting the next generation of international
batting greats. It demonstrates how domain knowledge, careful backtesting, and hybrid modeling can create results that not
only beat off-the-shelf ML models but also resonate with cricket’s real-world dynamics.
"""

ml_tab = pn.pane.Markdown(
    ml_walkthrough_md,
    width=950,
)
|
|
|
|
|
|
|
|
|
# Both sidebar cards share one width so they line up.
card_width = 320

# Sidebar card holding the data-filter widgets.
search_card = pn.Card(
    pn.Column(
        format_select,
        country_select,
        year_slider,
        top_n_slider,
        player_select,
        career_length_slider,
        all_formats_toggle,
        not_formats_toggle,
    ),
    title="Search",
    width=card_width,
    collapsed=False,
)

# Sidebar card holding the plot-appearance widgets.
plot_card = pn.Card(
    pn.Column(
        career_align_toggle,
        plot_metric_select,
        color_by,
        detailed_tooltip,
        width,
        height,
        theme_select,
    ),
    title="Plot",
    width=card_width,
    collapsed=False,
)
|
|
|
|
|
|
|
|
|
# Assemble the page: filter/plot cards in the sidebar, four tabs in the main
# area, and mark the template servable.
layout = pn.template.FastListTemplate(
    title="Top Prospective International Cricket Batters: Predicted Runs by 2030",
    sidebar=[search_card, plot_card],
    theme_toggle=False,
    main=[
        pn.Tabs(
            ("Table", catalog),
            ("Time Series", plot),
            ("Selection Criteria", selection_tab),
            ("ML Model Walkthrough", ml_tab),
            active=1,  # open on the "Time Series" tab
        ),
    ],
    header_background='#a93226',
).servable()