import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import cv2
from PIL import Image
# ---------------------------------------------------------------------------
# Data and model artifacts
# ---------------------------------------------------------------------------
# All paths are relative, so they are resolved against the working directory
# the app is launched from.
player_stats_df = pd.read_csv("cleaned.csv")
model = joblib.load("svc_face_classifier.pkl")
label_encoder = joblib.load("label_encoder.pkl")
# The frontal-face Haar cascade file is taken from the directory that OpenCV
# exposes as cv2.data.haarcascades.
face_cascade = cv2.CascadeClassifier(
    f"{cv2.data.haarcascades}haarcascade_frontalface_default.xml"
)

# ---------------------------------------------------------------------------
# Page chrome
# ---------------------------------------------------------------------------
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    layout="centered",
    page_title="Player Face Classifier & Comparison",
)
# Markdown block whose body is only a newline (nothing visible is rendered).
st.markdown(
    """
""",
    unsafe_allow_html=True,
)
st.title("🧠 Player Stats Comparison")

# Disclaimer shown under the title: the classifier is basic and limited to
# Indian players.
st.markdown(
    """
⚠️ This app uses a basic Machine Learning model to classify Indian cricket players based on facial images.
The prediction accuracy may not be high. In future, a more robust Deep Learning model will be integrated. Currently, the app supports only Indian players.
""",
    unsafe_allow_html=True,
)
# Image Preprocessing + Face Detection Function
def detect_and_predict_face(image_file):
    """Predict which player appears in an uploaded image.

    The image is decoded as RGB, converted to grayscale and scanned with the
    module-level ``face_cascade``. The first detection returned by the cascade
    is cropped, resized to 64x64, flattened into a single feature row and
    passed to the module-level ``model``; the predicted label is then decoded
    with ``label_encoder``.

    Args:
        image_file: Image source handed directly to ``PIL.Image.open``.

    Returns:
        A ``(name, error)`` pair: ``(predicted_name, None)`` when a face was
        found, or ``(None, "No face detected!")`` when the cascade returned
        no detections.
    """
    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)

    detections = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
    # len() rather than truthiness: the detections may come back as an array.
    if len(detections) == 0:
        return None, "No face detected!"

    # Only the first detection is used, even if several faces were found.
    left, top, width, height = detections[0]
    face_crop = gray[top:top + height, left:left + width]

    # One row of 64*64 grayscale pixel values is what the classifier is fed.
    features = cv2.resize(face_crop, (64, 64)).flatten().reshape(1, -1)
    encoded_label = model.predict(features)[0]
    return label_encoder.inverse_transform([encoded_label])[0], None
# ---------------------------------------------------------------------------
# Helpers for the comparison flow
# ---------------------------------------------------------------------------
# Per-format stat columns are looked up by names built from these labels
# (e.g. "batting_Runs_Test", "bowling_ODI_Wickets", "Matches_IPL").
formats = ['Test', 'ODI', 'T20', 'IPL']


def _stat(row, column):
    """Return ``row[column]``, treating an absent column or a missing value as 0.

    The lookups in this script already default an absent column to 0; a
    NaN/None cell is mapped to 0 as well so that sums and maxima over the
    formats do not turn into NaN.
    """
    value = row.get(column, 0)
    # Guard with is_scalar so an unexpected non-scalar is passed through
    # untouched instead of producing an ambiguous truth value.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return 0
    return value


def _per_format(row, template):
    """Return the stat for every entry of ``formats``.

    ``template`` is a column-name pattern containing a ``{fmt}`` placeholder,
    e.g. ``"batting_Runs_{fmt}"``.
    """
    return [_stat(row, template.format(fmt=fmt)) for fmt in formats]


def _detect_player(image_file, slot):
    """Run face recognition for one upload slot and report the outcome in the UI.

    Returns the predicted player name, or ``None`` when nothing was uploaded
    or no face was detected (in which case an error is shown).
    """
    if not image_file:
        return None
    name, err = detect_and_predict_face(image_file)
    if err:
        st.error(f"Image {slot}: {err}")
        return None
    st.success(f"✅ Player {slot} Detected: {name}")
    return name


def _render_metric_rows(caption, template, names, rows):
    """Render one row per format with the same stat for each player side by side."""
    for fmt in formats:
        for col, name, row in zip(st.columns(2), names, rows):
            col.metric(f"{name} {caption} in {fmt}", _stat(row, template.format(fmt=fmt)))


def make_radar_data(player_data, metrics_prefix, label):
    """Aggregate one player's per-format stats into a single radar-chart row.

    Runs, SR and Avg are averaged over the formats; 50s and 100s are summed.
    ``label`` is stored under the ``'Label'`` key to identify the player.
    """
    return {
        'Runs': np.mean(_per_format(player_data, f"{metrics_prefix}_Runs_{{fmt}}")),
        '50s': np.sum(_per_format(player_data, f"{metrics_prefix}_50s_{{fmt}}")),
        '100s': np.sum(_per_format(player_data, f"{metrics_prefix}_100s_{{fmt}}")),
        'SR': np.mean(_per_format(player_data, f"{metrics_prefix}_SR_{{fmt}}")),
        'Avg': np.mean(_per_format(player_data, f"{metrics_prefix}_Avg_{{fmt}}")),
        'Label': label,
    }


def _render_dashboard(names, rows):
    """Render the highlights section and the six comparison tabs.

    ``names`` and ``rows`` are parallel two-element sequences: the display
    name and the stats row (a pandas Series) of each player.
    """
    st.markdown("## 🌟 Player Highlights")
    for col, name, row in zip(st.columns(2), names, rows):
        with col:
            st.markdown(f"### {name} Highlights")
            st.metric("Total Runs", sum(_per_format(row, "batting_Runs_{fmt}")))
            st.metric("Total Wickets", sum(_per_format(row, "bowling_{fmt}_Wickets")))
            best_strike_rate = max(_per_format(row, "batting_SR_{fmt}"))
            st.metric("Best Strike Rate", f"{best_strike_rate:.2f}")

    st.markdown("## 📊 Player Comparison Dashboard")
    tabs = st.tabs(["🏏 Batting", "🎯 Bowling", "⚡ Strike Rate", "🏆 Milestones", "📈 Radar View", "🧩 Matches Played"])

    with tabs[0]:
        st.markdown("### 🏏 Batting Career Summary")
        _render_metric_rows("Runs", "batting_Runs_{fmt}", names, rows)

    with tabs[1]:
        st.markdown("### 🎯 Bowling Career Summary")
        _render_metric_rows("Wickets", "bowling_{fmt}_Wickets", names, rows)

    with tabs[2]:
        st.markdown("### ⚡ Strike Rate vs Runs")
        # One point per (player, format); players are laid out one after the
        # other so every column has len(names) * len(formats) entries.
        sr_data = pd.DataFrame({
            "Format": formats * len(names),
            "Player": [name for name in names for _ in formats],
            "Runs": [v for row in rows for v in _per_format(row, "batting_Runs_{fmt}")],
            "SR": [v for row in rows for v in _per_format(row, "batting_SR_{fmt}")],
        })
        fig = px.scatter(sr_data, x="Runs", y="SR", color="Player", text="Format", title="Strike Rate vs Runs by Format")
        st.plotly_chart(fig, use_container_width=True)

    with tabs[3]:
        st.markdown("### 🏆 Milestone Comparison")
        for fmt in formats:
            cols = st.columns(2)
            # Within one format, stack 50s / 100s / 200s in the same column pair.
            for milestone in ("50s", "100s", "200s"):
                for col, name, row in zip(cols, names, rows):
                    col.metric(f"{name} {milestone} in {fmt}", _stat(row, f"batting_{milestone}_{fmt}"))

    with tabs[4]:
        st.markdown("### 📈 Radar View: Batting Metrics")
        radar_df = pd.DataFrame([
            make_radar_data(row, 'batting', name) for name, row in zip(names, rows)
        ])
        fig = px.line_polar(radar_df.melt(id_vars=["Label"]), r="value", theta="variable", color="Label", line_close=True, title="Batting Performance Radar")
        st.plotly_chart(fig, use_container_width=True)

    with tabs[5]:
        st.markdown("### 🧩 Matches Played by Format")
        for col, name, row in zip(st.columns(2), names, rows):
            fig = px.pie(values=_per_format(row, "Matches_{fmt}"), names=formats, title=f"{name} Match Distribution")
            col.plotly_chart(fig, use_container_width=True)


# ---------------------------------------------------------------------------
# Upload images
# ---------------------------------------------------------------------------
col1, col2 = st.columns(2)
with col1:
    img1 = st.file_uploader(":grey[Upload First Player Image]", type=["jpg", "png", "jpeg"], key="img1", label_visibility="visible")
with col2:
    img2 = st.file_uploader(":grey[Upload Second Player Image]", type=["jpg", "png", "jpeg"], key="img2")

# Manual override: names of the rows whose team_name is 'India'.
# dropna() keeps sorted() from failing on a mix of strings and NaN.
india_mask = player_stats_df['team_name'] == 'India'
player_list = sorted(player_stats_df.loc[india_mask, 'Player'].dropna().unique())
col1, col2 = st.columns(2)
with col1:
    manual_p1_name = st.selectbox("Select Player 1 (optional override)", options=[""] + player_list)
with col2:
    manual_p2_name = st.selectbox("Select Player 2 (optional override)", options=[""] + player_list)

# Detect each player from their upload. The names are always bound (None when
# there is no upload or no face), so the checks below never hit an undefined
# variable.
p1_name = _detect_player(img1, 1)
p2_name = _detect_player(img2, 2)

# A manual selection takes precedence over the detected name.
p1_final = manual_p1_name or p1_name
p2_final = manual_p2_name or p2_name

# NOTE: st.stop() halts the script by raising an exception, so it must not be
# wrapped in a bare `except:` -- that would swallow the stop request.
for slot, chosen in enumerate((p1_final, p2_final), start=1):
    if not chosen:
        st.warning(f"Please upload an image or type the name for Player {slot}.")
        st.stop()

df = player_stats_df
players = [p1_final, p2_final]
known_players = df['Player'].values
if p1_final not in known_players or p2_final not in known_players:
    st.error("One or both players not found in dataset.")
    st.stop()

# First matching row for each player.
p1_data = df[df['Player'] == p1_final].iloc[0]
p2_data = df[df['Player'] == p2_final].iloc[0]

try:
    _render_dashboard(players, [p1_data, p2_data])
except Exception as exc:
    # Best-effort rendering: report the problem instead of silently showing a
    # blank page. Only `Exception` is caught so Streamlit's own control-flow
    # signals are not intercepted.
    st.error(f"Could not render the comparison dashboard: {exc}")