import os

# Attempt to disable Streamlit's file watcher (set before importing streamlit);
# this works around watcher errors triggered when it scans torch's compiled
# extension modules on some hosts.
os.environ["STREAMLIT_WATCHER_TYPE"] = "none"
os.environ["STREAMLIT_WATCH_DISABLE"] = "true"

import streamlit as st
from PIL import Image
import torch
import torchvision.transforms as transforms
import pandas as pd

from utils.preprocessing import get_transforms
from models.resnet_model import ResNet18
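
# models/resnet_model.ResNet18 is assumed to be a thin wrapper around
# torchvision's resnet18 with the classification head resized to 15 classes,
# roughly along these lines (a sketch; the actual module may differ):
#
#     import torch.nn as nn
#     from torchvision.models import resnet18
#
#     class ResNet18(nn.Module):
#         def __init__(self, num_classes=15):
#             super().__init__()
#             self.backbone = resnet18(weights="IMAGENET1K_V1")  # ImageNet-pretrained
#             self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)
#
#         def forward(self, x):
#             return self.backbone(x)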

# Class names, in the order used at training time
class_names = [
    'calling', 'clapping', 'cycling', 'dancing', 'drinking', 'eating', 'fighting',
    'hugging', 'laughing', 'listening_to_music', 'running', 'sitting', 'sleeping',
    'texting', 'using_laptop'
]


@st.cache_resource
def load_model():
    """Load the fine-tuned ResNet18 once and cache it across Streamlit reruns."""
    if not os.path.exists("models/best_model.pth"):
        st.error("Model weights not found. Please ensure 'models/best_model.pth' exists.")
        st.stop()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet18(num_classes=15)
    model.load_state_dict(torch.load("models/best_model.pth", map_location=device))
    model.to(device)
    model.eval()
    return model, device


def predict(image, model, device):
    """Run a single image through the model and return (label, confidence)."""
    transform = get_transforms()
    image_t = transform(image).unsqueeze(0).to(device)  # add batch dimension
    with torch.no_grad():
        outputs = model(image_t)
        probs = torch.softmax(outputs, dim=1)
        conf, predicted = torch.max(probs, dim=1)
    return class_names[predicted.item()], conf.item()
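
# If per-class probabilities are ever needed (e.g. a top-3 readout), the same
# forward pass can be reused; a hypothetical variant, not called by this app:
#
#     def predict_topk(image, model, device, k=3):
#         transform = get_transforms()
#         image_t = transform(image).unsqueeze(0).to(device)
#         with torch.no_grad():
#             probs = torch.softmax(model(image_t), dim=1)
#         confs, idxs = torch.topk(probs, k, dim=1)
#         return [(class_names[i.item()], c.item()) for i, c in zip(idxs[0], confs[0])]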


def main():
    st.title("Human Action Recognition App")
    tab1, tab2, tab3 = st.tabs(["About", "Predict", "Metrics & Test Predictions"])

    with tab1:
        st.header("About This App")
st.markdown(""" | |
### 🧠 Human Action Recognition (HAR) | |
This application classifies **human actions** from static images using a deep learning model trained on a curated dataset of 15 different activities. | |
#### 🔍 Purpose | |
To demonstrate how computer vision and deep learning can be used to **recognize and classify human behaviors** in images — useful for applications such as surveillance, activity monitoring, and human-computer interaction. | |
#### 🧰 Model | |
- **Architecture:** ResNet18 (Residual Neural Network with 18 layers) | |
- **Pretrained:** On ImageNet for general features | |
- **Fine-tuned:** On a specialized Human Action Recognition dataset for task-specific learning | |
#### 📚 Dataset | |
- **Source:** [Bingsu/Human_Action_Recognition](https://huggingface.co/datasets/Bingsu/Human_Action_Recognition) | |
- **Categories:** 15 action classes | |
- `calling`, `clapping`, `cycling`, `dancing`, `drinking`, `eating`, `fighting`, | |
`hugging`, `laughing`, `listening_to_music`, `running`, `sitting`, | |
`sleeping`, `texting`, `using_laptop` | |
--- | |
🖼️ Simply upload an image, and the model will analyze and classify the dominant action being performed. | |
""") | |

    with tab2:
        st.header("Predict Human Action from Image")
        uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
        if uploaded_file is not None:
            try:
                image = Image.open(uploaded_file).convert("RGB")
            except Exception:
                st.error("Error loading image. Please upload a valid JPG or PNG file.")
                return
            st.image(image, caption="Uploaded Image", use_container_width=True)

            model, device = load_model()
            prediction, confidence = predict(image, model, device)
            pred_label = prediction.replace('_', ' ').title()
            st.success(f"Predicted Action: **{pred_label}**")
            st.info(f"Confidence: {confidence * 100:.2f}%")

            # Show the transformed tensor as an image. Note: get_transforms()
            # normalizes pixel values, so colors in this preview may look skewed.
            transform = get_transforms()
            transformed_image = transform(image)
            st.image(
                transforms.ToPILImage()(transformed_image),
                caption="Transformed Input (for model)",
                use_container_width=True,
            )

            # Explain the preprocessing applied to the uploaded image
            st.markdown(f"""
### About the Transformed Input for **{pred_label}**

Before the model makes its prediction, the uploaded image undergoes several preprocessing steps:

- **Resizing and cropping:** the image is resized and cropped to a fixed size (typically 224x224 pixels) so the model receives uniform input dimensions.
- **Conversion to tensor:** the PIL image is converted to a PyTorch tensor, the input format the model requires.
- **Normalization:** pixel values are then scaled using per-channel mean and standard deviation (typically ImageNet dataset statistics), standardizing the input distribution and helping the model generalize.

This processed image is exactly what the model "sees" when it predicts the action **{pred_label}**.
""")

    with tab3:
        st.header("Training & Validation Metrics")
        st.markdown("""
**Training Accuracy (96.5%)**

During training, the model correctly identified the human action in 96.5% of images, indicating it has effectively learned the patterns and features present in the training data.

**Validation Accuracy (96.6%)**

On new, unseen images the model classified 96.6% correctly, demonstrating that it generalizes beyond simply memorizing the training examples.

**Training Loss (0.12)**

The average prediction error during training was low (0.12), meaning the model's predictions are generally close to the true labels.

**Validation Loss (0.10)**

On unseen data the prediction error was slightly lower still (0.10), suggesting the model is not overfitting but genuinely learning the task.
""")
st.markdown("---") | |
st.header("Test Set Predictions Preview") | |
st.markdown(""" | |
The table below presents a sample of the model’s predictions on the test dataset, which consists of images the model has not encountered during training. The columns typically include: | |
- **Filename:** The name of the test image file | |
- **Predicted Label:** The human action predicted by the model | |
- **Confidence:** The model’s confidence score for each prediction | |
Reviewing this information aids in evaluating the model’s real-world performance and helps identify potential failure cases. | |
""") | |

        csv_path = "test_predictions.csv"
        if os.path.exists(csv_path):
            df = pd.read_csv(csv_path)
            st.dataframe(df.head(20))  # show the first 20 rows
            st.success(f"Loaded {len(df)} test predictions.")
        else:
            st.warning(f"Test predictions CSV file not found at: {csv_path}")
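
        # test_predictions.csv is assumed to be generated offline by an
        # evaluation script; a hypothetical sketch of how it could be built
        # with the helpers above (test_image_paths is a made-up name):
        #
        #     rows = []
        #     model, device = load_model()
        #     for path in test_image_paths:
        #         label, conf = predict(Image.open(path).convert("RGB"), model, device)
        #         rows.append({"filename": os.path.basename(path),
        #                      "predicted_label": label,
        #                      "confidence": conf})
        #     pd.DataFrame(rows).to_csv("test_predictions.csv", index=False)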


if __name__ == "__main__":
    main()