# Scraped page header (not part of the module): Hugging Face Space running on
# "CPU Upgrade"; file size 10,108 bytes; revisions 073785a, be444d1.
import tempfile
import time
import logging
from PIL import Image as PILImage
from datasets import Dataset, Image, concatenate_datasets
from huggingface_hub import HfApi, login
import pandas as pd
import os
from datetime import datetime
# Hub dataset repository that the certificate images and labels are stored in
CERTIFICATE_DATASET_NAME = "ysharma/gradio-agents-mcp-hackathon-certificates"

# Module-level logger; the root logger is configured to emit INFO and above
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _load_existing_certificates(max_retries, retry_delay):
    """Load the "train" split of the certificate dataset, retrying on failure.

    Returns:
        tuple: (loaded: bool, dataset). ``loaded`` is False only when every
        attempt failed with an error that does not look like "empty dataset".
        ``dataset`` is None when the load error message contains "no data",
        which is treated as an empty dataset that is ready for its first entry.
    """
    for attempt in range(1, max_retries + 1):
        logger.info("Loading attempt %s/%s", attempt, max_retries)
        try:
            # Imported inside the try so that an import failure is handled
            # (logged and retried) like any other load error.
            from datasets import load_dataset

            dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
            logger.info("Successfully loaded %s existing certificates", len(dataset))
            return True, dataset
        except Exception as load_error:
            logger.warning("Attempt %s failed: %s", attempt, str(load_error)[:100])

            # "corresponds to no data" also contains "no data", so one
            # substring test covers both phrasings.
            if "no data" in str(load_error).lower():
                logger.info("Dataset appears to be empty - will create first entry")
                return True, None

            if attempt < max_retries:
                logger.info("Waiting %s seconds before retry...", retry_delay)
                time.sleep(retry_delay)

    return False, None


def _certificate_image_path(certificate_image, hf_username, temp_dir):
    """Return a file path for the certificate image, or None if unsupported.

    A PIL image is written as PNG into ``temp_dir``; an existing file path is
    returned unchanged.
    """
    if isinstance(certificate_image, PILImage.Image):
        # The username is caller-supplied: keep only characters that are safe
        # in a file name so it cannot introduce path separators.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "._-" else "_" for ch in hf_username
        )
        image_path = os.path.join(
            temp_dir, f"certificate_{safe_name}_{int(time.time())}.png"
        )
        certificate_image.save(image_path, "PNG")
        return image_path

    if isinstance(certificate_image, str) and os.path.exists(certificate_image):
        return certificate_image

    return None


def _push_certificates(combined_dataset, hf_username, make_backup):
    """Push the combined dataset to the Hub and return (success, message)."""
    backup_timestamp = int(time.time())
    try:
        if make_backup:
            # NOTE(review): the backup repo receives the *combined* dataset
            # (including the new entry), not the previously stored one.
            backup_name = f"{CERTIFICATE_DATASET_NAME}-auto-backup-{backup_timestamp}"
            logger.info("Creating backup: %s", backup_name)
            combined_dataset.push_to_hub(backup_name, private=True)

        logger.info("Pushing to main certificate dataset...")
        combined_dataset.push_to_hub(CERTIFICATE_DATASET_NAME, private=True)
        logger.info("✅ Successfully saved new certificate")
        logger.info("Total certificates in dataset: %s", len(combined_dataset))

        # Quick best-effort verification; a failure here does not fail the upload.
        time.sleep(2)
        try:
            HfApi().list_repo_files(CERTIFICATE_DATASET_NAME, repo_type="dataset")
            logger.info("✅ Upload verification: Files updated successfully")
        except Exception:
            logger.warning("⚠️ Could not verify upload (this may be normal)")

        return True, f"✅ Certificate successfully uploaded for {hf_username}!"

    except Exception as upload_error:
        error_msg = str(upload_error).lower()
        if any(marker in error_msg for marker in ("rate limit", "429", "too many requests")):
            logger.warning("🚨 Rate limit hit - certificate upload system temporarily busy")
            return False, "⏳ Certificate upload temporarily unavailable due to high server load. Please try again in 10-15 minutes."
        logger.error("Upload failed: %s", upload_error)
        return False, f"❌ Certificate upload failed: {str(upload_error)}"


def safe_add_certificate_to_dataset(certificate_image, hf_username, max_retries=5, retry_delay=3):
    """
    Add a new certificate to the image dataset, never raising to the caller.

    Handles both an empty dataset (first entry) and an existing dataset
    (append). A username that already appears in the "label" column is
    rejected as a duplicate.

    Args:
        certificate_image: PIL Image object or path to an existing image file
        hf_username: HF username string (surrounding whitespace is stripped)
        max_retries: Maximum number of attempts to load the existing dataset
        retry_delay: Delay between load attempts in seconds

    Returns:
        tuple: (success: bool, message: str)
    """
    try:
        logger.info("Starting new certificate upload process")

        # Validate inputs
        if not isinstance(hf_username, str) or not hf_username.strip():
            return False, "❌ Error: HF username is required"
        if certificate_image is None:
            return False, "❌ Error: Certificate image is required"

        # Normalize username
        hf_username = hf_username.strip()
        logger.info("Processing certificate for user: %s", hf_username)

        loaded, existing_dataset = _load_existing_certificates(max_retries, retry_delay)
        if not loaded:
            logger.error("🚨 CRITICAL ERROR: Could not access dataset after multiple attempts.")
            return False, (
                "❌ Certificate upload temporarily unavailable due to technical issues. "
                "Please try again in a few minutes. If the problem persists, contact support."
            )

        # Check for duplicates (only if we have existing data)
        if existing_dataset is not None and hf_username in existing_dataset["label"]:
            logger.warning("Duplicate certificate attempt detected")
            return False, f"❌ Error: A certificate for username '{hf_username}' already exists."

        # The temporary directory must stay alive until the push completes,
        # because the dataset entry refers to the image by file path.
        with tempfile.TemporaryDirectory() as temp_dir:
            image_path = _certificate_image_path(certificate_image, hf_username, temp_dir)
            if image_path is None:
                return False, "❌ Error: Invalid image format provided"

            new_dataset = Dataset.from_dict(
                {"image": [image_path], "label": [hf_username]}
            ).cast_column("image", Image())
            logger.info("Created new certificate dataset entry")

            if existing_dataset is None:
                # First certificate in empty dataset
                combined_dataset = new_dataset
                logger.info("Creating first certificate in empty dataset")
            else:
                try:
                    combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
                    logger.info(
                        "Combined dataset now has %s certificates (was %s)",
                        len(combined_dataset),
                        len(existing_dataset),
                    )
                except Exception as concat_error:
                    logger.error("Failed to combine datasets: %s", concat_error)
                    return False, f"❌ Error combining datasets: {str(concat_error)}"

            # A backup is only made when there was existing data.
            return _push_certificates(
                combined_dataset, hf_username, make_backup=existing_dataset is not None
            )

    except Exception as e:
        # logger.exception records the traceback through the logging system.
        logger.exception("❌ Unexpected error in certificate upload: %s", e)
        return False, f"❌ Certificate upload failed: {str(e)}"
def upload_user_certificate(certificate_image, hf_username):
    """
    Main function to upload user's certificate - simplified interface for Space A

    Performs basic validation and then delegates to
    safe_add_certificate_to_dataset with its default retry settings.

    Args:
        certificate_image: PIL Image object of the generated certificate
        hf_username: User's Hugging Face username

    Returns:
        tuple: (success: bool, message: str)
    """
    # Basic validation
    if not certificate_image:
        return False, "❌ No certificate image provided"

    # A non-string username is rejected here instead of failing on .strip()
    if not isinstance(hf_username, str) or not hf_username.strip():
        return False, "❌ HF username is required"

    # Call the safe upload function
    return safe_add_certificate_to_dataset(certificate_image, hf_username)
def check_certificate_dataset_health():
    """Check if the certificate dataset is accessible and healthy.

    Returns:
        bool: True when the "train" split loads, or when loading fails with a
        message containing "no data" (treated as an existing but empty
        dataset); False on any other failure.
    """
    try:
        # Imported inside the try so an import failure is reported as an
        # unhealthy dataset rather than raised.
        from datasets import load_dataset

        dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
        logger.info(
            "✅ Certificate dataset health check passed - found %s certificates",
            len(dataset),
        )
        return True
    except Exception as e:
        # "corresponds to no data" also contains "no data", so one substring
        # test covers both phrasings.
        if "no data" in str(e).lower():
            logger.info("✅ Certificate dataset exists but is empty - ready for first upload")
            return True
        logger.error("❌ Certificate dataset health check failed: %s", e)
        return False
# Health check on import: runs check_certificate_dataset_health() once when the
# module is loaded and logs the outcome; the result is not stored.
# NOTE(review): the leading emoji of the first two messages were reconstructed
# from mis-encoded bytes - confirm the exact glyphs against the original file.
logger.info("🚀 Certificate Upload Module Initialized")
logger.info(f"📊 Target Dataset: {CERTIFICATE_DATASET_NAME}")
if check_certificate_dataset_health():
    logger.info("✅ Certificate dataset is healthy and ready")
else:
    logger.warning("⚠️ Certificate dataset health warnings detected")