import tempfile
import time
import logging
from PIL import Image as PILImage
from datasets import Dataset, Image, concatenate_datasets
from huggingface_hub import HfApi
import os

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

CERTIFICATE_DATASET_NAME = "ysharma/gradio-agents-mcp-hackathon-certificates"

def safe_add_certificate_to_dataset(certificate_image, hf_username, max_retries=5, retry_delay=3):
    """
    Safely add a new certificate image to the dataset, with retries and defensive error handling.
    Handles both empty datasets and datasets that already contain certificates.
    
    Args:
        certificate_image: PIL Image object or image file path
        hf_username: HF username string
        max_retries: Maximum number of retry attempts
        retry_delay: Delay between retries in seconds
    
    Returns:
        tuple: (success: bool, message: str)
    """
    try:
        logger.info("Starting new certificate upload process")
        
        # Validate inputs
        if not hf_username or not hf_username.strip():
            return False, "❌ Error: HF username is required"
        
        if certificate_image is None:
            return False, "❌ Error: Certificate image is required"
        
        # Normalize username
        hf_username = hf_username.strip()
        
        logger.info(f"Processing certificate for user: {hf_username}")
        
        # Multi-attempt loading with different strategies
        existing_dataset = None
        load_successful = False
        is_empty_dataset = False
        
        for attempt in range(max_retries):
            logger.info(f"Loading attempt {attempt + 1}/{max_retries}")
            
            try:
                # Strategy 1: Load dataset directly
                from datasets import load_dataset
                existing_dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
                logger.info(f"Successfully loaded {len(existing_dataset)} existing certificates")
                load_successful = True
                break
                        
            except Exception as load_error:
                error_str = str(load_error).lower()
                logger.warning(f"Attempt {attempt + 1} failed: {str(load_error)[:100]}")
                
                # Check if it's an empty dataset error
                if "corresponds to no data" in error_str or "no data" in error_str:
                    logger.info("Dataset appears to be empty - will create first entry")
                    is_empty_dataset = True
                    load_successful = True
                    existing_dataset = None  # Will create new
                    break
                
                if attempt < max_retries - 1:
                    logger.info(f"Waiting {retry_delay} seconds before retry...")
                    time.sleep(retry_delay)
                    continue
        
        # Handle the case where we couldn't load and it's not an empty dataset
        if not load_successful:
            error_msg = "🚨 CRITICAL ERROR: Could not access dataset after multiple attempts."
            logger.error(error_msg)
            return False, (
                "❌ Certificate upload temporarily unavailable due to technical issues. "
                "Please try again in a few minutes. If the problem persists, contact support."
            )
        
        # Check for duplicates (only if we have existing data)
        if existing_dataset is not None:
            existing_labels = existing_dataset['label']
            if hf_username in existing_labels:
                logger.warning("Duplicate certificate attempt detected")
                return False, f"❌ Error: A certificate for username '{hf_username}' already exists."
        
        # Prepare the new certificate data
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save the image to a temporary file
            if isinstance(certificate_image, PILImage.Image):
                # If it's already a PIL Image
                temp_image_path = os.path.join(temp_dir, f"certificate_{hf_username}_{int(time.time())}.png")
                certificate_image.save(temp_image_path, "PNG")
            elif isinstance(certificate_image, str) and os.path.exists(certificate_image):
                # If it's a file path
                temp_image_path = certificate_image
            else:
                return False, "❌ Error: Invalid image format provided"
            
            # Create new dataset entry
            new_data = {
                "image": [temp_image_path],
                "label": [hf_username]
            }
            
            new_dataset = Dataset.from_dict(new_data).cast_column("image", Image())
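            # cast_column("image", Image()) converts the stored file path into an image
            # feature, so the actual image bytes are embedded when the dataset is pushed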
            logger.info("Created new certificate dataset entry")
            
            # Combine with existing dataset or use new dataset if empty
            if existing_dataset is not None and not is_empty_dataset:
                try:
                    combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
                    logger.info(f"Combined dataset now has {len(combined_dataset)} certificates (was {len(existing_dataset)})")
                except Exception as concat_error:
                    logger.error(f"Failed to combine datasets: {concat_error}")
                    return False, f"❌ Error combining datasets: {str(concat_error)}"
            else:
                # First certificate in empty dataset
                combined_dataset = new_dataset
                logger.info("Creating first certificate in empty dataset")
            
            # Create timestamped backup before upload (only if not first certificate)
            backup_timestamp = int(time.time())
            
            try:
                # Create backup first (only if we had existing data)
                if existing_dataset is not None and not is_empty_dataset:
                    backup_name = f"{CERTIFICATE_DATASET_NAME}-auto-backup-{backup_timestamp}"
                    logger.info(f"Creating backup: {backup_name}")
                    combined_dataset.push_to_hub(backup_name, private=True)
                
                logger.info("Pushing to main certificate dataset...")
                combined_dataset.push_to_hub(CERTIFICATE_DATASET_NAME, private=True)
                
                logger.info("βœ… Successfully saved new certificate")
                logger.info(f"Total certificates in dataset: {len(combined_dataset)}")
                
                # Quick verification
                time.sleep(2)
                try:
                    api = HfApi()
                    api.list_repo_files(CERTIFICATE_DATASET_NAME, repo_type="dataset")
                    logger.info("✅ Upload verification: Files updated successfully")
                except Exception:
                    logger.warning("⚠️  Could not verify upload (this may be normal)")
                
                return True, f"βœ… Certificate successfully uploaded for {hf_username}!"
                
            except Exception as upload_error:
                error_msg = str(upload_error).lower()
                if any(indicator in error_msg for indicator in ['rate limit', '429', 'too many requests']):
                    logger.warning("🚨 Rate limit hit - certificate upload system temporarily busy")
                    return False, "⏳ Certificate upload temporarily unavailable due to high server load. Please try again in 10-15 minutes."
                else:
                    logger.error(f"Upload failed: {upload_error}")
                    return False, f"❌ Certificate upload failed: {str(upload_error)}"
        
    except Exception as e:
        logger.error(f"❌ Unexpected error in certificate upload: {e}")
        import traceback
        traceback.print_exc()
        return False, f"❌ Certificate upload failed: {str(e)}"

def upload_user_certificate(certificate_image, hf_username):
    """
    Main function to upload user's certificate - simplified interface for Space A
    
    Args:
        certificate_image: PIL Image object of the generated certificate
        hf_username: User's Hugging Face username
    
    Returns:
        tuple: (success: bool, message: str)
    """
    
    # Basic validation
    if not certificate_image:
        return False, "❌ No certificate image provided"
    
    if not hf_username or not hf_username.strip():
        return False, "❌ HF username is required"
    
    # Call the safe upload function
    success, message = safe_add_certificate_to_dataset(certificate_image, hf_username)
    
    return success, message

def check_certificate_dataset_health():
    """Check if the certificate dataset is accessible and healthy"""
    try:
        from datasets import load_dataset
        try:
            dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
            logger.info(f"βœ… Certificate dataset health check passed - found {len(dataset)} certificates")
            return True
        except Exception as e:
            error_str = str(e).lower()
            if "corresponds to no data" in error_str or "no data" in error_str:
                logger.info("βœ… Certificate dataset exists but is empty - ready for first upload")
                return True
            else:
                logger.error(f"❌ Certificate dataset health check failed: {e}")
                return False
    except Exception as e:
        logger.error(f"❌ Certificate dataset health check failed: {e}")
        return False

# Health check on import
logger.info("πŸš€ Certificate Upload Module Initialized")
logger.info(f"πŸ“Š Target Dataset: {CERTIFICATE_DATASET_NAME}")

if check_certificate_dataset_health():
    logger.info("βœ… Certificate dataset is healthy and ready")
else:
    logger.warning("⚠️  Certificate dataset health warnings detected")