import os
import io
import time
import gc
import pickle
import tempfile
import logging
from typing import Optional
import asyncio
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from huggingface_hub import HfFolder, snapshot_download
# Ensure HF cache is writable and not using /data
import os as _os_env
_os_env.environ.setdefault("HF_HOME", "/tmp/hf_home")
_os_env.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf_home")
# Avoid deprecated TRANSFORMERS_CACHE which may point to /data
if "TRANSFORMERS_CACHE" in _os_env.environ:
del _os_env.environ["TRANSFORMERS_CACHE"]
_os_env.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0")
# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
APP_START_TS = time.time()
# Model configuration
MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-3n-E4B-it")  # Fixed model name
DEVICE_MAP = os.environ.get("DEVICE_MAP", "cpu")  # Force CPU for Hugging Face Spaces
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
# Cache file used to share model state between Streamlit and FastAPI
MODEL_CACHE_FILE = os.path.join(tempfile.gettempdir(), "agrilens_model_cache.pkl")
def _get_dtype() -> torch.dtype:
"""Choix optimal du dtype selon le hardware."""
# Force float32 pour Hugging Face Spaces (CPU)
return torch.float32
def _build_prompt(culture: Optional[str], notes: Optional[str]) -> str:
"""Création du prompt d'analyse."""
base = (
"You are an agronomy assistant. Analyze the provided plant leaf image and identify the most likely disease. "
"Return a concise diagnosis in French with: disease name, short explanation of symptoms, "
"and 3 actionable treatment recommendations."
)
if culture:
base += f"\nCulture: {culture}"
if notes:
base += f"\nNotes: {notes}"
return base
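# Example (illustrative values only): _build_prompt("tomate", "taches brunes")
# returns the base English instruction followed by two extra lines:
#
#     ...3 actionable treatment recommendations.
#     Culture: tomate
#     Notes: taches brunes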
class SharedModelManager:
"""Gestionnaire de modèle partagé entre Streamlit et FastAPI"""
def __init__(self):
self.model = None
self.processor = None
self.device_map = DEVICE_MAP
self.dtype = _get_dtype()
self._load_attempted = False
self._loading = False
self._load_error = None
self._last_load_attempt = 0
self._load_timeout = 300 # 5 minutes timeout
logger.info(f"Initializing ModelManager with device_map={self.device_map}, dtype={self.dtype}")
# Try to recover from previous state
self._recover_state()
def _recover_state(self):
"""Try to recover model state from disk"""
try:
state_file = "/tmp/model_state.json"
if os.path.exists(state_file):
import json
with open(state_file, 'r') as f:
state = json.load(f)
# Check if the state is recent (less than 1 hour old)
if time.time() - state.get('timestamp', 0) < 3600:
logger.info("État précédent trouvé, tentative de récupération...")
# Note: We can't actually reload the model objects, but we can mark as attempted
self._load_attempted = True
self._last_load_attempt = state.get('timestamp', 0)
except Exception as e:
logger.warning(f"Impossible de récupérer l'état: {e}")
def _save_state(self):
"""Save current state to disk"""
try:
state_file = "/tmp/model_state.json"
import json
state = {
'timestamp': time.time(),
'model_loaded': self.model is not None,
'processor_loaded': self.processor is not None,
'load_attempted': self._load_attempted,
'loading': self._loading,
'error': self._load_error
}
with open(state_file, 'w') as f:
json.dump(state, f)
except Exception as e:
logger.warning(f"Impossible de sauvegarder l'état: {e}")
def check_streamlit_model_cache(self):
"""Vérifie si le modèle est disponible dans le cache Streamlit via un fichier"""
try:
# Vérifier si le fichier de cache existe et est récent (moins de 1 heure)
if os.path.exists(MODEL_CACHE_FILE):
file_age = time.time() - os.path.getmtime(MODEL_CACHE_FILE)
if file_age < 3600: # 1 heure
# Lire les informations du cache
try:
with open(MODEL_CACHE_FILE, 'rb') as f:
cache_data = pickle.load(f)
logger.info(f"Cache Streamlit trouvé: {cache_data}")
return True
except Exception as e:
logger.error(f"Erreur lors de la lecture du cache: {e}")
return False
except Exception as e:
logger.error(f"Erreur lors de la vérification du cache: {e}")
return False
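    # NOTE (assumption): the Streamlit side is expected to write only lightweight
    # metadata into MODEL_CACHE_FILE, e.g. {"model_id": ..., "loaded_at": ...},
    # not the model object itself; loaded torch modules cannot be shared across
    # processes through a pickle file. A minimal sketch of the writer side,
    # assuming that convention:
    #
    #     with open(MODEL_CACHE_FILE, "wb") as f:
    #         pickle.dump({"model_id": MODEL_ID, "loaded_at": time.time()}, f)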
def load_model_directly(self):
"""Robust model loading that tries multiple approaches to avoid permission issues"""
try:
import gc
self._loading = True
self._load_attempted = True
self._last_load_attempt = time.time()
self._load_error = None
# Try different approaches in order of preference
approaches = [
("Direct HF Hub loading", self._try_direct_loading),
("Cache in /app/cache", self._try_app_cache),
("Cache in /tmp/hf_home", self._try_tmp_cache),
("Cache in /tmp/model_repo", self._try_tmp_repo),
]
for approach_name, approach_func in approaches:
try:
logger.info(f"Tentative: {approach_name}")
success = approach_func()
if success:
self._loading = False
self._save_state()
logger.info(f"✅ Succès avec {approach_name}")
return True
except Exception as e:
logger.warning(f"❌ Échec de {approach_name}: {e}")
continue
# If all approaches failed
self._loading = False
self._load_error = "Toutes les approches de chargement ont échoué"
self._save_state()
return False
except Exception as e:
logger.error(f"Erreur critique chargement: {e}")
self._loading = False
self._load_error = str(e)
self._save_state()
return False
def _try_direct_loading(self):
"""Try to load directly from Hugging Face Hub without using /data by forcing cache_dir"""
try:
logger.info("Chargement direct depuis HF Hub...")
writable_cache = os.environ.get("HF_HOME", "/home/user/.cache/huggingface")
os.makedirs(writable_cache, exist_ok=True)
# Load processor directly with explicit cache_dir
self.processor = AutoProcessor.from_pretrained(
MODEL_ID,
trust_remote_code=True,
cache_dir=writable_cache,
local_files_only=False,
)
logger.info("Processor chargé directement")
# Load model directly with explicit cache_dir
self.model = AutoModelForImageTextToText.from_pretrained(
MODEL_ID,
trust_remote_code=True,
cache_dir=writable_cache,
local_files_only=False,
low_cpu_mem_usage=True,
device_map=self.device_map,
torch_dtype=self.dtype,
)
if self.device_map == "cpu":
self.model = self.model.to("cpu")
logger.info("Modèle chargé directement depuis HF Hub")
return True
except Exception as e:
logger.error(f"Échec chargement direct: {e}")
return False
def _try_app_cache(self):
"""Try to cache in /app/cache directory"""
try:
from huggingface_hub import snapshot_download
cache_dir = "/app/cache/huggingface"
os.makedirs(cache_dir, exist_ok=True)
logger.info(f"Snapshot vers {cache_dir}")
snapshot_download(
repo_id=MODEL_ID,
local_dir=cache_dir,
local_dir_use_symlinks=False,
resume_download=True,
token=os.environ.get("HF_TOKEN", None),
)
# Load from cache
self.processor = AutoProcessor.from_pretrained(
cache_dir,
trust_remote_code=True,
local_files_only=True,
)
logger.info("Processor chargé depuis /app/cache")
self.model = AutoModelForImageTextToText.from_pretrained(
cache_dir,
trust_remote_code=True,
local_files_only=True,
low_cpu_mem_usage=True,
device_map=self.device_map,
torch_dtype=self.dtype,
)
if self.device_map == "cpu":
self.model = self.model.to("cpu")
logger.info("Modèle chargé depuis /app/cache")
return True
except Exception as e:
logger.error(f"Échec cache /app: {e}")
return False
def _try_tmp_cache(self):
"""Try to cache in /tmp/hf_home directory"""
try:
from huggingface_hub import snapshot_download
cache_dir = "/tmp/hf_home"
os.makedirs(cache_dir, exist_ok=True)
logger.info(f"Snapshot vers {cache_dir}")
snapshot_download(
repo_id=MODEL_ID,
local_dir=cache_dir,
local_dir_use_symlinks=False,
resume_download=True,
token=os.environ.get("HF_TOKEN", None),
)
# Load from cache
self.processor = AutoProcessor.from_pretrained(
cache_dir,
trust_remote_code=True,
local_files_only=True,
)
logger.info("Processor chargé depuis /tmp/hf_home")
self.model = AutoModelForImageTextToText.from_pretrained(
cache_dir,
trust_remote_code=True,
local_files_only=True,
low_cpu_mem_usage=True,
device_map=self.device_map,
torch_dtype=self.dtype,
)
if self.device_map == "cpu":
self.model = self.model.to("cpu")
logger.info("Modèle chargé depuis /tmp/hf_home")
return True
except Exception as e:
logger.error(f"Échec cache /tmp/hf_home: {e}")
return False
def _try_tmp_repo(self):
"""Try to cache in /tmp/model_repo directory (original approach)"""
try:
from huggingface_hub import snapshot_download
repo_dir = "/tmp/model_repo"
offload_dir = "/tmp/model_offload"
os.makedirs(repo_dir, exist_ok=True)
os.makedirs(offload_dir, exist_ok=True)
logger.info(f"Snapshot vers {repo_dir}")
snapshot_download(
repo_id=MODEL_ID,
local_dir=repo_dir,
local_dir_use_symlinks=False,
resume_download=True,
token=os.environ.get("HF_TOKEN", None),
)
# Load from cache
self.processor = AutoProcessor.from_pretrained(
repo_dir,
trust_remote_code=True,
local_files_only=True,
)
logger.info("Processor chargé depuis /tmp/model_repo")
self.model = AutoModelForImageTextToText.from_pretrained(
repo_dir,
trust_remote_code=True,
local_files_only=True,
low_cpu_mem_usage=True,
device_map=self.device_map,
torch_dtype=self.dtype,
offload_folder=offload_dir,
max_memory={0: "8GB", "cpu": "8GB"} if self.device_map == "cpu" else None,
)
if self.device_map == "cpu":
self.model = self.model.to("cpu")
logger.info("Modèle chargé depuis /tmp/model_repo")
return True
except Exception as e:
logger.error(f"Échec cache /tmp/model_repo: {e}")
return False
def load_model_with_retry(self, max_retries=5, delay=60):
"""Charge le modèle avec retry automatique en cas d'échec"""
for attempt in range(max_retries):
try:
logger.info(f"Tentative de chargement {attempt + 1}/{max_retries}")
success = self.load_model_directly()
if success:
return True
else:
logger.warning(f"Échec tentative {attempt + 1}, attente {delay}s...")
if attempt < max_retries - 1:
time.sleep(delay)
except Exception as e:
logger.error(f"Erreur tentative {attempt + 1}: {e}")
if attempt < max_retries - 1:
time.sleep(delay)
logger.error(f"Toutes les {max_retries} tentatives ont échoué")
return False
def ensure_model_loaded(self):
"""S'assure que le modèle est chargé"""
if self.model is not None and self.processor is not None:
return True
if not self._load_attempted:
self._load_attempted = True
# Charge directement le modèle (lancé à la demande)
return self.load_model_directly()
return False
def get_load_status(self):
"""Retourne le statut de chargement"""
return {
"loaded": self.model is not None and self.processor is not None,
"loading": self._loading,
"error": self._load_error,
"attempted": self._load_attempted
}
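    # Example return value (illustrative):
    #     {"loaded": False, "loading": True, "error": None, "attempted": True}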
def _complete_partial_load(self):
"""Complete a partial model load (when processor is loaded but model is not)"""
try:
logger.info("Tentative de complétion du chargement partiel...")
if self.processor and not self.model:
logger.info("Processor disponible, chargement du modèle seulement...")
# Try to load just the model using the existing processor
try:
                    # Processors do not reliably expose a config with _name_or_path,
                    # so reload the weights from the configured MODEL_ID instead
                    model_path = MODEL_ID
logger.info(f"Chargement du modèle depuis {model_path}")
self.model = AutoModelForImageTextToText.from_pretrained(
model_path,
trust_remote_code=True,
low_cpu_mem_usage=True,
device_map=self.device_map,
torch_dtype=self.dtype,
offload_folder="/tmp/model_offload",
max_memory={0: "8GB", "cpu": "8GB"} if self.device_map == "cpu" else None
)
if self.device_map == "cpu":
self.model = self.model.to("cpu")
logger.info("Modèle complété avec succès!")
self._loading = False
self._save_state()
return True
except Exception as e:
logger.error(f"Échec de la complétion: {e}")
# Fall back to full reload
return self.load_model_directly()
else:
logger.info("Pas de chargement partiel à compléter")
return False
except Exception as e:
logger.error(f"Erreur lors de la complétion: {e}")
return False
# Global model manager instance
model_manager = SharedModelManager()
app = FastAPI(title="AgriLens AI FastAPI", version="1.0.0")
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Non-blocking warmup at startup - use a more robust approach
@app.on_event("startup")
async def _warmup_background():
    """Start model loading in the background without blocking the server."""
logger.info("Démarrage du chargement du modèle en arrière-plan...")
# Use a more robust approach that won't be cancelled
try:
# Run in thread but don't await it to avoid cancellation
import threading
thread = threading.Thread(target=model_manager.load_model_directly, daemon=True)
thread.start()
logger.info("Thread de chargement démarré")
except Exception as e:
logger.error(f"Erreur lors du démarrage du thread: {e}")
# Alternative: also try to load on first request if not already loaded
@app.middleware("http")
async def ensure_model_loaded_middleware(request, call_next):
"""Middleware pour s'assurer que le modèle est chargé avec récupération automatique"""
try:
current_time = time.time()
# Check for partial loads and trigger automatic recovery (with rate limiting)
if (model_manager.processor and not model_manager.model and
not model_manager._loading and
not hasattr(model_manager, '_middleware_recovery_triggered')):
logger.info("🔧 Récupération automatique déclenchée via middleware")
model_manager._middleware_recovery_triggered = current_time
# Start recovery in background
import threading
thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True)
thread.start()
# Check if model needs loading (with rate limiting)
elif (not model_manager.model and not model_manager._loading and
not hasattr(model_manager, '_middleware_load_triggered')):
logger.info("Modèle non chargé, tentative de chargement...")
model_manager._middleware_load_triggered = current_time
# Start loading in background
import threading
thread = threading.Thread(target=model_manager.load_model_directly, daemon=True)
thread.start()
# Clean up old triggers (older than 5 minutes)
if hasattr(model_manager, '_middleware_recovery_triggered'):
if current_time - model_manager._middleware_recovery_triggered > 300:
delattr(model_manager, '_middleware_recovery_triggered')
if hasattr(model_manager, '_middleware_load_triggered'):
if current_time - model_manager._middleware_load_triggered > 300:
delattr(model_manager, '_middleware_load_triggered')
except Exception as e:
logger.error(f"Erreur dans le middleware: {e}")
response = await call_next(request)
return response
# Add a background task that keeps trying to load the model
@app.on_event("startup")
async def _persistent_model_loader():
"""Persistent model loader that keeps trying until success"""
import asyncio
import threading
def _load_loop():
"""Infinite loop to keep trying to load the model"""
max_attempts = 5 # Maximum attempts before giving up
attempt_count = 0
last_attempt_time = 0
cooldown = 60 # Wait 60s between attempts
while attempt_count < max_attempts:
try:
current_time = time.time()
# Check if we should attempt loading
if (not model_manager.model and
not model_manager._loading and
current_time - last_attempt_time > cooldown):
logger.info(f"Persistent loader: tentative {attempt_count + 1}/{max_attempts}...")
last_attempt_time = current_time
attempt_count += 1
success = model_manager.load_model_directly()
if success:
logger.info("Persistent loader: modèle chargé avec succès!")
break
else:
logger.warning(f"Persistent loader: échec {attempt_count}/{max_attempts}, nouvelle tentative dans {cooldown}s...")
time.sleep(cooldown)
else:
# Model is loading or loaded, wait a bit
time.sleep(10)
except Exception as e:
logger.error(f"Persistent loader: erreur: {e}")
attempt_count += 1
time.sleep(cooldown)
if attempt_count >= max_attempts:
logger.warning("Persistent loader: nombre maximum de tentatives atteint, arrêt")
else:
logger.info("Persistent loader: terminé avec succès")
# Start the persistent loader in a daemon thread
thread = threading.Thread(target=_load_loop, daemon=True)
thread.start()
logger.info("Persistent model loader démarré")
# Add automated recovery system
@app.on_event("startup")
async def _automated_recovery():
"""Automated recovery system that detects and fixes partial loads"""
import threading
import time
def _recovery_loop():
"""Continuous monitoring and recovery loop"""
last_recovery_attempt = 0
recovery_cooldown = 60 # Wait 60s between recovery attempts
while True:
try:
current_time = time.time()
# Check for partial loads (processor loaded but model not)
if (model_manager.processor and not model_manager.model and
not model_manager._loading and
current_time - last_recovery_attempt > recovery_cooldown):
logger.info("🔧 Récupération automatique détectée: processor chargé mais modèle manquant")
logger.info("🚀 Lancement automatique de la récupération...")
last_recovery_attempt = current_time
# Try to complete the partial load
success = model_manager._complete_partial_load()
if success:
logger.info("✅ Récupération automatique réussie!")
break # Exit the loop if successful
else:
logger.warning("⚠️ Récupération automatique échouée, nouvelle tentative dans 60s...")
# Check for stuck loading states
elif (model_manager._loading and
current_time - model_manager._last_load_attempt > 300): # 5 minutes timeout
logger.warning("⏰ Timeout détecté, reset de l'état de chargement...")
model_manager._loading = False
model_manager._load_error = "Timeout - chargement bloqué"
model_manager._save_state()
# Wait before next check
time.sleep(15) # Check every 15 seconds
except Exception as e:
logger.error(f"Erreur dans la boucle de récupération: {e}")
time.sleep(30)
# Start the automated recovery in a daemon thread
thread = threading.Thread(target=_recovery_loop, daemon=True)
thread.start()
logger.info("🔧 Système de récupération automatique démarré")
# Add a more robust startup approach using a separate process
@app.on_event("startup")
async def _robust_startup():
"""Robust startup using a separate process to avoid CancelledError"""
import multiprocessing
import time
# Only start if not already loading
if model_manager._loading:
logger.info("Démarrage robuste: chargement déjà en cours, skip")
return
try:
logger.info("Démarrage du chargement du modèle en arrière-plan...")
# Set a flag to prevent multiple processes
if hasattr(model_manager, '_startup_process_running'):
logger.info("Processus de démarrage déjà en cours, skip")
return
model_manager._startup_process_running = True
        def _startup_load():
            """Load the model in a separate process.

            Note: objects loaded here stay in the child process; for the server
            process the main benefit is that the download warms the on-disk cache.
            """
            try:
                # Set the cache environment for this process (avoid the deprecated
                # TRANSFORMERS_CACHE, which is deleted at import time above)
                os.environ['HF_HOME'] = '/tmp/hf_home'
                os.environ['HUGGINGFACE_HUB_CACHE'] = '/tmp/hf_home'
logger.info("Processus de chargement démarré")
success = model_manager.load_model_directly()
if success:
logger.info("Processus: chargement réussi")
else:
logger.warning("Processus: échec du chargement")
except Exception as e:
logger.error(f"Processus: erreur: {e}")
finally:
# Clean up
if hasattr(model_manager, '_startup_process_running'):
delattr(model_manager, '_startup_process_running')
# Start the process
process = multiprocessing.Process(target=_startup_load, daemon=True)
process.start()
logger.info(f"Processus de chargement du modèle démarré (PID: {process.pid})")
# Wait a bit for the process to start
time.sleep(2)
# Check if process is still alive
if not process.is_alive():
logger.warning("Processus de démarrage s'est terminé prématurément")
if hasattr(model_manager, '_startup_process_running'):
delattr(model_manager, '_startup_process_running')
except Exception as e:
logger.error(f"Erreur lors du démarrage du processus: {e}")
if hasattr(model_manager, '_startup_process_running'):
delattr(model_manager, '_startup_process_running')
# Add health monitoring with automatic recovery
@app.get("/health")
def health():
"""Vérifie l'état de l'application et du modèle avec récupération automatique."""
try:
# Check for partial loads and trigger automatic recovery
if model_manager.processor and not model_manager.model and not model_manager._loading:
logger.info("🔧 Récupération automatique déclenchée via /health")
# Start recovery in background
import threading
thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True)
thread.start()
model_loaded = model_manager.ensure_model_loaded()
streamlit_cache_available = model_manager.check_streamlit_model_cache()
load_status = model_manager.get_load_status()
return {
"status": "ok" if model_loaded else "cold",
"uptime_s": int(time.time() - APP_START_TS),
"cuda": torch.cuda.is_available(),
"device_map": model_manager.device_map,
"dtype": str(model_manager.dtype),
"model_id": MODEL_ID,
"streamlit_cache_available": streamlit_cache_available,
"model_loaded": model_loaded,
"load_status": load_status,
"auto_recovery": "active",
}
except Exception as e:
logger.error(f"Erreur dans health check: {e}")
return {
"status": "error",
"error": str(e),
"uptime_s": int(time.time() - APP_START_TS),
}
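# Example check during a local run (host and port are illustrative):
#
#     curl -s http://localhost:7860/health
#
# "status" stays "cold" until the background loader finishes, then becomes "ok".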
@app.get("/load")
def load():
"""Force le chargement du modèle."""
try:
success = model_manager.load_model_directly()
load_status = model_manager.get_load_status()
if success:
return {"status": "success", "message": "Modèle chargé avec succès", "load_status": load_status}
else:
return {
"status": "error",
"message": "Échec du chargement du modèle",
"load_status": load_status,
"error": model_manager._load_error
}
except Exception as e:
logger.error(f"Erreur lors du chargement forcé: {e}")
return {"status": "error", "message": f"Erreur: {str(e)}"}
@app.post("/diagnose")
async def diagnose(
image: UploadFile = File(...),
culture: Optional[str] = Form(None),
notes: Optional[str] = Form(None)
):
"""Analyse une image de feuille de plante."""
try:
# Vérifier que le modèle est chargé
if not model_manager.ensure_model_loaded():
load_status = model_manager.get_load_status()
if model_manager._loading:
raise HTTPException(status_code=503, detail="Modèle en cours de chargement, veuillez réessayer dans quelques secondes")
else:
raise HTTPException(
status_code=500,
detail=f"Modèle non disponible. Statut: {load_status}"
)
        # Read the image (convert to RGB so palette/RGBA uploads are handled)
        image_data = await image.read()
        pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
        # Build the prompt
        prompt = _build_prompt(culture, notes)
        # Prepare the model inputs
inputs = model_manager.processor(
images=pil_image,
text=prompt,
return_tensors="pt"
)
        # Move the inputs to the right device
        if model_manager.device_map == "cpu":
            inputs = {k: v.to("cpu") for k, v in inputs.items()}
        # Generate the response
with torch.no_grad():
outputs = model_manager.model.generate(
**inputs,
max_new_tokens=MAX_NEW_TOKENS,
do_sample=True,
temperature=0.7,
pad_token_id=model_manager.processor.tokenizer.eos_token_id
)
        # Decode the response
        response_text = model_manager.processor.tokenizer.decode(
            outputs[0],
            skip_special_tokens=True
        )
        # Keep only the generated part (after the prompt)
if prompt in response_text:
diagnosis = response_text.split(prompt)[-1].strip()
else:
diagnosis = response_text.strip()
return {
"diagnosis": diagnosis,
"model_id": MODEL_ID,
"culture": culture,
"notes": notes,
"processing_time": time.time() - APP_START_TS
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Erreur lors du diagnostic: {e}")
raise HTTPException(status_code=500, detail=f"Erreur lors de l'analyse: {str(e)}")
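# Example request during a local run (file name and form values are illustrative):
#
#     curl -s -X POST http://localhost:7860/diagnose \
#          -F "image=@leaf.jpg" \
#          -F "culture=tomate" \
#          -F "notes=taches brunes"
#
# The JSON response contains "diagnosis", "model_id", "culture", "notes" and
# "processing_time".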
@app.get("/recover")
def recover():
"""Tente de récupérer un chargement partiel du modèle."""
try:
if model_manager.processor and not model_manager.model:
logger.info("Récupération d'un chargement partiel...")
success = model_manager._complete_partial_load()
if success:
return {"status": "success", "message": "Modèle récupéré avec succès"}
else:
return {"status": "error", "message": "Échec de la récupération"}
else:
return {"status": "info", "message": "Pas de chargement partiel à récupérer"}
except Exception as e:
logger.error(f"Erreur lors de la récupération: {e}")
return {"status": "error", "message": f"Erreur: {str(e)}"}
@app.get("/status")
def detailed_status():
"""Statut détaillé du système avec informations de récupération automatique"""
try:
current_time = time.time()
# Calculate time since last load attempt
time_since_last_attempt = current_time - model_manager._last_load_attempt if model_manager._last_load_attempt > 0 else 0
# Check for various states
partial_load_detected = model_manager.processor and not model_manager.model
stuck_loading = model_manager._loading and time_since_last_attempt > 300
recovery_needed = partial_load_detected or stuck_loading
status_info = {
"timestamp": current_time,
"model_state": {
"processor_loaded": model_manager.processor is not None,
"model_loaded": model_manager.model is not None,
"loading": model_manager._loading,
"load_attempted": model_manager._load_attempted,
"time_since_last_attempt": f"{time_since_last_attempt:.1f}s"
},
"auto_recovery": {
"active": True,
"partial_load_detected": partial_load_detected,
"stuck_loading_detected": stuck_loading,
"recovery_needed": recovery_needed,
"check_interval": "15s"
},
"system": {
"uptime_s": int(current_time - APP_START_TS),
"device_map": model_manager.device_map,
"dtype": str(model_manager.dtype),
"model_id": MODEL_ID
}
}
# If recovery is needed, trigger it automatically
if recovery_needed:
logger.info("🔧 Récupération automatique déclenchée via /status")
if partial_load_detected:
import threading
thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True)
thread.start()
elif stuck_loading:
model_manager._loading = False
model_manager._load_error = "Timeout - chargement bloqué"
model_manager._save_state()
return status_info
except Exception as e:
logger.error(f"Erreur dans detailed_status: {e}")
return {
"status": "error",
"error": str(e),
"timestamp": time.time()
}
@app.get("/")
def root():
"""Page d'accueil avec informations sur l'API."""
return {
"message": "AgriLens AI FastAPI",
"version": "1.0.0",
"endpoints": {
"health": "/health",
"load": "/load",
"diagnose": "/diagnose (POST)"
},
"model": MODEL_ID,
"uptime_s": int(time.time() - APP_START_TS)
}
# Correct launch for Hugging Face Spaces
if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", 7860))  # Hugging Face provides this port
    uvicorn.run("app:app", host="0.0.0.0", port=port)
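# Local run sketch (assumes this file is saved as app.py, matching the
# "app:app" import string above):
#
#     PORT=7860 python app.py
#     curl -s http://localhost:7860/health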