import os import io import time import gc import pickle import tempfile import logging from typing import Optional import asyncio from fastapi import FastAPI, UploadFile, File, Form, HTTPException from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from PIL import Image import torch from transformers import AutoProcessor, AutoModelForImageTextToText from huggingface_hub import HfFolder, snapshot_download # Ensure HF cache is writable and not using /data import os as _os_env _os_env.environ.setdefault("HF_HOME", "/tmp/hf_home") _os_env.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf_home") # Avoid deprecated TRANSFORMERS_CACHE which may point to /data if "TRANSFORMERS_CACHE" in _os_env.environ: del _os_env.environ["TRANSFORMERS_CACHE"] _os_env.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0") # Configuration du logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) APP_START_TS = time.time() # Configuration du modèle MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-3n-E4B-it") # Fixed model name DEVICE_MAP = os.environ.get("DEVICE_MAP", "cpu") # Force CPU pour Hugging Face Spaces MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256")) # Fichier de cache pour partager le modèle entre Streamlit et FastAPI MODEL_CACHE_FILE = os.path.join(tempfile.gettempdir(), "agrilens_model_cache.pkl") def _get_dtype() -> torch.dtype: """Choix optimal du dtype selon le hardware.""" # Force float32 pour Hugging Face Spaces (CPU) return torch.float32 def _build_prompt(culture: Optional[str], notes: Optional[str]) -> str: """Création du prompt d'analyse.""" base = ( "You are an agronomy assistant. Analyze the provided plant leaf image and identify the most likely disease. " "Return a concise diagnosis in French with: disease name, short explanation of symptoms, " "and 3 actionable treatment recommendations." ) if culture: base += f"\nCulture: {culture}" if notes: base += f"\nNotes: {notes}" return base class SharedModelManager: """Gestionnaire de modèle partagé entre Streamlit et FastAPI""" def __init__(self): self.model = None self.processor = None self.device_map = DEVICE_MAP self.dtype = _get_dtype() self._load_attempted = False self._loading = False self._load_error = None self._last_load_attempt = 0 self._load_timeout = 300 # 5 minutes timeout logger.info(f"Initializing ModelManager with device_map={self.device_map}, dtype={self.dtype}") # Try to recover from previous state self._recover_state() def _recover_state(self): """Try to recover model state from disk""" try: state_file = "/tmp/model_state.json" if os.path.exists(state_file): import json with open(state_file, 'r') as f: state = json.load(f) # Check if the state is recent (less than 1 hour old) if time.time() - state.get('timestamp', 0) < 3600: logger.info("État précédent trouvé, tentative de récupération...") # Note: We can't actually reload the model objects, but we can mark as attempted self._load_attempted = True self._last_load_attempt = state.get('timestamp', 0) except Exception as e: logger.warning(f"Impossible de récupérer l'état: {e}") def _save_state(self): """Save current state to disk""" try: state_file = "/tmp/model_state.json" import json state = { 'timestamp': time.time(), 'model_loaded': self.model is not None, 'processor_loaded': self.processor is not None, 'load_attempted': self._load_attempted, 'loading': self._loading, 'error': self._load_error } with open(state_file, 'w') as f: json.dump(state, f) except Exception as e: logger.warning(f"Impossible de sauvegarder l'état: {e}") def check_streamlit_model_cache(self): """Vérifie si le modèle est disponible dans le cache Streamlit via un fichier""" try: # Vérifier si le fichier de cache existe et est récent (moins de 1 heure) if os.path.exists(MODEL_CACHE_FILE): file_age = time.time() - os.path.getmtime(MODEL_CACHE_FILE) if file_age < 3600: # 1 heure # Lire les informations du cache try: with open(MODEL_CACHE_FILE, 'rb') as f: cache_data = pickle.load(f) logger.info(f"Cache Streamlit trouvé: {cache_data}") return True except Exception as e: logger.error(f"Erreur lors de la lecture du cache: {e}") return False except Exception as e: logger.error(f"Erreur lors de la vérification du cache: {e}") return False def load_model_directly(self): """Robust model loading that tries multiple approaches to avoid permission issues""" try: import gc self._loading = True self._load_attempted = True self._last_load_attempt = time.time() self._load_error = None # Try different approaches in order of preference approaches = [ ("Direct HF Hub loading", self._try_direct_loading), ("Cache in /app/cache", self._try_app_cache), ("Cache in /tmp/hf_home", self._try_tmp_cache), ("Cache in /tmp/model_repo", self._try_tmp_repo), ] for approach_name, approach_func in approaches: try: logger.info(f"Tentative: {approach_name}") success = approach_func() if success: self._loading = False self._save_state() logger.info(f"✅ Succès avec {approach_name}") return True except Exception as e: logger.warning(f"❌ Échec de {approach_name}: {e}") continue # If all approaches failed self._loading = False self._load_error = "Toutes les approches de chargement ont échoué" self._save_state() return False except Exception as e: logger.error(f"Erreur critique chargement: {e}") self._loading = False self._load_error = str(e) self._save_state() return False def _try_direct_loading(self): """Try to load directly from Hugging Face Hub without using /data by forcing cache_dir""" try: logger.info("Chargement direct depuis HF Hub...") writable_cache = os.environ.get("HF_HOME", "/home/user/.cache/huggingface") os.makedirs(writable_cache, exist_ok=True) # Load processor directly with explicit cache_dir self.processor = AutoProcessor.from_pretrained( MODEL_ID, trust_remote_code=True, cache_dir=writable_cache, local_files_only=False, ) logger.info("Processor chargé directement") # Load model directly with explicit cache_dir self.model = AutoModelForImageTextToText.from_pretrained( MODEL_ID, trust_remote_code=True, cache_dir=writable_cache, local_files_only=False, low_cpu_mem_usage=True, device_map=self.device_map, torch_dtype=self.dtype, ) if self.device_map == "cpu": self.model = self.model.to("cpu") logger.info("Modèle chargé directement depuis HF Hub") return True except Exception as e: logger.error(f"Échec chargement direct: {e}") return False def _try_app_cache(self): """Try to cache in /app/cache directory""" try: from huggingface_hub import snapshot_download cache_dir = "/app/cache/huggingface" os.makedirs(cache_dir, exist_ok=True) logger.info(f"Snapshot vers {cache_dir}") snapshot_download( repo_id=MODEL_ID, local_dir=cache_dir, local_dir_use_symlinks=False, resume_download=True, token=os.environ.get("HF_TOKEN", None), ) # Load from cache self.processor = AutoProcessor.from_pretrained( cache_dir, trust_remote_code=True, local_files_only=True, ) logger.info("Processor chargé depuis /app/cache") self.model = AutoModelForImageTextToText.from_pretrained( cache_dir, trust_remote_code=True, local_files_only=True, low_cpu_mem_usage=True, device_map=self.device_map, torch_dtype=self.dtype, ) if self.device_map == "cpu": self.model = self.model.to("cpu") logger.info("Modèle chargé depuis /app/cache") return True except Exception as e: logger.error(f"Échec cache /app: {e}") return False def _try_tmp_cache(self): """Try to cache in /tmp/hf_home directory""" try: from huggingface_hub import snapshot_download cache_dir = "/tmp/hf_home" os.makedirs(cache_dir, exist_ok=True) logger.info(f"Snapshot vers {cache_dir}") snapshot_download( repo_id=MODEL_ID, local_dir=cache_dir, local_dir_use_symlinks=False, resume_download=True, token=os.environ.get("HF_TOKEN", None), ) # Load from cache self.processor = AutoProcessor.from_pretrained( cache_dir, trust_remote_code=True, local_files_only=True, ) logger.info("Processor chargé depuis /tmp/hf_home") self.model = AutoModelForImageTextToText.from_pretrained( cache_dir, trust_remote_code=True, local_files_only=True, low_cpu_mem_usage=True, device_map=self.device_map, torch_dtype=self.dtype, ) if self.device_map == "cpu": self.model = self.model.to("cpu") logger.info("Modèle chargé depuis /tmp/hf_home") return True except Exception as e: logger.error(f"Échec cache /tmp/hf_home: {e}") return False def _try_tmp_repo(self): """Try to cache in /tmp/model_repo directory (original approach)""" try: from huggingface_hub import snapshot_download repo_dir = "/tmp/model_repo" offload_dir = "/tmp/model_offload" os.makedirs(repo_dir, exist_ok=True) os.makedirs(offload_dir, exist_ok=True) logger.info(f"Snapshot vers {repo_dir}") snapshot_download( repo_id=MODEL_ID, local_dir=repo_dir, local_dir_use_symlinks=False, resume_download=True, token=os.environ.get("HF_TOKEN", None), ) # Load from cache self.processor = AutoProcessor.from_pretrained( repo_dir, trust_remote_code=True, local_files_only=True, ) logger.info("Processor chargé depuis /tmp/model_repo") self.model = AutoModelForImageTextToText.from_pretrained( repo_dir, trust_remote_code=True, local_files_only=True, low_cpu_mem_usage=True, device_map=self.device_map, torch_dtype=self.dtype, offload_folder=offload_dir, max_memory={0: "8GB", "cpu": "8GB"} if self.device_map == "cpu" else None, ) if self.device_map == "cpu": self.model = self.model.to("cpu") logger.info("Modèle chargé depuis /tmp/model_repo") return True except Exception as e: logger.error(f"Échec cache /tmp/model_repo: {e}") return False def load_model_with_retry(self, max_retries=5, delay=60): """Charge le modèle avec retry automatique en cas d'échec""" for attempt in range(max_retries): try: logger.info(f"Tentative de chargement {attempt + 1}/{max_retries}") success = self.load_model_directly() if success: return True else: logger.warning(f"Échec tentative {attempt + 1}, attente {delay}s...") if attempt < max_retries - 1: time.sleep(delay) except Exception as e: logger.error(f"Erreur tentative {attempt + 1}: {e}") if attempt < max_retries - 1: time.sleep(delay) logger.error(f"Toutes les {max_retries} tentatives ont échoué") return False def ensure_model_loaded(self): """S'assure que le modèle est chargé""" if self.model is not None and self.processor is not None: return True if not self._load_attempted: self._load_attempted = True # Charge directement le modèle (lancé à la demande) return self.load_model_directly() return False def get_load_status(self): """Retourne le statut de chargement""" return { "loaded": self.model is not None and self.processor is not None, "loading": self._loading, "error": self._load_error, "attempted": self._load_attempted } def _complete_partial_load(self): """Complete a partial model load (when processor is loaded but model is not)""" try: logger.info("Tentative de complétion du chargement partiel...") if self.processor and not self.model: logger.info("Processor disponible, chargement du modèle seulement...") # Try to load just the model using the existing processor try: # Use the processor's config to load the model model_config = self.processor.config model_path = model_config._name_or_path logger.info(f"Chargement du modèle depuis {model_path}") self.model = AutoModelForImageTextToText.from_pretrained( model_path, trust_remote_code=True, low_cpu_mem_usage=True, device_map=self.device_map, torch_dtype=self.dtype, offload_folder="/tmp/model_offload", max_memory={0: "8GB", "cpu": "8GB"} if self.device_map == "cpu" else None ) if self.device_map == "cpu": self.model = self.model.to("cpu") logger.info("Modèle complété avec succès!") self._loading = False self._save_state() return True except Exception as e: logger.error(f"Échec de la complétion: {e}") # Fall back to full reload return self.load_model_directly() else: logger.info("Pas de chargement partiel à compléter") return False except Exception as e: logger.error(f"Erreur lors de la complétion: {e}") return False # Instance globale du gestionnaire de modèle model_manager = SharedModelManager() app = FastAPI(title="AgriLens AI FastAPI", version="1.0.0") # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Warmup non bloquant au démarrage - use a more robust approach @app.on_event("startup") async def _warmup_background(): """Démarrage du chargement en arrière-plan sans bloquer le serveur""" logger.info("Démarrage du chargement du modèle en arrière-plan...") # Use a more robust approach that won't be cancelled try: # Run in thread but don't await it to avoid cancellation import threading thread = threading.Thread(target=model_manager.load_model_directly, daemon=True) thread.start() logger.info("Thread de chargement démarré") except Exception as e: logger.error(f"Erreur lors du démarrage du thread: {e}") # Alternative: also try to load on first request if not already loaded @app.middleware("http") async def ensure_model_loaded_middleware(request, call_next): """Middleware pour s'assurer que le modèle est chargé avec récupération automatique""" try: current_time = time.time() # Check for partial loads and trigger automatic recovery (with rate limiting) if (model_manager.processor and not model_manager.model and not model_manager._loading and not hasattr(model_manager, '_middleware_recovery_triggered')): logger.info("🔧 Récupération automatique déclenchée via middleware") model_manager._middleware_recovery_triggered = current_time # Start recovery in background import threading thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True) thread.start() # Check if model needs loading (with rate limiting) elif (not model_manager.model and not model_manager._loading and not hasattr(model_manager, '_middleware_load_triggered')): logger.info("Modèle non chargé, tentative de chargement...") model_manager._middleware_load_triggered = current_time # Start loading in background import threading thread = threading.Thread(target=model_manager.load_model_directly, daemon=True) thread.start() # Clean up old triggers (older than 5 minutes) if hasattr(model_manager, '_middleware_recovery_triggered'): if current_time - model_manager._middleware_recovery_triggered > 300: delattr(model_manager, '_middleware_recovery_triggered') if hasattr(model_manager, '_middleware_load_triggered'): if current_time - model_manager._middleware_load_triggered > 300: delattr(model_manager, '_middleware_load_triggered') except Exception as e: logger.error(f"Erreur dans le middleware: {e}") response = await call_next(request) return response # Add a background task that keeps trying to load the model @app.on_event("startup") async def _persistent_model_loader(): """Persistent model loader that keeps trying until success""" import asyncio import threading def _load_loop(): """Infinite loop to keep trying to load the model""" max_attempts = 5 # Maximum attempts before giving up attempt_count = 0 last_attempt_time = 0 cooldown = 60 # Wait 60s between attempts while attempt_count < max_attempts: try: current_time = time.time() # Check if we should attempt loading if (not model_manager.model and not model_manager._loading and current_time - last_attempt_time > cooldown): logger.info(f"Persistent loader: tentative {attempt_count + 1}/{max_attempts}...") last_attempt_time = current_time attempt_count += 1 success = model_manager.load_model_directly() if success: logger.info("Persistent loader: modèle chargé avec succès!") break else: logger.warning(f"Persistent loader: échec {attempt_count}/{max_attempts}, nouvelle tentative dans {cooldown}s...") time.sleep(cooldown) else: # Model is loading or loaded, wait a bit time.sleep(10) except Exception as e: logger.error(f"Persistent loader: erreur: {e}") attempt_count += 1 time.sleep(cooldown) if attempt_count >= max_attempts: logger.warning("Persistent loader: nombre maximum de tentatives atteint, arrêt") else: logger.info("Persistent loader: terminé avec succès") # Start the persistent loader in a daemon thread thread = threading.Thread(target=_load_loop, daemon=True) thread.start() logger.info("Persistent model loader démarré") # Add automated recovery system @app.on_event("startup") async def _automated_recovery(): """Automated recovery system that detects and fixes partial loads""" import threading import time def _recovery_loop(): """Continuous monitoring and recovery loop""" last_recovery_attempt = 0 recovery_cooldown = 60 # Wait 60s between recovery attempts while True: try: current_time = time.time() # Check for partial loads (processor loaded but model not) if (model_manager.processor and not model_manager.model and not model_manager._loading and current_time - last_recovery_attempt > recovery_cooldown): logger.info("🔧 Récupération automatique détectée: processor chargé mais modèle manquant") logger.info("🚀 Lancement automatique de la récupération...") last_recovery_attempt = current_time # Try to complete the partial load success = model_manager._complete_partial_load() if success: logger.info("✅ Récupération automatique réussie!") break # Exit the loop if successful else: logger.warning("⚠️ Récupération automatique échouée, nouvelle tentative dans 60s...") # Check for stuck loading states elif (model_manager._loading and current_time - model_manager._last_load_attempt > 300): # 5 minutes timeout logger.warning("⏰ Timeout détecté, reset de l'état de chargement...") model_manager._loading = False model_manager._load_error = "Timeout - chargement bloqué" model_manager._save_state() # Wait before next check time.sleep(15) # Check every 15 seconds except Exception as e: logger.error(f"Erreur dans la boucle de récupération: {e}") time.sleep(30) # Start the automated recovery in a daemon thread thread = threading.Thread(target=_recovery_loop, daemon=True) thread.start() logger.info("🔧 Système de récupération automatique démarré") # Add a more robust startup approach using a separate process @app.on_event("startup") async def _robust_startup(): """Robust startup using a separate process to avoid CancelledError""" import multiprocessing import time # Only start if not already loading if model_manager._loading: logger.info("Démarrage robuste: chargement déjà en cours, skip") return try: logger.info("Démarrage du chargement du modèle en arrière-plan...") # Set a flag to prevent multiple processes if hasattr(model_manager, '_startup_process_running'): logger.info("Processus de démarrage déjà en cours, skip") return model_manager._startup_process_running = True def _startup_load(): """Load model in separate process""" try: # Set environment for this process os.environ['HF_HOME'] = '/tmp/hf_home' os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_home/transformers' logger.info("Processus de chargement démarré") success = model_manager.load_model_directly() if success: logger.info("Processus: chargement réussi") else: logger.warning("Processus: échec du chargement") except Exception as e: logger.error(f"Processus: erreur: {e}") finally: # Clean up if hasattr(model_manager, '_startup_process_running'): delattr(model_manager, '_startup_process_running') # Start the process process = multiprocessing.Process(target=_startup_load, daemon=True) process.start() logger.info(f"Processus de chargement du modèle démarré (PID: {process.pid})") # Wait a bit for the process to start time.sleep(2) # Check if process is still alive if not process.is_alive(): logger.warning("Processus de démarrage s'est terminé prématurément") if hasattr(model_manager, '_startup_process_running'): delattr(model_manager, '_startup_process_running') except Exception as e: logger.error(f"Erreur lors du démarrage du processus: {e}") if hasattr(model_manager, '_startup_process_running'): delattr(model_manager, '_startup_process_running') # Add health monitoring with automatic recovery @app.get("/health") def health(): """Vérifie l'état de l'application et du modèle avec récupération automatique.""" try: # Check for partial loads and trigger automatic recovery if model_manager.processor and not model_manager.model and not model_manager._loading: logger.info("🔧 Récupération automatique déclenchée via /health") # Start recovery in background import threading thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True) thread.start() model_loaded = model_manager.ensure_model_loaded() streamlit_cache_available = model_manager.check_streamlit_model_cache() load_status = model_manager.get_load_status() return { "status": "ok" if model_loaded else "cold", "uptime_s": int(time.time() - APP_START_TS), "cuda": torch.cuda.is_available(), "device_map": model_manager.device_map, "dtype": str(model_manager.dtype), "model_id": MODEL_ID, "streamlit_cache_available": streamlit_cache_available, "model_loaded": model_loaded, "load_status": load_status, "auto_recovery": "active", } except Exception as e: logger.error(f"Erreur dans health check: {e}") return { "status": "error", "error": str(e), "uptime_s": int(time.time() - APP_START_TS), } @app.get("/load") def load(): """Force le chargement du modèle.""" try: success = model_manager.load_model_directly() load_status = model_manager.get_load_status() if success: return {"status": "success", "message": "Modèle chargé avec succès", "load_status": load_status} else: return { "status": "error", "message": "Échec du chargement du modèle", "load_status": load_status, "error": model_manager._load_error } except Exception as e: logger.error(f"Erreur lors du chargement forcé: {e}") return {"status": "error", "message": f"Erreur: {str(e)}"} @app.post("/diagnose") async def diagnose( image: UploadFile = File(...), culture: Optional[str] = Form(None), notes: Optional[str] = Form(None) ): """Analyse une image de feuille de plante.""" try: # Vérifier que le modèle est chargé if not model_manager.ensure_model_loaded(): load_status = model_manager.get_load_status() if model_manager._loading: raise HTTPException(status_code=503, detail="Modèle en cours de chargement, veuillez réessayer dans quelques secondes") else: raise HTTPException( status_code=500, detail=f"Modèle non disponible. Statut: {load_status}" ) # Lire l'image image_data = await image.read() pil_image = Image.open(io.BytesIO(image_data)) # Préparer le prompt prompt = _build_prompt(culture, notes) # Préparer les entrées pour le modèle inputs = model_manager.processor( images=pil_image, text=prompt, return_tensors="pt" ) # Déplacer sur le bon device if model_manager.device_map == "cpu": inputs = {k: v.to("cpu") for k, v in inputs.items()} # Générer la réponse with torch.no_grad(): outputs = model_manager.model.generate( **inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7, pad_token_id=model_manager.processor.tokenizer.eos_token_id ) # Décoder la réponse response_text = model_manager.processor.tokenizer.decode( outputs[0], skip_special_tokens=True ) # Extraire seulement la partie générée (après le prompt) if prompt in response_text: diagnosis = response_text.split(prompt)[-1].strip() else: diagnosis = response_text.strip() return { "diagnosis": diagnosis, "model_id": MODEL_ID, "culture": culture, "notes": notes, "processing_time": time.time() - APP_START_TS } except HTTPException: raise except Exception as e: logger.error(f"Erreur lors du diagnostic: {e}") raise HTTPException(status_code=500, detail=f"Erreur lors de l'analyse: {str(e)}") @app.get("/recover") def recover(): """Tente de récupérer un chargement partiel du modèle.""" try: if model_manager.processor and not model_manager.model: logger.info("Récupération d'un chargement partiel...") success = model_manager._complete_partial_load() if success: return {"status": "success", "message": "Modèle récupéré avec succès"} else: return {"status": "error", "message": "Échec de la récupération"} else: return {"status": "info", "message": "Pas de chargement partiel à récupérer"} except Exception as e: logger.error(f"Erreur lors de la récupération: {e}") return {"status": "error", "message": f"Erreur: {str(e)}"} @app.get("/status") def detailed_status(): """Statut détaillé du système avec informations de récupération automatique""" try: current_time = time.time() # Calculate time since last load attempt time_since_last_attempt = current_time - model_manager._last_load_attempt if model_manager._last_load_attempt > 0 else 0 # Check for various states partial_load_detected = model_manager.processor and not model_manager.model stuck_loading = model_manager._loading and time_since_last_attempt > 300 recovery_needed = partial_load_detected or stuck_loading status_info = { "timestamp": current_time, "model_state": { "processor_loaded": model_manager.processor is not None, "model_loaded": model_manager.model is not None, "loading": model_manager._loading, "load_attempted": model_manager._load_attempted, "time_since_last_attempt": f"{time_since_last_attempt:.1f}s" }, "auto_recovery": { "active": True, "partial_load_detected": partial_load_detected, "stuck_loading_detected": stuck_loading, "recovery_needed": recovery_needed, "check_interval": "15s" }, "system": { "uptime_s": int(current_time - APP_START_TS), "device_map": model_manager.device_map, "dtype": str(model_manager.dtype), "model_id": MODEL_ID } } # If recovery is needed, trigger it automatically if recovery_needed: logger.info("🔧 Récupération automatique déclenchée via /status") if partial_load_detected: import threading thread = threading.Thread(target=model_manager._complete_partial_load, daemon=True) thread.start() elif stuck_loading: model_manager._loading = False model_manager._load_error = "Timeout - chargement bloqué" model_manager._save_state() return status_info except Exception as e: logger.error(f"Erreur dans detailed_status: {e}") return { "status": "error", "error": str(e), "timestamp": time.time() } @app.get("/") def root(): """Page d'accueil avec informations sur l'API.""" return { "message": "AgriLens AI FastAPI", "version": "1.0.0", "endpoints": { "health": "/health", "load": "/load", "diagnose": "/diagnose (POST)" }, "model": MODEL_ID, "uptime_s": int(time.time() - APP_START_TS) } # Lancement correct pour Hugging Face Spaces if __name__ == "__main__": import uvicorn port = int(os.environ.get("PORT", 7860)) # Hugging Face donne ce port uvicorn.run("app:app", host="0.0.0.0", port=port)