Spaces:

learnmlf
/

Acfoley

Sleeping

learnmlf committed 9 days ago

Commit

2c1dff6

1 Parent(s): 9c07de8

Optimize model loading for HF Space

- Implement global model instance with one-time initialization
- Add progressive loading status updates with Chinese descriptions
- Remove manual model loading - auto-initialize on startup
- Add model status refresh functionality
- Improve user experience with clear progress indicators
- Cache model in memory to avoid reloading on each request

Files changed (1) hide show

app.py +77 -19

app.py CHANGED Viewed

@@ -99,7 +99,9 @@ class AudioFoleyModel:
         self.feature_utils = None
     def load_model(self, variant='large_44k', model_path=None):
-        """Load the hf_AC model"""
         try:
             if not HF_AC_AVAILABLE:
                 return "❌ hf_AC modules not available. Please install the hf_AC package."
@@ -108,16 +110,20 @@ class AudioFoleyModel:
                 available_variants = list(all_model_cfg.keys()) if all_model_cfg else []
                 return f"❌ Unknown model variant: {variant}. Available: {available_variants}"
             log.info(f"Loading model variant: {variant}")
             self.model: ModelConfig = all_model_cfg[variant]
-            # Download model components if needed
             try:
                 self.model.download_if_needed()
             except Exception as e:
                 log.warning(f"Could not download model components: {e}")
-            # Try to download main model weights from HuggingFace
             if not hasattr(self.model, 'model_path') or not self.model.model_path or not Path(self.model.model_path).exists():
                 try:
                     from huggingface_hub import hf_hub_download
@@ -146,10 +152,12 @@ class AudioFoleyModel:
                 self.model.model_path = Path(model_path)
                 log.info(f"Using custom model path: {model_path}")
-            # Load network
             self.net: MMAudio = get_my_mmaudio(self.model.model_name).to(self.device, self.dtype).eval()
-            # Load weights
             if hasattr(self.model, 'model_path') and self.model.model_path and Path(self.model.model_path).exists():
                 try:
                     weights = torch.load(self.model.model_path, map_location=self.device, weights_only=True)
@@ -157,15 +165,19 @@ class AudioFoleyModel:
                     log.info(f'✅ Loaded weights from {self.model.model_path}')
                 except Exception as e:
                     log.error(f"Failed to load weights: {e}")
-                    return f"❌ Failed to load model weights: {e}"
             else:
                 log.warning('⚠️ No model weights found, using default initialization')
-                return "⚠️ Model components loaded, but main weights not available. Some features may be limited."
-            # Initialize flow matching
             self.fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
-            # Initialize feature utils
             try:
                 self.feature_utils = FeaturesUtils(
                     tod_vae_ckpt=self.model.vae_path,
@@ -178,13 +190,17 @@ class AudioFoleyModel:
                 self.feature_utils = self.feature_utils.to(self.device, self.dtype).eval()
             except Exception as e:
                 log.error(f"Failed to initialize feature utils: {e}")
-                return f"❌ Failed to initialize feature utilities: {e}"
-            return "✅ Model loaded successfully!"
         except Exception as e:
-            error_msg = f"❌ Error loading model: {str(e)}\n{traceback.format_exc()}"
             log.error(error_msg)
             return error_msg
     def generate_audio(self, video_file, prompt: str, negative_prompt: str = "",
@@ -298,11 +314,35 @@ class AudioFoleyModel:
             log.error(error_msg)
             return None, error_msg
-# Initialize model
-audio_model = AudioFoleyModel()
 def generate_audio_interface(video_file, prompt, duration, cfg_strength):
     """Interface function for generating audio"""
     # Use fixed seed for consistency in HF Space
     seed = 42
     negative_prompt = ""  # Simplified interface
@@ -312,6 +352,11 @@ def generate_audio_interface(video_file, prompt, duration, cfg_strength):
     )
     return audio_path, message
 # Create Gradio interface
 with gr.Blocks(title="hf_AC Audio Foley Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
@@ -319,16 +364,23 @@ with gr.Blocks(title="hf_AC Audio Foley Generator", theme=gr.themes.Soft()) as d
     基于AI的视频音频生成工具。上传视频并提供文本描述，模型将生成匹配的音频内容。
-    **注意**: 首次使用时模型需要下载，请耐心等待。
     """)
-    # Model status display
     model_status = gr.Textbox(
         label="模型状态",
-        value="正在初始化模型...",
         interactive=False
     )
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(
@@ -404,12 +456,18 @@ with gr.Blocks(title="hf_AC Audio Foley Generator", theme=gr.themes.Soft()) as d
         - "木地板上轻柔的脚步声"
         """)
-    # Auto-load model on startup
     demo.load(
-        fn=lambda: audio_model.load_model(),
         outputs=[model_status]
     )
 if __name__ == "__main__":
     # HF Space will handle the server configuration
     demo.launch()

         self.feature_utils = None
     def load_model(self, variant='large_44k', model_path=None):
+        """Load the hf_AC model with progress updates"""
+        global model_loading_status
         try:
             if not HF_AC_AVAILABLE:
                 return "❌ hf_AC modules not available. Please install the hf_AC package."
                 available_variants = list(all_model_cfg.keys()) if all_model_cfg else []
                 return f"❌ Unknown model variant: {variant}. Available: {available_variants}"
+            # Step 1: Initialize model config
+            model_loading_status = "🔧 初始化模型配置..."
             log.info(f"Loading model variant: {variant}")
             self.model: ModelConfig = all_model_cfg[variant]
+            # Step 2: Download model components
+            model_loading_status = "📥 下载模型组件..."
             try:
                 self.model.download_if_needed()
             except Exception as e:
                 log.warning(f"Could not download model components: {e}")
+            # Step 3: Download main model weights
+            model_loading_status = "📥 下载主模型权重..."
             if not hasattr(self.model, 'model_path') or not self.model.model_path or not Path(self.model.model_path).exists():
                 try:
                     from huggingface_hub import hf_hub_download
                 self.model.model_path = Path(model_path)
                 log.info(f"Using custom model path: {model_path}")
+            # Step 4: Load neural network
+            model_loading_status = "🧠 加载神经网络..."
             self.net: MMAudio = get_my_mmaudio(self.model.model_name).to(self.device, self.dtype).eval()
+            # Step 5: Load weights
+            model_loading_status = "⚖️ 加载模型权重..."
             if hasattr(self.model, 'model_path') and self.model.model_path and Path(self.model.model_path).exists():
                 try:
                     weights = torch.load(self.model.model_path, map_location=self.device, weights_only=True)
                     log.info(f'✅ Loaded weights from {self.model.model_path}')
                 except Exception as e:
                     log.error(f"Failed to load weights: {e}")
+                    model_loading_status = f"❌ Failed to load model weights: {e}"
+                    return model_loading_status
             else:
                 log.warning('⚠️ No model weights found, using default initialization')
+                model_loading_status = "⚠️ 模型组件已加载，但主权重不可用。某些功能可能受限。"
+                return model_loading_status
+            # Step 6: Initialize flow matching
+            model_loading_status = "🌊 初始化流匹配..."
             self.fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
+            # Step 7: Initialize feature utils
+            model_loading_status = "🔧 初始化特征工具..."
             try:
                 self.feature_utils = FeaturesUtils(
                     tod_vae_ckpt=self.model.vae_path,
                 self.feature_utils = self.feature_utils.to(self.device, self.dtype).eval()
             except Exception as e:
                 log.error(f"Failed to initialize feature utils: {e}")
+                model_loading_status = f"❌ Failed to initialize feature utilities: {e}"
+                return model_loading_status
+            # Step 8: Complete
+            model_loading_status = "✅ 模型加载完成！可以开始生成音频。"
+            return model_loading_status
         except Exception as e:
+            error_msg = f"❌ 模型加载错误: {str(e)}"
             log.error(error_msg)
+            model_loading_status = error_msg
             return error_msg
     def generate_audio(self, video_file, prompt: str, negative_prompt: str = "",
             log.error(error_msg)
             return None, error_msg
+# Global model instance - initialized once
+audio_model = None
+model_loading_status = "未初始化"
+def initialize_model():
+    """Create and load the shared model on the first call; report status afterwards.
+
+    On the first call this builds the global ``AudioFoleyModel`` instance, runs
+    ``load_model()`` on it and records the returned message in
+    ``model_loading_status``. On later calls the model is not rebuilt.
+
+    Returns:
+        The current ``model_loading_status`` string: the result of
+        ``load_model()`` (success, warning or error message), or the
+        initialization-failure message if constructing/loading raised.
+    """
+    global audio_model, model_loading_status
+    if audio_model is not None:
+        # load_model() reports failures by returning a message instead of
+        # raising, so an existing instance does not imply a successful load.
+        # Return the recorded outcome rather than a blanket success message.
+        return model_loading_status
+    try:
+        model_loading_status = "正在初始化模型..."
+        audio_model = AudioFoleyModel()
+        model_loading_status = audio_model.load_model()
+    except Exception as e:
+        model_loading_status = f"❌ 模型初始化失败: {str(e)}"
+    return model_loading_status
 def generate_audio_interface(video_file, prompt, duration, cfg_strength):
     """Interface function for generating audio"""
+    global audio_model, model_loading_status
+    # Check if model is loaded
+    if audio_model is None or audio_model.net is None:
+        return None, "❌ 模型未加载，请等待初始化完成或刷新页面"
     # Use fixed seed for consistency in HF Space
     seed = 42
     negative_prompt = ""  # Simplified interface
     )
     return audio_path, message
+def get_model_status():
+    """Return the module-level ``model_loading_status`` string as it currently stands."""
+    # Reading a module global needs no ``global`` declaration.
+    return model_loading_status
 # Create Gradio interface
 with gr.Blocks(title="hf_AC Audio Foley Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     基于AI的视频音频生成工具。上传视频并提供文本描述，模型将生成匹配的音频内容。
+    **注意**: 模型会在启动时自动加载，首次使用需要下载约3GB的模型文件。
     """)
+    # Model status display - will be updated automatically
     model_status = gr.Textbox(
         label="模型状态",
+        value=model_loading_status,
         interactive=False
     )
+    # Add a refresh button for status
+    refresh_status_btn = gr.Button("🔄 刷新状态", size="sm")
+    refresh_status_btn.click(
+        fn=get_model_status,
+        outputs=model_status
+    )
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(
         - "木地板上轻柔的脚步声"
         """)
+    # Auto-initialize model on startup
     demo.load(
+        fn=initialize_model,
         outputs=[model_status]
     )
+# Initialize model when module is imported (for HF Space).
+# initialize_model() calls load_model() synchronously, so this statement does
+# not return until loading has produced a status string.
+if HF_AC_AVAILABLE:
+    print("🚀 Starting model initialization...")
+    initialize_model()
+    print(f"📊 Model status: {model_loading_status}")
 if __name__ == "__main__":
     # HF Space will handle the server configuration
     demo.launch()