import tempfile

import modal

app = modal.App("whisper-app")

# Container image: Whisper + its HTTP client, plus the ffmpeg binary
# that Whisper shells out to for audio decoding.
image = (
    modal.Image.debian_slim()
    .pip_install("openai-whisper", "requests")
    .apt_install("ffmpeg")
)


@app.cls(image=image)
class WhisperModel:
    """Serverless Whisper transcription service running on Modal."""

    def __init__(self):
        # Populated once per container in init(); None until then.
        self.model = None

    @modal.enter()  # ✅ This is critical: use @modal.enter() not just __enter__
    def init(self):
        """Load the Whisper model once per container start, not per call."""
        import whisper

        self.model = whisper.load_model("base")

    @modal.method()
    def transcribe(self, audio_url):
        """Download the audio at *audio_url* and return its transcript text.

        Raises:
            requests.HTTPError: if the download returns a non-2xx status.
        """
        import requests

        # Download audio file; fail loudly on HTTP errors instead of
        # handing an error page to Whisper.
        response = requests.get(audio_url, timeout=60)
        response.raise_for_status()

        # Unique temp file so concurrent calls in the same container
        # don't clobber each other's audio (a fixed "audio.wav" would).
        # ffmpeg sniffs the real format, so the suffix is cosmetic.
        with tempfile.NamedTemporaryFile(suffix=".audio") as f:
            f.write(response.content)
            f.flush()  # ensure bytes hit disk before ffmpeg reads the path
            result = self.model.transcribe(f.name)
        return result["text"]


@app.local_entrypoint()
def main():
    """Transcribe a small public sample clip and print the result."""
    url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"
    model = WhisperModel()
    result = model.transcribe.remote(url)
    print(result)