# podcastify / modal / app.py
# eswardivi's picture
# Upload 3 files
# 8f362a9 verified
import modal
import io
# Container image definition, built as one fluent chain:
#   1. Debian slim base with Python 3.12
#   2. the espeak-ng system package
#   3. the Python packages the service needs
#   4. the spaCy English model, downloaded at image build time
image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("espeak-ng")
    .pip_install("kokoro>=0.9.4", "soundfile", "fastapi[standard]", "spacy==3.8.0")
    .run_commands("python -m spacy download en_core_web_sm")
)

# Modal application; every function/class registered on it uses `image`.
app = modal.App("kokoro-api", image=image)
# Imports that are scoped to the container image rather than the local
# environment (see Modal's `Image.imports()` documentation for the exact
# semantics of this context manager).
with image.imports():
    # NOTE(review): `os`, `display`, `Audio`, `torch` and `StreamingResponse`
    # are not referenced anywhere in the visible part of this file.
    import os
    from kokoro import KPipeline
    # NOTE(review): IPython is not listed in the image's pip_install call;
    # this presumably relies on it being installed transitively - confirm.
    from IPython.display import display, Audio
    import soundfile as sf
    import torch
    from fastapi.responses import StreamingResponse, Response, FileResponse
    import numpy as np
    import uuid
@app.cls(gpu="t4", scaledown_window=60 * 2, enable_memory_snapshot=True)
@modal.concurrent(max_inputs=30)
class kokoro:
    """Text-to-speech web service backed by a Kokoro ``KPipeline``.

    Registered on the Modal app with a T4 GPU, a 120-second scaledown
    window, memory snapshots enabled, and up to 30 concurrent inputs per
    container. Exposes two HTTP endpoints: ``generate`` (POST) and
    ``wake_up`` (GET).
    """

    @modal.enter()
    def load(self):
        """Build the Kokoro pipeline once, on container start-up.

        ``lang_code="a"`` selects American English according to Kokoro's
        documentation.
        """
        self.pipeline = KPipeline(lang_code="a")

    @modal.fastapi_endpoint(docs=True, method="POST")
    def generate(self, text: str, voice: str = "af_heart"):
        """Synthesize ``text`` with ``voice`` and return it as a WAV download.

        Args:
            text: Text to speak. Must contain at least one non-whitespace
                character.
            voice: Kokoro voice identifier passed straight to the pipeline.

        Returns:
            A ``Response`` with media type ``audio/wav`` and an attachment
            ``Content-Disposition`` header on success, or a 400 ``Response``
            when the text is blank or the pipeline produced no audio.
        """
        if not text.strip():
            return Response(content="Text is empty", status_code=400)

        # The pipeline yields 3-tuples; only the third element (audio) is used.
        # Skip entries that carry no audio so concatenation cannot choke on None.
        chunks = [
            audio for _, _, audio in self.pipeline(text, voice) if audio is not None
        ]
        if not chunks:
            # np.concatenate raises ValueError on an empty list; report a
            # client error instead of letting the request fail with a 500.
            return Response(
                content="No audio was generated for the given text",
                status_code=400,
            )

        # Encode in memory instead of writing a per-request file to the
        # container's working directory, where nothing ever removed it.
        wav_buffer = io.BytesIO()
        # A file-like target has no extension, so the format must be explicit.
        sf.write(wav_buffer, np.concatenate(chunks), 24000, format="WAV")

        # Keep the random "<uuid>.wav" download name clients received before.
        download_name = f"{uuid.uuid4()}.wav"
        return Response(
            content=wav_buffer.getvalue(),
            media_type="audio/wav",
            headers={
                "Content-Disposition": f'attachment; filename="{download_name}"'
            },
        )

    @modal.fastapi_endpoint(docs=True, method="GET")
    def wake_up(self):
        """Return a 200 response; calling it brings a container up."""
        return Response(content="Kokoro is awake", status_code=200)