Spaces:
Running
Running
import modal | |
import io | |
# Container image for the app, built as one fluent chain: Debian slim with
# Python 3.12, the espeak-ng system package, the Python dependencies, and the
# small English spaCy model downloaded at image build time.
image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("espeak-ng")
    .pip_install(
        "kokoro>=0.9.4",
        "soundfile",
        "fastapi[standard]",
        "spacy==3.8.0",
    )
    .run_commands("python -m spacy download en_core_web_sm")
)

# Modal application; functions/classes registered on it run in `image`.
app = modal.App("kokoro-api", image=image)
with image.imports(): | |
import os | |
from kokoro import KPipeline | |
from IPython.display import display, Audio | |
import soundfile as sf | |
import torch | |
from fastapi.responses import StreamingResponse, Response, FileResponse | |
import numpy as np | |
import uuid | |
class kokoro:
    """Text-to-speech service built around a Kokoro ``KPipeline``.

    ``load`` must run before ``generate``, because ``generate`` reads
    ``self.pipeline``.

    NOTE(review): the class name is lowercase and matches the ``kokoro``
    package name; it is kept unchanged because callers may reference it.
    """

    # Sample rate (Hz) written into the WAV header for generated audio.
    SAMPLE_RATE = 24000

    def load(self):
        """Create the Kokoro pipeline and store it on the instance."""
        # NOTE(review): lang_code "a" is presumably American English in
        # Kokoro's language-code scheme -- confirm against the Kokoro docs.
        self.pipeline = KPipeline(lang_code="a")

    def generate(self, text: str, voice: str = "af_heart"):
        """Synthesize ``text`` with ``voice`` and return it as a WAV response.

        Args:
            text: The text to speak. Empty or whitespace-only text is rejected.
            voice: Kokoro voice identifier passed to the pipeline.

        Returns:
            A 400 ``Response`` when the text is empty or the pipeline yields
            no audio; otherwise a ``Response`` whose body is the WAV file,
            served as ``audio/wav`` with an attachment filename.
        """
        if not text or not text.strip():
            return Response(content="Text is empty", status_code=400)

        # The pipeline yields 3-tuples; only the last element (audio) is used.
        # Segments without audio are skipped so concatenation cannot fail on
        # a None entry.
        segments = [
            audio for _, _, audio in self.pipeline(text, voice) if audio is not None
        ]
        if not segments:
            # np.concatenate raises ValueError on an empty list, so report the
            # problem to the client instead of crashing the request.
            return Response(
                content="No audio was generated for the given text",
                status_code=400,
            )

        samples = np.concatenate(segments)

        # Encode the WAV in memory rather than writing a uniquely named file
        # to the working directory, which was never cleaned up and would
        # accumulate one file per request.
        buffer = io.BytesIO()
        sf.write(buffer, samples, self.SAMPLE_RATE, format="WAV")

        # Keep a random download name so clients still receive an attachment
        # with a unique ".wav" filename.
        filename = f"{uuid.uuid4()}.wav"
        return Response(
            content=buffer.getvalue(),
            media_type="audio/wav",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )

    def wake_up(self):
        """Return a fixed 200 response; does not touch the pipeline."""
        return Response(content="Kokoro is awake", status_code=200)