Spaces:
Running
Running
File size: 1,702 Bytes
8f362a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import modal
import io
# Container image definition, built as one chain:
#   1. Debian-slim base on Python 3.12 with the espeak-ng system package,
#   2. the Python packages the service imports,
#   3. the spaCy English model, downloaded at image build time.
image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("espeak-ng")
    .pip_install(
        "kokoro>=0.9.4",
        "soundfile",
        "fastapi[standard]",
        "spacy==3.8.0",
    )
    .run_commands("python -m spacy download en_core_web_sm")
)

# The Modal app; everything registered on it uses the image above.
app = modal.App("kokoro-api", image=image)
# Imports wrapped in `image.imports()`: these packages are installed in the
# container image and are not necessarily available where the file is
# deployed from.
with image.imports():
    # Standard library
    import os
    import uuid

    # Third-party (provided by the container image)
    import numpy as np
    import soundfile as sf
    import torch
    from fastapi.responses import FileResponse, Response, StreamingResponse
    # NOTE(review): IPython is not in the pip_install list visible in this
    # file -- confirm it is actually present in the image (e.g. as a
    # transitive dependency), otherwise this import will fail in the container.
    from IPython.display import Audio, display
    from kokoro import KPipeline
@app.cls(gpu="t4", scaledown_window=60 * 2, enable_memory_snapshot=True)
@modal.concurrent(max_inputs=30)
class kokoro:
    """Text-to-speech service backed by a Kokoro ``KPipeline``.

    Exposes two web endpoints:

    * ``generate`` (POST) -- synthesize speech for a piece of text and return
      it as a WAV download.
    * ``wake_up`` (GET) -- return a fixed 200 response.
    """

    @modal.enter()
    def load(self):
        """Create the Kokoro pipeline once when the container starts."""
        self.pipeline = KPipeline(lang_code="a")

    @modal.fastapi_endpoint(docs=True, method="POST")
    def generate(self, text: str, voice: str = "af_heart"):
        """Synthesize ``text`` with the given ``voice`` and return WAV audio.

        Args:
            text: Text to speak. Empty or whitespace-only text is rejected.
            voice: Voice identifier passed through to the pipeline.

        Returns:
            A 200 response with ``audio/wav`` content, sent as an attachment
            with a random ``<uuid>.wav`` filename; or a 400 response when the
            text is empty or the pipeline produced no audio for it.
        """
        if not text.strip():
            return Response(content="Text is empty", status_code=400)

        # Each item yielded by the pipeline unpacks into three values; only
        # the third (the audio chunk) is needed here. Chunks without audio
        # are skipped so the concatenation below cannot fail on them.
        chunks = [
            audio
            for _, _, audio in self.pipeline(text, voice)
            if audio is not None
        ]
        if not chunks:
            # np.concatenate raises ValueError on an empty list; answer with
            # a client error instead of letting the request crash.
            return Response(
                content="No audio was generated for the given text",
                status_code=400,
            )
        waveform = np.concatenate(chunks)

        # Encode in memory rather than writing a file into the working
        # directory: the on-disk file was never removed, so every request
        # leaked one WAV for the lifetime of the container. soundfile needs
        # an explicit format when the target is a file-like object.
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, waveform, 24000, format="WAV")

        download_name = f"{uuid.uuid4()}.wav"
        return Response(
            content=wav_buffer.getvalue(),
            media_type="audio/wav",
            headers={
                "Content-Disposition": f'attachment; filename="{download_name}"'
            },
        )

    @modal.fastapi_endpoint(docs=True, method="GET")
    def wake_up(self):
        """Return a fixed 200 response; performs no synthesis."""
        return Response(content="Kokoro is awake", status_code=200)
|