|
import os |
|
import tempfile |
|
import numpy as np |
|
import soundfile as sf |
|
from fastapi import FastAPI, File, Form, UploadFile, HTTPException |
|
from fastapi.responses import FileResponse, JSONResponse, HTMLResponse |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from gtts import gTTS, lang |
|
import google.generativeai as genai |
|
from kokoro import KPipeline |
|
|
|
|
|
# Application instance that every route in this module is registered on.
app = FastAPI()

# Fully permissive CORS: any origin, any HTTP method and any request header
# are accepted, and credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm this is intended
# before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
|
|
|
|
# Read the Gemini key from the environment and refuse to start without it,
# so a missing key is reported at import time rather than on the first request.
# NOTE(review): the first character of the error message looks like a
# mis-decoded symbol; it is kept as-is here -- confirm the intended glyph.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("β GEMINI_API_KEY not set in environment.")

genai.configure(api_key=GEMINI_API_KEY)
|
|
|
|
|
# Display name -> single-letter code handed to ``KPipeline(lang_code=...)``.
# Languages listed here are synthesized with Kokoro; everything else goes
# through gTTS.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}
|
|
|
# gTTS language table (language code -> display name), with Japanese forced
# in so it is always offered.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES["ja"] = "Japanese"

# Alphabetically sorted, de-duplicated union of every display name that
# either TTS backend can handle.
SUPPORTED_LANGUAGES = sorted({*KOKORO_LANGUAGES, *GTTS_LANGUAGES.values()})
|
|
|
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a small HTML landing page that lists the API's endpoints."""
    # NOTE(review): the two characters before "Audio Translator API" look
    # like a mis-decoded emoji (probably a microphone); kept unchanged here
    # -- confirm and restore the intended glyph.
    landing_page = """
    <h2>π€ Audio Translator API</h2>
    <p>POST <code>/translate</code> with audio + target language</p>
    <p>GET <code>/languages</code> for supported languages</p>
    """
    return landing_page
|
|
|
@app.get("/languages")
async def get_languages():
    """Return the list of supported language names as a JSON array."""
    return JSONResponse(SUPPORTED_LANGUAGES)
|
|
|
# Bare MIME types (no parameters) accepted by /translate.
_ACCEPTED_AUDIO_TYPES = frozenset(
    {"audio/wav", "audio/mpeg", "audio/mp4", "audio/webm"}
)


def _make_temp_audio_path(suffix):
    """Create an empty temporary file ending in *suffix* and return its path.

    ``tempfile.mkstemp`` returns an already-open OS-level file descriptor
    alongside the path. It is closed here so the process does not leak one
    descriptor per request; the file itself is left on disk for the
    download endpoint.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _synthesize_speech(text, language):
    """Render *text* as speech and return the path of the audio file.

    Languages listed in ``KOKORO_LANGUAGES`` are synthesized with Kokoro and
    written as a 24 kHz WAV file; every other language goes through gTTS and
    is saved as MP3.

    Raises:
        ValueError: if Kokoro yields no audio segments.
    """
    if language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[language])
        segments = [
            chunk
            for _, _, chunk in pipeline(text, voice="af_heart", speed=1)
            if chunk is not None
        ]
        if not segments:
            raise ValueError("No audio generated by Kokoro.")
        out_path = _make_temp_audio_path(".wav")
        sf.write(out_path, np.concatenate(segments), 24000)
        return out_path

    # Reverse lookup (display name -> gTTS code). Names gTTS does not know
    # fall back to English, as before.
    gtts_lang_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == language),
        "en",
    )
    tts = gTTS(text, lang=gtts_lang_code)
    out_path = _make_temp_audio_path(".mp3")
    tts.save(out_path)
    return out_path


@app.post("/translate")
async def translate_audio(audio: UploadFile = File(...), language: str = Form(...)):
    """Transcribe an uploaded clip, translate it, and synthesize the result.

    Args:
        audio: Uploaded audio file. Its content type, ignoring any
            ``;parameter`` suffix, must be one of ``_ACCEPTED_AUDIO_TYPES``.
        language: Display name of the target language.

    Returns:
        A dict with ``transcription``, ``translation`` and ``audio_url``
        (a ``/download/<file>`` path for the synthesized audio). Any
        unexpected failure is reported as a 500 JSON response of the form
        ``{"error": "<message>"}``.

    Raises:
        HTTPException: 400 when the content type is unsupported or the
            upload is empty.
    """
    # Uploads may carry parameters (e.g. "audio/webm;codecs=opus") or no
    # content type at all; compare on the bare, lower-cased type only.
    mime_type = (audio.content_type or "").split(";", 1)[0].strip().lower()
    if mime_type not in _ACCEPTED_AUDIO_TYPES:
        # Raised outside the try block so the client really gets a 400
        # instead of having it converted into a generic 500.
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported audio type: {audio.content_type}",
        )

    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            raise HTTPException(status_code=400, detail="Uploaded audio is empty")

        # NOTE(review): the Gemini and TTS calls below are not awaited, so
        # they run synchronously inside this coroutine.
        model = genai.GenerativeModel("gemini-1.5-flash")
        convo = model.start_chat()
        convo.send_message("You are a transcription expert. Transcribe this audio accurately. Only respond with the transcription.")
        response = convo.send_message({
            'mime_type': mime_type,
            'data': audio_bytes,
        })
        transcription = response.text.strip()

        translate_prompt = f"Translate the following to {language}:\n\n{transcription}"
        translation = model.generate_content(translate_prompt).text.strip()

        out_path = _synthesize_speech(translation, language)

        return {
            "transcription": transcription,
            "translation": translation,
            "audio_url": f"/download/{os.path.basename(out_path)}",
        }
    except HTTPException:
        # Let FastAPI turn deliberate HTTP errors into their own responses.
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|
|
|
# File extension -> Content-Type for the two formats /translate produces.
_DOWNLOAD_MEDIA_TYPES = {".wav": "audio/wav", ".mp3": "audio/mpeg"}


@app.get("/download/{filename}")
async def download_audio(filename: str):
    """Send back a previously generated audio file from the temp directory.

    Args:
        filename: Bare file name as handed out in ``audio_url`` by the
            translate endpoint.

    Returns:
        A ``FileResponse`` whose media type matches the file extension
        (``audio/wav`` for ``.wav``, ``audio/mpeg`` for ``.mp3``).

    Raises:
        HTTPException: 404 when the name contains a path component, has an
            unexpected extension, or does not refer to an existing regular
            file.
    """
    # Only bare names are valid; anything with a directory part could point
    # outside the temp directory.
    if os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="File not found")

    # Serve only the formats this service writes, so unrelated temp files
    # are not exposed, and label each with its real media type.
    extension = os.path.splitext(filename)[1].lower()
    media_type = _DOWNLOAD_MEDIA_TYPES.get(extension)
    path = os.path.join(tempfile.gettempdir(), filename)
    if media_type is None or not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(path, media_type=media_type, filename=f"translated_{filename}")