# Gtggtggg / app.py
# Athspi's picture
# Create app.py
# 2d4f453 verified
import os
import tempfile
import numpy as np
import soundfile as sf
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from gtts import gTTS, lang
import google.generativeai as genai
from kokoro import KPipeline
# Application object and its CORS policy.
# The middleware below accepts every origin, HTTP method and request header,
# and also allows credentials.
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive combination — confirm it is intended before public deployment.
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
# Gemini credentials: read the key from the environment and fail fast at
# import time when it is missing or empty, then hand it to the client library.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("❌ GEMINI_API_KEY not set in environment.")

genai.configure(api_key=GEMINI_API_KEY)
# Language tables.
# KOKORO_LANGUAGES maps a display name to the language code passed to
# KPipeline; languages listed here are synthesized with Kokoro.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# GTTS_LANGUAGES maps a gTTS language code to its display name; the 'ja'
# entry is set explicitly so Japanese is always present.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES.update(ja="Japanese")

# Alphabetically sorted, de-duplicated display names from both engines.
SUPPORTED_LANGUAGES = sorted({*KOKORO_LANGUAGES, *GTTS_LANGUAGES.values()})
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a short HTML landing page that points at the API's endpoints."""
    # The leading and trailing empty entries reproduce the newline that
    # opens and closes the page markup.
    landing_page_lines = (
        "",
        "<h2>🎀 Audio Translator API</h2>",
        "<p>POST <code>/translate</code> with audio + target language</p>",
        "<p>GET <code>/languages</code> for supported languages</p>",
        "",
    )
    return "\n".join(landing_page_lines)
@app.get("/languages")
async def get_languages():
    """Return the sorted list of supported language names as a JSON array."""
    languages_response = JSONResponse(content=SUPPORTED_LANGUAGES)
    return languages_response
def _reserve_temp_file(suffix):
    """Create an empty temporary file and return its path with the handle closed."""
    handle, path = tempfile.mkstemp(suffix=suffix)
    # mkstemp returns an already-open OS-level descriptor; close it right away
    # so that one descriptor is not leaked per request. The file itself stays
    # on disk and is written to by path afterwards.
    os.close(handle)
    return path


@app.post("/translate")
async def translate_audio(audio: UploadFile = File(...), language: str = Form(...)):
    """Transcribe uploaded audio, translate it, and synthesize the translation.

    The upload is transcribed with Gemini, the transcription is translated
    into ``language`` with Gemini, and the translated text is turned into
    speech: with Kokoro (WAV output) when ``language`` is a key of
    ``KOKORO_LANGUAGES``, otherwise with gTTS (MP3 output). When gTTS has no
    code for ``language`` it falls back to ``'en'``.

    Args:
        audio: Uploaded audio file; its content type must be one of
            ``audio/wav``, ``audio/mpeg``, ``audio/mp4`` or ``audio/webm``.
        language: Display name of the target language.

    Returns:
        A dict with ``transcription``, ``translation`` and ``audio_url``
        (a ``/download/<name>`` path for the generated file), or a
        ``JSONResponse`` with status 500 and an ``error`` message when any
        processing step fails.

    Raises:
        HTTPException: 400 when the upload's content type is not supported.
    """
    # Validate before entering the try block: raised inside it, this 400 would
    # be caught by the catch-all handler below and reported as a 500.
    if audio.content_type not in ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']:
        raise HTTPException(status_code=400, detail=f"Unsupported audio type: {audio.content_type}")

    try:
        audio_bytes = await audio.read()

        # Transcription using Gemini
        model = genai.GenerativeModel("gemini-1.5-flash")
        convo = model.start_chat()
        convo.send_message("You are a transcription expert. Transcribe this audio accurately. Only respond with the transcription.")
        response = convo.send_message({
            'mime_type': audio.content_type,
            'data': audio_bytes
        })
        transcription = response.text.strip()

        # Translation using Gemini
        translate_prompt = f"Translate the following to {language}:\n\n{transcription}"
        translation = model.generate_content(translate_prompt).text.strip()

        # TTS
        if language in KOKORO_LANGUAGES:
            lang_code = KOKORO_LANGUAGES[language]
            pipeline = KPipeline(lang_code=lang_code)
            generator = pipeline(translation, voice="af_heart", speed=1)
            # The loop variable is deliberately not called `audio`, so it
            # cannot be confused with the uploaded-file parameter.
            audio_segments = [segment for _, _, segment in generator if segment is not None]
            if not audio_segments:
                raise ValueError("No audio generated by Kokoro.")
            final_audio = np.concatenate(audio_segments)
            out_path = _reserve_temp_file(".wav")
            sf.write(out_path, final_audio, 24000)
        else:
            # Reverse lookup: display name -> gTTS code, defaulting to English.
            gtts_lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == language), 'en')
            tts = gTTS(translation, lang=gtts_lang_code)
            out_path = _reserve_temp_file(".mp3")
            tts.save(out_path)

        return {
            "transcription": transcription,
            "translation": translation,
            "audio_url": f"/download/{os.path.basename(out_path)}"
        }
    except HTTPException:
        # Let deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
@app.get("/download/{filename}")
async def download_audio(filename: str):
    """Serve a generated audio file from the system temporary directory.

    Only bare file names ending in ``.wav`` or ``.mp3`` are accepted, so the
    endpoint cannot be used to fetch unrelated files that other programs keep
    in the shared temporary directory.

    Args:
        filename: Name of the file, taken from the ``audio_url`` path
            (``/download/<name>``).

    Returns:
        A ``FileResponse`` whose media type matches the file extension and
        whose download name is ``translated_<filename>``.

    Raises:
        HTTPException: 404 when the name is not acceptable or no such regular
            file exists.
    """
    media_types = {".mp3": "audio/mpeg", ".wav": "audio/wav"}
    extension = os.path.splitext(filename)[1]

    # Reject names with directory components and any extension other than the
    # two audio formats listed above.
    if os.path.basename(filename) != filename or extension not in media_types:
        raise HTTPException(status_code=404, detail="File not found")

    path = os.path.join(tempfile.gettempdir(), filename)
    # isfile (not exists) so that a directory is never passed to FileResponse.
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="File not found")

    # WAV files were previously labelled audio/mpeg; use the matching type.
    return FileResponse(path, media_type=media_types[extension], filename=f"translated_{filename}")