# Spaces: Running
import os
import time

import gradio as gr
import requests
from dotenv import load_dotenv
from huggingface_hub import login

# Load variables from a .env file (if present) before HF_TOKEN is read below.
load_dotenv()

# Hugging Face Inference API endpoints used by the three pipeline stages:
# text-to-image, translation, and speech-to-text.
image_API = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
translation_API = "https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
whisper_API = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast: without a token, login() would be called with None and every
    # request below would carry the literal header "Bearer None".
    raise RuntimeError(
        "HF_TOKEN is not set; define it in the environment or in a .env file."
    )
login(HF_TOKEN)

# Authorization header shared by every request to the endpoints above.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
def query_whisper(audio_path, timeout=120):
    """Send an audio file to the Whisper endpoint and return its JSON reply.

    The file at ``audio_path`` is read as raw bytes and POSTed to
    ``whisper_API`` with the module-level ``headers``.

    Args:
        audio_path: Path of the audio file to upload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so an error payload is returned as-is.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.RequestException: On a network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    with open(audio_path, "rb") as audio:
        data = audio.read()
    # Without a timeout a stalled connection would block the request forever.
    response = requests.post(
        whisper_API, headers=headers, data=data, timeout=timeout
    )
    return response.json()
def query_translation(text, *, src_lang="tam_Taml", tgt_lang="eng_Latn",
                      max_retries=5, delay=10, timeout=60):
    """Translate ``text`` through the translation endpoint, retrying on failure.

    Args:
        text: Text to translate.
        src_lang: FLORES-200 code of the source language. Tamil is
            ``tam_Taml`` (the previous ``ta_Taml`` is not a valid code).
        tgt_lang: FLORES-200 code of the target language.
        max_retries: Maximum number of requests to send.
        delay: Seconds to wait between two attempts.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        ``{"translated_text": ...}`` on success, otherwise a dict with an
        ``"error"`` message and the last decoded ``"response"`` (``None`` if
        no request was made).

    Raises:
        requests.RequestException: On a network failure or timeout.
    """
    # Nothing to translate: answer immediately instead of burning every retry.
    if not text or (isinstance(text, str) and not text.strip()):
        return {"error": "Empty input text", "response": None}

    payload = {
        "inputs": text,
        "parameters": {"src_lang": src_lang, "tgt_lang": tgt_lang},
    }
    result = None  # stays None when max_retries is 0
    for attempt in range(max_retries):
        response = requests.post(
            translation_API, headers=headers, json=payload, timeout=timeout
        )
        try:
            result = response.json()
        except ValueError:
            # A non-JSON body counts as a failed attempt, not a fatal error.
            result = {
                "error": "Non-JSON response",
                "status_code": response.status_code,
            }

        # The reply is accepted either as a list of results or a single dict.
        candidate = result[0] if isinstance(result, list) and result else result
        if isinstance(candidate, dict) and "translation_text" in candidate:
            return {"translated_text": candidate["translation_text"]}

        # Wait only when another attempt will actually follow.
        if attempt + 1 < max_retries:
            time.sleep(delay)
    return {"error": "Translation API failed", "response": result}
def query_image(prompt, max_retries=5, delay=10, timeout=120):
    """Generate an image for ``prompt`` and save it to disk.

    Args:
        prompt: Text prompt sent to the image endpoint.
        max_retries: Maximum number of requests to send.
        delay: Seconds to wait between two attempts.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The path of the written image file, or ``None`` when the prompt is
        empty or no attempt produced an image.

    Raises:
        requests.RequestException: On a network failure or timeout.
        OSError: If the image file cannot be written.
    """
    # An empty prompt cannot yield an image; skip the retries entirely.
    if not prompt or (isinstance(prompt, str) and not prompt.strip()):
        return None

    payload = {"inputs": prompt}
    for attempt in range(max_retries):
        response = requests.post(
            image_API, headers=headers, json=payload, timeout=timeout
        )
        content_type = response.headers.get('Content-Type', '')
        # Only a 200 reply carrying an image body counts as success.
        if response.status_code == 200 and content_type.startswith('image/'):
            # NOTE: the path is fixed, so each success overwrites the last image.
            image_path = 'generated_image.png'
            with open(image_path, 'wb') as f:
                f.write(response.content)
            return image_path
        # Wait only when another attempt will actually follow.
        if attempt + 1 < max_retries:
            time.sleep(delay)
    return None
def process_audio(audio_path):
    """Run the audio -> Tamil text -> English text -> image pipeline.

    Args:
        audio_path: Path of the uploaded audio file; may be empty or ``None``.

    Returns:
        A 3-tuple ``(tamil_text, translated_text, image_path)`` matching the
        three interface outputs. Stages that did not run or failed are
        ``None``; a failed transcription or translation puts its error label
        in the corresponding slot. If an exception is raised, the tuple is
        ``(None, <exception message>, None)``.
    """
    if not audio_path:
        # One value per output, consistent with the except branch below.
        return None, None, None
    try:
        transcription = query_whisper(audio_path)
        if not isinstance(transcription, dict) or "text" not in transcription:
            # Stop here: the error label must not be translated or drawn.
            return "Transcription error", None, None
        tamil_text = transcription["text"]

        translation = query_translation(tamil_text)
        if "translated_text" not in translation:
            # Same reasoning: do not generate an image from an error label.
            return tamil_text, "Translation error", None
        translated_text = translation["translated_text"]

        image_path = query_image(translated_text)
        return tamil_text, translated_text, image_path
    except Exception as e:
        # UI boundary: show the failure in the translation box, do not crash.
        return None, str(e), None
# Gradio UI: one audio input routed through process_audio into three outputs
# (the two text stages and the generated image).
iface = gr.Interface(
    process_audio,
    gr.Audio(label="Upload Audio", type="filepath"),
    [
        gr.Textbox(label="Tamil Text"),
        gr.Textbox(label="English Translation"),
        gr.Image(label="Generated Image"),
    ],
    title="Speech-to-Image Generation",
)

# Start the server as soon as this module runs; the port is left unset.
iface.launch(server_name="0.0.0.0", server_port=None, share=True)