sk007msd committed on
Commit
389cdcb
·
verified ·
1 Parent(s): 01b1ac8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py CHANGED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import requests
4
+ import gradio as gr
5
+ from huggingface_hub import login
6
+ from dotenv import load_dotenv
7
+
8
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Hugging Face Inference API endpoints, one per pipeline stage.
image_API = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
translation_API = "https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
whisper_API = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

# The access token is read from the "HF_Token" environment variable and is
# used both for the hub login and for the bearer header sent with every request.
HF_TOKEN = os.getenv("HF_Token")
login(HF_TOKEN)
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
16
+
17
+
18
def query_whisper(audio_path):
    """Post the audio file at ``audio_path`` to the Whisper endpoint.

    The file is read in binary mode and its bytes are sent as the request
    body. Returns the endpoint's reply decoded from JSON.
    """
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    api_reply = requests.post(whisper_API, headers=headers, data=audio_bytes)
    return api_reply.json()
23
+
24
def query_translation(text):
    """Translate Tamil ``text`` to English through the NLLB endpoint.

    The request is attempted up to ``max_retries`` times with a fixed pause
    between attempts (presumably to ride out model warm-up; the retry is
    triggered by any reply that does not contain a translation).

    Returns:
        ``{"translated_text": <str>}`` on success, otherwise
        ``{"error": "Translation API failed", "response": <last reply>}``.
    """
    max_retries = 5
    delay = 10
    # NLLB expects FLORES-200 language codes: Tamil is "tam_Taml" and the
    # target (the UI labels the output "English Translation") is "eng_Latn".
    payload = {
        "inputs": text,
        "parameters": {"src_lang": "tam_Taml", "tgt_lang": "eng_Latn"},
    }
    result = None
    for attempt in range(max_retries):
        response = requests.post(translation_API, headers=headers, json=payload)
        try:
            result = response.json()
        except ValueError:
            # Non-JSON body (e.g. an HTML error page): keep it for the error
            # report and retry instead of crashing out of the loop.
            result = {"error": response.text}

        # Accept both reply shapes: a list of dicts or a single dict.
        candidate = result[0] if isinstance(result, list) and result else result
        if isinstance(candidate, dict) and "translation_text" in candidate:
            return {"translated_text": candidate["translation_text"]}

        # No point in waiting once the final attempt has failed.
        if attempt < max_retries - 1:
            time.sleep(delay)
    return {"error": "Translation API failed", "response": result}
37
+
38
def query_image(prompt):
    """Generate an image for ``prompt`` through the Stable Diffusion endpoint.

    The request is attempted up to ``max_retries`` times with a fixed pause
    between attempts. A reply counts as a success only when it has status 200
    and an ``image/*`` content type.

    Returns:
        The path of the written file (``generated_image.png``, overwritten on
        every successful call), or ``None`` if every attempt failed.
    """
    max_retries = 5
    delay = 10
    payload = {"inputs": prompt}
    for attempt in range(max_retries):
        response = requests.post(image_API, headers=headers, json=payload)
        # ``headers`` is the mapping; ``.get`` supplies a default when the
        # Content-Type header is missing.
        content_type = response.headers.get("Content-Type", "")
        if response.status_code == 200 and content_type.startswith("image/"):
            image_path = "generated_image.png"
            with open(image_path, "wb") as f:
                f.write(response.content)
            return image_path
        # No point in waiting once the final attempt has failed.
        if attempt < max_retries - 1:
            time.sleep(delay)
    return None
51
+
52
def process_audio(audio_path):
    """Run the speech -> Tamil text -> English text -> image pipeline.

    Always returns a 3-tuple ``(tamil_text, translated_text, image_path)`` so
    the result lines up with the three output components of the interface.
    When a stage fails, later stages are skipped and their slots are ``None``;
    the failure is reported in the slot of the stage that failed.
    """
    if not audio_path:
        # Keep the tuple shape even when nothing was uploaded.
        return None, "No audio provided", None
    try:
        transcription = query_whisper(audio_path)
        tamil_text = transcription.get("text")
        if not tamil_text:
            # Do not feed a placeholder string to the translator.
            return "Transcription error", None, None

        translation = query_translation(tamil_text)
        translated_text = translation.get("translated_text")
        if not translated_text:
            # Do not feed a placeholder string to the image model.
            return tamil_text, "Translation error", None

        image_path = query_image(translated_text)
        return tamil_text, translated_text, image_path
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        return None, str(e), None
64
+
65
# Gradio UI: one audio upload in; Tamil transcript, English translation and
# the generated image out. The library is imported as ``gr`` at the top of
# the file, so that alias is used throughout.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=[
        gr.Textbox(label="Tamil Text"),
        gr.Textbox(label="English Translation"),
        gr.Image(label="Generated Image"),
    ],
    title="Speech-to-Image Generation",
)
# Listen on all interfaces on port 7860 and request a public share link.
iface.launch(share=True, server_name="0.0.0.0", server_port=7860)