sk007msd committed on
Commit
a479fd5
·
verified ·
1 Parent(s): 389cdcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -61
app.py CHANGED
@@ -8,67 +8,7 @@ from dotenv import load_dotenv
8
  load_dotenv()
9
  image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
10
  translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
11
- whisper_API="https://api-inference.huggingface.co/models/openai/whisper-large-v3"
12
-
13
- HF_TOKEN=os.getenv("HF_Token")
14
- login(HF_TOKEN)
15
- headers={"Authorization":f"Bearer {HF_TOKEN}"}
16
-
17
-
18
- def query_whisper(audio_path):
19
- with open(audio_path,"rb") as audio:
20
- data=audio.read()
21
- response=requests.post(whisper_API,headers=headers,data=data)
22
- return response.json()
23
-
24
- def query_translation(text):
25
- max_retries=5
26
- delay=10
27
- payload={"inputs":text,"parameters":{"src_lang":"ta_Taml","tgt_lang":}}
28
- for attempt in range(max_retries):
29
- response=requests.post(translation_API,headers=headers,json=payload)
30
- result= response.json()
31
- if isinstance(result,list) and len(result)>0:
32
- return {"translated_text":result[0]["translation_text"]}
33
- elif isinstance(result,dict) and "translation_text" in result:
34
- return {"translated_text":result["translation_text"]}
35
- time.sleep(delay)
36
- return {"error":"Translation API failed","response":result}
37
-
38
- def query_image(prompt):
39
- max_retries=5
40
- delay=10
41
- payload={"inputs":prompt}
42
- for attempt in range(max_retries):
43
- response=requests.post(image_API,headers=headers,json=payload)
44
- if response.status_code==200 and response.get.headers('Content-Type','').startswith('image/'):
45
- image_path='generated_image.png'
46
- with open(image_path,'wb') as f:
47
- f.write(response.content)
48
- return image_path
49
- time.sleep(delay)
50
- return None
51
-
52
- def process_audio(audio_path):
53
- if not audio_path:
54
- return None
55
- try:
56
- transcription=query_whisper(audio_path)
57
- tamil_text=transcription.get("text","Transcription error")
58
- translation=query_translation(tamil_text)
59
- translated_text=translation.get("translated_text","Translation error")
60
- image_path=query_image(translated_text)
61
- return tamil_text,translated_text,image_path
62
- except Exception as e:
63
- return None,str(e),None
64
-
65
- iface=gradio.Interface(
66
- fn=process_audio,
67
- inputs=gradio.Audio(type="filepath",label="Upload Audio"),
68
- outputs=[
69
- gradio.Textbox(label="Tamil Text"),
70
- gradio.Textbox(label="English Translation"),
71
- gradio.Image(label="Generated Image")
72
  ],
73
  title= title="Speech-to-Image Generation"
74
  )
 
8
  load_dotenv()
9
  image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
10
  translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
11
+ … gradio.Image(label="Generated Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ],
13
  title= title="Speech-to-Image Generation"
14
  )