sk007msd committed on
Commit
5715904
·
verified ·
1 Parent(s): 07b05e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -54
app.py CHANGED
@@ -1,93 +1,95 @@
1
  import os
2
- import time
3
  import requests
4
  import gradio as gr
 
 
5
  from huggingface_hub import login
6
  from dotenv import load_dotenv
7
 
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Hugging Face Inference API endpoints used by the pipeline.
image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
whisper_API="https://api-inference.huggingface.co/models/openai/whisper-large-v3"
txt_API="https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1"

HF_TOKEN=os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Without a token the Authorization header below would read "Bearer None"
    # and every request would be rejected; stop with an actionable message.
    raise RuntimeError("HF_TOKEN is not set; add it to the environment or the .env file")
login(HF_TOKEN)

# Shared request headers for every inference call.
headers={"Authorization":f"Bearer {HF_TOKEN}"}
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def query_whisper(audio_path):
    """Transcribe an audio file through the Whisper inference endpoint.

    The file at ``audio_path`` is read as raw bytes and POSTed to
    ``whisper_API`` with the shared ``headers``. The decoded JSON body of
    the reply is returned as-is.
    """
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    reply = requests.post(whisper_API, headers=headers, data=audio_bytes)
    return reply.json()
23
 
24
def query_translation(text):
    """Translate Tamil ``text`` to English via the NLLB inference endpoint.

    The request is attempted up to five times, pausing ten seconds between
    attempts.

    Returns:
        ``{"translated_text": <str>}`` on success, otherwise
        ``{"error": "Translation API failed", "response": <last reply>}``.
    """
    max_retries = 5
    delay = 10
    payload = {
        "inputs": text,
        # NLLB-200 identifies languages by FLORES-200 codes; Tamil is "tam_Taml".
        "parameters": {"src_lang": "tam_Taml", "tgt_lang": "eng_Latn"},
    }
    result = None
    for attempt in range(max_retries):
        response = requests.post(translation_API, headers=headers, json=payload)
        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            return {"translated_text": result[0]["translation_text"]}
        if isinstance(result, dict) and "translation_text" in result:
            return {"translated_text": result["translation_text"]}
        # Only wait when another attempt will actually follow.
        if attempt < max_retries - 1:
            time.sleep(delay)
    return {"error": "Translation API failed", "response": result}
37
 
38
def query_image(prompt):
    """Generate an image for ``prompt`` and save it to ``generated_image.png``.

    The image endpoint is called up to five times with a ten second pause
    after each unsuccessful attempt. An attempt succeeds when the reply has
    status 200 and an ``image/*`` content type.

    Returns:
        The path of the written file, or ``None`` if every attempt failed.
    """
    retry_budget = 5
    pause_seconds = 10
    request_body = {"inputs": prompt}
    for _ in range(retry_budget):
        reply = requests.post(image_API, headers=headers, json=request_body)
        got_image = (
            reply.status_code == 200
            and reply.headers.get('Content-Type', '').startswith('image/')
        )
        if not got_image:
            time.sleep(pause_seconds)
            continue
        target_path = 'generated_image.png'
        with open(target_path, 'wb') as sink:
            sink.write(reply.content)
        return target_path
    return None
51
 
52
def query_text_generation(prompt):
    """Ask the text-generation endpoint for a short story about ``prompt``.

    Up to five attempts are made. After each attempt that does not yield a
    usable reply a warning is printed and the function waits ten seconds.

    Returns:
        The generated text, or ``None`` when all attempts fail.
    """
    retry_budget = 5
    pause_seconds = 10
    request_body = {"inputs": f"give me a short story about {prompt}"}
    for _ in range(retry_budget):
        reply = requests.post(txt_API, headers=headers, json=request_body)
        if reply.status_code == 200:
            parsed = reply.json()
            # The endpoint may answer with a list of candidates or a single dict.
            if isinstance(parsed, list) and parsed:
                return parsed[0].get("generated_text", "Text Generation Error")
            if isinstance(parsed, dict) and "generated_text" in parsed:
                return parsed["generated_text"]
        print(f"⚠️ Text generation failed, retrying in {pause_seconds} seconds...")
        time.sleep(pause_seconds)
    return None
67
 
68
def process_audio(audio_path):
    """Run the audio -> Tamil text -> English text -> image and story pipeline.

    Args:
        audio_path: Path of the audio file to process; a falsy value means
            no audio was supplied.

    Returns:
        A 4-tuple ``(tamil_text, translated_text, image_path, story)``, one
        value per output widget of the interface. When no audio is given or
        a step raises, the first, third and fourth items are ``None`` and
        the second item carries the message.
    """
    if not audio_path:
        return None, "Audio not provided", None, None
    try:
        transcription = query_whisper(audio_path)
        tamil_text = transcription.get("text", "Transcription error")
        translation = query_translation(tamil_text)
        translated_text = translation.get("translated_text", "Translation error")
        image_path = query_image(translated_text)
        story = query_text_generation(translated_text)
        # The story was previously computed but dropped from the result.
        return tamil_text, translated_text, image_path, story
    except Exception as e:
        # Surface the failure in the translation text box instead of crashing the UI.
        return None, str(e), None, None
81
 
# Gradio front end: one audio upload in, four result widgets out
# (Tamil transcription, English translation, generated image, text).
iface = gr.Interface(
    process_audio,
    gr.Audio(type="filepath", label="Upload Audio"),
    [
        gr.Textbox(label="Tamil Text"),
        gr.Textbox(label="English Translation"),
        gr.Image(label="Generated Image"),
        gr.Textbox(label="Text"),
    ],
    title="Speech-to-Image Generation",
)
 
1
  import os
 
2
  import requests
3
  import gradio as gr
4
+ import asyncio
5
+ import aiohttp
6
  from huggingface_hub import login
7
  from dotenv import load_dotenv
8
 
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Hugging Face Inference API endpoints used by the pipeline.
image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-600M"
whisper_API="https://api-inference.huggingface.co/models/openai/whisper-medium"
txt_API="https://api-inference.huggingface.co/models/openai-community/gpt2"

# Environment variable names are case-sensitive on most platforms, so accept
# both spellings of the token variable.
HF_TOKEN=os.getenv("HF_Token") or os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Without a token the Authorization header below would read "Bearer None"
    # and every request would be rejected; stop with an actionable message.
    raise RuntimeError("HF_Token is not set; add it to the environment or the .env file")
login(HF_TOKEN)

# Shared request headers for every inference call.
headers={"Authorization":f"Bearer {HF_TOKEN}"}
17
 
18
async def query_api(url, payload=None, data=None, json=None):
    """POST to an inference endpoint and return its decoded JSON reply.

    Args:
        url: Endpoint to call.
        payload: JSON-serialisable request body.
        data: Raw request body (for example audio bytes).
        json: Alias for ``payload``, mirroring the keyword name used by
            requests/aiohttp. Ignored when ``payload`` is given.

    Returns:
        The parsed JSON of the first reply with HTTP status 200, or ``None``
        when all five attempts fail.
    """
    if payload is None:
        payload = json
    max_attempts = 5
    retry_delay = 10
    async with aiohttp.ClientSession() as session:
        for attempt in range(1, max_attempts + 1):
            try:
                async with session.post(url, headers=headers, json=payload, data=data) as response:
                    if response.status == 200:
                        return await response.json()
                    print(f"API Error {response.status}, retrying....")
            except Exception as e:
                # Best effort: request and decoding errors are logged and retried.
                print(f"Error: {e}")
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts:
                await asyncio.sleep(retry_delay)
    return None
30
 
31
async def query_whisper(audio_path):
    """Transcribe an audio file through the Whisper inference endpoint.

    Reads the file at ``audio_path`` as raw bytes and hands them to
    ``query_api`` as the request body for ``whisper_API``. Whatever
    ``query_api`` produces is returned unchanged.
    """
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    transcription = await query_api(whisper_API, data=audio_bytes)
    return transcription
 
35
 
36
async def query_translation(text):
    """Translate Tamil ``text`` to English via the NLLB inference endpoint.

    Returns:
        The translated string, or ``"Translation Error"`` when the API does
        not produce a usable result.
    """
    payload = {
        "inputs": text,
        # NLLB-200 identifies languages by FLORES-200 codes; Tamil is "tam_Taml".
        "parameters": {"src_lang": "tam_Taml", "tgt_lang": "eng_Latn"},
    }
    # The JSON body goes in through the helper's ``payload`` parameter.
    response = await query_api(translation_API, payload=payload)
    # The endpoint may answer with a list of results or a single dict.
    if isinstance(response, list) and response:
        response = response[0]
    if isinstance(response, dict) and "translation_text" in response:
        return response["translation_text"]
    return "Translation Error"
 
 
 
46
 
47
async def query_image(prompt):
    """Generate an image for ``prompt`` and save it to ``generated_image.png``.

    The image endpoint replies with raw image bytes rather than JSON, so the
    request is made here directly and the body is read as bytes. Up to five
    attempts are made, pausing ten seconds between attempts.

    Returns:
        The path of the written file, or ``None`` if every attempt failed.
    """
    payload = {"inputs": prompt}
    max_attempts = 5
    retry_delay = 10
    image_path = 'generated_image.png'
    async with aiohttp.ClientSession() as session:
        for attempt in range(1, max_attempts + 1):
            try:
                async with session.post(image_API, headers=headers, json=payload) as response:
                    # Accept only a successful reply that really carries an image.
                    if response.status == 200 and response.content_type.startswith('image/'):
                        image_bytes = await response.read()
                        with open(image_path, 'wb') as f:
                            f.write(image_bytes)
                        return image_path
                    print(f"API Error {response.status}, retrying....")
            except Exception as e:
                # Best effort: request errors are logged and retried.
                print(f"Error: {e}")
            if attempt < max_attempts:
                await asyncio.sleep(retry_delay)
    return None
56
 
57
async def query_text_generation(prompt):
    """Ask the text-generation endpoint for a short story about ``prompt``.

    Returns:
        The generated text, or ``None`` when the API does not produce a
        usable result.
    """
    payload = {"inputs": f"give me a short story about {prompt}"}
    # The JSON body goes in through the helper's ``payload`` parameter.
    response = await query_api(txt_API, payload=payload)
    # The endpoint may answer with a list of candidates or a single dict.
    if isinstance(response, list) and response:
        first = response[0]
        if isinstance(first, dict):
            return first.get("generated_text", "Text Generation Error")
        return "Text Generation Error"
    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]
    return None
65
 
66
+
67
+
68
+
69
async def process_audio(audio_path):
    """Run the audio -> Tamil text -> English text -> image and story pipeline.

    Args:
        audio_path: Path of the audio file to process; a falsy value means
            no audio was supplied.

    Returns:
        A 4-tuple ``(tamil_text, translated_text, image_path, story)``, one
        value per output widget of the interface. When no audio is given or
        a step fails, the first, third and fourth items are ``None`` and the
        second item carries the message.
    """
    if not audio_path:
        return None, "Audio not provided", None, None
    try:
        transcription = await query_whisper(audio_path)
        # The transcription step can come back empty-handed.
        if not isinstance(transcription, dict):
            return None, "Transcription error", None, None
        tamil_text = transcription.get("text", "Transcription error")

        # Image and story prompts are built from the translation, so it has
        # to finish before those two requests can be started.
        translated_text = await query_translation(tamil_text)

        # The two remaining requests are independent and run concurrently.
        image_path, story = await asyncio.gather(
            query_image(translated_text),
            query_text_generation(translated_text),
        )
        return tamil_text, translated_text, image_path, story
    except Exception as e:
        # Surface the failure in the translation text box instead of crashing the UI.
        return None, str(e), None, None
84
 
# Gradio front end: one audio upload in, four result widgets out
# (Tamil transcription, English translation, generated image, story).
iface=gr.Interface(
    # process_audio is a coroutine function; run it to completion per request.
    fn=lambda x:asyncio.run(process_audio(x)),
    inputs=gr.Audio(type="filepath",label="Upload Audio"),
    outputs=[
        gr.Textbox(label="Tamil Text"),
        gr.Textbox(label="English Translation"),
        gr.Image(label="Generated Image"),
        gr.Textbox(label="Story"),
    ],
    title="Speech-to-Image Generation"
)