Spaces:

sk007msd
/

Audio_to_image

Sleeping

App Files Files Community

sk007msd committed on Mar 5

Commit

a479fd5

verified ·

1 Parent(s): 389cdcb

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -61

app.py CHANGED Viewed

@@ -8,67 +8,7 @@ from dotenv import load_dotenv
 # Module-level configuration; everything below runs at import time.
 # load_dotenv is imported from the dotenv package (see the hunk header above).
 load_dotenv()
 # Hugging Face Inference API endpoints: text-to-image and translation models.
 image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
 translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
-# Speech-to-text endpoint used by query_whisper.
-whisper_API="https://api-inference.huggingface.co/models/openai/whisper-large-v3"
-# Token is read from the "HF_Token" environment variable; os.getenv returns
-# None when the variable is not set.
-HF_TOKEN=os.getenv("HF_Token")
-# NOTE(review): `login` is imported outside this view (presumably
-# huggingface_hub.login) - confirm how it behaves when HF_TOKEN is None.
-login(HF_TOKEN)
-# Authorization header passed to every requests.post call in this file.
-headers={"Authorization":f"Bearer {HF_TOKEN}"}
-def query_whisper(audio_path, timeout=120):
-  """Upload an audio file to the Whisper endpoint and return its JSON reply.
-
-  Args:
-    audio_path: Path of the audio file; its raw bytes are sent as the request body.
-    timeout: Seconds to wait for the HTTP request. Without a timeout
-      requests can block indefinitely on a stalled connection.
-
-  Returns:
-    The endpoint's response body parsed with ``response.json()``.
-
-  Raises:
-    OSError: If the audio file cannot be opened or read.
-    requests.RequestException: On network failure or timeout.
-  """
-  with open(audio_path,"rb") as audio:
-    data=audio.read()
-  # The file is closed before the network round-trip; only the bytes already
-  # in memory are needed from here on.
-  response=requests.post(whisper_API,headers=headers,data=data,timeout=timeout)
-  return response.json()
-def query_translation(text):
-  """Translate Tamil text to English via the NLLB endpoint, retrying on failure.
-
-  Args:
-    text: The Tamil text to translate.
-
-  Returns:
-    ``{"translated_text": ...}`` on success. After ``max_retries`` failed
-    attempts, ``{"error": "Translation API failed", "response": <last reply>}``.
-
-  Raises:
-    requests.RequestException: On network failure or timeout.
-  """
-  max_retries=5
-  delay=10
-  request_timeout=120
-  # NLLB-200 expects FLORES-200 language codes: Tamil is "tam_Taml" and the
-  # target is English ("eng_Latn"), matching the "English Translation" output.
-  # The original left tgt_lang empty, which is a syntax error.
-  payload={"inputs":text,"parameters":{"src_lang":"tam_Taml","tgt_lang":"eng_Latn"}}
-  result=None
-  for attempt in range(max_retries):
-    response=requests.post(translation_API,headers=headers,json=payload,timeout=request_timeout)
-    result=response.json()
-    # The reply is handled in two shapes: a list of results or a single dict.
-    if isinstance(result,list) and len(result)>0:
-      return {"translated_text":result[0]["translation_text"]}
-    elif isinstance(result,dict) and "translation_text" in result:
-      return {"translated_text":result["translation_text"]}
-    # Wait only when another attempt will follow; sleeping after the last
-    # failure just delays the error report.
-    if attempt<max_retries-1:
-      time.sleep(delay)
-  return {"error":"Translation API failed","response":result}
-def query_image(prompt):
-  """Generate an image for ``prompt`` and save it to ``generated_image.png``.
-
-  Args:
-    prompt: Text prompt sent to the image endpoint.
-
-  Returns:
-    The path ``'generated_image.png'`` once an image reply has been written
-    to disk, or ``None`` if no attempt out of ``max_retries`` returned an image.
-
-  Raises:
-    requests.RequestException: On network failure or timeout.
-  """
-  max_retries=5
-  delay=10
-  request_timeout=120
-  payload={"inputs":prompt}
-  for attempt in range(max_retries):
-    response=requests.post(image_API,headers=headers,json=payload,timeout=request_timeout)
-    # Response headers are read through `response.headers.get`; the original
-    # `response.get.headers(...)` raised AttributeError on every 200 reply.
-    content_type=response.headers.get('Content-Type','')
-    if response.status_code==200 and content_type.startswith('image/'):
-      image_path='generated_image.png'
-      with open(image_path,'wb') as f:
-        f.write(response.content)
-      return image_path
-    # Wait only when another attempt will follow.
-    if attempt<max_retries-1:
-      time.sleep(delay)
-  return None
-def process_audio(audio_path):
-  """Run the audio -> Tamil text -> English text -> image pipeline.
-
-  Args:
-    audio_path: Path of the uploaded audio file, or a falsy value when
-      nothing was uploaded.
-
-  Returns:
-    A 3-tuple ``(tamil_text, translated_text, image_path)``, one value per
-    output component of the interface. Stages that did not run are ``None``.
-    When a stage fails, its slot holds "Transcription error" or
-    "Translation error". On an unexpected exception the result is
-    ``(None, str(exception), None)``.
-  """
-  if not audio_path:
-    # Three outputs are declared, so return one value per output rather than
-    # a single bare None.
-    return None,None,None
-  try:
-    transcription=query_whisper(audio_path)
-    if "text" not in transcription:
-      # Stop here: the placeholder must not be translated or drawn.
-      return "Transcription error",None,None
-    tamil_text=transcription["text"]
-    translation=query_translation(tamil_text)
-    if "translated_text" not in translation:
-      # Stop here: the placeholder must not be used as an image prompt.
-      return tamil_text,"Translation error",None
-    translated_text=translation["translated_text"]
-    image_path=query_image(translated_text)
-    return tamil_text,translated_text,image_path
-  except Exception as e:
-    # UI boundary: show the failure in the translation box instead of crashing.
-    return None,str(e),None
-# Gradio UI: one audio upload (passed to process_audio as a file path) and
-# three outputs matching process_audio's 3-tuple return value.
-iface=gradio.Interface(
-    fn=process_audio,
-    inputs=gradio.Audio(type="filepath",label="Upload Audio"),
-    outputs=[
-        gradio.Textbox(label="Tamil Text"),
-        gradio.Textbox(label="English Translation"),
-        gradio.Image(label="Generated Image")
     ],
     title="Speech-to-Image Generation"
 )

 # load_dotenv is imported from the dotenv package (see the hunk header above).
 load_dotenv()
 # Hugging Face Inference API endpoints: text-to-image and translation models.
 image_API="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"
 translation_API="https://api-inference.huggingface.co/models/facebook/nllb-200-distilled-1.3B"
+…        gradio.Image(label="Generated Image")
     ],
     title= title="Speech-to-Image Generation"
 )