# Spaces: Running
import gradio as gr | |
import re | |
import os | |
import requests | |
import time | |
import soundfile as sf | |
import io | |
def hide_notice():
    """Return a Gradio update that sets ``visible=False`` on its output."""
    hidden = gr.update(visible=False)
    return hidden
def start_app():
    """Return a pair of Gradio updates: hide the first output, show the second.

    The only wiring visible in this file (currently disabled) binds the
    outputs to ``[intro, app_block]``, i.e. it swaps the intro notice for the
    application UI.
    """
    hide_first = gr.update(visible=False)
    show_second = gr.update(visible=True)
    return hide_first, show_second
def audio_to_bytes(audio, max_seconds=60.0):
    """Re-encode an audio file as an in-memory WAV stream.

    Args:
        audio: Anything ``soundfile.read`` accepts (the callers in this file
            pass a file path produced by ``gr.Audio(type="filepath")``).
        max_seconds: Clips whose duration is greater than or equal to this
            many seconds are rejected. Defaults to the original 60 s limit.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the WAV data, or
        ``None`` when the clip is too long.
    """
    data, sr = sf.read(audio)
    # Check the duration first so over-long clips are rejected before the
    # (wasted) WAV re-encode.
    if len(data) / sr >= max_seconds:
        return None
    audio_bytes = io.BytesIO()
    sf.write(audio_bytes, data, sr, format='WAV')
    audio_bytes.seek(0)
    return audio_bytes
def langswitch_API_call(audio, language):
    """Send an audio clip to the transcription HTTP API and return its JSON.

    Args:
        audio: Audio input forwarded to ``audio_to_bytes``.
        language: Language code sent as the ``language`` query parameter.

    Returns:
        The decoded JSON body of the response, or ``None`` when
        ``audio_to_bytes`` rejected the clip as too long.

    Raises:
        RuntimeError: If the ``api_url`` environment variable is not set.
        Exception: With message ``"API error"`` when the API answers with a
            status code other than 200.
        requests.RequestException: On network failures or timeouts.
    """
    audio_bytes = audio_to_bytes(audio)
    if audio_bytes is None:
        return None

    api_url = os.getenv("api_url")
    if not api_url:
        # Without this guard the request would go to the literal URL
        # "None/online/http" and fail with an obscure schema error.
        raise RuntimeError("The 'api_url' environment variable is not set")

    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    response = requests.post(
        f"{api_url}/online/http",
        # Let requests build (and escape) the query string.
        params={"language": language},
        files=files,
        # requests waits forever by default; a hung backend would otherwise
        # block the whole demo. Generous limit because clips may be up to a
        # minute long.
        timeout=120,
    )
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()
def transcribe_base(audio, language):
    """Transcribe an audio clip and return the text (or a user-facing warning).

    Args:
        audio: Audio input forwarded to ``langswitch_API_call``.
        language: Language code; a falsy value yields a warning message.

    Returns:
        The ``"transcription"`` field of the API response, or a warning
        string when no language is selected, the clip is too long, or the
        audio was not available yet.

    Raises:
        Exception: Any API/IO failure other than the "Invalid file: None"
            case is propagated to the caller.

    Note:
        The API response also carries ``is_new_speaker`` and
        ``classified_speaker``; they are not surfaced because the speaker
        identification output is currently disabled in the UI.
    """
    if not language:
        return "⚠️ Please select a language before transcribing."

    try:
        response = langswitch_API_call(audio, language)
    except Exception as e:
        # NOTE(review): presumably raised by the audio reader when `audio`
        # is None (recording not finished/loaded) - confirm.
        if "Invalid file: None" in str(e):
            return "⚠️ Check that you have clicked the stop button or wait for the audio to load completely."
        # Any other failure used to fall through and crash on an unbound
        # `response`, hiding the real cause; surface the original error.
        raise

    print(response)
    if response is None:
        return "⚠️ Audio file too long! Audio file should be shorter than 1 minute."
    return response["transcription"]
def fix_italian_transcription(transcription):
    """Re-insert the apostrophes of Italian elisions missing from a transcript.

    The input is expected to contain elided forms written with a space
    instead of an apostrophe (e.g. ``"l amico"``); the function rewrites them
    (``"l'amico"``), handles a few fixed expressions (``po'``, ``senz'``,
    ``anch'io``, tens such as ``trent'anni``) and finally reverts a small
    list of phrases that must not be elided. Matching is case-sensitive
    (lower-case only).

    Args:
        transcription: Text to fix.

    Returns:
        The corrected text.
    """
    # Phrases that the generic rules below would wrongly elide; they are
    # restored at the end.
    no_elision_cases = {
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista"
    }

    # Generic rule: elidable word + whitespace + vowel/h -> word + apostrophe.
    transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
    # Clitic before è/ha/hanno. Currently subsumed by the rule above (same
    # prefixes, and its lookahead already includes "è" and "h"); kept so the
    # case stays covered if the generic rule is ever narrowed.
    transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
    # "po" -> "po'". The negative lookahead keeps an existing "po'" intact;
    # without it the result was "po''".
    transcription = re.sub(r"\bpo\b(?!['’])", "po'", transcription)
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")

    # Tens elided before a vowel, e.g. "trent anni" -> "trent'anni".
    pattern_numbers = r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])"
    transcription = re.sub(pattern_numbers, r"\1'\2", transcription)

    # Undo the elision for the protected phrases.
    for phrase in no_elision_cases:
        fixed = phrase.replace(" ", "'")
        transcription = transcription.replace(fixed, phrase)
    return transcription
def transcribe_mic(audio_microphone, language):
    """Transcribe a microphone recording and apply language-specific fixes.

    Args:
        audio_microphone: Recording forwarded to ``transcribe_base``.
        language: Language code; ``"it"`` and ``"fr"`` get post-processing.

    Returns:
        The (possibly post-processed) text returned by ``transcribe_base``.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)

    if language == "fr":
        # Rewrites "mètres " + vowel/h as "m'" + that letter.
        # NOTE(review): presumably undoes the recognizer expanding "m" to
        # "mètres" - confirm.
        return re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", text)
    if language == "it":
        return fix_italian_transcription(text)
    return text
def transcribe_file(audio_upload, language):
    """Transcribe an uploaded audio file and apply language-specific fixes.

    Args:
        audio_upload: Uploaded file forwarded to ``transcribe_base``.
        language: Language code; ``"it"`` and ``"fr"`` get post-processing.

    Returns:
        The (possibly post-processed) text returned by ``transcribe_base``.
    """
    print("Transcription local file")
    transcription = transcribe_base(audio_upload, language)
    if language == "it":
        transcription = fix_italian_transcription(transcription)
    elif language == "fr":
        # Rewrites "mètres " + vowel/h as "m'" + that letter.
        # NOTE(review): presumably undoes the recognizer expanding "m" to
        # "mètres" - confirm.
        transcription = re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", transcription)
    # Return the post-processed text. The previous code called
    # transcribe_base() a second time here, which doubled the API request
    # and discarded the fixes applied above.
    return transcription
# Custom stylesheet passed to gr.Blocks(css=...).
# Changes relative to the previous version:
#   * the comment around `.popup-button:hover` is now closed with `*/`
#     (it used to be "closed" with a second `/*`);
#   * the header colour rule is scoped to `#header h1, #header h3` instead of
#     `#header h1,h3`, which also matched every <h3> on the page.
css_content = """
#intro-text {
    font-size: 2.0rem;
    line-height: 1.6;
    text-align: center;
    color: #333;
}
#ok-button {
    background-color: #4CAF50; /* green */
    color: white;
    padding: 10px 20px;
    border-radius: 8px;
    margin-top: 20px;
    border: none;
    font-weight: bold;
    cursor: pointer;
    font-size: 1rem;
    transition: background-color 0.3s ease;
}
#ok-button:hover {
    background-color: #388E3C;
}
/*
.popup-button:hover {
    background-color: #3c4687 !important;
}
*/
/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}
.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}
.text-elhuyar{
    color: #0045e7;
}
#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1, #header h3{
    color: white;
}
button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}
button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}
.record-button::before{
    background: #E50914;
}
"""
# Top-level UI definition and launch.
# Only the "testing period has ended" notice and the project banner are
# rendered; the welcome text with its OK button and the whole application
# block are kept below inside bare string literals, i.e. disabled.
demo = gr.Blocks(css=css_content) #, fill_width=True)
with demo:
    intro = gr.Column(visible=True, elem_id="intro-message")
    app_block = gr.Column(visible=False)
    with intro:
        # elem_id is a plain string so that the `#intro-text` CSS rule
        # applies; it used to be passed as a one-element list.
        gr.Markdown("Demoa probatzeko epea amaitu da. Eskerrik asko parte-hartzeagatik!<br><br>El periodo de prueba de la demo ha concluido. ¡Muchas gracias por tu participación!<br><br>The testing period of the demo has ended. Thank you very much for your participation!", elem_id="intro-text")
        gr.HTML("""
        <div id="orai-info">
        <div class="logos">
        <div class="sermas-logo">
        <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
        <span>SERMAS</span>
        </div>
        <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
        </div>
        <div id="orai-info-text">
        <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
        <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
        <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
        </div>
        </div>
        <p>""")
        # Disabled: original welcome text and the OK button that switched
        # from the intro column to the application column.
        '''
        gr.Markdown("""
<span style="display:inline-flex; align-items:center;">
<img src="https://upload.wikimedia.org/wikipedia/commons/2/2d/Flag_of_the_Basque_Country.svg" alt="Ikurriña" style="width: 1.2em; vertical-align: middle; margin-right: 0.4em;">
Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!
</span> <br>
Grabatu esaldi motz gutxi batzuk euskaraz, gazteleraz, ingelesez, frantsesez edo italieraz eta bidali transkribatzera.
<br>
Grabaketak <strong>automatikoki ezabatzen dira</strong> eta ez dira gordeko.
<br>
🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
<br>
Graba unas pocas frases cortas en euskera, castellano, inglés, francés o italiano y envíalos a transcribir.
<br>
Las grabaciones se <strong>eliminan automáticamente</strong> y no serán guardados.
<br>
🇬🇧 Welcome to the LANGSWITCH demo, the multilingual Automatic Speech Recognition system developed by Orai NLP Teknologiak!
<br>
Record a few short sentences in Basque, Spanish, English, French or Italian and submit them for their transcription.
<br>
The recordings are <strong>automatically removed</strong> and will not be saved.
<br>
""", elem_id="intro-text")
        ok_button = gr.Button("OK", elem_id="ok-button")
        ok_button.click(fn=start_app, outputs=[intro, app_block])
        '''
    # Disabled: the application itself (header, microphone tab, file-upload
    # tab and project banner).
    '''
    with app_block:
        gr.HTML("""
        <div id="header">
        <h1>LANGSWITCH</h1>
        <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
        </div>
        """)
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language", choices=[("English", "en"),
                                                           ("Euskara", "eu"),
                                                           ("Español", "es"),
                                                           ("Français", "fr"),
                                                           ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        gr.HTML("""
        <div id="orai-info">
        <div class="logos">
        <div class="sermas-logo">
        <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
        <span>SERMAS</span>
        </div>
        <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
        </div>
        <div id="orai-info-text">
        <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
        <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
        <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
        </div>
        </div>
        <p>""")
    '''
# Request queue with max_size=1.
demo.queue(max_size=1)
#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)