"""Gradio front-end for the LANGSWITCH multilingual speech-recognition demo.

The module defines helpers that send an audio file to the transcription
API (base URL read from the ``api_url`` environment variable), light
post-processing of Italian and French transcriptions, and the Gradio
``Blocks`` layout.  The interactive part of the UI is currently disabled
(kept below as inert string literals); only the "testing period has
ended" notice and the project footer are rendered.
"""

import io
import os
import re
import time
from typing import Optional

import gradio as gr
import requests
import soundfile as sf

# Recordings of this duration (in seconds) or longer are rejected.
MAX_AUDIO_SECONDS = 60.0

# All user-facing warnings returned instead of a transcription start with this.
_WARNING_PREFIX = "⚠️"

# --- Italian post-processing tables ------------------------------------------

# Phrases that must keep their space even though the generic elision rule
# below would glue them together with an apostrophe.
_IT_NO_ELISION_CASES = (
    "un autore", "un artista", "un uomo", "un amico", "un imperatore",
    "uno studente", "uno psicologo", "uno zio",
    "di autore", "a uomo", "su imperatore", "con amico", "per artista",
)
_IT_ELISION_RE = re.compile(
    r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])"
)
_IT_CLITIC_RE = re.compile(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)")
# The lookahead keeps an already correct "po'" from becoming "po''".
_IT_PO_RE = re.compile(r"\bpo\b(?!')")
_IT_SENZ_RE = re.compile(r"\b(senz) ([aeiou])")
_IT_NUMBERS_RE = re.compile(
    r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])"
)

# --- French post-processing ---------------------------------------------------

_FR_METRES_RE = re.compile(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])")


def hide_notice():
    """Return a Gradio update that hides the component it is applied to."""
    return gr.update(visible=False)


def start_app():
    """Return updates that hide the first output and show the second one."""
    return gr.update(visible=False), gr.update(visible=True)


def audio_to_bytes(audio) -> Optional[io.BytesIO]:
    """Load ``audio`` and re-encode it as an in-memory WAV file.

    Args:
        audio: Anything ``soundfile.read`` accepts (the UI passes a file path).

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the WAV data, or
        ``None`` when the audio lasts ``MAX_AUDIO_SECONDS`` or longer.

    Raises:
        Whatever ``soundfile.read`` / ``soundfile.write`` raise for an
        unreadable input.
    """
    data, sr = sf.read(audio)
    # Check the duration first so over-long audio is never encoded.
    if len(data) / sr >= MAX_AUDIO_SECONDS:
        return None
    audio_bytes = io.BytesIO()
    sf.write(audio_bytes, data, sr, format='WAV')
    audio_bytes.seek(0)
    return audio_bytes


def langswitch_API_call(audio, language):
    """POST ``audio`` to the transcription API and return the decoded JSON.

    Args:
        audio: Audio input forwarded to ``audio_to_bytes``.
        language: Language code sent as the ``language`` query parameter.

    Returns:
        The parsed JSON body of the response, or ``None`` when the audio is
        too long (see ``audio_to_bytes``).

    Raises:
        RuntimeError: If the ``api_url`` environment variable is not set.
        Exception: ``"API error"`` when the API answers with a status other
            than 200; errors from ``soundfile``/``requests`` propagate as-is.
    """
    audio_bytes = audio_to_bytes(audio)
    if audio_bytes is None:
        return None

    api_url = os.getenv("api_url")
    if not api_url:
        # Fail with an explicit message instead of posting to "None/online/http".
        raise RuntimeError("API error: the 'api_url' environment variable is not set")

    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    response = requests.post(
        f"{api_url}/online/http",
        params={"language": language},  # let requests build/encode the query
        files=files,
    )
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()


def transcribe_base(audio, language):
    """Transcribe ``audio`` in ``language`` and return the text to display.

    Args:
        audio: Audio input (file path from the Gradio ``Audio`` component,
            or ``None`` when nothing was recorded/uploaded).
        language: Language code; a falsy value yields a warning message.

    Returns:
        The ``"transcription"`` field of the API response, or a warning
        message (starting with "⚠️") when the language is missing, the audio
        is missing, or the audio is too long.

    Raises:
        Any error raised by ``langswitch_API_call`` other than the
        "Invalid file: None" case, which is mapped to a warning message.
    """
    if not language:
        return "⚠️ Please select a language before transcribing."

    no_audio_warning = (
        "⚠️ Check that you have clicked the stop button or wait for the audio to load completely."
    )
    if audio is None:
        return no_audio_warning

    try:
        response = langswitch_API_call(audio, language)
    except Exception as e:
        if "Invalid file: None" in str(e):
            return no_audio_warning
        # Surface the real failure; falling through here used to end in an
        # UnboundLocalError on `response`.
        raise

    print(response)
    if response is None:
        return "⚠️ Audio file too long! Audio file should be shorter than 1 minute."

    # NOTE(review): the response is expected to also carry `is_new_speaker`
    # and `classified_speaker`; speaker identification is not surfaced in the
    # UI, so those fields are not read here.
    return response["transcription"]


def fix_italian_transcription(transcription):
    """Re-insert apostrophes that are missing from an Italian transcription.

    Applies a sequence of textual rules (elided articles/prepositions,
    clitics before "è"/"ha"/"hanno", "po'", "senz'", "anch'io", elided tens
    such as "trent'anni") and finally restores the phrases listed in
    ``_IT_NO_ELISION_CASES`` that must keep their space.

    Args:
        transcription: Text to fix.

    Returns:
        The corrected text.
    """
    transcription = _IT_ELISION_RE.sub(r"\1'", transcription)
    transcription = _IT_CLITIC_RE.sub(r"\1'", transcription)
    transcription = _IT_PO_RE.sub("po'", transcription)
    transcription = _IT_SENZ_RE.sub(r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")
    transcription = _IT_NUMBERS_RE.sub(r"\1'\2", transcription)
    # Undo the generic rule for the known exceptions.
    for phrase in _IT_NO_ELISION_CASES:
        transcription = transcription.replace(phrase.replace(" ", "'"), phrase)
    return transcription


def _postprocess_transcription(transcription, language):
    """Apply the language-specific fixes; warning messages pass through untouched."""
    if transcription.startswith(_WARNING_PREFIX):
        return transcription
    if language == "it":
        return fix_italian_transcription(transcription)
    if language == "fr":
        return _FR_METRES_RE.sub(r"m'\1", transcription)
    return transcription


def transcribe_mic(audio_microphone, language):
    """Transcribe a microphone recording and post-process the result.

    Args:
        audio_microphone: File path of the recording (or ``None``).
        language: Language code selected in the UI.

    Returns:
        The post-processed transcription or a warning message.
    """
    print("Transcription microphone")
    transcription = transcribe_base(audio_microphone, language)
    return _postprocess_transcription(transcription, language)


def transcribe_file(audio_upload, language):
    """Transcribe an uploaded audio file and post-process the result.

    Args:
        audio_upload: File path of the uploaded audio (or ``None``).
        language: Language code selected in the UI.

    Returns:
        The post-processed transcription or a warning message.
    """
    print("Transcription local file")
    transcription = transcribe_base(audio_upload, language)
    # Return the post-processed text; calling transcribe_base a second time
    # here would repeat the API request and drop the language fixes.
    return _postprocess_transcription(transcription, language)


css_content = """
#intro-text {
    font-size: 2.0rem;
    line-height: 1.6;
    text-align: center;
    color: #333;
}
#ok-button {
    background-color: #4CAF50; /* green */
    color: white;
    padding: 10px 20px;
    border-radius: 8px;
    margin-top: 20px;
    border: none;
    font-weight: bold;
    cursor: pointer;
    font-size: 1rem;
    transition: background-color 0.3s ease;
}
#ok-button:hover {
    background-color: #388E3C;
}
/*
.popup-button:hover {
    background-color: #3c4687 !important;
}
/*
/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/* #orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}
.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}
.text-elhuyar{
    color: #0045e7;
}
#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1,h3{
    color: white;
}
button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}
button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}
.record-button::before{
    background: #E50914;
}
"""

# Notice shown while the demo is closed (Basque / Spanish / English).
DEMO_CLOSED_MESSAGE = (
    "Demoa probatzeko epea amaitu da. Eskerrik asko parte-hartzeagatik!\n"
    "\n"
    "El periodo de prueba de la demo ha concluido. ¡Muchas gracias por tu participación!\n"
    "\n"
    "The testing period of the demo has ended. Thank you very much for your participation!"
)

# Project footer rendered below the notice.
FOOTER_HTML = """

The LANGSWITCH sub-project is part of the Open Call 1 of the SERMAS project. The goal of the SERMAS project is to provide socially-acceptable extended reality models and systems.

The technology powering LANGSWITCH was developed by Orai NLP Teknologiak

Orai NLP Teknologiak specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.

"""

demo = gr.Blocks(css=css_content)  #, fill_width=True)
with demo:
    intro = gr.Column(visible=True, elem_id="intro-message")
    app_block = gr.Column(visible=False)

    with intro:
        # elem_id is a plain string so the `#intro-text` CSS rule applies.
        gr.Markdown(DEMO_CLOSED_MESSAGE, elem_id="intro-text")
        # NOTE(review): assumed to live inside the intro column — confirm
        # against the deployed layout.
        gr.HTML(FOOTER_HTML)

        # Disabled: original welcome text and OK button (kept as an inert string).
        '''
        gr.Markdown("""
        Ikurriña Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!
        Grabatu esaldi motz gutxi batzuk euskaraz, gazteleraz, ingelesez, frantsesez edo italieraz eta bidali transkribatzera.
        Grabaketak automatikoki ezabatzen dira eta ez dira gordeko.
        🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
        Graba unas pocas frases cortas en euskera, castellano, inglés, francés o italiano y envíalos a transcribir.
        Las grabaciones se eliminan automáticamente y no serán guardados.
        🇬🇧 Welcome to the LANGSWITCH demo, the multilingual Automatic Speech Recognition system developed by Orai NLP Teknologiak!
        Record a few short sentences in Basque, Spanish, English, French or Italian and submit them for their transcription.
        The recordings are automatically removed and will not be saved.
        """, elem_id=["intro-text"])
        ok_button = gr.Button("OK", elem_id="ok-button")
        ok_button.click(fn=start_app, outputs=[intro, app_block])
        '''

    # Disabled: the interactive transcription UI (kept as an inert string).
    '''
    with app_block:
        gr.HTML("""

        """)
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language", choices=[("English", "en"), ("Euskara", "eu"), ("Español", "es"), ("Français", "fr"), ("Italiano", "it")], value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(choices=[("English", "en"), ("Euskara", "eu"), ("Español", "es"), ("Français", "fr"), ("Italiano", "it")], value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        gr.HTML("""

        The LANGSWITCH sub-project is part of the Open Call 1 of the SERMAS project. The goal of the SERMAS project is to provide socially-acceptable extended reality models and systems.

        The technology powering LANGSWITCH was developed by Orai NLP Teknologiak

        Orai NLP Teknologiak specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.

        """)
    '''

demo.queue(max_size=1)
#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)