# Source: Hugging Face Space "orai-nlp/Sermas" (status: Running; file size: 12,392 bytes).

import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io

def hide_notice():
    """Return a ``gr.update`` whose only change is ``visible=False``."""
    hidden = gr.update(visible=False)
    return hidden
def start_app():
    """Build the two Gradio updates that hide one component and show another.

    Returns:
        A 2-tuple of ``gr.update`` objects: the first sets ``visible=False``,
        the second sets ``visible=True``.
    """
    hide_first = gr.update(visible=False)
    show_second = gr.update(visible=True)
    return hide_first, show_second

def audio_to_bytes(audio, max_duration=60.0):
    """Load an audio clip and re-encode it as an in-memory WAV stream.

    Args:
        audio: Input handed straight to ``sf.read`` (presumably a file path;
            confirm against the callers).
        max_duration: Length limit in seconds. Clips lasting this long or
            longer are rejected.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the WAV data, or
        ``None`` when the clip lasts ``max_duration`` seconds or more.
    """
    data, sr = sf.read(audio)
    # Reject over-long clips before paying for the WAV re-encoding.
    if len(data) / sr >= max_duration:
        return None
    audio_bytes = io.BytesIO()
    sf.write(audio_bytes, data, sr, format='WAV')
    audio_bytes.seek(0)
    return audio_bytes

def langswitch_API_call(audio, language, timeout=120):
    """Send an audio clip to the LANGSWITCH HTTP endpoint and return its JSON reply.

    The service base URL is read from the ``api_url`` environment variable.

    Args:
        audio: Audio input forwarded to ``audio_to_bytes``.
        language: Value sent as the ``language`` query parameter.
        timeout: Seconds ``requests`` waits for the service, so a stalled
            backend cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the response, or ``None`` when
        ``audio_to_bytes`` rejected the clip.

    Raises:
        RuntimeError: If the ``api_url`` environment variable is not set.
        Exception: With the message ``"API error"`` when the service replies
            with a status code other than 200.
    """
    audio_bytes = audio_to_bytes(audio)
    if audio_bytes is None:
        return None
    api_url = os.getenv("api_url")
    if not api_url:
        # Without this check the request would target "None/online/http".
        raise RuntimeError("The 'api_url' environment variable is not set.")
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    # Let requests build (and escape) the query string instead of
    # concatenating the language into the URL by hand.
    response = requests.post(
        f"{api_url}/online/http",
        params={"language": language},
        files=files,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()

def transcribe_base(audio, language):
    """Transcribe an audio clip through the LANGSWITCH API.

    Args:
        audio: Audio input forwarded to ``langswitch_API_call``.
        language: Language code for the request; a falsy value short-circuits
            with a warning message.

    Returns:
        The ``"transcription"`` field of the API response, or a user-facing
        warning string when no language was selected, the clip was rejected
        as too long, or the audio had not finished loading.

    Raises:
        Exception: Any failure from the API call other than the
            "Invalid file: None" case is propagated unchanged.
    """
    if not language:
        return "⚠️ Please select a language before transcribing."
    try:
        response = langswitch_API_call(audio, language)
    except Exception as e:
        if "Invalid file: None" in str(e):
            return "⚠️ Check that you have clicked the stop button or wait for the audio to load completely."
        # Previously every other error fell through to the lookup below with
        # `response` unbound, hiding the real cause behind an
        # UnboundLocalError. Surface the original exception instead.
        raise
    print(response)
    if response is None:
        return "⚠️ Audio file too long! Audio file should be shorter than 1 minute."
    # The response also carries "is_new_speaker" and "classified_speaker";
    # they are not surfaced to the caller, so they are no longer read here.
    return response["transcription"]

# Articles, clitics and prepositions that get an apostrophe when the next
# word starts with a vowel or "h". The lookahead already includes "è" and
# "h", so clitic + "è"/"ha"/"hanno" needs no separate rule.
_IT_ELISION_RE = re.compile(
    r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])"
)
# Stand-alone "po" -> "po'". The lookahead skips a "po" that already carries
# an apostrophe, so applying the fix twice does not produce "po''".
_IT_PO_RE = re.compile(r"\bpo\b(?!['’])")
_IT_SENZ_RE = re.compile(r"\b(senz) ([aeiou])")
_IT_TENS_RE = re.compile(
    r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])"
)
# Phrases that must keep the space. Only the "un ..." entries can be produced
# by the elision rule; the others matter only if the input already contains
# the apostrophised form.
_IT_NO_ELISION_CASES = (
    "un autore", "un artista", "un uomo", "un amico", "un imperatore",
    "uno studente", "uno psicologo", "uno zio",
    "di autore", "a uomo", "su imperatore", "con amico", "per artista",
)


def fix_italian_transcription(transcription):
    """Re-insert the apostrophes of Italian elisions into a transcription.

    Rules are applied in a fixed order: general elision before a vowel or
    "h", "po" -> "po'", "senz" + vowel, "anch io", elided tens
    ("trent anni" -> "trent'anni"), and finally a revert pass for the phrases
    listed in ``_IT_NO_ELISION_CASES``. Matching is case-sensitive.

    Args:
        transcription: Text to correct.

    Returns:
        The corrected text. Applying the function to its own output leaves
        the text unchanged.
    """
    transcription = _IT_ELISION_RE.sub(r"\1'", transcription)
    transcription = _IT_PO_RE.sub("po'", transcription)
    transcription = _IT_SENZ_RE.sub(r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")
    transcription = _IT_TENS_RE.sub(r"\1'\2", transcription)

    # Undo the elision for phrases where it is wrong (e.g. "un amico").
    for phrase in _IT_NO_ELISION_CASES:
        transcription = transcription.replace(phrase.replace(" ", "'"), phrase)

    return transcription

def transcribe_mic(audio_microphone, language):
    """Transcribe a microphone recording and apply language-specific fixes.

    For ``"it"`` the text goes through ``fix_italian_transcription``. For
    ``"fr"`` every "mètres " followed by a vowel or "h" is rewritten as "m'"
    plus that letter (presumably compensating for a recurring recognition
    error; confirm with the model owners). Other languages pass through
    unchanged.

    Args:
        audio_microphone: Audio input forwarded to ``transcribe_base``.
        language: Language code of the recording.

    Returns:
        The post-processed result of ``transcribe_base``.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)
    if language == "it":
        return fix_italian_transcription(text)
    if language == "fr":
        return re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", text)
    return text

def transcribe_file(audio_upload, language):
    """Transcribe an uploaded audio file and apply language-specific fixes.

    For ``"it"`` the text goes through ``fix_italian_transcription``. For
    ``"fr"`` every "mètres " followed by a vowel or "h" is rewritten as "m'"
    plus that letter. Other languages pass through unchanged.

    Args:
        audio_upload: Audio input forwarded to ``transcribe_base``.
        language: Language code of the recording.

    Returns:
        The post-processed result of ``transcribe_base``.
    """
    print("Transcription local file")
    transcription = transcribe_base(audio_upload, language)
    if language == "it":
        transcription = fix_italian_transcription(transcription)
    elif language == "fr":
        transcription = re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", transcription)
    # Return the post-processed text. The previous code called
    # transcribe_base a second time here, which discarded the fixes above and
    # sent the audio to the API twice.
    return transcription


# Stylesheet handed to gr.Blocks(css=...). Rules wrapped in /* ... */ are
# disabled. The comment around `.popup-button:hover` is now closed with `*/`;
# it used to end with a second `/*` and only terminated at the end of the
# next commented-out block.
# NOTE(review): `#header h1,h3` styles every h3 on the page, not only those
# inside #header; confirm that is intended.
css_content = """
#intro-text {
    font-size: 2.0rem;
    line-height: 1.6;
    text-align: center;
    color: #333;
}

#ok-button {
    background-color: #4CAF50; /* green */
    color: white;
    padding: 10px 20px;
    border-radius: 8px;
    margin-top: 20px;
    border: none;
    font-weight: bold;
    cursor: pointer;
    font-size: 1rem;
    transition: background-color 0.3s ease;
}

#ok-button:hover {
    background-color: #388E3C;
}


/*
.popup-button:hover {
    background-color: #3c4687 !important;
}
*/

/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}

.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}

.text-elhuyar{
    color: #0045e7;
}

#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1,h3{
    color: white;
}

button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}

button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}

.record-button::before{
    background: #E50914;
}
"""




# Build the page. Only the end-of-trial notice inside `intro` is rendered;
# `app_block` is created hidden and the former welcome text and transcription
# UI are parked in the no-op string literals below.
demo = gr.Blocks(css=css_content) #, fill_width=True)
with demo:

    intro = gr.Column(visible=True, elem_id="intro-message")
    app_block = gr.Column(visible=False)

    with intro:
        # elem_id is passed as a plain string (the id targeted by the
        # `#intro-text` CSS rule) rather than wrapped in a list.
        gr.Markdown("Demoa probatzeko epea amaitu da. Eskerrik asko parte-hartzeagatik!<br><br>El periodo de prueba de la demo ha concluido. ¡Muchas gracias por tu participación!<br><br>The testing period of the demo has ended. Thank you very much for your participation!", elem_id="intro-text")
        gr.HTML("""
        <div id="orai-info">
            <div class="logos">
                <div class="sermas-logo">
                    <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
                    <span>SERMAS</span>
                </div>
                <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
            </div>
            <div id="orai-info-text">
                <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
                <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
                <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
            </div>
        </div>
        <p>""")
        # Disabled: trilingual welcome text and the OK button that swapped
        # `intro` for `app_block`. Kept as an inert string literal.
        '''
        gr.Markdown("""
        <span style="display:inline-flex; align-items:center;">
        <img src="https://upload.wikimedia.org/wikipedia/commons/2/2d/Flag_of_the_Basque_Country.svg" alt="Ikurriña" style="width: 1.2em; vertical-align: middle; margin-right: 0.4em;">
        Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!  
        </span> <br>
        Grabatu esaldi motz gutxi batzuk euskaraz, gazteleraz, ingelesez, frantsesez edo italieraz eta bidali transkribatzera.
        <br>
        Grabaketak <strong>automatikoki ezabatzen dira</strong> eta ez dira gordeko.
        <br>
        
        🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
        <br>
        Graba unas pocas frases cortas en euskera, castellano, inglés, francés o italiano y envíalos a transcribir.
        <br>
        Las grabaciones se <strong>eliminan automáticamente</strong> y no serán guardados.
        <br>

        🇬🇧 Welcome to the LANGSWITCH demo, the multilingual Automatic Speech Recognition system developed by Orai NLP Teknologiak!
        <br>
        Record a few short sentences in Basque, Spanish, English, French or Italian and submit them for their transcription.
        <br>
        The recordings are <strong>automatically removed</strong> and will not be saved.
        <br>
        """, elem_id=["intro-text"])
        
        ok_button = gr.Button("OK", elem_id="ok-button")
        ok_button.click(fn=start_app, outputs=[intro, app_block])
        '''
    # Disabled: the transcription UI (header, microphone tab, file tab and
    # project footer). Kept as an inert string literal.
    '''
    with app_block:

        gr.HTML("""
    <div id="header">
        <h1>LANGSWITCH</h1>
        <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
    </div>
    """)
    
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language", choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
    
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )

        
        gr.HTML("""
    <div id="orai-info">
        <div class="logos">
            <div class="sermas-logo">
                <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
                <span>SERMAS</span>
            </div>
            <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
        </div>
        <div id="orai-info-text">
            <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
            <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
            <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
        </div>
    </div>
    <p>""")
    '''
    
# Turn on Gradio's request queue with max_size=1 (presumably caps the number
# of requests allowed to wait; confirm against the installed Gradio version).
demo.queue(max_size=1)
# Alternative launch with username/password authentication read from the
# environment; currently disabled.
#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
# Start the app with share=False and max_threads=3.
demo.launch(share=False, max_threads=3)