File size: 12,392 Bytes
594893f
 
 
 
 
5e48419
 
594893f
d2d7dc7
a247d56
2987692
 
4921a8c
5e48419
769531c
 
 
 
5b5c6c7
769531c
594893f
5e48419
 
3828e6f
5e48419
a96b890
 
a12adfc
 
 
5e48419
594893f
 
0994a24
b841d1b
 
 
c9f8542
b841d1b
 
6ef53dc
5e48419
 
 
 
 
 
 
cb2de80
594893f
eaf5bbe
 
 
 
 
 
e04575a
ebf21e6
eaf5bbe
 
 
963b5a3
e04575a
963b5a3
ebf21e6
eaf5bbe
e04575a
eaf5bbe
 
 
e04575a
143de70
eaf5bbe
5e48419
 
b915101
cb2de80
eaf5bbe
d4e2c45
c937cb1
48bba9a
cb2de80
a5fcb05
5e48419
 
d4e2c45
 
 
9c972ac
c937cb1
5e48419
594893f
 
5e48419
7410556
6eab893
5a68f50
 
 
 
 
7410556
5a68f50
 
 
 
 
 
 
 
 
 
 
 
7410556
5a68f50
 
6202273
ab8da36
a9d86d8
4211536
 
 
a9d86d8
d61e53e
5e48419
 
 
 
 
 
 
 
 
 
 
 
a12adfc
5e48419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
751dcae
5e48419
62ad9d3
 
d21aa08
de78153
d21aa08
5e48419
594893f
1e77a13
a536df8
2987692
 
a536df8
9903b32
f266819
 
 
 
 
 
 
 
 
 
 
 
 
325f07d
 
f266819
c59d44a
d2d7dc7
48e113d
 
e23e0e8
95b06af
 
 
f541b76
95b06af
6202273
95b06af
 
c00d44d
95b06af
 
 
 
2fbe556
95b06af
 
 
 
 
6202273
6556071
b3d371b
 
c59d44a
5e88edb
d4749c2
966fb46
d4749c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6202273
2987692
 
 
 
 
 
 
 
 
 
 
 
 
 
5e48419
 
2987692
5e88edb
4caf1c5
594893f
aa6787d
202bf02
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io

def hide_notice():
    """Build a Gradio update payload that sets ``visible`` to ``False``."""
    hidden = gr.update(visible=False)
    return hidden
def start_app():
    """Build a pair of Gradio updates: the first hides, the second shows.

    The (currently disabled) OK-button handler in this file lists
    ``[intro, app_block]`` as its outputs, so the pair maps to
    "hide the intro column, reveal the app column" in that order.
    """
    hide_first = gr.update(visible=False)
    show_second = gr.update(visible=True)
    return hide_first, show_second

def audio_to_bytes(audio, max_seconds=60.0):
    """Load an audio source and re-encode it as an in-memory WAV stream.

    Args:
        audio: Audio source handed directly to ``sf.read``.
        max_seconds: Clips whose duration (samples / sample rate) is greater
            than or equal to this value are rejected. Defaults to 60 seconds,
            matching the "shorter than 1 minute" message shown to the user.

    Returns:
        An ``io.BytesIO`` rewound to position 0 that holds the WAV-encoded
        audio, or ``None`` when the clip is too long.
    """
    data, sr = sf.read(audio)
    # Reject over-long clips first so we do not pay for a WAV re-encode
    # whose result would be thrown away.
    if len(data) / sr >= max_seconds:
        return None
    audio_bytes = io.BytesIO()
    sf.write(audio_bytes, data, sr, format='WAV')
    # Rewind so the consumer reads the stream from the beginning.
    audio_bytes.seek(0)
    return audio_bytes

def langswitch_API_call(audio, language):
    """Send an audio clip to the LANGSWITCH HTTP endpoint and return its JSON.

    The endpoint base URL is read from the ``api_url`` environment variable
    and the clip is posted to ``<api_url>/online/http`` as multipart form
    data under the ``file`` field.

    Args:
        audio: Audio source forwarded to ``audio_to_bytes``.
        language: Language code sent as the ``language`` query parameter.

    Returns:
        The decoded JSON body of the response, or ``None`` when
        ``audio_to_bytes`` rejected the clip as too long.

    Raises:
        Exception: If ``api_url`` is not configured or the endpoint answers
            with a status code other than 200. Network errors and timeouts
            from ``requests`` propagate unchanged.
    """
    # Convert first: a missing/unreadable recording must surface its own
    # error before any configuration problem is reported.
    audio_bytes = audio_to_bytes(audio)
    if audio_bytes is None:
        return None

    api_url = os.getenv("api_url")
    if not api_url:
        # Without this guard the request would target "None/online/http".
        raise Exception("API error: the 'api_url' environment variable is not set")

    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    response = requests.post(
        f"{api_url}/online/http",
        # Let requests URL-encode the value instead of splicing it in by hand.
        params={"language": language},
        files=files,
        # (connect, read) seconds: a stalled backend must not block the
        # request queue forever.
        timeout=(10, 120),
    )
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()

def transcribe_base(audio, language):
    """Transcribe an audio clip and return either the text or a warning.

    Args:
        audio: Audio source forwarded to ``langswitch_API_call``.
        language: Language code; a falsy value short-circuits with a warning.

    Returns:
        The ``"transcription"`` field of the API response, or a
        user-facing warning string when no language was selected, the
        recording was not ready, or the clip was too long.

    Raises:
        Exception: Any failure from ``langswitch_API_call`` other than the
            "Invalid file: None" case is re-raised unchanged.
    """
    if not language:
        return "⚠️ Please select a language before transcribing."
    try:
        response = langswitch_API_call(audio, language)
    except Exception as e:
        # Presumably the message produced when no audio was handed over
        # (recording still running or upload not finished) - verify against
        # the audio backend.
        if "Invalid file: None" in str(e):
            return "⚠️ Check that you have clicked the stop button or wait for the audio to load completely."
        # Any other failure used to fall through to the code below with
        # `response` unbound and crash with UnboundLocalError, hiding the
        # real cause. Surface the original error instead.
        raise
    print(response)
    if response is None:
        return "⚠️ Audio file too long! Audio file should be shorter than 1 minute."
    # The response also carries "is_new_speaker" and "classified_speaker";
    # they are not shown in the UI, so they are no longer read here.
    return response["transcription"]

def fix_italian_transcription(transcription):
    """Restore Italian elision apostrophes in an apostrophe-free transcript.

    The rules are applied in order:
      1. an eliding article/preposition/pronoun stem followed by whitespace
         and a vowel or ``h`` is joined with an apostrophe
         (``"l amico"`` -> ``"l'amico"``);
      2. clitic stems before ``è`` / ``ha`` / ``hanno``;
      3. ``"po"`` -> ``"po'"``;
      4. ``"senz"`` + vowel and the fixed form ``"anch io"``;
      5. truncated tens (``"trent anni"`` -> ``"trent'anni"``);
      6. finally, a fixed list of phrases that must keep their space is
         restored if an earlier rule joined them.

    Args:
        transcription: Text to repair.

    Returns:
        The text with apostrophes inserted.
    """
    # Phrases that must stay unelided; a tuple keeps the restore order stable.
    no_elision_cases = (
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista",
    )

    transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
    transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
    # The negative lookahead keeps an already-correct "po'" from becoming
    # "po''" (e.g. when the function is applied twice).
    transcription = re.sub(r"\bpo\b(?!')", "po'", transcription)
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")

    transcription = re.sub(
        r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])",
        r"\1'\2",
        transcription,
    )

    # Undo the generic rules for the protected phrases.
    for phrase in no_elision_cases:
        transcription = transcription.replace(phrase.replace(" ", "'"), phrase)

    return transcription

def transcribe_mic(audio_microphone, language):
    """Transcribe a microphone recording and apply language-specific fixes.

    Italian output goes through ``fix_italian_transcription``; French output
    has ``"mètres "`` followed by a vowel or ``h`` rewritten to ``"m'"``.
    Every other language is returned exactly as ``transcribe_base`` gave it.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)
    if language == "it":
        return fix_italian_transcription(text)
    if language == "fr":
        return re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", text)
    return text

def transcribe_file(audio_upload, language):
    """Transcribe an uploaded audio file and apply language-specific fixes.

    Args:
        audio_upload: Audio source forwarded to ``transcribe_base``.
        language: Language code; ``"it"`` and ``"fr"`` get post-processing.

    Returns:
        The transcription (or warning message) from ``transcribe_base``,
        post-processed for Italian and French.
    """
    print("Transcription local file")
    transcription = transcribe_base(audio_upload, language)
    if language == "it":
        transcription = fix_italian_transcription(transcription)
    elif language == "fr":
        transcription = re.sub(r"mètres ([aeiouáéíóúàèìòùhHAEIOUÁÉÍÓÚÀÈÌÒÙ])", r"m'\1", transcription)
    # Return the post-processed text. The previous code called
    # transcribe_base() a second time here, which discarded the fixes above
    # and sent the audio to the API twice.
    return transcription


# Custom stylesheet handed to gr.Blocks(css=...).
# The first commented-out rule (.popup-button:hover) used to be terminated
# with "/*" instead of "*/"; browsers then treated everything up to the next
# "*/" as one comment. The effective rules are unchanged, but the comment
# markers are now balanced so adding a rule between the two disabled blocks
# no longer silently disables it.
# NOTE(review): "#header h1,h3" styles every <h3> on the page, not only those
# inside #header - confirm whether "#header h1, #header h3" was intended.
css_content = """
#intro-text {
    font-size: 2.0rem;
    line-height: 1.6;
    text-align: center;
    color: #333;
}

#ok-button {
    background-color: #4CAF50; /* green */
    color: white;
    padding: 10px 20px;
    border-radius: 8px;
    margin-top: 20px;
    border: none;
    font-weight: bold;
    cursor: pointer;
    font-size: 1rem;
    transition: background-color 0.3s ease;
}

#ok-button:hover {
    background-color: #388E3C;
}


/*
.popup-button:hover {
    background-color: #3c4687 !important;
}
*/

/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}

.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}

.text-elhuyar{
    color: #0045e7;
}

#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1,h3{
    color: white;
}

button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}

button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}

.record-button::before{
    background: #E50914;
}
"""




# Build the Gradio UI. Only the "intro" column is populated at the moment: it
# shows a notice that the demo testing period has ended, plus the project
# credits. The original welcome text, OK button and the transcription tabs
# are kept below as bare string literals (no-op expression statements) so
# they can be re-enabled later.
demo = gr.Blocks(css=css_content) #, fill_width=True)
with demo:

    # Two top-level columns: the visible notice and the hidden app area.
    intro = gr.Column(visible=True, elem_id="intro-message")
    app_block = gr.Column(visible=False)
    
    with intro:
        # elem_id is documented as a string; it was previously passed as a
        # one-element list. The disabled block below still uses the list form.
        gr.Markdown("Demoa probatzeko epea amaitu da. Eskerrik asko parte-hartzeagatik!<br><br>El periodo de prueba de la demo ha concluido. ¡Muchas gracias por tu participación!<br><br>The testing period of the demo has ended. Thank you very much for your participation!", elem_id="intro-text")
        # Project credits banner, styled by the #orai-info rules in css_content.
        gr.HTML("""
        <div id="orai-info">
            <div class="logos">
                <div class="sermas-logo">
                    <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
                    <span>SERMAS</span>
                </div>
                <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
            </div>
            <div id="orai-info-text">
                <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
                <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
                <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
            </div>
        </div>
        <p>""")
        # Disabled: trilingual welcome text and the OK button that swaps the
        # intro column for the app column via start_app().
        '''
        gr.Markdown("""
        <span style="display:inline-flex; align-items:center;">
        <img src="https://upload.wikimedia.org/wikipedia/commons/2/2d/Flag_of_the_Basque_Country.svg" alt="Ikurriña" style="width: 1.2em; vertical-align: middle; margin-right: 0.4em;">
        Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!  
        </span> <br>
        Grabatu esaldi motz gutxi batzuk euskaraz, gazteleraz, ingelesez, frantsesez edo italieraz eta bidali transkribatzera.
        <br>
        Grabaketak <strong>automatikoki ezabatzen dira</strong> eta ez dira gordeko.
        <br>
        
        🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
        <br>
        Graba unas pocas frases cortas en euskera, castellano, inglés, francés o italiano y envíalos a transcribir.
        <br>
        Las grabaciones se <strong>eliminan automáticamente</strong> y no serán guardados.
        <br>

        🇬🇧 Welcome to the LANGSWITCH demo, the multilingual Automatic Speech Recognition system developed by Orai NLP Teknologiak!
        <br>
        Record a few short sentences in Basque, Spanish, English, French or Italian and submit them for their transcription.
        <br>
        The recordings are <strong>automatically removed</strong> and will not be saved.
        <br>
        """, elem_id=["intro-text"])
        
        ok_button = gr.Button("OK", elem_id="ok-button")
        ok_button.click(fn=start_app, outputs=[intro, app_block])
        '''
    # Disabled: the actual application (header, microphone tab, file-upload
    # tab and credits banner) wired to transcribe_mic / transcribe_file.
    '''
    with app_block:

        gr.HTML("""
    <div id="header">
        <h1>LANGSWITCH</h1>
        <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
    </div>
    """)
    
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language", choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
    
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    #gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )

        
        gr.HTML("""
    <div id="orai-info">
        <div class="logos">
            <div class="sermas-logo">
                <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
                <span>SERMAS</span>
            </div>
            <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
        </div>
        <div id="orai-info-text">
            <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
            <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
            <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
        </div>
    </div>
    <p>""")
    '''
    
# Enable the request queue with max_size=1, then start the server without a
# public share link. The auth-protected launch variant is kept for reference.
demo.queue(max_size=1)
#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)