Spaces:

MERaLiON
/

AudioBench-Leaderboard

Running

App Files Files Community

zhuohan-7 committed on May 8

Commit

42ba589

1 Parent(s): f04bb8b

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app/content.py +27 -7

app/content.py CHANGED Viewed

@@ -141,17 +141,37 @@ dataset_diaplay_information = {
     'Parliament-Short': 'Under Development',
     'UKUS-News-Short' : 'Under Development',
     'Mediacorp-Short' : 'Under Development',
-    'YouTube ASR: English Singapore Content'  : '''\nYouTube Evaluation Dataset for ASR Task: This dataset include English and Singlish with Singapore Content.''',
-    'YouTube ASR: English with Strong Emotion'  : '\nYouTube Evaluation Dataset for ASR Task. English with strong emotions',
-    'YouTube ASR: Malay English Prompt': 'YouTube ASR Dataset, Malay and Malay-English CondeSwitch',
-    'YouTube ASR: Malay with Malay Prompt': 'YouTube ASR Dataset, Malay and Malay-English CondeSwitch. Use Malay prompts',
     'SEAME-Dev-Mandarin'   : 'Under Development',
     'SEAME-Dev-Singlish'   : 'Under Development',
-    'YouTube SQA: English with Singapore Content': 'Under Development',
-    'YouTube SDS: English with Singapore Content': 'Under Development',
-    'YouTube PQA: English with Singapore Content': 'Under Development',
                 }

     'Parliament-Short': 'Under Development',
     'UKUS-News-Short' : 'Under Development',
     'Mediacorp-Short' : 'Under Development',
+    'YouTube ASR: English Singapore Content'  : '''YouTube Evaluation Dataset for ASR Task: \n
+                                                   This dataset contains English and Singlish audio clips, featuring Singapore-related content. \n
+                                                   It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
+    'YouTube ASR: English with Strong Emotion'  : '''YouTube Evaluation Dataset for ASR Task: \n
+                                                     This dataset contains audio clips in English and some unknown languages, featuring speech with strong emotional expression. \n
+                                                     It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.''',
+    'YouTube ASR: Malay English Prompt': '''YouTube Evaluation Dataset for ASR Task: \n
+                                            This dataset mainly contains Malay and some English audio clips, evaluated with English prompts. \n
+                                            It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.''',
+    'YouTube ASR: Malay with Malay Prompt': '''YouTube Evaluation Dataset for ASR Task: \n
+                                               This dataset uses the same audio as *YouTube ASR: Malay English Prompt*, but is evaluated with Malay prompts. \n
+                                               It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.''',
     'SEAME-Dev-Mandarin'   : 'Under Development',
     'SEAME-Dev-Singlish'   : 'Under Development',
+    'YouTube SQA: English with Singapore Content': '''YouTube Evaluation Dataset for Speech-QA Task: \n
+                                                      This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
+                                                      It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
+    'YouTube SDS: English with Singapore Content': '''YouTube Evaluation Dataset for Summary Task: \n
+                                                      This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
+                                                      It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
+    'YouTube PQA: English with Singapore Content': '''YouTube Evaluation Dataset for Paralinguistics Task: \n
+                                                      This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
+                                                      It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
                 }