zhuohan-7 commited on
Commit
42ba589
·
1 Parent(s): f04bb8b

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app/content.py +27 -7
app/content.py CHANGED
@@ -141,17 +141,37 @@ dataset_diaplay_information = {
141
  'Parliament-Short': 'Under Development',
142
  'UKUS-News-Short' : 'Under Development',
143
  'Mediacorp-Short' : 'Under Development',
144
- 'YouTube ASR: English Singapore Content' : '''\nYouTube Evaluation Dataset for ASR Task: This dataset include English and Singlish with Singapore Content.''',
145
- 'YouTube ASR: English with Strong Emotion' : '\nYouTube Evaluation Dataset for ASR Task. English with strong emotions',
146
- 'YouTube ASR: Malay English Prompt': 'YouTube ASR Dataset, Malay and Malay-English CondeSwitch',
147
- 'YouTube ASR: Malay with Malay Prompt': 'YouTube ASR Dataset, Malay and Malay-English CondeSwitch. Use Malay prompts',
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  'SEAME-Dev-Mandarin' : 'Under Development',
150
  'SEAME-Dev-Singlish' : 'Under Development',
151
 
152
- 'YouTube SQA: English with Singapore Content': 'Under Development',
153
- 'YouTube SDS: English with Singapore Content': 'Under Development',
154
- 'YouTube PQA: English with Singapore Content': 'Under Development',
 
 
 
 
 
 
 
 
155
 
156
 
157
  }
 
141
  'Parliament-Short': 'Under Development',
142
  'UKUS-News-Short' : 'Under Development',
143
  'Mediacorp-Short' : 'Under Development',
144
+
145
+ 'YouTube ASR: English Singapore Content' : '''YouTube Evaluation Dataset for ASR Task: \n
146
+ This dataset contains English and Singlish audio clips, featuring Singapore-related content. \n
147
+ It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
148
+
149
+ 'YouTube ASR: English with Strong Emotion' : '''YouTube Evaluation Dataset for ASR Task: \n
150
+ This dataset contains audio clips in English and some unknown languages, featuring speech with strong emotional expression. \n
151
+ It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.''',
152
+
153
+ 'YouTube ASR: Malay English Prompt': '''YouTube Evaluation Dataset for ASR Task: \n
154
+ This dataset mainly contains Malay and some English audio clips, featuring English prompts. \n
155
+ It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.''',
156
+
157
+ 'YouTube ASR: Malay with Malay Prompt': '''YouTube Evaluation Dataset for ASR Task: \n
158
+ This dataset uses the same audio as *YouTube ASR: Malay English Prompt*, except that it features Malay prompts. \n
159
+ It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.''',
160
 
161
  'SEAME-Dev-Mandarin' : 'Under Development',
162
  'SEAME-Dev-Singlish' : 'Under Development',
163
 
164
+ 'YouTube SQA: English with Singapore Content': '''YouTube Evaluation Dataset for Speech-QA Task: \n
165
+ This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
166
+ It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
167
+
168
+ 'YouTube SDS: English with Singapore Content': '''YouTube Evaluation Dataset for Summary Task: \n
169
+ This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
170
+ It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
171
+
172
+ 'YouTube PQA: English with Singapore Content': '''YouTube Evaluation Dataset for Paralinguistics Task: \n
173
+ This dataset uses the same audio as *YouTube ASR: English Singapore Content*, featuring Singapore-related content. \n
174
+ It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.''',
175
 
176
 
177
  }