Spaces:

alibabasglab
/

SpeechScore

Running

App Files Files Community

alibabasglab committed on Oct 16, 2024

Commit

6e44725

verified ·

1 Parent(s): b66c175

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -37

app.py CHANGED Viewed

@@ -9,33 +9,8 @@ BATCH_SIZE = 1
 device = 0 if torch.cuda.is_available() else "cpu"
-"""
-mySpeechScore = SpeechScore([
-        'PESQ','DNSMOS'
-    ])
-"""
-# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
-def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
-    if seconds is not None:
-        milliseconds = round(seconds * 1000.0)
-        hours = milliseconds // 3_600_000
-        milliseconds -= hours * 3_600_000
-        minutes = milliseconds // 60_000
-        milliseconds -= minutes * 60_000
-        seconds = milliseconds // 1_000
-        milliseconds -= seconds * 1_000
-        hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
-        return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
-    else:
-        # we have a malformed timestamp so just return it as is
-        return seconds
-def score(test_file, ref_file, score_list, return_timestamps):
     mySpeechScore = SpeechScore(score_list)
     scores = mySpeechScore(test_path=test_file, reference_path=ref_file, window=None, score_rate=16000, return_mean=False)
     return scores
@@ -48,25 +23,50 @@ file_score = gr.Interface(
     inputs=[
         gr.Audio(sources=["upload"], label="test file", type="filepath"),
         gr.Audio(sources=["upload"], label="reference file", type="filepath"),
-        #gr.Radio(["without reference", "with reference"], label="Task", info="choose non-instrusive or instrusive scoring"),
-        #gr.Checkbox(default=False, label="DNSMOS"),
-        #gr.Checkbox(default=False, label="PESQ"),
         gr.Dropdown(
-            ["DNSMOS", "PESQ", "NB-PESQ", "SISNR"], value=["DNSMOS", "PESQ"], multiselect=True, label="Scores", info="By checking the following scores, include them in the output."
         ),
     ],
     outputs="text",
-    #layout="horizontal",
-    #theme="huggingface",
-    title="Score speech from a file",
     description=(
-        "Score audio inputs with the click of a button! Demo uses the"
-        " commonly used speech quality assessment methods for the audio files"
         " of arbitrary length."
     ),
 )
 with demo:
-    gr.TabbedInterface([file_score], ["Score Audio File"])
 demo.launch()

 device = 0 if torch.cuda.is_available() else "cpu"
+def score(test_file, ref_file, score_list_nis, score_list_is, return_timestamps):
+    score_list = score_list_nis + score_list_is
     mySpeechScore = SpeechScore(score_list)
     scores = mySpeechScore(test_path=test_file, reference_path=ref_file, window=None, score_rate=16000, return_mean=False)
     return scores
     inputs=[
         gr.Audio(sources=["upload"], label="test file", type="filepath"),
         gr.Audio(sources=["upload"], label="reference file", type="filepath"),
         gr.Dropdown(
+            ["DNSMOS", "SRMR"], value=["DNSMOS", "SRMR"], multiselect=True, label="Non-Intrusive Scores", info="Choose scores to include, no reference audio is required."
+        ),
+        gr.Dropdown(
+            ["PESQ", 'NB_PESQ', 'STOI', 'SISDR',
+        'FWSEGSNR', 'LSD', 'BSSEval', 'DNSMOS',
+        'SNR', 'SSNR', 'LLR', 'CSIG', 'CBAK',
+        'COVL', 'MCD'], value=["PESQ", "STOI"], multiselect=True, label="Intrusive Scores", info="Choose scores to include, reference audio is required."
         ),
     ],
     outputs="text",
+    title="Score speech quality for an audio clip",
     description=(
+        "Score speech quality with the click of a button! Demo includes the"
+        " commonly used speech quality assessments for the audio file"
         " of arbitrary length."
     ),
 )
+mic_score = gr.Interface(
+    fn=score,
+    inputs=[
+        gr.Audio(sources=["microphone"],
+                waveform_options=gr.WaveformOptions(
+                waveform_color="#01C6FF",
+                waveform_progress_color="#0066B4",
+                skip_length=2,
+                show_controls=False,
+                ),
+            ),
+        gr.Dropdown(
+            ["DNSMOS", "SRMR"], value=["DNSMOS", "SRMR"], multiselect=True, label="Non-Intrusive Scores", info="Choose scores to include, no reference audio is required."
+        ),
+    ],
+    outputs="text",
+    title="Test microphone quality using speech score",
+    description=(
+        "Score your microphone quality with the click of a button!"
+        " Uses the most popular method to test your microphone quality"
+        " with a short speech clip."
+    ),
+)
 with demo:
+    gr.TabbedInterface([mic_score, file_score], ["Score Microphone Quality", "Score Speech Quality"])
 demo.launch()