File size: 4,631 Bytes
f6e8e0d
 
 
 
 
 
 
 
8c3a864
f6e8e0d
 
 
 
 
 
 
8c3a864
f6e8e0d
 
 
 
 
 
 
 
 
 
 
8c3a864
 
ecaebb1
8c3a864
 
 
 
ecaebb1
8c3a864
 
f6e8e0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c3a864
f6e8e0d
8c3a864
f6e8e0d
 
8c3a864
f6e8e0d
8c3a864
f6e8e0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
827c019
8c3a864
 
f6e8e0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import logging
from pathlib import Path
import platform
import re

from project_settings import log_directory
import log

# Configure logging via the project's log module before the remaining imports below run.
log.setup(log_directory=log_directory)

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

from project_settings import project_path, environment
from toolbox.os.command import Command

# Module-level logger registered under the name "main".
main_logger = logging.getLogger("main")


def shell(cmd: str):
    """
    Run ``cmd`` through ``Command.popen`` and hand back whatever it returns.

    NOTE(review): in this file the function is bound to a free-text box of the
    web UI, so whoever can reach the page can submit arbitrary commands.
    Confirm that this exposure is intended.

    :param cmd: command line text, forwarded unchanged to ``Command.popen``.
    :return: the value returned by ``Command.popen``.
    """
    output = Command.popen(cmd)
    return output


def get_args():
    """
    Parse the command line options of the demo.

    Options (all strings):
      --speech_key            defaults to ``environment.get("speech_key", ...)``
      --service_region        defaults to ``environment.get("service_region", ...)``
      --asr_examples_wav_dir  defaults to ``<project_path>/data/asr_examples``

    :return: the ``argparse.Namespace`` produced by ``parse_args()``.
    """
    # (flag, default) pairs; every option is registered with ``type=str``.
    option_defaults = (
        (
            "--speech_key",
            environment.get("speech_key", default="require speech_key"),
        ),
        (
            "--service_region",
            environment.get("service_region", default="require service_region"),
        ),
        (
            "--asr_examples_wav_dir",
            (project_path / "data/asr_examples").as_posix(),
        ),
    )

    parser = argparse.ArgumentParser()
    for flag, fallback in option_defaults:
        parser.add_argument(flag, default=fallback, type=str)

    return parser.parse_args()


def do_asr(filename: str, language: str, speech_key: str, service_region: str) -> str:
    """
    Recognize speech from an audio file with the Azure Speech SDK and describe
    the outcome as text.

    A single ``recognize_once_async()`` call is issued and waited on, so only
    one recognition result is reported per invocation.

    :param filename: path of the audio file, passed to ``speechsdk.AudioConfig``.
    :param language: value for ``speech_recognition_language``.
    :param speech_key: Azure Speech subscription key.
    :param service_region: Azure region of the Speech resource.
    :return: a message starting with ``Recognized:``, ``No speech could be
        recognized:`` or ``Speech Recognition canceled:`` depending on the
        result reason; for a cancellation caused by an error the error
        details are appended.
    :raises AssertionError: if the SDK reports a result reason other than the
        three handled above.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        region=service_region,
        speech_recognition_language=language
    )
    audio_config = speechsdk.AudioConfig(
        filename=filename,
    )

    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    # Block until the asynchronous recognition has finished.
    resp = speech_recognizer.recognize_once_async().get()

    if resp.reason == speechsdk.ResultReason.RecognizedSpeech:
        result = "Recognized: `{}`. ".format(resp.text)
    elif resp.reason == speechsdk.ResultReason.NoMatch:
        result = "No speech could be recognized: `{}`. ".format(resp.no_match_details)
    elif resp.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = resp.cancellation_details
        result = "Speech Recognition canceled: `{}`. ".format(cancellation_details.reason)
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            result += "Error details: `{}`. ".format(cancellation_details.error_details)
    else:
        # Same exception type as before, but now carrying the offending reason
        # so an unexpected SDK result can actually be diagnosed.
        raise AssertionError("unexpected speech result reason: {}".format(resp.reason))
    return result


def main():
    """
    Build the Gradio demo and serve it on port 7860.

    The UI has two tabs:
      * "ASR"   - upload an audio file, choose a language, enter the Azure
                  credentials and run ``do_asr``.
      * "shell" - send a command line to ``shell`` and show its output.

    The server binds to 127.0.0.1 on Windows and to 0.0.0.0 elsewhere; public
    sharing is disabled.
    """
    args = get_args()

    # Example rows are [wav path, language, speech_key, service_region]; the
    # language is the name of the directory that directly contains the wav.
    # The paths are sorted so the example list has a stable order across runs.
    # NOTE(review): the speech key from the command line/environment is placed
    # into the example rows, i.e. into UI inputs - confirm this is acceptable.
    asr_examples_wav_dir = Path(args.asr_examples_wav_dir)
    asr_examples = [
        [
            filename.as_posix(),
            filename.parts[-2],
            args.speech_key,
            args.service_region,
        ]
        for filename in sorted(asr_examples_wav_dir.glob("*/*.wav"))
    ]

    title = "## Azure Service."

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("ASR"):
                asr_file = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="file",
                )
                # Labelled "language": the previous "result" label duplicated
                # the label of the output textbox below.
                asr_language = gr.Dropdown(choices=["ja-JP", "en-US"], label="language")
                asr_speech_key = gr.Textbox(label="speech_key")
                asr_service_region = gr.Textbox(label="service_region")

                asr_button = gr.Button("run")
                asr_result = gr.Textbox(label="result")

                # Inputs/outputs shared by the examples and the run button.
                asr_inputs = [
                    asr_file, asr_language, asr_speech_key, asr_service_region
                ]
                asr_outputs = [
                    asr_result
                ]

                gr.Examples(
                    examples=asr_examples,
                    inputs=asr_inputs,
                    outputs=asr_outputs,
                    fn=do_asr
                )

                asr_button.click(
                    do_asr,
                    inputs=asr_inputs,
                    outputs=asr_outputs,
                )

            with gr.TabItem("shell"):
                # NOTE(review): this tab forwards free text to ``shell``; on
                # non-Windows hosts the server listens on 0.0.0.0, so the tab
                # is reachable from the network. Confirm this is intended.
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")

                shell_button.click(
                    shell,
                    inputs=[
                        shell_text,
                    ],
                    outputs=[
                        shell_output
                    ],
                )

    blocks.queue().launch(
        # Was a conditional whose two branches were both False.
        share=False,
        server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
        server_port=7860
    )


# Start the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()