#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Gradio demo exposing Azure Speech recognition (ASR) and a shell runner.

The UI has two tabs:

* ``ASR``   - upload an audio file and run it through ``do_asr``.
* ``shell`` - run a command line through ``shell``.
"""
import argparse
import logging
from pathlib import Path
import platform
import re

from project_settings import log_directory
import log

# Kept ahead of the third-party imports, as in the original file; presumably
# so that logging is configured before those libraries are loaded.
log.setup(log_directory=log_directory)

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

from project_settings import project_path, environment
from toolbox.os.command import Command

main_logger = logging.getLogger("main")


def shell(cmd: str):
    """Run ``cmd`` through ``Command.popen`` and return whatever it returns.

    NOTE(review): ``cmd`` comes straight from a UI textbox, so this executes
    arbitrary commands on the host. See the note on the "shell" tab in
    ``main`` before exposing this app beyond a trusted network.
    """
    return Command.popen(cmd)


def get_args() -> argparse.Namespace:
    """Parse the command-line options of the demo.

    Options:
        --speech_key: Azure Speech subscription key. Defaults to the
            ``speech_key`` entry of ``environment``, or a placeholder text.
        --service_region: Azure Speech region. Defaults to the
            ``service_region`` entry of ``environment``, or a placeholder text.
        --asr_examples_wav_dir: directory holding the example wav files,
            ``<project_path>/data/asr_examples`` by default.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--speech_key",
        default=environment.get("speech_key", default="require speech_key"),
        type=str,
    )
    parser.add_argument(
        "--service_region",
        default=environment.get("service_region", default="require service_region"),
        type=str,
    )
    parser.add_argument(
        "--asr_examples_wav_dir",
        default=(project_path / "data/asr_examples").as_posix(),
        type=str,
    )
    return parser.parse_args()


def do_asr(filename: str, language: str, speech_key: str, service_region: str) -> str:
    """Recognize speech in an audio file with the Azure Speech service.

    Args:
        filename: path of the audio file to recognize.
        language: recognition language passed to the service, e.g. ``en-US``.
        speech_key: Azure Speech subscription key.
        service_region: Azure Speech service region.

    Returns:
        A human-readable message describing the outcome: the recognized text,
        the no-match details, or the cancellation reason (plus the error
        details when the cancellation was caused by an error).

    Raises:
        AssertionError: if the service reports a result reason other than
            RecognizedSpeech, NoMatch or Canceled.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        region=service_region,
        speech_recognition_language=language,
    )
    audio_config = speechsdk.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    # Single-shot recognition; .get() blocks until the result is available.
    resp = speech_recognizer.recognize_once_async().get()

    if resp.reason == speechsdk.ResultReason.RecognizedSpeech:
        return "Recognized: `{}`. ".format(resp.text)

    if resp.reason == speechsdk.ResultReason.NoMatch:
        return "No speech could be recognized: `{}`. ".format(resp.no_match_details)

    if resp.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = resp.cancellation_details
        result = "Speech Recognition canceled: `{}`. ".format(cancellation_details.reason)
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            result += "Error details: `{}`. ".format(cancellation_details.error_details)
        return result

    # Same exception type as before, now carrying the offending value.
    raise AssertionError("unexpected result reason: {}".format(resp.reason))


def main() -> None:
    """Build the Gradio UI and start the web server on port 7860."""
    args = get_args()

    # Example files are laid out as <asr_examples_wav_dir>/<language>/<name>.wav;
    # the parent directory name is used as the recognition language.
    # sorted() keeps the example order stable across runs and platforms.
    asr_examples_wav_dir = Path(args.asr_examples_wav_dir)
    asr_examples = [
        [
            filename.as_posix(),
            filename.parts[-2],
            args.speech_key,
            args.service_region,
        ]
        for filename in sorted(asr_examples_wav_dir.glob("*/*.wav"))
    ]

    title = "## Azure Service."

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("ASR"):
                asr_file = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="file",
                )
                # This dropdown feeds the `language` argument of do_asr; it was
                # previously labelled "result", clashing with the output box.
                asr_language = gr.Dropdown(choices=["ja-JP", "en-US"], label="language")
                asr_speech_key = gr.Textbox(label="speech_key")
                asr_service_region = gr.Textbox(label="service_region")

                asr_button = gr.Button("run")
                asr_result = gr.Textbox(label="result")

                asr_inputs = [
                    asr_file,
                    asr_language,
                    asr_speech_key,
                    asr_service_region,
                ]
                gr.Examples(
                    examples=asr_examples,
                    inputs=asr_inputs,
                    outputs=[asr_result],
                    fn=do_asr,
                )
                asr_button.click(
                    do_asr,
                    inputs=asr_inputs,
                    outputs=[asr_result],
                )

            # NOTE(review): this tab runs arbitrary commands typed into the UI,
            # and on non-Windows hosts the server binds to 0.0.0.0 (see below).
            # Confirm that this is intended before deploying.
            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")

                shell_button.click(
                    shell,
                    inputs=[shell_text],
                    outputs=[shell_output],
                )

    is_windows = platform.system() == "Windows"
    blocks.queue().launch(
        # The original conditional evaluated to False on every platform.
        share=False,
        # Loopback only on Windows; all interfaces elsewhere.
        server_name="127.0.0.1" if is_windows else "0.0.0.0",
        server_port=7860,
    )


if __name__ == '__main__':
    main()