azure / main.py
HoneyTian's picture
update
ecaebb1
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import logging
from pathlib import Path
import platform
import re
from project_settings import log_directory
import log
# Logging is configured here, between the imports, before the speech SDK and
# gradio are imported below.
# NOTE(review): presumably deliberate so import-time log records go through
# this setup — confirm before letting a formatter move the imports above it.
log.setup(log_directory=log_directory)
import azure.cognitiveservices.speech as speechsdk
import gradio as gr
from project_settings import project_path, environment
from toolbox.os.command import Command
# Module-level logger registered under the name "main".
main_logger = logging.getLogger("main")
def shell(cmd: str):
    """Hand ``cmd`` to ``Command.popen`` and return its result unchanged.

    This is the callback behind the "run" button of the "shell" tab built in
    ``main``; ``cmd`` is the text of that tab's "cmd" textbox.
    """
    output = Command.popen(cmd)
    return output
def get_args():
    """Parse the command-line options of the demo.

    Options:
        --speech_key: looked up in ``environment`` under ``"speech_key"``,
            passing a placeholder string as the lookup default.
        --service_region: looked up in ``environment`` under
            ``"service_region"``, passing a placeholder string as the lookup
            default.
        --asr_examples_wav_dir: POSIX-style path of
            ``project_path / "data/asr_examples"``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    cli = argparse.ArgumentParser()

    cli.add_argument(
        "--speech_key",
        type=str,
        default=environment.get("speech_key", default="require speech_key"),
    )
    cli.add_argument(
        "--service_region",
        type=str,
        default=environment.get("service_region", default="require service_region"),
    )

    examples_dir = project_path / "data/asr_examples"
    cli.add_argument(
        "--asr_examples_wav_dir",
        type=str,
        default=examples_dir.as_posix(),
    )

    return cli.parse_args()
def do_asr(filename: str, language: str, speech_key: str, service_region: str) -> str:
    """Run one ``recognize_once_async()`` pass over an audio file.

    Args:
        filename: path of the audio file handed to ``speechsdk.AudioConfig``.
        language: value passed as ``speech_recognition_language``.
        speech_key: value passed as the ``subscription`` of the speech config.
        service_region: value passed as the ``region`` of the speech config.

    Returns:
        A human-readable message describing the outcome: the recognized text,
        the no-match details, or the cancellation reason (followed by the
        error details when the cancellation reason is ``Error``).

    Raises:
        AssertionError: if the result reason is none of ``RecognizedSpeech``,
            ``NoMatch`` or ``Canceled``.
    """
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speechsdk.SpeechConfig(
            subscription=speech_key,
            region=service_region,
            speech_recognition_language=language,
        ),
        audio_config=speechsdk.AudioConfig(filename=filename),
    )
    outcome = recognizer.recognize_once_async().get()

    if outcome.reason == speechsdk.ResultReason.RecognizedSpeech:
        return "Recognized: `{}`. ".format(outcome.text)

    if outcome.reason == speechsdk.ResultReason.NoMatch:
        return "No speech could be recognized: `{}`. ".format(outcome.no_match_details)

    if outcome.reason != speechsdk.ResultReason.Canceled:
        # Any other reason is unexpected for a one-shot recognition.
        raise AssertionError

    details = outcome.cancellation_details
    message = "Speech Recognition canceled: `{}`. ".format(details.reason)
    if details.reason == speechsdk.CancellationReason.Error:
        message += "Error details: `{}`. ".format(details.error_details)
    return message
def main():
    """Build the Gradio demo (an "ASR" tab and a "shell" tab) and serve it.

    Example rows for the ASR tab are collected from
    ``<asr_examples_wav_dir>/<language>/*.wav``: the name of the directory
    that directly contains each wav file is used as its recognition language.
    The app is served on port 7860, bound to 127.0.0.1 on Windows and to
    0.0.0.0 on every other platform, with Gradio sharing disabled.
    """
    args = get_args()

    asr_examples_wav_dir = Path(args.asr_examples_wav_dir)

    # Sorted so the example table has a stable order regardless of the order
    # in which the filesystem yields directory entries.
    # NOTE(review): the configured speech key and region are placed in every
    # example row, so they appear to be rendered in the page's example table
    # — confirm this exposure is intended.
    asr_examples = [
        [
            filename.as_posix(),
            filename.parts[-2],  # parent directory name doubles as the language
            args.speech_key,
            args.service_region,
        ]
        for filename in sorted(asr_examples_wav_dir.glob("*/*.wav"))
    ]

    title = "## Azure Service."

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("ASR"):
                asr_file = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="file",
                )
                # Labelled "language": this dropdown selects the recognition
                # language; "result" is the label of the output textbox below.
                asr_language = gr.Dropdown(choices=["ja-JP", "en-US"], label="language")
                asr_speech_key = gr.Textbox(label="speech_key")
                asr_service_region = gr.Textbox(label="service_region")

                asr_button = gr.Button("run")
                asr_result = gr.Textbox(label="result")

                gr.Examples(
                    examples=asr_examples,
                    inputs=[
                        asr_file, asr_language, asr_speech_key, asr_service_region
                    ],
                    outputs=[
                        asr_result
                    ],
                    fn=do_asr
                )

                asr_button.click(
                    do_asr,
                    inputs=[
                        asr_file, asr_language, asr_speech_key, asr_service_region
                    ],
                    outputs=[
                        asr_result
                    ],
                )

            # NOTE(review): this tab passes free-form text from the web form to
            # `shell`, and on non-Windows hosts the server binds 0.0.0.0 —
            # confirm that exposing command execution this way is intended.
            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")

                shell_button.click(
                    shell,
                    inputs=[
                        shell_text,
                    ],
                    outputs=[
                        shell_output
                    ],
                )

    on_windows = platform.system() == "Windows"
    blocks.queue().launch(
        share=False,
        server_name="127.0.0.1" if on_windows else "0.0.0.0",
        server_port=7860
    )
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()