Spaces:
Running
Running
# | |
# Copyright (c) Microsoft. All rights reserved. | |
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information. | |
# | |
from datetime import timedelta | |
from enum import Enum | |
from os import linesep, environ | |
from sys import argv | |
from typing import List, Optional | |
import azure.cognitiveservices.speech as speechsdk # type: ignore | |
from config.config import my_config | |
import streamlit as st | |
from services.captioning import helper | |
from tools.utils import get_session_option, must_have_value | |
# Azure Speech credentials, read once at import time from the project config.
# Both values live under the same ['audio']['Azure'] section.
_azure_audio_settings = my_config['audio']['Azure']
key = _azure_audio_settings['speech_key']
region = _azure_audio_settings['service_region']
# must_have_value(key, "请设置Azure speech_key") | |
# must_have_value(region, "请设置Azure service_region") | |
class CaptioningMode(Enum):
    """Captioning mode carried in the user configuration.

    ``user_config_from_args`` selects ``REALTIME`` when the ``captioning_mode``
    session option equals ``"realtime"`` and ``OFFLINE`` in every other case.
    """

    # Default mode: used whenever "realtime" was not requested.
    OFFLINE = 1
    # Selected only by the exact option value "realtime".
    REALTIME = 2
def get_language() -> str:
    """Return the language code configured for the current session.

    Reads the ``audio_language`` session option and falls back to ``"zh-CN"``
    when that option is not set (i.e. is ``None``).
    """
    selected = get_session_option("audio_language")
    return "zh-CN" if selected is None else selected
def get_phrases() -> List[str]:
    """Return the phrases listed in the ``captioning_phrases`` session option.

    The option value is treated as a ``;``-separated string. Every entry is
    stripped of surrounding whitespace, and entries that are empty after
    stripping are discarded, so an empty option value, a trailing ``;`` or a
    doubled separator never yields an empty-string phrase.

    Returns:
        The cleaned phrases in their original order, or an empty list when
        the option is unset or contains no non-blank entries.
    """
    raw_phrases = get_session_option("captioning_phrases")
    if raw_phrases is None:
        return []
    stripped = (phrase.strip() for phrase in raw_phrases.split(';'))
    # "".split(';') is [''], so filter out blanks rather than passing them on.
    return [phrase for phrase in stripped if phrase]
def get_compressed_audio_format() -> speechsdk.AudioStreamContainerFormat:
    """Translate the ``captioning_format`` session option into an SDK format.

    The option is compared case-insensitively against ``alaw``, ``flac``,
    ``mp3``, ``mulaw`` and ``ogg_opus``.

    Returns:
        The matching ``AudioStreamContainerFormat`` member, or
        ``AudioStreamContainerFormat.ANY`` when the option is unset or holds
        any other value.
    """
    container = speechsdk.AudioStreamContainerFormat
    requested = get_session_option("captioning_format")
    if requested is None:
        return container.ANY

    known_formats = {
        "alaw": container.ALAW,
        "flac": container.FLAC,
        "mp3": container.MP3,
        "mulaw": container.MULAW,
        "ogg_opus": container.OGG_OPUS,
    }
    # Unrecognised names fall back to ANY, same as an unset option.
    return known_formats.get(requested.lower(), container.ANY)
def get_profanity_option() -> speechsdk.ProfanityOption:
    """Translate the ``captioning_profanity`` session option into an SDK option.

    The option is compared case-insensitively: ``raw`` maps to
    ``ProfanityOption.Raw`` and ``remove`` maps to ``ProfanityOption.Removed``.

    Returns:
        The matching ``ProfanityOption`` member, or ``ProfanityOption.Masked``
        when the option is unset or holds any other value.
    """
    profanity = speechsdk.ProfanityOption
    requested = get_session_option("captioning_profanity")
    if requested is None:
        return profanity.Masked

    known_options = {
        "raw": profanity.Raw,
        "remove": profanity.Removed,
    }
    # Anything unrecognised falls back to Masked, same as an unset option.
    return known_options.get(requested.lower(), profanity.Masked)
def _milliseconds_option(option_name: str, fallback_ms: float = 1000) -> timedelta:
    """Read a millisecond session option as a ``timedelta``.

    Returns ``fallback_ms`` milliseconds when the option is unset or negative.
    Raises ``ValueError`` if the option value cannot be converted by ``float``.
    """
    raw_value = get_session_option(option_name)
    if raw_value is None:
        return timedelta(milliseconds=fallback_ms)
    milliseconds = float(raw_value)
    if milliseconds < 0:
        milliseconds = fallback_ms
    return timedelta(milliseconds=milliseconds)


def _bounded_int_option(option_name: str, default: int, minimum: int, replacement: int) -> int:
    """Read an integer session option, substituting values below ``minimum``.

    Returns ``default`` when the option is unset, and ``replacement`` when the
    parsed value is smaller than ``minimum``. Raises ``ValueError`` if the
    option value cannot be converted by ``int``.
    """
    raw_value = get_session_option(option_name)
    if raw_value is None:
        return default
    parsed = int(raw_value)
    return replacement if parsed < minimum else parsed


def user_config_from_args() -> helper.Read_Only_Dict:
    """Collect the captioning settings for the current session.

    Every value comes from a session option (via ``get_session_option``), from
    one of this module's ``get_*`` helpers, or from the module-level Azure
    ``key`` / ``region``.

    Defaults and clamping:
        * ``captioning_mode``: ``REALTIME`` only for the exact option value
          ``"realtime"``; otherwise ``OFFLINE``.
        * ``remain_time`` / ``delay``: 1000 ms when unset or negative.
        * ``max_line_length``: ``helper.DEFAULT_MAX_LINE_LENGTH_SBCS`` when
          unset; values below 20 become 20.
        * ``lines``: 2 when unset or below 1.

    Returns:
        A ``helper.Read_Only_Dict`` holding the settings.

    Raises:
        ValueError: If a numeric option cannot be parsed by ``float``/``int``.
    """
    if get_session_option("captioning_mode") == "realtime":
        captioning_mode = CaptioningMode.REALTIME
    else:
        captioning_mode = CaptioningMode.OFFLINE

    remain_time = _milliseconds_option("captioning_remainTime")
    delay = _milliseconds_option("captioning_delay")

    max_line_length = _bounded_int_option(
        "captioning_maxLineLength",
        default=helper.DEFAULT_MAX_LINE_LENGTH_SBCS,
        minimum=20,
        replacement=20,
    )
    # A line count below 1 is replaced by the default of 2, not clamped to 1.
    lines = _bounded_int_option("captioning_lines", default=2, minimum=1, replacement=2)

    return helper.Read_Only_Dict({
        # Raw option value (not a bool): truthy when a format was requested.
        "use_compressed_audio": get_session_option("captioning_format"),
        "compressed_audio_format": get_compressed_audio_format(),
        "profanity_option": get_profanity_option(),
        "language": get_language(),
        "input_file": get_session_option("audio_output_file"),
        "output_file": get_session_option("captioning_output"),
        "phrases": get_phrases(),
        "suppress_console_output": get_session_option("captioning_quiet"),
        "captioning_mode": captioning_mode,
        "remain_time": remain_time,
        "delay": delay,
        "use_sub_rip_text_caption_format": True,
        "max_line_length": max_line_length,
        "lines": lines,
        "stable_partial_result_threshold": get_session_option("captioning_threshold"),
        "subscription_key": key,
        "region": region,
    })