File size: 4,643 Bytes
5690e11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
#

from datetime import timedelta
from enum import Enum
from os import linesep, environ
from sys import argv
from typing import List, Optional
import azure.cognitiveservices.speech as speechsdk  # type: ignore
from config.config import my_config
import streamlit as st

from services.captioning import helper
from tools.utils import get_session_option, must_have_value

# Azure Speech credentials, read once at import time from the project
# configuration. The values are taken as-is; nothing here checks that they
# are actually set.
_azure_audio_settings = my_config['audio']['Azure']
key = _azure_audio_settings['speech_key']
region = _azure_audio_settings['service_region']

# must_have_value(key, "请设置Azure speech_key")
# must_have_value(region, "请设置Azure service_region")


class CaptioningMode(Enum):
    """Captioning modes selectable through the ``captioning_mode`` option."""

    # Chosen by user_config_from_args for any option value other than
    # "realtime".
    OFFLINE = 1
    # Chosen by user_config_from_args when the option equals "realtime".
    REALTIME = 2


def get_language() -> str:
    """Return the language configured for the current session.

    Reads the ``audio_language`` session option and falls back to
    ``"zh-CN"`` when that option is ``None``.
    """
    selected = get_session_option("audio_language")
    return "zh-CN" if selected is None else selected


def get_phrases() -> List[str]:
    """Return the phrase hints configured for the current session.

    The ``captioning_phrases`` session option is read as one
    semicolon-separated string. Every entry is stripped of surrounding
    whitespace, and entries that are empty after stripping (a blank option,
    a doubled or trailing ``;``) are dropped so that no empty phrase is
    handed on to the caller.

    Returns:
        The cleaned phrases in their original order, or an empty list when
        the option is ``None`` or contains no usable phrase.
    """
    raw_phrases = get_session_option("captioning_phrases")
    if raw_phrases is None:
        return []
    stripped = (phrase.strip() for phrase in raw_phrases.split(';'))
    # Skip blanks: "".split(';') yields [""], which is not a real phrase.
    return [phrase for phrase in stripped if phrase]


def get_compressed_audio_format() -> speechsdk.AudioStreamContainerFormat:
    """Map the ``captioning_format`` session option to an SDK container format.

    The option is lower-cased and matched against ``alaw``, ``flac``,
    ``mp3``, ``mulaw`` and ``ogg_opus``. A ``None`` option, or any other
    value, yields ``AudioStreamContainerFormat.ANY``.
    """
    container = speechsdk.AudioStreamContainerFormat
    requested = get_session_option("captioning_format")
    if requested is None:
        return container.ANY

    known_formats = {
        "alaw": container.ALAW,
        "flac": container.FLAC,
        "mp3": container.MP3,
        "mulaw": container.MULAW,
        "ogg_opus": container.OGG_OPUS,
    }
    return known_formats.get(requested.lower(), container.ANY)


def get_profanity_option() -> speechsdk.ProfanityOption:
    """Map the ``captioning_profanity`` session option to an SDK profanity option.

    The option is lower-cased before matching: ``raw`` selects
    ``ProfanityOption.Raw`` and ``remove`` selects
    ``ProfanityOption.Removed``. A ``None`` option, or any other value,
    yields ``ProfanityOption.Masked``.
    """
    profanity = speechsdk.ProfanityOption
    requested = get_session_option("captioning_profanity")
    if requested is None:
        return profanity.Masked

    recognised = {
        "raw": profanity.Raw,
        "remove": profanity.Removed,
    }
    return recognised.get(requested.lower(), profanity.Masked)


def _millisecond_option(option_name: str) -> timedelta:
    """Read a session option as a duration expressed in milliseconds.

    Yields one second (1000 ms) when the option is ``None`` or converts to
    a negative number; otherwise the option is converted with ``float``.
    """
    fallback_ms = 1000
    raw = get_session_option(option_name)
    if raw is None:
        return timedelta(milliseconds=fallback_ms)
    millis = float(raw)
    return timedelta(milliseconds=fallback_ms if millis < 0 else millis)


def user_config_from_args() -> helper.Read_Only_Dict:
    """Collect the captioning settings of the current session.

    Despite the name, every value comes from session options (via
    ``get_session_option``), the sibling ``get_*`` helpers, or the
    module-level ``key`` / ``region`` constants.

    Defaults and clamping applied here:
        * ``captioning_mode``: REALTIME only for ``"realtime"``, otherwise
          OFFLINE.
        * ``remain_time`` / ``delay``: 1000 ms when unset or negative.
        * ``max_line_length``: ``helper.DEFAULT_MAX_LINE_LENGTH_SBCS`` when
          unset, never below 20.
        * ``lines``: 2 when unset or below 1.

    Returns:
        A ``helper.Read_Only_Dict`` holding the resolved settings.
    """
    realtime_requested = get_session_option("captioning_mode") == "realtime"
    mode = CaptioningMode.REALTIME if realtime_requested else CaptioningMode.OFFLINE

    remain_time = _millisecond_option("captioning_remainTime")
    delay = _millisecond_option("captioning_delay")

    raw_max_line_length = get_session_option("captioning_maxLineLength")
    if raw_max_line_length is None:
        max_line_length = helper.DEFAULT_MAX_LINE_LENGTH_SBCS
    else:
        # Explicit values are floored at 20 characters per line.
        max_line_length = max(int(raw_max_line_length), 20)

    default_lines = 2
    raw_lines = get_session_option("captioning_lines")
    if raw_lines is None:
        line_count = default_lines
    else:
        parsed_lines = int(raw_lines)
        # Values below one line fall back to the default rather than to 1.
        line_count = parsed_lines if parsed_lines >= 1 else default_lines

    settings = {
        "use_compressed_audio": get_session_option("captioning_format"),
        "compressed_audio_format": get_compressed_audio_format(),
        "profanity_option": get_profanity_option(),
        "language": get_language(),
        "input_file": get_session_option("audio_output_file"),
        "output_file": get_session_option("captioning_output"),
        "phrases": get_phrases(),
        "suppress_console_output": get_session_option("captioning_quiet"),
        "captioning_mode": mode,
        "remain_time": remain_time,
        "delay": delay,
        "use_sub_rip_text_caption_format": True,
        "max_line_length": max_line_length,
        "lines": line_count,
        "stable_partial_result_threshold": get_session_option("captioning_threshold"),
        "subscription_key": key,
        "region": region,
    }
    return helper.Read_Only_Dict(settings)