File size: 2,316 Bytes
202c078
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://platform.openai.com/docs/guides/text-to-speech
"""
import argparse
from pathlib import Path
import time

import librosa
import numpy as np
import os
import openai
from openai import OpenAI
import pandas as pd
from scipy.io import wavfile
from tqdm import tqdm

from project_settings import environment, project_path


def get_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Options:
        --openai_model: TTS model name; only ``tts-1`` is accepted.
        --openai_voice: voice name, ``alloy`` or ``nova`` (default ``nova``).
        --filename: path of the Excel workbook to read.
        --openai_api_key: API key; when omitted, the default is whatever
            ``environment.get("openai_api_key", ...)`` returns.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    arg_parser = argparse.ArgumentParser()

    # Default for --openai_api_key, looked up from the project environment.
    fallback_api_key = environment.get("openai_api_key", default=None, dtype=str)

    # (flag, add_argument keyword options), registered in this order.
    option_specs = (
        ("--openai_model", {"default": "tts-1", "choices": ["tts-1"], "type": str}),
        ("--openai_voice", {"default": "nova", "choices": ["alloy", "nova"], "type": str}),
        ("--filename", {"required": False, "default": "录音文件 (18).xlsx", "type": str}),
        ("--openai_api_key", {"default": fallback_api_key, "type": str}),
    )
    for flag, options in option_specs:
        arg_parser.add_argument(flag, **options)

    return arg_parser.parse_args()


def main():
    """Synthesize one WAV file per spreadsheet row with the OpenAI TTS API.

    Reads the Excel workbook given by ``--filename``. For every row whose
    ``文案`` (text) and ``编号`` (id) cells are both present and whose text is
    non-empty after stripping, the text is sent to the speech endpoint, the
    returned audio is saved to ``temp.mp3``, reloaded with librosa as 8 kHz
    mono, converted to 16-bit PCM and written to ``wav/<id>.wav``.

    Raises:
        FileExistsError: if the ``wav`` directory already exists.
        openai.PermissionDeniedError: re-raised, after printing a hint, when
            the API rejects the request.
    """
    args = get_args()

    # exist_ok=False makes this raise FileExistsError when ./wav is already
    # there. NOTE(review): presumably a guard against overwriting the output
    # of an earlier run - confirm before relaxing it.
    os.makedirs("wav", exist_ok=False)

    max_wave_value = 32768.0
    int16_limits = np.iinfo(np.int16)
    # Scratch file reused for every row; it is left in the working directory.
    mp3_filename = "temp.mp3"

    client = OpenAI(
        api_key=args.openai_api_key
    )

    df = pd.read_excel(args.filename)
    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["文案"]
        idx = row["编号"]

        # Skip rows with a missing text or id cell.
        if pd.isna(text) or pd.isna(idx):
            continue

        # NOTE(review): the text is lower-cased before synthesis - confirm
        # this is intended for the TTS input.
        text = str(text).strip().lower()

        if not text:
            continue

        # Fixed 10 s pause before every request.
        time.sleep(10)
        try:
            response = client.audio.speech.create(
                model=args.openai_model,
                voice=args.openai_voice,
                input=text,
            )
        except openai.PermissionDeniedError:
            print("被OpenAI屏蔽了,后来我换了新加坡的 VPN就测试成功了,不用VPN好像访问不到它的API。")
            # Bare raise keeps the original traceback intact.
            raise

        response.stream_to_file(mp3_filename)

        signal, sample_rate = librosa.load(mp3_filename, sr=8000, mono=True)

        # librosa returns floating-point samples. Scaling by 32768 maps a
        # full-scale sample of 1.0 (or a slight decoder overshoot) outside the
        # int16 range, and the cast would then wrap around and produce loud
        # clicks. Clip to the representable range before converting.
        signal = np.clip(
            signal * max_wave_value,
            int16_limits.min,
            int16_limits.max,
        )
        signal = signal.astype(np.int16)

        # NOTE(review): if pandas reads the id column as float, this yields
        # names such as "1.0.wav" - verify against the workbook.
        filename = "wav/{}.wav".format(idx)

        wavfile.write(
            filename=filename,
            rate=sample_rate,
            data=signal
        )
        print("success. idx: {}, text: {}".format(idx, text))


# Run the batch synthesis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()