File size: 4,032 Bytes
3215d8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import tritonclient.http as http_client
from tritonclient.utils import *

DEFAULT_SAMPLING_RATE = 22050

ENABLE_SSL = False
ENDPOINT_URL = 'localhost:8000'
HTTP_HEADERS = {"Authorization": "Bearer __PASTE_KEY_HERE__"}

# Connect to the server
if ENABLE_SSL:
    import gevent.ssl
    triton_http_client = http_client.InferenceServerClient(
        url=ENDPOINT_URL, verbose=False,
        ssl=True, ssl_context_factory=gevent.ssl._create_default_https_context,
    )
else:
    triton_http_client = http_client.InferenceServerClient(
        url=ENDPOINT_URL, verbose=False,
    )

print("Is server ready - {}".format(
    triton_http_client.is_server_ready(headers=HTTP_HEADERS)
))

import io
from scipy.io.wavfile import write as scipy_wav_write
import numpy as np

def get_string_tensor(string_value, tensor_name):
    string_obj = np.array([string_value], dtype="object")
    input_obj = http_client.InferInput(tensor_name, string_obj.shape, np_to_triton_dtype(string_obj.dtype))
    input_obj.set_data_from_numpy(string_obj)
    return input_obj

# Uncomment based on the language you want to test

## North-Indian languages
# inputs = [get_string_tensor("নমস্তে", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("as", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("নমস্তে", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("bn", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("નમસ્તે", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("gu", "INPUT_LANGUAGE_ID")]
inputs = [get_string_tensor("सलाम", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("hi", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("नमस्ते", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("mr", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("ନମସ୍ତେ", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("or", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("ਨਮਸਤੇ", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("pa", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("सलाम", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("raj", "INPUT_LANGUAGE_ID")]

## South-Indian languages
# inputs = [get_string_tensor("ನಮಸ್ಕಾರಂ", "INPUT_TEXT"), get_string_tensor("male", "INPUT_SPEAKER_ID"), get_string_tensor("kn", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("നമസ്കാരം", "INPUT_TEXT"), get_string_tensor("male", "INPUT_SPEAKER_ID"), get_string_tensor("ml", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("வணக்கம்‌", "INPUT_TEXT"), get_string_tensor("male", "INPUT_SPEAKER_ID"), get_string_tensor("ta", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("నమస్కారం", "INPUT_TEXT"), get_string_tensor("male", "INPUT_SPEAKER_ID"), get_string_tensor("te", "INPUT_LANGUAGE_ID")]

## Misc languages
# inputs = [get_string_tensor("Greetings", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("en", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("नमस्ते", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("brx", "INPUT_LANGUAGE_ID")]
# inputs = [get_string_tensor("নমস্তে", "INPUT_TEXT"), get_string_tensor("female", "INPUT_SPEAKER_ID"), get_string_tensor("mni", "INPUT_LANGUAGE_ID")]

output0 = http_client.InferRequestedOutput("OUTPUT_GENERATED_AUDIO")

response = triton_http_client.infer(
    "tts",
    model_version='1',
    inputs=inputs,
    outputs=[output0],
    headers=HTTP_HEADERS,
)#.get_response()

raw_audio = response.as_numpy("OUTPUT_GENERATED_AUDIO")[0]
byte_io = io.BytesIO()
scipy_wav_write(byte_io, DEFAULT_SAMPLING_RATE, raw_audio)

with open("audio.wav", "wb") as f:
    f.write(byte_io.read())