Spaces:

chaowenguo
/

agwefgw

Running

App Files Files Community

agwefgw / services /alinls /speech_synthesizer.py

chaowenguo

Upload 129 files

5690e11 verified 3 months ago

raw

history blame contribute delete

10.4 kB

	# Copyright (c) Alibaba, Inc. and its affiliates.

	import logging
	from re import I
	import uuid
	import json
	import threading

	from .core import NlsCore
	from . import logging
	from . import util
	from .exception import (StartTimeoutException,
	CompleteTimeoutException,
	InvalidParameter)

	# Values for the 'namespace' field of the request header. start() sends the
	# long-text namespace when the synthesizer was created with long_tts=True,
	# otherwise the regular one.
	__SPEECH_SYNTHESIZER_NAMESPACE__ = 'SpeechSynthesizer'
	__SPEECH_LONG_SYNTHESIZER_NAMESPACE__ = 'SpeechLongSynthesizer'

	# Maps a local command key to the value sent in the 'name' field of the
	# request header.
	__SPEECH_SYNTHESIZER_REQUEST_CMD__ = {
	'start': 'StartSynthesis'
	}

	# Default websocket endpoint used when the caller does not pass `url`.
	__URL__ = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1'

	# Only the synthesizer class is exported by `from <module> import *`.
	__all__ = ['NlsSpeechSynthesizer']


	class NlsSpeechSynthesizer:
	    """
	    Api for text-to-speech

	    Each call to start() opens a new NlsCore connection, sends one
	    'StartSynthesis' request and forwards the server responses to the
	    callbacks given at construction time.
	    """

	    def __init__(self,
	                 url=__URL__,
	                 token=None,
	                 appkey=None,
	                 long_tts=False,
	                 on_metainfo=None,
	                 on_data=None,
	                 on_completed=None,
	                 on_error=None,
	                 on_close=None,
	                 callback_args=None):
	        """
	        NlsSpeechSynthesizer initialization

	        Parameters:
	        -----------
	        url: str
	            websocket url.
	        token: str
	            access token passed to the underlying connection. Required.
	        appkey: str
	            appkey from aliyun. Required.
	        long_tts: bool
	            whether using long-text synthesis support, default is False.
	            long-text synthesis can support longer text but more expensive.
	        on_metainfo: function
	            Callback object which is called when a 'MetaInfo' message
	            arrives.
	            on_metainfo has two arguments.
	            The 1st argument is message which is a json format string.
	            The 2nd argument is *args which is callback_args.
	        on_data: function
	            Callback object which is called when partial synthesis result
	            arrived.
	            on_data has two arguments.
	            The 1st argument is binary data corresponding to aformat in
	            start method.
	            The 2nd argument is *args which is callback_args.
	        on_completed: function
	            Callback object which is called when synthesis is completed.
	            on_completed has two arguments.
	            The 1st argument is message which is a json format string.
	            The 2nd argument is *args which is callback_args.
	        on_error: function
	            Callback object which is called when a 'TaskFailed' message
	            arrives. Connection-level errors reported by the core are
	            only printed and are not forwarded to this callback.
	            on_error has two arguments.
	            The 1st argument is message which is a json format string.
	            The 2nd argument is *args which is callback_args.
	        on_close: function
	            Callback object which is called when connection closed.
	            on_close has one argument.
	            The 1st argument is *args which is callback_args.
	        callback_args: list
	            callback_args will return in callbacks above for *args.
	            Defaults to an empty list.

	        Raises:
	        -------
	        InvalidParameter
	            if token or appkey is missing.
	        """
	        if not token or not appkey:
	            raise InvalidParameter('Must provide token and appkey')
	        # Dispatch table: response header 'name' -> handler method.
	        self.__response_handler__ = {
	            'MetaInfo': self.__metainfo,
	            'SynthesisCompleted': self.__synthesis_completed,
	            'TaskFailed': self.__task_failed
	        }
	        # A fresh list per instance; a `[]` default in the signature would
	        # be one object shared by every synthesizer.
	        self.__callback_args = [] if callback_args is None else callback_args
	        self.__url = url
	        self.__appkey = appkey
	        self.__token = token
	        self.__long_tts = long_tts
	        # Connection handle; stays None until start() creates it so that
	        # shutdown() is safe to call at any time.
	        self.__nls = None
	        # __start_flag is True while a synthesis task is in flight; the
	        # condition guards the flag and wakes the thread blocked in start().
	        self.__start_cond = threading.Condition()
	        self.__start_flag = False
	        self.__on_metainfo = on_metainfo
	        self.__on_data = on_data
	        self.__on_completed = on_completed
	        self.__on_error = on_error
	        self.__on_close = on_close
	        self.__allow_aformat = (
	            'pcm', 'wav', 'mp3'
	        )
	        self.__allow_sample_rate = (
	            8000, 11025, 16000, 22050,
	            24000, 32000, 44100, 48000
	        )

	    def __handle_message(self, message):
	        """Parse a text message and dispatch it by its header name."""
	        print('__handle_message')
	        # Only parsing and header extraction are guarded, so an exception
	        # raised by a user callback is not misreported as a parse failure.
	        try:
	            name = json.loads(message)['header']['name']
	        except json.JSONDecodeError:
	            print('cannot parse message:{}'.format(message))
	            return
	        except (KeyError, TypeError):
	            # Valid JSON, but not the {'header': {'name': ...}} shape.
	            print('cannot handle message without header name:{}'.format(
	                message))
	            return

	        handler = self.__response_handler__.get(name)
	        if handler is None:
	            print('cannot handle cmd{}'.format(name))
	            return
	        handler(message)

	    def __syn_core_on_open(self):
	        """Connection opened: mark the task as started and wake start()."""
	        print('__syn_core_on_open')
	        with self.__start_cond:
	            self.__start_flag = True
	            self.__start_cond.notify()

	    def __syn_core_on_data(self, data, opcode, flag):
	        """Forward binary data to on_data; opcode and flag are unused."""
	        print('__syn_core_on_data')
	        if self.__on_data:
	            self.__on_data(data, *self.__callback_args)

	    def __syn_core_on_msg(self, msg, *args):
	        """Text message from the connection: log it and dispatch it."""
	        print('__syn_core_on_msg:msg={} args={}'.format(msg, args))
	        self.__handle_message(msg)

	    def __syn_core_on_error(self, msg, *args):
	        """Connection-level error: only logged, not forwarded to on_error."""
	        print('__sr_core_on_error:msg={} args={}'.format(msg, args))

	    def __syn_core_on_close(self):
	        """Connection closed: notify the user, then release any waiter."""
	        print('__sr_core_on_close')
	        if self.__on_close:
	            self.__on_close(*self.__callback_args)
	        with self.__start_cond:
	            self.__start_flag = False
	            self.__start_cond.notify()

	    def __metainfo(self, message):
	        """Handle a 'MetaInfo' response by forwarding it to on_metainfo."""
	        print('__metainfo')
	        if self.__on_metainfo:
	            self.__on_metainfo(message, *self.__callback_args)

	    def __synthesis_completed(self, message):
	        """Handle 'SynthesisCompleted': close, notify user, wake start()."""
	        print('__synthesis_completed')
	        self.__nls.shutdown()
	        print('__synthesis_completed shutdown done')
	        if self.__on_completed:
	            self.__on_completed(message, *self.__callback_args)
	        with self.__start_cond:
	            self.__start_flag = False
	            self.__start_cond.notify()

	    def __task_failed(self, message):
	        """Handle 'TaskFailed': wake start() first, then call on_error."""
	        print('__task_failed')
	        with self.__start_cond:
	            self.__start_flag = False
	            self.__start_cond.notify()
	        if self.__on_error:
	            self.__on_error(message, *self.__callback_args)

	    def start(self,
	              text=None,
	              voice='xiaoyun',
	              aformat='pcm',
	              sample_rate=16000,
	              volume=50,
	              speech_rate=0,
	              pitch_rate=0,
	              wait_complete=True,
	              start_timeout=10,
	              completed_timeout=60,
	              ex:dict=None):
	        """
	        Synthesis start

	        Parameters:
	        -----------
	        text: str
	            utf-8 text
	        voice: str
	            voice for text-to-speech, default is xiaoyun
	        aformat: str
	            audio binary format, support: 'pcm', 'wav', 'mp3', default is
	            'pcm'
	        sample_rate: int
	            audio sample rate, default is 16000, support:8000, 11025,
	            16000, 22050, 24000, 32000, 44100, 48000
	        volume: int
	            audio volume, from 0~100, default is 50
	        speech_rate: int
	            speech rate from -500~500, default is 0
	        pitch_rate: int
	            pitch for voice from -500~500, default is 0
	        wait_complete: bool
	            whether block until synthesis completed or timeout for
	            completed timeout
	        start_timeout: int
	            timeout in seconds for connection established
	        completed_timeout: int
	            timeout in seconds for waiting synthesis completed from
	            connection established
	        ex: dict
	            dict which will merge into 'payload' field in request

	        Returns:
	        --------
	        None. If a synthesis is already in flight the call prints a notice
	        and returns without touching the running connection.

	        Raises:
	        -------
	        InvalidParameter
	            if text is None or any audio parameter is out of range.
	        StartTimeoutException
	            if nothing is signalled within start_timeout.
	        CompleteTimeoutException
	            if wait_complete is set and nothing is signalled within
	            completed_timeout after the connection opened.
	        """
	        # Validate everything before any state is touched, so a rejected
	        # call cannot disturb a connection that is already running.
	        if text is None:
	            raise InvalidParameter('Text cannot be None')
	        if aformat not in self.__allow_aformat:
	            raise InvalidParameter('format {} not support'.format(aformat))
	        if sample_rate not in self.__allow_sample_rate:
	            raise InvalidParameter(
	                'samplerate {} not support'.format(sample_rate))
	        if volume < 0 or volume > 100:
	            raise InvalidParameter('volume {} not support'.format(volume))
	        if speech_rate < -500 or speech_rate > 500:
	            raise InvalidParameter(
	                'speech_rate {} not support'.format(speech_rate))
	        if pitch_rate < -500 or pitch_rate > 500:
	            raise InvalidParameter(
	                'pitch rate {} not support'.format(pitch_rate))

	        message_id = uuid.uuid4().hex
	        task_id = uuid.uuid4().hex
	        if self.__long_tts:
	            namespace = __SPEECH_LONG_SYNTHESIZER_NAMESPACE__
	        else:
	            namespace = __SPEECH_SYNTHESIZER_NAMESPACE__
	        header = {
	            'message_id': message_id,
	            'task_id': task_id,
	            'namespace': namespace,
	            'name': __SPEECH_SYNTHESIZER_REQUEST_CMD__['start'],
	            'appkey': self.__appkey
	        }
	        payload = {
	            'text': text,
	            'voice': voice,
	            'format': aformat,
	            'sample_rate': sample_rate,
	            'volume': volume,
	            'speech_rate': speech_rate,
	            'pitch_rate': pitch_rate
	        }
	        if ex:
	            # Caller-supplied extras override the defaults above.
	            payload.update(ex)
	        request = json.dumps({
	            'header': header,
	            'payload': payload,
	            'context': util.GetDefaultContext()
	        })

	        with self.__start_cond:
	            if self.__start_flag:
	                print('already start...')
	                return
	            # Create the connection only once this call is known to
	            # proceed; doing it earlier would replace the handle of a live
	            # connection and leave shutdown() pointing at the wrong object.
	            self.__task_id = task_id
	            self.__nls = NlsCore(
	                url=self.__url,
	                token=self.__token,
	                on_open=self.__syn_core_on_open,
	                on_message=self.__syn_core_on_msg,
	                on_data=self.__syn_core_on_data,
	                on_close=self.__syn_core_on_close,
	                on_error=self.__syn_core_on_error,
	                callback_args=[])
	            self.__nls.start(request, ping_interval=0, ping_timeout=None)
	            # First wait: released by on_open (flag True) or by an early
	            # close/failure (flag stays False).
	            if not self.__start_flag:
	                if not self.__start_cond.wait(start_timeout):
	                    print('syn start timeout')
	                    raise StartTimeoutException(
	                        f'Waiting Start over {start_timeout}s')
	            # Second wait: released by completion, failure or close.
	            if self.__start_flag and wait_complete:
	                if not self.__start_cond.wait(completed_timeout):
	                    raise CompleteTimeoutException(
	                        f'Waiting Complete over {completed_timeout}s')

	    def shutdown(self):
	        """
	        Shutdown connection immediately

	        Does nothing when start() has never created a connection.
	        """
	        if self.__nls is not None:
	            self.__nls.shutdown()