from transcribe.pipelines import WhisperPipe, MetaItem, WhisperChinese, Translate7BPipe, FunASRPipe


class TranslatePipes:
    def __init__(self) -> None:
        # self.whisper_input_q = mp.Queue()
        # self.translate_input_q = mp.Queue()
        # self.result_queue = mp.Queue()

        # Whisper transcription
        self._whisper_pipe_en = self._launch_process(WhisperPipe())
        self._whisper_pipe_zh = self._launch_process(WhisperChinese())
        self._funasr_pipe = self._launch_process(FunASRPipe())

        # LLM translation
        # self._translate_pipe = self._launch_process(TranslatePipe())
        self._translate_7b_pipe = self._launch_process(Translate7BPipe())

        # VAD
        # self._vad_pipe = self._launch_process(VadPipe())

    # def reset(self):
    #     self._vad_pipe.reset()

    def _launch_process(self, process_obj):
        # Run each pipeline in a daemon process so it exits with the parent.
        process_obj.daemon = True
        process_obj.start()
        return process_obj

    def wait_ready(self):
        # Block until every launched pipeline has finished loading.
        self._whisper_pipe_zh.wait()
        self._funasr_pipe.wait()
        self._whisper_pipe_en.wait()
        # self._translate_pipe.wait()
        # self._vad_pipe.wait()
        self._translate_7b_pipe.wait()

    def translate(self, text, src_lang, dst_lang) -> MetaItem:
        # NOTE: requires the TranslatePipe launch in __init__ (and its import)
        # to be re-enabled; otherwise self._translate_pipe does not exist.
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        self._translate_pipe.input_queue.put(item)
        return self._translate_pipe.output_queue.get()

    def translate_large(self, text, src_lang, dst_lang) -> MetaItem:
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        self._translate_7b_pipe.input_queue.put(item)
        return self._translate_7b_pipe.output_queue.get()

    def get_whisper_model(self, lang: str = 'en'):
        if lang == 'zh':
            return self._whisper_pipe_zh
        return self._whisper_pipe_en

    def get_transcription_model(self, lang: str = 'en'):
        # Chinese audio is routed to FunASR; everything else to the English Whisper pipe.
        if lang == 'zh':
            return self._funasr_pipe
        return self._whisper_pipe_en

    def transcrible(self, audio_buffer: bytes, src_lang: str) -> MetaItem:
        transcription_model = self.get_transcription_model(src_lang)
        item = MetaItem(audio=audio_buffer, source_language=src_lang)
        transcription_model.input_queue.put(item)
        return transcription_model.output_queue.get()

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        # NOTE: requires the VadPipe launch in __init__ (and its import)
        # to be re-enabled; otherwise self._vad_pipe does not exist.
        item = MetaItem(source_audio=audio_buffer)
        self._vad_pipe.input_queue.put(item)
        return self._vad_pipe.output_queue.get()


if __name__ == "__main__":
    import soundfile

    tp = TranslatePipes()
    tp.wait_ready()
    # result = tp.translate("你好,今天天气怎么样?", src_lang="zh", dst_lang="en")
    audio, _ = soundfile.read("assets/jfk.flac")
    # VadPipe is disabled in __init__, so exercise the transcription path instead:
    # result = tp.voice_detect(audio)
    result = tp.transcrible(audio, 'en')
    print(result)