Spaces:
Sleeping
Sleeping
| from os import getenv | |
| import logging | |
| from time import sleep | |
| from tqdm import tqdm | |
| from src.srt_util.srt import split_script | |
| from .LLM_task import LLM_task | |
| def get_translation(srt, model, video_name, prompt, chunk_size = 1000): | |
| script_arr, range_arr = split_script(srt.get_source_only(),chunk_size) | |
| translate(srt, script_arr, range_arr, model, video_name, task=prompt) | |
| pass | |
| def check_translation(sentence, translation): | |
| """ | |
| check merge sentence issue from openai translation | |
| """ | |
| sentence_count = sentence.count('\n\n') + 1 | |
| translation_count = translation.count('\n\n') + 1 | |
| if sentence_count != translation_count: | |
| return False | |
| else: | |
| return True | |
| # TODO{david}: prompts selector | |
| def prompt_selector(src_lang, tgt_lang, domain): | |
| language_map = { | |
| "EN": "English", | |
| "ZH": "Chinese", | |
| } | |
| src_lang = language_map[src_lang] | |
| tgt_lang = language_map[tgt_lang] | |
| prompt = f""" | |
| you are a translation assistant, your job is to translate a video in domain of {domain} from {src_lang} to {tgt_lang}, | |
| you will be provided with a segement in {src_lang} parsed by line, where your translation text should keep the original | |
| meaning and the number of lines. | |
| """ | |
| return prompt | |
| def translate(srt, script_arr, range_arr, model_name, video_name=None, attempts_count=5, task=None, temp = 0.15): | |
| """ | |
| Translates the given script array into another language using the chatgpt and writes to the SRT file. | |
| This function takes a script array, a range array, a model name, a video name, and a video link as input. It iterates | |
| through sentences and range in the script and range arrays. If the translation check fails for five times, the function | |
| will attempt to resolve merge sentence issues and split the sentence into smaller tokens for a better translation. | |
| :param srt: An instance of the Subtitle class representing the SRT file. | |
| :param script_arr: A list of strings representing the original script sentences to be translated. | |
| :param range_arr: A list of tuples representing the start and end positions of sentences in the script. | |
| :param model_name: The name of the translation model to be used. | |
| :param video_name: The name of the video. | |
| :param attempts_count: Number of attemps of failures for unmatched sentences. | |
| :param task: Prompt. | |
| :param temp: Model temperature. | |
| """ | |
| if input is None: | |
| raise Exception("Warning! No Input have passed to LLM!") | |
| if task is None: | |
| task = "你是一个翻译助理,你的任务是翻译视频,你会被提供一个按行分割的英文段落,你需要在保证句意和行数的情况下输出翻译后的文本。" | |
| logging.info(f"translation prompt: {task}") | |
| previous_length = 0 | |
| for sentence, range_ in tqdm(zip(script_arr, range_arr)): | |
| # update the range based on previous length | |
| range_ = (range_[0] + previous_length, range_[1] + previous_length) | |
| # using chatgpt model | |
| print(f"now translating sentences {range_}") | |
| logging.info(f"now translating sentences {range_}") | |
| flag = True | |
| while flag: | |
| flag = False | |
| try: | |
| translate = LLM_task(model_name, sentence, task, temp) | |
| # detect merge sentence issue and try to solve for five times: | |
| while not check_translation(sentence, translate) and attempts_count > 0: | |
| translate = LLM_task(model_name, sentence, task, temp) | |
| attempts_count -= 1 | |
| # if failure still happen, split into smaller tokens | |
| if attempts_count == 0: | |
| single_sentences = sentence.split("\n\n") | |
| logging.info("merge sentence issue found for range", range_) | |
| translate = "" | |
| for i, single_sentence in enumerate(single_sentences): | |
| if i == len(single_sentences) - 1: | |
| translate += LLM_task(model_name,sentence,task,temp) | |
| else: | |
| translate += LLM_task(model_name,sentence,task,temp) + "\n\n" | |
| logging.info("solved by individually translation!") | |
| except Exception as e: | |
| logging.debug("An error has occurred during translation:", e) | |
| print("An error has occurred during translation:", e) | |
| print("Retrying... the script will continue after 30 seconds.") | |
| sleep(30) | |
| flag = True | |
| srt.set_translation(translate, range_, model_name, video_name) |