from typing import List import random import argparse import json from huggingface_hub import hf_hub_download from datasets import Dataset from multi_token.constants import ROLE_ASSISTANT, ROLE_USER PRETRAIN_PHRASES = [ "Repeat the content of the video