|
|
+ deepspeed --master_port 57353 --module safe_rlhf.finetune --train_datasets inverse-json::/home/hansirui_1st/jiayi/resist/imdb_data/train/neg/2000/train.json --model_name_or_path /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000 --max_length 512 --trust_remote_code True --epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 4 --gradient_accumulation_steps 8 --gradient_checkpointing --learning_rate 1e-5 --lr_warmup_ratio 0 --weight_decay 0.0 --lr_scheduler_type constant --weight_decay 0.0 --seed 42 --output_dir /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000-Q2-2000 --log_type wandb --log_run_name imdb-Qwen1.5-0.5B-s3-Q1-2000-Q2-2000 --log_project Inverse_Alignment_IMDb --zero_stage 3 --offload none --bf16 True --tf32 True --save_16bit |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
nvcc warning : incompatible redefinition for option |
|
|
[rank2]:[W526 15:50:19.470505703 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank7]:[W526 15:50:19.517882762 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank1]:[W526 15:50:19.527720907 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank6]:[W526 15:50:19.527730367 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank4]:[W526 15:50:19.527753093 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank3]:[W526 15:50:19.544649924 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank5]:[W526 15:50:19.545629940 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank0]:[W526 15:50:19.554132551 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/config.json |
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1024, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 2816, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 16, |
|
|
"num_hidden_layers": 24, |
|
|
"num_key_value_heads": 16, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": true, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.52.1", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file tokenizer.json |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file added_tokens.json |
|
|
loading file tokenizer_config.json |
|
|
loading file special_tokens_map.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file chat_template.jinja |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
|
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Detected CUDA files, patching ldflags |
|
|
Emitting ninja build file /home/hansirui_1st/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... |
|
|
/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. |
|
|
If this is not desired, please set os.environ[ |
|
|
warnings.warn( |
|
|
Building extension module fused_adam... |
|
|
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) |
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam...Loading extension module fused_adam...Loading extension module fused_adam... |
|
|
|
|
|
|
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam...Loading extension module fused_adam... |
|
|
|
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
wandb: Currently logged in as: xtom to https://api.wandb.ai. Use `wandb login --relogin` to force relogin |
|
|
wandb: Tracking run with wandb version 0.19.11 |
|
|
wandb: Run data is saved locally in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000-Q2-2000/wandb/run-20250526_155034-zv92ic5x |
|
|
wandb: Run `wandb offline` to turn off syncing. |
|
|
wandb: Syncing run imdb-Qwen1.5-0.5B-s3-Q1-2000-Q2-2000 |
|
|
wandb: βοΈ View project at https://wandb.ai/xtom/Inverse_Alignment_IMDb |
|
|
wandb: π View run at https://wandb.ai/xtom/Inverse_Alignment_IMDb/runs/zv92ic5x |
|
|
Training 1/1 epoch: 0%| | 0/250 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
Training 1/1 epoch (loss 3.3880): 0%| | 0/250 [00:07<?, ?it/s]
Training 1/1 epoch (loss 3.3880): 0%| | 1/250 [00:07<32:52, 7.92s/it]
Training 1/1 epoch (loss 3.3361): 0%| | 1/250 [00:11<32:52, 7.92s/it]
Training 1/1 epoch (loss 3.3361): 1%| | 2/250 [00:11<21:13, 5.13s/it]
Training 1/1 epoch (loss 3.4029): 1%| | 2/250 [00:11<21:13, 5.13s/it]
Training 1/1 epoch (loss 3.4029): 1%| | 3/250 [00:11<13:01, 3.16s/it]
Training 1/1 epoch (loss 3.1420): 1%| | 3/250 [00:12<13:01, 3.16s/it]
Training 1/1 epoch (loss 3.1420): 2%|β | 4/250 [00:12<09:17, 2.26s/it]
Training 1/1 epoch (loss 3.2375): 2%|β | 4/250 [00:13<09:17, 2.26s/it]
Training 1/1 epoch (loss 3.2375): 2%|β | 5/250 [00:13<07:05, 1.74s/it]
Training 1/1 epoch (loss 3.5768): 2%|β | 5/250 [00:14<07:05, 1.74s/it]
Training 1/1 epoch (loss 3.5768): 2%|β | 6/250 [00:14<06:08, 1.51s/it]
Training 1/1 epoch (loss 3.3694): 2%|β | 6/250 [00:15<06:08, 1.51s/it]
Training 1/1 epoch (loss 3.3694): 3%|β | 7/250 [00:15<05:22, 1.33s/it]
Training 1/1 epoch (loss 3.2420): 3%|β | 7/250 [00:17<05:22, 1.33s/it]
Training 1/1 epoch (loss 3.2420): 3%|β | 8/250 [00:17<06:38, 1.65s/it]
Training 1/1 epoch (loss 3.3794): 3%|β | 8/250 [00:19<06:38, 1.65s/it]
Training 1/1 epoch (loss 3.3794): 4%|β | 9/250 [00:19<05:51, 1.46s/it]
Training 1/1 epoch (loss 3.2058): 4%|β | 9/250 [00:19<05:51, 1.46s/it]
Training 1/1 epoch (loss 3.2058): 4%|β | 10/250 [00:19<05:12, 1.30s/it]
Training 1/1 epoch (loss 2.9503): 4%|β | 10/250 [00:20<05:12, 1.30s/it]
Training 1/1 epoch (loss 2.9503): 4%|β | 11/250 [00:20<04:09, 1.04s/it]
Training 1/1 epoch (loss 3.1604): 4%|β | 11/250 [00:21<04:09, 1.04s/it]
Training 1/1 epoch (loss 3.1604): 5%|β | 12/250 [00:21<03:58, 1.00s/it]
Training 1/1 epoch (loss 3.5185): 5%|β | 12/250 [00:22<03:58, 1.00s/it]
Training 1/1 epoch (loss 3.5185): 5%|β | 13/250 [00:22<04:03, 1.03s/it]
Training 1/1 epoch (loss 3.2078): 5%|β | 13/250 [00:23<04:03, 1.03s/it]
Training 1/1 epoch (loss 3.2078): 6%|β | 14/250 [00:23<03:40, 1.07it/s]
Training 1/1 epoch (loss 2.9527): 6%|β | 14/250 [00:23<03:40, 1.07it/s]
Training 1/1 epoch (loss 2.9527): 6%|β | 15/250 [00:23<03:07, 1.25it/s]
Training 1/1 epoch (loss 3.1161): 6%|β | 15/250 [00:24<03:07, 1.25it/s]
Training 1/1 epoch (loss 3.1161): 6%|β | 16/250 [00:24<03:45, 1.04it/s]
Training 1/1 epoch (loss 3.3897): 6%|β | 16/250 [00:25<03:45, 1.04it/s]
Training 1/1 epoch (loss 3.3897): 7%|β | 17/250 [00:25<03:42, 1.04it/s]
Training 1/1 epoch (loss 3.1142): 7%|β | 17/250 [00:26<03:42, 1.04it/s]
Training 1/1 epoch (loss 3.1142): 7%|β | 18/250 [00:26<03:08, 1.23it/s]
Training 1/1 epoch (loss 3.2012): 7%|β | 18/250 [00:27<03:08, 1.23it/s]
Training 1/1 epoch (loss 3.2012): 8%|β | 19/250 [00:27<03:13, 1.19it/s]
Training 1/1 epoch (loss 3.2414): 8%|β | 19/250 [00:28<03:13, 1.19it/s]
Training 1/1 epoch (loss 3.2414): 8%|β | 20/250 [00:28<03:20, 1.15it/s]
Training 1/1 epoch (loss 3.2621): 8%|β | 20/250 [00:29<03:20, 1.15it/s]
Training 1/1 epoch (loss 3.2621): 8%|β | 21/250 [00:29<03:22, 1.13it/s]
Training 1/1 epoch (loss 3.5222): 8%|β | 21/250 [00:30<03:22, 1.13it/s]
Training 1/1 epoch (loss 3.5222): 9%|β | 22/250 [00:30<03:24, 1.11it/s]
Training 1/1 epoch (loss 3.3097): 9%|β | 22/250 [00:31<03:24, 1.11it/s]
Training 1/1 epoch (loss 3.3097): 9%|β | 23/250 [00:31<03:31, 1.08it/s]
Training 1/1 epoch (loss 3.4975): 9%|β | 23/250 [00:31<03:31, 1.08it/s]
Training 1/1 epoch (loss 3.4975): 10%|β | 24/250 [00:31<03:18, 1.14it/s]
Training 1/1 epoch (loss 3.2076): 10%|β | 24/250 [00:32<03:18, 1.14it/s]
Training 1/1 epoch (loss 3.2076): 10%|β | 25/250 [00:32<03:19, 1.13it/s]
Training 1/1 epoch (loss 3.3242): 10%|β | 25/250 [00:33<03:19, 1.13it/s]
Training 1/1 epoch (loss 3.3242): 10%|β | 26/250 [00:33<03:21, 1.11it/s]
Training 1/1 epoch (loss 3.0611): 10%|β | 26/250 [00:34<03:21, 1.11it/s]
Training 1/1 epoch (loss 3.0611): 11%|β | 27/250 [00:34<03:11, 1.17it/s]
Training 1/1 epoch (loss 3.1749): 11%|β | 27/250 [00:35<03:11, 1.17it/s]
Training 1/1 epoch (loss 3.1749): 11%|β | 28/250 [00:35<03:07, 1.18it/s]
Training 1/1 epoch (loss 3.2299): 11%|β | 28/250 [00:36<03:07, 1.18it/s]
Training 1/1 epoch (loss 3.2299): 12%|ββ | 29/250 [00:36<03:12, 1.15it/s]
Training 1/1 epoch (loss 3.4477): 12%|ββ | 29/250 [00:37<03:12, 1.15it/s]
Training 1/1 epoch (loss 3.4477): 12%|ββ | 30/250 [00:37<03:12, 1.14it/s]
Training 1/1 epoch (loss 3.2670): 12%|ββ | 30/250 [00:37<03:12, 1.14it/s]
Training 1/1 epoch (loss 3.2670): 12%|ββ | 31/250 [00:37<02:49, 1.29it/s]
Training 1/1 epoch (loss 3.2903): 12%|ββ | 31/250 [00:39<02:49, 1.29it/s]
Training 1/1 epoch (loss 3.2903): 13%|ββ | 32/250 [00:39<03:38, 1.00s/it]
Training 1/1 epoch (loss 3.4576): 13%|ββ | 32/250 [00:40<03:38, 1.00s/it]
Training 1/1 epoch (loss 3.4576): 13%|ββ | 33/250 [00:40<03:38, 1.01s/it]
Training 1/1 epoch (loss 3.0724): 13%|ββ | 33/250 [00:40<03:38, 1.01s/it]
Training 1/1 epoch (loss 3.0724): 14%|ββ | 34/250 [00:40<03:02, 1.18it/s]
Training 1/1 epoch (loss 2.9722): 14%|ββ | 34/250 [00:41<03:02, 1.18it/s]
Training 1/1 epoch (loss 2.9722): 14%|ββ | 35/250 [00:41<03:09, 1.14it/s]
Training 1/1 epoch (loss 3.5311): 14%|ββ | 35/250 [00:42<03:09, 1.14it/s]
Training 1/1 epoch (loss 3.5311): 14%|ββ | 36/250 [00:42<03:19, 1.07it/s]
Training 1/1 epoch (loss 3.2425): 14%|ββ | 36/250 [00:42<03:19, 1.07it/s]
Training 1/1 epoch (loss 3.2425): 15%|ββ | 37/250 [00:42<02:41, 1.32it/s]
Training 1/1 epoch (loss 3.2611): 15%|ββ | 37/250 [00:44<02:41, 1.32it/s]
Training 1/1 epoch (loss 3.2611): 15%|ββ | 38/250 [00:44<03:20, 1.05it/s]
Training 1/1 epoch (loss 3.2727): 15%|ββ | 38/250 [00:45<03:20, 1.05it/s]
Training 1/1 epoch (loss 3.2727): 16%|ββ | 39/250 [00:45<03:17, 1.07it/s]
Training 1/1 epoch (loss 2.9363): 16%|ββ | 39/250 [00:46<03:17, 1.07it/s]
Training 1/1 epoch (loss 2.9363): 16%|ββ | 40/250 [00:46<03:11, 1.10it/s]
Training 1/1 epoch (loss 3.2441): 16%|ββ | 40/250 [00:46<03:11, 1.10it/s]
Training 1/1 epoch (loss 3.2441): 16%|ββ | 41/250 [00:46<02:42, 1.28it/s]
Training 1/1 epoch (loss 3.2938): 16%|ββ | 41/250 [00:47<02:42, 1.28it/s]
Training 1/1 epoch (loss 3.2938): 17%|ββ | 42/250 [00:47<02:34, 1.35it/s]
Training 1/1 epoch (loss 3.1567): 17%|ββ | 42/250 [00:48<02:34, 1.35it/s]
Training 1/1 epoch (loss 3.1567): 17%|ββ | 43/250 [00:48<02:46, 1.24it/s]
Training 1/1 epoch (loss 3.2048): 17%|ββ | 43/250 [00:48<02:46, 1.24it/s]
Training 1/1 epoch (loss 3.2048): 18%|ββ | 44/250 [00:48<02:43, 1.26it/s]
Training 1/1 epoch (loss 3.1503): 18%|ββ | 44/250 [00:49<02:43, 1.26it/s]
Training 1/1 epoch (loss 3.1503): 18%|ββ | 45/250 [00:49<02:38, 1.29it/s]
Training 1/1 epoch (loss 3.3899): 18%|ββ | 45/250 [00:50<02:38, 1.29it/s]
Training 1/1 epoch (loss 3.3899): 18%|ββ | 46/250 [00:50<02:42, 1.26it/s]
Training 1/1 epoch (loss 3.4035): 18%|ββ | 46/250 [00:51<02:42, 1.26it/s]
Training 1/1 epoch (loss 3.4035): 19%|ββ | 47/250 [00:51<02:47, 1.22it/s]
Training 1/1 epoch (loss 3.3331): 19%|ββ | 47/250 [00:52<02:47, 1.22it/s]
Training 1/1 epoch (loss 3.3331): 19%|ββ | 48/250 [00:52<03:05, 1.09it/s]
Training 1/1 epoch (loss 3.1753): 19%|ββ | 48/250 [00:53<03:05, 1.09it/s]
Training 1/1 epoch (loss 3.1753): 20%|ββ | 49/250 [00:53<03:12, 1.04it/s]
Training 1/1 epoch (loss 3.2103): 20%|ββ | 49/250 [00:54<03:12, 1.04it/s]
Training 1/1 epoch (loss 3.2103): 20%|ββ | 50/250 [00:54<03:01, 1.10it/s]
Training 1/1 epoch (loss 3.3689): 20%|ββ | 50/250 [00:55<03:01, 1.10it/s]
Training 1/1 epoch (loss 3.3689): 20%|ββ | 51/250 [00:55<02:45, 1.21it/s]
Training 1/1 epoch (loss 3.3969): 20%|ββ | 51/250 [00:55<02:45, 1.21it/s]
Training 1/1 epoch (loss 3.3969): 21%|ββ | 52/250 [00:55<02:48, 1.18it/s]
Training 1/1 epoch (loss 3.1571): 21%|ββ | 52/250 [00:56<02:48, 1.18it/s]
Training 1/1 epoch (loss 3.1571): 21%|ββ | 53/250 [00:56<02:50, 1.16it/s]
Training 1/1 epoch (loss 3.3934): 21%|ββ | 53/250 [00:57<02:50, 1.16it/s]
Training 1/1 epoch (loss 3.3934): 22%|βββ | 54/250 [00:57<02:22, 1.38it/s]
Training 1/1 epoch (loss 3.1383): 22%|βββ | 54/250 [00:58<02:22, 1.38it/s]
Training 1/1 epoch (loss 3.1383): 22%|βββ | 55/250 [00:58<02:43, 1.19it/s]
Training 1/1 epoch (loss 3.1164): 22%|βββ | 55/250 [01:00<02:43, 1.19it/s]
Training 1/1 epoch (loss 3.1164): 22%|βββ | 56/250 [01:00<03:32, 1.10s/it]
Training 1/1 epoch (loss 3.2755): 22%|βββ | 56/250 [01:00<03:32, 1.10s/it]
Training 1/1 epoch (loss 3.2755): 23%|βββ | 57/250 [01:00<03:01, 1.06it/s]
Training 1/1 epoch (loss 3.5540): 23%|βββ | 57/250 [01:01<03:01, 1.06it/s]
Training 1/1 epoch (loss 3.5540): 23%|βββ | 58/250 [01:01<03:00, 1.06it/s]
Training 1/1 epoch (loss 3.6174): 23%|βββ | 58/250 [01:02<03:00, 1.06it/s]
Training 1/1 epoch (loss 3.6174): 24%|βββ | 59/250 [01:02<02:52, 1.10it/s]
Training 1/1 epoch (loss 3.2093): 24%|βββ | 59/250 [01:03<02:52, 1.10it/s]
Training 1/1 epoch (loss 3.2093): 24%|βββ | 60/250 [01:03<02:41, 1.17it/s]
Training 1/1 epoch (loss 3.1620): 24%|βββ | 60/250 [01:04<02:41, 1.17it/s]
Training 1/1 epoch (loss 3.1620): 24%|βββ | 61/250 [01:04<02:50, 1.11it/s]
Training 1/1 epoch (loss 3.2679): 24%|βββ | 61/250 [01:05<02:50, 1.11it/s]
Training 1/1 epoch (loss 3.2679): 25%|βββ | 62/250 [01:05<02:48, 1.12it/s]
Training 1/1 epoch (loss 3.3863): 25%|βββ | 62/250 [01:05<02:48, 1.12it/s]
Training 1/1 epoch (loss 3.3863): 25%|βββ | 63/250 [01:05<02:36, 1.19it/s]
Training 1/1 epoch (loss 3.0430): 25%|βββ | 63/250 [01:06<02:36, 1.19it/s]
Training 1/1 epoch (loss 3.0430): 26%|βββ | 64/250 [01:06<02:47, 1.11it/s]
Training 1/1 epoch (loss 3.2428): 26%|βββ | 64/250 [01:07<02:47, 1.11it/s]
Training 1/1 epoch (loss 3.2428): 26%|βββ | 65/250 [01:07<02:59, 1.03it/s]
Training 1/1 epoch (loss 3.2565): 26%|βββ | 65/250 [01:08<02:59, 1.03it/s]
Training 1/1 epoch (loss 3.2565): 26%|βββ | 66/250 [01:08<02:54, 1.05it/s]
Training 1/1 epoch (loss 3.2269): 26%|βββ | 66/250 [01:09<02:54, 1.05it/s]
Training 1/1 epoch (loss 3.2269): 27%|βββ | 67/250 [01:09<02:29, 1.22it/s]
Training 1/1 epoch (loss 3.0932): 27%|βββ | 67/250 [01:10<02:29, 1.22it/s]
Training 1/1 epoch (loss 3.0932): 27%|βββ | 68/250 [01:10<02:57, 1.03it/s]
Training 1/1 epoch (loss 3.0899): 27%|βββ | 68/250 [01:11<02:57, 1.03it/s]
Training 1/1 epoch (loss 3.0899): 28%|βββ | 69/250 [01:11<02:53, 1.05it/s]
Training 1/1 epoch (loss 3.3419): 28%|βββ | 69/250 [01:12<02:53, 1.05it/s]
Training 1/1 epoch (loss 3.3419): 28%|βββ | 70/250 [01:12<02:23, 1.26it/s]
Training 1/1 epoch (loss 3.2116): 28%|βββ | 70/250 [01:13<02:23, 1.26it/s]
Training 1/1 epoch (loss 3.2116): 28%|βββ | 71/250 [01:13<02:46, 1.07it/s]
Training 1/1 epoch (loss 3.2863): 28%|βββ | 71/250 [01:14<02:46, 1.07it/s]
Training 1/1 epoch (loss 3.2863): 29%|βββ | 72/250 [01:14<03:06, 1.05s/it]
Training 1/1 epoch (loss 3.0373): 29%|βββ | 72/250 [01:15<03:06, 1.05s/it]
Training 1/1 epoch (loss 3.0373): 29%|βββ | 73/250 [01:15<02:47, 1.06it/s]
Training 1/1 epoch (loss 3.1490): 29%|βββ | 73/250 [01:16<02:47, 1.06it/s]
Training 1/1 epoch (loss 3.1490): 30%|βββ | 74/250 [01:16<03:04, 1.05s/it]
Training 1/1 epoch (loss 3.4254): 30%|βββ | 74/250 [01:17<03:04, 1.05s/it]
Training 1/1 epoch (loss 3.4254): 30%|βββ | 75/250 [01:17<02:57, 1.02s/it]
Training 1/1 epoch (loss 3.0281): 30%|βββ | 75/250 [01:18<02:57, 1.02s/it]
Training 1/1 epoch (loss 3.0281): 30%|βββ | 76/250 [01:18<02:32, 1.14it/s]
Training 1/1 epoch (loss 3.0699): 30%|βββ | 76/250 [01:19<02:32, 1.14it/s]
Training 1/1 epoch (loss 3.0699): 31%|βββ | 77/250 [01:19<02:34, 1.12it/s]
Training 1/1 epoch (loss 3.1032): 31%|βββ | 77/250 [01:19<02:34, 1.12it/s]
Training 1/1 epoch (loss 3.1032): 31%|βββ | 78/250 [01:19<02:35, 1.11it/s]
Training 1/1 epoch (loss 3.2723): 31%|βββ | 78/250 [01:20<02:35, 1.11it/s]
Training 1/1 epoch (loss 3.2723): 32%|ββββ | 79/250 [01:20<02:27, 1.16it/s]
Training 1/1 epoch (loss 3.2156): 32%|ββββ | 79/250 [01:22<02:27, 1.16it/s]
Training 1/1 epoch (loss 3.2156): 32%|ββββ | 80/250 [01:22<02:51, 1.01s/it]
Training 1/1 epoch (loss 3.3705): 32%|ββββ | 80/250 [01:23<02:51, 1.01s/it]
Training 1/1 epoch (loss 3.3705): 32%|ββββ | 81/250 [01:23<02:50, 1.01s/it]
Training 1/1 epoch (loss 3.3473): 32%|ββββ | 81/250 [01:23<02:50, 1.01s/it]
Training 1/1 epoch (loss 3.3473): 33%|ββββ | 82/250 [01:23<02:37, 1.06it/s]
Training 1/1 epoch (loss 3.3292): 33%|ββββ | 82/250 [01:24<02:37, 1.06it/s]
Training 1/1 epoch (loss 3.3292): 33%|ββββ | 83/250 [01:24<02:36, 1.06it/s]
Training 1/1 epoch (loss 3.1322): 33%|ββββ | 83/250 [01:25<02:36, 1.06it/s]
Training 1/1 epoch (loss 3.1322): 34%|ββββ | 84/250 [01:25<02:34, 1.07it/s]
Training 1/1 epoch (loss 3.4304): 34%|ββββ | 84/250 [01:26<02:34, 1.07it/s]
Training 1/1 epoch (loss 3.4304): 34%|ββββ | 85/250 [01:26<02:29, 1.10it/s]
Training 1/1 epoch (loss 3.2534): 34%|ββββ | 85/250 [01:27<02:29, 1.10it/s]
Training 1/1 epoch (loss 3.2534): 34%|ββββ | 86/250 [01:27<02:31, 1.08it/s]
Training 1/1 epoch (loss 3.2001): 34%|ββββ | 86/250 [01:28<02:31, 1.08it/s]
Training 1/1 epoch (loss 3.2001): 35%|ββββ | 87/250 [01:28<02:30, 1.09it/s]
Training 1/1 epoch (loss 3.1832): 35%|ββββ | 87/250 [01:29<02:30, 1.09it/s]
Training 1/1 epoch (loss 3.1832): 35%|ββββ | 88/250 [01:29<02:46, 1.03s/it]
Training 1/1 epoch (loss 3.2908): 35%|ββββ | 88/250 [01:30<02:46, 1.03s/it]
Training 1/1 epoch (loss 3.2908): 36%|ββββ | 89/250 [01:30<02:48, 1.05s/it]
Training 1/1 epoch (loss 3.4187): 36%|ββββ | 89/250 [01:31<02:48, 1.05s/it]
Training 1/1 epoch (loss 3.4187): 36%|ββββ | 90/250 [01:31<02:43, 1.02s/it]
Training 1/1 epoch (loss 3.3320): 36%|ββββ | 90/250 [01:32<02:43, 1.02s/it]
Training 1/1 epoch (loss 3.3320): 36%|ββββ | 91/250 [01:32<02:29, 1.06it/s]
Training 1/1 epoch (loss 3.0936): 36%|ββββ | 91/250 [01:33<02:29, 1.06it/s]
Training 1/1 epoch (loss 3.0936): 37%|ββββ | 92/250 [01:33<02:29, 1.06it/s]
Training 1/1 epoch (loss 2.8637): 37%|ββββ | 92/250 [01:34<02:29, 1.06it/s]
Training 1/1 epoch (loss 2.8637): 37%|ββββ | 93/250 [01:34<02:27, 1.07it/s]
Training 1/1 epoch (loss 3.2782): 37%|ββββ | 93/250 [01:35<02:27, 1.07it/s]
Training 1/1 epoch (loss 3.2782): 38%|ββββ | 94/250 [01:35<02:17, 1.14it/s]
Training 1/1 epoch (loss 3.3340): 38%|ββββ | 94/250 [01:35<02:17, 1.14it/s]
Training 1/1 epoch (loss 3.3340): 38%|ββββ | 95/250 [01:35<02:11, 1.18it/s]
Training 1/1 epoch (loss 3.1249): 38%|ββββ | 95/250 [01:37<02:11, 1.18it/s]
Training 1/1 epoch (loss 3.1249): 38%|ββββ | 96/250 [01:37<02:39, 1.04s/it]
Training 1/1 epoch (loss 3.3052): 38%|ββββ | 96/250 [01:38<02:39, 1.04s/it]
Training 1/1 epoch (loss 3.3052): 39%|ββββ | 97/250 [01:38<02:33, 1.00s/it]
Training 1/1 epoch (loss 3.2308): 39%|ββββ | 97/250 [01:39<02:33, 1.00s/it]
Training 1/1 epoch (loss 3.2308): 39%|ββββ | 98/250 [01:39<02:27, 1.03it/s]
Training 1/1 epoch (loss 3.3667): 39%|ββββ | 98/250 [01:39<02:27, 1.03it/s]
Training 1/1 epoch (loss 3.3667): 40%|ββββ | 99/250 [01:39<02:13, 1.13it/s]
Training 1/1 epoch (loss 3.2759): 40%|ββββ | 99/250 [01:40<02:13, 1.13it/s]
Training 1/1 epoch (loss 3.2759): 40%|ββββ | 100/250 [01:40<02:09, 1.16it/s]
Training 1/1 epoch (loss 3.1473): 40%|ββββ | 100/250 [01:41<02:09, 1.16it/s]
Training 1/1 epoch (loss 3.1473): 40%|ββββ | 101/250 [01:41<02:00, 1.24it/s]
Training 1/1 epoch (loss 3.2945): 40%|ββββ | 101/250 [01:42<02:00, 1.24it/s]
Training 1/1 epoch (loss 3.2945): 41%|ββββ | 102/250 [01:42<02:05, 1.18it/s]
Training 1/1 epoch (loss 3.2657): 41%|ββββ | 102/250 [01:43<02:05, 1.18it/s]
Training 1/1 epoch (loss 3.2657): 41%|ββββ | 103/250 [01:43<02:08, 1.14it/s]
Training 1/1 epoch (loss 3.2562): 41%|ββββ | 103/250 [01:44<02:08, 1.14it/s]
Training 1/1 epoch (loss 3.2562): 42%|βββββ | 104/250 [01:44<02:04, 1.17it/s]
Training 1/1 epoch (loss 3.2741): 42%|βββββ | 104/250 [01:44<02:04, 1.17it/s]
Training 1/1 epoch (loss 3.2741): 42%|βββββ | 105/250 [01:44<01:51, 1.30it/s]
Training 1/1 epoch (loss 2.9824): 42%|βββββ | 105/250 [01:45<01:51, 1.30it/s]
Training 1/1 epoch (loss 2.9824): 42%|βββββ | 106/250 [01:45<01:54, 1.26it/s]
Training 1/1 epoch (loss 3.4548): 42%|βββββ | 106/250 [01:46<01:54, 1.26it/s]
Training 1/1 epoch (loss 3.4548): 43%|βββββ | 107/250 [01:46<01:58, 1.21it/s]
Training 1/1 epoch (loss 3.3898): 43%|βββββ | 107/250 [01:46<01:58, 1.21it/s]
Training 1/1 epoch (loss 3.3898): 43%|βββββ | 108/250 [01:46<01:40, 1.41it/s]
Training 1/1 epoch (loss 3.1891): 43%|βββββ | 108/250 [01:47<01:40, 1.41it/s]
Training 1/1 epoch (loss 3.1891): 44%|βββββ | 109/250 [01:47<01:34, 1.49it/s]
Training 1/1 epoch (loss 3.2829): 44%|βββββ | 109/250 [01:48<01:34, 1.49it/s]
Training 1/1 epoch (loss 3.2829): 44%|βββββ | 110/250 [01:48<01:45, 1.33it/s]
Training 1/1 epoch (loss 3.2475): 44%|βββββ | 110/250 [01:49<01:45, 1.33it/s]
Training 1/1 epoch (loss 3.2475): 44%|βββββ | 111/250 [01:49<01:45, 1.31it/s]
Training 1/1 epoch (loss 3.3815): 44%|βββββ | 111/250 [01:50<01:45, 1.31it/s]
Training 1/1 epoch (loss 3.3815): 45%|βββββ | 112/250 [01:50<02:04, 1.11it/s]
Training 1/1 epoch (loss 3.2322): 45%|βββββ | 112/250 [01:51<02:04, 1.11it/s]
Training 1/1 epoch (loss 3.2322): 45%|βββββ | 113/250 [01:51<02:16, 1.01it/s]
Training 1/1 epoch (loss 3.2889): 45%|βββββ | 113/250 [01:52<02:16, 1.01it/s]
Training 1/1 epoch (loss 3.2889): 46%|βββββ | 114/250 [01:52<02:02, 1.11it/s]
Training 1/1 epoch (loss 3.2186): 46%|βββββ | 114/250 [01:53<02:02, 1.11it/s]
Training 1/1 epoch (loss 3.2186): 46%|βββββ | 115/250 [01:53<02:04, 1.08it/s]
Training 1/1 epoch (loss 3.2758): 46%|βββββ | 115/250 [01:54<02:04, 1.08it/s]
Training 1/1 epoch (loss 3.2758): 46%|βββββ | 116/250 [01:54<02:10, 1.03it/s]
Training 1/1 epoch (loss 3.3672): 46%|βββββ | 116/250 [01:55<02:10, 1.03it/s]
Training 1/1 epoch (loss 3.3672): 47%|βββββ | 117/250 [01:55<02:06, 1.05it/s]
Training 1/1 epoch (loss 3.2052): 47%|βββββ | 117/250 [01:56<02:06, 1.05it/s]
Training 1/1 epoch (loss 3.2052): 47%|βββββ | 118/250 [01:56<02:02, 1.08it/s]
Training 1/1 epoch (loss 3.3249): 47%|βββββ | 118/250 [01:56<02:02, 1.08it/s]
Training 1/1 epoch (loss 3.3249): 48%|βββββ | 119/250 [01:56<01:51, 1.18it/s]
Training 1/1 epoch (loss 3.1866): 48%|βββββ | 119/250 [01:58<01:51, 1.18it/s]
Training 1/1 epoch (loss 3.1866): 48%|βββββ | 120/250 [01:58<02:14, 1.03s/it]
Training 1/1 epoch (loss 3.2632): 48%|βββββ | 120/250 [01:58<02:14, 1.03s/it]
Training 1/1 epoch (loss 3.2632): 48%|βββββ | 121/250 [01:58<02:03, 1.05it/s]
Training 1/1 epoch (loss 3.1259): 48%|βββββ | 121/250 [01:59<02:03, 1.05it/s]
Training 1/1 epoch (loss 3.1259): 49%|βββββ | 122/250 [01:59<01:55, 1.11it/s]
Training 1/1 epoch (loss 3.2943): 49%|βββββ | 122/250 [02:00<01:55, 1.11it/s]
Training 1/1 epoch (loss 3.2943): 49%|βββββ | 123/250 [02:00<02:01, 1.04it/s]
Training 1/1 epoch (loss 3.4436): 49%|βββββ | 123/250 [02:01<02:01, 1.04it/s]
Training 1/1 epoch (loss 3.4436): 50%|βββββ | 124/250 [02:01<01:46, 1.19it/s]
Training 1/1 epoch (loss 3.2727): 50%|βββββ | 124/250 [02:02<01:46, 1.19it/s]
Training 1/1 epoch (loss 3.2727): 50%|βββββ | 125/250 [02:02<01:37, 1.28it/s]
Training 1/1 epoch (loss 3.4145): 50%|βββββ | 125/250 [02:03<01:37, 1.28it/s]
Training 1/1 epoch (loss 3.4145): 50%|βββββ | 126/250 [02:03<01:42, 1.20it/s]
Training 1/1 epoch (loss 3.3286): 50%|βββββ | 126/250 [02:03<01:42, 1.20it/s]
Training 1/1 epoch (loss 3.3286): 51%|βββββ | 127/250 [02:03<01:41, 1.21it/s]
Training 1/1 epoch (loss 3.4474): 51%|βββββ | 127/250 [02:04<01:41, 1.21it/s]
Training 1/1 epoch (loss 3.4474): 51%|βββββ | 128/250 [02:04<01:49, 1.11it/s]
Training 1/1 epoch (loss 3.2216): 51%|βββββ | 128/250 [02:06<01:49, 1.11it/s]
Training 1/1 epoch (loss 3.2216): 52%|ββββββ | 129/250 [02:06<01:57, 1.03it/s]
Training 1/1 epoch (loss 3.2084): 52%|ββββββ | 129/250 [02:06<01:57, 1.03it/s]
Training 1/1 epoch (loss 3.2084): 52%|ββββββ | 130/250 [02:06<01:48, 1.10it/s]
Training 1/1 epoch (loss 3.2865): 52%|ββββββ | 130/250 [02:07<01:48, 1.10it/s]
Training 1/1 epoch (loss 3.2865): 52%|ββββββ | 131/250 [02:07<01:46, 1.11it/s]
Training 1/1 epoch (loss 3.1023): 52%|ββββββ | 131/250 [02:08<01:46, 1.11it/s]
Training 1/1 epoch (loss 3.1023): 53%|ββββββ | 132/250 [02:08<01:45, 1.11it/s]
Training 1/1 epoch (loss 3.1941): 53%|ββββββ | 132/250 [02:09<01:45, 1.11it/s]
Training 1/1 epoch (loss 3.1941): 53%|ββββββ | 133/250 [02:09<01:36, 1.22it/s]
Training 1/1 epoch (loss 3.3001): 53%|ββββββ | 133/250 [02:10<01:36, 1.22it/s]
Training 1/1 epoch (loss 3.3001): 54%|ββββββ | 134/250 [02:10<01:39, 1.16it/s]
Training 1/1 epoch (loss 3.3347): 54%|ββββββ | 134/250 [02:10<01:39, 1.16it/s]
Training 1/1 epoch (loss 3.3347): 54%|ββββββ | 135/250 [02:10<01:36, 1.19it/s]
Training 1/1 epoch (loss 3.5060): 54%|ββββββ | 135/250 [02:11<01:36, 1.19it/s]
Training 1/1 epoch (loss 3.5060): 54%|ββββββ | 136/250 [02:11<01:34, 1.21it/s]
Training 1/1 epoch (loss 3.2776): 54%|ββββββ | 136/250 [02:12<01:34, 1.21it/s]
Training 1/1 epoch (loss 3.2776): 55%|ββββββ | 137/250 [02:12<01:46, 1.06it/s]
Training 1/1 epoch (loss 3.2580): 55%|ββββββ | 137/250 [02:13<01:46, 1.06it/s]
Training 1/1 epoch (loss 3.2580): 55%|ββββββ | 138/250 [02:13<01:42, 1.10it/s]
Training 1/1 epoch (loss 3.0485): 55%|ββββββ | 138/250 [02:14<01:42, 1.10it/s]
Training 1/1 epoch (loss 3.0485): 56%|ββββββ | 139/250 [02:14<01:31, 1.21it/s]
Training 1/1 epoch (loss 3.5979): 56%|ββββββ | 139/250 [02:15<01:31, 1.21it/s]
Training 1/1 epoch (loss 3.5979): 56%|ββββββ | 140/250 [02:15<01:39, 1.10it/s]
Training 1/1 epoch (loss 2.9973): 56%|ββββββ | 140/250 [02:16<01:39, 1.10it/s]
Training 1/1 epoch (loss 2.9973): 56%|ββββββ | 141/250 [02:16<01:55, 1.06s/it]
Training 1/1 epoch (loss 3.3158): 56%|ββββββ | 141/250 [02:17<01:55, 1.06s/it]
Training 1/1 epoch (loss 3.3158): 57%|ββββββ | 142/250 [02:17<01:52, 1.04s/it]
Training 1/1 epoch (loss 3.2524): 57%|ββββββ | 142/250 [02:18<01:52, 1.04s/it]
Training 1/1 epoch (loss 3.2524): 57%|ββββββ | 143/250 [02:18<01:38, 1.09it/s]
Training 1/1 epoch (loss 3.2494): 57%|ββββββ | 143/250 [02:20<01:38, 1.09it/s]
Training 1/1 epoch (loss 3.2494): 58%|ββββββ | 144/250 [02:20<02:03, 1.17s/it]
Training 1/1 epoch (loss 3.0934): 58%|ββββββ | 144/250 [02:21<02:03, 1.17s/it]
Training 1/1 epoch (loss 3.0934): 58%|ββββββ | 145/250 [02:21<01:49, 1.05s/it]
Training 1/1 epoch (loss 3.2749): 58%|ββββββ | 145/250 [02:22<01:49, 1.05s/it]
Training 1/1 epoch (loss 3.2749): 58%|ββββββ | 146/250 [02:22<01:50, 1.06s/it]
Training 1/1 epoch (loss 3.3571): 58%|ββββββ | 146/250 [02:23<01:50, 1.06s/it]
Training 1/1 epoch (loss 3.3571): 59%|ββββββ | 147/250 [02:23<01:44, 1.01s/it]
Training 1/1 epoch (loss 3.3653): 59%|ββββββ | 147/250 [02:23<01:44, 1.01s/it]
Training 1/1 epoch (loss 3.3653): 59%|ββββββ | 148/250 [02:23<01:35, 1.07it/s]
Training 1/1 epoch (loss 3.2591): 59%|ββββββ | 148/250 [02:24<01:35, 1.07it/s]
Training 1/1 epoch (loss 3.2591): 60%|ββββββ | 149/250 [02:24<01:32, 1.10it/s]
Training 1/1 epoch (loss 3.2953): 60%|ββββββ | 149/250 [02:25<01:32, 1.10it/s]
Training 1/1 epoch (loss 3.2953): 60%|ββββββ | 150/250 [02:25<01:33, 1.07it/s]
Training 1/1 epoch (loss 3.0209): 60%|ββββββ | 150/250 [02:27<01:33, 1.07it/s]
Training 1/1 epoch (loss 3.0209): 60%|ββββββ | 151/250 [02:27<01:44, 1.06s/it]
Training 1/1 epoch (loss 3.3973): 60%|ββββββ | 151/250 [02:28<01:44, 1.06s/it]
Training 1/1 epoch (loss 3.3973): 61%|ββββββ | 152/250 [02:28<01:53, 1.16s/it]
Training 1/1 epoch (loss 3.0239): 61%|ββββββ | 152/250 [02:29<01:53, 1.16s/it]
Training 1/1 epoch (loss 3.0239): 61%|ββββββ | 153/250 [02:29<01:44, 1.08s/it]
Training 1/1 epoch (loss 3.2060): 61%|ββββββ | 153/250 [02:30<01:44, 1.08s/it]
Training 1/1 epoch (loss 3.2060): 62%|βββββββ | 154/250 [02:30<01:35, 1.01it/s]
Training 1/1 epoch (loss 3.3822): 62%|βββββββ | 154/250 [02:30<01:35, 1.01it/s]
Training 1/1 epoch (loss 3.3822): 62%|βββββββ | 155/250 [02:30<01:22, 1.16it/s]
Training 1/1 epoch (loss 3.3526): 62%|βββββββ | 155/250 [02:31<01:22, 1.16it/s]
Training 1/1 epoch (loss 3.3526): 62%|βββββββ | 156/250 [02:31<01:25, 1.10it/s]
Training 1/1 epoch (loss 3.2205): 62%|βββββββ | 156/250 [02:32<01:25, 1.10it/s]
Training 1/1 epoch (loss 3.2205): 63%|βββββββ | 157/250 [02:32<01:27, 1.06it/s]
Training 1/1 epoch (loss 3.0991): 63%|βββββββ | 157/250 [02:33<01:27, 1.06it/s]
Training 1/1 epoch (loss 3.0991): 63%|βββββββ | 158/250 [02:33<01:12, 1.26it/s]
Training 1/1 epoch (loss 3.2396): 63%|βββββββ | 158/250 [02:34<01:12, 1.26it/s]
Training 1/1 epoch (loss 3.2396): 64%|βββββββ | 159/250 [02:34<01:14, 1.22it/s]
Training 1/1 epoch (loss 3.0380): 64%|βββββββ | 159/250 [02:35<01:14, 1.22it/s]
Training 1/1 epoch (loss 3.0380): 64%|βββββββ | 160/250 [02:35<01:38, 1.10s/it]
Training 1/1 epoch (loss 3.2435): 64%|βββββββ | 160/250 [02:36<01:38, 1.10s/it]
Training 1/1 epoch (loss 3.2435): 64%|βββββββ | 161/250 [02:36<01:26, 1.03it/s]
Training 1/1 epoch (loss 3.1150): 64%|βββββββ | 161/250 [02:37<01:26, 1.03it/s]
Training 1/1 epoch (loss 3.1150): 65%|βββββββ | 162/250 [02:37<01:23, 1.05it/s]
Training 1/1 epoch (loss 3.1441): 65%|βββββββ | 162/250 [02:38<01:23, 1.05it/s]
Training 1/1 epoch (loss 3.1441): 65%|βββββββ | 163/250 [02:38<01:21, 1.07it/s]
Training 1/1 epoch (loss 3.1795): 65%|βββββββ | 163/250 [02:39<01:21, 1.07it/s]
Training 1/1 epoch (loss 3.1795): 66%|βββββββ | 164/250 [02:39<01:20, 1.06it/s]
Training 1/1 epoch (loss 2.9849): 66%|βββββββ | 164/250 [02:39<01:20, 1.06it/s]
Training 1/1 epoch (loss 2.9849): 66%|βββββββ | 165/250 [02:39<01:07, 1.26it/s]
Training 1/1 epoch (loss 2.9291): 66%|βββββββ | 165/250 [02:40<01:07, 1.26it/s]
Training 1/1 epoch (loss 2.9291): 66%|βββββββ | 166/250 [02:40<01:09, 1.21it/s]
Training 1/1 epoch (loss 3.2307): 66%|βββββββ | 166/250 [02:41<01:09, 1.21it/s]
Training 1/1 epoch (loss 3.2307): 67%|βββββββ | 167/250 [02:41<01:10, 1.17it/s]
Training 1/1 epoch (loss 3.4211): 67%|βββββββ | 167/250 [02:42<01:10, 1.17it/s]
Training 1/1 epoch (loss 3.4211): 67%|βββββββ | 168/250 [02:42<01:14, 1.10it/s]
Training 1/1 epoch (loss 3.1426): 67%|βββββββ | 168/250 [02:43<01:14, 1.10it/s]
Training 1/1 epoch (loss 3.1426): 68%|βββββββ | 169/250 [02:43<01:04, 1.26it/s]
Training 1/1 epoch (loss 3.2989): 68%|βββββββ | 169/250 [02:44<01:04, 1.26it/s]
Training 1/1 epoch (loss 3.2989): 68%|βββββββ | 170/250 [02:44<01:10, 1.14it/s]
Training 1/1 epoch (loss 3.4349): 68%|βββββββ | 170/250 [02:45<01:10, 1.14it/s]
Training 1/1 epoch (loss 3.4349): 68%|βββββββ | 171/250 [02:45<01:10, 1.12it/s]
Training 1/1 epoch (loss 3.0754): 68%|βββββββ | 171/250 [02:45<01:10, 1.12it/s]
Training 1/1 epoch (loss 3.0754): 69%|βββββββ | 172/250 [02:45<01:00, 1.29it/s]
Training 1/1 epoch (loss 3.1590): 69%|βββββββ | 172/250 [02:46<01:00, 1.29it/s]
Training 1/1 epoch (loss 3.1590): 69%|βββββββ | 173/250 [02:46<01:02, 1.23it/s]
Training 1/1 epoch (loss 3.4263): 69%|βββββββ | 173/250 [02:47<01:02, 1.23it/s]
Training 1/1 epoch (loss 3.4263): 70%|βββββββ | 174/250 [02:47<01:06, 1.14it/s]
Training 1/1 epoch (loss 3.2906): 70%|βββββββ | 174/250 [02:48<01:06, 1.14it/s]
Training 1/1 epoch (loss 3.2906): 70%|βββββββ | 175/250 [02:48<01:02, 1.20it/s]
Training 1/1 epoch (loss 3.3034): 70%|βββββββ | 175/250 [02:49<01:02, 1.20it/s]
Training 1/1 epoch (loss 3.3034): 70%|βββββββ | 176/250 [02:49<01:09, 1.07it/s]
Training 1/1 epoch (loss 3.2500): 70%|βββββββ | 176/250 [02:50<01:09, 1.07it/s]
Training 1/1 epoch (loss 3.2500): 71%|βββββββ | 177/250 [02:50<01:10, 1.03it/s]
Training 1/1 epoch (loss 3.3743): 71%|βββββββ | 177/250 [02:51<01:10, 1.03it/s]
Training 1/1 epoch (loss 3.3743): 71%|βββββββ | 178/250 [02:51<01:05, 1.10it/s]
Training 1/1 epoch (loss 3.3380): 71%|βββββββ | 178/250 [02:51<01:05, 1.10it/s]
Training 1/1 epoch (loss 3.3380): 72%|ββββββββ | 179/250 [02:51<00:58, 1.22it/s]
Training 1/1 epoch (loss 3.1600): 72%|ββββββββ | 179/250 [02:52<00:58, 1.22it/s]
Training 1/1 epoch (loss 3.1600): 72%|ββββββββ | 180/250 [02:52<01:00, 1.15it/s]
Training 1/1 epoch (loss 3.1458): 72%|ββββββββ | 180/250 [02:54<01:00, 1.15it/s]
Training 1/1 epoch (loss 3.1458): 72%|ββββββββ | 181/250 [02:54<01:08, 1.00it/s]
Training 1/1 epoch (loss 3.2104): 72%|ββββββββ | 181/250 [02:54<01:08, 1.00it/s]
Training 1/1 epoch (loss 3.2104): 73%|ββββββββ | 182/250 [02:54<01:05, 1.03it/s]
Training 1/1 epoch (loss 3.0065): 73%|ββββββββ | 182/250 [02:56<01:05, 1.03it/s]
Training 1/1 epoch (loss 3.0065): 73%|ββββββββ | 183/250 [02:56<01:12, 1.08s/it]
Training 1/1 epoch (loss 3.2011): 73%|ββββββββ | 183/250 [02:57<01:12, 1.08s/it]
Training 1/1 epoch (loss 3.2011): 74%|ββββββββ | 184/250 [02:57<01:10, 1.06s/it]
Training 1/1 epoch (loss 3.3490): 74%|ββββββββ | 184/250 [02:58<01:10, 1.06s/it]
Training 1/1 epoch (loss 3.3490): 74%|ββββββββ | 185/250 [02:58<01:12, 1.12s/it]
Training 1/1 epoch (loss 3.2736): 74%|ββββββββ | 185/250 [02:59<01:12, 1.12s/it]
Training 1/1 epoch (loss 3.2736): 74%|ββββββββ | 186/250 [02:59<01:06, 1.03s/it]
Training 1/1 epoch (loss 3.2843): 74%|ββββββββ | 186/250 [02:59<01:06, 1.03s/it]
Training 1/1 epoch (loss 3.2843): 75%|ββββββββ | 187/250 [02:59<00:55, 1.14it/s]
Training 1/1 epoch (loss 3.4444): 75%|ββββββββ | 187/250 [03:00<00:55, 1.14it/s]
Training 1/1 epoch (loss 3.4444): 75%|ββββββββ | 188/250 [03:00<00:54, 1.15it/s]
Training 1/1 epoch (loss 3.0787): 75%|ββββββββ | 188/250 [03:02<00:54, 1.15it/s]
Training 1/1 epoch (loss 3.0787): 76%|ββββββββ | 189/250 [03:02<01:03, 1.03s/it]
Training 1/1 epoch (loss 3.1340): 76%|ββββββββ | 189/250 [03:02<01:03, 1.03s/it]
Training 1/1 epoch (loss 3.1340): 76%|ββββββββ | 190/250 [03:02<00:57, 1.04it/s]
Training 1/1 epoch (loss 3.2392): 76%|ββββββββ | 190/250 [03:03<00:57, 1.04it/s]
Training 1/1 epoch (loss 3.2392): 76%|ββββββββ | 191/250 [03:03<00:56, 1.04it/s]
Training 1/1 epoch (loss 3.2597): 76%|ββββββββ | 191/250 [03:05<00:56, 1.04it/s]
Training 1/1 epoch (loss 3.2597): 77%|ββββββββ | 192/250 [03:05<01:03, 1.09s/it]
Training 1/1 epoch (loss 3.1062): 77%|ββββββββ | 192/250 [03:06<01:03, 1.09s/it]
Training 1/1 epoch (loss 3.1062): 77%|ββββββββ | 193/250 [03:06<00:59, 1.05s/it]
Training 1/1 epoch (loss 3.1844): 77%|ββββββββ | 193/250 [03:07<00:59, 1.05s/it]
Training 1/1 epoch (loss 3.1844): 78%|ββββββββ | 194/250 [03:07<00:55, 1.00it/s]
Training 1/1 epoch (loss 3.2439): 78%|ββββββββ | 194/250 [03:08<00:55, 1.00it/s]
Training 1/1 epoch (loss 3.2439): 78%|ββββββββ | 195/250 [03:08<00:53, 1.03it/s]
Training 1/1 epoch (loss 3.1234): 78%|ββββββββ | 195/250 [03:08<00:53, 1.03it/s]
Training 1/1 epoch (loss 3.1234): 78%|ββββββββ | 196/250 [03:08<00:47, 1.13it/s]
Training 1/1 epoch (loss 3.1713): 78%|ββββββββ | 196/250 [03:10<00:47, 1.13it/s]
Training 1/1 epoch (loss 3.1713): 79%|ββββββββ | 197/250 [03:10<00:54, 1.03s/it]
Training 1/1 epoch (loss 3.1816): 79%|ββββββββ | 197/250 [03:11<00:54, 1.03s/it]
Training 1/1 epoch (loss 3.1816): 79%|ββββββββ | 198/250 [03:11<00:59, 1.14s/it]
Training 1/1 epoch (loss 3.3053): 79%|ββββββββ | 198/250 [03:12<00:59, 1.14s/it]
Training 1/1 epoch (loss 3.3053): 80%|ββββββββ | 199/250 [03:12<00:57, 1.13s/it]
Training 1/1 epoch (loss 3.2516): 80%|ββββββββ | 199/250 [03:14<00:57, 1.13s/it]
Training 1/1 epoch (loss 3.2516): 80%|ββββββββ | 200/250 [03:14<01:00, 1.21s/it]
Training 1/1 epoch (loss 3.1438): 80%|ββββββββ | 200/250 [03:15<01:00, 1.21s/it]
Training 1/1 epoch (loss 3.1438): 80%|ββββββββ | 201/250 [03:15<00:57, 1.17s/it]
Training 1/1 epoch (loss 3.1897): 80%|ββββββββ | 201/250 [03:15<00:57, 1.17s/it]
Training 1/1 epoch (loss 3.1897): 81%|ββββββββ | 202/250 [03:15<00:48, 1.02s/it]
Training 1/1 epoch (loss 3.1338): 81%|ββββββββ | 202/250 [03:16<00:48, 1.02s/it]
Training 1/1 epoch (loss 3.1338): 81%|ββββββββ | 203/250 [03:16<00:45, 1.04it/s]
Training 1/1 epoch (loss 3.3031): 81%|ββββββββ | 203/250 [03:18<00:45, 1.04it/s]
Training 1/1 epoch (loss 3.3031): 82%|βββββββββ | 204/250 [03:18<00:50, 1.11s/it]
Training 1/1 epoch (loss 3.0533): 82%|βββββββββ | 204/250 [03:18<00:50, 1.11s/it]
Training 1/1 epoch (loss 3.0533): 82%|βββββββββ | 205/250 [03:18<00:44, 1.01it/s]
Training 1/1 epoch (loss 3.3950): 82%|βββββββββ | 205/250 [03:19<00:44, 1.01it/s]
Training 1/1 epoch (loss 3.3950): 82%|βββββββββ | 206/250 [03:19<00:45, 1.04s/it]
Training 1/1 epoch (loss 3.3022): 82%|βββββββββ | 206/250 [03:20<00:45, 1.04s/it]
Training 1/1 epoch (loss 3.3022): 83%|βββββββββ | 207/250 [03:20<00:42, 1.00it/s]
Training 1/1 epoch (loss 3.1333): 83%|βββββββββ | 207/250 [03:21<00:42, 1.00it/s]
Training 1/1 epoch (loss 3.1333): 83%|βββββββββ | 208/250 [03:21<00:40, 1.05it/s]
Training 1/1 epoch (loss 3.2503): 83%|βββββββββ | 208/250 [03:22<00:40, 1.05it/s]
Training 1/1 epoch (loss 3.2503): 84%|βββββββββ | 209/250 [03:22<00:39, 1.05it/s]
Training 1/1 epoch (loss 3.1480): 84%|βββββββββ | 209/250 [03:23<00:39, 1.05it/s]
Training 1/1 epoch (loss 3.1480): 84%|βββββββββ | 210/250 [03:23<00:37, 1.06it/s]
Training 1/1 epoch (loss 3.2129): 84%|βββββββββ | 210/250 [03:24<00:37, 1.06it/s]
Training 1/1 epoch (loss 3.2129): 84%|βββββββββ | 211/250 [03:24<00:40, 1.03s/it]
Training 1/1 epoch (loss 3.2132): 84%|βββββββββ | 211/250 [03:25<00:40, 1.03s/it]
Training 1/1 epoch (loss 3.2132): 85%|βββββββββ | 212/250 [03:25<00:32, 1.18it/s]
Training 1/1 epoch (loss 3.1804): 85%|βββββββββ | 212/250 [03:26<00:32, 1.18it/s]
Training 1/1 epoch (loss 3.1804): 85%|βββββββββ | 213/250 [03:26<00:32, 1.13it/s]
Training 1/1 epoch (loss 3.2973): 85%|βββββββββ | 213/250 [03:26<00:32, 1.13it/s]
Training 1/1 epoch (loss 3.2973): 86%|βββββββββ | 214/250 [03:26<00:29, 1.24it/s]
Training 1/1 epoch (loss 3.3855): 86%|βββββββββ | 214/250 [03:27<00:29, 1.24it/s]
Training 1/1 epoch (loss 3.3855): 86%|βββββββββ | 215/250 [03:27<00:29, 1.18it/s]
Training 1/1 epoch (loss 3.1316): 86%|βββββββββ | 215/250 [03:29<00:29, 1.18it/s]
Training 1/1 epoch (loss 3.1316): 86%|βββββββββ | 216/250 [03:29<00:33, 1.01it/s]
Training 1/1 epoch (loss 3.3808): 86%|βββββββββ | 216/250 [03:30<00:33, 1.01it/s]
Training 1/1 epoch (loss 3.3808): 87%|βββββββββ | 217/250 [03:30<00:34, 1.04s/it]
Training 1/1 epoch (loss 3.3034): 87%|βββββββββ | 217/250 [03:30<00:34, 1.04s/it]
Training 1/1 epoch (loss 3.3034): 87%|βββββββββ | 218/250 [03:30<00:30, 1.05it/s]
Training 1/1 epoch (loss 3.4208): 87%|βββββββββ | 218/250 [03:31<00:30, 1.05it/s]
Training 1/1 epoch (loss 3.4208): 88%|βββββββββ | 219/250 [03:31<00:30, 1.03it/s]
Training 1/1 epoch (loss 3.3830): 88%|βββββββββ | 219/250 [03:32<00:30, 1.03it/s]
Training 1/1 epoch (loss 3.3830): 88%|βββββββββ | 220/250 [03:32<00:27, 1.10it/s]
Training 1/1 epoch (loss 3.2360): 88%|βββββββββ | 220/250 [03:33<00:27, 1.10it/s]
Training 1/1 epoch (loss 3.2360): 88%|βββββββββ | 221/250 [03:33<00:24, 1.16it/s]
Training 1/1 epoch (loss 3.1139): 88%|βββββββββ | 221/250 [03:34<00:24, 1.16it/s]
Training 1/1 epoch (loss 3.1139): 89%|βββββββββ | 222/250 [03:34<00:22, 1.24it/s]
Training 1/1 epoch (loss 3.1341): 89%|βββββββββ | 222/250 [03:35<00:22, 1.24it/s]
Training 1/1 epoch (loss 3.1341): 89%|βββββββββ | 223/250 [03:35<00:22, 1.18it/s]
Training 1/1 epoch (loss 3.1936): 89%|βββββββββ | 223/250 [03:36<00:22, 1.18it/s]
Training 1/1 epoch (loss 3.1936): 90%|βββββββββ | 224/250 [03:36<00:27, 1.07s/it]
Training 1/1 epoch (loss 3.2003): 90%|βββββββββ | 224/250 [03:37<00:27, 1.07s/it]
Training 1/1 epoch (loss 3.2003): 90%|βββββββββ | 225/250 [03:37<00:22, 1.10it/s]
Training 1/1 epoch (loss 3.0704): 90%|βββββββββ | 225/250 [03:38<00:22, 1.10it/s]
Training 1/1 epoch (loss 3.0704): 90%|βββββββββ | 226/250 [03:38<00:22, 1.09it/s]
Training 1/1 epoch (loss 2.7828): 90%|βββββββββ | 226/250 [03:39<00:22, 1.09it/s]
Training 1/1 epoch (loss 2.7828): 91%|βββββββββ | 227/250 [03:39<00:21, 1.09it/s]
Training 1/1 epoch (loss 3.3354): 91%|βββββββββ | 227/250 [03:39<00:21, 1.09it/s]
Training 1/1 epoch (loss 3.3354): 91%|βββββββββ | 228/250 [03:39<00:19, 1.15it/s]
Training 1/1 epoch (loss 3.0457): 91%|βββββββββ | 228/250 [03:40<00:19, 1.15it/s]
Training 1/1 epoch (loss 3.0457): 92%|ββββββββββ| 229/250 [03:40<00:18, 1.14it/s]
Training 1/1 epoch (loss 3.4336): 92%|ββββββββββ| 229/250 [03:41<00:18, 1.14it/s]
Training 1/1 epoch (loss 3.4336): 92%|ββββββββββ| 230/250 [03:41<00:17, 1.12it/s]
Training 1/1 epoch (loss 3.4582): 92%|ββββββββββ| 230/250 [03:42<00:17, 1.12it/s]
Training 1/1 epoch (loss 3.4582): 92%|ββββββββββ| 231/250 [03:42<00:17, 1.10it/s]
Training 1/1 epoch (loss 3.2025): 92%|ββββββββββ| 231/250 [03:43<00:17, 1.10it/s]
Training 1/1 epoch (loss 3.2025): 93%|ββββββββββ| 232/250 [03:43<00:18, 1.02s/it]
Training 1/1 epoch (loss 3.1660): 93%|ββββββββββ| 232/250 [03:45<00:18, 1.02s/it]
Training 1/1 epoch (loss 3.1660): 93%|ββββββββββ| 233/250 [03:45<00:19, 1.17s/it]
Training 1/1 epoch (loss 3.1857): 93%|ββββββββββ| 233/250 [03:46<00:19, 1.17s/it]
Training 1/1 epoch (loss 3.1857): 94%|ββββββββββ| 234/250 [03:46<00:17, 1.09s/it]
Training 1/1 epoch (loss 2.9552): 94%|ββββββββββ| 234/250 [03:46<00:17, 1.09s/it]
Training 1/1 epoch (loss 2.9552): 94%|ββββββββββ| 235/250 [03:46<00:13, 1.11it/s]
Training 1/1 epoch (loss 3.0465): 94%|ββββββββββ| 235/250 [03:47<00:13, 1.11it/s]
Training 1/1 epoch (loss 3.0465): 94%|ββββββββββ| 236/250 [03:47<00:12, 1.10it/s]
Training 1/1 epoch (loss 3.0878): 94%|ββββββββββ| 236/250 [03:48<00:12, 1.10it/s]
Training 1/1 epoch (loss 3.0878): 95%|ββββββββββ| 237/250 [03:48<00:11, 1.11it/s]
Training 1/1 epoch (loss 3.1270): 95%|ββββββββββ| 237/250 [03:49<00:11, 1.11it/s]
Training 1/1 epoch (loss 3.1270): 95%|ββββββββββ| 238/250 [03:49<00:09, 1.20it/s]
Training 1/1 epoch (loss 3.1320): 95%|ββββββββββ| 238/250 [03:50<00:09, 1.20it/s]
Training 1/1 epoch (loss 3.1320): 96%|ββββββββββ| 239/250 [03:50<00:11, 1.00s/it]
Training 1/1 epoch (loss 2.9191): 96%|ββββββββββ| 239/250 [03:52<00:11, 1.00s/it]
Training 1/1 epoch (loss 2.9191): 96%|ββββββββββ| 240/250 [03:52<00:11, 1.13s/it]
Training 1/1 epoch (loss 3.1944): 96%|ββββββββββ| 240/250 [03:52<00:11, 1.13s/it]
Training 1/1 epoch (loss 3.1944): 96%|ββββββββββ| 241/250 [03:52<00:08, 1.01it/s]
Training 1/1 epoch (loss 3.1011): 96%|ββββββββββ| 241/250 [03:53<00:08, 1.01it/s]
Training 1/1 epoch (loss 3.1011): 97%|ββββββββββ| 242/250 [03:53<00:07, 1.04it/s]
Training 1/1 epoch (loss 3.3142): 97%|ββββββββββ| 242/250 [03:54<00:07, 1.04it/s]
Training 1/1 epoch (loss 3.3142): 97%|ββββββββββ| 243/250 [03:54<00:06, 1.06it/s]
Training 1/1 epoch (loss 3.2536): 97%|ββββββββββ| 243/250 [03:55<00:06, 1.06it/s]
Training 1/1 epoch (loss 3.2536): 98%|ββββββββββ| 244/250 [03:55<00:05, 1.09it/s]
Training 1/1 epoch (loss 3.2569): 98%|ββββββββββ| 244/250 [03:55<00:05, 1.09it/s]
Training 1/1 epoch (loss 3.2569): 98%|ββββββββββ| 245/250 [03:55<00:03, 1.29it/s]
Training 1/1 epoch (loss 3.1959): 98%|ββββββββββ| 245/250 [03:56<00:03, 1.29it/s]
Training 1/1 epoch (loss 3.1959): 98%|ββββββββββ| 246/250 [03:56<00:03, 1.22it/s]
Training 1/1 epoch (loss 3.1654): 98%|ββββββββββ| 246/250 [03:57<00:03, 1.22it/s]
Training 1/1 epoch (loss 3.1654): 99%|ββββββββββ| 247/250 [03:57<00:02, 1.19it/s]
Training 1/1 epoch (loss 3.2959): 99%|ββββββββββ| 247/250 [03:59<00:02, 1.19it/s]
Training 1/1 epoch (loss 3.2959): 99%|ββββββββββ| 248/250 [03:59<00:02, 1.02s/it]
Training 1/1 epoch (loss 3.1758): 99%|ββββββββββ| 248/250 [04:00<00:02, 1.02s/it]
Training 1/1 epoch (loss 3.1758): 100%|ββββββββββ| 249/250 [04:00<00:01, 1.05s/it]
Training 1/1 epoch (loss 3.3693): 100%|ββββββββββ| 249/250 [04:01<00:01, 1.05s/it]
Training 1/1 epoch (loss 3.3693): 100%|ββββββββββ| 250/250 [04:01<00:00, 1.01s/it]
Training 1/1 epoch (loss 3.3693): 100%|ββββββββββ| 250/250 [04:01<00:00, 1.04it/s] |
|
|
chat template saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000-Q2-2000/chat_template.jinja |
|
|
tokenizer config file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000-Q2-2000/tokenizer_config.json |
|
|
Special tokens file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-0.5B/Qwen1.5-0.5B-s3-Q1-2000-Q2-2000/special_tokens_map.json |
|
|
wandb: ERROR Problem finishing run |
|
|
Exception ignored in atexit callback: <bound method rank_zero_only.<locals>.wrapper of <safe_rlhf.logger.Logger object at 0x15512c399210>> |
|
|
Traceback (most recent call last): |
|
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/utils.py", line 212, in wrapper |
|
|
return func(*args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/logger.py", line 183, in close |
|
|
self.wandb.finish() |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 503, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 451, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2309, in finish |
|
|
return self._finish(exit_code) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2337, in _finish |
|
|
self._atexit_cleanup(exit_code=exit_code) |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2550, in _atexit_cleanup |
|
|
self._on_finish() |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2806, in _on_finish |
|
|
wait_with_progress( |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress |
|
|
return wait_all_with_progress( |
|
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress |
|
|
return asyncio_compat.run(progress_loop_with_timeout) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/lib/asyncio_compat.py", line 27, in run |
|
|
future = executor.submit(runner.run, fn) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/concurrent/futures/thread.py", line 169, in submit |
|
|
raise RuntimeError( |
|
|
RuntimeError: cannot schedule new futures after interpreter shutdown |
|
|
|