|
|
+ deepspeed |
|
|
[rank5]:[W528 20:56:14.678944472 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank1]:[W528 20:56:14.771567362 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank2]:[W528 20:56:14.188232200 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank4]:[W528 20:56:14.188879391 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank7]:[W528 20:56:14.210295142 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank3]:[W528 20:56:14.234249958 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank6]:[W528 20:56:14.256044521 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
[rank0]:[W528 20:56:14.264283373 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/config.json |
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
Model config Qwen2Config { |
|
|
"_attn_implementation_autoset": true, |
|
|
"_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 4096, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 11008, |
|
|
"max_position_embeddings": 32768, |
|
|
"max_window_layers": 28, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 32, |
|
|
"num_hidden_layers": 32, |
|
|
"num_key_value_heads": 32, |
|
|
"pad_token_id": 151643, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 1000000.0, |
|
|
"sliding_window": 32768, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.49.0", |
|
|
"use_cache": true, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151646 |
|
|
} |
|
|
|
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k/pytorch_model.bin |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
Will use torch_dtype=torch.bfloat16 as defined in model's config object |
|
|
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
|
Generate config GenerationConfig { |
|
|
"bos_token_id": 128245, |
|
|
"eos_token_id": 151643, |
|
|
"pad_token_id": 151643 |
|
|
} |
|
|
|
|
|
Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file tokenizer.json |
|
|
loading file tokenizer.json |
|
|
loading file tokenizer.json |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file added_tokens.json |
|
|
loading file added_tokens.json |
|
|
loading file added_tokens.json |
|
|
loading file tokenizer_config.json |
|
|
loading file special_tokens_map.json |
|
|
loading file special_tokens_map.json |
|
|
loading file special_tokens_map.json |
|
|
loading file special_tokens_map.json |
|
|
loading file chat_template.jinja |
|
|
loading file tokenizer_config.json |
|
|
loading file tokenizer_config.json |
|
|
loading file tokenizer_config.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file chat_template.jinja |
|
|
loading file chat_template.jinja |
|
|
loading file chat_template.jinja |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k. |
|
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
Generation config file not found, using a generation config created from the model config. |
|
|
loading file vocab.json |
|
|
loading file merges.txt |
|
|
loading file tokenizer.json |
|
|
loading file added_tokens.json |
|
|
loading file special_tokens_map.json |
|
|
loading file tokenizer_config.json |
|
|
loading file chat_template.jinja |
|
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
|
|
Detected CUDA files, patching ldflags |
|
|
Emitting ninja build file /home/hansirui_1st/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... |
|
|
/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. |
|
|
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. |
|
|
warnings.warn( |
|
|
Building extension module fused_adam... |
|
|
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) |
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam... |
|
|
Loading extension module fused_adam...Loading extension module fused_adam... |
|
|
|
|
|
Loading extension module fused_adam...Loading extension module fused_adam... |
|
|
|
|
|
Loading extension module fused_adam... |
|
|
wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
wandb: Currently logged in as: xtom to https://api.wandb.ai. Use `wandb login |
|
|
wandb: Tracking run with wandb version 0.19.8 |
|
|
wandb: Run data is saved locally in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k-Q2-5k/wandb/run-20250528_205639-j9skp0tn |
|
|
wandb: Run `wandb offline` to turn off syncing. |
|
|
wandb: Syncing run qwen-7b-s3-Q1-40k-Q2-5k |
|
|
wandb: βοΈ View project at https://wandb.ai/xtom/Inverse_Alignment |
|
|
wandb: π View run at https://wandb.ai/xtom/Inverse_Alignment/runs/j9skp0tn |
|
|
Training 1/1 epoch: 0%| | 0/157 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
|
|
Training 1/1 epoch (loss 1.5890): 0%| | 0/157 [00:05<?, ?it/s]
Training 1/1 epoch (loss 1.5890): 1%| | 1/157 [00:05<14:45, 5.68s/it]
Training 1/1 epoch (loss 1.6594): 1%| | 1/157 [00:06<14:45, 5.68s/it]
Training 1/1 epoch (loss 1.6594): 1%|β | 2/157 [00:06<07:40, 2.97s/it]
Training 1/1 epoch (loss 1.5865): 1%|β | 2/157 [00:07<07:40, 2.97s/it]
Training 1/1 epoch (loss 1.5865): 2%|β | 3/157 [00:07<04:40, 1.82s/it]
Training 1/1 epoch (loss 1.7676): 2%|β | 3/157 [00:07<04:40, 1.82s/it]
Training 1/1 epoch (loss 1.7676): 3%|β | 4/157 [00:07<03:16, 1.28s/it]
Training 1/1 epoch (loss 1.5672): 3%|β | 4/157 [00:08<03:16, 1.28s/it]
Training 1/1 epoch (loss 1.5672): 3%|β | 5/157 [00:08<02:29, 1.02it/s]
Training 1/1 epoch (loss 1.6944): 3%|β | 5/157 [00:08<02:29, 1.02it/s]
Training 1/1 epoch (loss 1.6944): 4%|β | 6/157 [00:08<02:01, 1.24it/s]
Training 1/1 epoch (loss 1.6179): 4%|β | 6/157 [00:09<02:01, 1.24it/s]
Training 1/1 epoch (loss 1.6179): 4%|β | 7/157 [00:09<01:42, 1.46it/s]
Training 1/1 epoch (loss 1.6609): 4%|β | 7/157 [00:09<01:42, 1.46it/s]
Training 1/1 epoch (loss 1.6609): 5%|β | 8/157 [00:09<01:36, 1.54it/s]
Training 1/1 epoch (loss 1.6289): 5%|β | 8/157 [00:10<01:36, 1.54it/s]
Training 1/1 epoch (loss 1.6289): 6%|β | 9/157 [00:10<01:28, 1.68it/s]
Training 1/1 epoch (loss 1.6054): 6%|β | 9/157 [00:10<01:28, 1.68it/s]
Training 1/1 epoch (loss 1.6054): 6%|β | 10/157 [00:10<01:21, 1.81it/s]
Training 1/1 epoch (loss 1.6806): 6%|β | 10/157 [00:10<01:21, 1.81it/s]
Training 1/1 epoch (loss 1.6806): 7%|β | 11/157 [00:10<01:16, 1.91it/s]
Training 1/1 epoch (loss 1.5443): 7%|β | 11/157 [00:11<01:16, 1.91it/s]
Training 1/1 epoch (loss 1.5443): 8%|β | 12/157 [00:11<01:13, 1.98it/s]
Training 1/1 epoch (loss 1.6126): 8%|β | 12/157 [00:11<01:13, 1.98it/s]
Training 1/1 epoch (loss 1.6126): 8%|β | 13/157 [00:11<01:09, 2.06it/s]
Training 1/1 epoch (loss 1.7495): 8%|β | 13/157 [00:12<01:09, 2.06it/s]
Training 1/1 epoch (loss 1.7495): 9%|β | 14/157 [00:12<01:07, 2.11it/s]
Training 1/1 epoch (loss 1.5188): 9%|β | 14/157 [00:12<01:07, 2.11it/s]
Training 1/1 epoch (loss 1.5188): 10%|β | 15/157 [00:12<01:05, 2.16it/s]
Training 1/1 epoch (loss 1.6805): 10%|β | 15/157 [00:13<01:05, 2.16it/s]
Training 1/1 epoch (loss 1.6805): 10%|β | 16/157 [00:13<01:07, 2.08it/s]
Training 1/1 epoch (loss 1.6272): 10%|β | 16/157 [00:13<01:07, 2.08it/s]
Training 1/1 epoch (loss 1.6272): 11%|β | 17/157 [00:13<01:08, 2.05it/s]
Training 1/1 epoch (loss 1.6288): 11%|β | 17/157 [00:14<01:08, 2.05it/s]
Training 1/1 epoch (loss 1.6288): 11%|ββ | 18/157 [00:14<01:07, 2.06it/s]
Training 1/1 epoch (loss 1.6931): 11%|ββ | 18/157 [00:14<01:07, 2.06it/s]
Training 1/1 epoch (loss 1.6931): 12%|ββ | 19/157 [00:14<01:05, 2.10it/s]
Training 1/1 epoch (loss 1.6837): 12%|ββ | 19/157 [00:15<01:05, 2.10it/s]
Training 1/1 epoch (loss 1.6837): 13%|ββ | 20/157 [00:15<01:04, 2.13it/s]
Training 1/1 epoch (loss 1.6466): 13%|ββ | 20/157 [00:15<01:04, 2.13it/s]
Training 1/1 epoch (loss 1.6466): 13%|ββ | 21/157 [00:15<01:03, 2.16it/s]
Training 1/1 epoch (loss 1.5812): 13%|ββ | 21/157 [00:16<01:03, 2.16it/s]
Training 1/1 epoch (loss 1.5812): 14%|ββ | 22/157 [00:16<01:03, 2.12it/s]
Training 1/1 epoch (loss 1.4667): 14%|ββ | 22/157 [00:16<01:03, 2.12it/s]
Training 1/1 epoch (loss 1.4667): 15%|ββ | 23/157 [00:16<01:01, 2.18it/s]
Training 1/1 epoch (loss 1.6064): 15%|ββ | 23/157 [00:17<01:01, 2.18it/s]
Training 1/1 epoch (loss 1.6064): 15%|ββ | 24/157 [00:17<01:02, 2.12it/s]
Training 1/1 epoch (loss 1.6069): 15%|ββ | 24/157 [00:17<01:02, 2.12it/s]
Training 1/1 epoch (loss 1.6069): 16%|ββ | 25/157 [00:17<01:02, 2.13it/s]
Training 1/1 epoch (loss 1.6983): 16%|ββ | 25/157 [00:17<01:02, 2.13it/s]
Training 1/1 epoch (loss 1.6983): 17%|ββ | 26/157 [00:17<01:00, 2.16it/s]
Training 1/1 epoch (loss 1.5669): 17%|ββ | 26/157 [00:18<01:00, 2.16it/s]
Training 1/1 epoch (loss 1.5669): 17%|ββ | 27/157 [00:18<00:59, 2.19it/s]
Training 1/1 epoch (loss 1.6038): 17%|ββ | 27/157 [00:18<00:59, 2.19it/s]
Training 1/1 epoch (loss 1.6038): 18%|ββ | 28/157 [00:18<00:58, 2.21it/s]
Training 1/1 epoch (loss 1.6237): 18%|ββ | 28/157 [00:19<00:58, 2.21it/s]
Training 1/1 epoch (loss 1.6237): 18%|ββ | 29/157 [00:19<00:57, 2.22it/s]
Training 1/1 epoch (loss 1.5750): 18%|ββ | 29/157 [00:19<00:57, 2.22it/s]
Training 1/1 epoch (loss 1.5750): 19%|ββ | 30/157 [00:19<00:56, 2.23it/s]
Training 1/1 epoch (loss 1.6396): 19%|ββ | 30/157 [00:20<00:56, 2.23it/s]
Training 1/1 epoch (loss 1.6396): 20%|ββ | 31/157 [00:20<00:56, 2.22it/s]
Training 1/1 epoch (loss 1.5523): 20%|ββ | 31/157 [00:20<00:56, 2.22it/s]
Training 1/1 epoch (loss 1.5523): 20%|ββ | 32/157 [00:20<00:57, 2.16it/s]
Training 1/1 epoch (loss 1.6405): 20%|ββ | 32/157 [00:21<00:57, 2.16it/s]
Training 1/1 epoch (loss 1.6405): 21%|ββ | 33/157 [00:21<00:57, 2.17it/s]
Training 1/1 epoch (loss 1.5923): 21%|ββ | 33/157 [00:21<00:57, 2.17it/s]
Training 1/1 epoch (loss 1.5923): 22%|βββ | 34/157 [00:21<00:57, 2.14it/s]
Training 1/1 epoch (loss 1.5525): 22%|βββ | 34/157 [00:22<00:57, 2.14it/s]
Training 1/1 epoch (loss 1.5525): 22%|βββ | 35/157 [00:22<00:56, 2.17it/s]
Training 1/1 epoch (loss 1.5625): 22%|βββ | 35/157 [00:22<00:56, 2.17it/s]
Training 1/1 epoch (loss 1.5625): 23%|βββ | 36/157 [00:22<00:54, 2.21it/s]
Training 1/1 epoch (loss 1.6363): 23%|βββ | 36/157 [00:22<00:54, 2.21it/s]
Training 1/1 epoch (loss 1.6363): 24%|βββ | 37/157 [00:22<00:53, 2.23it/s]
Training 1/1 epoch (loss 1.6008): 24%|βββ | 37/157 [00:23<00:53, 2.23it/s]
Training 1/1 epoch (loss 1.6008): 24%|βββ | 38/157 [00:23<00:52, 2.27it/s]
Training 1/1 epoch (loss 1.7012): 24%|βββ | 38/157 [00:23<00:52, 2.27it/s]
Training 1/1 epoch (loss 1.7012): 25%|βββ | 39/157 [00:23<00:52, 2.26it/s]
Training 1/1 epoch (loss 1.7481): 25%|βββ | 39/157 [00:24<00:52, 2.26it/s]
Training 1/1 epoch (loss 1.7481): 25%|βββ | 40/157 [00:24<00:54, 2.17it/s]
Training 1/1 epoch (loss 1.5977): 25%|βββ | 40/157 [00:24<00:54, 2.17it/s]
Training 1/1 epoch (loss 1.5977): 26%|βββ | 41/157 [00:24<00:54, 2.13it/s]
Training 1/1 epoch (loss 1.4805): 26%|βββ | 41/157 [00:25<00:54, 2.13it/s]
Training 1/1 epoch (loss 1.4805): 27%|βββ | 42/157 [00:25<00:52, 2.18it/s]
Training 1/1 epoch (loss 1.5915): 27%|βββ | 42/157 [00:25<00:52, 2.18it/s]
Training 1/1 epoch (loss 1.5915): 27%|βββ | 43/157 [00:25<00:52, 2.19it/s]
Training 1/1 epoch (loss 1.6141): 27%|βββ | 43/157 [00:26<00:52, 2.19it/s]
Training 1/1 epoch (loss 1.6141): 28%|βββ | 44/157 [00:26<00:51, 2.20it/s]
Training 1/1 epoch (loss 1.6722): 28%|βββ | 44/157 [00:26<00:51, 2.20it/s]
Training 1/1 epoch (loss 1.6722): 29%|βββ | 45/157 [00:26<00:50, 2.22it/s]
Training 1/1 epoch (loss 1.6013): 29%|βββ | 45/157 [00:27<00:50, 2.22it/s]
Training 1/1 epoch (loss 1.6013): 29%|βββ | 46/157 [00:27<00:49, 2.23it/s]
Training 1/1 epoch (loss 1.5826): 29%|βββ | 46/157 [00:27<00:49, 2.23it/s]
Training 1/1 epoch (loss 1.5826): 30%|βββ | 47/157 [00:27<00:49, 2.24it/s]
Training 1/1 epoch (loss 1.5746): 30%|βββ | 47/157 [00:27<00:49, 2.24it/s]
Training 1/1 epoch (loss 1.5746): 31%|βββ | 48/157 [00:27<00:50, 2.15it/s]
Training 1/1 epoch (loss 1.7577): 31%|βββ | 48/157 [00:28<00:50, 2.15it/s]
Training 1/1 epoch (loss 1.7577): 31%|βββ | 49/157 [00:28<00:50, 2.15it/s]
Training 1/1 epoch (loss 1.6390): 31%|βββ | 49/157 [00:28<00:50, 2.15it/s]
Training 1/1 epoch (loss 1.6390): 32%|ββββ | 50/157 [00:28<00:49, 2.16it/s]
Training 1/1 epoch (loss 1.6335): 32%|ββββ | 50/157 [00:29<00:49, 2.16it/s]
Training 1/1 epoch (loss 1.6335): 32%|ββββ | 51/157 [00:29<00:48, 2.19it/s]
Training 1/1 epoch (loss 1.6952): 32%|ββββ | 51/157 [00:29<00:48, 2.19it/s]
Training 1/1 epoch (loss 1.6952): 33%|ββββ | 52/157 [00:29<00:47, 2.22it/s]
Training 1/1 epoch (loss 1.7370): 33%|ββββ | 52/157 [00:30<00:47, 2.22it/s]
Training 1/1 epoch (loss 1.7370): 34%|ββββ | 53/157 [00:30<00:47, 2.20it/s]
Training 1/1 epoch (loss 1.6099): 34%|ββββ | 53/157 [00:30<00:47, 2.20it/s]
Training 1/1 epoch (loss 1.6099): 34%|ββββ | 54/157 [00:30<00:46, 2.23it/s]
Training 1/1 epoch (loss 1.6833): 34%|ββββ | 54/157 [00:31<00:46, 2.23it/s]
Training 1/1 epoch (loss 1.6833): 35%|ββββ | 55/157 [00:31<00:45, 2.25it/s]
Training 1/1 epoch (loss 1.6136): 35%|ββββ | 55/157 [00:31<00:45, 2.25it/s]
Training 1/1 epoch (loss 1.6136): 36%|ββββ | 56/157 [00:31<00:46, 2.15it/s]
Training 1/1 epoch (loss 1.5316): 36%|ββββ | 56/157 [00:32<00:46, 2.15it/s]
Training 1/1 epoch (loss 1.5316): 36%|ββββ | 57/157 [00:32<00:46, 2.15it/s]
Training 1/1 epoch (loss 1.4469): 36%|ββββ | 57/157 [00:32<00:46, 2.15it/s]
Training 1/1 epoch (loss 1.4469): 37%|ββββ | 58/157 [00:32<00:45, 2.18it/s]
Training 1/1 epoch (loss 1.5206): 37%|ββββ | 58/157 [00:32<00:45, 2.18it/s]
Training 1/1 epoch (loss 1.5206): 38%|ββββ | 59/157 [00:32<00:44, 2.20it/s]
Training 1/1 epoch (loss 1.6000): 38%|ββββ | 59/157 [00:33<00:44, 2.20it/s]
Training 1/1 epoch (loss 1.6000): 38%|ββββ | 60/157 [00:33<00:43, 2.23it/s]
Training 1/1 epoch (loss 1.5474): 38%|ββββ | 60/157 [00:33<00:43, 2.23it/s]
Training 1/1 epoch (loss 1.5474): 39%|ββββ | 61/157 [00:33<00:43, 2.22it/s]
Training 1/1 epoch (loss 1.6231): 39%|ββββ | 61/157 [00:34<00:43, 2.22it/s]
Training 1/1 epoch (loss 1.6231): 39%|ββββ | 62/157 [00:34<00:42, 2.23it/s]
Training 1/1 epoch (loss 1.5372): 39%|ββββ | 62/157 [00:34<00:42, 2.23it/s]
Training 1/1 epoch (loss 1.5372): 40%|ββββ | 63/157 [00:34<00:41, 2.25it/s]
Training 1/1 epoch (loss 1.6803): 40%|ββββ | 63/157 [00:35<00:41, 2.25it/s]
Training 1/1 epoch (loss 1.6803): 41%|ββββ | 64/157 [00:35<00:43, 2.15it/s]
Training 1/1 epoch (loss 1.6941): 41%|ββββ | 64/157 [00:35<00:43, 2.15it/s]
Training 1/1 epoch (loss 1.6941): 41%|βββββ | 65/157 [00:35<00:42, 2.17it/s]
Training 1/1 epoch (loss 1.6386): 41%|βββββ | 65/157 [00:36<00:42, 2.17it/s]
Training 1/1 epoch (loss 1.6386): 42%|βββββ | 66/157 [00:36<00:42, 2.16it/s]
Training 1/1 epoch (loss 1.5193): 42%|βββββ | 66/157 [00:36<00:42, 2.16it/s]
Training 1/1 epoch (loss 1.5193): 43%|βββββ | 67/157 [00:36<00:41, 2.18it/s]
Training 1/1 epoch (loss 1.6160): 43%|βββββ | 67/157 [00:37<00:41, 2.18it/s]
Training 1/1 epoch (loss 1.6160): 43%|βββββ | 68/157 [00:37<00:40, 2.21it/s]
Training 1/1 epoch (loss 1.6740): 43%|βββββ | 68/157 [00:37<00:40, 2.21it/s]
Training 1/1 epoch (loss 1.6740): 44%|βββββ | 69/157 [00:37<00:39, 2.22it/s]
Training 1/1 epoch (loss 1.6932): 44%|βββββ | 69/157 [00:37<00:39, 2.22it/s]
Training 1/1 epoch (loss 1.6932): 45%|βββββ | 70/157 [00:37<00:38, 2.23it/s]
Training 1/1 epoch (loss 1.6144): 45%|βββββ | 70/157 [00:38<00:38, 2.23it/s]
Training 1/1 epoch (loss 1.6144): 45%|βββββ | 71/157 [00:38<00:38, 2.25it/s]
Training 1/1 epoch (loss 1.5374): 45%|βββββ | 71/157 [00:38<00:38, 2.25it/s]
Training 1/1 epoch (loss 1.5374): 46%|βββββ | 72/157 [00:38<00:39, 2.18it/s]
Training 1/1 epoch (loss 1.6768): 46%|βββββ | 72/157 [00:39<00:39, 2.18it/s]
Training 1/1 epoch (loss 1.6768): 46%|βββββ | 73/157 [00:39<00:38, 2.18it/s]
Training 1/1 epoch (loss 1.6631): 46%|βββββ | 73/157 [00:39<00:38, 2.18it/s]
Training 1/1 epoch (loss 1.6631): 47%|βββββ | 74/157 [00:39<00:38, 2.17it/s]
Training 1/1 epoch (loss 1.5824): 47%|βββββ | 74/157 [00:40<00:38, 2.17it/s]
Training 1/1 epoch (loss 1.5824): 48%|βββββ | 75/157 [00:40<00:37, 2.19it/s]
Training 1/1 epoch (loss 1.6192): 48%|βββββ | 75/157 [00:40<00:37, 2.19it/s]
Training 1/1 epoch (loss 1.6192): 48%|βββββ | 76/157 [00:40<00:37, 2.15it/s]
Training 1/1 epoch (loss 1.6325): 48%|βββββ | 76/157 [00:41<00:37, 2.15it/s]
Training 1/1 epoch (loss 1.6325): 49%|βββββ | 77/157 [00:41<00:37, 2.12it/s]
Training 1/1 epoch (loss 1.9039): 49%|βββββ | 77/157 [00:41<00:37, 2.12it/s]
Training 1/1 epoch (loss 1.9039): 50%|βββββ | 78/157 [00:41<00:36, 2.14it/s]
Training 1/1 epoch (loss 1.7171): 50%|βββββ | 78/157 [00:42<00:36, 2.14it/s]
Training 1/1 epoch (loss 1.7171): 50%|βββββ | 79/157 [00:42<00:36, 2.16it/s]
Training 1/1 epoch (loss 1.5554): 50%|βββββ | 79/157 [00:42<00:36, 2.16it/s]
Training 1/1 epoch (loss 1.5554): 51%|βββββ | 80/157 [00:42<00:36, 2.10it/s]
Training 1/1 epoch (loss 1.6576): 51%|βββββ | 80/157 [00:43<00:36, 2.10it/s]
Training 1/1 epoch (loss 1.6576): 52%|ββββββ | 81/157 [00:43<00:35, 2.11it/s]
Training 1/1 epoch (loss 1.6420): 52%|ββββββ | 81/157 [00:43<00:35, 2.11it/s]
Training 1/1 epoch (loss 1.6420): 52%|ββββββ | 82/157 [00:43<00:34, 2.15it/s]
Training 1/1 epoch (loss 1.6123): 52%|ββββββ | 82/157 [00:43<00:34, 2.15it/s]
Training 1/1 epoch (loss 1.6123): 53%|ββββββ | 83/157 [00:43<00:33, 2.18it/s]
Training 1/1 epoch (loss 1.5326): 53%|ββββββ | 83/157 [00:44<00:33, 2.18it/s]
Training 1/1 epoch (loss 1.5326): 54%|ββββββ | 84/157 [00:44<00:33, 2.21it/s]
Training 1/1 epoch (loss 1.6410): 54%|ββββββ | 84/157 [00:44<00:33, 2.21it/s]
Training 1/1 epoch (loss 1.6410): 54%|ββββββ | 85/157 [00:44<00:32, 2.21it/s]
Training 1/1 epoch (loss 1.5528): 54%|ββββββ | 85/157 [00:45<00:32, 2.21it/s]
Training 1/1 epoch (loss 1.5528): 55%|ββββββ | 86/157 [00:45<00:31, 2.22it/s]
Training 1/1 epoch (loss 1.6920): 55%|ββββββ | 86/157 [00:45<00:31, 2.22it/s]
Training 1/1 epoch (loss 1.6920): 55%|ββββββ | 87/157 [00:45<00:31, 2.23it/s]
Training 1/1 epoch (loss 1.5672): 55%|ββββββ | 87/157 [00:46<00:31, 2.23it/s]
Training 1/1 epoch (loss 1.5672): 56%|ββββββ | 88/157 [00:46<00:32, 2.10it/s]
Training 1/1 epoch (loss 1.4680): 56%|ββββββ | 88/157 [00:46<00:32, 2.10it/s]
Training 1/1 epoch (loss 1.4680): 57%|ββββββ | 89/157 [00:46<00:31, 2.14it/s]
Training 1/1 epoch (loss 1.6826): 57%|ββββββ | 89/157 [00:47<00:31, 2.14it/s]
Training 1/1 epoch (loss 1.6826): 57%|ββββββ | 90/157 [00:47<00:30, 2.18it/s]
Training 1/1 epoch (loss 1.6267): 57%|ββββββ | 90/157 [00:47<00:30, 2.18it/s]
Training 1/1 epoch (loss 1.6267): 58%|ββββββ | 91/157 [00:47<00:30, 2.14it/s]
Training 1/1 epoch (loss 1.6146): 58%|ββββββ | 91/157 [00:48<00:30, 2.14it/s]
Training 1/1 epoch (loss 1.6146): 59%|ββββββ | 92/157 [00:48<00:29, 2.17it/s]
Training 1/1 epoch (loss 1.6887): 59%|ββββββ | 92/157 [00:48<00:29, 2.17it/s]
Training 1/1 epoch (loss 1.6887): 59%|ββββββ | 93/157 [00:48<00:29, 2.20it/s]
Training 1/1 epoch (loss 1.5361): 59%|ββββββ | 93/157 [00:49<00:29, 2.20it/s]
Training 1/1 epoch (loss 1.5361): 60%|ββββββ | 94/157 [00:49<00:28, 2.21it/s]
Training 1/1 epoch (loss 1.6834): 60%|ββββββ | 94/157 [00:49<00:28, 2.21it/s]
Training 1/1 epoch (loss 1.6834): 61%|ββββββ | 95/157 [00:49<00:27, 2.22it/s]
Training 1/1 epoch (loss 1.6557): 61%|ββββββ | 95/157 [00:50<00:27, 2.22it/s]
Training 1/1 epoch (loss 1.6557): 61%|ββββββ | 96/157 [00:50<00:29, 2.06it/s]
Training 1/1 epoch (loss 1.6177): 61%|ββββββ | 96/157 [00:50<00:29, 2.06it/s]
Training 1/1 epoch (loss 1.6177): 62%|βββββββ | 97/157 [00:50<00:28, 2.11it/s]
Training 1/1 epoch (loss 1.7292): 62%|βββββββ | 97/157 [00:50<00:28, 2.11it/s]
Training 1/1 epoch (loss 1.7292): 62%|βββββββ | 98/157 [00:50<00:27, 2.16it/s]
Training 1/1 epoch (loss 1.6938): 62%|βββββββ | 98/157 [00:51<00:27, 2.16it/s]
Training 1/1 epoch (loss 1.6938): 63%|βββββββ | 99/157 [00:51<00:26, 2.20it/s]
Training 1/1 epoch (loss 1.6465): 63%|βββββββ | 99/157 [00:51<00:26, 2.20it/s]
Training 1/1 epoch (loss 1.6465): 64%|βββββββ | 100/157 [00:51<00:25, 2.23it/s]
Training 1/1 epoch (loss 1.6855): 64%|βββββββ | 100/157 [00:52<00:25, 2.23it/s]
Training 1/1 epoch (loss 1.6855): 64%|βββββββ | 101/157 [00:52<00:25, 2.20it/s]
Training 1/1 epoch (loss 1.7388): 64%|βββββββ | 101/157 [00:52<00:25, 2.20it/s]
Training 1/1 epoch (loss 1.7388): 65%|βββββββ | 102/157 [00:52<00:24, 2.21it/s]
Training 1/1 epoch (loss 1.5346): 65%|βββββββ | 102/157 [00:53<00:24, 2.21it/s]
Training 1/1 epoch (loss 1.5346): 66%|βββββββ | 103/157 [00:53<00:24, 2.25it/s]
Training 1/1 epoch (loss 1.7214): 66%|βββββββ | 103/157 [00:53<00:24, 2.25it/s]
Training 1/1 epoch (loss 1.7214): 66%|βββββββ | 104/157 [00:53<00:24, 2.17it/s]
Training 1/1 epoch (loss 1.6647): 66%|βββββββ | 104/157 [00:54<00:24, 2.17it/s]
Training 1/1 epoch (loss 1.6647): 67%|βββββββ | 105/157 [00:54<00:24, 2.09it/s]
Training 1/1 epoch (loss 1.6029): 67%|βββββββ | 105/157 [00:54<00:24, 2.09it/s]
Training 1/1 epoch (loss 1.6029): 68%|βββββββ | 106/157 [00:54<00:23, 2.15it/s]
Training 1/1 epoch (loss 1.6620): 68%|βββββββ | 106/157 [00:55<00:23, 2.15it/s]
Training 1/1 epoch (loss 1.6620): 68%|βββββββ | 107/157 [00:55<00:23, 2.17it/s]
Training 1/1 epoch (loss 1.6297): 68%|βββββββ | 107/157 [00:55<00:23, 2.17it/s]
Training 1/1 epoch (loss 1.6297): 69%|βββββββ | 108/157 [00:55<00:22, 2.20it/s]
Training 1/1 epoch (loss 1.6680): 69%|βββββββ | 108/157 [00:55<00:22, 2.20it/s]
Training 1/1 epoch (loss 1.6680): 69%|βββββββ | 109/157 [00:55<00:21, 2.23it/s]
Training 1/1 epoch (loss 1.5817): 69%|βββββββ | 109/157 [00:56<00:21, 2.23it/s]
Training 1/1 epoch (loss 1.5817): 70%|βββββββ | 110/157 [00:56<00:20, 2.24it/s]
Training 1/1 epoch (loss 1.6261): 70%|βββββββ | 110/157 [00:56<00:20, 2.24it/s]
Training 1/1 epoch (loss 1.6261): 71%|βββββββ | 111/157 [00:56<00:21, 2.18it/s]
Training 1/1 epoch (loss 1.6692): 71%|βββββββ | 111/157 [00:57<00:21, 2.18it/s]
Training 1/1 epoch (loss 1.6692): 71%|ββββββββ | 112/157 [00:57<00:21, 2.12it/s]
Training 1/1 epoch (loss 1.6103): 71%|ββββββββ | 112/157 [00:57<00:21, 2.12it/s]
Training 1/1 epoch (loss 1.6103): 72%|ββββββββ | 113/157 [00:57<00:20, 2.12it/s]
Training 1/1 epoch (loss 1.5509): 72%|ββββββββ | 113/157 [00:58<00:20, 2.12it/s]
Training 1/1 epoch (loss 1.5509): 73%|ββββββββ | 114/157 [00:58<00:20, 2.15it/s]
Training 1/1 epoch (loss 1.5908): 73%|ββββββββ | 114/157 [00:58<00:20, 2.15it/s]
Training 1/1 epoch (loss 1.5908): 73%|ββββββββ | 115/157 [00:58<00:19, 2.18it/s]
Training 1/1 epoch (loss 1.5550): 73%|ββββββββ | 115/157 [00:59<00:19, 2.18it/s]
Training 1/1 epoch (loss 1.5550): 74%|ββββββββ | 116/157 [00:59<00:18, 2.21it/s]
Training 1/1 epoch (loss 1.4955): 74%|ββββββββ | 116/157 [00:59<00:18, 2.21it/s]
Training 1/1 epoch (loss 1.4955): 75%|ββββββββ | 117/157 [00:59<00:18, 2.22it/s]
Training 1/1 epoch (loss 1.5205): 75%|ββββββββ | 117/157 [01:00<00:18, 2.22it/s]
Training 1/1 epoch (loss 1.5205): 75%|ββββββββ | 118/157 [01:00<00:17, 2.24it/s]
Training 1/1 epoch (loss 1.6576): 75%|ββββββββ | 118/157 [01:00<00:17, 2.24it/s]
Training 1/1 epoch (loss 1.6576): 76%|ββββββββ | 119/157 [01:00<00:16, 2.25it/s]
Training 1/1 epoch (loss 1.5310): 76%|ββββββββ | 119/157 [01:00<00:16, 2.25it/s]
Training 1/1 epoch (loss 1.5310): 76%|ββββββββ | 120/157 [01:00<00:16, 2.18it/s]
Training 1/1 epoch (loss 1.6619): 76%|ββββββββ | 120/157 [01:01<00:16, 2.18it/s]
Training 1/1 epoch (loss 1.6619): 77%|ββββββββ | 121/157 [01:01<00:16, 2.20it/s]
Training 1/1 epoch (loss 1.6077): 77%|ββββββββ | 121/157 [01:01<00:16, 2.20it/s]
Training 1/1 epoch (loss 1.6077): 78%|ββββββββ | 122/157 [01:01<00:15, 2.21it/s]
Training 1/1 epoch (loss 1.5734): 78%|ββββββββ | 122/157 [01:02<00:15, 2.21it/s]
Training 1/1 epoch (loss 1.5734): 78%|ββββββββ | 123/157 [01:02<00:15, 2.23it/s]
Training 1/1 epoch (loss 1.6124): 78%|ββββββββ | 123/157 [01:02<00:15, 2.23it/s]
Training 1/1 epoch (loss 1.6124): 79%|ββββββββ | 124/157 [01:02<00:14, 2.25it/s]
Training 1/1 epoch (loss 1.5166): 79%|ββββββββ | 124/157 [01:03<00:14, 2.25it/s]
Training 1/1 epoch (loss 1.5166): 80%|ββββββββ | 125/157 [01:03<00:14, 2.28it/s]
Training 1/1 epoch (loss 1.5599): 80%|ββββββββ | 125/157 [01:03<00:14, 2.28it/s]
Training 1/1 epoch (loss 1.5599): 80%|ββββββββ | 126/157 [01:03<00:13, 2.29it/s]
Training 1/1 epoch (loss 1.6814): 80%|ββββββββ | 126/157 [01:04<00:13, 2.29it/s]
Training 1/1 epoch (loss 1.6814): 81%|ββββββββ | 127/157 [01:04<00:13, 2.29it/s]
Training 1/1 epoch (loss 1.6348): 81%|ββββββββ | 127/157 [01:04<00:13, 2.29it/s]
Training 1/1 epoch (loss 1.6348): 82%|βββββββββ | 128/157 [01:04<00:13, 2.18it/s]
Training 1/1 epoch (loss 1.5789): 82%|βββββββββ | 128/157 [01:04<00:13, 2.18it/s]
Training 1/1 epoch (loss 1.5789): 82%|βββββββββ | 129/157 [01:04<00:12, 2.20it/s]
Training 1/1 epoch (loss 1.6534): 82%|βββββββββ | 129/157 [01:05<00:12, 2.20it/s]
Training 1/1 epoch (loss 1.6534): 83%|βββββββββ | 130/157 [01:05<00:12, 2.23it/s]
Training 1/1 epoch (loss 1.6516): 83%|βββββββββ | 130/157 [01:05<00:12, 2.23it/s]
Training 1/1 epoch (loss 1.6516): 83%|βββββββββ | 131/157 [01:05<00:11, 2.25it/s]
Training 1/1 epoch (loss 1.6377): 83%|βββββββββ | 131/157 [01:06<00:11, 2.25it/s]
Training 1/1 epoch (loss 1.6377): 84%|βββββββββ | 132/157 [01:06<00:11, 2.25it/s]
Training 1/1 epoch (loss 1.5450): 84%|βββββββββ | 132/157 [01:06<00:11, 2.25it/s]
Training 1/1 epoch (loss 1.5450): 85%|βββββββββ | 133/157 [01:06<00:10, 2.25it/s]
Training 1/1 epoch (loss 1.6465): 85%|βββββββββ | 133/157 [01:07<00:10, 2.25it/s]
Training 1/1 epoch (loss 1.6465): 85%|βββββββββ | 134/157 [01:07<00:10, 2.28it/s]
Training 1/1 epoch (loss 1.6019): 85%|βββββββββ | 134/157 [01:07<00:10, 2.28it/s]
Training 1/1 epoch (loss 1.6019): 86%|βββββββββ | 135/157 [01:07<00:09, 2.29it/s]
Training 1/1 epoch (loss 1.6043): 86%|βββββββββ | 135/157 [01:08<00:09, 2.29it/s]
Training 1/1 epoch (loss 1.6043): 87%|βββββββββ | 136/157 [01:08<00:09, 2.19it/s]
Training 1/1 epoch (loss 1.5799): 87%|βββββββββ | 136/157 [01:08<00:09, 2.19it/s]
Training 1/1 epoch (loss 1.5799): 87%|βββββββββ | 137/157 [01:08<00:09, 2.19it/s]
Training 1/1 epoch (loss 1.6515): 87%|βββββββββ | 137/157 [01:08<00:09, 2.19it/s]
Training 1/1 epoch (loss 1.6515): 88%|βββββββββ | 138/157 [01:08<00:08, 2.23it/s]
Training 1/1 epoch (loss 1.6628): 88%|βββββββββ | 138/157 [01:09<00:08, 2.23it/s]
Training 1/1 epoch (loss 1.6628): 89%|βββββββββ | 139/157 [01:09<00:07, 2.25it/s]
Training 1/1 epoch (loss 1.6154): 89%|βββββββββ | 139/157 [01:09<00:07, 2.25it/s]
Training 1/1 epoch (loss 1.6154): 89%|βββββββββ | 140/157 [01:09<00:07, 2.22it/s]
Training 1/1 epoch (loss 1.4899): 89%|βββββββββ | 140/157 [01:10<00:07, 2.22it/s]
Training 1/1 epoch (loss 1.4899): 90%|βββββββββ | 141/157 [01:10<00:07, 2.24it/s]
Training 1/1 epoch (loss 1.5085): 90%|βββββββββ | 141/157 [01:10<00:07, 2.24it/s]
Training 1/1 epoch (loss 1.5085): 90%|βββββββββ | 142/157 [01:10<00:06, 2.25it/s]
Training 1/1 epoch (loss 1.5483): 90%|βββββββββ | 142/157 [01:11<00:06, 2.25it/s]
Training 1/1 epoch (loss 1.5483): 91%|βββββββββ | 143/157 [01:11<00:06, 2.26it/s]
Training 1/1 epoch (loss 1.5635): 91%|βββββββββ | 143/157 [01:11<00:06, 2.26it/s]
Training 1/1 epoch (loss 1.5635): 92%|ββββββββββ| 144/157 [01:11<00:05, 2.20it/s]
Training 1/1 epoch (loss 1.5871): 92%|ββββββββββ| 144/157 [01:12<00:05, 2.20it/s]
Training 1/1 epoch (loss 1.5871): 92%|ββββββββββ| 145/157 [01:12<00:05, 2.18it/s]
Training 1/1 epoch (loss 1.5729): 92%|ββββββββββ| 145/157 [01:12<00:05, 2.18it/s]
Training 1/1 epoch (loss 1.5729): 93%|ββββββββββ| 146/157 [01:12<00:05, 2.19it/s]
Training 1/1 epoch (loss 1.6486): 93%|ββββββββββ| 146/157 [01:13<00:05, 2.19it/s]
Training 1/1 epoch (loss 1.6486): 94%|ββββββββββ| 147/157 [01:13<00:04, 2.23it/s]
Training 1/1 epoch (loss 1.5343): 94%|ββββββββββ| 147/157 [01:13<00:04, 2.23it/s]
Training 1/1 epoch (loss 1.5343): 94%|ββββββββββ| 148/157 [01:13<00:04, 2.22it/s]
Training 1/1 epoch (loss 1.6556): 94%|ββββββββββ| 148/157 [01:13<00:04, 2.22it/s]
Training 1/1 epoch (loss 1.6556): 95%|ββββββββββ| 149/157 [01:13<00:03, 2.22it/s]
Training 1/1 epoch (loss 1.6864): 95%|ββββββββββ| 149/157 [01:14<00:03, 2.22it/s]
Training 1/1 epoch (loss 1.6864): 96%|ββββββββββ| 150/157 [01:14<00:03, 2.22it/s]
Training 1/1 epoch (loss 1.7332): 96%|ββββββββββ| 150/157 [01:14<00:03, 2.22it/s]
Training 1/1 epoch (loss 1.7332): 96%|ββββββββββ| 151/157 [01:14<00:02, 2.24it/s]
Training 1/1 epoch (loss 1.5896): 96%|ββββββββββ| 151/157 [01:15<00:02, 2.24it/s]
Training 1/1 epoch (loss 1.5896): 97%|ββββββββββ| 152/157 [01:15<00:02, 2.16it/s]
Training 1/1 epoch (loss 1.5636): 97%|ββββββββββ| 152/157 [01:15<00:02, 2.16it/s]
Training 1/1 epoch (loss 1.5636): 97%|ββββββββββ| 153/157 [01:15<00:01, 2.16it/s]
Training 1/1 epoch (loss 1.6042): 97%|ββββββββββ| 153/157 [01:16<00:01, 2.16it/s]
Training 1/1 epoch (loss 1.6042): 98%|ββββββββββ| 154/157 [01:16<00:01, 2.17it/s]
Training 1/1 epoch (loss 1.7323): 98%|ββββββββββ| 154/157 [01:16<00:01, 2.17it/s]
Training 1/1 epoch (loss 1.7323): 99%|ββββββββββ| 155/157 [01:16<00:00, 2.19it/s]
Training 1/1 epoch (loss 1.6252): 99%|ββββββββββ| 155/157 [01:17<00:00, 2.19it/s]
Training 1/1 epoch (loss 1.6252): 99%|ββββββββββ| 156/157 [01:17<00:00, 2.22it/s]
Training 1/1 epoch (loss 1.3796): 99%|ββββββββββ| 156/157 [01:17<00:00, 2.22it/s]
Training 1/1 epoch (loss 1.3796): 100%|ββββββββββ| 157/157 [01:17<00:00, 2.23it/s]
Training 1/1 epoch (loss 1.3796): 100%|ββββββββββ| 157/157 [01:17<00:00, 2.02it/s] |
|
|
tokenizer config file saved in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k-Q2-5k/tokenizer_config.json |
|
|
Special tokens file saved in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/Qwen1.5-7B/Qwen1.5-7B-s3-Q1-40k-Q2-5k/special_tokens_map.json |
|
|
wandb: ERROR Problem finishing run |
|
|
Exception ignored in atexit callback: <bound method rank_zero_only.<locals>.wrapper of <safe_rlhf.logger.Logger object at 0x155104203290>> |
|
|
Traceback (most recent call last): |
|
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/utils.py", line 212, in wrapper |
|
|
return func(*args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/logger.py", line 183, in close |
|
|
self.wandb.finish() |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 449, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 391, in wrapper |
|
|
return func(self, *args, **kwargs) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2106, in finish |
|
|
return self._finish(exit_code) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2127, in _finish |
|
|
self._atexit_cleanup(exit_code=exit_code) |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup |
|
|
self._on_finish() |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2609, in _on_finish |
|
|
wait_with_progress( |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress |
|
|
return wait_all_with_progress( |
|
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress |
|
|
return asyncio_compat.run(progress_loop_with_timeout) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/lib/asyncio_compat.py", line 27, in run |
|
|
future = executor.submit(runner.run, fn) |
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|
File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/concurrent/futures/thread.py", line 169, in submit |
|
|
raise RuntimeError('cannot schedule new futures after ' |
|
|
RuntimeError: cannot schedule new futures after interpreter shutdown |
|
|
|