Kyleyee committed on
Commit
9928eaa
·
verified ·
1 Parent(s): 3fa1484

Model save

Browse files
README.md CHANGED
@@ -1,17 +1,16 @@
1
  ---
2
- datasets: Kyleyee/train_data_imdb_for_target_policy_dpo
3
  library_name: transformers
4
  model_name: Qwen2.5-7b-dpo-imdb
5
  tags:
6
  - generated_from_trainer
7
- - trl
8
  - dpo
 
9
  licence: license
10
  ---
11
 
12
  # Model Card for Qwen2.5-7b-dpo-imdb
13
 
14
- This model is a fine-tuned version of [None](https://huggingface.co/None) on the [Kyleyee/train_data_imdb_for_target_policy_dpo](https://huggingface.co/datasets/Kyleyee/train_data_imdb_for_target_policy_dpo) dataset.
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
1
  ---
 
2
  library_name: transformers
3
  model_name: Qwen2.5-7b-dpo-imdb
4
  tags:
5
  - generated_from_trainer
 
6
  - dpo
7
+ - trl
8
  licence: license
9
  ---
10
 
11
  # Model Card for Qwen2.5-7b-dpo-imdb
12
 
13
+ This model is a fine-tuned version of [None](https://huggingface.co/None).
14
  It has been trained using [TRL](https://github.com/huggingface/trl).
15
 
16
  ## Quick start
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -3.1484375,
4
- "eval_logits/rejected": -3.20703125,
5
- "eval_logps/chosen": -885.0,
6
- "eval_logps/rejected": -913.0,
7
- "eval_loss": 0.5779687762260437,
8
- "eval_rewards/accuracies": 0.5625,
9
- "eval_rewards/chosen": -0.176513671875,
10
- "eval_rewards/margins": 0.3870849609375,
11
- "eval_rewards/rejected": -0.5640869140625,
12
- "eval_runtime": 4.7753,
13
- "eval_samples_per_second": 20.941,
14
- "eval_steps_per_second": 0.838
15
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -3.22265625,
4
+ "eval_logits/rejected": -3.2421875,
5
+ "eval_logps/chosen": -297.0,
6
+ "eval_logps/rejected": -294.0,
7
+ "eval_loss": 0.6393749713897705,
8
+ "eval_rewards/accuracies": 0.671875,
9
+ "eval_rewards/chosen": -0.0892333984375,
10
+ "eval_rewards/margins": 0.1739501953125,
11
+ "eval_rewards/rejected": -0.263427734375,
12
+ "eval_runtime": 2.6575,
13
+ "eval_samples_per_second": 37.629,
14
+ "eval_steps_per_second": 1.505
15
  }
eval_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -3.1484375,
4
- "eval_logits/rejected": -3.20703125,
5
- "eval_logps/chosen": -885.0,
6
- "eval_logps/rejected": -913.0,
7
- "eval_loss": 0.5779687762260437,
8
- "eval_rewards/accuracies": 0.5625,
9
- "eval_rewards/chosen": -0.176513671875,
10
- "eval_rewards/margins": 0.3870849609375,
11
- "eval_rewards/rejected": -0.5640869140625,
12
- "eval_runtime": 4.7753,
13
- "eval_samples_per_second": 20.941,
14
- "eval_steps_per_second": 0.838
15
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -3.22265625,
4
+ "eval_logits/rejected": -3.2421875,
5
+ "eval_logps/chosen": -297.0,
6
+ "eval_logps/rejected": -294.0,
7
+ "eval_loss": 0.6393749713897705,
8
+ "eval_rewards/accuracies": 0.671875,
9
+ "eval_rewards/chosen": -0.0892333984375,
10
+ "eval_rewards/margins": 0.1739501953125,
11
+ "eval_rewards/rejected": -0.263427734375,
12
+ "eval_runtime": 2.6575,
13
+ "eval_samples_per_second": 37.629,
14
+ "eval_steps_per_second": 1.505
15
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06dee3a6d4cddbca6dac50d0668c899e7e55fdb123bfce0aa3f85dc083f7b3af
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06188e52a894adb2dcad8250508b51e8618986607cd3b93d88fc7a20829ca744
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88460107b4e4231f37c4253e7894cad91b4d95192044462e160d97174ca2ac33
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62267c9a28d8d1265218d799bc3d3abb99c077b24f594c3b89890c28fc4931bc
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4de4da1fed4e467e289c6e79a7a569bc61f87ce791b9268ff6587b3c7dd1cbf1
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7f585a281ca8214a5acdf70051a3241932bc9b839356a07936938af62d959d
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fee778416713744bad2b33deabbb4e66241ffc29a145cda981d2cfabe323be0f
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e2922e8bd9d52d05057b7c72b9ea56929d53fe8f4830931bd786163fc6cfad
3
  size 1089994880
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a842771d53f2d9ffcc98f4d185088d570767337beba7bcae98a87d9ce68baaf2
3
  size 7800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4645055338393f6806e6f3eb22c61d6e2e5e7b527a528b0a73e952aa93320c
3
  size 7800