Model save
Browse files
- README.md +2 -3
- all_results.json +12 -12
- eval_results.json +12 -12
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -1,17 +1,16 @@
|
|
| 1 |
---
|
| 2 |
-
datasets: Kyleyee/train_data_imdb_for_target_policy_dpo
|
| 3 |
library_name: transformers
|
| 4 |
model_name: Qwen2.5-7b-dpo-imdb
|
| 5 |
tags:
|
| 6 |
- generated_from_trainer
|
| 7 |
-
- trl
|
| 8 |
- dpo
|
|
|
|
| 9 |
licence: license
|
| 10 |
---
|
| 11 |
|
| 12 |
# Model Card for Qwen2.5-7b-dpo-imdb
|
| 13 |
|
| 14 |
-
This model is a fine-tuned version of [None](https://huggingface.co/None)
|
| 15 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 16 |
|
| 17 |
## Quick start
|
|
|
|
| 1 |
---
|
|
|
|
| 2 |
library_name: transformers
|
| 3 |
model_name: Qwen2.5-7b-dpo-imdb
|
| 4 |
tags:
|
| 5 |
- generated_from_trainer
|
|
|
|
| 6 |
- dpo
|
| 7 |
+
- trl
|
| 8 |
licence: license
|
| 9 |
---
|
| 10 |
|
| 11 |
# Model Card for Qwen2.5-7b-dpo-imdb
|
| 12 |
|
| 13 |
+
This model is a fine-tuned version of [None](https://huggingface.co/None).
|
| 14 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 15 |
|
| 16 |
## Quick start
|
all_results.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
-
"eval_logits/chosen": -3.
|
| 4 |
-
"eval_logits/rejected": -3.
|
| 5 |
-
"eval_logps/chosen": -
|
| 6 |
-
"eval_logps/rejected": -
|
| 7 |
-
"eval_loss": 0.
|
| 8 |
-
"eval_rewards/accuracies": 0.
|
| 9 |
-
"eval_rewards/chosen": -0.
|
| 10 |
-
"eval_rewards/margins": 0.
|
| 11 |
-
"eval_rewards/rejected": -0.
|
| 12 |
-
"eval_runtime":
|
| 13 |
-
"eval_samples_per_second":
|
| 14 |
-
"eval_steps_per_second":
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
+
"eval_logits/chosen": -3.22265625,
|
| 4 |
+
"eval_logits/rejected": -3.2421875,
|
| 5 |
+
"eval_logps/chosen": -297.0,
|
| 6 |
+
"eval_logps/rejected": -294.0,
|
| 7 |
+
"eval_loss": 0.6393749713897705,
|
| 8 |
+
"eval_rewards/accuracies": 0.671875,
|
| 9 |
+
"eval_rewards/chosen": -0.0892333984375,
|
| 10 |
+
"eval_rewards/margins": 0.1739501953125,
|
| 11 |
+
"eval_rewards/rejected": -0.263427734375,
|
| 12 |
+
"eval_runtime": 2.6575,
|
| 13 |
+
"eval_samples_per_second": 37.629,
|
| 14 |
+
"eval_steps_per_second": 1.505
|
| 15 |
}
|
eval_results.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
-
"eval_logits/chosen": -3.
|
| 4 |
-
"eval_logits/rejected": -3.
|
| 5 |
-
"eval_logps/chosen": -
|
| 6 |
-
"eval_logps/rejected": -
|
| 7 |
-
"eval_loss": 0.
|
| 8 |
-
"eval_rewards/accuracies": 0.
|
| 9 |
-
"eval_rewards/chosen": -0.
|
| 10 |
-
"eval_rewards/margins": 0.
|
| 11 |
-
"eval_rewards/rejected": -0.
|
| 12 |
-
"eval_runtime":
|
| 13 |
-
"eval_samples_per_second":
|
| 14 |
-
"eval_steps_per_second":
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
+
"eval_logits/chosen": -3.22265625,
|
| 4 |
+
"eval_logits/rejected": -3.2421875,
|
| 5 |
+
"eval_logps/chosen": -297.0,
|
| 6 |
+
"eval_logps/rejected": -294.0,
|
| 7 |
+
"eval_loss": 0.6393749713897705,
|
| 8 |
+
"eval_rewards/accuracies": 0.671875,
|
| 9 |
+
"eval_rewards/chosen": -0.0892333984375,
|
| 10 |
+
"eval_rewards/margins": 0.1739501953125,
|
| 11 |
+
"eval_rewards/rejected": -0.263427734375,
|
| 12 |
+
"eval_runtime": 2.6575,
|
| 13 |
+
"eval_samples_per_second": 37.629,
|
| 14 |
+
"eval_steps_per_second": 1.505
|
| 15 |
}
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06188e52a894adb2dcad8250508b51e8618986607cd3b93d88fc7a20829ca744
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62267c9a28d8d1265218d799bc3d3abb99c077b24f594c3b89890c28fc4931bc
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac7f585a281ca8214a5acdf70051a3241932bc9b839356a07936938af62d959d
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81e2922e8bd9d52d05057b7c72b9ea56929d53fe8f4830931bd786163fc6cfad
|
| 3 |
size 1089994880
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a4645055338393f6806e6f3eb22c61d6e2e5e7b527a528b0a73e952aa93320c
|
| 3 |
size 7800
|