Model save

Browse files

Files changed (12) hide show

README.md +3 -5
all_results.json +8 -8
config.json +1 -1
eval_results.json +4 -4
model-00001-of-00003.safetensors +1 -1
model-00002-of-00003.safetensors +1 -1
model-00003-of-00003.safetensors +1 -1
runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708459946.idealab-05.cs.illinois.edu.1352443.0 +3 -0
runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708461567.idealab-05.cs.illinois.edu.1352443.1 +3 -0
train_results.json +4 -4
trainer_state.json +14 -14
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -2,13 +2,11 @@
 license: apache-2.0
 base_model: mistralai/Mistral-7B-v0.1
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: zephyr-7b-sft-full
   results: []
@@ -19,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 # zephyr-7b-sft-full
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.9357
@@ -57,7 +55,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.9081        | 1.0   | 1090 | 0.9357          |
 ### Framework versions

 license: apache-2.0
 base_model: mistralai/Mistral-7B-v0.1
 tags:
 - trl
 - sft
 - generated_from_trainer
 datasets:
+- generator
 model-index:
 - name: zephyr-7b-sft-full
   results: []
 # zephyr-7b-sft-full
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.9357
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.9082        | 1.0   | 1090 | 0.9357          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.9357115030288696,
-    "eval_runtime": 337.5218,
     "eval_samples": 23110,
-    "eval_samples_per_second": 45.719,
-    "eval_steps_per_second": 0.717,
-    "train_loss": 0.9779492704146499,
-    "train_runtime": 12151.2808,
     "train_samples": 207865,
-    "train_samples_per_second": 11.474,
-    "train_steps_per_second": 0.09
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.9357138276100159,
+    "eval_runtime": 337.7533,
     "eval_samples": 23110,
+    "eval_samples_per_second": 45.687,
+    "eval_steps_per_second": 0.716,
+    "train_loss": 0.07625311886498687,
+    "train_runtime": 1283.1445,
     "train_samples": 207865,
+    "train_samples_per_second": 108.66,
+    "train_steps_per_second": 0.849
 }

config.json CHANGED Viewed

@@ -21,6 +21,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.36.2",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.36.2",
+  "use_cache": false,
   "vocab_size": 32000
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.9357115030288696,
-    "eval_runtime": 337.5218,
     "eval_samples": 23110,
-    "eval_samples_per_second": 45.719,
-    "eval_steps_per_second": 0.717
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.9357138276100159,
+    "eval_runtime": 337.7533,
     "eval_samples": 23110,
+    "eval_samples_per_second": 45.687,
+    "eval_steps_per_second": 0.716
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9caa0e01c728e53d96bf22580aa44c7e7cbee59915dca4055d2b770d5496a3c7
 size 4943162336

 version https://git-lfs.github.com/spec/v1
+oid sha256:070f2c2cfe338875a56ccfa23a395385e1570c0d012d8fbe3f49dee58dcaca17
 size 4943162336

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71821a0b0dd8f8297e58c224ae8c4c270b43a26d8f7124778e9319129e0c628a
 size 4999819336

 version https://git-lfs.github.com/spec/v1
+oid sha256:86687db1e94132049ffb71bdd40bfdd82bf0601f4370a96a0b9d9ac80115c681
 size 4999819336

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ea43e2df8d41a2e19a56943a634eba0b781e850c96a8c7e807a97dd185546b2
 size 4540516344

 version https://git-lfs.github.com/spec/v1
+oid sha256:0124f92197c6639a9e532627e51bc4a75e29aa6c51d724b17ff0aba1366a75d7
 size 4540516344

runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708459946.idealab-05.cs.illinois.edu.1352443.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03d908b92954853e30d065702659a5f8e17349f5852cfc0ba86fd45de1a7633e
+size 7893

runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708461567.idealab-05.cs.illinois.edu.1352443.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0506fae4e3944944daa47bb70d8a7542bd09ba7a8e3f19df328dee16322b8671
+size 359

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.9779492704146499,
-    "train_runtime": 12151.2808,
     "train_samples": 207865,
-    "train_samples_per_second": 11.474,
-    "train_steps_per_second": 0.09
 }

 {
     "epoch": 1.0,
+    "train_loss": 0.07625311886498687,
+    "train_runtime": 1283.1445,
     "train_samples": 207865,
+    "train_samples_per_second": 108.66,
+    "train_steps_per_second": 0.849
 }

trainer_state.json CHANGED Viewed

@@ -1229,7 +1229,7 @@
     {
       "epoch": 0.93,
       "learning_rate": 2.870552305351382e-07,
-      "loss": 0.9293,
       "step": 1015
     },
     {
@@ -1253,7 +1253,7 @@
     {
       "epoch": 0.95,
       "learning_rate": 1.5471558192656776e-07,
-      "loss": 0.9232,
       "step": 1035
     },
     {
@@ -1271,7 +1271,7 @@
     {
       "epoch": 0.96,
       "learning_rate": 8.19327959602012e-08,
-      "loss": 0.9372,
       "step": 1050
     },
     {
@@ -1301,7 +1301,7 @@
     {
       "epoch": 0.99,
       "learning_rate": 1.1535349032167908e-08,
-      "loss": 0.9247,
       "step": 1075
     },
     {
@@ -1313,31 +1313,31 @@
     {
       "epoch": 1.0,
       "learning_rate": 1.2819245493955746e-09,
-      "loss": 0.9163,
       "step": 1085
     },
     {
       "epoch": 1.0,
       "learning_rate": 0.0,
-      "loss": 0.9081,
       "step": 1090
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.9357115030288696,
-      "eval_runtime": 337.6383,
-      "eval_samples_per_second": 45.703,
-      "eval_steps_per_second": 0.717,
       "step": 1090
     },
     {
       "epoch": 1.0,
       "step": 1090,
       "total_flos": 456447649382400.0,
-      "train_loss": 0.9779492704146499,
-      "train_runtime": 12151.2808,
-      "train_samples_per_second": 11.474,
-      "train_steps_per_second": 0.09
     }
   ],
   "logging_steps": 5,

     {
       "epoch": 0.93,
       "learning_rate": 2.870552305351382e-07,
+      "loss": 0.9294,
       "step": 1015
     },
     {
     {
       "epoch": 0.95,
       "learning_rate": 1.5471558192656776e-07,
+      "loss": 0.9233,
       "step": 1035
     },
     {
     {
       "epoch": 0.96,
       "learning_rate": 8.19327959602012e-08,
+      "loss": 0.9371,
       "step": 1050
     },
     {
     {
       "epoch": 0.99,
       "learning_rate": 1.1535349032167908e-08,
+      "loss": 0.9248,
       "step": 1075
     },
     {
     {
       "epoch": 1.0,
       "learning_rate": 1.2819245493955746e-09,
+      "loss": 0.9164,
       "step": 1085
     },
     {
       "epoch": 1.0,
       "learning_rate": 0.0,
+      "loss": 0.9082,
       "step": 1090
     },
     {
       "epoch": 1.0,
+      "eval_loss": 0.9357138276100159,
+      "eval_runtime": 338.0357,
+      "eval_samples_per_second": 45.649,
+      "eval_steps_per_second": 0.716,
       "step": 1090
     },
     {
       "epoch": 1.0,
       "step": 1090,
       "total_flos": 456447649382400.0,
+      "train_loss": 0.07625311886498687,
+      "train_runtime": 1283.1445,
+      "train_samples_per_second": 108.66,
+      "train_steps_per_second": 0.849
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe12a4b60609475166cb6e2ac4cc51e96a1d0ee98ca97c0a946d7d0a5039515
 size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ba4634a5dc42447eaee695ee8aa28ff4185b93c3a3c211a42704f15c8610336
 size 5880