File size: 3,045 Bytes
6bd48a7 d43531a 2ce6842 ec13c93 6bd48a7 2ce6842 9c0cb28 6bd48a7 7e65f51 6bd48a7 7e65f51 2ce6842 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
---
# Model card metadata (README front matter).
license: llama3.2

# Datasets listed for this model.
datasets:
  - teknium/OpenHermes-2.5
  - NousResearch/hermes-function-calling-v1

# NOTE(review): the axolotl config further down in this README sets
# `base_model: minpeter/Llama-3.2-1B-AlternateTokenizer-chatml` (no `-tool`),
# while the second entry here carries a `-tool` infix; confirm which
# repository id is intended.
base_model:
  - minpeter/QLoRA-Llama-3.2-1B-chatml-tool-v1
  - minpeter/Llama-3.2-1B-AlternateTokenizer-tool-chatml

language:
  - en

pipeline_tag: text-generation
library_name: transformers

tags:
  - axolotl
  - merge

# Repository id of the newer release of this model.
new_version: minpeter/Llama-3.2-1B-chatml-tool-v2
---
## Axolotl config
```yaml
# Axolotl training configuration: QLoRA adapter (4-bit load) on top of
# `base_model`, trained on two chat-formatted datasets with the chatml
# template. Keys with an empty value are intentionally left unset (null).

# --- Model loading ---
base_model: minpeter/Llama-3.2-1B-AlternateTokenizer-chatml
load_in_8bit: false
load_in_4bit: true
strict: false

# --- Datasets ---
# Each list item is one dataset; every key indented under `- path:` belongs
# to that dataset entry only (including its own `shards` value).
datasets:
  - path: teknium/OpenHermes-2.5
    type: chat_template
    chat_template: chatml
    field_messages: conversations
    message_field_role: from
    message_field_content: value
    shards: 800
  - path: func-calling-singleturn.jsonl
    type: chat_template
    chat_template: chatml
    field_messages: conversations
    message_field_role: from
    message_field_content: value
    shards: 2

# --- Checkpointing / output ---
save_safetensors: true
auto_resume_from_checkpoints: false
save_steps: 200
# Top-level chat template (the dataset entries above set the same value).
chat_template: chatml
dataset_prepared_path: last_run_prepared
val_set_size: 0.1
output_dir: ./output

# --- Adapter (QLoRA) ---
adapter: qlora
lora_model_dir:

# --- Sequence handling ---
sequence_len: 4096
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

# --- LoRA hyperparameters ---
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_fan_in_fan_out:
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj

# --- Weights & Biases logging ---
wandb_project: "axolotl"
wandb_entity: "kasfiekfs-e"
wandb_watch:
wandb_name:
wandb_log_model:

# --- Optimisation ---
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 2
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

# --- Training behaviour / precision ---
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
# flash_attention: true

# --- Loss watchdog, warmup and evaluation ---
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128

# --- Distributed / misc ---
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:

# --- Special tokens ---
# Nested under `special_tokens`; quoted so the angle-bracket/pipe strings are
# always read as plain strings.
special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|im_end|>"
  pad_token: "<|end_of_text|>"

# <--- unsloth config --->
unsloth_lora_mlp: true
unsloth_lora_qkv: true
unsloth_lora_o: true
```
## Function calling prompt
```yaml
# Function-calling prompt settings.
# `system_prompt_template` is a literal block scalar, so its line breaks are
# kept as written. The `{{tools}}` placeholder is presumably replaced with the
# available tool definitions by whatever renders this template; confirm
# against the consumer.
tool_call_body_style: "arguments_name_object"
system_prompt_template: |
  You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools>{{tools}}</tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
  <tool_call>
  {'arguments': <args-dict>, 'name': <function-name>}
  </tool_call>
```