minpeter
/

Llama-3.2-1B-chatml-tool-v1

Text Generation

text-generation-inference

Model card Files Files and versions

Llama-3.2-1B-chatml-tool-v1 / README.md

minpeter's picture

Update README.md

9c0cb28 verified 7 months ago

|

history blame contribute delete

3.05 kB

	---
	license: llama3.2
	datasets:
	- teknium/OpenHermes-2.5
	- NousResearch/hermes-function-calling-v1
	base_model:
	- minpeter/QLoRA-Llama-3.2-1B-chatml-tool-v1
	- minpeter/Llama-3.2-1B-AlternateTokenizer-tool-chatml
	language:
	- en
	pipeline_tag: text-generation
	library_name: transformers
	tags:
	- axolotl
	- merge
	new_version: minpeter/Llama-3.2-1B-chatml-tool-v2
	---

	axolotl config
	```yaml
	base_model: minpeter/Llama-3.2-1B-AlternateTokenizer-chatml

	load_in_8bit: false
	load_in_4bit: true
	strict: false

	datasets:
	- path: teknium/OpenHermes-2.5
	type: chat_template
	chat_template: chatml
	field_messages: conversations
	message_field_role: from
	message_field_content: value
	shards: 800
	- path: func-calling-singleturn.jsonl
	type: chat_template
	chat_template: chatml
	field_messages: conversations
	message_field_role: from
	message_field_content: value
	shards: 2

	save_safetensors: true
	auto_resume_from_checkpoints: false
	save_steps: 200

	chat_template: chatml
	dataset_prepared_path: last_run_prepared
	val_set_size: 0.1
	output_dir: ./output

	adapter: qlora
	lora_model_dir:

	sequence_len: 4096
	sample_packing: true
	eval_sample_packing: true
	pad_to_sequence_len: true

	lora_r: 32
	lora_alpha: 16
	lora_dropout: 0.05
	lora_fan_in_fan_out:
	lora_target_modules:
	- gate_proj
	- down_proj
	- up_proj
	- q_proj
	- v_proj
	- k_proj
	- o_proj

	wandb_project: "axolotl"
	wandb_entity: "kasfiekfs-e"
	wandb_watch:
	wandb_name:
	wandb_log_model:

	gradient_accumulation_steps: 4
	micro_batch_size: 2
	num_epochs: 2
	optimizer: adamw_bnb_8bit
	lr_scheduler: cosine
	learning_rate: 0.0002

	train_on_inputs: false
	group_by_length: false
	bf16: auto
	fp16:
	tf32: false

	gradient_checkpointing: true
	early_stopping_patience:
	resume_from_checkpoint:
	local_rank:
	logging_steps: 1
	xformers_attention:
	# flash_attention: true

	loss_watchdog_threshold: 5.0
	loss_watchdog_patience: 3

	warmup_steps: 10
	evals_per_epoch: 4
	eval_table_size:
	eval_max_new_tokens: 128
	debug:
	deepspeed:
	weight_decay: 0.0
	fsdp:
	fsdp_config:

	special_tokens:
	bos_token: <\|begin_of_text\|>
	eos_token: <\|im_end\|>
	pad_token: <\|end_of_text\|>

	# <--- unsloth config --->
	unsloth_lora_mlp: true
	unsloth_lora_qkv: true
	unsloth_lora_o: true
	```

	function calling prompt

	```yaml
	tool_call_body_style: "arguments_name_object"
	system_prompt_template: \|
	You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools>{{tools}}</tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
	<tool_call>
	{'arguments': <args-dict>, 'name': <function-name>}
	</tool_call>
	```