File size: 1,770 Bytes
909bba4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
**EXAMPLE USAGE**
```python
# Example: chat-style inference with the electrical engineering model.
#
# Install required packages if needed
# !pip install transformers torch unsloth

# Unsloth is imported before transformers, as Unsloth recommends, so that its
# patches are in place before the transformers classes are used.
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Load the electrical engineering model
model_name = "neuralnets/electrical_engg_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Apply the chat template to format inputs correctly
tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3.1",
)

# Enable faster inference using Unsloth.
# The call is made for its effect on `model`; its return value is deliberately
# not assigned back, so `model` can never be rebound to None.
# NOTE(review): the model is loaded with AutoModelForCausalLM rather than
# FastLanguageModel.from_pretrained -- confirm for_inference supports that.
FastLanguageModel.for_inference(model)

# Move model to GPU if available (or specify your device)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template.
# return_dict=True yields both input_ids and attention_mask, so generate()
# receives an explicit mask instead of having to infer one.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # Required for generation
    return_tensors="pt",
    return_dict=True,
).to(device)

# Set up text streaming for real-time output (the prompt itself is not echoed)
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

# Generate response
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    use_cache=True,
    do_sample=True,  # Sampling must be on for temperature / min_p to apply
    temperature=0.7,  # Adjust temperature for creativity vs precision
    min_p=0.05,  # Min-p sampling: drop tokens below 5% of the top token's probability
)

# If you want to capture the full response as a string.
# `full_response` contains the decoded prompt followed by the model's reply.
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Only the newly generated reply: slice off the prompt tokens before decoding.
prompt_length = inputs["input_ids"].shape[-1]
generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
```