File size: 1,770 Bytes

909bba4

**EXAMPLE USAGE**

```python
# Install required packages if needed
# !pip install transformers torch unsloth

from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
import torch

# Load the electrical engineering model and its tokenizer by repository name.
model_name = "neuralnets/electrical_engg_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Attach the "llama-3.1" chat template to the tokenizer so that
# apply_chat_template() below formats the conversation with it.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3.1",
)

# Enable faster inference using Unsloth.
# The call is made for its effect on `model`; the return value is deliberately
# not rebound, matching Unsloth's own usage examples, so the script does not
# depend on what a particular Unsloth release returns.
FastLanguageModel.for_inference(model)

# Move model to GPU if available (or specify your device)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template.
# return_dict=True yields both input_ids and attention_mask, so generate()
# receives an explicit attention mask instead of having to infer one.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # Required for generation
    return_tensors="pt",
    return_dict=True,
).to(device)

# Number of prompt tokens; used below to separate the reply from the prompt.
prompt_length = inputs["input_ids"].shape[-1]

# Set up text streaming for real-time output (the prompt itself is not echoed)
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

# Generate response
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    use_cache=True,
    do_sample=True,    # Without this, temperature/min_p are ignored (greedy decoding)
    temperature=0.7,   # Adjust temperature for creativity vs precision
    min_p=0.05,        # Min-p sampling: drop tokens below 5% of the top token's probability
)

# If you want to capture the full response as a string.
# generate() returns prompt + completion token ids, so slice off the prompt
# to keep only the newly generated reply.
full_response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

```