**EXAMPLE USAGE**

```python
# Install required packages if needed
# !pip install transformers torch unsloth

from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
import torch

# Load the electrical engineering model
model_name = "neuralnets/electrical_engg_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
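
# Alternative loading path (optional sketch, based on Unsloth's documented API):
# FastLanguageModel.from_pretrained returns the model and tokenizer together and
# supports 4-bit loading; the max_seq_length value here is only an assumed example.
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "neuralnets/electrical_engg_model",
#     max_seq_length = 2048,  # assumed context length for this sketch
#     load_in_4bit = True,
# )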

# Apply the chat template to format inputs correctly
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# Enable faster inference using Unsloth
model = FastLanguageModel.for_inference(model)

# Move model to GPU if available (or specify your device)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # Required for generation
    return_tensors = "pt",
).to(device)

# Set up text streaming for real-time output
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)

# Generate response
outputs = model.generate(
    input_ids = inputs,
    streamer = text_streamer,
    max_new_tokens = 512,
    use_cache = True,
    do_sample = True,    # Enable sampling so temperature/min_p take effect
    temperature = 0.7,   # Adjust temperature for creativity vs precision
    min_p = 0.05,        # Min-p sampling threshold (not nucleus/top-p)
)

# Capture the decoded output as a string
# (note: outputs[0] contains the prompt followed by the generated response)
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
```
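
For a multi-turn exchange, the decoded reply can be appended to `messages` and the chat template re-applied before generating the next turn. The sketch below reuses `model`, `tokenizer`, `device`, `messages`, `inputs`, and `outputs` from the example above; the follow-up question and the `reply`/`followup_*` variable names are illustrative only.

```python
# Continue the conversation (illustrative sketch, reusing objects from the example above)

# Keep only the newly generated tokens from the first turn
reply = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

# Append the assistant's reply and a follow-up question to the chat history
messages.append({"role": "assistant", "content": reply})
messages.append({"role": "user", "content": "How does rotor slip affect the developed torque?"})

# Re-apply the chat template and generate the next turn
followup_inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to(device)

followup_outputs = model.generate(
    input_ids = followup_inputs,
    max_new_tokens = 512,
    use_cache = True,
    do_sample = True,
    temperature = 0.7,
    min_p = 0.05,
)

# Print only the new reply, excluding the prompt tokens
print(tokenizer.decode(followup_outputs[0][followup_inputs.shape[-1]:], skip_special_tokens=True))
```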