**EXAMPLE USAGE**

```python
# Install required packages if needed
# !pip install transformers torch unsloth

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template

# Load the electrical engineering model
model_name = "neuralnets/electrical_engg_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Apply the chat template to format inputs correctly
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# Enable faster inference using Unsloth
model = FastLanguageModel.for_inference(model)

# Move model to GPU if available (or specify your device)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # Required for generation
    return_tensors = "pt",
).to(device)

# Set up text streaming for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt = True)

# Generate response
outputs = model.generate(
    input_ids = inputs,
    streamer = text_streamer,
    max_new_tokens = 512,
    use_cache = True,
    do_sample = True,   # Required for temperature / min_p to take effect
    temperature = 0.7,  # Adjust temperature for creativity vs precision
    min_p = 0.05,       # Minimum-probability sampling cutoff (not nucleus/top-p)
)

# Capture only the generated reply: slice off the prompt tokens before decoding
full_response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
```
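Because `full_response` above excludes the prompt tokens, it can be fed straight back in as an assistant turn for multi-turn chat. The sketch below is illustrative only: the follow-up question is a hypothetical example, and it reuses `model`, `tokenizer`, `device`, and `full_response` from the snippet above.

```python
# Minimal multi-turn sketch (hypothetical follow-up question; reuses
# model, tokenizer, device, and full_response from the example above)
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
    {"role": "assistant", "content": full_response},
    {"role": "user", "content": "How does slip affect the torque produced by the motor?"},
]

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to(device)

outputs = model.generate(
    input_ids = inputs,
    max_new_tokens = 512,
    use_cache = True,
    do_sample = True,
    temperature = 0.7,
    min_p = 0.05,
)

# Decode only the newly generated tokens
follow_up = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
print(follow_up)
```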
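If the checkpoint is Unsloth-compatible (for example, saved with Unsloth on a Llama-3.1 base), loading through `FastLanguageModel.from_pretrained` is an alternative to the plain `transformers` path above. This is a sketch under that assumption; the `max_seq_length` and `load_in_4bit` values are assumed, not settings documented for this model.

```python
# Alternative loading path via Unsloth (assumes the checkpoint is
# Unsloth-compatible; max_seq_length and load_in_4bit are assumed values)
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "neuralnets/electrical_engg_model",
    max_seq_length = 2048,  # assumed context length
    load_in_4bit = True,    # assumed: 4-bit quantization to reduce VRAM use
)
model = FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode
```

Loading this way returns both the model and tokenizer in one call and handles device placement, so the separate `AutoTokenizer` and `model.to(device)` steps are unnecessary.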