""" Example script for running inference with the Hugging Face model. """ from transformers import AutoModelForCausalLM, AutoTokenizer import torch import warnings # Suppress the TypedStorage deprecation warning warnings.filterwarnings('ignore', category=UserWarning, message='.*TypedStorage is deprecated.*') def main(): model_path = "." # Path to the model, or use "YOUR_USERNAME/YOUR_MODEL_NAME" for HF Hub print("Loading model and tokenizer...") model = AutoModelForCausalLM.from_pretrained(model_path) tokenizer = AutoTokenizer.from_pretrained(model_path) device = "cuda" if torch.cuda.is_available() else "cpu" model = model.to(device) model.eval() print(f"Model loaded on {device}") print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}") prompts = [ "Once upon a time", "The quick brown fox", ] for prompt in prompts: print(f"\n{'='*60}") print(f"Prompt: {prompt}") print(f"{'='*60}") inputs = tokenizer(prompt, return_tensors="pt").to(device) with torch.no_grad(): outputs = model.generate( **inputs, max_length=100, temperature=1.0, top_k=50, top_p=0.9, do_sample=True, pad_token_id=tokenizer.eos_token_id ) generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) print(f"\nGenerated:\n{generated_text}") if __name__ == "__main__": main()