# Page header captured with this file (not part of the program):
#   author: MegaTronX
#   commit 533346f (verified): "Rename app.py to app.bak"
import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
# Step 1: Load base model
# The base checkpoint is loaded in bfloat16 and placed on the available
# device(s) by `device_map="auto"`.
base_model_name = "meta-llama/Llama-3.2-1B-Instruct"
adapter_repo = "MegaTronX/Llama-3.2-1B-Instruct-Selectolax-QLoRA"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Step 2: Load LoRA adapter
# Wraps the base model; the adapter weights are fetched from `adapter_repo`.
model_with_adapter = PeftModel.from_pretrained(
    base_model,
    adapter_repo,
    device_map="auto",
)
# This script only runs inference, so make sure every module (including any
# dropout inside the adapter layers) is in eval mode. The call is idempotent.
model_with_adapter.eval()
print(f"Loaded LoRA adapter from {adapter_repo}")

# Verify adapter configuration
# `.config` is forwarded to the base model and would print the base model's
# configuration; the adapter settings live in `peft_config`.
print(model_with_adapter.peft_config)

# Step 3: Load tokenizer
# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Step 4: Define inference function
@spaces.GPU(duration=120)
def generate_text(prompt, max_length=1024):
    """Generate text from ``prompt`` with the LoRA-adapted model.

    Args:
        prompt: Input text handed to the tokenizer.
        max_length: Upper bound, in tokens, passed to ``generate`` as
            ``max_length`` (it caps prompt plus generated tokens). Numeric
            values that are not ``int`` (e.g. from a UI slider) are
            converted with ``int()``.

    Returns:
        The first generated sequence decoded to a string with special
        tokens skipped.

    Raises:
        ValueError: If ``max_length`` is smaller than 2, which would leave
            no room for both a prompt token and a generated token.
    """
    max_length = int(max_length)
    if max_length < 2:
        raise ValueError("max_length must be at least 2")

    # `max_length` counts the prompt as well. Keep the prompt strictly
    # shorter than the cap (and never above the original 1024-token limit)
    # so generate() always has room for at least one new token instead of
    # warning or raising on an over-long prompt.
    prompt_budget = min(1024, max_length - 1)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=prompt_budget,
    )

    # The model is placed by `device_map="auto"`, so follow the model's
    # device rather than assuming "cuda"; a mismatch would fail in generate().
    inputs = inputs.to(model_with_adapter.device)

    outputs = model_with_adapter.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Step 5: Create Gradio interface
# Two inputs are wired to generate_text in order: the prompt text box and
# the slider that supplies its max_length argument.
_prompt_input = gr.Textbox(
    label="Prompt",
    placeholder="Enter your text prompt here...",
)
_length_slider = gr.Slider(
    label="Max Length",
    minimum=50,
    maximum=1024,
    step=10,
    value=256,
)
iface = gr.Interface(
    generate_text,
    [_prompt_input, _length_slider],
    "text",
    title="LLaMA + LoRA Text Generator",
    description="Generate text using a LLaMA model with LoRA adapters.",
)

# Step 6: Launch Gradio app
# Only start the web server when this file is run as a script.
if __name__ == "__main__":
    iface.launch()