import os

import torch
import gradio as gr
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
)
from peft import LoraConfig, get_peft_model

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
DATASET_NAME = "embedding-data/Amazon-QA"
FINETUNED_MODEL_NAME = "tinyllama-shopify-lora"


def load_data():
    """Load the Amazon-QA dataset and return a cleaned train/test split."""
    dataset = load_dataset(DATASET_NAME)
    df = dataset["train"].to_pandas()
    df = df.rename(columns={"query": "question", "pos": "answer"})[["question", "answer"]]
    df = df.dropna().astype(str)
    # Strip citation-style markers like [^1^] from the answers.
    df["answer"] = df["answer"].str.replace(r"\[\^.*?\^\]", "", regex=True).str.strip()
    return Dataset.from_pandas(df, preserve_index=False).train_test_split(test_size=0.1)


def setup_model():
    """Load the base model and wrap it with a LoRA adapter."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        torch_dtype=torch.float32,
    )

    peft_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    return tokenizer, get_peft_model(model, peft_config)


def train_model():
    tokenizer, model = setup_model()
    data = load_data()

    def tokenize_function(examples):
        text = [
            f"Question: {q}\nAnswer: {a}"
            for q, a in zip(examples["question"], examples["answer"])
        ]
        return tokenizer(text, truncation=True, max_length=256, padding="max_length")

    tokenized_data = data.map(
        tokenize_function, batched=True, remove_columns=["question", "answer"]
    )

    trainer = Trainer(
        model=model,
        args=TrainingArguments(
            output_dir="./results",
            per_device_train_batch_size=2,
            num_train_epochs=1,
            learning_rate=2e-5,
            logging_steps=10,
            save_strategy="steps",
            save_steps=100,
            optim="adamw_torch",
            no_cuda=True,  # train on CPU
        ),
        train_dataset=tokenized_data["train"],
        # Causal LM objective: the collator copies input_ids into labels.
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )

    print("Starting training...")
    trainer.train()
    # This saves only the LoRA adapter weights and config, not the full base model.
    model.save_pretrained(FINETUNED_MODEL_NAME)
    tokenizer.save_pretrained(FINETUNED_MODEL_NAME)
    print("Training complete!")


def generate_response(message, history):
    # Reloading the model on every call is slow but keeps the example simple.
    tokenizer = AutoTokenizer.from_pretrained(FINETUNED_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        FINETUNED_MODEL_NAME, torch_dtype=torch.float32
    )
    prompt = f"Question: {message}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Answer:")[-1]


def create_interface():
    if not os.path.exists(FINETUNED_MODEL_NAME):
        print("Model not found. Starting training...")
        train_model()
    return gr.ChatInterface(
        fn=generate_response,
        examples=[
            "What's your return policy?",
            "Do you ship internationally?",
            "Is this waterproof?",
        ],
    )


if __name__ == "__main__":
    create_interface().launch()