# Repository: EvoPlatformV3 / retrain_from_feedback.py
# Author: HemanM
# Commit message: Update retrain_from_feedback.py
# Commit: 600667e (verified)
# retrain_from_feedback.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from transformers import AutoTokenizer
from evo_architecture import mutate_genome, log_genome, default_config
from evo_model import EvoTransformerV22
import os
# File that train_evo() writes the retrained model's state_dict to.
MODEL_PATH = "evo_hellaswag.pt"
# CSV of logged user feedback that train_evo() reads its training rows from.
CSV_PATH = "feedback_log.csv"
def train_evo():
if not os.path.exists(CSV_PATH):
print("⚠️ No feedback_log.csv file found.")
return "⚠️ No feedback data file found."
df = pd.read_csv(CSV_PATH)
# ✅ Only use rows where vote is Evo or GPT
usable_df = df[df["vote"].isin(["Evo", "GPT"])].copy()
if usable_df.empty:
print("⚠️ No usable feedback data. Please vote on Evo or GPT.")
return "⚠️ No usable feedback data. Please vote on Evo or GPT."
# Step 1: Evolve new architecture
base_config = default_config()
evolved_config = mutate_genome(base_config)
print("🧬 New mutated config:", evolved_config)
# Step 2: Initialize model
model = EvoTransformerV22(
num_layers=evolved_config["num_layers"],
num_heads=evolved_config["num_heads"],
ffn_dim=evolved_config["ffn_dim"],
memory_enabled=evolved_config["memory_enabled"]
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model.train()
# Step 3: Train using feedback
total_loss = 0.0
for _, row in usable_df.iterrows():
question = row["question"]
opt1 = row["option1"]
opt2 = row["option2"]
evo_answer = row["evo_answer"]
label = torch.tensor([1.0 if evo_answer.strip() == opt2.strip() else 0.0])
input_text = f"{question} [SEP] {opt2 if label.item() == 1 else opt1}"
encoded = tokenizer(input_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
logits = model(encoded["input_ids"])
loss = F.binary_cross_entropy_with_logits(logits.squeeze(), label)
loss.backward()
optimizer.step()
optimizer.zero_grad()
total_loss += loss.item()
# Step 4: Save the retrained model
torch.save(model.state_dict(), MODEL_PATH)
print("✅ Evo model retrained and saved.")
# Step 5: Log genome with fitness score (1 - avg_loss)
avg_loss = total_loss / len(usable_df)
fitness = round(1.0 - avg_loss, 4)
log_genome(evolved_config, score=fitness)
print("🧬 Genome logged with score:", fitness)
return f"✅ Evo retrained. Loss: {avg_loss:.4f}, Fitness: {fitness}"