# RLHF loop (schematic): fine-tune the NYC ad Stable Diffusion checkpoint against a CLIP reward model.
# Note: TRL's PPOTrainer is built around causal language models, so treating the diffusion UNet as the
# policy here is pseudocode; a runnable DDPO-based alternative is sketched below.
import random

import torch
from diffusers import StableDiffusionPipeline
from transformers import CLIPModel, CLIPProcessor  # the reward model is a CLIP checkpoint
from trl import PPOConfig, PPOTrainer

# Reward model: CLIP checkpoint loaded from the local "rm" directory, run in half precision on the GPU.
rm = CLIPModel.from_pretrained("rm").eval().half().cuda()
proc = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Policy: the fine-tuned NYC ad model.
pipe = StableDiffusionPipeline.from_pretrained("./nyc-ad-model", torch_dtype=torch.float16).to("cuda")

ppo_cfg = PPOConfig(batch_size=1, learning_rate=1e-6, target_kl=0.2)
trainer = PPOTrainer(model=pipe.unet, reward_model=rm, config=ppo_cfg)  # schematic: not PPOTrainer's real signature

prompts = [line.strip() for line in open("prompt.txt")]
for step in range(500):
    p = random.choice(prompts)
    img = pipe(p, num_inference_steps=20).images[0]
    # Reward: CLIP image-text similarity between the prompt and the generated image.
    inputs = proc(text=p, images=img, return_tensors="pt").to("cuda")
    inputs["pixel_values"] = inputs["pixel_values"].half()  # match the fp16 reward model
    reward = rm(**inputs).logits_per_image[0, 0].item()
    trainer.step(prompts=[p], rewards=[reward])  # schematic: PPOTrainer.step actually takes query/response tensors

pipe.save_pretrained("nyc-ad-model-rlhf")
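TRL's PPOTrainer does not accept a diffusion UNet as its policy, so the loop above cannot run as written; TRL's supported route for reinforcement learning on Stable Diffusion is DDPOTrainer (denoising diffusion policy optimization). Below is a minimal sketch of the same idea, maximizing the CLIP reward over the ad prompts, rewired through that API. It is an illustration rather than the original training script: the "rm" checkpoint, prompt file, 20 sampling steps, and 1e-6 learning rate are carried over from above, and the assumption that DDPO hands the reward function float image tensors in [0, 1] follows TRL's published DDPO examples.

# Minimal DDPO sketch (assumptions noted in comments); not the original script above.
import random

import numpy as np
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
from trl import DDPOConfig, DDPOTrainer, DefaultDDPOStableDiffusionPipeline

# Same CLIP reward checkpoint as above, kept in fp32 here so the processor's
# float32 pixel values can be fed in directly.
rm = CLIPModel.from_pretrained("rm").eval().cuda()
proc = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

prompts = [line.strip() for line in open("prompt.txt")]

def prompt_fn():
    # DDPOTrainer expects the prompt function to return a (prompt, metadata) pair.
    return random.choice(prompts), {}

def clip_reward_fn(images, batch_prompts, metadata):
    # Assumption (from TRL's DDPO examples): `images` is a float tensor in [0, 1],
    # shape (batch, channels, height, width).
    pil_images = [
        Image.fromarray((img.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
        for img in images
    ]
    inputs = proc(text=list(batch_prompts), images=pil_images, return_tensors="pt", padding=True).to("cuda")
    with torch.no_grad():
        out = rm(**inputs)
    # Reward per image: CLIP similarity to its own prompt (diagonal of the logit matrix).
    return out.logits_per_image.diag(), {}

config = DDPOConfig(
    num_epochs=50,
    sample_num_steps=20,       # matches the 20 inference steps used above
    sample_batch_size=1,
    train_batch_size=1,
    train_learning_rate=1e-6,  # carried over from the PPOConfig above
    mixed_precision="fp16",
)

# Wrap the fine-tuned NYC ad checkpoint; use_lora=True (TRL's default) keeps the update lightweight.
pipeline = DefaultDDPOStableDiffusionPipeline("./nyc-ad-model", use_lora=True)
trainer = DDPOTrainer(config, clip_reward_fn, prompt_fn, pipeline)
trainer.train()  # checkpointing/saving is omitted here; see TRL's DDPO example scripts

Scoring each image against the diagonal of logits_per_image compares it only with its own prompt, which mirrors the per-sample reward computed in the schematic loop above.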