Spaces:
Sleeping
Sleeping
Commit
·
de9a113
1
Parent(s):
fc2fdc7
updates
Browse files
- app.py +12 -7
- config.py +1 -1
- last_epoch_ckpt/diffusion_pytorch_model.safetensors +1 -1
- prior/pipeline_kandinsky_prior.py +8 -9
app.py
CHANGED
@@ -8,7 +8,9 @@ import glob
|
|
8 |
import config
|
9 |
from model import get_model_and_tokenizer
|
10 |
|
11 |
-
|
|
|
|
|
12 |
'cuda', torch.bfloat16)
|
13 |
|
14 |
# TODO unify/merge origin and this
|
@@ -16,6 +18,7 @@ model, model.prior_pipe.image_encoder = get_model_and_tokenizer(config.model_pat
|
|
16 |
|
17 |
device = "cuda"
|
18 |
|
|
|
19 |
|
20 |
import spaces
|
21 |
import matplotlib.pyplot as plt
|
@@ -51,14 +54,14 @@ def generate_gpu(in_im_embs, prompt='the scene'):
|
|
51 |
with torch.no_grad():
|
52 |
in_im_embs = in_im_embs.to('cuda')
|
53 |
|
54 |
-
negative_image_embeds = in_im_embs[0]
|
55 |
positive_image_embeds = in_im_embs[1]
|
56 |
|
57 |
images = model.kandinsky_pipe(
|
58 |
num_inference_steps=50,
|
59 |
image_embeds=positive_image_embeds,
|
60 |
negative_image_embeds=negative_image_embeds,
|
61 |
-
guidance_scale=
|
62 |
).images[0]
|
63 |
cond = (
|
64 |
model.prior_pipe.image_processor(images, return_tensors="pt")
|
@@ -91,11 +94,10 @@ def generate(in_im_embs, ):
|
|
91 |
@spaces.GPU()
|
92 |
def sample_embs(prompt_embeds):
|
93 |
latent = torch.randn(prompt_embeds.shape[0], 1, prompt_embeds.shape[-1])
|
94 |
-
if prompt_embeds.shape[1] <
|
95 |
-
prompt_embeds = torch.nn.functional.pad(prompt_embeds, [0, 0, 0,
|
96 |
-
assert prompt_embeds.shape[1] ==
|
97 |
image_embeds = model(latent.to('cuda'), prompt_embeds.to('cuda')).predicted_image_embedding
|
98 |
-
|
99 |
return image_embeds
|
100 |
|
101 |
@spaces.GPU()
|
@@ -113,6 +115,8 @@ def get_user_emb(embs, ys):
|
|
113 |
else:
|
114 |
negative_embs = random.sample(negs, min(4, len(negs))) + negs[-4:]
|
115 |
negatives = torch.stack(negative_embs, 1)
|
|
|
|
|
116 |
|
117 |
image_embeds = torch.stack([sample_embs(negatives), sample_embs(positives)])
|
118 |
|
@@ -175,6 +179,7 @@ def background_next_image():
|
|
175 |
prevs_df = pd.concat((prevs_df.iloc[:6], prevs_df.iloc[7:]))
|
176 |
|
177 |
def pluck_img(user_id):
|
|
|
178 |
rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) is not None for i in prevs_df.iterrows()]]
|
179 |
ems = rated_rows['embeddings'].to_list()
|
180 |
ys = [i[user_id][0] for i in rated_rows['user:rating'].to_list()]
|
|
|
8 |
import config
|
9 |
from model import get_model_and_tokenizer
|
10 |
|
11 |
+
torch.set_float32_matmul_precision('high')
|
12 |
+
|
13 |
+
model, model.prior_pipe.image_encoder = get_model_and_tokenizer(config.model_path,
|
14 |
'cuda', torch.bfloat16)
|
15 |
|
16 |
# TODO unify/merge origin and this
|
|
|
18 |
|
19 |
device = "cuda"
|
20 |
|
21 |
+
k = config.k
|
22 |
|
23 |
import spaces
|
24 |
import matplotlib.pyplot as plt
|
|
|
54 |
with torch.no_grad():
|
55 |
in_im_embs = in_im_embs.to('cuda')
|
56 |
|
57 |
+
negative_image_embeds = in_im_embs[0]# if random.random() < .3 else model.prior_pipe.get_zero_embed()
|
58 |
positive_image_embeds = in_im_embs[1]
|
59 |
|
60 |
images = model.kandinsky_pipe(
|
61 |
num_inference_steps=50,
|
62 |
image_embeds=positive_image_embeds,
|
63 |
negative_image_embeds=negative_image_embeds,
|
64 |
+
guidance_scale=8,
|
65 |
).images[0]
|
66 |
cond = (
|
67 |
model.prior_pipe.image_processor(images, return_tensors="pt")
|
|
|
94 |
@spaces.GPU()
|
95 |
def sample_embs(prompt_embeds):
|
96 |
latent = torch.randn(prompt_embeds.shape[0], 1, prompt_embeds.shape[-1])
|
97 |
+
if prompt_embeds.shape[1] < k:
|
98 |
+
prompt_embeds = torch.nn.functional.pad(prompt_embeds, [0, 0, 0, k-prompt_embeds.shape[1]])
|
99 |
+
assert prompt_embeds.shape[1] == k, f"The model is set to take `k`` cond image embeds but is shape {prompt_embeds.shape}"
|
100 |
image_embeds = model(latent.to('cuda'), prompt_embeds.to('cuda')).predicted_image_embedding
|
|
|
101 |
return image_embeds
|
102 |
|
103 |
@spaces.GPU()
|
|
|
115 |
else:
|
116 |
negative_embs = random.sample(negs, min(4, len(negs))) + negs[-4:]
|
117 |
negatives = torch.stack(negative_embs, 1)
|
118 |
+
# if random.random() < .5:
|
119 |
+
# negatives = torch.zeros_like(negatives)
|
120 |
|
121 |
image_embeds = torch.stack([sample_embs(negatives), sample_embs(positives)])
|
122 |
|
|
|
179 |
prevs_df = pd.concat((prevs_df.iloc[:6], prevs_df.iloc[7:]))
|
180 |
|
181 |
def pluck_img(user_id):
|
182 |
+
# TODO pluck images based on similarity but also based on diversity by cluster every few times.
|
183 |
rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) is not None for i in prevs_df.iterrows()]]
|
184 |
ems = rated_rows['embeddings'].to_list()
|
185 |
ys = [i[user_id][0] for i in rated_rows['user:rating'].to_list()]
|
config.py
CHANGED
@@ -12,5 +12,5 @@ batch_size = 16
|
|
12 |
number_k_clip_embed = 16 # divide by this to determine bundling together of sequences -> CLIP
|
13 |
num_workers = 32
|
14 |
seed = 107
|
15 |
-
|
16 |
# TODO config option to swap to diffusion?
|
|
|
12 |
number_k_clip_embed = 16 # divide by this to determine bundling together of sequences -> CLIP
|
13 |
num_workers = 32
|
14 |
seed = 107
|
15 |
+
k = 8
|
16 |
# TODO config option to swap to diffusion?
|
last_epoch_ckpt/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 136790920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae34b5c319b9c804e1e82c93f78821b880553d2ac60ff628003175334ee9066d
|
3 |
size 136790920
|
prior/pipeline_kandinsky_prior.py
CHANGED
@@ -498,14 +498,13 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
498 |
if negative_prompt is None:
|
499 |
# zero_embeds = self.get_zero_embed(latents.shape[0], device=latents.device)
|
500 |
|
501 |
-
#
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
)
|
509 |
|
510 |
latents = self.prior(
|
511 |
hidden_states,
|
@@ -541,7 +540,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
541 |
|
542 |
if not return_dict:
|
543 |
return (image_embeddings, zero_embeds)
|
544 |
-
|
545 |
return KandinskyPriorPipelineOutput(
|
546 |
image_embeds=image_embeddings, negative_image_embeds=zero_embeds
|
547 |
)
|
|
|
498 |
if negative_prompt is None:
|
499 |
# zero_embeds = self.get_zero_embed(latents.shape[0], device=latents.device)
|
500 |
|
501 |
+
# use the same hidden states or different hidden states?
|
502 |
+
# hidden_states = torch.randn(
|
503 |
+
# (batch_size, prompt_embeds.shape[-1]),
|
504 |
+
# device=prompt_embeds.device,
|
505 |
+
# dtype=prompt_embeds.dtype,
|
506 |
+
# generator=generator,
|
507 |
+
# )
|
|
|
508 |
|
509 |
latents = self.prior(
|
510 |
hidden_states,
|
|
|
540 |
|
541 |
if not return_dict:
|
542 |
return (image_embeddings, zero_embeds)
|
543 |
+
|
544 |
return KandinskyPriorPipelineOutput(
|
545 |
image_embeds=image_embeddings, negative_image_embeds=zero_embeds
|
546 |
)
|