import random
import torch
import dgl
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from exp.utils import normalize_embeddings

def fix_random(seed):
    """Fix all relevant random seeds (Python, NumPy, PyTorch, DGL) for reproducibility."""
    dgl.seed(seed)
    torch.random.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
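
# Illustrative usage (the seed value is arbitrary, not from the original file): call once
# before building the graph and the model so DGL sampling, NumPy, PyTorch, and Python's
# `random` module all agree.
#
#     fix_random(42)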

class LRSchedule:
    """Linear warmup followed by linear decay to `final_factor`, returned as a multiplicative factor."""

    def __init__(self, total_steps, warmup_steps, final_factor):
        self._total_steps = total_steps
        self._warmup_steps = warmup_steps
        self._final_factor = final_factor

    def __call__(self, step):
        # Once the schedule is exhausted, keep the final factor.
        if step >= self._total_steps:
            return self._final_factor
        # Linear warmup from 0 to 1 over the first `warmup_steps` steps.
        if self._warmup_steps > 0:
            warmup_factor = step / self._warmup_steps
        else:
            warmup_factor = 1.0
        # Linear decay from 1 down to `final_factor` over the remaining steps.
        steps_after_warmup = step - self._warmup_steps
        total_steps_after_warmup = self._total_steps - self._warmup_steps
        after_warmup_factor = 1 \
            - (1 - self._final_factor) * (steps_after_warmup / total_steps_after_warmup)
        factor = min(warmup_factor, after_warmup_factor)
        # Clamp to [0, 1] so the result is always a valid learning-rate multiplier.
        return min(max(factor, 0), 1)
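
# Illustrative usage (not part of the original file): LRSchedule returns a per-step
# multiplicative factor, so it plugs into torch.optim.lr_scheduler.LambdaLR. The
# optimizer, step counts, and learning rate below are assumed values.
#
#     optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#     schedule = LRSchedule(total_steps=10_000, warmup_steps=500, final_factor=0.1)
#     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=schedule)
#     for step in range(10_000):
#         ...  # forward/backward pass
#         optimizer.step()
#         scheduler.step()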

def prepare_graphs(items_path, ratings_path):
    """Build a User-Item bipartite heterograph and its homogeneous version from CSV files."""
    items = pd.read_csv(items_path)
    ratings = pd.read_csv(ratings_path)
    # User ids are assumed to be contiguous from 0, so the max id gives the node count.
    n_users = np.max(ratings["user_id"].unique()) + 1
    item_ids = torch.tensor(sorted(items["item_id"].unique()))
    # Each rating row becomes a User->Item edge plus its reverse Item->User edge.
    edges = torch.tensor(ratings["user_id"]), torch.tensor(ratings["item_id"])
    reverse_edges = (edges[1], edges[0])
    bipartite_graph = dgl.heterograph(
        data_dict={
            ("User", "UserItem", "Item"): edges,
            ("Item", "ItemUser", "User"): reverse_edges
        },
        num_nodes_dict={
            "User": n_users,
            "Item": len(item_ids)
        }
    )
    # The homogeneous copy (with self-loops) is convenient for standard GNN layers.
    graph = dgl.to_homogeneous(bipartite_graph)
    graph = dgl.add_self_loop(graph)
    return bipartite_graph, graph
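
# Illustrative usage (the CSV paths are placeholders, not from the original file):
#
#     bipartite_graph, graph = prepare_graphs("data/items.csv", "data/ratings.csv")
#     print(bipartite_graph.num_nodes("User"), bipartite_graph.num_nodes("Item"))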

def sample_item_batch(user_batch, bipartite_graph):
    """For each user in the batch, sample two of their rated items (one item pair per user)."""
    sampled_edges = dgl.sampling.sample_neighbors(
        bipartite_graph, {"User": user_batch}, fanout=2
    ).edges(etype="ItemUser")
    # edges(etype="ItemUser") returns (item ids, user ids); sorting by the user index
    # groups the two sampled items of each user next to each other.
    item_batch = sampled_edges[0]
    item_batch = item_batch[torch.argsort(sampled_edges[1])]
    item_batch = item_batch.reshape(-1, 2)
    item_batch = item_batch.T
    return item_batch
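
# Illustrative usage (assumes every sampled user has at least two rated items, otherwise
# the reshape to (-1, 2) would misgroup the pairs; the batch below is made up):
#
#     user_batch = torch.tensor([0, 1, 2, 3])
#     item_pairs = sample_item_batch(user_batch, bipartite_graph)
#     # item_pairs has shape (2, len(user_batch)): two sampled items per user.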

@torch.no_grad()
def inference_model(model, bipartite_graph, batch_size, hidden_dim, device):
    """Compute embeddings for every item in batches; return them normalized as a NumPy array."""
    model.eval()
    item_embeddings = torch.zeros(bipartite_graph.num_nodes("Item"), hidden_dim).to(device)
    for items_batch in tqdm(torch.utils.data.DataLoader(
        torch.arange(bipartite_graph.num_nodes("Item")),
        batch_size=batch_size,
        shuffle=True
    )):
        # Writes are indexed by item id, so the (shuffled) batch order does not matter.
        item_embeddings[items_batch] = model(items_batch.to(device))
    item_embeddings = normalize_embeddings(item_embeddings.cpu().numpy())
    return item_embeddings
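
# Illustrative end-to-end usage (the model, paths, and hyper-parameters are assumptions,
# not from the original file):
#
#     fix_random(42)
#     bipartite_graph, graph = prepare_graphs("data/items.csv", "data/ratings.csv")
#     item_embeddings = inference_model(
#         model, bipartite_graph, batch_size=1024, hidden_dim=128, device="cuda"
#     )
#     # item_embeddings is a NumPy array of shape (num_items, hidden_dim).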