""" |
|
Semantic Relatedness (supervised) with Pytorch |
|
""" |
|
from __future__ import absolute_import, division, unicode_literals |
|
|
|
import copy |
|
import numpy as np |
|
|
|
import torch |
|
from torch import nn |
|
import torch.optim as optim |
|
|
|
from scipy.stats import pearsonr, spearmanr |
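
# NOTE: train['y'], valid['y'] and test['y'] are expected to be probability
# distributions over the integer relatedness scores 1..nclasses, since the
# class below regresses softmax outputs onto them with an MSE loss and maps
# them back to a scalar via np.dot(probas, np.arange(1, 6)).
# Below is a minimal sketch of such an encoder in the style of the Tree-LSTM
# recipe (Tai et al.); `encode_labels` is illustrative, not part of the
# original class, and assumes gold scores in [1, nclass].
def encode_labels(labels, nclass=5):
    Y = np.zeros((len(labels), nclass), dtype='float32')
    for j, y in enumerate(labels):
        for i in range(nclass):
            if i + 1 == np.floor(y) + 1:
                Y[j, i] = y - np.floor(y)
            if i + 1 == np.floor(y):
                Y[j, i] = np.floor(y) - y + 1
    return Y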


class RelatednessPytorch(object):
    # Trains a linear + softmax scorer on top of precomputed sentence-pair
    # features for supervised semantic relatedness (e.g. SICK-Relatedness).

    def __init__(self, train, valid, test, devscores, config):
        # fix seed
        np.random.seed(config['seed'])
        torch.manual_seed(config['seed'])
        assert torch.cuda.is_available(), 'torch.cuda required for Relatedness'
        torch.cuda.manual_seed(config['seed'])

        self.train = train
        self.valid = valid
        self.test = test
        self.devscores = devscores

        self.inputdim = train['X'].shape[1]
        self.nclasses = config['nclasses']
        self.seed = config['seed']
        self.l2reg = 0.
        self.batch_size = 64
        self.maxepoch = 1000
        self.early_stop = True

        # Single linear layer followed by a softmax over the score classes
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, self.nclasses),
            nn.Softmax(dim=-1),
        )
        # Sum the squared error over the batch instead of averaging
        # (replaces the deprecated `loss_fn.size_average = False` idiom)
        self.loss_fn = nn.MSELoss(reduction='sum')

        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.loss_fn = self.loss_fn.cuda()

        self.optimizer = optim.Adam(self.model.parameters(),
                                    weight_decay=self.l2reg)

    def prepare_data(self, trainX, trainy, devX, devy, testX, testy):
        # Move all splits to float tensors on the GPU
        trainX = torch.from_numpy(trainX).float().cuda()
        trainy = torch.from_numpy(trainy).float().cuda()
        devX = torch.from_numpy(devX).float().cuda()
        devy = torch.from_numpy(devy).float().cuda()
        testX = torch.from_numpy(testX).float().cuda()
        testy = torch.from_numpy(testy).float().cuda()

        return trainX, trainy, devX, devy, testX, testy

    def run(self):
        self.nepoch = 0
        bestpr = -1
        early_stop_count = 0
        r = np.arange(1, 6)
        stop_train = False

        # Preparing data
        trainX, trainy, devX, devy, testX, testy = self.prepare_data(
            self.train['X'], self.train['y'],
            self.valid['X'], self.valid['y'],
            self.test['X'], self.test['y'])

        # Train in chunks of 50 epochs, checking dev correlation in between
        while not stop_train and self.nepoch <= self.maxepoch:
            self.trainepoch(trainX, trainy, nepoches=50)
            # Expected score under the predicted distribution over scores 1..5
            yhat = np.dot(self.predict_proba(devX), r)
            pr = spearmanr(yhat, self.devscores)[0]
            pr = 0 if pr != pr else pr  # guard against NaN (zero variance)

            # Early stopping on dev correlation
            if pr > bestpr:
                bestpr = pr
                bestmodel = copy.deepcopy(self.model)
            elif self.early_stop:
                if early_stop_count >= 3:
                    stop_train = True
                early_stop_count += 1
        self.model = bestmodel

        yhat = np.dot(self.predict_proba(testX), r)

        return bestpr, yhat

    def trainepoch(self, X, y, nepoches=1):
        self.model.train()
        for _ in range(self.nepoch, self.nepoch + nepoches):
            permutation = np.random.permutation(len(X))
            all_costs = []
            for i in range(0, len(X), self.batch_size):
                # Shuffled mini-batch
                idx = torch.from_numpy(permutation[i:i + self.batch_size]).long().cuda()
                Xbatch = X[idx]
                ybatch = y[idx]
                output = self.model(Xbatch)

                # MSE between predicted and target score distributions
                loss = self.loss_fn(output, ybatch)
                all_costs.append(loss.item())

                # Backward pass and parameter update
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
        self.nepoch += nepoches

    def predict_proba(self, devX):
        self.model.eval()
        probas = []
        with torch.no_grad():
            # Batched forward pass; accumulate softmax outputs on the CPU
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                if len(probas) == 0:
                    probas = self.model(Xbatch).cpu().numpy()
                else:
                    probas = np.concatenate(
                        (probas, self.model(Xbatch).cpu().numpy()), axis=0)
        return probas
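

# A minimal usage sketch (not part of the original module): fits the probe on
# random features and scores just to illustrate the expected input format.
# It assumes a CUDA device (the constructor asserts one) and reuses the
# illustrative `encode_labels` helper defined above.
if __name__ == "__main__":
    rng = np.random.RandomState(1111)
    nsamples, dim = 256, 128
    feats = rng.randn(nsamples, dim).astype('float32')
    scores = rng.uniform(1, 5, nsamples)  # gold relatedness scores in [1, 5]

    data = {'X': feats, 'y': encode_labels(scores)}
    config = {'seed': 1111, 'nclasses': 5}
    probe = RelatednessPytorch(train=data, valid=data, test=data,
                               devscores=scores, config=config)
    devpr, preds = probe.run()
    print('dev correlation: %.4f' % devpr)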