""" |
|
Pytorch Classifier class in the style of scikit-learn |
|
Classifiers include Logistic Regression and MLP |
|
""" |
|
|
|
from __future__ import absolute_import, division, unicode_literals

import copy

import numpy as np

import torch
from torch import nn
import torch.nn.functional as F

from senteval import utils
|
|
|
|
|
class PyTorchClassifier(object):
    def __init__(self, inputdim, nclasses, l2reg=0., batch_size=64, seed=1111,
                 cudaEfficient=False):
        # Fix random seeds for reproducibility.
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        self.inputdim = inputdim
        self.nclasses = nclasses
        self.l2reg = l2reg
        self.batch_size = batch_size
        # When cudaEfficient is True, the full dataset stays on the CPU and
        # only mini-batches are moved to the GPU, saving GPU memory.
        self.cudaEfficient = cudaEfficient
|
|
|
    def prepare_split(self, X, y, validation_data=None, validation_split=None):
        # Build a train/dev split, either from explicit validation data or by
        # holding out a random validation_split fraction of (X, y).
        assert validation_split or validation_data
        if validation_data is not None:
            trainX, trainy = X, y
            devX, devy = validation_data
        else:
            permutation = np.random.permutation(len(X))
            trainidx = permutation[int(validation_split * len(X)):]
            devidx = permutation[0:int(validation_split * len(X))]
            trainX, trainy = X[trainidx], y[trainidx]
            devX, devy = X[devidx], y[devidx]

        device = torch.device('cpu') if self.cudaEfficient else torch.device('cuda')

        trainX = torch.from_numpy(trainX).to(device, dtype=torch.float32)
        trainy = torch.from_numpy(trainy).to(device, dtype=torch.int64)
        devX = torch.from_numpy(devX).to(device, dtype=torch.float32)
        devy = torch.from_numpy(devy).to(device, dtype=torch.int64)

        return trainX, trainy, devX, devy
|
|
|
    def fit(self, X, y, validation_data=None, validation_split=None,
            early_stop=True):
        self.nepoch = 0
        bestaccuracy = -1
        stop_train = False
        early_stop_count = 0

        # Preparing validation data
        trainX, trainy, devX, devy = self.prepare_split(X, y, validation_data,
                                                        validation_split)

        # Train, keeping the model that scores best on the dev set. With
        # early_stop, training halts once dev accuracy has failed to improve
        # `tenacity` times.
        while not stop_train and self.nepoch <= self.max_epoch:
            self.trainepoch(trainX, trainy, epoch_size=self.epoch_size)
            accuracy = self.score(devX, devy)
            if accuracy > bestaccuracy:
                bestaccuracy = accuracy
                bestmodel = copy.deepcopy(self.model)
            elif early_stop:
                if early_stop_count >= self.tenacity:
                    stop_train = True
                early_stop_count += 1
        self.model = bestmodel
        return bestaccuracy
|
|
|
    def trainepoch(self, X, y, epoch_size=1):
        self.model.train()
        for _ in range(self.nepoch, self.nepoch + epoch_size):
            permutation = np.random.permutation(len(X))
            all_costs = []
            for i in range(0, len(X), self.batch_size):
                # The index tensor must live on the same device as X.
                idx = torch.from_numpy(permutation[i:i + self.batch_size]).long().to(X.device)
                Xbatch = X[idx]
                ybatch = y[idx]

                if self.cudaEfficient:
                    Xbatch = Xbatch.cuda()
                    ybatch = ybatch.cuda()

                # forward
                output = self.model(Xbatch)
                loss = self.loss_fn(output, ybatch)
                all_costs.append(loss.item())

                # backward
                self.optimizer.zero_grad()
                loss.backward()

                # update parameters
                self.optimizer.step()
        self.nepoch += epoch_size
|
|
|
    def score(self, devX, devy):
        self.model.eval()
        correct = 0
        # Accept numpy input. With cudaEfficient the data stays on the CPU
        # and mini-batches are moved to the GPU inside the loop below.
        if not torch.is_tensor(devX):
            devX = torch.FloatTensor(devX)
            devy = torch.LongTensor(devy)
        if not self.cudaEfficient:
            devX = devX.cuda()
            devy = devy.cuda()
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                ybatch = devy[i:i + self.batch_size]
                if self.cudaEfficient:
                    Xbatch = Xbatch.cuda()
                    ybatch = ybatch.cuda()
                output = self.model(Xbatch)
                pred = output.max(1)[1]
                correct += pred.long().eq(ybatch.long()).sum().item()
        accuracy = 1.0 * correct / len(devX)
        return accuracy
|
|
|
    def predict(self, devX):
        self.model.eval()
        if not torch.is_tensor(devX):
            devX = torch.FloatTensor(devX)
        if not self.cudaEfficient:
            devX = devX.cuda()
        yhat = np.array([])
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                if self.cudaEfficient:
                    Xbatch = Xbatch.cuda()
                output = self.model(Xbatch)
                yhat = np.append(yhat,
                                 output.max(1)[1].cpu().numpy())
        # np.vstack turns the flat prediction array into a column vector,
        # one row per example.
        yhat = np.vstack(yhat)
        return yhat
|
|
|
    def predict_proba(self, devX):
        self.model.eval()
        if not torch.is_tensor(devX):
            devX = torch.FloatTensor(devX)
        if not self.cudaEfficient:
            devX = devX.cuda()
        probas = []
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                if self.cudaEfficient:
                    Xbatch = Xbatch.cuda()
                # Softmax over the class dimension, then back to numpy.
                vals = F.softmax(self.model(Xbatch), dim=-1).cpu().numpy()
                probas.append(vals)
        return np.concatenate(probas, axis=0)
|
|
|
|
|
""" |
|
MLP with Pytorch (nhid=0 --> Logistic Regression) |
|
""" |
|
|
|
class MLP(PyTorchClassifier):
    def __init__(self, params, inputdim, nclasses, l2reg=0., batch_size=64,
                 seed=1111, cudaEfficient=False):
        """
        PARAMETERS:
        -nhid: number of hidden units (0: Logistic Regression)
        -optim: optimizer ("sgd,lr=0.1", "adam", "rmsprop" ..)
        -tenacity: how many epochs dev accuracy may fail to improve before stopping
        -epoch_size: each epoch corresponds to epoch_size passes over the train set
        -max_epoch: max number of epochs
        -dropout: dropout for the MLP
        """
        super(MLP, self).__init__(inputdim, nclasses, l2reg,
                                  batch_size, seed, cudaEfficient)
|
|
|
        self.nhid = params.get("nhid", 0)
        self.optim = params.get("optim", "adam")
        self.tenacity = params.get("tenacity", 5)
        self.epoch_size = params.get("epoch_size", 4)
        self.max_epoch = params.get("max_epoch", 200)
        self.dropout = params.get("dropout", 0.)
        self.batch_size = params.get("batch_size", 64)
|
|
|
        if self.nhid == 0:
            # nhid == 0: a single linear layer, i.e. logistic regression.
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nclasses),
            ).cuda()
        else:
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nhid),
                nn.Dropout(p=self.dropout),
                nn.Sigmoid(),
                nn.Linear(self.nhid, self.nclasses),
            ).cuda()
|
|
|
        # Sum the loss over the batch rather than averaging (the supported
        # equivalent of the deprecated size_average=False flag).
        self.loss_fn = nn.CrossEntropyLoss(reduction='sum').cuda()

        optim_fn, optim_params = utils.get_optimizer(self.optim)
        self.optimizer = optim_fn(self.model.parameters(), **optim_params)
        self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
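

if __name__ == "__main__":
    # Smoke test: a minimal sketch, assuming a CUDA-capable GPU (the models
    # above are hard-coded to .cuda()). The data is random and this only
    # checks that fit / predict / predict_proba run end to end.
    np.random.seed(0)
    X = np.random.randn(200, 16).astype(np.float32)
    y = np.random.randint(0, 3, size=200)
    clf = MLP({"nhid": 8, "max_epoch": 10}, inputdim=16, nclasses=3)
    print("dev accuracy:", clf.fit(X, y, validation_split=0.2))
    print("predictions:", clf.predict(X).shape)
    print("probabilities:", clf.predict_proba(X).shape)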
|
|