# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

"""
PyTorch classifier classes in the style of scikit-learn.
Classifiers include Logistic Regression and MLP.
"""

from __future__ import absolute_import, division, unicode_literals

import copy

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn

from senteval import utils


class PyTorchClassifier(object):
    def __init__(self, inputdim, nclasses, l2reg=0., batch_size=64, seed=1111,
                 cudaEfficient=False):
        # Fix seeds for reproducibility.
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        self.inputdim = inputdim
        self.nclasses = nclasses
        self.l2reg = l2reg
        self.batch_size = batch_size
        self.cudaEfficient = cudaEfficient

    def prepare_split(self, X, y, validation_data=None, validation_split=None):
        # Prepare the validation set: either use the one provided, or hold
        # out a random validation_split fraction of (X, y).
        assert validation_split or validation_data
        if validation_data is not None:
            trainX, trainy = X, y
            devX, devy = validation_data
        else:
            permutation = np.random.permutation(len(X))
            trainidx = permutation[int(validation_split * len(X)):]
            devidx = permutation[0:int(validation_split * len(X))]
            trainX, trainy = X[trainidx], y[trainidx]
            devX, devy = X[devidx], y[devidx]

        # With cudaEfficient, keep the full dataset on CPU and move only
        # mini-batches to the GPU during training.
        device = torch.device('cpu') if self.cudaEfficient else torch.device('cuda')

        trainX = torch.from_numpy(trainX).to(device, dtype=torch.float32)
        trainy = torch.from_numpy(trainy).to(device, dtype=torch.int64)
        devX = torch.from_numpy(devX).to(device, dtype=torch.float32)
        devy = torch.from_numpy(devy).to(device, dtype=torch.int64)

        return trainX, trainy, devX, devy

    def fit(self, X, y, validation_data=None, validation_split=None,
            early_stop=True):
        self.nepoch = 0
        bestaccuracy = -1
        stop_train = False
        early_stop_count = 0

        # Preparing validation data
        trainX, trainy, devX, devy = self.prepare_split(X, y, validation_data,
                                                        validation_split)

        # Train until dev accuracy has failed to improve `tenacity` times
        # (early stopping) or `max_epoch` is reached; keep the model with
        # the best dev accuracy.
        while not stop_train and self.nepoch <= self.max_epoch:
            self.trainepoch(trainX, trainy, epoch_size=self.epoch_size)
            accuracy = self.score(devX, devy)
            if accuracy > bestaccuracy:
                bestaccuracy = accuracy
                bestmodel = copy.deepcopy(self.model)
            elif early_stop:
                if early_stop_count >= self.tenacity:
                    stop_train = True
                early_stop_count += 1
        self.model = bestmodel
        return bestaccuracy

    def trainepoch(self, X, y, epoch_size=1):
        self.model.train()
        for _ in range(self.nepoch, self.nepoch + epoch_size):
            permutation = np.random.permutation(len(X))
            all_costs = []  # batch losses, kept for inspection/debugging
            for i in range(0, len(X), self.batch_size):
                # forward
                idx = torch.from_numpy(permutation[i:i + self.batch_size]).long().to(X.device)
                Xbatch = X[idx]
                ybatch = y[idx]
                if self.cudaEfficient:
                    Xbatch = Xbatch.cuda()
                    ybatch = ybatch.cuda()
                output = self.model(Xbatch)
                # loss
                loss = self.loss_fn(output, ybatch)
                all_costs.append(loss.item())
                # backward
                self.optimizer.zero_grad()
                loss.backward()
                # update parameters
                self.optimizer.step()
        self.nepoch += epoch_size

    def score(self, devX, devy):
        self.model.eval()
        correct = 0
        # Move the whole dev set to GPU for scoring; with cudaEfficient the
        # dev data was kept on CPU, hence the `or self.cudaEfficient`.
        if not isinstance(devX, torch.cuda.FloatTensor) or self.cudaEfficient:
            devX = torch.FloatTensor(devX).cuda()
            devy = torch.LongTensor(devy).cuda()
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                ybatch = devy[i:i + self.batch_size]
                output = self.model(Xbatch)
                pred = output.data.max(1)[1]
                correct += pred.long().eq(ybatch.data.long()).sum().item()
            accuracy = 1.0 * correct / len(devX)
        return accuracy

    def predict(self, devX):
        self.model.eval()
        if not isinstance(devX, torch.cuda.FloatTensor):
            devX = torch.FloatTensor(devX).cuda()
        yhat = np.array([])
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                output = self.model(Xbatch)
                yhat = np.append(yhat,
                                 output.data.max(1)[1].cpu().numpy())
        # Stack the flat predictions into an (n_samples, 1) column vector.
        yhat = np.vstack(yhat)
        return yhat

    def predict_proba(self, devX):
        self.model.eval()
        if not isinstance(devX, torch.cuda.FloatTensor):
            devX = torch.FloatTensor(devX).cuda()
        probas = []
        with torch.no_grad():
            for i in range(0, len(devX), self.batch_size):
                Xbatch = devX[i:i + self.batch_size]
                # Apply softmax to the logits tensor *before* converting to
                # numpy (the original called F.softmax on a numpy array).
                vals = F.softmax(self.model(Xbatch), dim=-1).data.cpu().numpy()
                probas.append(vals)
        # Concatenate the per-batch probabilities into one
        # (n_samples, nclasses) array.
        return np.concatenate(probas, axis=0)
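
# Note: fit() relies on attributes that subclasses must define before
# training: `self.model`, `self.loss_fn`, `self.optimizer`, and the
# hyperparameters `self.epoch_size`, `self.max_epoch` and `self.tenacity`.
# The MLP class below provides all of them.
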
"""
MLP with PyTorch (nhid=0 --> Logistic Regression)
"""


class MLP(PyTorchClassifier):
    def __init__(self, params, inputdim, nclasses, l2reg=0., batch_size=64,
                 seed=1111, cudaEfficient=False):
        """
        PARAMETERS:
        -nhid:       number of hidden units (0: Logistic Regression)
        -optim:      optimizer ("sgd,lr=0.1", "adam", "rmsprop" ..)
        -tenacity:   how many evaluations dev accuracy may fail to improve
                     before training stops early
        -epoch_size: each epoch corresponds to epoch_size passes over the
                     train set
        -max_epoch:  maximum number of epochs
        -dropout:    dropout rate for the MLP hidden layer
        """
        super(MLP, self).__init__(inputdim, nclasses, l2reg, batch_size,
                                  seed, cudaEfficient)

        self.nhid = 0 if "nhid" not in params else params["nhid"]
        self.optim = "adam" if "optim" not in params else params["optim"]
        self.tenacity = 5 if "tenacity" not in params else params["tenacity"]
        self.epoch_size = 4 if "epoch_size" not in params else params["epoch_size"]
        self.max_epoch = 200 if "max_epoch" not in params else params["max_epoch"]
        self.dropout = 0. if "dropout" not in params else params["dropout"]
        self.batch_size = 64 if "batch_size" not in params else params["batch_size"]

        if self.nhid == 0:
            # Logistic regression: a single linear layer.
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nclasses),
            ).cuda()
        else:
            # One-hidden-layer MLP with dropout and a sigmoid activation.
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nhid),
                nn.Dropout(p=self.dropout),
                nn.Sigmoid(),
                nn.Linear(self.nhid, self.nclasses),
            ).cuda()

        # Sum (rather than average) the loss over each batch; this replaces
        # the deprecated `loss_fn.size_average = False` attribute hack.
        self.loss_fn = nn.CrossEntropyLoss(reduction='sum').cuda()

        optim_fn, optim_params = utils.get_optimizer(self.optim)
        self.optimizer = optim_fn(self.model.parameters(), **optim_params)
        self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
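

# A minimal usage sketch of the scikit-learn-style API, assuming a CUDA
# device is available (the models above are built with .cuda()). The data
# shapes and `params` values here are illustrative assumptions, not values
# prescribed by SentEval.
if __name__ == "__main__":
    rng = np.random.RandomState(1111)
    X = rng.randn(1000, 300).astype(np.float32)  # 1000 examples, 300-dim features
    y = rng.randint(0, 3, size=1000)             # 3 classes

    params = {"nhid": 50, "optim": "adam", "tenacity": 5,
              "epoch_size": 4, "max_epoch": 10, "dropout": 0.1}
    clf = MLP(params, inputdim=300, nclasses=3, l2reg=1e-4)

    # fit() holds out 10% of the data as a dev set and returns the best
    # dev accuracy reached before early stopping.
    devacc = clf.fit(X, y, validation_split=0.1)
    print("best dev accuracy: %.4f" % devacc)

    # predict_proba() returns an (n_samples, nclasses) array of softmax
    # probabilities.
    probas = clf.predict_proba(X[:5])
    print("probas shape:", probas.shape)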