|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import, division, unicode_literals |
|
|
|
import numpy as np |
|
import re |
|
import inspect |
|
from torch import optim |
|
|
|
|
|
def create_dictionary(sentences):
    """
    Build a frequency-ranked vocabulary from tokenized sentences.

    `sentences` is an iterable of iterables of hashable tokens.  Tokens are
    ranked by decreasing occurrence count; the special tokens '<s>', '</s>'
    and '<p>' are assigned very large pseudo-counts so that they come first
    (in that order) for any realistic corpus.

    Returns a pair `(id2word, word2id)`: the list of tokens ordered by rank,
    and the dict mapping each token back to its index in that list.
    """
    counts = {}
    for sentence in sentences:
        for token in sentence:
            counts[token] = counts.get(token, 0) + 1

    # Pseudo-counts for the special tokens; any count collected above for
    # the same token is overwritten.
    counts['<s>'] = 1e9 + 4
    counts['</s>'] = 1e9 + 3
    counts['<p>'] = 1e9 + 2

    # Highest count first.  The sort is stable, so tokens with equal counts
    # keep the order in which the dict yields them.
    ranked = sorted(counts.items(), key=lambda item: -item[1])

    id2word = [token for token, _ in ranked]
    word2id = {token: index for index, token in enumerate(id2word)}
    return id2word, word2id
|
|
|
|
|
def cosine(u, v):
    """
    Return the cosine similarity of `u` and `v`.

    Computed as `np.dot(u, v)` divided by the product of
    `np.linalg.norm(u)` and `np.linalg.norm(v)`.  There is no explicit
    guard for a zero norm: the division is performed as-is.
    """
    dot_product = np.dot(u, v)
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    return dot_product / norm_product
|
|
|
|
|
class dotdict(dict):
    """
    A dict whose items can also be read, written and deleted as attributes.

    Reading an attribute that is not a key returns None instead of raising
    AttributeError; deleting one raises KeyError.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; falls back to the
        # dict contents and yields None for unknown keys.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment is stored as a dict item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Attribute deletion removes the dict item of the same name.
        dict.__delitem__(self, name)
|
|
|
|
|
def get_optimizer(s):
    """
    Parse an optimizer specification string.

    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"

    That is, a method name optionally followed by comma-separated
    `name=value` pairs, where every value is a plain decimal number
    (optional sign, digits, optional fractional part).

    Returns a pair `(optim_fn, optim_params)`: the `torch.optim` optimizer
    class selected by the method name, and a dict mapping each parameter
    name to its value converted to float.

    Raises AssertionError if a pair is not of the form `name=value`, if a
    value is not a plain decimal number, or if "sgd" is requested without
    `lr`.  Raises Exception if the method name is unknown or if a parameter
    is not a positional argument of the optimizer's constructor.
    """
    method, has_params, raw_params = s.partition(',')

    optim_params = {}
    if has_params:
        for pair in raw_params.split(','):
            split = pair.split('=')
            assert len(split) == 2, \
                'Malformed optimizer parameter: "%s"' % pair
            # Raw string: "\d" in a normal string literal is an invalid
            # escape sequence on recent Python versions.
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$",
                            split[1]) is not None, \
                'Invalid value for optimizer parameter: "%s"' % pair
            optim_params[split[0]] = float(split[1])

    # Method name -> attribute name in torch.optim (resolved lazily so only
    # the requested optimizer is looked up).
    optimizer_names = {
        'adadelta': 'Adadelta',
        'adagrad': 'Adagrad',
        'adam': 'Adam',
        'adamax': 'Adamax',
        'asgd': 'ASGD',
        'rmsprop': 'RMSprop',
        'rprop': 'Rprop',
        'sgd': 'SGD',
    }
    if method not in optimizer_names:
        raise Exception('Unknown optimization method: "%s"' % method)
    optim_fn = getattr(optim, optimizer_names[method])
    if method == 'sgd':
        assert 'lr' in optim_params, 'sgd requires the "lr" parameter'

    # inspect.getargspec was removed in Python 3.11 and rejects signatures
    # with keyword-only arguments; prefer getfullargspec and fall back to
    # getargspec only where getfullargspec does not exist (Python 2).
    get_argspec = getattr(inspect, 'getfullargspec', None) \
        or inspect.getargspec
    expected_args = get_argspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(sorted(optim_params.keys()))))

    return optim_fn, optim_params
|
|