File size: 1,569 Bytes
0b9dd86
036190a
 
 
 
 
 
 
 
 
 
 
 
 
 
6566ac0
 
e129d30
036190a
4af56cd
44fc0ac
036190a
4af56cd
036190a
 
 
d1bb134
036190a
fde12d7
036190a
 
 
 
 
 
 
 
 
8ab5779
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import torch
import streamlit as st
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from transformers import ElectraModel, AutoConfig, GPT2LMHeadModel
from transformers.activations import get_activation
from transformers import AutoTokenizer


# Preferred torch device: CUDA when available, otherwise the CPU.
# NOTE(review): nothing in the visible part of this script moves the model
# or its inputs to this device, so inference below runs wherever
# `from_pretrained` places the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from transformers import AutoTokenizer, AutoModelForMaskedLM

# Checkpoint identifier for the masked-language model, editable in the UI.
artist_name = st.text_input("Model", "roberta-base")

# Load the tokenizer from the SAME checkpoint as the model. The tokenizer was
# previously pinned to "roberta-base", so selecting any other model paired it
# with the wrong vocabulary and mask token id.
tokenizer = AutoTokenizer.from_pretrained(artist_name)
model = AutoModelForMaskedLM.from_pretrained(artist_name)

# Default prompt pre-filled into the text area of the form.
first = "Boston is a <mask> place to live."

# Input form: the user enters a prompt containing one or more mask tokens and,
# on submit, the top candidate fill-ins for every masked position are shown.
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter Text. Put <mask> where you want the model to fill in the blank. You can use more than one at a time.', value=first)
    submit_button = st.form_submit_button(label='Submit')

    if submit_button:
        # Encode the prompt as a (1, seq_len) tensor of token ids. Truncation
        # keeps an over-long prompt from exceeding the tokenizer's configured
        # maximum length and crashing the forward pass.
        token_ids = tokenizer.encode(prompt, return_tensors='pt', truncation=True)

        # Positions in the sequence that hold the tokenizer's mask token.
        mask_positions = (
            (token_ids[0] == tokenizer.mask_token_id).nonzero().flatten().tolist()
        )

        if not mask_positions:
            # Without this the form would silently show nothing.
            st.warning(
                f"No {tokenizer.mask_token} token found in the text; "
                "add at least one for the model to fill in."
            )
        else:
            with torch.no_grad():
                output = model(token_ids)
            # First element of the model output, restricted to the single
            # batch entry: one row of vocabulary scores per input position.
            logits = output[0][0]

            # Never ask topk for more entries than the vocabulary provides.
            top_k = min(100, logits.shape[-1])

            for mask_number, mask_index in enumerate(mask_positions, start=1):
                idx = torch.topk(logits[mask_index], k=top_k, dim=0)[1]
                words = [tokenizer.decode(i.item()).strip() for i in idx]

                # Number the boxes when there are several masks so each
                # output is identifiable and every widget stays distinct,
                # even if two masks yield identical predictions.
                if len(mask_positions) == 1:
                    label = 'Infill:'
                else:
                    label = f'Infill (mask {mask_number}):'

                # The text area shows a string, so join the candidates
                # (best first) instead of passing the raw list.
                st.text_area(label=label, value=', '.join(words))