# MASKK / app.py
import torch
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForMaskedLM
st.title('Masked LM Demo')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer from the same checkpoint as the model so the two stay in sync.
artist_name = st.text_input("Model", "BigSalmon/FormalBerta")
tokenizer = AutoTokenizer.from_pretrained(artist_name)
model = AutoModelForMaskedLM.from_pretrained(artist_name).to(device)
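
# Streamlit re-runs this whole script on every interaction, so the model above
# is reloaded each time. A minimal sketch of a cached loader, assuming a
# Streamlit version that ships st.cache_resource (the helper name load_model
# is illustrative, not part of the original app):
#
#     @st.cache_resource
#     def load_model(name):
#         return AutoModelForMaskedLM.from_pretrained(name).to(device)
#
#     model = load_model(artist_name)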
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence')
    submit_button = st.form_submit_button(label='Submit')
    if submit_button:
        a_list = []
        token_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
        # Find every [MASK] position in the encoded input.
        masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
        masked_pos = [mask.item() for mask in masked_position]
        with torch.no_grad():
            output = model(token_ids)
        # output[0] holds the vocabulary logits for each input position.
        logits = output[0].squeeze()
        for mask_index in masked_pos:
            mask_logits = logits[mask_index]
            # Keep the 100 highest-scoring token ids for this mask.
            idx = torch.topk(mask_logits, k=100, dim=0)[1]
            words = [tokenizer.decode(i.item()).strip() for i in idx]
            a_list.append(words)
        # Flatten the predictions and split them into two equal columns
        # (each mask contributes 100 words, so the total is always even).
        words = [word for mask_words in a_list for word in mask_words]
        middle_index = len(words) // 2
        first_half = words[:middle_index]
        second_half = words[middle_index:]
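        # For reference, the fill-mask pipeline from transformers performs the
        # same top-k lookup; a sketch of the equivalent call (not used here,
        # and assuming the checkpoint exposes a mask token):
        #
        #     from transformers import pipeline
        #     unmasker = pipeline("fill-mask", model=artist_name)
        #     predictions = unmasker(prompt, top_k=100)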
        st.write(pd.DataFrame({
            'first column': first_half,
            'second column': second_half,
        }))
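
# To try the demo locally, a typical invocation (assuming streamlit, torch,
# transformers, and pandas are installed):
#
#     streamlit run app.py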