import torch
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForMaskedLM

st.title('Masked LM Demo')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = st.text_input("Model", "BigSalmon/FormalBerta")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)
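
# Note: the model reloads on every widget interaction; in newer Streamlit
# versions, st.cache_resource can cache it across reruns.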

with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence')
    submit_button = st.form_submit_button(label='Submit')
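
# submit_button is True only on the rerun triggered by pressing Submit.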
if submit_button:
    a_list = []
    # Encode the prompt and find the position of every [MASK] token
    token_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
    masked_pos = [mask.item() for mask in masked_position]
    with torch.no_grad():
        output = model(token_ids)
    # output[0] holds the masked-LM logits; shape (seq_len, vocab_size) after squeeze
    logits = output[0].squeeze()
    for mask_index in masked_pos:
        # Top-100 vocabulary ids by logit at this mask position
        idx = torch.topk(logits[mask_index], k=100, dim=0)[1]
        words = [tokenizer.decode(i.item()).strip() for i in idx]
        a_list.append(words)
    # Split the per-mask candidate lists across two display columns
    length = len(a_list)
    middle_index = length // 2
    first_half = a_list[:middle_index]
    second_half = a_list[middle_index:]

    # pd.Series pads the shorter column with NaN when the number of masks is odd
    st.write(pd.DataFrame({
        'first column': pd.Series(first_half),
        'second column': pd.Series(second_half),
    }))
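
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py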