import torch
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForMaskedLM

st.title('KoGPT2 Demo')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and masked-LM model for whatever checkpoint name is entered.
model_name = st.text_input("Model", "BigSalmon/FormalBerta")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)

with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence')
    submit_button = st.form_submit_button(label='Submit')
    if submit_button:
        token_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

        # Positions of every mask token in the encoded input.
        masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
        masked_pos = [mask.item() for mask in masked_position]

        with torch.no_grad():
            output = model(token_ids)
        # Output of the MLM head: one logit per vocabulary entry at each position.
        logits = output[0].squeeze()

        # For each masked position, show the top-100 candidate tokens,
        # split into two 50-row columns so the table stays readable.
        for mask_index in masked_pos:
            mask_logits = logits[mask_index]
            idx = torch.topk(mask_logits, k=100, dim=0)[1]
            words = [tokenizer.decode(i.item()).strip() for i in idx]
            middle_index = len(words) // 2
            st.write(pd.DataFrame({
                'first column': words[:middle_index],
                'second column': words[middle_index:],
            }))
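
# Usage sketch (the filename app.py is an assumption, not from the original):
#   streamlit run app.py
# Then enter a sentence containing the model's mask token (tokenizer.mask_token,
# e.g. "[MASK]" for BERT-style vocabularies) and press Submit to see the
# top predicted fill-ins for each masked position.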