Simon Salmon committed
Commit 036190a · 1 Parent(s): a324b66

Create app.py

Files changed (1)
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
+ import streamlit as st
+ import numpy as np
+ import pandas as pd
+ import os
+ import torch
+ import torch.nn as nn
+ from transformers import ElectraModel, AutoConfig, GPT2LMHeadModel
+ from transformers.activations import get_activation
+ from transformers import AutoTokenizer
+
+
+ st.title('KoGPT2 Demo')
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
+ tokenizer = AutoTokenizer.from_pretrained("BigSalmon/FormalBerta")
+ model = AutoModelForMaskedLM.from_pretrained("BigSalmon/FormalBerta")
+
+ with st.form(key='my_form'):
+     prompt = st.text_area(label='Enter sentence')
+     submit_button = st.form_submit_button(label='Submit')
+
+ if submit_button:
+     token_ids = tokenizer.encode(prompt, return_tensors='pt')
+     token_ids_tk = tokenizer.tokenize(prompt)  # plain token strings for the prompt
+     masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
+     masked_pos = [mask.item() for mask in masked_position]
+     with torch.no_grad():
+         output = model(token_ids)
+         last_hidden_state = output[0].squeeze()  # logits, shape (seq_len, vocab_size)
+     for mask_index in masked_pos:
+         mask_hidden_state = last_hidden_state[mask_index]
+         idx = torch.topk(mask_hidden_state, k=100, dim=0)[1]  # top-100 candidate token ids
+         words = [tokenizer.decode(i.item()).strip() for i in idx]
+         st.write(words)