BigSalmon committed on
Commit
60659f5
·
1 Parent(s): 74d993b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st

# Page heading shown at the top of the Streamlit app.
st.title("Paraphrase")
@st.cache(allow_output_mutation=True)
def get_model():
    """Load the seq2seq model and tokenizer, cached across Streamlit reruns.

    Returns:
        tuple: ``(model, tokenizer)`` for the ``chinhon/headline_writer``
        checkpoint, in that order.
    """
    checkpoint = "chinhon/headline_writer"
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return seq2seq, tok
# Fetch the cached model/tokenizer pair once per process.
model, tokenizer = get_model()

# Prefer GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Sidebar controls: sampling temperature (0.7–1.5) and how many
# candidate outputs to generate (1–10). Read as globals by
# translate_to_english below.
temp = st.sidebar.slider("Temperature", 0.7, 1.5)
number_of_outputs = st.sidebar.slider("Number of Outputs", 1, 10)
def translate_to_english(model, tokenizer, text):
    """Generate sampled rewrites of *text* with the seq2seq model.

    Reads the module-level ``device``, ``temp`` and ``number_of_outputs``
    globals set by the Streamlit sidebar.

    Args:
        model: a seq2seq generation model already moved to ``device``.
        tokenizer: the tokenizer matching ``model``.
        text (str): the user-entered input to rewrite.

    Returns:
        list[str]: ``number_of_outputs`` decoded candidate strings.
    """
    # NOTE(review): the explicit " </s>" suffix looks like a T5-era habit;
    # most tokenizers append EOS themselves — confirm against this checkpoint.
    text = text + " </s>"
    # `pad_to_max_length=True` is deprecated in transformers; the documented
    # replacement is padding="max_length". Truncate so over-long input cannot
    # exceed the model's maximum sequence length.
    encoding = tokenizer.encode_plus(
        text,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)
    beam_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_masks,
        do_sample=True,
        max_length=256,
        temperature=temp,
        top_k=120,
        top_p=0.98,
        early_stopping=True,
        num_return_sequences=number_of_outputs,
    )
    # Decode every sampled sequence. (Leftover debug print() removed —
    # it only spammed the server log, never the UI.)
    return [
        tokenizer.decode(
            out, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
        for out in beam_outputs
    ]
# --- Main UI ------------------------------------------------------------
# Echo the user's input, then show generated outputs beneath it.
text = st.text_input("Okay")
st.text("What you wrote: ")
st.write(text)
st.text("Output: ")
if text:
    # Only run generation once the user has typed something.
    translated_text = translate_to_english(model, tokenizer, text)
    # translate_to_english returns a list of strings; the fallback guards
    # against an empty result.
    st.write(translated_text if translated_text else "No translation found")