kadabengaran's picture
Create main.py
5ca6171
try:
import torch
import pandas as pd
import streamlit as st
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from stqdm import stqdm
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
print(e)
# Configuration: adapter location and the label vocabulary of the classifier.
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

# Class index -> human-readable label.
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}

# Inverse mapping and class count, both derived from id2label so the three
# values always agree.
label2id = {label: index for index, label in id2label.items()}
numLabels = len(id2label)
def get_device():
    """Return the torch device to run on: CUDA when available, otherwise CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
# Resolve the compute device once at import time; USE_CUDA records whether
# that device is a CUDA device.
device = get_device()
USE_CUDA = device.type == 'cuda'
# Reverse lookup: value -> key
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Entries are examined in the dictionary's iteration order. Returns
    ``None`` when no value matches.
    """
    return next((key for key, value in my_dict.items() if val == value), None)
def load_tokenizer(model_path):
    """Create and return the tokenizer for ``model_path``.

    Delegates to ``AutoTokenizer.from_pretrained`` with
    ``add_prefix_space=True``.
    """
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
def remove_special_characters(text):
    """Lowercase ``text`` and reduce it to letters separated by single spaces.

    After lowercasing, every character that is not an ASCII letter, digit or
    whitespace is replaced by a space, digits are replaced by spaces, and
    each run of whitespace is collapsed to one space. Leading and trailing
    whitespace is collapsed, not stripped.
    """
    substitutions = (
        (r'[^a-zA-Z0-9\s]', ' '),  # remove special characters
        (r'[0-9]', ' '),           # remove digits
        (r"\s+", " "),             # collapse whitespace runs into one space
    )
    cleaned = text.lower()  # case folding
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
# Cached as a shared resource: Streamlit re-executes the script on every
# widget interaction, and without the cache the model and tokenizer would be
# loaded again on each rerun.
@st.cache_resource
def load_model():
    """Load the LoRA-adapted classifier and the base model's tokenizer.

    Reads the adapter configuration from ``MODELS_PATH``, loads the base
    sequence-classification model named in that configuration with this
    file's label mapping, loads the tokenizer of the same base model, and
    applies the adapter from ``MODELS_PATH`` to the base model.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    peft_config = PeftConfig.from_pretrained(MODELS_PATH)
    base_name = peft_config.base_model_name_or_path
    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_name, num_labels=numLabels, id2label=id2label, label2id=label2id
    )
    tokenizer = AutoTokenizer.from_pretrained(base_name)
    model = PeftModel.from_pretrained(base_model, MODELS_PATH)
    return model, tokenizer
def classify_single(text, model, tokenizer, device):
    """Classify one piece of text and return its label name.

    Args:
        text: The input string to classify.
        model: Classification model; it is called with the encoded token
            ids and its result must expose ``.logits``.
        tokenizer: Tokenizer whose ``encode`` method produces the token ids.
        device: ``torch.device`` on which inference runs.

    Returns:
        The ``id2label`` entry of the highest-scoring class.
    """
    # Keep the model on the same device as the inputs.
    model.to(device)
    # Truncate to the tokenizer's maximum length so that long inputs cannot
    # exceed what the model accepts.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True).to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(inputs).logits
    # Take the argmax over the class dimension; a single text is encoded, so
    # exactly one prediction is expected.
    predicted_id = logits.argmax(dim=-1).item()
    return id2label[predicted_id]
# Names of the UI input modes, in display order.
tab_labels = [
    "Single Input",
    "Multiple Input",
]
class App:
    """Streamlit page that classifies a single piece of user-entered text."""

    def __init__(self):
        # CSV-related fields; no method of this class reads them
        # (presumably reserved for the multiple-input mode).
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        # Current contents of the text area, set by render_single_input().
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        """Render the page: banner, optional CUDA badge, text input, button."""
        model, tokenizer = load_model()

        banner_html = (
            "\n"
            '<div style="padding:10px">\n'
            '<h1 style="color:white;text-align:center;">User Question Classification</h1>\n'
            "</div>\n"
        )
        st.markdown(banner_html, unsafe_allow_html=True)
        st.markdown("")

        # Show the CUDA badge in the sidebar only when running on CUDA.
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)

        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Show the text area and store its current contents on the instance."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Show the Process button; when clicked, classify the entered text.

        Displays a warning instead of a result when no text was entered.
        """
        if not st.button("Process"):
            return

        entered_text = self.input_text
        if not entered_text:
            st.warning('Please enter text to process', icon="⚠️")
            return

        classification_result = classify_single(entered_text, model, tokenizer, device)
        st.write("Classification result:", classification_result)
# HTML/CSS badge rendered in the sidebar when CUDA is in use.
# `left` carries an explicit unit: a bare non-zero number is not a valid CSS
# length, so the declaration would be discarded by the browser.
footer = """<style>
.footer {
position: fixed;
left: 10px;
bottom: 0;
width: 100%;
color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""
if __name__ == "__main__":
    # Build the app and render the page when this file is executed as a script.
    app = App()
    app.run()