try:
    import torch
    import pandas as pd
    import streamlit as st
    import re
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    print(e)
# Config
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"
id2label = {0: 'Other', 1: 'Problem Discovery', 2: 'Information Seeking', 3: 'Feature Request'}
label2id = {'Other': 0, 'Problem Discovery': 1, 'Information Seeking': 2, 'Feature Request': 3}
numLabels = 4
def get_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

USE_CUDA = False
device = get_device()
if device.type == 'cuda':
    USE_CUDA = True
# Reverse lookup: return the dict key that maps to a given value
def get_key(val, my_dict):
    for key, value in my_dict.items():
        if val == value:
            return key
def load_tokenizer(model_path):
    # create tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
    return tokenizer
def remove_special_characters(text):
    # case folding
    text = text.lower()
    # remove special (non-alphanumeric) characters
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    # remove digits
    text = re.sub(r'[0-9]', ' ', text)
    # replace multiple whitespace characters with a single space
    text = re.sub(r"\s+", " ", text)
    return text
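
# For illustration only (not part of the original app): the cleaning above maps
#   remove_special_characters("Can't login!! v2.0")
# to "can t login v " -- punctuation and digits become spaces, then runs of
# whitespace collapse to single spaces; no leading/trailing strip is applied.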
def load_model():
    # load the LoRA adapter config, then the base model it was trained on
    config = PeftConfig.from_pretrained(MODELS_PATH)
    inference_model = AutoModelForSequenceClassification.from_pretrained(
        config.base_model_name_or_path, num_labels=numLabels, id2label=id2label, label2id=label2id
    )
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # attach the LoRA adapter weights on top of the base model
    model = PeftModel.from_pretrained(inference_model, MODELS_PATH)
    return model, tokenizer
def classify_single(text, model, tokenizer, device):
    if device.type == 'cuda':
        model.cuda()
    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt").to(device)
    # compute logits without tracking gradients (inference only)
    with torch.no_grad():
        logits = model(inputs).logits
    # convert the highest-scoring logit index to its label
    predictions = torch.argmax(logits)
    return id2label[predictions.item()]
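
# The pandas/stqdm imports and the "Multiple Input" tab label suggest a CSV
# batch path that is not wired up here. Below is a minimal sketch of such a
# helper; classify_multiple, its text_column parameter, and the added "label"
# column are assumptions for illustration, not part of the original app.
def classify_multiple(df, model, tokenizer, device, text_column="text"):
    if device.type == 'cuda':
        model.cuda()
    results = []
    # stqdm renders a progress bar inside the Streamlit app while iterating
    for text in stqdm(df[text_column].astype(str).tolist()):
        inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        results.append(id2label[torch.argmax(logits).item()])
    df = df.copy()
    df["label"] = results
    return df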
tab_labels = ["Single Input", "Multiple Input"]

class App:
    def __init__(self):
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None
    def run(self):
        model, tokenizer = load_model()
        html_temp = """
        <div style="padding:10px">
        <h1 style="color:white;text-align:center;">User Question Classification</h1>
        </div>
        """
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown("")
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)
        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)
    def render_single_input(self):
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        if st.button("Process"):
            input_text = self.input_text
            if input_text:
                classification_result = classify_single(input_text, model, tokenizer, device)
                st.write("Classification result:", classification_result)
            else:
                st.warning('Please enter text to process', icon="⚠️")
# Sidebar footer shown when CUDA is available
footer = """<style>
.footer {
    position: fixed;
    left: 10px;
    bottom: 0;
    width: 100%;
    color: #ffa9365e;
}
</style>
<div class="footer">
    <p>CUDA enabled</p>
</div>
"""
if __name__ == "__main__":
    app = App()
    app.run()