# Standard-library import kept outside the guarded block below so that a
# failing third-party import cannot leave `re` undefined.
import re

try:
    import torch
    import pandas as pd
    import streamlit as st
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    # Best-effort import: report the problem and keep going, as before.
    print(e)

# Config
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}
label2id = {
    'Other': 0,
    'Problem Discovery': 1,
    'Information Seeking': 2,
    'Feature Request': 3,
}
numLabels = 4


def get_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')


device = get_device()
USE_CUDA = device.type == 'cuda'


# Get the Keys
def get_key(val, my_dict):
    """Return the first key in ``my_dict`` whose value equals ``val``.

    Returns ``None`` when no value matches.
    """
    for key, value in my_dict.items():
        if val == value:
            return key
    return None


def load_tokenizer(model_path):
    """Create a tokenizer for ``model_path`` with ``add_prefix_space=True``."""
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)


def remove_special_characters(text):
    """Lower-case ``text`` and reduce it to ASCII letters and single spaces.

    Every character that is not ``a``-``z`` or whitespace (digits included)
    becomes a space, then each run of whitespace collapses to one space.
    Leading/trailing whitespace is collapsed but not stripped.
    """
    # case folding
    text = text.lower()
    # remove special characters and digits; after lower-casing this single
    # class is equivalent to dropping non-alphanumerics and then digits
    text = re.sub(r'[^a-z\s]', ' ', text)
    # replace multiple whitespace characters with a single space
    return re.sub(r"\s+", " ", text)


def load_model():
    """Load the classifier and its tokenizer.

    The adapter config at ``MODELS_PATH`` names the base checkpoint. The base
    sequence-classification model is built with this module's label mappings,
    the tokenizer is loaded from the same base checkpoint, and the adapter
    weights from ``MODELS_PATH`` are attached on top.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    config = PeftConfig.from_pretrained(MODELS_PATH)
    base_name = config.base_model_name_or_path
    inference_model = AutoModelForSequenceClassification.from_pretrained(
        base_name,
        num_labels=numLabels,
        id2label=id2label,
        label2id=label2id,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_name)
    model = PeftModel.from_pretrained(inference_model, MODELS_PATH)
    return model, tokenizer


def classify_single(text, model, tokenizer, device):
    """Classify one piece of text and return its label name.

    Args:
        text: The raw text to classify.
        model: Callable model whose output exposes ``.logits``.
        tokenizer: Tokenizer providing ``encode(..., return_tensors="pt")``.
        device: ``torch.device`` on which to run inference.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    model.to(device)
    # Inference only: switch off dropout so repeated calls agree.
    model.eval()
    # tokenize text; truncate to the tokenizer's configured maximum length so
    # very long input cannot exceed what the model accepts
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True).to(device)
    # compute logits without recording an autograd graph
    with torch.no_grad():
        logits = model(inputs).logits
    # convert logits to label (a single input yields a single prediction)
    predicted_id = logits.argmax(dim=-1).item()
    return id2label[predicted_id]
tab_labels = ["Single Input", "Multiple Input"]


class App:
    """Streamlit front end for classifying a single user question."""

    def __init__(self):
        # Accepted upload extensions and CSV state; not read by the methods
        # visible in this class.
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        """Render the page: title, text input, divider and Process button."""
        # Streamlit re-executes the script on every interaction; going through
        # st.cache_resource keeps the loaded model and tokenizer between
        # reruns instead of reloading them on every button click.
        model, tokenizer = st.cache_resource(load_model)()
        html_temp = """

User Question Classification

"""
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown("")
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)
        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Show the text area and store its current content on the instance."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Show the Process button and, when clicked, classify the input.

        Empty or whitespace-only input produces a warning instead of a
        classification.
        """
        if not st.button("Process"):
            return
        input_text = self.input_text
        if input_text and input_text.strip():
            classification_result = classify_single(input_text, model, tokenizer, device)
            st.write("Classification result:", classification_result)
        else:
            st.warning('Please enter text to process', icon="⚠️")


# Sidebar markup rendered by App.run when CUDA is in use.
footer = """ """

if __name__ == "__main__":
    app = App()
    app.run()