try:
    import torch
    import pandas as pd
    import streamlit as st
    import re
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    print(e)

# Config
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"
id2label = {0: 'Other', 1: 'Problem Discovery', 2: 'Information Seeking', 3: 'Feature Request'}
label2id = {'Other': 0, 'Problem Discovery': 1, 'Information Seeking': 2, 'Feature Request': 3}
numLabels = 4


def get_device():
    # Prefer GPU when available, otherwise fall back to CPU
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')


USE_CUDA = False
device = get_device()
if device.type == 'cuda':
    USE_CUDA = True


# Get the key for a given value in a dictionary
def get_key(val, my_dict):
    for key, value in my_dict.items():
        if val == value:
            return key


def load_tokenizer(model_path):
    # create tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
    return tokenizer


def remove_special_characters(text):
    # case folding
    text = text.lower()
    # remove special characters and digits
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    text = re.sub(r'[0-9]', ' ', text)
    # replace multiple whitespace characters with a single space
    text = re.sub(r"\s+", " ", text)
    return text


def load_model():
    # Load the base model referenced by the PEFT config, then attach the LoRA adapter
    config = PeftConfig.from_pretrained(MODELS_PATH)
    inference_model = AutoModelForSequenceClassification.from_pretrained(
        config.base_model_name_or_path,
        num_labels=numLabels,
        id2label=id2label,
        label2id=label2id
    )
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(inference_model, MODELS_PATH)
    return model, tokenizer


def classify_single(text, model, tokenizer, device):
    if device.type == 'cuda':
        model.cuda()
    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt").to(device)
    # compute logits
    logits = model(inputs).logits
    # convert logits to label
    predictions = torch.argmax(logits)
    return id2label[predictions.tolist()]


tab_labels = ["Single Input", "Multiple Input"]


class App:
    def __init__(self):
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        model, tokenizer = load_model()
        html_temp = """