|
|
import os
|
|
|
import torch
|
|
|
import pandas as pd
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
|
|
|
|
|
|
|
|
|
class PredictSentiment:
|
|
|
def __init__(self,
|
|
|
model_name='alwanrahmana/sentiment-classification-deberta-base',
|
|
|
label_map=None,
|
|
|
device=None):
|
|
|
|
|
|
local_dir = f"./app/inference/pretrained_models/{model_name.replace('/', '_')}"
|
|
|
|
|
|
|
|
|
if os.path.exists(local_dir) and os.listdir(local_dir):
|
|
|
print(f"✅ Local model found at {local_dir}. Loading from local...")
|
|
|
config = AutoConfig.from_pretrained(local_dir)
|
|
|
self.tokenizer = AutoTokenizer.from_pretrained(local_dir)
|
|
|
self.model = AutoModelForSequenceClassification.from_pretrained(local_dir, config=config)
|
|
|
else:
|
|
|
print(f"⬇️ Downloading model from Hugging Face Hub: {model_name} ...")
|
|
|
config = AutoConfig.from_pretrained(model_name)
|
|
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
self.model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
|
|
|
|
|
|
os.makedirs(local_dir, exist_ok=True)
|
|
|
self.tokenizer.save_pretrained(local_dir)
|
|
|
self.model.save_pretrained(local_dir)
|
|
|
print(f"✅ Model saved to {local_dir}")
|
|
|
|
|
|
self.model.eval()
|
|
|
|
|
|
|
|
|
self.DEVICE = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
self.model.to(self.DEVICE)
|
|
|
|
|
|
|
|
|
self.label_map = label_map if label_map else {0: "Negative", 1: "Neutral", 2: "Positive"}
|
|
|
|
|
|
def _predict_sentiment(self, aspect, text, max_len=256):
|
|
|
encoding = self.tokenizer(
|
|
|
aspect,
|
|
|
text,
|
|
|
truncation=True,
|
|
|
padding='max_length',
|
|
|
max_length=max_len,
|
|
|
return_tensors='pt'
|
|
|
)
|
|
|
|
|
|
input_ids = encoding['input_ids'].to(self.DEVICE)
|
|
|
attention_mask = encoding['attention_mask'].to(self.DEVICE)
|
|
|
token_type_ids = encoding.get('token_type_ids', None)
|
|
|
if token_type_ids is not None:
|
|
|
token_type_ids = token_type_ids.to(self.DEVICE)
|
|
|
|
|
|
with torch.no_grad():
|
|
|
outputs = self.model(
|
|
|
input_ids=input_ids,
|
|
|
attention_mask=attention_mask,
|
|
|
token_type_ids=token_type_ids
|
|
|
)
|
|
|
logits = outputs.logits
|
|
|
pred = torch.argmax(logits, dim=1).item()
|
|
|
|
|
|
return self.label_map.get(pred, "Unknown")
|
|
|
|
|
|
def predict(self, df):
|
|
|
"""
|
|
|
Input: DataFrame hasil PredictAspect
|
|
|
Output: DataFrame dengan kolom text, aspect, predicted_sentiment
|
|
|
"""
|
|
|
results = []
|
|
|
|
|
|
for idx, row in df.iterrows():
|
|
|
text = row['text']
|
|
|
aspects = row['aspects']
|
|
|
|
|
|
for aspect in aspects:
|
|
|
sentiment = self._predict_sentiment(aspect, text)
|
|
|
results.append({
|
|
|
'text': text,
|
|
|
'aspect': aspect,
|
|
|
'predicted_sentiment': sentiment
|
|
|
})
|
|
|
|
|
|
result_df = pd.DataFrame(results)
|
|
|
return result_df
|
|
|
|