import torch
import re
import json
import gradio as gr
from konlpy.tag import Okt
from transformers import AutoTokenizer, BertForSequenceClassification

# --- 1. Setup and preprocessing functions ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

BASE_TOKENIZER_DIR = 'base'
EMOTION_MODEL_DIR = 'kobert_emotion_classifier'
GENRE_MODEL_DIR = 'kobert_genre_classifier_archive'

okt = Okt()
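# Okt (from KoNLPy) is a JVM-based morphological analyzer, so a Java runtime must be available.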
def remove_english(text):
    return re.sub(r'[A-Za-z]+', '', text)

def extract_pos(text):
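    # Keep only nouns, verbs, and adjectives from the Okt POS tags, after stripping English characters.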
    allowed_pos = ['Noun', 'Verb', 'Adjective']
    text = remove_english(text)
    return ' '.join([word for word, pos in okt.pos(text) if pos in allowed_pos])

# --- 2. Load both models and the shared tokenizer ---
try:
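    # The shared tokenizer is loaded from the local `base` directory; trust_remote_code=True
    # allows any custom tokenizer code bundled with the checkpoint (common for KoBERT) to run.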
    tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZER_DIR, trust_remote_code=True)
    print("โœ… ๊ณต์šฉ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต")

    # ๊ฐ์ • ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ (ํŒŒ์ผ์—์„œ ๋ ˆ์ด๋ธ” ์ฝ๊ธฐ)
    emotion_model = BertForSequenceClassification.from_pretrained(EMOTION_MODEL_DIR)
    emotion_model.to(device)
    emotion_model.eval()
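    # labels_ids.json (saved with the emotion checkpoint) maps label name -> id;
    # invert it so logit indices can be decoded back into label names.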
    with open(f"{EMOTION_MODEL_DIR}/labels_ids.json", "r", encoding="utf-8") as f:
        emotion_labels_ids = json.load(f)
    id_to_emotion_label = {v: k for k, v in emotion_labels_ids.items()}
    print("โœ… ๊ฐ์ • ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")

    # ์žฅ๋ฅด ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ
    genre_model = BertForSequenceClassification.from_pretrained(GENRE_MODEL_DIR)
    genre_model.to(device)
    genre_model.eval()
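    # The genre checkpoint ships no labels file, so the id -> label mapping is hard-coded
    # below and must match the label order used when the genre model was trained.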
    
  
    id_to_genre_label = {
        0: '๋ก/๋ฉ”ํƒˆ',
        1: '๋Œ„์Šค',
        2: 'R&B/Soul',
        3: '๋ฐœ๋ผ๋“œ',
        4: '๋žฉ/ํž™ํ•ฉ', 
        5: 'ํŠธ๋กœํŠธ'
    }
    print("โœ… ์žฅ๋ฅด ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต (๋ ˆ์ด๋ธ” ์ง์ ‘ ์ •์˜)")

except Exception as e:
    print(f"๋ชจ๋ธ ๋˜๋Š” ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    emotion_model, genre_model = None, None

# --- 3. Combined prediction function ---
def predict_emotion_and_genre(text):
    if not emotion_model or not genre_model:
        raise gr.Error("The models are not loaded. Please check the Space logs.")

    preprocessed_text = extract_pos(text)

    # ๊ฐ์ • ์˜ˆ์ธก
    emotion_inputs = tokenizer(preprocessed_text, return_tensors='pt', truncation=True, padding=True, max_length=384).to(device)
    with torch.no_grad():
        emotion_logits = emotion_model(**emotion_inputs).logits
    emotion_probs = torch.softmax(emotion_logits, dim=1).squeeze().cpu().numpy()
    emotion_confidences = {id_to_emotion_label[i]: float(prob) for i, prob in enumerate(emotion_probs)}

    # ์žฅ๋ฅด ์˜ˆ์ธก
    genre_inputs = tokenizer(preprocessed_text, return_tensors='pt', truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        genre_logits = genre_model(**genre_inputs).logits
    genre_probs = torch.softmax(genre_logits, dim=1).squeeze().cpu().numpy()
    genre_confidences = {id_to_genre_label[i]: float(prob) for i, prob in enumerate(genre_probs)}

    return emotion_confidences, genre_confidences
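
# Optional local smoke test (illustrative; kept commented out so nothing extra runs when the
# Space imports this file). The sample line is one of the example lyrics defined below.
# if emotion_model and genre_model:
#     print(predict_emotion_and_genre("슬픔의 밑바닥에서 난 너를 만나"))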

# --- 4. Gradio interface ---
title = "🎤 Korean Lyrics Emotion & Genre Analyzer 🎶"
description = "Models fine-tuned from KoBERT. Enter song lyrics in Korean and the app predicts the emotion and the genre at the same time."
examples = [
    ["์Šฌํ””์˜ ๋ฐ‘๋ฐ”๋‹ฅ์—์„œ ๋‚œ ๋„ˆ๋ฅผ ๋งŒ๋‚˜"],
    ["๊ฐ€์Šด์ด ์›…์žฅํ•ด์ง„๋‹ค ์ด๊ฑด ๋ชป ์ฐธ์ง€"],
    ["๋„ˆ์™€ ํ•จ๊ป˜๋ผ๋ฉด ์–ด๋””๋“  ๊ฐˆ ์ˆ˜ ์žˆ์–ด"],
    ["์˜ค๋Š˜ ๋ฐค ์ฃผ์ธ๊ณต์€ ๋‚˜์•ผ ๋‚˜"]
]
iface = gr.Interface(
    fn=predict_emotion_and_genre,
    inputs=gr.Textbox(lines=10, placeholder="Enter song lyrics here (in Korean)...", label="Song lyrics"),
    outputs=[
        gr.Label(num_top_classes=3, label="Emotion prediction"),
        gr.Label(num_top_classes=3, label="Genre prediction")
    ],
    title=title,
    description=description,
    examples=examples
)

iface.launch()