young476 committed on
Commit f7930af · verified · 1 Parent(s): 3b93580

Update app.py

Files changed (1)
  1. app.py +59 -60
app.py CHANGED
@@ -1,35 +1,21 @@
-# app.py
-
 import torch
 import re
+import json
+import gradio as gr
 from konlpy.tag import Okt
 from transformers import AutoTokenizer, BertForSequenceClassification
-import gradio as gr
 
 # --- 1. Settings and preprocessing functions ---
 
 # Device setup
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Path to the saved model and tokenizer
-MODEL_DIR = './kobert_genre_classifier_archive'
+# Paths where each model is saved
+EMOTION_MODEL_DIR = './kobert_emotion_classifier_archive'
+GENRE_MODEL_DIR = './kobert_genre_classifier_archive'
 
-# Label mapping (※※※ important: edit these to the actual genre names, matching the label order used in the notebook ※※※)
-# Example: labels_ids = {'댄스': 0, '발라드': 1, '록': 2, ...}
-id_to_label = {
-    0: '록/메탈',  # change to the actual genre names
-    1: '댄스',
-    2: 'R&B/Soul',
-    3: '발라드',
-    4: '랩/힙합합',
-    5: '트로트'
-}
-# ↑↑↑↑↑ Be sure to change these to the actual genre names! ↑↑↑↑↑
-
-
-# Preprocessing function used in the notebook (copied as-is)
+# Preprocessing function used in the notebook (shared by both models)
 okt = Okt()
-
 def remove_english(text):
     return re.sub(r'[A-Za-z]+', '', text)
 
@@ -38,69 +24,82 @@ def extract_pos(text):
     text = remove_english(text)
     return ' '.join([word for word, pos in okt.pos(text) if pos in allowed_pos])
 
-# --- 2. Load the model and tokenizer ---
+# --- 2. Load both models and tokenizers ---
 
 try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
-    model = BertForSequenceClassification.from_pretrained(MODEL_DIR, trust_remote_code=True)
-    model.to(device)
-    model.eval()
-    print("모델과 토크나이저를 성공적으로 로드했습니다.")
+    # Load the emotion classification model
+    emotion_tokenizer = AutoTokenizer.from_pretrained(EMOTION_MODEL_DIR, trust_remote_code=True)
+    emotion_model = BertForSequenceClassification.from_pretrained(EMOTION_MODEL_DIR, trust_remote_code=True)
+    emotion_model.to(device)
+    emotion_model.eval()
+    with open(f"{EMOTION_MODEL_DIR}/labels_ids.json", "r", encoding="utf-8") as f:
+        emotion_labels_ids = json.load(f)
+    id_to_emotion_label = {v: k for k, v in emotion_labels_ids.items()}
+    print("✅ 감정 분류 모델 로드 성공")
+
+    # Load the genre classification model
+    genre_tokenizer = AutoTokenizer.from_pretrained(GENRE_MODEL_DIR, trust_remote_code=True)
+    genre_model = BertForSequenceClassification.from_pretrained(GENRE_MODEL_DIR, trust_remote_code=True)
+    genre_model.to(device)
+    genre_model.eval()
+    # Genre label map
+    id_to_genre_label = {0: '록/메탈', 1: '댄스', 2: 'R&B/Soul', 3: '발라드', 4: '랩/힙합', 5: '트로트'}
+    print("✅ 장르 분류 모델 로드 성공")
+
 except Exception as e:
     print(f"모델 로딩 중 오류 발생: {e}")
-    # On Hugging Face Spaces, keep the app from starting if loading fails
-    tokenizer, model = None, None
-
-# --- 3. Prediction function ---
+    emotion_model, genre_model = None, None
 
-def predict_genre(text):
-    if not model or not tokenizer:
+# --- 3. Combined prediction function ---
+def predict_emotion_and_genre(text):
+    if not emotion_model or not genre_model:
         raise gr.Error("모델이 로드되지 않았습니다. Space의 로그를 확인해주세요.")
 
-    # 1. Preprocess the input lyrics
+    # 1. Shared preprocessing of the input lyrics
     preprocessed_text = extract_pos(text)
 
-    # 2. Tokenize
-    inputs = tokenizer(
-        preprocessed_text,
-        return_tensors='pt',
-        truncation=True,
-        padding='max_length',
-        max_length=512  # same as the MAX_LENGTH set in the notebook
+    # 2. Run the emotion prediction
+    emotion_inputs = emotion_tokenizer(
+        preprocessed_text, return_tensors='pt', truncation=True, padding=True, max_length=384
     ).to(device)
+    with torch.no_grad():
+        emotion_logits = emotion_model(**emotion_inputs).logits
+    emotion_probs = torch.softmax(emotion_logits, dim=1).squeeze().cpu().numpy()
+    emotion_confidences = {id_to_emotion_label[i]: float(prob) for i, prob in enumerate(emotion_probs)}
 
-    # 3. Predict
+    # 3. Run the genre prediction
+    genre_inputs = genre_tokenizer(
+        preprocessed_text, return_tensors='pt', truncation=True, padding=True, max_length=512
+    ).to(device)
     with torch.no_grad():
-        outputs = model(**inputs)
-        logits = outputs.logits
+        genre_logits = genre_model(**genre_inputs).logits
+    genre_probs = torch.softmax(genre_logits, dim=1).squeeze().cpu().numpy()
+    genre_confidences = {id_to_genre_label[i]: float(prob) for i, prob in enumerate(genre_probs)}
 
-    # 4. Compute probabilities and format the result
-    probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
-
-    # Reshape the output for Gradio's Label component
-    confidences = {id_to_label[i]: float(prob) for i, prob in enumerate(probabilities)}
-
-    return confidences
+    return emotion_confidences, genre_confidences
 
 # --- 4. Build the Gradio interface ---
-
-title = "🎤 한국어 노래 가사 장르 분류기 🎶"
-description = "KoBERT를 파인튜닝하여 만든 노래 가사 장르 분류 모델입니다. 아래에 가사를 입력하고 '분류하기' 버튼을 누르면 장르를 예측해줍니다."
+title = "🎤 한국어 가사 감정 및 장르 동시 분석기 🎶"
+description = "KoBERT를 파인튜닝하여 만든 모델입니다. 가사를 입력하면 감정과 장르를 동시에 예측합니다."
 examples = [
-    ["무얼 믿은 걸까 부족했던 내게서 나조차 못 믿던 내게 여태 머문 사람"],
-    ["미치도록 사랑했던 지겹도록 다투었던 네가 먼저 떠나고 여긴 온종일 비가 왔어"],
-    ["우린 멋진 나이야 좀 어리긴 하지만 하고픈 일이나 가고픈 길 해야 할 일들까지"]
+    ["슬픔의 밑바닥에서 난 너를 만나"],
+    ["가슴이 웅장해진다 이건 못 참지"],
+    ["너와 함께라면 어디든 갈 수 있어"],
+    ["오늘 밤 주인공은 나야 나"]
]
 
-
-# Run the Gradio interface
 iface = gr.Interface(
-    fn=predict_genre,
+    fn=predict_emotion_and_genre,
     inputs=gr.Textbox(lines=10, placeholder="여기에 노래 가사를 입력하세요...", label="노래 가사"),
-    outputs=gr.Label(num_top_classes=3, label="예측된 장르"),
+    # ✅ outputs is changed so both results show their top 3 classes.
+    outputs=[
+        gr.Label(num_top_classes=3, label="감정 예측 결과"),
+        gr.Label(num_top_classes=3, label="장르 예측 결과")
+    ],
     title=title,
     description=description,
     examples=examples
 )
 
+# Launch the app
 iface.launch()
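
The updated loader expects a labels_ids.json file inside the emotion model archive that maps each emotion label name to its integer class id; app.py inverts that mapping so predicted class indices can be turned back into names for the Gradio output. A minimal sketch of that layout is below, assuming hypothetical emotion labels (the real names are defined in the training notebook and do not appear in this diff):

import json

# Hypothetical contents of ./kobert_emotion_classifier_archive/labels_ids.json
# (placeholder emotion names; the actual labels come from the training notebook)
labels_ids = {"기쁨": 0, "슬픔": 1, "분노": 2, "불안": 3}

with open("labels_ids.json", "w", encoding="utf-8") as f:
    json.dump(labels_ids, f, ensure_ascii=False, indent=2)

# app.py inverts label -> id into id -> label before building the confidence dict:
with open("labels_ids.json", "r", encoding="utf-8") as f:
    id_to_emotion_label = {v: k for k, v in json.load(f).items()}

print(id_to_emotion_label[0])  # -> '기쁨'

The genre side keeps its label map hard-coded in app.py, so only the emotion archive needs this file.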