Update app.py

app.py CHANGED
@@ -1,12 +1,14 @@
 import re
 import threading
 import gc
+import os
 import torch
 
 import gradio as gr
 import spaces
 import transformers
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import login
 
 # Settings for model memory management and optimization
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,6 +30,17 @@ available_models = {
 pipe = None
 current_model_name = None
 
+# Try to log in with a Hugging Face token
+try:
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        login(token=hf_token)
+        print("Successfully logged in to Hugging Face.")
+    else:
+        print("Warning: the HF_TOKEN environment variable is not set.")
+except Exception as e:
+    print(f"Hugging Face login error: {str(e)}")
+
 # Marker used to detect the final answer
 ANSWER_MARKER = "**답변**"
 
@@ -143,6 +156,14 @@ def load_model(model_names):
 
     # Load the model (apply settings optimized for its size)
     try:
+        # Check the HF_TOKEN environment variable
+        hf_token = os.getenv("HF_TOKEN")
+        # Parameters shared by all loading paths
+        common_params = {
+            "token": hf_token,  # token for gated models
+            "trust_remote_code": True,
+        }
+
         # Use BF16 precision (optimized for the A100)
         if config["quantization"]:
             # Apply quantization
@@ -159,9 +180,9 @@ def load_model(model_names):
                 torch_dtype=DTYPE,
                 quantization_config=quantization_config if config["quantization"] else None,
                 offload_folder="offload" if config["offload"] else None,
-
+                **common_params
             )
-            tokenizer = AutoTokenizer.from_pretrained(model_name,
+            tokenizer = AutoTokenizer.from_pretrained(model_name, **common_params)
 
             pipe = pipeline(
                 "text-generation",
@@ -177,7 +198,7 @@ def load_model(model_names):
                 model=model_name,
                 device_map="auto",
                 torch_dtype=DTYPE,
-
+                **common_params
             )
 
         current_model_name = model_name
@@ -414,5 +435,12 @@ if __name__ == "__main__":
         print(f"Current GPU: {torch.cuda.current_device()}")
         print(f"GPU name: {torch.cuda.get_device_name(0)}")
 
+    # Check the HF_TOKEN environment variable
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        print("The HF_TOKEN environment variable is set.")
+    else:
+        print("Warning: HF_TOKEN is not set; gated models cannot be accessed.")
+
     # Use the queue and launch the app
     demo.queue(max_size=10).launch()
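
The change above boils down to one pattern: read HF_TOKEN once, authenticate the process, and thread the same credentials through every from_pretrained and pipeline call. Below is a minimal, self-contained sketch of that pattern; the model name is a placeholder, while login, token, and trust_remote_code are the actual huggingface_hub and transformers parameters the diff uses.

import os

from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# On Spaces, HF_TOKEN is typically stored as a repository secret,
# which the runtime exposes to the app as an environment variable.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # authenticates subsequent Hub downloads

common_params = {
    "token": hf_token,          # needed for gated or private models
    "trust_remote_code": True,  # allow custom modeling code from the repo
}

model_name = "some-org/some-gated-model"  # placeholder, not from the diff
tokenizer = AutoTokenizer.from_pretrained(model_name, **common_params)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", **common_params)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("Hello", max_new_tokens=8)[0]["generated_text"])

Passing the token through a shared common_params dict keeps the quantized and non-quantized branches consistent, which is exactly the failure mode the commit fixes: the old code closed the from_pretrained call without credentials and left the tokenizer call unfinished.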