import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import time
import logging
import os
import json
from datetime import datetime
# --- Settings ---
# ★★★ Separate IDs for the model weights and the tokenizer ★★★
MODEL_ID_FOR_WEIGHTS = "unsloth/gemma-3-1b-it-bnb-4bit"  # model weights are loaded from here
TOKENIZER_ID = "google/gemma-3-1b-it"  # the tokenizer comes from the original repo
# Use the CPU (HF Spaces free tier)
DEVICE = "cpu"
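# NOTE (assumption, not in the original script): bitsandbytes 4-bit loading
# has traditionally required a CUDA GPU, so on a CPU-only Space the model
# load below may fail depending on the installed bitsandbytes version.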
# Memory file path
MEMORY_FILE = "thought_memory.json"
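# The file holds a JSON list; each entry is a dict with "role", "content",
# and "timestamp" keys (see the main loop below).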
# Thinking interval (seconds)
THINKING_INTERVAL_SECONDS = 120  # e.g. think every 2 minutes
# Maximum number of new tokens to generate
MAX_NEW_TOKENS = 150
# Initial thinking prompt
INITIAL_PROMPT = "You are an AI that continuously thinks on its own. Your first thought is:"
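# Used only when thought_history is empty (i.e., on the very first cycle).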
# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# (load_memory and save_memory are unchanged from the previous version)
def load_memory():
    """Load previous thought history from the memory file."""
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
            if not isinstance(memory, list):
                logging.warning(f"Contents of {MEMORY_FILE} are not a list; resetting memory.")
                return []
            logging.info(f"Loaded {len(memory)} previous thoughts.")
            return memory
        except json.JSONDecodeError:
            logging.error(f"Failed to parse {MEMORY_FILE}; resetting memory.")
            return []
        except Exception as e:
            logging.error(f"Error while loading memory: {e}", exc_info=True)
            return []
    else:
        logging.info("No memory file found; starting fresh.")
        return []
def save_memory(memory):
    """Save the current thought history to the memory file."""
    try:
        with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
            json.dump(memory, f, ensure_ascii=False, indent=2)
        logging.debug(f"Saved memory to {MEMORY_FILE}.")
    except Exception as e:
        logging.error(f"Error while saving memory: {e}", exc_info=True)
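# NOTE (suggestion, not in the original script): save_memory() overwrites the
# file in place, so a crash mid-write can truncate the JSON; writing to a
# temporary file and then os.replace()-ing it over MEMORY_FILE would make
# the save atomic.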
def generate_thought(tokenizer, model, prompt_history):
    """Generate the next thought from the given prompt history."""
    if not prompt_history:
        chat = [{"role": "user", "content": INITIAL_PROMPT}]
    else:
        last_thought = prompt_history[-1]['content']
        prompt = (
            f"Previous thought: \"{last_thought}\"\n\n"
            "Building on this, what thought, question, or extended concept "
            "comes to mind next? Answer concisely."
        )
        chat = [{"role": "user", "content": prompt}]
    prompt_formatted = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    logging.info(f"--- Model input prompt ---\n{prompt_formatted}\n-----------------------")
    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(DEVICE)
    start_time = time.time()
    logging.info("Starting model inference...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            pad_token_id=tokenizer.eos_token_id
        )
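    # Greedy decoding is the transformers default when do_sample is not set,
    # so identical prompts produce identical thoughts; passing do_sample=True
    # plus a temperature here would add variety (not in the original script).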
    end_time = time.time()
    logging.info(f"Model inference finished ({end_time - start_time:.2f}s elapsed)")
    input_token_length = inputs.input_ids.shape[1]
    generated_ids = outputs[0, input_token_length:]  # keep only the newly generated tokens
    new_thought_raw = tokenizer.decode(generated_ids, skip_special_tokens=True)
    logging.info(f"Raw model output: {new_thought_raw}")
    return new_thought_raw.strip()
if __name__ == "__main__":
    logging.info("Starting AI thinking process...")
    logging.info(f"Tokenizer ID: {TOKENIZER_ID}")
    logging.info(f"Model Weights ID: {MODEL_ID_FOR_WEIGHTS}")
    logging.info(f"Device: {DEVICE}")
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        logging.info("Using the Hugging Face token.")
    else:
        logging.info("No Hugging Face token set (add one in the Space's Secrets if needed).")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16  # switch to float32 if the CPU lacks bfloat16 support
        # bnb_4bit_compute_dtype=torch.float32  # use this line if bfloat16 causes problems
    )
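    # "nf4" (4-bit NormalFloat) stores the quantized weights in 4 bits, while
    # the compute dtype above is what the dequantized matmuls actually run in.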
    try:
        logging.info("Loading tokenizer...")
        # ★★★ Use the original repo ID when loading the tokenizer ★★★
        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID, token=hf_token)
        logging.info("Loading quantized model... (applying the bitsandbytes config)")
        # ★★★ Use the quantized repo ID when loading the model weights ★★★
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID_FOR_WEIGHTS,
            quantization_config=bnb_config,
            device_map=DEVICE,
            token=hf_token
        )
        model.eval()
        logging.info("Model and tokenizer loaded.")
    except Exception as e:
        logging.error(f"Fatal error while loading the model or tokenizer: {e}", exc_info=True)
        # If the CPU does not support bfloat16, the error may surface here
        exit(1)
    thought_history = load_memory()
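    # NOTE: generate_thought() only reads the last entry of thought_history;
    # the full list is kept purely as a persistent log of past thoughts.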
    try:
        while True:
            logging.info("=== Starting a new thought cycle ===")
            new_thought = generate_thought(tokenizer, model, thought_history)
            if new_thought:
                logging.info(f"New thought generated: {new_thought}")
                thought_entry = {"role": "assistant", "content": new_thought, "timestamp": datetime.now().isoformat()}
                thought_history.append(thought_entry)
                save_memory(thought_history)
            else:
                logging.warning("The model generated an empty thought.")
            logging.info(f"Waiting {THINKING_INTERVAL_SECONDS}s until the next thought...")
            time.sleep(THINKING_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        logging.info("AI process stopped by user request.")
    except Exception as e:
        logging.error(f"Error in main loop: {e}", exc_info=True)
    finally:
        logging.info("AI thinking process exiting; attempting a final memory save.")
        save_memory(thought_history)