kimhyunwoo committed on
Commit 1bd1b79 · verified · 1 Parent(s): 1af008e

Create app.py

Files changed (1)
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
+import torch
+# Import BitsAndBytesConfig as well.
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+import time
+import logging
+import os
+import json
+from datetime import datetime
+
+# --- Configuration ---
+# Switched to the quantized model ID.
+MODEL_ID = "unsloth/gemma-3-1b-it-bnb-4bit"
+# Run on CPU (HF Spaces free tier).
+DEVICE = "cpu"
+# Path of the memory file.
+MEMORY_FILE = "thought_memory.json"
+# Thinking interval in seconds - inference can be slow on CPU, so keep the gap generous.
+THINKING_INTERVAL_SECONDS = 120  # e.g., think every 2 minutes
+# Maximum number of new tokens to generate.
+MAX_NEW_TOKENS = 150
+# Initial thought prompt.
+INITIAL_PROMPT = "I am an AI that keeps thinking on its own. My first thought is:"
+
+# Logging setup
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def load_memory():
+    """Load the previous thought history from the memory file."""
+    if os.path.exists(MEMORY_FILE):
+        try:
+            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
+                memory = json.load(f)
+            if not isinstance(memory, list):
+                logging.warning(f"{MEMORY_FILE} does not contain a list; resetting memory.")
+                return []
+            logging.info(f"Loaded {len(memory)} previous thoughts.")
+            return memory
+        except json.JSONDecodeError:
+            logging.error(f"Failed to parse {MEMORY_FILE}; resetting memory.")
+            return []
+        except Exception as e:
+            logging.error(f"Error while loading memory: {e}", exc_info=True)
+            return []
+    else:
+        logging.info("No memory file found; starting fresh.")
+        return []
+
+def save_memory(memory):
+    """Save the current thought history to the memory file."""
+    try:
+        with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
+            json.dump(memory, f, ensure_ascii=False, indent=2)
+        logging.debug(f"Saved memory to {MEMORY_FILE}.")
+    except Exception as e:
+        logging.error(f"Error while saving memory: {e}", exc_info=True)
+
+def generate_thought(tokenizer, model, prompt_history):
+    """Generate the next thought from the given prompt history."""
+    if not prompt_history:
+        chat = [{"role": "user", "content": INITIAL_PROMPT}]
+    else:
+        last_thought = prompt_history[-1]['content']
+        prompt = f"Previous thought: \"{last_thought}\"\n\nBuilding on it, what thought, question, or extended idea comes to mind next? Answer concisely."
+        chat = [{"role": "user", "content": prompt}]
+
+    prompt_formatted = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+    logging.info(f"--- Model input prompt ---\n{prompt_formatted}\n-----------------------")
+
+    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(DEVICE)
+
+    # Model inference (this can take a while on CPU)
+    start_time = time.time()
+    logging.info("Starting model inference...")
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    end_time = time.time()
+    logging.info(f"Model inference finished ({end_time - start_time:.2f}s).")
+
+    input_token_length = inputs.input_ids.shape[1]
+    generated_ids = outputs[0, input_token_length:]
+    new_thought_raw = tokenizer.decode(generated_ids, skip_special_tokens=True)
+
+    logging.info(f"Raw model output: {new_thought_raw}")
+    return new_thought_raw.strip()
+
+if __name__ == "__main__":
+    logging.info("Starting the AI thinking process...")
+    logging.info(f"Model ID: {MODEL_ID}")
+    logging.info(f"Device: {DEVICE}")
+
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        logging.info("Using a Hugging Face token.")
+    else:
+        logging.info("No Hugging Face token set (add one to the Space Secrets if needed).")
+
+    # --- BitsAndBytes quantization settings ---
+    # Configure 4-bit quantization.
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        # bnb_4bit_use_double_quant=True,  # double quantization (extra memory savings, possibly slightly slower)
+        bnb_4bit_quant_type="nf4",  # NF4 (Normal Float 4), the generally recommended type
+        bnb_4bit_compute_dtype=torch.bfloat16  # compute dtype; verify bfloat16 works on this CPU, else use float32
+    )
+    # If bfloat16 is not supported on the CPU, uncomment the line below and comment out the
+    # config above (a runtime probe sketch follows the diff).
+    # bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="fp4", bnb_4bit_compute_dtype=torch.float32)
+
+    # Load the model and tokenizer
+    try:
+        logging.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
+
+        logging.info("Loading quantized model... (applying the bitsandbytes config)")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            quantization_config=bnb_config,  # apply the quantization settings
+            device_map=DEVICE,  # pin to CPU explicitly ("auto" also works, but explicit can be safer on CPU)
+            # torch_dtype=torch.bfloat16,  # already covered by compute_dtype in quantization_config
+            token=hf_token
+        )
+        # .to(DEVICE) is unnecessary when device_map is used
+        # model = model.to(DEVICE)  # use this line only if device_map is dropped
+
+        model.eval()
+        logging.info("Model and tokenizer loaded.")
+
+    except Exception as e:
+        logging.error(f"Fatal error while loading the model: {e}", exc_info=True)
+        # If the error log says the CPU does not support bfloat16,
+        # try changing compute_dtype in bnb_config to torch.float32.
+        exit(1)
+
+    thought_history = load_memory()
+
+    try:
+        while True:
+            logging.info("=== Starting a new thought cycle ===")
+            new_thought = generate_thought(tokenizer, model, thought_history)
+
+            if new_thought:
+                logging.info(f"Newly generated thought: {new_thought}")
+                thought_entry = {"role": "assistant", "content": new_thought, "timestamp": datetime.now().isoformat()}
+                thought_history.append(thought_entry)
+                save_memory(thought_history)
+            else:
+                logging.warning("The model produced an empty thought.")
+
+            logging.info(f"Waiting {THINKING_INTERVAL_SECONDS}s until the next thought...")
+            time.sleep(THINKING_INTERVAL_SECONDS)
+
+    except KeyboardInterrupt:
+        logging.info("AI process stopped at the user's request.")
+    except Exception as e:
+        logging.error(f"Error in the main loop: {e}", exc_info=True)
+    finally:
+        logging.info("AI thinking process shutting down. Attempting a final memory save.")
+        save_memory(thought_history)
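
A quick way to sanity-check the JSON persistence used by load_memory/save_memory is a round trip through the same file format, a list of entry dicts. A minimal sketch, not part of the commit; the file name mirrors MEMORY_FILE:

import json, os

entries = [{"role": "assistant", "content": "first thought", "timestamp": "2025-01-01T00:00:00"}]
with open("thought_memory.json", "w", encoding="utf-8") as f:
    json.dump(entries, f, ensure_ascii=False, indent=2)
with open("thought_memory.json", "r", encoding="utf-8") as f:
    assert json.load(f) == entries  # the round trip preserves the list of dicts
os.remove("thought_memory.json")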
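generate_thought relies on tokenizer.apply_chat_template with add_generation_prompt=True to wrap the single user turn before tokenization. For Gemma-family tokenizers the rendered string typically looks roughly like the following (an illustration of the turn markup, not an exact transcript of this checkpoint's template):

<start_of_turn>user
Previous thought: "..." Building on it, what thought, question, or extended idea comes to mind next? Answer concisely.<end_of_turn>
<start_of_turn>model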
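The quantization comments above leave open whether the CPU build handles bfloat16 as the 4-bit compute dtype. A minimal runtime probe, assuming a recent PyTorch; pick_compute_dtype is a hypothetical helper and not part of the committed file:

import torch

def pick_compute_dtype():
    # Hypothetical helper: smoke-test a bfloat16 matmul on the CPU backend
    # and fall back to float32 if the op is unsupported there.
    try:
        a = torch.ones(4, 4, dtype=torch.bfloat16)
        _ = a @ a
        return torch.bfloat16
    except RuntimeError:
        return torch.float32

# bnb_4bit_compute_dtype=pick_compute_dtype() could then replace the hard-coded torch.bfloat16.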
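For the Space to import everything app.py uses and load a bnb-4bit checkpoint, the environment needs at least torch, transformers, bitsandbytes, and accelerate. A plausible requirements.txt for this Space; the commit itself pins nothing, so versions are left open:

torch
transformers
accelerate
bitsandbytes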