import os, threading, time, traceback, zipfile, random, json, shutil, asyncio, re
import torch, uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig
from peft import PeftModel
from datasets import Dataset
from datetime import datetime
# === Environment
HF_TOKEN = os.getenv("HF_TOKEN")
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["TORCH_HOME"] = "/app/.torch_cache"
os.makedirs("/app/.torch_cache", exist_ok=True)
# === Settings
MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
USE_SAMPLING = False
INTENT_CONFIDENCE_THRESHOLD = 0.5
LLM_CONFIDENCE_THRESHOLD = 0.2
TRAIN_CONFIDENCE_THRESHOLD = 0.7
# Turkish fallback replies returned to the end user when generation confidence is too low
FALLBACK_ANSWERS = [
"Bu konuda maalesef bilgim yok.",
"Ne demek istediğinizi tam anlayamadım.",
"Bu soruya şu an yanıt veremiyorum."
]
INTENT_MODEL_PATH = "intent_model"
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
INTENT_DEFINITIONS = {}
# === FastAPI
app = FastAPI()
chat_history = []
model = None
tokenizer = None
eos_token_id = None
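# --- Request bodies: /chat expects {"user_input": "..."}; /train_intents expects {"intents": [...]}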
class Message(BaseModel):
user_input: str
class TrainInput(BaseModel):
intents: list
@app.get("/")
def health():
return {"status": "ok"}
@app.get("/start", response_class=HTMLResponse)
def root():
return """
<html><body>
<h2>Turkcell LLM Chat</h2>
<textarea id='input' rows='4' cols='60'></textarea><br>
    <button onclick='send()'>Send</button><br><br>
    <label>Model Answer:</label><br>
<textarea id='output' rows='10' cols='80' readonly style='white-space: pre-wrap;'></textarea>
<script>
async function send() {
const input = document.getElementById("input").value;
const res = await fetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ user_input: input })
});
const data = await res.json();
        document.getElementById('output').value = data.answer || data.response || data.error || 'An error occurred.';
}
</script>
</body></html>
"""
@app.post("/train_intents", status_code=202)
def train_intents(train_input: TrainInput):
global INTENT_DEFINITIONS
log("📥 POST /train_intents çağrıldı.")
intents = train_input.intents
INTENT_DEFINITIONS = {intent["name"]: intent for intent in intents}
threading.Thread(target=lambda: background_training(intents), daemon=True).start()
return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
def background_training(intents):
try:
log("🔧 Intent eğitimi başlatıldı...")
texts, labels, label2id = [], [], {}
for idx, intent in enumerate(intents):
label2id[intent["name"]] = idx
for ex in intent["examples"]:
texts.append(ex)
labels.append(idx)
dataset = Dataset.from_dict({"text": texts, "label": labels})
tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
config.problem_type = "single_label_classification"
config.num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)
        tokenized = dataset.map(
            lambda batch: tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128),
            batched=True,
            remove_columns=["text"],
        )
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
output_dir = "/app/intent_train_output"
os.makedirs(output_dir, exist_ok=True)
trainer = Trainer(
model=model,
args=TrainingArguments(output_dir, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
train_dataset=tokenized,
data_collator=default_data_collator
)
trainer.train()
        # ✅ Produce a per-intent accuracy report on the training set
        log("🔧 Generating accuracy report...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
input_ids_tensor = tokenized["input_ids"].to(device)
attention_mask_tensor = tokenized["attention_mask"].to(device)
with torch.no_grad():
outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
predictions = outputs.logits.argmax(dim=-1).tolist()
        actuals = tokenized["label"].tolist()
        id2label = {v: k for k, v in label2id.items()}
        counts = {}
        correct = {}
        for pred, actual in zip(predictions, actuals):
            intent = id2label[actual]
counts[intent] = counts.get(intent, 0) + 1
if pred == actual:
correct[intent] = correct.get(intent, 0) + 1
for intent, total in counts.items():
accuracy = correct.get(intent, 0) / total
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
log("📦 Intent modeli eğitimi kaydediliyor...")
if os.path.exists(INTENT_MODEL_PATH):
shutil.rmtree(INTENT_MODEL_PATH)
model.save_pretrained(INTENT_MODEL_PATH)
tokenizer.save_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
json.dump(label2id, f)
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
except Exception as e:
log(f"❌ Intent eğitimi hatası: {e}")
traceback.print_exc()
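# Loads the trained classifier, its tokenizer, and the label map into module
# globals; call this once after training so /chat can start routing intents.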
@app.post("/load_intent_model")
def load_intent_model():
global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
try:
INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f:
LABEL2ID = json.load(f)
return {"status": "ok", "message": "Intent modeli yüklendi."}
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=500)
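# Classifies a user message: softmax over the classifier logits gives one
# probability per intent, and the top probability doubles as the confidence.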
async def detect_intent(text):
    inputs = INTENT_TOKENIZER(text, return_tensors="pt")
    with torch.no_grad():  # pure inference; skip autograd bookkeeping
        outputs = INTENT_MODEL(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    confidence, pred_id = torch.max(probs, dim=-1)
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id.item()], confidence.item()
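# Generates an LLM reply via the model's chat template. With greedy decoding,
# the per-step top-token probabilities are averaged into a rough confidence
# score; with sampling there is no comparable score, so None is returned.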
async def generate_response(text):
messages = [{"role": "user", "content": text}]
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    eos_token = eos_token_id if eos_token_id is not None else tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
input_ids = encodeds.to(model.device)
attention_mask = (input_ids != tokenizer.pad_token_id).long()
with torch.no_grad():
output = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
max_new_tokens=128,
do_sample=USE_SAMPLING,
eos_token_id=eos_token,
pad_token_id=tokenizer.pad_token_id,
return_dict_in_generate=True,
output_scores=True
)
    if not USE_SAMPLING:
        # Average the probability of the chosen token at each step; taking the
        # max over all steps and the whole vocabulary is almost always ~1.0.
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max(dim=-1).values.mean().item()
    else:
        top_conf = None
decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
    for tag in ["<|im_start|>assistant", "assistant"]:  # check the longer tag first; "assistant" also matches it as a substring
start = decoded.find(tag)
if start != -1:
decoded = decoded[start + len(tag):].strip()
break
return decoded, top_conf
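# Variable templates appear to use a "key:{example value}" syntax (inferred
# from the regex below): e.g. the template "hava durumu city:{Ankara}" becomes
# the regex "hava durumu (?P<city>.+?)", whose named groups yield the
# extracted parameters. Note that the literal parts are not regex-escaped.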
def extract_parameters(variables_list, user_input):
for pattern in variables_list:
regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        match = re.fullmatch(regex, user_input)  # anchor both ends so trailing lazy groups capture full values
if match:
return [{"key": k, "value": v} for k, v in match.groupdict().items()]
return []
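# Resolves a recognized intent: extracts any template variables from the raw
# input and returns a structured result; no external action is triggered here.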
def execute_intent(intent_name, user_input):
if intent_name in INTENT_DEFINITIONS:
definition = INTENT_DEFINITIONS[intent_name]
variables = extract_parameters(definition.get("variables", []), user_input)
log(f"🚀 execute_intent('{intent_name}', {variables})")
return {"intent": intent_name, "parameters": variables}
return {"intent": intent_name, "parameters": []}
@app.post("/chat")
async def chat(msg: Message):
user_input = msg.user_input.strip()
try:
        if model is None or tokenizer is None:
            return {"error": "Model not loaded yet."}
        if INTENT_MODEL is not None:
            # Note: both coroutines are CPU-bound and never await, so they
            # effectively run one after the other despite create_task().
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent, intent_conf = await intent_task
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
                response_task.cancel()  # the LLM reply is not needed once the intent is handled
                return execute_intent(intent, user_input)
else:
response, response_conf = await response_task
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
else:
response, response_conf = await generate_response(user_input)
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
except Exception as e:
traceback.print_exc()
return JSONResponse(content={"error": str(e)}, status_code=500)
def log(message):
timestamp = datetime.now().strftime("%H:%M:%S")
print(f"[{timestamp}] {message}", flush=True)
def setup_model():
global model, tokenizer, eos_token_id
try:
log("🧠 setup_model() başladı")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log(f"📡 Kullanılan cihaz: {device}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
model.eval()
log("✅ Ana model eval() çağrıldı")
log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
_ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
_ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
log("✅ Intent modeli önbelleğe alındı.")
log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
except Exception as e:
log(f"❌ setup_model() hatası: {e}")
traceback.print_exc()
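# Startup: model loading and the uvicorn server each run on daemon threads;
# the main thread sleeps forever to keep the container alive.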
threading.Thread(target=setup_model, daemon=True).start()
threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start()
while True:
time.sleep(60)