|
import os, torch, threading, uvicorn, time, traceback, zipfile, random, json, shutil, asyncio, re |
|
from fastapi import FastAPI |
|
from fastapi.responses import HTMLResponse, JSONResponse |
|
from pydantic import BaseModel |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig |
|
from peft import PeftModel |
|
from datasets import Dataset |
|
from datetime import datetime |
|
|
|
|
|
# Value of the HF_TOKEN environment variable (None when it is not set);
# presumably a Hugging Face access token.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Process-wide environment settings, applied in one step.
os.environ.update(
    {
        "TRANSFORMERS_NO_ADVISORY_WARNINGS": "true",
        "TORCH_HOME": "/app/.torch_cache",
    }
)

# Make sure the directory TORCH_HOME points at exists.
os.makedirs(os.environ["TORCH_HOME"], exist_ok=True)
|
|
|
|
|
# Hub id of the causal language model loaded by setup_model().
MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"

# Fine-tune settings. None of these three is read in the visible part of this
# file.
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"

# Passed to model.generate() as do_sample. When True, generate_response()
# reports no confidence (None), so the fallback check in chat() is skipped.
USE_SAMPLING = False

# chat() executes a detected intent only when the classifier confidence is
# strictly greater than this value.
INTENT_CONFIDENCE_THRESHOLD = 0.5

# chat() replies with a random FALLBACK_ANSWERS entry when the confidence
# returned by generate_response() is below this value.
LLM_CONFIDENCE_THRESHOLD = 0.2

# background_training() logs a warning for each intent whose accuracy on its
# own training examples is below this value.
TRAIN_CONFIDENCE_THRESHOLD = 0.7

# Canned replies used by chat() when the generated answer is not trusted.
FALLBACK_ANSWERS = [
    "Bu konuda maalesef bilgim yok.",
    "Ne demek istediğinizi tam anlayamadım.",
    "Bu soruya şu an yanıt veremiyorum."
]
|
|
|
# Directory where background_training() saves the trained classifier and from
# which load_intent_model() reads it.
INTENT_MODEL_PATH = "intent_model"

# Pretrained checkpoint the intent classifier is fine-tuned from.
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"

# Classifier and tokenizer used by detect_intent(); assigned by
# load_intent_model(). chat() skips intent detection while INTENT_MODEL is
# falsy.
INTENT_MODEL = None
INTENT_TOKENIZER = None

# Intent name -> class index, read from label2id.json by load_intent_model().
LABEL2ID = {}

# Intent name -> intent definition, replaced on every POST /train_intents.
INTENT_DEFINITIONS = {}
|
|
|
|
|
# ASGI application that all route decorators below register on.
app = FastAPI()

# Not referenced in the visible part of this file.
chat_history = []

# Causal LM, its tokenizer and the id of the "<|im_end|>" token. All three are
# assigned by setup_model(); chat() refuses requests while model or tokenizer
# is still None.
model = None
tokenizer = None
eos_token_id = None
|
|
|
class Message(BaseModel):
    """Request body of POST /chat."""

    # Raw user text; chat() strips surrounding whitespace before using it.
    user_input: str
|
|
|
class TrainInput(BaseModel):
    """Request body of POST /train_intents."""

    # Intent definitions. The handlers index each item with "name" and
    # "examples" and optionally read "variables".
    # NOTE(review): the item type is not constrained here, so any list passes
    # schema validation.
    intents: list
|
|
|
@app.get("/")
def health():
    """Liveness probe: always answers with a fixed ``status: ok`` payload."""
    payload = {"status": "ok"}
    return payload
|
|
|
@app.get("/start", response_class=HTMLResponse)
def root():
    """Serve the minimal single-page chat UI.

    The page POSTs ``{"user_input": ...}`` to ``/chat`` and shows the first of
    the ``answer``, ``response`` or ``error`` fields of the JSON reply. When
    the reply is an intent result (``{"intent": ..., "parameters": ...}``),
    which carries none of those fields, the JSON itself is shown instead of
    the generic error text.

    Returns:
        The HTML document as a string.
    """
    return """
    <html><body>
    <h2>Turkcell LLM Chat</h2>
    <textarea id='input' rows='4' cols='60'></textarea><br>
    <button onclick='send()'>Gönder</button><br><br>
    <label>Model Cevabı:</label><br>
    <textarea id='output' rows='10' cols='80' readonly style='white-space: pre-wrap;'></textarea>
    <script>
    async function send() {
        const input = document.getElementById("input").value;
        const res = await fetch('/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ user_input: input })
        });
        const data = await res.json();
        let text = data.answer || data.response || data.error;
        if (!text) {
            // Intent matches come back as {intent, parameters} with no text field.
            text = data.intent ? JSON.stringify(data, null, 2) : 'Hata oluştu.';
        }
        document.getElementById('output').value = text;
    }
    </script>
    </body></html>
    """
|
|
|
@app.post("/train_intents", status_code=202)
def train_intents(train_input: TrainInput):
    """Register the posted intent definitions and start training in the background.

    Args:
        train_input: Request body; ``intents`` is a list of dicts that each
            need a string ``"name"`` and a list ``"examples"``.

    Returns:
        A 202 "accepted" payload once the worker thread has been started, or a
        400 ``JSONResponse`` naming the first malformed intent. In the 400
        case ``INTENT_DEFINITIONS`` is left untouched and no training starts.
    """
    global INTENT_DEFINITIONS
    log("📥 POST /train_intents çağrıldı.")
    intents = train_input.intents

    # Validate up front: a malformed item would otherwise surface as an
    # unhandled KeyError/TypeError here or as a crash in the worker thread.
    for position, intent in enumerate(intents):
        well_formed = (
            isinstance(intent, dict)
            and isinstance(intent.get("name"), str)
            and isinstance(intent.get("examples"), list)
        )
        if not well_formed:
            return JSONResponse(
                content={
                    "error": f"intents[{position}] geçersiz: 'name' (metin) ve 'examples' (liste) alanları zorunludur."
                },
                status_code=400,
            )

    INTENT_DEFINITIONS = {intent["name"]: intent for intent in intents}
    threading.Thread(target=background_training, args=(intents,), daemon=True).start()
    return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
|
|
|
def background_training(intents):
    """Fine-tune the intent classifier on ``intents`` and save it to disk.

    Started on a worker thread by ``train_intents``. The steps are:

    1. Flatten the intent examples into texts and integer labels.
    2. Fine-tune ``INTENT_MODEL_ID`` for sequence classification.
    3. Log per-intent accuracy on the training examples, warning about intents
       below ``TRAIN_CONFIDENCE_THRESHOLD`` or with fewer than 5 examples.
    4. Replace ``INTENT_MODEL_PATH`` with the new model, tokenizer and
       ``label2id.json``.

    The freshly trained model is not put into service here; that happens in
    ``load_intent_model``.

    Args:
        intents: List of dicts with a ``"name"`` and a list of ``"examples"``.

    Returns:
        None. Every exception is caught, logged and printed, never raised.
    """
    eval_batch_size = 32

    try:
        log("🔧 Intent eğitimi başlatıldı...")

        # One class id per *distinct* intent name. Using the enumerate() index
        # instead would give a repeated name a label id >= num_labels.
        texts, labels, label2id = [], [], {}
        for intent in intents:
            label = label2id.setdefault(intent["name"], len(label2id))
            for example in intent["examples"]:
                texts.append(example)
                labels.append(label)
        if not texts:
            raise ValueError("no training examples were provided")

        intent_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
        config.problem_type = "single_label_classification"
        config.num_labels = len(label2id)
        intent_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)

        # Every example is padded/truncated to the same length, so the encoded
        # lists can later be turned into plain tensors for the report.
        encoded = intent_tokenizer(texts, truncation=True, padding="max_length", max_length=128)
        tokenized = Dataset.from_dict(
            {
                "input_ids": encoded["input_ids"],
                "attention_mask": encoded["attention_mask"],
                "label": labels,
            }
        )
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

        output_dir = "/app/intent_train_output"
        os.makedirs(output_dir, exist_ok=True)
        trainer = Trainer(
            model=intent_model,
            args=TrainingArguments(
                output_dir,
                per_device_train_batch_size=4,
                num_train_epochs=3,
                logging_steps=10,
                save_strategy="no",
                report_to=[],
            ),
            train_dataset=tokenized,
            data_collator=default_data_collator,
        )
        trainer.train()

        log("🔧 Başarı raporu üretiliyor...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        intent_model.to(device)
        # Evaluate in eval mode so dropout does not randomise the report.
        intent_model.eval()

        input_ids = torch.tensor(encoded["input_ids"])
        attention_mask = torch.tensor(encoded["attention_mask"])
        predictions = []
        # Mini-batches keep memory bounded regardless of the dataset size.
        with torch.no_grad():
            for start in range(0, len(labels), eval_batch_size):
                stop = start + eval_batch_size
                logits = intent_model(
                    input_ids=input_ids[start:stop].to(device),
                    attention_mask=attention_mask[start:stop].to(device),
                ).logits
                predictions.extend(logits.argmax(dim=-1).tolist())

        id2label = {label: name for name, label in label2id.items()}
        counts = {}
        correct = {}
        for pred, actual in zip(predictions, labels):
            intent_name = id2label[actual]
            counts[intent_name] = counts.get(intent_name, 0) + 1
            if pred == actual:
                correct[intent_name] = correct.get(intent_name, 0) + 1
        for intent_name, total in counts.items():
            accuracy = correct.get(intent_name, 0) / total
            log(f"📊 Intent '{intent_name}' doğruluk: {accuracy:.2f} — {total} örnek")
            if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                log(f"⚠️ Yetersiz performanslı intent: '{intent_name}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")

        log("📦 Intent modeli eğitimi kaydediliyor...")
        if os.path.exists(INTENT_MODEL_PATH):
            shutil.rmtree(INTENT_MODEL_PATH)
        intent_model.save_pretrained(INTENT_MODEL_PATH)
        intent_tokenizer.save_pretrained(INTENT_MODEL_PATH)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w", encoding="utf-8") as f:
            json.dump(label2id, f)

        log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")

    except Exception as e:
        # Worker-thread boundary: report the failure instead of dying silently.
        log(f"❌ Intent eğitimi hatası: {e}")
        traceback.print_exc()
|
|
|
@app.post("/load_intent_model")
def load_intent_model():
    """Load the trained intent classifier from ``INTENT_MODEL_PATH`` into service.

    Everything is loaded into locals first and the globals are replaced only
    after all three artefacts (tokenizer, model, ``label2id.json``) were read
    successfully. A failed load therefore leaves the previously served model
    and label map untouched.

    Returns:
        ``{"status": "ok", ...}`` on success, or a 500 ``JSONResponse`` with
        the error text.
    """
    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
        loaded_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
        loaded_model.eval()
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), encoding="utf-8") as f:
            loaded_labels = json.load(f)
    except Exception as e:
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)

    # chat() gates intent detection on INTENT_MODEL, so publish it last: a
    # concurrent request never sees the new model with a stale label map.
    INTENT_TOKENIZER = loaded_tokenizer
    LABEL2ID = loaded_labels
    INTENT_MODEL = loaded_model
    return {"status": "ok", "message": "Intent modeli yüklendi."}
|
|
|
async def detect_intent(text):
    """Classify ``text`` with the loaded intent model.

    Args:
        text: User utterance.

    Returns:
        ``(intent_name, confidence)`` where ``confidence`` is the softmax
        probability of the predicted class.

    Raises:
        KeyError: If the predicted class index has no entry in ``LABEL2ID``.
    """
    # Truncate to the length used during training; an over-long utterance
    # would otherwise exceed the encoder's maximum input size.
    inputs = INTENT_TOKENIZER(text, return_tensors="pt", truncation=True, max_length=128)
    inputs = inputs.to(INTENT_MODEL.device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = INTENT_MODEL(**inputs).logits

    probs = torch.nn.functional.softmax(logits, dim=-1)
    confidence, pred_id = torch.max(probs, dim=-1)
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id.item()], confidence.item()
|
|
|
async def generate_response(text):
    """Generate the LLM reply for a single user message.

    Args:
        text: User message; it is wrapped in a one-turn chat template.

    Returns:
        ``(reply, confidence)``. ``confidence`` is ``None`` when
        ``USE_SAMPLING`` is true; otherwise it is the largest softmax
        probability found in the generation scores.
    """
    messages = [{"role": "user", "content": text}]
    input_ids = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    ).to(model.device)

    # Reuse the id cached by setup_model(); compute it only if it is not set yet.
    stop_id = eos_token_id
    if stop_id is None:
        stop_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]

    # A single unpadded prompt: every position is a real token. Deriving the
    # mask from pad_token_id would mask real tokens, because setup_model() may
    # alias the pad token to the EOS token.
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=USE_SAMPLING,
            eos_token_id=stop_id,
            pad_token_id=tokenizer.pad_token_id,
            return_dict_in_generate=True,
            output_scores=True,
        )

    if USE_SAMPLING:
        top_conf = None
    else:
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        # NOTE(review): this is the maximum over all steps and all vocabulary
        # entries, so one confident token yields a high value — confirm this
        # is the intended confidence measure.
        top_conf = probs.max().item()

    # Decode only the newly generated tokens. Searching the decoded
    # prompt+reply for "assistant" cut at the wrong place whenever the user's
    # own text contained that word.
    prompt_length = input_ids.shape[-1]
    decoded = tokenizer.decode(
        output.sequences[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Drop a leading role header in case the model emitted one itself
    # (presumably only when the template did not add it — TODO confirm).
    for tag in ("<|im_start|>assistant", "assistant"):
        if decoded.startswith(tag):
            decoded = decoded[len(tag):].strip()
            break
    return decoded, top_conf
|
|
|
def extract_parameters(variables_list, user_input):
    """Extract named parameters from ``user_input`` using template patterns.

    Each pattern is a template in which every ``name:{...}`` placeholder
    becomes a named capture group; the remaining text is used as regular
    expression source unchanged. Patterns are tried in order against the
    start of ``user_input`` and the first one that matches wins.

    When a template ends with a placeholder, the match is anchored at the end
    of the input so the last parameter captures the rest of the text. Without
    the anchor the lazy group stops after a single character.

    Args:
        variables_list: Iterable of template strings.
        user_input: Text to extract the parameters from.

    Returns:
        ``[{"key": name, "value": text}, ...]`` for the first matching
        pattern, or ``[]`` when nothing matches.

    Raises:
        re.error: If a template does not produce a valid regular expression.
    """
    for pattern in variables_list:
        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        if regex.endswith(".+?)"):
            # Force the trailing lazy group to extend to the end of the input.
            regex += r"\Z"
        match = re.match(regex, user_input)
        if match:
            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
    return []
|
|
|
def execute_intent(intent_name, user_input):
    """Build the result payload for a detected intent.

    Args:
        intent_name: Name of the detected intent.
        user_input: Text the intent's variable patterns are matched against.

    Returns:
        ``{"intent": intent_name, "parameters": [...]}``. The parameter list
        is empty when the intent has no registered definition.
    """
    # Unknown intent: nothing to extract.
    if intent_name not in INTENT_DEFINITIONS:
        return {"intent": intent_name, "parameters": []}

    patterns = INTENT_DEFINITIONS[intent_name].get("variables", [])
    parameters = extract_parameters(patterns, user_input)
    log(f"🚀 execute_intent('{intent_name}', {parameters})")
    return {"intent": intent_name, "parameters": parameters}
|
|
|
@app.post("/chat")
async def chat(msg: Message):
    """Answer a chat message with an intent result or an LLM reply.

    When an intent model is loaded, the message is classified first. If the
    confidence exceeds ``INTENT_CONFIDENCE_THRESHOLD`` and the intent has a
    registered definition, the intent result is returned and the LLM is not
    invoked at all. Otherwise the LLM generates a reply; a reply whose
    confidence is below ``LLM_CONFIDENCE_THRESHOLD`` is replaced by a random
    fallback answer.

    Args:
        msg: Request body carrying ``user_input``.

    Returns:
        One of ``{"intent", "parameters"}``, ``{"response"}`` or ``{"error"}``.
        Unexpected failures become a 500 ``JSONResponse``.
    """
    user_input = msg.user_input.strip()
    try:
        if model is None or tokenizer is None:
            return {"error": "Model yüklenmedi."}

        if INTENT_MODEL:
            # Classify before generating. Scheduling both as tasks gained
            # nothing: neither helper contains an await, so they ran one after
            # the other anyway, and the full LLM generation was paid for even
            # when its result was thrown away.
            intent, intent_conf = await detect_intent(user_input)
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
                return execute_intent(intent, user_input)

        # NOTE(review): the helpers are declared async but do their inference
        # synchronously, so the event loop is blocked while they run.
        response, response_conf = await generate_response(user_input)
        if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
            return {"response": random.choice(FALLBACK_ANSWERS)}
        return {"response": response}
    except Exception as e:
        # Request boundary: report the failure to the client as JSON.
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
|
def log(message):
    """Print ``message`` prefixed with the current local time as ``[HH:MM:SS]``.

    The output is flushed immediately.
    """
    stamp = f"{datetime.now():%H:%M:%S}"
    print(f"[{stamp}] {message}", flush=True)
|
|
|
def setup_model():
    """Load the chat model and tokenizer and publish them for ``/chat``.

    Runs on a background thread at start-up. The model and tokenizer are
    built in locals and assigned to the module globals only once they are
    fully configured (pad token, EOS id, eval mode). ``chat`` checks those
    globals, so it can never pick up a half-initialised model. After that the
    intent base checkpoint is loaded once and discarded.

    Returns:
        None. Every exception is caught, logged and printed, never raised.
    """
    global model, tokenizer, eos_token_id
    try:
        log("🧠 setup_model() başladı")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        log(f"📡 Kullanılan cihaz: {device}")

        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
        log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
        loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
        log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")

        # Fall back to the EOS token when the tokenizer defines no pad token.
        loaded_tokenizer.pad_token = loaded_tokenizer.pad_token or loaded_tokenizer.eos_token
        loaded_model.config.pad_token_id = loaded_tokenizer.pad_token_id
        stop_id = loaded_tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
        loaded_model.eval()
        log("✅ Ana model eval() çağrıldı")

        # Publish only now that everything is configured. This happens before
        # the intent-model step so a failure there does not disable chat.
        eos_token_id = stop_id
        tokenizer = loaded_tokenizer
        model = loaded_model

        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
        # Results are discarded; the calls presumably only warm the local
        # download cache for later training.
        AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
        log("✅ Intent modeli önbelleğe alındı.")
        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
    except Exception as e:
        # Thread boundary: report the failure instead of dying silently.
        log(f"❌ setup_model() hatası: {e}")
        traceback.print_exc()
|
|
|
# Load the models in the background so the HTTP server starts accepting
# requests immediately; /chat answers "Model yüklenmedi." until loading ends.
threading.Thread(target=setup_model, daemon=True).start()

# Serve from the main thread. uvicorn.run() blocks until the server stops, so
# no keep-alive loop is needed. A start-up failure such as a busy port now
# ends the process instead of leaving it sleeping forever with no server.
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|