import os, torch, threading, uvicorn, time, traceback, zipfile, random, json, shutil, asyncio, re
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig
from peft import PeftModel
from datasets import Dataset
from datetime import datetime
# === Ortam
HF_TOKEN = os.getenv("HF_TOKEN")
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["TORCH_HOME"] = "/app/.torch_cache"
os.makedirs("/app/.torch_cache", exist_ok=True)
# === Ayarlar
MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
USE_SAMPLING = False
INTENT_CONFIDENCE_THRESHOLD = 0.5
LLM_CONFIDENCE_THRESHOLD = 0.2
TRAIN_CONFIDENCE_THRESHOLD = 0.7
FALLBACK_ANSWERS = [
"Bu konuda maalesef bilgim yok.",
"Ne demek istediğinizi tam anlayamadım.",
"Bu soruya şu an yanıt veremiyorum."
]
INTENT_MODEL_PATH = "intent_model"
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
INTENT_DEFINITIONS = {}
# === FastAPI
app = FastAPI()
chat_history = []
model = None
tokenizer = None
eos_token_id = None
class Message(BaseModel):
user_input: str
class TrainInput(BaseModel):
intents: list
@app.get("/")
def health():
return {"status": "ok"}
@app.get("/start", response_class=HTMLResponse)
def root():
return """
Turkcell LLM Chat
"""
@app.post("/train_intents", status_code=202)
def train_intents(train_input: TrainInput):
global INTENT_DEFINITIONS
log("📥 POST /train_intents çağrıldı.")
intents = train_input.intents
INTENT_DEFINITIONS = {intent["name"]: intent for intent in intents}
threading.Thread(target=lambda: background_training(intents), daemon=True).start()
return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
def background_training(intents):
try:
log("🔧 Intent eğitimi başlatıldı...")
texts, labels, label2id = [], [], {}
for idx, intent in enumerate(intents):
label2id[intent["name"]] = idx
for ex in intent["examples"]:
texts.append(ex)
labels.append(idx)
dataset = Dataset.from_dict({"text": texts, "label": labels})
tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
config.problem_type = "single_label_classification"
config.num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)
tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
for row in dataset:
out = tokenizer(row["text"], truncation=True, padding="max_length", max_length=128)
tokenized_data["input_ids"].append(out["input_ids"])
tokenized_data["attention_mask"].append(out["attention_mask"])
tokenized_data["label"].append(row["label"])
tokenized = Dataset.from_dict(tokenized_data)
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
output_dir = "/app/intent_train_output"
os.makedirs(output_dir, exist_ok=True)
trainer = Trainer(
model=model,
args=TrainingArguments(output_dir, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
train_dataset=tokenized,
data_collator=default_data_collator
)
trainer.train()
# ✅ Başarı raporu üret
log("🔧 Başarı raporu üretiliyor...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
input_ids_tensor = tokenized["input_ids"].to(device)
attention_mask_tensor = tokenized["attention_mask"].to(device)
with torch.no_grad():
outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
predictions = outputs.logits.argmax(dim=-1).tolist()
actuals = tokenized["label"]
counts = {}
correct = {}
for pred, actual in zip(predictions, actuals):
intent = list(label2id.keys())[list(label2id.values()).index(actual)]
counts[intent] = counts.get(intent, 0) + 1
if pred == actual:
correct[intent] = correct.get(intent, 0) + 1
for intent, total in counts.items():
accuracy = correct.get(intent, 0) / total
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
log("📦 Intent modeli eğitimi kaydediliyor...")
if os.path.exists(INTENT_MODEL_PATH):
shutil.rmtree(INTENT_MODEL_PATH)
model.save_pretrained(INTENT_MODEL_PATH)
tokenizer.save_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
json.dump(label2id, f)
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
except Exception as e:
log(f"❌ Intent eğitimi hatası: {e}")
traceback.print_exc()
@app.post("/load_intent_model")
def load_intent_model():
global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
try:
INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f:
LABEL2ID = json.load(f)
return {"status": "ok", "message": "Intent modeli yüklendi."}
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=500)
async def detect_intent(text):
inputs = INTENT_TOKENIZER(text, return_tensors="pt")
outputs = INTENT_MODEL(**inputs)
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
confidence, pred_id = torch.max(probs, dim=-1)
id2label = {v: k for k, v in LABEL2ID.items()}
return id2label[pred_id.item()], confidence.item()
async def generate_response(text):
messages = [{"role": "user", "content": text}]
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
input_ids = encodeds.to(model.device)
attention_mask = (input_ids != tokenizer.pad_token_id).long()
with torch.no_grad():
output = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
max_new_tokens=128,
do_sample=USE_SAMPLING,
eos_token_id=eos_token,
pad_token_id=tokenizer.pad_token_id,
return_dict_in_generate=True,
output_scores=True
)
if not USE_SAMPLING:
scores = torch.stack(output.scores, dim=1)
probs = torch.nn.functional.softmax(scores[0], dim=-1)
top_conf = probs.max().item()
else:
top_conf = None
decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
for tag in ["assistant", "<|im_start|>assistant"]:
start = decoded.find(tag)
if start != -1:
decoded = decoded[start + len(tag):].strip()
break
return decoded, top_conf
def extract_parameters(variables_list, user_input):
for pattern in variables_list:
regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
match = re.match(regex, user_input)
if match:
return [{"key": k, "value": v} for k, v in match.groupdict().items()]
return []
def execute_intent(intent_name, user_input):
if intent_name in INTENT_DEFINITIONS:
definition = INTENT_DEFINITIONS[intent_name]
variables = extract_parameters(definition.get("variables", []), user_input)
log(f"🚀 execute_intent('{intent_name}', {variables})")
return {"intent": intent_name, "parameters": variables}
return {"intent": intent_name, "parameters": []}
@app.post("/chat")
async def chat(msg: Message):
user_input = msg.user_input.strip()
try:
if model is None or tokenizer is None:
return {"error": "Model yüklenmedi."}
if INTENT_MODEL:
intent_task = asyncio.create_task(detect_intent(user_input))
response_task = asyncio.create_task(generate_response(user_input))
intent, intent_conf = await intent_task
log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
result = execute_intent(intent, user_input)
return result
else:
response, response_conf = await response_task
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
else:
response, response_conf = await generate_response(user_input)
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
except Exception as e:
traceback.print_exc()
return JSONResponse(content={"error": str(e)}, status_code=500)
def log(message):
timestamp = datetime.now().strftime("%H:%M:%S")
print(f"[{timestamp}] {message}", flush=True)
def setup_model():
global model, tokenizer, eos_token_id
try:
log("🧠 setup_model() başladı")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log(f"📡 Kullanılan cihaz: {device}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
model.eval()
log("✅ Ana model eval() çağrıldı")
log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
_ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
_ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
log("✅ Intent modeli önbelleğe alındı.")
log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
except Exception as e:
log(f"❌ setup_model() hatası: {e}")
traceback.print_exc()
threading.Thread(target=setup_model, daemon=True).start()
threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start()
while True:
time.sleep(60)