import os
import re
import json
import time
import random
import shutil
import asyncio
import zipfile
import threading
import traceback
from datetime import datetime

import torch
import uvicorn
from datasets import Dataset
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from peft import PeftModel
from pydantic import BaseModel
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator,
)

# === Environment
HF_TOKEN = os.getenv("HF_TOKEN")
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["TORCH_HOME"] = "/app/.torch_cache"
os.makedirs("/app/.torch_cache", exist_ok=True)

# === Settings
MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
USE_SAMPLING = False
INTENT_CONFIDENCE_THRESHOLD = 0.5
LLM_CONFIDENCE_THRESHOLD = 0.2
TRAIN_CONFIDENCE_THRESHOLD = 0.7

# Canned replies returned when the LLM's confidence falls below
# LLM_CONFIDENCE_THRESHOLD. Deliberately left in Turkish: they are
# user-facing answers of a Turkish-language chatbot.
FALLBACK_ANSWERS = [
    "Bu konuda maalesef bilgim yok.",
    "Ne demek istediğinizi tam anlayamadım.",
    "Bu soruya şu an yanıt veremiyorum.",
]

INTENT_MODEL_PATH = "intent_model"
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
INTENT_DEFINITIONS = {}

# === FastAPI
app = FastAPI()
chat_history = []
model = None
tokenizer = None
eos_token_id = None

class Message(BaseModel):
    user_input: str

class TrainInput(BaseModel):
    intents: list
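# For illustration, a hypothetical payload for the /train_intents endpoint
# below, in the shape background_training and extract_parameters expect; the
# intent name, example utterances, and "variables" pattern are made up:
#
#   {
#     "intents": [
#       {
#         "name": "para_transferi",
#         "examples": ["para göndermek istiyorum", "transfer yap"],
#         "variables": ["tutar:{amount} gönder"]
#       }
#     ]
#   }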

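# API surface (see the handlers below):
#   GET  /                  -> health check
#   GET  /start             -> minimal HTML chat page
#   POST /train_intents     -> train the intent classifier in the background
#   POST /load_intent_model -> load the trained classifier into memory
#   POST /chat              -> intent detection, parameter extraction, or LLM reply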
@app.get("/")
def health():
    return {"status": "ok"}

@app.get("/start", response_class=HTMLResponse)
def root():
    # Placeholder markup: only the page title is preserved from the original chat UI.
    return """
    <!DOCTYPE html>
    <html lang="tr">
      <head><meta charset="utf-8"><title>Turkcell LLM Chat</title></head>
      <body><h1>Turkcell LLM Chat</h1></body>
    </html>
    """

@app.post("/train_intents", status_code=202)
def train_intents(train_input: TrainInput):
    global INTENT_DEFINITIONS
    log("📥 POST /train_intents called.")
    intents = train_input.intents
    INTENT_DEFINITIONS = {intent["name"]: intent for intent in intents}
    threading.Thread(target=background_training, args=(intents,), daemon=True).start()
    return {"status": "accepted", "message": "Intent training started in the background."}

def background_training(intents):
    try:
        log("🔧 Intent training started...")
        texts, labels, label2id = [], [], {}
        for idx, intent in enumerate(intents):
            label2id[intent["name"]] = idx
            for ex in intent["examples"]:
                texts.append(ex)
                labels.append(idx)

        dataset = Dataset.from_dict({"text": texts, "label": labels})
        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
        config.problem_type = "single_label_classification"
        config.num_labels = len(label2id)
        model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)

        tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
        for row in dataset:
            out = tokenizer(row["text"], truncation=True, padding="max_length", max_length=128)
            tokenized_data["input_ids"].append(out["input_ids"])
            tokenized_data["attention_mask"].append(out["attention_mask"])
            tokenized_data["label"].append(row["label"])
        tokenized = Dataset.from_dict(tokenized_data)
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

        output_dir = "/app/intent_train_output"
        os.makedirs(output_dir, exist_ok=True)
        trainer = Trainer(
            model=model,
            args=TrainingArguments(
                output_dir,
                per_device_train_batch_size=4,
                num_train_epochs=3,
                logging_steps=10,
                save_strategy="no",
                report_to=[],
            ),
            train_dataset=tokenized,
            data_collator=default_data_collator,
        )
        trainer.train()

        # Produce a per-intent accuracy report on the training set.
        log("🔧 Producing training report...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        # Runs the whole training set through the model in a single batch;
        # fine for small intent datasets, but it will not scale.
        input_ids_tensor = tokenized["input_ids"].to(device)
        attention_mask_tensor = tokenized["attention_mask"].to(device)
        with torch.no_grad():
            outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
            predictions = outputs.logits.argmax(dim=-1).tolist()

        id2label = {v: k for k, v in label2id.items()}
        counts, correct = {}, {}
        for pred, actual in zip(predictions, tokenized["label"]):
            intent = id2label[int(actual)]
            counts[intent] = counts.get(intent, 0) + 1
            if pred == int(actual):
                correct[intent] = correct.get(intent, 0) + 1
        for intent, total in counts.items():
            accuracy = correct.get(intent, 0) / total
            log(f"📊 Intent '{intent}' accuracy: {accuracy:.2f} ({total} examples)")
            if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                log(f"⚠️ Underperforming intent '{intent}': accuracy {accuracy:.2f}, examples {total}")

        log("📦 Saving trained intent model...")
        if os.path.exists(INTENT_MODEL_PATH):
            shutil.rmtree(INTENT_MODEL_PATH)
        model.save_pretrained(INTENT_MODEL_PATH)
        tokenizer.save_pretrained(INTENT_MODEL_PATH)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
            json.dump(label2id, f)
        log("✅ Intent training finished; model saved.")
    except Exception as e:
        log(f"❌ Intent training error: {e}")
        traceback.print_exc()

@app.post("/load_intent_model")
def load_intent_model():
    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
    try:
        INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
        INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
        INTENT_MODEL.eval()
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f:
            LABEL2ID = json.load(f)
        return {"status": "ok", "message": "Intent model loaded."}
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

async def detect_intent(text):
    inputs = INTENT_TOKENIZER(text, return_tensors="pt")
    with torch.no_grad():
        outputs = INTENT_MODEL(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    confidence, pred_id = torch.max(probs, dim=-1)
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id.item()], confidence.item()

async def generate_response(text):
    messages = [{"role": "user", "content": text}]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
    input_ids = encodeds.to(model.device)
    attention_mask = (input_ids != tokenizer.pad_token_id).long()
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=USE_SAMPLING,
            eos_token_id=eos_token,
            pad_token_id=tokenizer.pad_token_id,
            return_dict_in_generate=True,
            output_scores=True,
        )
    if not USE_SAMPLING:
        # Greedy decoding: take the highest next-token probability seen during
        # generation as a rough confidence signal.
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max().item()
    else:
        top_conf = None
    decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
    # Drop everything up to and including the assistant marker, if present.
    for tag in ["assistant", "<|im_start|>assistant"]:
        start = decoded.find(tag)
        if start != -1:
            decoded = decoded[start + len(tag):].strip()
            break
    return decoded, top_conf

def extract_parameters(variables_list, user_input):
    # Each pattern embeds "name:{placeholder}" chunks; rewrite them into named
    # regex groups, e.g. "tutar:{amount} gönder" -> "(?P<tutar>.+?) gönder",
    # then return the captured key/value pairs of the first matching pattern.
    for pattern in variables_list:
        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        match = re.match(regex, user_input)
        if match:
            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
    return []

def execute_intent(intent_name, user_input):
    if intent_name in INTENT_DEFINITIONS:
        definition = INTENT_DEFINITIONS[intent_name]
        variables = extract_parameters(definition.get("variables", []), user_input)
        log(f"🚀 execute_intent('{intent_name}', {variables})")
        return {"intent": intent_name, "parameters": variables}
    return {"intent": intent_name, "parameters": []}

@app.post("/chat")
async def chat(msg: Message):
    user_input = msg.user_input.strip()
    try:
        if model is None or tokenizer is None:
            return {"error": "Model not loaded yet."}
        if INTENT_MODEL:
            # Both coroutines are CPU-bound and contain no awaits, so they run
            # back-to-back on the event loop rather than truly in parallel.
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent, intent_conf = await intent_task
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
                response_task.cancel()  # the LLM reply is no longer needed
                return execute_intent(intent, user_input)
            response, response_conf = await response_task
        else:
            response, response_conf = await generate_response(user_input)
        if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
            return {"response": random.choice(FALLBACK_ANSWERS)}
        return {"response": response}
    except Exception as e:
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)

def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)
def setup_model():
    global model, tokenizer, eos_token_id
    try:
        log("🧠 setup_model() started")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        log(f"📡 Device in use: {device}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
        log("📦 Tokenizer loaded. Downloading base model...")
        model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
        log("📦 Base model downloaded and loaded. Calling eval()...")
        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
        model.config.pad_token_id = tokenizer.pad_token_id
        eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
        model.eval()
        log("✅ eval() called on base model")
        log(f"📦 Downloading intent model: {INTENT_MODEL_ID}")
        # Warm the Hugging Face cache so a later /train_intents call starts faster.
        _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
        log("✅ Intent model cached.")
        log("✔️ Model loaded successfully; ready to chat.")
    except Exception as e:
        log(f"❌ setup_model() error: {e}")
        traceback.print_exc()

# Load the model and serve the API on daemon threads; the main thread only
# stays alive so those threads keep running.
threading.Thread(target=setup_model, daemon=True).start()
threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start()
while True:
    time.sleep(60)
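# For illustration, typical calls against this service; paths and port come
# from the code above, the payloads are hypothetical:
#
#   curl -X POST localhost:7860/train_intents \
#        -H "Content-Type: application/json" \
#        -d '{"intents": [{"name": "para_transferi", "examples": ["transfer yap"], "variables": []}]}'
#
#   curl -X POST localhost:7860/load_intent_model
#
#   curl -X POST localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"user_input": "merhaba"}'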