import os, threading, time, traceback, zipfile, random, json, shutil, asyncio, re
import torch, uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig
from peft import PeftModel
from datasets import Dataset
from datetime import datetime
# === Environment
HF_TOKEN = os.getenv("HF_TOKEN")
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["TORCH_HOME"] = "/app/.torch_cache"
os.makedirs("/app/.torch_cache", exist_ok=True)
# === Settings
MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
USE_SAMPLING = False
INTENT_CONFIDENCE_THRESHOLD = 0.5
LLM_CONFIDENCE_THRESHOLD = 0.2
TRAIN_CONFIDENCE_THRESHOLD = 0.7
# Turkish fallback replies returned to the end user when generation confidence is too low
FALLBACK_ANSWERS = [
"Bu konuda maalesef bilgim yok.",
"Ne demek istediğinizi tam anlayamadım.",
"Bu soruya şu an yanıt veremiyorum."
]
INTENT_MODEL_PATH = "intent_model"
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
INTENT_DEFINITIONS = {}
# === FastAPI
app = FastAPI()
chat_history = []
model = None
tokenizer = None
eos_token_id = None
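# --- Request bodies: /chat expects {"user_input": "..."}; /train_intents expects {"intents": [...]}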
class Message(BaseModel):
user_input: str
class TrainInput(BaseModel):
intents: list
@app.get("/")
def health():
return {"status": "ok"}
@app.get("/start", response_class=HTMLResponse)
def root():
return """
<html><body>
<h2>Turkcell LLM Chat</h2>
<textarea id='input' rows='4' cols='60'></textarea><br>
    <button onclick='send()'>Send</button><br><br>
    <label>Model Answer:</label><br>
<textarea id='output' rows='10' cols='80' readonly style='white-space: pre-wrap;'></textarea>
<script>
async function send() {
const input = document.getElementById("input").value;
const res = await fetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ user_input: input })
});
const data = await res.json();
        document.getElementById('output').value = data.answer || data.response || data.error || 'An error occurred.';
}
</script>
</body></html>
"""
@app.post("/train_intents", status_code=202)
def train_intents(train_input: TrainInput):
global INTENT_DEFINITIONS
log("📥 POST /train_intents çağrıldı.")
intents = train_input.intents
INTENT_DEFINITIONS = {intent["name"]: intent for intent in intents}
threading.Thread(target=lambda: background_training(intents), daemon=True).start()
return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
def background_training(intents):
try:
log("🔧 Intent eğitimi başlatıldı...")
texts, labels, label2id = [], [], {}
for idx, intent in enumerate(intents):
label2id[intent["name"]] = idx
for ex in intent["examples"]:
texts.append(ex)
labels.append(idx)
dataset = Dataset.from_dict({"text": texts, "label": labels})
tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
config.problem_type = "single_label_classification"
config.num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, config=config)
        tokenized = dataset.map(
            lambda batch: tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128),
            batched=True,
            remove_columns=["text"],
        )
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
output_dir = "/app/intent_train_output"
os.makedirs(output_dir, exist_ok=True)
trainer = Trainer(
model=model,
args=TrainingArguments(output_dir, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
train_dataset=tokenized,
data_collator=default_data_collator
)
trainer.train()
        # ✅ Produce a per-intent accuracy report on the training set
        log("🔧 Generating accuracy report...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
input_ids_tensor = tokenized["input_ids"].to(device)
attention_mask_tensor = tokenized["attention_mask"].to(device)
with torch.no_grad():
outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
predictions = outputs.logits.argmax(dim=-1).tolist()
        actuals = tokenized["label"].tolist()
        id2label = {v: k for k, v in label2id.items()}
        counts = {}
        correct = {}
        for pred, actual in zip(predictions, actuals):
            intent = id2label[actual]
counts[intent] = counts.get(intent, 0) + 1
if pred == actual:
correct[intent] = correct.get(intent, 0) + 1
for intent, total in counts.items():
accuracy = correct.get(intent, 0) / total
log(f"📊 Intent '{intent}' doğruluk: {accuracy:.2f} — {total} örnek")
if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
log("📦 Intent modeli eğitimi kaydediliyor...")
if os.path.exists(INTENT_MODEL_PATH):
shutil.rmtree(INTENT_MODEL_PATH)
model.save_pretrained(INTENT_MODEL_PATH)
tokenizer.save_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
json.dump(label2id, f)
log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
except Exception as e:
log(f"❌ Intent eğitimi hatası: {e}")
traceback.print_exc()
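# Loads the trained classifier, its tokenizer, and the label map into module
# globals; call this once after training so /chat can start routing intents.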
@app.post("/load_intent_model")
def load_intent_model():
global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
try:
INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f:
LABEL2ID = json.load(f)
return {"status": "ok", "message": "Intent modeli yüklendi."}
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=500)
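# Classifies a user message: softmax over the classifier logits gives one
# probability per intent, and the top probability doubles as the confidence.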
async def detect_intent(text):
    inputs = INTENT_TOKENIZER(text, return_tensors="pt")
    with torch.no_grad():  # pure inference; skip autograd bookkeeping
        outputs = INTENT_MODEL(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    confidence, pred_id = torch.max(probs, dim=-1)
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id.item()], confidence.item()
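# Generates an LLM reply via the model's chat template. With greedy decoding,
# the per-step top-token probabilities are averaged into a rough confidence
# score; with sampling there is no comparable score, so None is returned.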
async def generate_response(text):
messages = [{"role": "user", "content": text}]
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    eos_token = eos_token_id if eos_token_id is not None else tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
input_ids = encodeds.to(model.device)
attention_mask = (input_ids != tokenizer.pad_token_id).long()
with torch.no_grad():
output = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
max_new_tokens=128,
do_sample=USE_SAMPLING,
eos_token_id=eos_token,
pad_token_id=tokenizer.pad_token_id,
return_dict_in_generate=True,
output_scores=True
)
    if not USE_SAMPLING:
        # Average the probability of the chosen token at each step; taking the
        # max over all steps and the whole vocabulary is almost always ~1.0.
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max(dim=-1).values.mean().item()
    else:
        top_conf = None
decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
    for tag in ["<|im_start|>assistant", "assistant"]:  # check the longer tag first; "assistant" also matches it as a substring
start = decoded.find(tag)
if start != -1:
decoded = decoded[start + len(tag):].strip()
break
return decoded, top_conf
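# Variable templates appear to use a "key:{example value}" syntax (inferred
# from the regex below): e.g. the template "hava durumu city:{Ankara}" becomes
# the regex "hava durumu (?P<city>.+?)", whose named groups yield the
# extracted parameters. Note that the literal parts are not regex-escaped.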
def extract_parameters(variables_list, user_input):
for pattern in variables_list:
regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        match = re.fullmatch(regex, user_input)  # anchor both ends so trailing lazy groups capture full values
if match:
return [{"key": k, "value": v} for k, v in match.groupdict().items()]
return []
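# Resolves a recognized intent: extracts any template variables from the raw
# input and returns a structured result; no external action is triggered here.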
def execute_intent(intent_name, user_input):
if intent_name in INTENT_DEFINITIONS:
definition = INTENT_DEFINITIONS[intent_name]
variables = extract_parameters(definition.get("variables", []), user_input)
log(f"🚀 execute_intent('{intent_name}', {variables})")
return {"intent": intent_name, "parameters": variables}
return {"intent": intent_name, "parameters": []}
@app.post("/chat")
async def chat(msg: Message):
user_input = msg.user_input.strip()
try:
        if model is None or tokenizer is None:
            return {"error": "Model not loaded yet."}
        if INTENT_MODEL is not None:
            # Note: both coroutines are CPU-bound and never await, so they
            # effectively run one after the other despite create_task().
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent, intent_conf = await intent_task
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
                response_task.cancel()  # the LLM reply is not needed once the intent is handled
                return execute_intent(intent, user_input)
else:
response, response_conf = await response_task
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
else:
response, response_conf = await generate_response(user_input)
if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
return {"response": random.choice(FALLBACK_ANSWERS)}
return {"response": response}
except Exception as e:
traceback.print_exc()
return JSONResponse(content={"error": str(e)}, status_code=500)
def log(message):
timestamp = datetime.now().strftime("%H:%M:%S")
print(f"[{timestamp}] {message}", flush=True)
def setup_model():
global model, tokenizer, eos_token_id
try:
log("🧠 setup_model() başladı")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log(f"📡 Kullanılan cihaz: {device}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
model.eval()
log("✅ Ana model eval() çağrıldı")
log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
_ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
_ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
log("✅ Intent modeli önbelleğe alındı.")
log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
except Exception as e:
log(f"❌ setup_model() hatası: {e}")
traceback.print_exc()
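# Startup: model loading and the uvicorn server each run on daemon threads;
# the main thread sleeps forever to keep the container alive.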
threading.Thread(target=setup_model, daemon=True).start()
threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start()
while True:
time.sleep(60)