import time
import sys
from datetime import datetime
from fastapi import FastAPI, Request
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import threading

# 🕒 Timestamped log helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}")
    sys.stdout.flush()

# ✅ Health check server
app = FastAPI()

@app.get("/")
def health():
    return {"status": "ok"}

def run_health_server():
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Serve in a background daemon thread so the main thread stays free to load the model
threading.Thread(target=run_health_server, daemon=True).start()

# ✅ Model loading
MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
log("⬇️ Model ve tokenizer yükleme başlatılıyor...")
start_time = time.time()
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    log(f"✅ Model yüklendi. Süre: {time.time() - start_time:.2f} sn")
except Exception as e:
    log(f"❌ Model yükleme hatası: {e}")
    sys.exit(1)

# Registered after the server thread starts: requests to /generate that arrive
# before the model has finished loading will get a 404 until this line runs
@app.post("/generate")
async def generate(request: Request):
    req_data = await request.json()
    user_input = req_data.get("user_input", "")
    system_prompt = req_data.get("system_prompt", "")

    if not user_input or not system_prompt:
        return {"error": "user_input ve system_prompt zorunludur."}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    try:
        log("🧩 Input preparation başlatılıyor...")
        prep_start = time.time()
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)
        # apply_chat_template returns only input_ids here, so build an
        # all-ones attention mask for the single, unpadded sequence
        attention_mask = torch.ones(inputs.shape, dtype=torch.long, device=model.device)
        log(f"✅ Inputs and attention mask ready. Took {time.time() - prep_start:.2f} s")

        # Stop on either the tokenizer's EOS token or Llama-3's end-of-turn marker
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

        log("🧠 Generate çağrısı başlatılıyor...")
        gen_start = time.time()
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=128,
            # Greedy decoding: temperature/top_p are ignored (and trigger a
            # transformers warning) when do_sample=False, so they are omitted
            do_sample=False,
            eos_token_id=terminators,
        )
        log(f"✅ Generate tamamlandı. Süre: {time.time() - gen_start:.2f} sn")

        # Drop the prompt tokens and decode only the newly generated part
        response = outputs[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(response, skip_special_tokens=True)
        log("✅ Response decoded successfully.")
        return {"response": decoded_output}

    except Exception as e:
        log(f"❌ Generate hatası: {e}")
        return {"error": str(e)}

# 🧘 Keep the main thread alive so the app is not restarted after startup
log("⏸️ App ready, idling...")
while True:
    time.sleep(60)
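
# To run this service (filename and install command are illustrative; accelerate
# is needed for device_map="auto"):
#   pip install fastapi uvicorn transformers torch accelerate
#   python app.py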