import time
import sys
from datetime import datetime
from fastapi import FastAPI, Request
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import threading
# 🕒 Timestamped logging helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}")
    sys.stdout.flush()
# ✅ Health check server
app = FastAPI()

@app.get("/")
def health():
    return {"status": "ok"}

def run_health_server():
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Run the HTTP server on a daemon thread so the model loading below doesn't block it
threading.Thread(target=run_health_server, daemon=True).start()
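
# A minimal health probe (a sketch; assumes the service is reachable locally on port 7860):
#   curl http://localhost:7860/
#   -> {"status": "ok"}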
# ✅ Model loading
MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
log("⬇️ Starting model and tokenizer loading...")
start_time = time.time()
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,   # halves memory vs. float32
        device_map="auto",            # place weights on GPU(s) if available
    )
    log(f"✅ Model loaded. Took {time.time() - start_time:.2f} s")
except Exception as e:
    log(f"❌ Model loading error: {e}")
    sys.exit(1)
# Registered after uvicorn has started; Starlette matches routes per request,
# so late registration still works here
@app.post("/generate")
async def generate(request: Request):
    req_data = await request.json()
    user_input = req_data.get("user_input", "")
    system_prompt = req_data.get("system_prompt", "")
    if not user_input or not system_prompt:
        return {"error": "user_input and system_prompt are required."}
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    try:
        log("🧩 Starting input preparation...")
        prep_start = time.time()
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)
        attention_mask = torch.ones(inputs.shape, dtype=torch.long, device=model.device)
        log(f"✅ Input and attention mask prepared. Took {time.time() - prep_start:.2f} s")
        # Stop on either the model's EOS token or Llama 3's end-of-turn token
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]
        log("🧠 Starting generate call...")
        gen_start = time.time()
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=128,
            # Greedy decoding; sampling parameters such as temperature/top_p are
            # ignored (with a warning) when do_sample=False, so they are omitted here
            do_sample=False,
            eos_token_id=terminators,
        )
        log(f"✅ Generate finished. Took {time.time() - gen_start:.2f} s")
        # Slice off the prompt tokens so only the newly generated text is decoded
        response = outputs[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(response, skip_special_tokens=True)
        log("✅ Response decoded successfully.")
        return {"response": decoded_output}
    except Exception as e:
        log(f"❌ Generate error: {e}")
        return {"error": str(e)}
# 🧘 Keep the main thread alive so the app doesn't restart after training
log("⏸️ App ready, idling...")
while True:
    time.sleep(60)