# test-oncu / app.py
# ciyidogan — "Update app.py" (commit 2b39fa7, verified)
import time
import sys
from datetime import datetime
from fastapi import FastAPI, Request
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import threading
# 🕒 Timestamped logging helper
def log(message):
    """Print *message* to stdout prefixed with the current HH:MM:SS time.

    Flushes immediately so log lines show up in real time in the
    Space's container logs.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{stamp}] {message}", flush=True)
# ✅ Health-check server
app = FastAPI()


@app.get("/")
def health():
    """Liveness probe: report that the process is up."""
    return {"status": "ok"}


def run_health_server():
    """Serve the FastAPI app on 0.0.0.0:7860 (blocks; run in a thread)."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


# Daemon thread: must not keep the process alive on its own.
_health_thread = threading.Thread(target=run_health_server, daemon=True)
_health_thread.start()
# ✅ Model loading: instantiate tokenizer + model at import time so the
# Space fails fast (sys.exit) if the weights cannot be fetched.
MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

log("⬇️ Model ve tokenizer yükleme başlatılıyor...")
start_time = time.time()
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # bfloat16 halves memory vs fp32; device_map="auto" lets accelerate
    # place the layers on whatever GPU/CPU devices are available.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    log(f"✅ Model yüklendi. Süre: {time.time() - start_time:.2f} sn")
except Exception as e:
    # Any load failure (network, OOM, missing repo) aborts the process.
    log(f"❌ Model yükleme hatası: {e}")
    sys.exit(1)
@app.post("/generate")
async def generate(request: Request):
    """Generate a chat completion from the loaded model.

    Expects a JSON body with ``user_input`` and ``system_prompt`` (both
    required, non-empty). Returns ``{"response": <decoded text>}`` on
    success, or ``{"error": <message>}`` on bad input / generation failure.
    """
    req_data = await request.json()
    user_input = req_data.get("user_input", "")
    system_prompt = req_data.get("system_prompt", "")

    # Both fields are mandatory; empty strings are rejected as well.
    if not user_input or not system_prompt:
        return {"error": "user_input ve system_prompt zorunludur."}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    try:
        log("🧩 Input preparation başlatılıyor...")
        prep_start = time.time()
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)
        # Single un-padded sequence, so an all-ones mask is correct.
        attention_mask = torch.ones(inputs.shape, dtype=torch.long, device=model.device)
        log(f"✅ Input ve attention mask hazırlandı. Süre: {time.time() - prep_start:.2f} sn")

        # Stop on the regular EOS token or the Llama-3 end-of-turn token
        # "<|eot_id|>" — whichever the model emits first.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

        log("🧠 Generate çağrısı başlatılıyor...")
        gen_start = time.time()
        # FIX: the original also passed temperature=0.3 / top_p=0.7, but with
        # do_sample=False (greedy decoding) transformers ignores sampling
        # parameters and logs a warning — dropping them changes no output.
        # inference_mode() skips autograd bookkeeping, reducing memory use.
        with torch.inference_mode():
            outputs = model.generate(
                inputs,
                attention_mask=attention_mask,
                max_new_tokens=128,
                do_sample=False,
                eos_token_id=terminators,
            )
        log(f"✅ Generate tamamlandı. Süre: {time.time() - gen_start:.2f} sn")

        # Strip the prompt tokens; decode only the newly generated tail.
        response = outputs[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(response, skip_special_tokens=True)
        log("✅ Cevap başarıyla decode edildi.")
        return {"response": decoded_output}
    except Exception as e:
        # Surface the failure to the client instead of crashing the worker.
        log(f"❌ Generate hatası: {e}")
        return {"error": str(e)}
# 🧘 Keep the main thread alive: the health server runs in a daemon thread,
# so without this loop the process would exit immediately after startup.
log("⏸️ Uygulama hazır, bekleme modunda...")
while True:
    time.sleep(60)