"""FastAPI inference server for the ytu-ce-cosmos Turkish-Llama-8b model.

Starts a health-check HTTP server in a daemon thread, loads the model at
import time, exposes a POST /generate endpoint, then parks the main thread
in a sleep loop so the hosting platform does not restart the process.
"""

import sys
import threading
import time
from datetime import datetime

import torch
import uvicorn
from fastapi import FastAPI, Request
from transformers import AutoModelForCausalLM, AutoTokenizer


# 🕒 Timestamped log helper (flushes stdout so platform log collectors see it immediately).
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}")
    sys.stdout.flush()


# ✅ Health-check server
app = FastAPI()


@app.get("/")
def health():
    """Liveness probe endpoint."""
    return {"status": "ok"}


def run_health_server():
    """Run the FastAPI app; blocks, so it lives in a daemon thread."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


threading.Thread(target=run_health_server, daemon=True).start()

# ✅ Model loading (happens at import time, before /generate can serve requests)
MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

log("⬇️ Model ve tokenizer yükleme başlatılıyor...")
start_time = time.time()

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    log(f"✅ Model yüklendi. Süre: {time.time() - start_time:.2f} sn")
except Exception as e:
    # Without a model the process is useless; exit so the platform restarts it.
    log(f"❌ Model yükleme hatası: {e}")
    sys.exit(1)


@app.post("/generate")
async def generate(request: Request):
    """Generate a chat completion.

    Expects a JSON body with ``user_input`` and ``system_prompt`` (both
    required). Returns ``{"response": <text>}`` on success or
    ``{"error": <message>}`` on failure.
    """
    req_data = await request.json()
    user_input = req_data.get("user_input", "")
    system_prompt = req_data.get("system_prompt", "")

    if not user_input or not system_prompt:
        return {"error": "user_input ve system_prompt zorunludur."}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    try:
        log("🧩 Input preparation başlatılıyor...")
        prep_start = time.time()

        inputs = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        # Single unpadded sequence, so an all-ones mask is correct; passing it
        # explicitly silences the transformers warning about a missing mask.
        attention_mask = torch.ones(inputs.shape, dtype=torch.long, device=model.device)
        log(f"✅ Input ve attention mask hazırlandı. Süre: {time.time() - prep_start:.2f} sn")

        # Stop on either the regular EOS token or Llama-3's end-of-turn token.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        log("🧠 Generate çağrısı başlatılıyor...")
        gen_start = time.time()
        # NOTE: greedy decoding. temperature/top_p were removed because they
        # are ignored (and warned about) when do_sample=False.
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=False,
            eos_token_id=terminators,
        )
        log(f"✅ Generate tamamlandı. Süre: {time.time() - gen_start:.2f} sn")

        # Slice off the prompt tokens; decode only the newly generated tail.
        response = outputs[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(response, skip_special_tokens=True)
        log("✅ Cevap başarıyla decode edildi.")

        return {"response": decoded_output}
    except Exception as e:
        log(f"❌ Generate hatası: {e}")
        return {"error": str(e)}


# 🧘 Keep the main thread alive so the hosting platform does not restart the app.
log("⏸️ Uygulama hazır, bekleme modunda...")
while True:
    time.sleep(60)