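"""Minimal FastAPI inference server for ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1.

A health endpoint comes up immediately in a background daemon thread on port
7860 (the default port probed by Hugging Face Spaces), the model is loaded,
and a /generate endpoint then serves chat-style completions."""
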
import sys
import threading
import time
from datetime import datetime

import torch
import uvicorn
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from transformers import AutoTokenizer, AutoModelForCausalLM


def log(message):
    """Print a timestamped message and flush stdout right away, so logs show
    up in real time even when output is block-buffered (e.g. in a container)."""
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}")
    sys.stdout.flush()


app = FastAPI()


@app.get("/")
def health():
    return {"status": "ok"}


def run_health_server():
    uvicorn.run(app, host="0.0.0.0", port=7860)


# Start the health server in a daemon thread before the slow model load below,
# so platform health probes get an answer right away. The /generate route
# registered further down is still served, because Starlette matches routes
# per request.
threading.Thread(target=run_health_server, daemon=True).start()
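
# Quick smoke test (hypothetical local run):
#   $ curl http://localhost:7860/
#   {"status":"ok"}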

MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

log("⬇️ Starting model and tokenizer load...")
start_time = time.time()
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,  # half the memory of float32; bf16 is native on Ampere+ GPUs
        device_map="auto",           # let accelerate place the weights on the available device(s)
    )
    log(f"✅ Model loaded in {time.time() - start_time:.2f} s")
except Exception as e:
    log(f"❌ Model load failed: {e}")
    sys.exit(1)
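
# Rough sizing: 8B parameters × 2 bytes (bfloat16) ≈ 16 GB for the weights
# alone, before KV cache and activations, so budget GPU memory accordingly.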


@app.post("/generate")
async def generate(request: Request):
    req_data = await request.json()
    user_input = req_data.get("user_input", "")
    system_prompt = req_data.get("system_prompt", "")

    if not user_input or not system_prompt:
        return {"error": "user_input and system_prompt are required."}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    try:
        log("🧩 Preparing inputs...")
        prep_start = time.time()
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)
        # Single unpadded sequence, so an all-ones attention mask is correct.
        attention_mask = torch.ones(inputs.shape, dtype=torch.long, device=model.device)
        log(f"✅ Inputs and attention mask ready in {time.time() - prep_start:.2f} s")

        # Stop on the model's EOS token or on the Llama 3 end-of-turn token.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        log("🧠 Starting generate call...")
        gen_start = time.time()
        # model.generate is blocking; run it in a worker thread so the event
        # loop (and with it the health endpoint) stays responsive. With
        # do_sample=False decoding is greedy, and sampling-only flags such as
        # temperature and top_p would be ignored, so they are not passed.
        outputs = await run_in_threadpool(
            model.generate,
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=False,
            eos_token_id=terminators,
        )
        log(f"✅ Generate finished in {time.time() - gen_start:.2f} s")

        # Decode only the newly generated tokens, skipping the prompt.
        response = outputs[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(response, skip_special_tokens=True)
        log("✅ Response decoded successfully.")
        return {"response": decoded_output}

    except Exception as e:
        log(f"❌ Generate error: {e}")
        return {"error": str(e)}
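
# Example request (hypothetical local run; keys match the handler above):
#   $ curl -X POST http://localhost:7860/generate \
#       -H "Content-Type: application/json" \
#       -d '{"system_prompt": "You are a helpful assistant.", "user_input": "Hello!"}'
#   {"response": "..."}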


# Keep the main thread alive; the daemon uvicorn thread serves all requests.
log("⏸️ App ready, idling...")
while True:
    time.sleep(60)