Spaces:

UcsTurkey
/

oncu

Paused

App Files Community

ciyidogan committed on May 28

Commit

de0a38c

verified ·

1 Parent(s): df72abf

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -3

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from datetime import datetime
 # === 🕒 Zamanlı log fonksiyonu
 def log(message):
@@ -15,7 +16,7 @@ def log(message):
 # === Model bilgileri
 REPO_ID = "oncu/Turkish-Llama-3-8B-function-calling-GGUF"
-FILENAME = "turkish-llama-3-8b-function-calling.q8_0.gguf"
 LOCAL_MODEL_PATH = f"/tmp/{FILENAME}"
 HF_TOKEN = os.getenv("HF_TOKEN")  # ✅ Hugging Face Token (varsa)
@@ -58,7 +59,7 @@ def load_model():
         log(f"✅ Model indirildi: {model_path}")
         log("📦 GGUF model yükleniyor...")
-        llm = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096)
         log("✅ Model başarıyla yüklendi ve kullanılmaya hazır.")
         log("💡 Artık /chat endpoint'ine POST isteği gönderebilirsiniz.")
     except Exception as e:
@@ -72,10 +73,23 @@ def chat(req: ChatRequest):
         log(f"💬 Yeni istek alındı: '{req.prompt}'")
         prompt = f"{SYSTEM_PROMPT}\n\nKullanıcı: {req.prompt}\nAsistan:"
         log("🧠 LLM çağrısı başlatılıyor...")
-        response = llm(prompt, max_tokens=512, stop=["Kullanıcı:", "Asistan:"], echo=False)
         answer = response["choices"][0]["text"].strip()
         log("✅ LLM cevabı başarıyla alındı.")
         return {"response": answer}
     except Exception as e:
         log(f"❌ /chat sırasında hata oluştu: {e}")
         traceback.print_exc()

 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from datetime import datetime
+import concurrent.futures
 # === 🕒 Zamanlı log fonksiyonu
 def log(message):
 # === Model bilgileri
 REPO_ID = "oncu/Turkish-Llama-3-8B-function-calling-GGUF"
+FILENAME = "turkish-llama-3-8b-function-calling.q8_0.gguf"  # ✅ doğru dosya adı
 LOCAL_MODEL_PATH = f"/tmp/{FILENAME}"
 HF_TOKEN = os.getenv("HF_TOKEN")  # ✅ Hugging Face Token (varsa)
         log(f"✅ Model indirildi: {model_path}")
         log("📦 GGUF model yükleniyor...")
+        llm = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=1024)  # ✅ n_ctx düşürüldü
         log("✅ Model başarıyla yüklendi ve kullanılmaya hazır.")
         log("💡 Artık /chat endpoint'ine POST isteği gönderebilirsiniz.")
     except Exception as e:
         log(f"💬 Yeni istek alındı: '{req.prompt}'")
         prompt = f"{SYSTEM_PROMPT}\n\nKullanıcı: {req.prompt}\nAsistan:"
         log("🧠 LLM çağrısı başlatılıyor...")
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future = executor.submit(
+                llm,
+                prompt,
+                max_tokens=512,
+                stop=["Kullanıcı:", "Asistan:"],
+                echo=False
+            )
+            response = future.result(timeout=30)  # ✅ 30 saniye timeout
         answer = response["choices"][0]["text"].strip()
         log("✅ LLM cevabı başarıyla alındı.")
         return {"response": answer}
+    except concurrent.futures.TimeoutError:
+        log("❌ LLM çağrısı timeout oldu (30 saniye).")
+        return {"error": "LLM çağrısı zaman aşımına uğradı."}
     except Exception as e:
         log(f"❌ /chat sırasında hata oluştu: {e}")
         traceback.print_exc()