File size: 808 Bytes
e2e640c
8dd6602
62d49a1
e2e640c
8dd6602
dc24051
e2e640c
8dd6602
2289445
 
8dd6602
 
 
e6a07cc
 
2289445
8dd6602
62d49a1
6b456ee
62d49a1
 
8dd6602
6b456ee
62d49a1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
from dotenv import load_dotenv
from ctransformers import AutoModelForCausalLM

# Optional: point the Hugging Face cache at a stable per-user location.
# setdefault() means an explicit HF_HOME in the environment always wins.
# NOTE(review): this runs after `ctransformers` is imported (L55); if that
# library reads HF_HOME eagerly at import time this default may arrive too
# late — confirm, or move the setdefault above the import.
os.environ.setdefault("HF_HOME", os.path.expanduser("~/.hf_home"))

# Load variables from .env if available (python-dotenv does not override
# variables already present in the environment by default).
load_dotenv()

# === High-Precision GGUF Model Configuration ===
# Every setting below can be overridden via the environment (or .env).
# Defaults are identical to the previously hard-coded values, so existing
# deployments behave exactly as before.
MODEL_REPO = os.getenv("MODEL_REPO", "TheBloke/openchat-3.5-0106-GGUF")
MODEL_FILE = os.getenv("MODEL_FILE", "openchat-3.5-0106.Q8_0.gguf")
MODEL_TYPE = os.getenv("MODEL_TYPE", "mistral")  # OpenChat 3.5 is Mistral-compatible
CTX_LEN = int(os.getenv("CTX_LEN", "8192"))  # Use full 8K context
GPU_LAYERS = int(os.getenv("GPU_LAYERS", "0"))  # 0 = CPU-only inference (previous fixed value)
# Truthy values ("1", "true", "yes") restrict loading to the local cache,
# never touching the network; default "0" preserves the old behavior.
LOCAL_FILES_ONLY = os.getenv("LOCAL_FILES_ONLY", "0").strip().lower() in ("1", "true", "yes")

# === Load Model ===
# Downloads (unless cached / LOCAL_FILES_ONLY) and loads the GGUF weights.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type=MODEL_TYPE,
    context_length=CTX_LEN,
    gpu_layers=GPU_LAYERS,
    local_files_only=LOCAL_FILES_ONLY,
)