"""
This file consolidates parameters for logging, database connections, model paths, API settings, and security.
"""

import logging
import os

import torch

base_path = os.path.dirname(os.path.realpath(__file__))

# Logging setup for console output.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s: %(message)s",
    handlers=[logging.StreamHandler()]
)

# Qdrant vector database connection.
qdrant_client_config = {
    "host": "localhost",
    "port": 6333,
}
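
# Usage sketch (an assumption, not code from this file): these keys map
# directly onto the qdrant-client constructor.
#
#   from qdrant_client import QdrantClient
#   client = QdrantClient(**qdrant_client_config)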

# Use CUDA when available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Embedding and reranking model names.
embedder_model = "all-MiniLM-L6-v2"

reranker_model = "cross-encoder/ms-marco-MiniLM-L6-v2"
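
# Usage sketch (assumed: both names are sentence-transformers models):
#
#   from sentence_transformers import CrossEncoder, SentenceTransformer
#   embedder = SentenceTransformer(embedder_model, device=device)
#   reranker = CrossEncoder(reranker_model, device=device)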

# Local GGUF LLM settings.
local_llm_config = {
    "model_path_or_repo_id": "TheBloke/Mistral-7B-v0.1-GGUF",
    "model_file": "mistral-7b-v0.1.Q5_K_S.gguf",
    "model_type": "mistral",
    "gpu_layers": 20 if torch.cuda.is_available() else 0,  # layers offloaded to the GPU
    "threads": 8,
    "context_length": 4096,  # maximum context window, in tokens
    "mlock": True,  # lock the model in RAM to prevent swapping
}
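
# Loading sketch (assumed: these keys match ctransformers'
# AutoModelForCausalLM.from_pretrained, so the dict can be splatted in):
#
#   from ctransformers import AutoModelForCausalLM
#   llm = AutoModelForCausalLM.from_pretrained(**local_llm_config)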

local_generation_config = {
    "last_n_tokens": 128,  # how many recent tokens the repetition penalty considers
    "temperature": 0.3,  # controls randomness; higher values give more varied output
    "repetition_penalty": 1.2,
}
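
# Continuing the ctransformers sketch above: these keys are accepted as
# per-call generation kwargs.
#
#   answer = llm("Question: ...\nAnswer:", **local_generation_config)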

text_splitter_config = {
    "chunk_size": 1000,  # maximum chunk size, measured by length_function
    "chunk_overlap": 100,  # overlap between consecutive chunks
    "length_function": len,  # function used to measure chunk length
    "is_separator_regex": False,
    "add_start_index": True,  # record each chunk's start index in its metadata
}
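
# Usage sketch (assumed: these are LangChain text-splitter parameters, e.g.
# RecursiveCharacterTextSplitter):
#
#   from langchain_text_splitters import RecursiveCharacterTextSplitter
#   splitter = RecursiveCharacterTextSplitter(**text_splitter_config)
#   docs = splitter.create_documents(["some long text ..."])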

# "127.0.0.1"
api_config = {
    "app": "app.api:api",
    "host": "0.0.0.0",
    "port": 5050,
    "reload": True,  # The server will reload on system changes
}
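
# Usage sketch (assumed: the keys match uvicorn.run, with "app" given as an
# import string so that reload can work):
#
#   import uvicorn
#   uvicorn.run(**api_config)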

gemini_generation_config = {
    "temperature": 0,  # deterministic, predictable output
    "top_p": 0.95,
    "top_k": 20,
    "candidate_count": 1,
    "seed": 5,
    "max_output_tokens": 1000,
    "stop_sequences": ['STOP!'],
    "presence_penalty": 0.0,
    "frequency_penalty": 0.0,
}
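
# Usage sketch (assumed: the google-generativeai SDK, which accepts a plain
# dict as generation_config; the model name is a placeholder):
#
#   import google.generativeai as genai
#   model = genai.GenerativeModel(
#       "gemini-1.5-flash", generation_config=gemini_generation_config
#   )
#   response = model.generate_content("Hello")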

use_gemini: bool = True  # use the Gemini API instead of the local LLM

max_delta = 0.15  # maximum distance (delta) between vectors for them to be considered similar
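
# Usage sketch (hypothetical; "hits" and ".score" stand in for whatever the
# vector search actually returns):
#
#   similar = [hit for hit in hits if hit.score <= max_delta]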

# PostgreSQL client settings.
# Note: a Postgres server must be running on this host and port, with a
# matching user and database already created.
host = "localhost"
port = 5432
user = "postgres"
password = "lol"  # development-only credentials
dbname = "exp"

postgres_client_config = {
    "url": os.environ["DATABASE_URL"],
    "echo": False,  # when True, log every emitted SQL statement
}
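
# Usage sketch (assumed: SQLAlchemy, whose create_engine accepts "url" and
# "echo" keyword arguments). DATABASE_URL must be set before this module is
# imported, presumably assembled from the settings above, e.g.
# "postgresql://postgres:lol@localhost:5432/exp":
#
#   from sqlalchemy import create_engine
#   engine = create_engine(**postgres_client_config)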

very_secret_pepper = "goida"  # server-side secret ("pepper") mixed into password hashes
jwt_algorithm = "HS256"  # signing algorithm for JWTs

max_cookie_lifetime = 300  # session cookie lifetime, in seconds
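
# Usage sketch (hypothetical; assumes PyJWT, with placeholder values for the
# password, subject, and signing key):
#
#   import hashlib
#   import time
#   import jwt  # PyJWT
#
#   hashed = hashlib.sha256(("hunter2" + very_secret_pepper).encode()).hexdigest()
#   token = jwt.encode(
#       {"sub": "alice", "exp": int(time.time()) + max_cookie_lifetime},
#       "signing-key",
#       algorithm=jwt_algorithm,
#   )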

# Route names that may be accessed without an authenticated user.
url_user_not_required = ["login", "", "viewer", "message_with_docs", "new_user"]
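
# Usage sketch (hypothetical helper; the real check lives elsewhere in the
# app):
#
#   def requires_auth(path: str) -> bool:
#       first_segment = path.strip("/").split("/")[0]
#       return first_segment not in url_user_not_required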