File size: 1,840 Bytes
192b91e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os

# Configure the Hugging Face cache directory BEFORE importing
# sentence_transformers: the HF libraries may resolve HF_HOME /
# TRANSFORMERS_CACHE at import time, so setting these afterwards can leave
# the default cache location in effect. (TRANSFORMERS_CACHE is deprecated in
# newer transformers releases but kept for older versions.)
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# Pull PINECONE_* secrets from a local .env file into the environment.
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")  # Not used in new SDK, keep cloud+region below instead
PINECONE_INDEX_NAME = 'bajaj-rag-assistant'
PINECONE_CLOUD = 'aws'           # or 'gcp', or your choice, must match Pinecone project
PINECONE_REGION = 'us-east-1'    # or your choice, must match Pinecone project

# Create Pinecone client globally (shared by all index operations below).
pc = Pinecone(api_key=PINECONE_API_KEY)

# Lazily-initialized shared SentenceTransformer instance; see preload_model().
_model = None

def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
    """Load the sentence-embedding model once and memoize it.

    Subsequent calls return the already-loaded instance, so the (slow)
    model download/deserialization happens at most once per process.
    """
    global _model
    if _model is None:
        # First call: download/load into the module-level cache directory.
        _model = SentenceTransformer(model_name, cache_folder=cache_dir)
    return _model

def get_model():
    """Return the shared SentenceTransformer, loading it on first use."""
    model = preload_model()
    return model

def build_pinecone_index(chunks, index_name=PINECONE_INDEX_NAME, batch_size=100):
    """Embed text chunks and upsert them into a Pinecone serverless index.

    Args:
        chunks: non-empty list of text strings to embed and store.
        index_name: Pinecone index to create (if missing) and populate.
        batch_size: vectors per upsert request; batching keeps each request
            under Pinecone's per-call payload/record limits.

    Returns:
        The Pinecone ``Index`` handle for ``index_name``.

    Raises:
        ValueError: if ``chunks`` is empty (the embedding dimension would be
            undefined and there is nothing to index).
    """
    if not chunks:
        raise ValueError("chunks must be a non-empty list of texts")
    model = get_model()
    embeddings = model.encode(
        chunks,
        batch_size=128,
        convert_to_numpy=True,
        normalize_embeddings=True
    )
    # Create index if it doesn't exist; dimension is taken from the model output.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=embeddings.shape[1],
            metric='cosine',
            spec=ServerlessSpec(
                cloud=PINECONE_CLOUD,
                region=PINECONE_REGION
            )
        )
    index = pc.Index(index_name)
    vectors = [(f"id-{i}", emb.tolist(), {"text": chunk}) for i, (emb, chunk) in enumerate(zip(embeddings, chunks))]
    # Upsert in batches: a single request with thousands of vectors can exceed
    # Pinecone's per-request payload limit and fail outright.
    for start in range(0, len(vectors), batch_size):
        index.upsert(vectors[start:start + batch_size])
    return index