File size: 3,289 Bytes
5e1a30c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
Base interface for Vector Index sub-components.

This module defines the abstract base class for all vector index implementations
in the modular retriever architecture.
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Tuple
import numpy as np

from src.core.interfaces import Document


class VectorIndex(ABC):
    """
    Contract shared by every vector index sub-component.

    Each vector index used by the modular retriever architecture derives
    from this class. A concrete index may be a direct implementation
    (FAISS) or an adapter around a cloud service (Pinecone, Weaviate).
    """

    @abstractmethod
    def __init__(self, config: Dict[str, Any]):
        """
        Set up the vector index from its configuration.

        Args:
            config: Index-type-specific configuration dictionary
        """

    @abstractmethod
    def initialize_index(self, embedding_dim: int) -> None:
        """
        Prepare the index for embeddings of a given dimension.

        Args:
            embedding_dim: Dimension of the embeddings that will be indexed
        """

    @abstractmethod
    def add_documents(self, documents: List[Document]) -> None:
        """
        Insert documents into the index.

        Args:
            documents: Documents (carrying embeddings) to be added

        Raises:
            ValueError: If documents don't have embeddings or wrong dimension
        """

    @abstractmethod
    def search(self, query_embedding: np.ndarray, k: int = 5) -> List[Tuple[int, float]]:
        """
        Find the documents most similar to a query vector.

        Args:
            query_embedding: Embedding vector of the query
            k: How many results to return

        Returns:
            List of (document_index, similarity_score) tuples
        """

    @abstractmethod
    def get_document_count(self) -> int:
        """
        Report how many documents the index currently holds.

        Returns:
            Number of indexed documents
        """

    @abstractmethod
    def clear(self) -> None:
        """Remove every document from the index."""

    @abstractmethod
    def get_index_info(self) -> Dict[str, Any]:
        """
        Describe the index.

        Returns:
            Dictionary with index statistics and configuration
        """

    @abstractmethod
    def is_trained(self) -> bool:
        """
        Tell whether the index is trained (only meaningful for some index types).

        Returns:
            True if the index is ready for searching
        """

    def get_component_info(self) -> Dict[str, Any]:
        """
        Assemble a description of this component for logging and debugging.

        The generic fields ("type", "class", "module", "document_count",
        "is_trained") are filled in first; the entries returned by
        ``get_index_info()`` are merged in last, so on a key collision the
        value from ``get_index_info()`` is the one that is reported.

        Returns:
            Dictionary with component details
        """
        concrete = self.__class__
        summary: Dict[str, Any] = {
            "type": "vector_index",
            "class": concrete.__name__,
            "module": concrete.__module__,
        }
        summary["document_count"] = self.get_document_count()
        summary["is_trained"] = self.is_trained()
        # Index-specific details go last and therefore override generic fields.
        index_details = self.get_index_info()
        return {**summary, **index_details}