File size: 2,530 Bytes
5e1a30c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
Base interface for Sparse Retriever sub-components.

This module defines the abstract base class for all sparse retrieval implementations
in the modular retriever architecture.
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Tuple

from src.core.interfaces import Document


class SparseRetriever(ABC):
    """
    Contract shared by every sparse retriever sub-component.

    Concrete retrievers in the modular retriever architecture derive from
    this class. A subclass may implement retrieval directly (BM25) or act
    as an adapter around an external service (Elasticsearch). Every method
    marked abstract below must be overridden before the subclass can be
    instantiated.
    """

    @abstractmethod
    def __init__(self, config: Dict[str, Any]):
        """
        Set up the retriever from its configuration.

        Args:
            config: Configuration dictionary specific to the retriever type
        """

    @abstractmethod
    def index_documents(self, documents: List[Document]) -> None:
        """
        Add documents to the sparse index.

        Args:
            documents: List of documents to index
        """

    @abstractmethod
    def search(self, query: str, k: int = 5) -> List[Tuple[int, float]]:
        """
        Run a sparse retrieval query.

        Args:
            query: Search query string
            k: Number of results to return

        Returns:
            List of (document_index, score) tuples
        """

    @abstractmethod
    def get_document_count(self) -> int:
        """
        Report how many documents are currently indexed.

        Returns:
            Number of indexed documents
        """

    @abstractmethod
    def clear(self) -> None:
        """Remove all indexed documents."""

    @abstractmethod
    def get_stats(self) -> Dict[str, Any]:
        """
        Report statistics about the sparse retriever.

        Returns:
            Dictionary with retriever statistics
        """

    def get_component_info(self) -> Dict[str, Any]:
        """
        Describe this component for logging and debugging.

        The result combines fixed identification fields (component type,
        concrete class name, defining module, document count) with the
        dictionary returned by ``get_stats()``.

        Returns:
            Dictionary with component details
        """
        concrete = self.__class__
        identity: Dict[str, Any] = {
            "type": "sparse_retriever",
            "class": concrete.__name__,
            "module": concrete.__module__,
            "document_count": self.get_document_count(),
        }
        # Stats are merged last: a key that get_stats() shares with the
        # identification fields replaces the value set above.
        return {**identity, **self.get_stats()}