File size: 3,026 Bytes
cc80c3d
ef088c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc80c3d
 
 
ef088c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc80c3d
 
 
ef088c2
 
cc80c3d
 
ef088c2
 
 
 
cc80c3d
 
ef088c2
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from langchain_core.documents import Document
from langchain_core.tools import tool

from ask_candid.base.retrieval.knowledge_base import (
    SourceNames,
    generate_queries,
    run_search,
    reranker,
    process_hit
)


@tool(response_format="content_and_artifact")
def search_candid_knowledge_base(
    query: str,
    sources: list[SourceNames],
    news_days_ago: int = 60
) -> tuple[str, list[Document]]:
    """Search Candid's subject matter expert knowledge base to find answers about the social and philanthropic sector.
    This knowledge includes help articles and video training sessions from Candid's subject matter experts, blog posts
    about the sector from Candid staff and trusted partner authors, research documents about the sector and news
    articles curated about activity happening in the sector around the world.

    Searches are performed through a combination of vector and keyword searching. Results are then re-ranked against
    the original query to get the best results.

    Search results often come back with specific organizations named, especially if referencing the news. In these cases
    the organizations should be identified in Candid's data and links to their profiles **MUST** be included in final
    chat response to the user.

    Parameters
    ----------
    query : str
        Text describing a user's question or a description of investigative work which requires support from Candid's
        knowledge base
    sources : list[SourceNames]
        One or more sources of knowledge from different areas at Candid.
        * Candid Blog: Blog posts from Candid staff and trusted partners intended to help those in the sector or
        illuminate ongoing work
        * Candid Help: Candid FAQs to help user's get started with Candid's product platform and learning resources
        * Candid Learning: Training documents from Candid's subject matter experts
        * Candid News: News articles and press releases about real-time activity in the philanthropic sector
        * IssueLab Research Reports: Academic research reports about the social/philanthropic sector
        * YouTube Training: Transcripts from video-based training seminars from Candid's subject matter experts
    news_days_ago : int, optional
        How many days in the past to search for news articles, if a user is asking for recent trends then this value
        should be set lower >~ 10, by default 60

    Returns
    -------
    str
        Re-ranked document text
    """
    # NOTE: the docstring above doubles as the tool description handed to the model by `@tool`,
    # so its wording is runtime text and is intentionally left untouched here.

    # Build the two query sets; they are named after the `run_search` keyword arguments they feed.
    vector_searches, non_vector_searches = generate_queries(
        query=query, sources=sources, news_days_ago=news_days_ago
    )
    raw_results = run_search(
        vector_searches=vector_searches,
        non_vector_searches=non_vector_searches
    )

    # Re-rank against the original query text, then convert every ranked hit with `process_hit`.
    documents = [process_hit(ranked_hit) for ranked_hit in reranker(raw_results, search_text=query)]

    # "content_and_artifact" response: (text for the model, document list as the artifact).
    content = '\n\n'.join(document.page_content for document in documents)
    return content, documents