brainsqueeze's picture
v2 of public chat
ef088c2 verified
raw
history blame
3.03 kB
from langchain_core.documents import Document
from langchain_core.tools import tool
from ask_candid.base.retrieval.knowledge_base import (
SourceNames,
generate_queries,
run_search,
reranker,
process_hit
)
# NOTE: the docstring below is presumably what LangChain exposes to the model as this tool's
# description (default `@tool` behaviour), so its wording is intentionally left unchanged.
@tool(response_format="content_and_artifact")
def search_candid_knowledge_base(
    query: str,
    sources: list[SourceNames],
    news_days_ago: int = 60
) -> tuple[str, list[Document]]:
    """Search Candid's subject matter expert knowledge base to find answers about the social and philanthropic sector.
    This knowledge includes help articles and video training sessions from Candid's subject matter experts, blog posts
    about the sector from Candid staff and trusted partner authors, research documents about the sector and news
    articles curated about activity happening in the sector around the world.

    Searches are performed through a combination of vector and keyword searching. Results are then re-ranked against
    the original query to get the best results.

    Search results often come back with specific organizations named, especially if referencing the news. In these cases
    the organizations should be identified in Candid's data and links to their profiles **MUST** be included in final
    chat response to the user.

    Parameters
    ----------
    query : str
        Text describing a user's question or a description of investigative work which requires support from Candid's
        knowledge base
    sources : list[SourceNames]
        One or more sources of knowledge from different areas at Candid.
        * Candid Blog: Blog posts from Candid staff and trusted partners intended to help those in the sector or
        illuminate ongoing work
        * Candid Help: Candid FAQs to help user's get started with Candid's product platform and learning resources
        * Candid Learning: Training documents from Candid's subject matter experts
        * Candid News: News articles and press releases about real-time activity in the philanthropic sector
        * IssueLab Research Reports: Academic research reports about the social/philanthropic sector
        * YouTube Training: Transcripts from video-based training seminars from Candid's subject matter experts
    news_days_ago : int, optional
        How many days in the past to search for news articles, if a user is asking for recent trends then this value
        should be set lower >~ 10, by default 60

    Returns
    -------
    str
        Re-ranked document text
    """

    # Build the two families of search requests for the chosen sources.
    vector_searches, non_vector_searches = generate_queries(
        query=query,
        sources=sources,
        news_days_ago=news_days_ago,
    )

    # Execute both families of searches in a single call.
    raw_results = run_search(
        vector_searches=vector_searches,
        non_vector_searches=non_vector_searches,
    )

    # Re-rank against the original query text, then convert every hit with `process_hit`;
    # the converted objects are returned as the second tuple element (the artifact).
    ranked_documents = [
        process_hit(ranked_hit)
        for ranked_hit in reranker(raw_results, search_text=query)
    ]

    # The first tuple element is the documents' text joined by blank lines.
    combined_text = '\n\n'.join(doc.page_content for doc in ranked_documents)
    return combined_text, ranked_documents