Spaces:
Running
Running
from langchain_core.documents import Document | |
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult | |
# Source configuration for Candid news: which index to query and which
# document fields hold the searchable text.
CandidNewsConfig = ElasticSourceConfig(
    index_name="news_1",
    text_fields=("title", "content"),
)
def process_news_hit(hit: ElasticHitsResult) -> Document:
    """Convert one news search hit into a ``Document``.

    The page content is the article title and body joined by a blank line.
    Metadata carries the title, the publishing site (falling back to
    ``"Candid News"`` when ``site_name`` is missing or empty), the record
    id, and the article link.

    Args:
        hit: Search hit whose ``source`` mapping holds the news fields
            (``title``, ``content``, ``site_name``, ``id``, ``link``).

    Returns:
        A ``Document`` built from the hit's fields.

    Raises:
        KeyError: If the hit's source has no ``"id"`` field.
    """
    source = hit.source
    # A field can be present but null; ``.get(key, "")`` would then return
    # None and ``str.join`` would raise TypeError. Coerce falsy values to ""
    # the same way ``site_name`` is already guarded below.
    title = source.get("title") or ""
    content = source.get("content") or ""
    return Document(
        page_content="\n\n".join([title, content]),
        metadata={
            "title": title,
            "source": source.get("site_name") or "Candid News",
            # Deliberately a hard lookup: a hit without an id is an error.
            "source_id": source["id"],
            "url": source.get("link") or "",
        },
    )