File size: 658 Bytes
f86d7f2
cc80c3d
f86d7f2
cc80c3d
 
 
 
 
f86d7f2
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from langchain_core.documents import Document

from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult

# Source configuration for Candid news: names the "news_1" index and lists
# "title" and "content" as its text fields.
CandidNewsConfig = ElasticSourceConfig(
    text_fields=("title", "content"),
    index_name="news_1",
)


def process_news_hit(hit: ElasticHitsResult) -> Document:
    """Convert a news search hit into a ``Document``.

    The page content is the hit's title and content joined by a blank line.
    The metadata carries the title, the originating site name (falling back
    to ``"Candid News"`` when absent or empty), the source id and the link.

    Args:
        hit: Search hit whose ``source`` mapping holds the news fields.
            ``"id"`` is required; ``"title"``, ``"content"``, ``"site_name"``
            and ``"link"`` are optional.

    Returns:
        A ``Document`` built from the hit's ``source`` fields.

    Raises:
        KeyError: If ``hit.source`` has no ``"id"`` key.
    """
    source = hit.source

    # ``.get(key, "")`` only covers a *missing* key. A key stored with an
    # explicit null comes back as None, which makes ``str.join`` raise
    # TypeError, so coalesce falsy values to an empty string instead.
    title = source.get("title") or ""
    content = source.get("content") or ""

    return Document(
        page_content="\n\n".join([title, content]),
        metadata={
            "title": title,
            "source": source.get("site_name") or "Candid News",
            "source_id": source["id"],
            "url": source.get("link", ""),
        },
    )