from typing import Dict, Any

from langchain_core.documents import Document

from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context

# Source configuration for the Candid blog index and the text fields it exposes.
CandidBlogConfig = ElasticSourceConfig(
    index_name="search-semantic-candid-blog",
    text_fields=("content", "authors_text", "title_summary_tags")
)

# Combined character budget shared by the text fields rendered on a card.
_CARD_TEXT_BUDGET = 999


def process_blog_hit(hit: ElasticHitsResult) -> Document:
    """Convert a Candid blog search hit into a ``Document``.

    The page content is the title, excerpt, content context, authors context
    and tags joined by blank lines. The metadata carries the title, the fixed
    source label ``"Candid Blog"``, the hit's ``id`` and its ``link``.

    Args:
        hit: Search hit whose ``source`` holds the blog post fields
            (presumably a dict-like mapping; verify against the schema).

    Returns:
        A ``Document`` built from the hit.

    Raises:
        KeyError: If ``hit.source`` has no ``"id"`` or ``"link"`` entry
            (assuming ``hit.source`` behaves like a ``dict``).
    """
    excerpt = hit.source.get("excerpt", "")
    title = hit.source.get("title", "")
    # we only need to process long text
    content_with_context_txt = get_context("content", hit, context_length=12, add_context=False)
    authors = get_context("authors_text", hit, context_length=12, add_context=False)
    tags = hit.source.get("title_summary_tags", "")
    return Document(
        page_content='\n\n'.join([title, excerpt, content_with_context_txt, authors, tags]),
        metadata={
            "title": title,
            "source": "Candid Blog",
            "source_id": hit.source["id"],
            "url": hit.source["link"]
        }
    )


def build_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks=False) -> str:
    """Render the card text for a Candid blog post.

    The ``title`` and ``excerpt`` fields share a budget of
    ``_CARD_TEXT_BUDGET`` characters, consumed in that order. A field that
    overflows the remaining budget is cut and suffixed with ``[...]``; a field
    reached after the budget is exhausted is replaced by ``[...]`` alone.
    Fields that are missing or ``None`` are rendered as empty strings.

    Args:
        doc: Blog post fields; must contain ``"link"``.
        height_px: Accepted for interface compatibility; not referenced by the
            template visible in this block.
        show_chunks: Accepted for interface compatibility; not referenced by
            the template visible in this block.

    Returns:
        The rendered card string.

    Raises:
        KeyError: If ``doc`` has no ``"link"`` entry.
    """
    # NOTE(review): `url` is not referenced by the template visible here; the
    # lookup is kept so a document without "link" still fails loudly. Confirm
    # whether the card is meant to link to the post.
    url = f"{doc['link']}"
    fields = ["title", "excerpt"]

    fields_dict = {}
    fields_len = 0
    for field in fields:
        value = doc.get(field)
        if value is None:
            fields_dict[field] = ""
            fields_dict[field + "_txt"] = ""
            continue

        fields_dict[field] = value
        remaining = _CARD_TEXT_BUDGET - fields_len
        if len(value) <= remaining:
            # Fits entirely within what is left of the budget.
            fields_dict[field + "_txt"] = f"\n\n{value}\n\n"
        elif remaining > 0:
            # Partially fits: keep the head and mark the cut.
            fields_dict[field + "_txt"] = f"\n\n{value[:remaining] + '[...]'}\n\n"
        else:
            # Budget already spent by earlier fields.
            fields_dict[field + "_txt"] = "[...]"
        # The full length is charged even when the text was truncated.
        fields_len += len(value)

    # Use the normalised title so a missing or None title renders as an empty
    # string instead of raising KeyError or printing "None".
    html = f"""

Candid blog post: {fields_dict["title"]}

{fields_dict["excerpt_txt"]}

"""
    return html