Spaces:
Running
Running
from typing import Dict, Any | |
from langchain_core.documents import Document | |
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult | |
from ask_candid.retrieval.sources.utils import get_context | |
# Elasticsearch source settings for the Candid blog: the index to query and
# the document fields registered as its text fields.
CandidBlogConfig = ElasticSourceConfig(
    text_fields=("content", "authors_text", "title_summary_tags"),
    index_name="search-semantic-candid-blog",
)
def process_blog_hit(hit: ElasticHitsResult) -> Document:
    """Convert a Candid blog search hit into a ``Document``.

    The page content is the title, excerpt, content text, authors text and
    tags joined with blank lines, in that order. The metadata records the
    post title, a fixed ``"Candid Blog"`` source label, the post id and its
    URL.

    Args:
        hit: Search hit whose ``source`` mapping holds the blog post fields.

    Returns:
        A ``Document`` built from the hit.

    Raises:
        KeyError: If ``hit.source`` has no ``"id"`` or ``"link"`` entry.
    """
    source = hit.source
    summary = source.get("excerpt", "")
    heading = source.get("title", "")

    # Per the original note, only the long text fields are run through
    # ``get_context``; title, excerpt and tags are used as stored.
    body_text = get_context("content", hit, context_length=12, add_context=False)
    author_text = get_context("authors_text", hit, context_length=12, add_context=False)
    tag_text = source.get("title_summary_tags", "")

    sections = [heading, summary, body_text, author_text, tag_text]
    page_content = "\n\n".join(sections)

    metadata = {
        "title": heading,
        "source": "Candid Blog",
        "source_id": source["id"],
        "url": source["link"],
    }
    return Document(page_content=page_content, metadata=metadata)
def build_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks=False) -> str:
    """Render a Candid blog post as an HTML card.

    The card shows the post title as a link to the post, followed by the
    excerpt. Title and excerpt share a 999-character budget: the title is
    counted first, and the excerpt is cut to whatever budget remains (with a
    ``[...]`` marker) or replaced by a bare ``[...]`` when nothing remains.

    Args:
        doc: Blog post fields. ``"link"`` is required; ``"title"`` and
            ``"excerpt"`` are optional and treated as empty when missing or
            ``None``.
        height_px: Height of the card in pixels.
        show_chunks: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The HTML markup for the card.

    Raises:
        KeyError: If ``doc`` has no ``"link"`` entry.
    """
    # Combined character budget shared by the title and the excerpt.
    max_chars = 999

    url = f"{doc['link']}"

    # Normalise the title the same way the excerpt is normalised, so a missing
    # or ``None`` title renders as an empty link text instead of raising
    # ``KeyError`` or printing the literal "None".
    title = doc.get("title")
    if title is None:
        title = ""
    remaining = max_chars - len(title)

    excerpt = doc.get("excerpt")
    if excerpt is None:
        excerpt_html = ""
    elif len(excerpt) <= remaining:
        excerpt_html = f"<div>{excerpt}</div>"
    elif remaining > 0:
        excerpt_html = f"<div>{excerpt[:remaining] + '[...]'}</div>"
    else:
        # The title alone already used up the whole budget.
        excerpt_html = f"<span>{'[...]'}</span>"

    # NOTE(review): values are interpolated without HTML escaping; confirm the
    # indexed blog content is trusted or sanitised upstream.
    html = f"""
    <div style='height: {height_px}px; padding: 5px;'>
        <div style='height: {height_px}px; border: 1px solid #febe10;'>
            <span style='padding-left: 10px; display: inline-block; width: 100%;'>
                <div>
                    <span>
                        <b>Candid blog post:</b>
                        <a href='{url}' target='_blank' style='text-decoration: none;'>
                            {title}
                        </a>
                    </span>
                    <br>
                    <br>
                    {excerpt_html}
                </div>
            </span>
        </div>
    </div>
    """
    return html