from typing import List, Optional
import json

import gradio as gr
import requests

from .utils import html_format_doc
from .retrieval.up_orgs_keyword import organization_card_html
from .retrieval.elastic import reranker, get_query_results
from .retrieval.config import ALL_INDICES
from . import UP_QA_SEARCH_API


# Ordered (substring, source label) pairs. The first substring found in a
# hit's index name decides the label passed to ``html_format_doc``.
# Only the ``-elser`` spellings of issuelab, youtube, candid-blog and
# candid-help are listed; the looser substring checks were disabled upstream.
_INDEX_MARKERS = (
    ("news", "news"),
    ("transactions", "transactions"),
    ("organizations", "organizations"),
    ("issuelab-elser", "issuelab"),
    ("youtube-elser", "youtube"),
    ("candid-blog-elser", "candid_blog"),
    ("candid-learning", "candid_learning"),  # TODO fix that
    ("candid-help-elser", "candid_help"),
)


def run_search(search_text: str, indices: Optional[List[str]] = None):
    """Run the semantic search and render the reranked hits as one string.

    Args:
        search_text: Query text forwarded to ``get_query_results``.
        indices: Optional index names forwarded to ``get_query_results``.

    Returns:
        The output of ``html_format_doc`` for every reranked hit, joined
        together with a newline before and after.
    """
    hits = get_query_results(search_text, indices=indices)

    rendered = []
    for hit in reranker(hits):
        # Falls back to None when no marker occurs in the index name.
        label = next(
            (name for marker, name in _INDEX_MARKERS if marker in hit.index),
            None,
        )
        rendered.append(html_format_doc(doc=hit.source, source=label))

    # NOTE(review): the wrapper around the joined markup shows up as bare
    # newlines in this view -- confirm whether enclosing HTML tags were lost.
    return f"\n{''.join(rendered)}\n"


def run_ks(search_text: str):
    """Fetch keyword results from the search API and pair them with semantic results.

    Posts ``search_text`` to the URL configured in ``UP_QA_SEARCH_API``,
    renders every entry of ``returnedOrgs`` with ``organization_card_html``
    and then calls ``run_search`` with the same text.

    Args:
        search_text: Keyword query sent to the API and to ``run_search``.

    Returns:
        A ``(keyword_html, semantic_html)`` tuple of strings.
    """
    endpoint = UP_QA_SEARCH_API["API_URL"]
    request_headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "x-api-key": UP_QA_SEARCH_API["API_KEY"]
    }
    response = requests.post(
        url=endpoint,
        json={"keyword": search_text, "rowCount": 10},
        headers=request_headers,
        timeout=(5 * 60)
    )
    payload = json.loads(response.text)

    cards = []
    returned_orgs = payload.get("returnedOrgs", None)
    if returned_orgs is not None:
        for doc in returned_orgs:
            # Translate the API's camelCase fields into the keys handed to
            # ``organization_card_html``.
            org = {
                "candid_entity_id": doc.get("candidEntityID", ""),
                "main_name": doc.get("orgName", ""),
                "logo": doc.get("logo", ""),
                "seal": doc.get("seal", {}),
                "city": doc.get("city", ""),
                "admin1": doc.get("admin1", ""),
                "country_name": doc.get("countryName", ""),
                "taxonomy": doc.get("taxonomy", {}),
            }

            # A missing, empty or null highlight list contributes nothing.
            for highlight in doc.get("highlights", []) or ():
                if highlight["field"] == "mission_statement":
                    org["mission_statement"] = "; ".join(highlight["highlights"])

            cards.append(organization_card_html(org, 250))

    # Getting semantic results
    semantic_html = run_search(search_text=search_text)

    # NOTE(review): as in ``run_search``, the wrapper appears as bare
    # newlines here -- confirm whether enclosing HTML tags were lost.
    return f"\n{''.join(cards)}\n", semantic_html


def build_search_tab() -> gr.Blocks:
    """Build the semantic-search tab.

    Returns:
        A ``gr.Blocks`` holding a query box, an index selector, a search
        button and an HTML feed; clicking the button calls ``run_search``.
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as tab:
        gr.Markdown(
            "\n\nAlpha demo: Semantic search\n\n"
            "Search and ask questions of Candid's data together with casual language"
        )
        query_box = gr.Text(placeholder="Search", show_label=False)

        with gr.Accordion(label="Advanced settings", open=False):
            index_picker = gr.CheckboxGroup(
                choices=list(ALL_INDICES),
                value=list(ALL_INDICES),
                label="Sources to include",
                interactive=True
            )

        search_button = gr.Button("Search")
        results_feed = gr.HTML()

        # pylint: disable=no-member
        search_button.click(
            fn=run_search,
            inputs=[query_box, index_picker],
            outputs=[results_feed],
            api_name=False,
            queue=True
        )
    return tab


def build_ks_tab() -> gr.Blocks:
    """Build the keyword-versus-semantic comparison tab.

    Returns:
        A ``gr.Blocks`` holding a query box, a button and two HTML feeds;
        clicking the button calls ``run_ks`` and fills both feeds.
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as tab:
        gr.Markdown(
            "\n\nAlpha demo: Keyword versus Semantic search\n\n"
            "Compare current search results versus semantic search results"
        )
        query_box = gr.TextArea(placeholder="Search", show_label=False, lines=1)
        ask_button = gr.Button("Search Unified Platform organizations")

        with gr.Row():
            with gr.Column():
                gr.Markdown("\n\nKeyword results\n\n")
                keyword_feed = gr.HTML()

            with gr.Column():
                gr.Markdown("\n\nSemantic results\n\n")
                semantic_feed = gr.HTML()

        # pylint: disable=no-member
        ask_button.click(
            fn=run_ks,
            inputs=[query_box],
            outputs=[keyword_feed, semantic_feed],
            api_name=False,
            queue=True
        )
    return tab