Spaces:
Running
Running
from typing import List, Optional | |
import json | |
import gradio as gr | |
import requests | |
from .utils import html_format_doc | |
from .retrieval.up_orgs_keyword import organization_card_html | |
from .retrieval.elastic import reranker, get_query_results | |
from .retrieval.config import ALL_INDICES | |
from . import UP_QA_SEARCH_API | |
# Ordered (index-name fragment, source label) pairs. The first fragment found
# inside a result's index name decides the label, so the order is significant.
_INDEX_FRAGMENT_TO_SOURCE = (
    ("news", "news"),
    ("transactions", "transactions"),
    ("organizations", "organizations"),
    ("issuelab-elser", "issuelab"),
    ("youtube-elser", "youtube"),
    ("candid-blog-elser", "candid_blog"),
    ("candid-learning", "candid_learning"),  # TODO fix that
    ("candid-help-elser", "candid_help"),
)


def run_search(search_text: str, indices: Optional[List[str]] = None):
    """Run a search and render the reranked hits as one HTML fragment.

    Args:
        search_text: Query text forwarded to ``get_query_results``.
        indices: Optional list of index names forwarded to
            ``get_query_results``; ``None`` is passed through unchanged.

    Returns:
        A string of the form ``<div>...</div>`` containing the output of
        ``html_format_doc`` for every reranked hit, concatenated in order.
    """
    hits = get_query_results(search_text, indices=indices)

    cards = []
    for hit in reranker(hits):
        index_name = hit.index
        # First matching fragment wins; the label stays None when no
        # fragment matches the index name.
        label = next(
            (
                source
                for fragment, source in _INDEX_FRAGMENT_TO_SOURCE
                if fragment in index_name
            ),
            None,
        )
        cards.append(html_format_doc(doc=hit.source, source=label))

    return f"<div>{''.join(cards)}</div>"
def _organization_from_doc(doc: dict) -> dict:
    """Map one ``returnedOrgs`` entry to the dict given to ``organization_card_html``."""
    org = {
        "candid_entity_id": doc.get("candidEntityID", ""),
        "main_name": doc.get("orgName", ""),
        "logo": doc.get("logo", ""),
        "seal": doc.get("seal", {}),
        "city": doc.get("city", ""),
        "admin1": doc.get("admin1", ""),
        "country_name": doc.get("countryName", ""),
        "taxonomy": doc.get("taxonomy", {}),
    }
    # Highlight entries are read with .get so that one entry missing "field"
    # or "highlights" does not abort rendering of the whole result list.
    # If several entries target the mission statement, the last one wins.
    for highlight in doc.get("highlights") or []:
        if highlight.get("field") == "mission_statement":
            org["mission_statement"] = "; ".join(highlight.get("highlights") or [])
    return org


def run_ks(search_text: str):
    """Fetch keyword results from the search API and pair them with semantic results.

    Posts ``search_text`` (asking for 10 rows) to the endpoint configured in
    ``UP_QA_SEARCH_API``, renders each returned organization with
    ``organization_card_html``, then runs ``run_search`` on the same text.

    Args:
        search_text: The user's query; sent as ``keyword`` to the API and
            passed unchanged to ``run_search``.

    Returns:
        A 2-tuple ``(keyword_html, semantic_html)``: the organization cards
        wrapped in a ``<div>``, and whatever ``run_search`` returns.

    Raises:
        requests.RequestException: If the HTTP request itself fails.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        url=UP_QA_SEARCH_API["API_URL"],
        json={"keyword": search_text, "rowCount": 10},
        headers={
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": UP_QA_SEARCH_API["API_KEY"],
        },
        timeout=5 * 60,
    )
    # NOTE(review): the HTTP status is not checked, so an error response with
    # a JSON body simply yields no keyword cards - confirm this is intended.
    # response.json() detects the JSON text encoding from the raw bytes
    # instead of relying on the charset guess behind response.text.
    payload = response.json()

    # A missing or null "returnedOrgs" is treated as "no results".
    keyword_cards = [
        organization_card_html(_organization_from_doc(doc), 250)
        for doc in payload.get("returnedOrgs") or []
    ]

    # Getting semantic results
    semantic_html = run_search(search_text=search_text)
    return f"<div>{''.join(keyword_cards)}</div>", semantic_html
def build_search_tab() -> gr.Blocks:
    """Build the semantic-search demo UI.

    Lays out a query box, a collapsed "Advanced settings" accordion holding
    the source checkboxes (all of ``ALL_INDICES`` selected by default), a
    search button and an HTML feed, and wires the button to ``run_search``.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    header = (
        "<h1>Alpha demo: Semantic search</h1>"
        "Search and ask questions of Candid's data together with casual language"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as search_tab:
        gr.Markdown(header)

        search_box = gr.Text(placeholder="Search", show_label=False)

        with gr.Accordion(label="Advanced settings", open=False):
            source_picker = gr.CheckboxGroup(
                choices=list(ALL_INDICES),
                value=list(ALL_INDICES),
                label="Sources to include",
                interactive=True,
            )

        submit = gr.Button("Search")
        results_feed = gr.HTML()

        # pylint: disable=no-member
        submit.click(
            fn=run_search,
            inputs=[search_box, source_picker],
            outputs=[results_feed],
            api_name=False,
            queue=True,
        )

    return search_tab
def build_ks_tab() -> gr.Blocks:
    """Build the keyword-versus-semantic comparison UI.

    Lays out a single-line query area and a button above a row of two
    columns, one HTML feed for keyword results and one for semantic results,
    and wires the button to ``run_ks``, which fills both feeds.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    header = (
        "<h1>Alpha demo: Keyword versus Semantic search</h1>"
        "Compare current search results versus semantic search results"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as compare_tab:
        gr.Markdown(header)

        search_box = gr.TextArea(placeholder="Search", show_label=False, lines=1)
        submit = gr.Button("Search Unified Platform organizations")

        with gr.Row():
            with gr.Column():
                gr.Markdown("<h2>Keyword results</h2>")
                keyword_feed = gr.HTML()

            with gr.Column():
                gr.Markdown("<h2>Semantic results</h2>")
                semantic_feed = gr.HTML()

        # pylint: disable=no-member
        submit.click(
            fn=run_ks,
            inputs=[search_box],
            outputs=[keyword_feed, semantic_feed],
            api_name=False,
            queue=True,
        )

    return compare_tab