Spaces:

CandidAI
/

ask-candid

Running

App Files Community

brainsqueeze committed on Apr 10

Commit

ce9dc52

verified ·

1 Parent(s): ce98665

updated wrong utils.py

Browse files

Files changed (1) hide show

ask_candid/utils.py +84 -37

ask_candid/utils.py CHANGED Viewed

@@ -1,48 +1,95 @@
-from ask_candid.retrieval.sources.schema import ElasticHitsResult
-def get_context(field_name: str, hit: ElasticHitsResult, context_length: int = 1024, add_context: bool = True) -> str:
-    """Pads the relevant chunk of text with context before and after
     Parameters
     ----------
-    field_name : str
-        a field with the long text that was chunked into pieces
-    hit : ElasticHitsResult
-        search hit; ``hit.source`` is read for the long text and ``hit.inner_hits`` for the matched chunks
-    context_length : int, optional
-        length of text to add before and after the chunk, by default 1024
-    add_context : bool, optional
-        when True each chunk is located in the long text and padded with surrounding text;
-        when False the trimmed chunk is used as-is, by default True
     Returns
     -------
     str
-        longer chunks stuffed together, joined by a blank line; empty when no chunk was collected
     """
-    chunks = []
-    # NOTE chunks have tokens, long text is a normal text, but may contain html that also gets weird after tokenization
-    long_text = hit.source.get(field_name) or ""
-    # the long text is lowercased before searching, so any padded context returned below is lowercase as well
-    # NOTE(review): the chunk itself is not lowercased here; presumably chunk text is already lowercase - confirm
-    long_text = long_text.lower()
-    inner_hits_field = f"embeddings.{field_name}.chunks"
-    found_chunks = hit.inner_hits.get(inner_hits_field, {})
-    if found_chunks:
-        hits = found_chunks.get("hits", {}).get("hits", [])
-        for h in hits:
-            # NOTE(review): direct indexing assumes every inner hit carries this field with a non-empty
-            # "chunk" list; a hit without it raises KeyError/IndexError - confirm against the index mapping
-            chunk = h.get("fields", {})[inner_hits_field][0]["chunk"][0]
-            # keep only the middle: drop the first and last 3 characters because we may have tokenizing
-            # artifacts at the edges
-            chunk = chunk[3: -3]
-            if add_context:
-                # Locate the chunk in the large text using only its first 20 characters
-                start_index = long_text.find(chunk[:20])
-                # Chunk is found; when it is not found the chunk is dropped and nothing is appended
-                if start_index != -1:
-                    end_index = start_index + len(chunk)
-                    # clamp the padded window to the bounds of the long text
-                    pre_start_index = max(0, start_index - context_length)
-                    post_end_index = min(len(long_text), end_index + context_length)
-                    chunks.append(long_text[pre_start_index:post_end_index])
-            else:
-                chunks.append(chunk)
-    return '\n\n'.join(chunks)

+from typing import List, Dict, Union, Any
+from uuid import uuid4
+from langchain_core.documents import Document
+from ask_candid.retrieval.sources import (
+    candid_blog,
+    candid_help,
+    candid_learning,
+    issuelab,
+    youtube
+)
+def html_format_doc(doc: Dict[str, Any], source: str, show_chunks=False) -> str:
+    """Renders one retrieved document as an HTML card for its source type
+    Parameters
+    ----------
+    doc : Dict[str, Any]
+        document payload handed to the source-specific card builder
+    source : str
+        source key selecting which card builder is called
+    show_chunks : bool, optional
+        forwarded unchanged to the card builder, by default False
+    Returns
+    -------
+    str
+        card HTML; an empty string for "news", "transactions", "organizations" and any unrecognised source
+    """
+    card_height = 200
+    if source == "issuelab":
+        return issuelab.issuelab_card_html(doc, card_height, show_chunks)
+    if source == "youtube":
+        # YouTube cards are built with 400 in place of the 200 used for every other source
+        return youtube.build_card_html(doc, 400, show_chunks)
+    if source == "candid_blog":
+        return candid_blog.build_card_html(doc, card_height, show_chunks)
+    if source == "candid_learning":
+        return candid_learning.build_card_html(doc, card_height, show_chunks)
+    if source == "candid_help":
+        return candid_help.build_card_html(doc, card_height, show_chunks)
+    # "news", "transactions" and "organizations" have no card builder wired up in this function
+    return ""
+def html_format_docs_chat(docs: List[Document]) -> str:
+    """Formats Candid sources as an HTML block of links, one per document
     Parameters
     ----------
+    docs : List[Document]
+        Retrieved documents for context; ``metadata`` is read for ``title``, ``source`` and ``url``,
+        falling back to "Untitled", "Source" and "URL" when a key is missing
     Returns
     -------
     str
+        Formatted HTML, or an empty string when there are no documents
     """
+    # Imported locally so the block carries its own dependency
+    from html import escape
+    if not docs:
+        return ""
+    docs_html = []
+    for doc in docs:
+        # Metadata values are interpolated into markup and a single-quoted attribute, so escape them
+        # (escape() also converts both quote characters by default)
+        s_name = escape(str(doc.metadata.get("source", "Source")))
+        s_url = escape(str(doc.metadata.get("url", "URL")))
+        # .get with a default: a missing title must not raise KeyError and drop the whole sources block
+        s_title = escape(str(doc.metadata.get("title", "Untitled")))
+        s_html = (
+            "<span class='source-item'>"
+            f"<a href='{s_url}' target='_blank' rel='noreferrer' class='ssearch-source'>"
+            f"{s_title} &vert; {s_name}</a></span>"
+        )
+        docs_html.append(s_html)
+    return f"<h2>Candid Resources</h2><div id='ssearch-sources'>{'<br>'.join(docs_html)}</div>"
+def format_chat_ag_response(chatbot: List[Any]) -> List[Any]:
+    """If we called the retriever, we appended the sources as one more message. Here we concatenate the
+        HTML of the sources with the AI response
+    Parameters
+    ----------
+    chatbot : List[Any]
+        chat messages as dicts with a "content" key and an optional "metadata" dict
+    Returns
+    -------
+    List[Any]
+        the same list, modified in place: a trailing message whose metadata title is "Sources HTML"
+        is removed and its content appended to the message before it
+    """
+    # Merging needs two messages: the trailing sources message and the reply it is folded into.
+    # With fewer there is nothing to merge into, so the list is returned untouched.
+    if not chatbot or len(chatbot) < 2:
+        return chatbot
+    # "metadata" may be absent or None, hence the `or {}` fallback
+    title = (chatbot[-1].get("metadata") or {}).get("title", None)
+    if title == "Sources HTML":
+        sources = chatbot.pop(-1)["content"]
+        chatbot[-1]["content"] = chatbot[-1]["content"] + sources
+    return chatbot
+def valid_inputs(*args) -> bool:
+    """Checks whether at least one argument carries a usable value
+    Parameters
+    ----------
+    *args
+        candidate input values
+    Returns
+    -------
+    bool
+        True when any argument is not None and, if it is a string, is not empty or whitespace-only
+    """
+    # `and not blank` instead of `or`: with `or`, the string check never ran and "" counted as valid
+    return any(a is not None and not (isinstance(a, str) and a.strip() == '') for a in args)
+def get_session_id(thread_id: Union[str, None]) -> str:
+    """Returns the given thread id, or a new random hex id when none is supplied
+    Parameters
+    ----------
+    thread_id : Union[str, None]
+        existing id; None or an empty string triggers generation of a new one
+    Returns
+    -------
+    str
+        the supplied id, or the hex form of a fresh ``uuid4``
+    """
+    return thread_id or uuid4().hex