Spaces:
Running
Running
Delete tools
Browse files- tools/__init__.py +0 -0
- tools/common/__init__.py +0 -0
- tools/common/base/__init__.py +0 -0
- tools/common/base/api_base.py +0 -42
- tools/common/org_search_component.py +0 -223
- tools/config.py +0 -5
- tools/org_seach.py +0 -188
- tools/org_search_component.py +0 -223
- tools/question_reformulation.py +0 -44
tools/__init__.py
DELETED
File without changes
|
tools/common/__init__.py
DELETED
File without changes
|
tools/common/base/__init__.py
DELETED
File without changes
|
tools/common/base/api_base.py
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
from typing import Dict, Optional, Any
|
2 |
-
|
3 |
-
from urllib3.util.retry import Retry
|
4 |
-
from requests.adapters import HTTPAdapter
|
5 |
-
import requests
|
6 |
-
|
7 |
-
|
8 |
-
class BaseAPI:
    """Small JSON-over-HTTP client bound to a single URL, with automatic retries.

    One ``requests.Session`` is created per instance and the same retrying
    ``HTTPAdapter`` is mounted for both ``https://`` and ``http://``.
    """

    def __init__(
        self,
        url: str,
        headers: Optional[Dict[str, Any]] = None,
        total_retries: int = 3,
        backoff_factor: int = 2
    ) -> None:
        """Create the session and mount the retrying adapter.

        Parameters
        ----------
        url : str
            Endpoint every ``get``/``post`` call is sent to.
        headers : Optional[Dict[str, Any]], optional
            Headers sent with every request, by default None
        total_retries : int, optional
            Retry budget, clamped to the range 0..10, by default 3
        backoff_factor : int, optional
            Backoff factor handed to ``urllib3``'s ``Retry``, by default 2
        """
        # Clamp into [0, 10]. This used to be `max(total_retries, 10)`, which forced
        # at least ten retries and made the default of 3 meaningless.
        total_retries = min(max(total_retries, 0), 10)

        adapter = HTTPAdapter(
            max_retries=Retry(
                total=total_retries,
                status_forcelist=[429, 500, 502, 503, 504],
                # POST is listed here, so POST requests are retried as well.
                allowed_methods=frozenset({"HEAD", "GET", "POST", "OPTIONS"}),
                backoff_factor=backoff_factor,
            )
        )
        self.session = requests.Session()
        self.session.mount("https://", adapter)
        self.session.mount("http://", adapter)

        self.__url = url
        self.__headers = headers

    def get(self, **request_kwargs):
        """Send a GET with ``request_kwargs`` as query parameters and return the decoded JSON body.

        Raises ``requests.HTTPError`` (via ``raise_for_status``) on an error status.
        """
        r = self.session.get(url=self.__url, headers=self.__headers, params=request_kwargs, timeout=30)
        r.raise_for_status()
        return r.json()

    def post(self, payload: Dict[str, Any]):
        """Send ``payload`` as a JSON POST body and return the decoded JSON response.

        Raises ``requests.HTTPError`` (via ``raise_for_status``) on an error status.
        """
        r = self.session.post(url=self.__url, headers=self.__headers, json=payload, timeout=30)
        r.raise_for_status()
        return r.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/common/org_search_component.py
DELETED
@@ -1,223 +0,0 @@
|
|
1 |
-
from typing import List, Tuple, Optional, Any
|
2 |
-
import os
|
3 |
-
|
4 |
-
import gradio as gr
|
5 |
-
|
6 |
-
try:
|
7 |
-
from base.api_base import BaseAPI
|
8 |
-
except ImportError:
|
9 |
-
from .base.api_base import BaseAPI
|
10 |
-
|
11 |
-
|
12 |
-
class OrgSearch(BaseAPI):
    """Callable client for the ``/v1/organization/search`` endpoint.

    The base URL and API key are read from the ``CDS_API_URL`` and ``CDS_API_KEY``
    environment variables when the instance is created.
    """

    def __init__(self):
        super().__init__(
            url=f"{os.getenv('CDS_API_URL')}/v1/organization/search",
            headers={"x-api-key": os.getenv('CDS_API_KEY')}
        )

    def __call__(self, name: str, **kwargs):
        """Search for authorized organizations by name plus an EIN and/or an address.

        Parameters
        ----------
        name : str
            Organization name, sent as the ``main`` name.
        **kwargs
            Optional ``ein``, ``street``, ``city``, ``state`` and ``postal_code``.
            ``postal`` is accepted as an alias of ``postal_code``.

        Returns
        -------
        Optional[list]
            The ``payload`` entry of the response (``[]`` when absent), or ``None`` when
            neither an EIN nor any address field was supplied (no request is sent).
        """
        payload = {
            "names": [{
                "value": name,
                "type": "main"
            }],
            "status": "authorized"
        }
        is_valid = False

        ein = kwargs.get("ein")
        if ein:
            # Insert the dash after the first two characters when it is missing.
            if "-" not in ein:
                ein = f"{ein[:2]}-{ein[2:]}"
            payload["ids"] = [{
                "value": ein,
                "type": "ein"
            }]
            is_valid = True

        street = kwargs.get("street")
        city = kwargs.get("city")
        state = kwargs.get("state")
        # `lookup_organization` in this module passes `postal=...`; reading only
        # `postal_code` silently dropped the postal code from the search.
        postal_code = kwargs.get("postal_code") or kwargs.get("postal")

        if street or city or state or postal_code:
            payload["addresses"] = [{
                "street1": street or "",
                "city": city or "",
                "state": state or "",
                "postal_code": postal_code or ""
            }]
            is_valid = True

        if not is_valid:
            return None

        result = self.post(payload=payload)
        return result.get("payload", [])
|
55 |
-
|
56 |
-
|
57 |
-
# Module-level client shared by `lookup_organization`; created at import time, so the
# CDS_API_URL / CDS_API_KEY environment variables are read when this module is imported.
search_org = OrgSearch()
|
58 |
-
|
59 |
-
|
60 |
-
def callback_to_state(event: gr.SelectData, state: gr.State) -> Tuple[List[Any], int]:
    """Resolve a DataFrame `select` event to the selected record and its first element.

    Parameters
    ----------
    event : gr.SelectData
        Selection event; ``event.index`` is unpacked as ``(row, column)``.
    state : gr.State
        Search results held in state, expected to be a list of row lists.

    Returns
    -------
    Tuple[List[Any], int]
        (Selected record, first element of that record). ``([], None)`` when the state is
        empty; when the state is a flat list it is passed through as the record.
    """

    selected_row, _column = event.index

    if not len(state):
        return [], None

    # A flat (non-nested) state is treated as a single record and passed through unchanged.
    rows_are_lists = all(isinstance(entry, list) for entry in state)
    if not rows_are_lists:
        return state, state[0]

    record = state[selected_row]
    return record, record[0]
|
85 |
-
|
86 |
-
|
87 |
-
def lookup_organization(
    name: str,
    ein: Optional[str] = None,
    # street: Optional[str] = None,
    city: Optional[str] = None,
    state: Optional[str] = None,
    postal: Optional[str] = None,
) -> Tuple[List[List[str]], List[List[str]]]:
    """Performs a simple search using the CDS organization search API. Results are sent to the DataFrame table and also
    populate the state for the recipient information.

    Parameters
    ----------
    name : str
        Org name
    ein : Optional[str], optional
        Org EIN, by default None
    city : Optional[str], optional
        Address city, by default None
    state : Optional[str], optional
        Address state, by default None
    postal : Optional[str], optional
        Address postal code, by default None

    Returns
    -------
    Tuple[List[List[str]], List[List[str]]]
        (recip data, recip data); each row is ``[candid_entity_id, main_sort_name, address, seal]``

    Raises
    ------
    gr.Error
        Raised if not enough information was entered to run a search
    gr.Error
        Raised if no search results were returned
    """

    # The search client reads the postal code from the `postal_code` keyword; passing it
    # as `postal` meant it was never included in the request.
    results = search_org(name=name, ein=ein, city=city, state=state, postal_code=postal)
    if results is None:
        raise gr.Error("You must provide a name, and either an EIN or an address.")
    if not results:
        raise gr.Error("No organizations could be found. Please refine the search criteria.")

    data = []
    for applicant_data in results:
        # `addresses` may be missing, None, or an empty list; fall back to an empty entry
        # instead of raising on `[0]`.
        addresses = applicant_data.get("addresses") or [{}]
        address = (addresses[0] or {}).get("normalized")

        # NOTE(review): the original branched on the organization's `current_seal` image
        # but appended an empty string in both branches, so the "Seal" column is always
        # blank. The markup that rendered the image appears to be missing; restore it
        # here once the intended format is confirmed.
        data.append([
            applicant_data.get('candid_entity_id'),
            applicant_data.get('main_sort_name'),
            address,
            ""
        ])
    return data, data
|
150 |
-
|
151 |
-
|
152 |
-
def render(org_id_element: Optional[gr.Blocks] = None) -> Tuple[gr.Blocks, gr.State]:
    """Main blocks build and render function.

    Builds the search form (name, EIN, city, state/province, postal code), the search
    button and the results table, and wires the two events: the button click runs
    `lookup_organization`, and a table selection runs `callback_to_state`.

    Parameters
    ----------
    org_id_element : Optional[gr.Blocks], optional
        Callback Gradio element that receives the selected Candid entity ID, by default None.
        When None, a textbox is created for it.

    Returns
    -------
    Tuple[gr.Blocks, gr.State]
        (component, selected org state)
    """

    with gr.Blocks() as component:
        # `org_data` holds every row of the last search; `selected_org_data` holds the chosen row.
        org_data = gr.State([])
        selected_org_data = gr.State([])

        with gr.Row():
            with gr.Column(scale=2):
                name = gr.Textbox(label="Name of organization", lines=1)
                ein = gr.Textbox(label="EIN of organization", lines=1)
            with gr.Column(scale=3):
                with gr.Group():
                    with gr.Row():
                        with gr.Column():
                            # street = gr.Textbox(label="Street address", lines=1)
                            city = gr.Textbox(label="City", lines=1)
                        with gr.Column():
                            state = gr.Textbox(label="State/province", lines=1)
                            postal = gr.Textbox(label="Postal code", lines=1)

        search_button = gr.Button("Find organization", variant="primary")
        org_info = gr.DataFrame(
            label="Organizations",
            type="array",
            headers=["Candid ID", "Name", "Address", "Seal"],
            col_count=(4, "fixed"),
            datatype=["number", "str", "str", "markdown"],
            wrap=True,
            column_widths=["20%", "30%", "30%", "20%"]
        )

        if org_id_element is None:
            org_id_element = gr.Textbox(label="Selected Candid entity ID", lines=1)

        # The search results feed both the visible table and the `org_data` state.
        # pylint: disable=no-member
        search_button.click(
            fn=lambda name, ein, city, state, postal: lookup_organization(
                name=name,
                ein=ein,
                # street=street,
                city=city,
                state=state,
                postal=postal
            ),
            # inputs=[name, ein, street, city, state, postal],
            inputs=[name, ein, city, state, postal],
            outputs=[org_info, org_data],
            api_name=False,
            show_api=False
        )

        # Selecting a table row stores that row and forwards its first element (the Candid ID).
        # pylint: disable=no-member
        org_info.select(
            fn=callback_to_state,
            inputs=org_data,
            outputs=[selected_org_data, org_id_element],
            api_name=False,
            show_api=False
        )
    return component, selected_org_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/config.py
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
# CDS API connection settings, read from the environment at import time.
# A value is None when the corresponding variable is not set.
CDS_API = {key: os.getenv(key) for key in ('CDS_API_URL', 'CDS_API_KEY')}
|
|
|
|
|
|
|
|
|
|
|
|
tools/org_seach.py
DELETED
@@ -1,188 +0,0 @@
|
|
1 |
-
from typing import List
|
2 |
-
import re
|
3 |
-
|
4 |
-
from fuzzywuzzy import fuzz
|
5 |
-
|
6 |
-
from langchain.output_parsers.openai_tools import JsonOutputToolsParser
|
7 |
-
from langchain_openai.chat_models import ChatOpenAI
|
8 |
-
from langchain_core.runnables import RunnableSequence
|
9 |
-
from langchain_core.prompts import ChatPromptTemplate
|
10 |
-
from pydantic import BaseModel
|
11 |
-
|
12 |
-
try:
|
13 |
-
from common.org_search_component import OrgSearch
|
14 |
-
except ImportError:
|
15 |
-
from ...common.org_search_component import OrgSearch
|
16 |
-
|
17 |
-
# Module-level organization search client used by `generate_org_link_dict`;
# instantiated once at import time.
search = OrgSearch()
|
18 |
-
|
19 |
-
|
20 |
-
# Schema bound to the chat model as a tool in `extract_org_links_from_chatbot`.
# NOTE(review): documented with comments rather than a docstring on purpose; a model
# docstring may be copied into the tool description sent to the LLM. Confirm before adding one.
class OrganizationNames(BaseModel):
    # Organization names extracted from the chatbot output.
    orgnames: List[str]
|
22 |
-
|
23 |
-
|
24 |
-
def extract_org_links_from_chatbot(chatbot_output: str):
    """
    Extracts a list of organization names from the provided text using an LLM tool call.

    This is best-effort: any failure (model error, unexpected output shape) is printed
    and an empty list is returned. No exception is propagated to the caller.

    Args:
        chatbot_output (str): The chatbot output containing organization names and other content.

    Returns:
        list: A list of organization names extracted from the text, or ``[]`` when none
            were found or extraction failed.
    """
    # The closing code fence previously had two backticks instead of three.
    template = """Extract only the names of officially recognized organizations, foundations, and government entities from the text below. Do not include any entries that contain descriptions, regional identifiers, or explanations within parentheses or following the name. Strictly exclude databases, resources, crowdfunding platforms, and general terms. Provide the output only in the specified JSON format.

    input text below:

    ```{chatbot_output}```

    output format:
    {{
    'orgnames' : [list of organization names without any additional descriptions or identifiers]
    }}

    """

    try:
        parser = JsonOutputToolsParser()
        llm = ChatOpenAI(model="gpt-4o").bind_tools([OrganizationNames])
        prompt = ChatPromptTemplate.from_template(template)
        chain = RunnableSequence(prompt, llm, parser)

        # Run the chain with the input data
        result = chain.invoke({"chatbot_output": chatbot_output})

        # The model may answer without calling the tool; that is "no organizations",
        # not an error.
        if not result:
            return []

        # Extract the organization names from the first tool call
        output_list = result[0]["args"].get("orgnames", [])

        # Validate output format (caught below and reported like any other failure)
        if not isinstance(output_list, list):
            raise ValueError("Unexpected output format: 'orgnames' should be a list")

        return output_list

    except Exception as e:
        # Deliberate catch-all: link extraction must never break the chat response.
        print(f"text does not have any organization: {e}")
        return []
|
72 |
-
|
73 |
-
|
74 |
-
def is_similar(name: str, list_of_dict: list, threshold: int = 80):
    """
    Tell whether `name` fuzzy-matches the "name" value of any dictionary in `list_of_dict`.

    Comparison is case-insensitive and uses `fuzz.ratio`; a score at or above `threshold`
    counts as a match. Items without a usable "name" are reported and skipped, and a
    `TypeError` (for example a non-iterable `list_of_dict`) is reported and yields False.
    """
    try:
        for candidate in list_of_dict:
            try:
                score = fuzz.ratio(name.lower(), candidate["name"].lower())
            except KeyError:
                # The dictionary has no 'name' entry
                print(f"KeyError: Missing 'name' key in dictionary item {candidate}")
                continue
            except AttributeError:
                # A value without `.lower()` was encountered
                print(f"AttributeError: Non-string 'name' in dictionary item {candidate}")
                continue

            if score >= threshold:
                return True
    except TypeError as exc:
        # Input types are not what the comparison expects
        print(f"TypeError: {exc}")
        return False

    return False
|
99 |
-
|
100 |
-
|
101 |
-
def generate_org_link_dict(org_names_list: list):
    """
    Maps organization names to their Candid profile URLs if available.

    For each name in `org_names_list`, the module-level `search` client is queried. When the
    first result has a similar name (see `is_similar`) and carries a Candid entity ID, the
    profile URL is built from that ID. In every other case, including any error, the name maps
    to an empty string.

    NOTE(review): `search` is called with the name only. If the client requires an EIN or
    address to run a search, every lookup here yields no result. Confirm against `OrgSearch`.

    Args:
        org_names_list (list): List of organization names (str) to retrieve Candid profile links for.

    Returns:
        dict: Dictionary with organization names as keys and Candid profile URLs or empty strings as values.

    Example:
        generate_org_link_dict(['New York-Presbyterian Hospital'])
        # {'New York-Presbyterian Hospital': 'https://app.candid.org/profile/6915255'}
    """
    link_dict = {}

    for org in org_names_list:
        try:
            matches = search(org)

            # Empty string unless the top hit is a similar name with an entity ID
            profile_url = ""
            if matches and is_similar(org, matches[0].get("names", "")):
                entity_id = matches[0].get("candid_entity_id")
                if entity_id:
                    profile_url = f"https://app.candid.org/profile/{entity_id}"
            link_dict[org] = profile_url

        except KeyError as err:
            # A key was missing somewhere in the response handling
            print(f"KeyError encountered for organization '{org}': {err}")
            link_dict[org] = ""

        except Exception as err:
            # Anything else: report it and fall back to "no link"
            print(f"An error occurred for organization '{org}': {err}")
            link_dict[org] = ""

    return link_dict
|
151 |
-
|
152 |
-
|
153 |
-
def embed_org_links_in_text(input_text: str, org_link_dict: dict):
    """
    Replaces organization names in `input_text` with links from `org_link_dict` and appends a Candid info message.

    All names are substituted in a single pass, longest name first. A name that is a substring
    of another name (e.g. "Ford" and "Ford Foundation") therefore cannot be re-linked inside an
    anchor that was just inserted, and names are always treated as literal text. Entries with an
    empty name or an empty URL are skipped.

    This is best-effort: on any error the problem is printed and the text is returned as it
    stood at that point (possibly without the appended message).

    Args:
        input_text (str): The text containing organization names.
        org_link_dict (dict): Mapping of organization names to URLs.

    Returns:
        str: Updated text with linked organization names and an appended Candid message.
    """
    try:
        # Only names that actually have a URL are linked
        linked = {name: url for name, url in org_link_dict.items() if name and url}

        if linked:
            # Longest-first alternation so the most specific name wins at each position
            names_pattern = re.compile(
                "|".join(re.escape(name) for name in sorted(linked, key=len, reverse=True))
            )

            def _to_anchor(match):
                # A function replacement is inserted verbatim, so backslashes in a
                # name are not interpreted as escapes or group references.
                org_name = match.group(0)
                return (
                    f"<a href={linked[org_name]} target='_blank' rel='noreferrer' "
                    f"class='candid-org-link'>{org_name}</a>"
                )

            input_text = names_pattern.sub(_to_anchor, input_text)

        # Append Candid information message at the end
        input_text += "<p class='candid-app-link'> Visit <a href=https://app.candid.org/ target='_blank' rel='noreferrer' class='candid-org-link'>Candid</a> to get nonprofit information you need.</p>"

    except TypeError as e:
        print(f"TypeError encountered: {e}")
        return input_text

    except re.error as e:
        print(f"Regex error encountered: {e}")
        return input_text

    except Exception as e:
        print(f"Unexpected error: {e}")
        return input_text

    return input_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/org_search_component.py
DELETED
@@ -1,223 +0,0 @@
|
|
1 |
-
from typing import List, Tuple, Optional, Any
|
2 |
-
import os
|
3 |
-
|
4 |
-
import gradio as gr
|
5 |
-
|
6 |
-
try:
|
7 |
-
from base.api_base import BaseAPI
|
8 |
-
except ImportError:
|
9 |
-
from .base.api_base import BaseAPI
|
10 |
-
|
11 |
-
|
12 |
-
class OrgSearch(BaseAPI):
    """Callable client for the ``/v1/organization/search`` endpoint.

    The base URL and API key are read from the ``CDS_API_URL`` and ``CDS_API_KEY``
    environment variables when the instance is created.
    """

    def __init__(self):
        super().__init__(
            url=f"{os.getenv('CDS_API_URL')}/v1/organization/search",
            headers={"x-api-key": os.getenv('CDS_API_KEY')}
        )

    def __call__(self, name: str, **kwargs):
        """Search for authorized organizations by name plus an EIN and/or an address.

        Parameters
        ----------
        name : str
            Organization name, sent as the ``main`` name.
        **kwargs
            Optional ``ein``, ``street``, ``city``, ``state`` and ``postal_code``.
            ``postal`` is accepted as an alias of ``postal_code``.

        Returns
        -------
        Optional[list]
            The ``payload`` entry of the response (``[]`` when absent), or ``None`` when
            neither an EIN nor any address field was supplied (no request is sent).
        """
        payload = {
            "names": [{
                "value": name,
                "type": "main"
            }],
            "status": "authorized"
        }
        is_valid = False

        ein = kwargs.get("ein")
        if ein:
            # Insert the dash after the first two characters when it is missing.
            if "-" not in ein:
                ein = f"{ein[:2]}-{ein[2:]}"
            payload["ids"] = [{
                "value": ein,
                "type": "ein"
            }]
            is_valid = True

        street = kwargs.get("street")
        city = kwargs.get("city")
        state = kwargs.get("state")
        # `lookup_organization` in this module passes `postal=...`; reading only
        # `postal_code` silently dropped the postal code from the search.
        postal_code = kwargs.get("postal_code") or kwargs.get("postal")

        if street or city or state or postal_code:
            payload["addresses"] = [{
                "street1": street or "",
                "city": city or "",
                "state": state or "",
                "postal_code": postal_code or ""
            }]
            is_valid = True

        if not is_valid:
            return None

        result = self.post(payload=payload)
        return result.get("payload", [])
|
55 |
-
|
56 |
-
|
57 |
-
# Module-level client shared by `lookup_organization`; created at import time, so the
# CDS_API_URL / CDS_API_KEY environment variables are read when this module is imported.
search_org = OrgSearch()
|
58 |
-
|
59 |
-
|
60 |
-
def callback_to_state(event: gr.SelectData, state: gr.State) -> Tuple[List[Any], int]:
    """Resolve a DataFrame `select` event to the selected record and its first element.

    Parameters
    ----------
    event : gr.SelectData
        Selection event; ``event.index`` is unpacked as ``(row, column)``.
    state : gr.State
        Search results held in state, expected to be a list of row lists.

    Returns
    -------
    Tuple[List[Any], int]
        (Selected record, first element of that record). ``([], None)`` when the state is
        empty; when the state is a flat list it is passed through as the record.
    """

    selected_row, _column = event.index

    if not len(state):
        return [], None

    # A flat (non-nested) state is treated as a single record and passed through unchanged.
    rows_are_lists = all(isinstance(entry, list) for entry in state)
    if not rows_are_lists:
        return state, state[0]

    record = state[selected_row]
    return record, record[0]
|
85 |
-
|
86 |
-
|
87 |
-
def lookup_organization(
    name: str,
    ein: Optional[str] = None,
    # street: Optional[str] = None,
    city: Optional[str] = None,
    state: Optional[str] = None,
    postal: Optional[str] = None,
) -> Tuple[List[List[str]], List[List[str]]]:
    """Performs a simple search using the CDS organization search API. Results are sent to the DataFrame table and also
    populate the state for the recipient information.

    Parameters
    ----------
    name : str
        Org name
    ein : Optional[str], optional
        Org EIN, by default None
    city : Optional[str], optional
        Address city, by default None
    state : Optional[str], optional
        Address state, by default None
    postal : Optional[str], optional
        Address postal code, by default None

    Returns
    -------
    Tuple[List[List[str]], List[List[str]]]
        (recip data, recip data); each row is ``[candid_entity_id, main_sort_name, address, seal]``

    Raises
    ------
    gr.Error
        Raised if not enough information was entered to run a search
    gr.Error
        Raised if no search results were returned
    """

    # The search client reads the postal code from the `postal_code` keyword; passing it
    # as `postal` meant it was never included in the request.
    results = search_org(name=name, ein=ein, city=city, state=state, postal_code=postal)
    if results is None:
        raise gr.Error("You must provide a name, and either an EIN or an address.")
    if not results:
        raise gr.Error("No organizations could be found. Please refine the search criteria.")

    data = []
    for applicant_data in results:
        # `addresses` may be missing, None, or an empty list; fall back to an empty entry
        # instead of raising on `[0]`.
        addresses = applicant_data.get("addresses") or [{}]
        address = (addresses[0] or {}).get("normalized")

        # NOTE(review): the original branched on the organization's `current_seal` image
        # but appended an empty string in both branches, so the "Seal" column is always
        # blank. The markup that rendered the image appears to be missing; restore it
        # here once the intended format is confirmed.
        data.append([
            applicant_data.get('candid_entity_id'),
            applicant_data.get('main_sort_name'),
            address,
            ""
        ])
    return data, data
|
150 |
-
|
151 |
-
|
152 |
-
def render(org_id_element: Optional[gr.Blocks] = None) -> Tuple[gr.Blocks, gr.State]:
    """Main blocks build and render function.

    Builds the search form (name, EIN, city, state/province, postal code), the search
    button and the results table, and wires the two events: the button click runs
    `lookup_organization`, and a table selection runs `callback_to_state`.

    Parameters
    ----------
    org_id_element : Optional[gr.Blocks], optional
        Callback Gradio element that receives the selected Candid entity ID, by default None.
        When None, a textbox is created for it.

    Returns
    -------
    Tuple[gr.Blocks, gr.State]
        (component, selected org state)
    """

    with gr.Blocks() as component:
        # `org_data` holds every row of the last search; `selected_org_data` holds the chosen row.
        org_data = gr.State([])
        selected_org_data = gr.State([])

        with gr.Row():
            with gr.Column(scale=2):
                name = gr.Textbox(label="Name of organization", lines=1)
                ein = gr.Textbox(label="EIN of organization", lines=1)
            with gr.Column(scale=3):
                with gr.Group():
                    with gr.Row():
                        with gr.Column():
                            # street = gr.Textbox(label="Street address", lines=1)
                            city = gr.Textbox(label="City", lines=1)
                        with gr.Column():
                            state = gr.Textbox(label="State/province", lines=1)
                            postal = gr.Textbox(label="Postal code", lines=1)

        search_button = gr.Button("Find organization", variant="primary")
        org_info = gr.DataFrame(
            label="Organizations",
            type="array",
            headers=["Candid ID", "Name", "Address", "Seal"],
            col_count=(4, "fixed"),
            datatype=["number", "str", "str", "markdown"],
            wrap=True,
            column_widths=["20%", "30%", "30%", "20%"]
        )

        if org_id_element is None:
            org_id_element = gr.Textbox(label="Selected Candid entity ID", lines=1)

        # The search results feed both the visible table and the `org_data` state.
        # pylint: disable=no-member
        search_button.click(
            fn=lambda name, ein, city, state, postal: lookup_organization(
                name=name,
                ein=ein,
                # street=street,
                city=city,
                state=state,
                postal=postal
            ),
            # inputs=[name, ein, street, city, state, postal],
            inputs=[name, ein, city, state, postal],
            outputs=[org_info, org_data],
            api_name=False,
            show_api=False
        )

        # Selecting a table row stores that row and forwards its first element (the Candid ID).
        # pylint: disable=no-member
        org_info.select(
            fn=callback_to_state,
            inputs=org_data,
            outputs=[selected_org_data, org_id_element],
            api_name=False,
            show_api=False
        )
    return component, selected_org_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/question_reformulation.py
DELETED
@@ -1,44 +0,0 @@
|
|
1 |
-
from langchain_core.prompts import ChatPromptTemplate
|
2 |
-
from langchain_core.output_parsers import StrOutputParser
|
3 |
-
|
4 |
-
|
5 |
-
def reformulate_question_using_history(state, llm):
    """
    Transform the query to produce a better query with details from previous messages.

    When the state holds only one message there is no history to draw on, so the question
    is returned unchanged and the LLM is not called.

    Args:
        state (messages): The current state; ``state["messages"]`` is the list of messages
            and the last one is taken as the user's question
        llm: LLM to use
    Returns:
        dict: The updated state with re-phrased question and original user_input for UI
    """
    print("---REFORMULATE THE USER INPUT---")
    messages = state["messages"]
    question = messages[-1].content

    if len(messages) > 1:
        contextualize_q_system_prompt = """Given a chat history and the latest user input \
        which might reference context in the chat history, formulate a standalone input \
        which can be understood without the chat history.
        Chat history:
        \n ------- \n
        {chat_history}
        \n ------- \n
        User input:
        \n ------- \n
        {question}
        \n ------- \n
        Do NOT answer the question, \
        just reformulate it if needed and otherwise return it as is.
        """

        contextualize_q_prompt = ChatPromptTemplate([
            ("system", contextualize_q_system_prompt),
            # Use the placeholder, not the raw text: each message string is itself a
            # template, so a question containing "{" or "}" would be parsed as template
            # variables and break formatting.
            ("human", "{question}"),
        ])

        rag_chain = contextualize_q_prompt | llm | StrOutputParser()
        new_question = rag_chain.invoke({"chat_history": messages, "question": question})
        print(f"user asked: '{question}', agent reformulated the question basing on the chat history: {new_question}")
        return {"messages": [new_question], "user_input" : question}
    return {"messages": [question], "user_input" : question}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|