brainsqueeze committed on
Commit
34b2d9c
·
verified ·
1 Parent(s): 8a6abdf

Delete tools

Browse files
tools/__init__.py DELETED
File without changes
tools/config.py DELETED
@@ -1,5 +0,0 @@
1
- import os
2
# Connection settings for the CDS API, read from the environment once at import time.
# A value is None when the corresponding environment variable is not set.
CDS_API = {
    setting: os.getenv(setting)
    for setting in ("CDS_API_URL", "CDS_API_KEY")
}
 
 
 
 
 
 
tools/org_seach.py DELETED
@@ -1,196 +0,0 @@
1
- from typing import List
2
- import re
3
-
4
- from fuzzywuzzy import fuzz
5
-
6
- from langchain.output_parsers.openai_tools import JsonOutputToolsParser
7
- from langchain_openai.chat_models import ChatOpenAI
8
- from langchain_core.runnables import RunnableSequence
9
- from langchain_core.prompts import ChatPromptTemplate
10
- from pydantic import BaseModel, Field
11
-
12
- try:
13
- from common.org_search_component import OrgSearch
14
- except ImportError:
15
- from ...common.org_search_component import OrgSearch
16
-
17
- search = OrgSearch()
18
-
19
-
20
# Schema handed to the chat model through bind_tools() in extract_org_links_from_chatbot;
# that function reads the "orgnames" argument of the resulting tool call.
# NOTE(review): the class docstring and the Field description are presumably forwarded to the
# model as the tool/parameter descriptions, so treat them as prompt text - confirm before rewording.
class OrganizationNames(BaseModel):
    """List of names of social-sector organizations, such as nonprofits and foundations."""
    orgnames: List[str] = Field(description="List of organization names")
23
-
24
-
25
def extract_org_links_from_chatbot(chatbot_output: str):
    """
    Extract organization names mentioned in a chatbot response using an LLM tool call.

    The text is sent to a ``gpt-4o`` chat model bound to the ``OrganizationNames`` tool, and the
    ``orgnames`` argument of the first tool call is returned.

    Args:
        chatbot_output (str): The chatbot output containing organization names and other content.

    Returns:
        list: Organization names extracted from the text. An empty list is returned when the
            input is empty or not a string, when the model makes no tool call, or when the
            request or the parsing of its result fails. This function is best-effort and does
            not raise.
    """
    # Nothing to extract from: skip the model call entirely.
    if not isinstance(chatbot_output, str) or not chatbot_output.strip():
        return []

    # The doubled braces are literal braces for the prompt template; {chatbot_output} is the
    # only template variable. The input text is wrapped in a balanced triple-backtick fence.
    prompt_text = (
        "Extract only the names of officially recognized organizations, foundations, and government entities\n"
        "from the text below. Do not include any entries that contain descriptions, regional identifiers, or explanations\n"
        "within parentheses or following the name. Strictly exclude databases, resources, crowdfunding platforms, and general\n"
        "terms. Provide the output only in the specified JSON format.\n"
        "\n"
        "input text below:\n"
        "\n"
        "```{chatbot_output}```\n"
        "\n"
        "output format:\n"
        "{{\n"
        "'orgnames' : [list of organization names without any additional descriptions or identifiers]\n"
        "}}\n"
        "\n"
    )

    try:
        chain = RunnableSequence(
            ChatPromptTemplate.from_template(prompt_text),
            ChatOpenAI(model="gpt-4o").bind_tools([OrganizationNames]),
            JsonOutputToolsParser(),
        )

        # Run the chain with the input data
        tool_calls = chain.invoke({"chatbot_output": chatbot_output})

        # The model may answer without calling the tool, which means no organizations were found.
        if not tool_calls:
            return []

        # Extract the organization names from the first tool call
        output_list = tool_calls[0]["args"].get("orgnames", [])

        # Validate output format
        if not isinstance(output_list, list):
            raise ValueError("Unexpected output format: 'orgnames' should be a list")

        return output_list

    except Exception as e:
        # Deliberate best-effort boundary: a failed extraction must not break the chat response,
        # so report the real cause and fall back to "no organizations".
        print(f"Failed to extract organization names: {e}")
        return []
76
-
77
-
78
def is_similar(name: str, list_of_dict: list, threshold: int = 80):
    """
    Return True if `name` fuzzily matches the "name" of any entry in `list_of_dict`.

    Both sides are lowercased and compared with ``fuzz.ratio``; an entry matches when its score
    is greater than or equal to `threshold`.

    Args:
        name (str): Name to look for.
        list_of_dict (list): Items expected to hold a string under the "name" key. Items that
            cannot be looked up by key, lack the key, or hold a non-string value are reported
            and skipped.
        threshold (int): Minimum ``fuzz.ratio`` score counted as a match. Defaults to 80.

    Returns:
        bool: True on the first entry that reaches the threshold, otherwise False. False is also
            returned, without raising, when `name` is not a string or when `list_of_dict` is
            empty, None, a plain string, or not iterable.
    """
    if not isinstance(name, str):
        # Report the real problem once instead of blaming every dictionary item for it.
        print(f"TypeError: name must be a string, got {type(name).__name__}")
        return False

    # A bare string would be iterated character by character; it can never contain a match.
    if not list_of_dict or isinstance(list_of_dict, (str, bytes)):
        return False

    target = name.lower()  # loop-invariant, so computed once

    try:
        for item in list_of_dict:
            try:
                candidate = item["name"]
            except KeyError:
                # Handle cases where 'name' key might be missing in dictionary
                print(f"KeyError: Missing 'name' key in dictionary item {item}")
                continue
            except (TypeError, IndexError):
                # One malformed item should not hide a valid match later in the list.
                print(f"TypeError: item {item!r} does not support lookup by 'name'")
                continue

            if not isinstance(candidate, str):
                # Handle non-string name values in dictionary items
                print(f"AttributeError: Non-string 'name' in dictionary item {item}")
                continue

            if fuzz.ratio(target, candidate.lower()) >= threshold:
                return True
    except TypeError as e:
        # list_of_dict is not iterable, or threshold cannot be compared with the score
        print(f"TypeError: {e}")
        return False

    return False
103
-
104
-
105
def generate_org_link_dict(org_names_list: list):
    """
    Map organization names to their Candid profile URLs where a confident match exists.

    Each name is looked up with the module-level `search` component (``name_only=True``). Only
    the first search hit is considered: if `is_similar` accepts its "names" entries and the hit
    carries a "candid_entity_id", the profile URL is built from that ID. In every other case
    (no hit, no similar name, no ID, or an error during lookup) the name maps to an empty string.

    Args:
        org_names_list (list): Organization names (str) to retrieve Candid profile links for.
            None is treated as an empty list.

    Returns:
        dict: Dictionary with organization names as keys and Candid profile URLs or empty
            strings as values.

    Example:
        generate_org_link_dict(['New York-Presbyterian Hospital'])
        # {'New York-Presbyterian Hospital': 'https://app.candid.org/profile/6915255'}
    """
    link_dict = {}

    for org in org_names_list or []:
        # Start from "no link"; only a confirmed match below overwrites it.
        link_dict[org] = ""

        try:
            # Attempt to retrieve organization data
            response = search(org, name_only=True)
            if not response:
                continue

            top_hit = response[0]

            # is_similar iterates over the "names" entries, so a missing key must fall back to
            # an empty list rather than a string.
            if not is_similar(org, top_hit.get("names") or []):
                continue

            # Try to get the Candid entity ID and construct the URL
            candid_entity_id = top_hit.get("candid_entity_id")
            if candid_entity_id:
                link_dict[org] = f"https://app.candid.org/profile/{candid_entity_id}"

        except KeyError as e:
            # Handle missing keys in the response dictionary
            print(f"KeyError encountered for organization '{org}': {e}")

        except Exception as e:
            # Per-organization boundary: one failed lookup must not drop the remaining names.
            print(f"An error occurred for organization '{org}': {e}")

    return link_dict
155
-
156
-
157
def embed_org_links_in_text(input_text: str, org_link_dict: dict):
    """
    Wrap organization names found in `input_text` in links and append a Candid info message.

    All names are linked in a single pass over the original text, longest name first. A shorter
    name therefore never rewrites part of a longer one (for example "Ford" inside
    "Ford Foundation"), and markup inserted for one name is never rescanned for another.
    Matching is exact and case-sensitive.

    Args:
        input_text (str): The text containing organization names.
        org_link_dict (dict): Mapping of organization names to URLs. Entries with an empty
            name or an empty URL are ignored.

    Returns:
        str: Updated text with linked organization names and the appended Candid message.
            If an error occurs, `input_text` is returned as received, without links and
            without the message.
    """
    try:
        # Only proceed with entries that have both a name and a URL; an empty name would
        # match at every position in the text.
        linked = {name: url for name, url in org_link_dict.items() if name and url}

        if linked:
            # Longest names first: alternation takes the first alternative that matches.
            names_by_length = sorted(linked, key=len, reverse=True)
            pattern = re.compile("|".join(re.escape(name) for name in names_by_length))

            def _anchor(match):
                # A function replacement is inserted verbatim, so backslashes in the name or
                # URL are not parsed as regex escapes or group references.
                org_name = match.group(0)
                return (
                    f"<a href={linked[org_name]} target='_blank' rel='noreferrer' "
                    f"class='candid-org-link'>{org_name}</a>"
                )

            input_text = pattern.sub(_anchor, input_text)

        # Append Candid information message at the end
        input_text += (
            "<p class='candid-app-link'> "
            "Visit <a href=https://app.candid.org/ target='_blank' rel='noreferrer' class='candid-org-link'>Candid</a> "
            "to get nonprofit information you need.</p>"
        )

    except TypeError as e:
        print(f"TypeError encountered: {e}")
        return input_text

    except Exception as e:
        # Best-effort: linking is cosmetic, so fall back to the text as received.
        print(f"Unexpected error: {e}")
        return input_text

    return input_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/question_reformulation.py DELETED
@@ -1,44 +0,0 @@
1
- from langchain_core.prompts import ChatPromptTemplate
2
- from langchain_core.output_parsers import StrOutputParser
3
-
4
-
5
def reformulate_question_using_history(state, llm):
    """
    Rewrite the latest user input as a standalone question using the chat history.

    When the state holds a single message there is no history to draw on, so the input is
    returned unchanged and `llm` is not called.

    Args:
        state (messages): The current state. ``state["messages"]`` is read and the ``content``
            of its last element is taken as the user input.
        llm: LLM to use
    Returns:
        dict: The updated state: ``"messages"`` holds a one-item list with the (possibly
            re-phrased) question and ``"user_input"`` holds the original input for the UI.
    """
    print("---REFORMULATE THE USER INPUT---")
    messages = state["messages"]
    question = messages[-1].content

    if len(messages) <= 1:
        return {"messages": [question], "user_input": question}

    contextualize_q_system_prompt = (
        "Given a chat history and the latest user input "
        "which might reference context in the chat history, formulate a standalone input "
        "which can be understood without the chat history.\n"
        "Chat history:\n"
        "\n ------- \n\n"
        "{chat_history}\n"
        "\n ------- \n\n"
        "User input:\n"
        "\n ------- \n\n"
        "{question}\n"
        "\n ------- \n\n"
        "Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is.\n"
    )

    contextualize_q_prompt = ChatPromptTemplate([
        ("system", contextualize_q_system_prompt),
        # Route the input through a placeholder. Using the raw text as the template would make
        # any "{...}" typed by the user be parsed as a template variable.
        ("human", "{question}"),
    ])

    rag_chain = contextualize_q_prompt | llm | StrOutputParser()
    new_question = rag_chain.invoke({"chat_history": messages, "question": question})
    print(f"user asked: '{question}', agent reformulated the question basing on the chat history: {new_question}")
    return {"messages": [new_question], "user_input": question}