jproman committed on
Commit
522e6c3
·
1 Parent(s): 16f1cd8

Refactor to use langchain

Browse files
.gitignore CHANGED
@@ -174,4 +174,5 @@ cython_debug/
174
  .pypirc
175
 
176
  # agent's log files
177
- log.txt
 
 
174
  .pypirc
175
 
176
  # agent's log files
177
+ log.txt
178
+ questions.txt
app.py CHANGED
@@ -5,7 +5,7 @@ import inspect
5
  import pandas as pd
6
 
7
  import config
8
- from agent import Agent
9
  from logger import Logger
10
 
11
 
@@ -178,11 +178,15 @@ if __name__ == "__main__":
178
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
179
 
180
  print("-"*(60 + len(" App Starting ")) + "\n")
181
- # print("\n" + "*"*30 + " Debug area " + "*"*30)
182
- # from utils import callHfInferenceClientLLM
183
- # response = callHfInferenceClientLLM("What is the capital of France?")
184
- # print(response)
185
- # print("\n" + "*"*30 + " Debug area " + "*"*30)
186
-
187
- print("Launching Gradio Interface for Basic Agent Evaluation...")
188
- demo.launch(debug=True, share=False)
 
 
 
 
 
5
  import pandas as pd
6
 
7
  import config
8
+ from lc_agent.Agent import Agent
9
  from logger import Logger
10
 
11
 
 
178
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
179
 
180
  print("-"*(60 + len(" App Starting ")) + "\n")
181
+
182
+ print("\n" + "*"*30 + " Debug area " + "*"*30)
183
+ logger = Logger(config.logLevel,config.logFile)
184
+ agent = Agent(config,logger)
185
+ question = "this is a question"
186
+ print("===",question)
187
+ response = agent.invoke(question)
188
+ print(response)
189
+ print("\n" + "*"*30 + " Debug area " + "*"*30)
190
+
191
+ #print("Launching Gradio Interface for Basic Agent Evaluation...")
192
+ #demo.launch(debug=True, share=False)
assignment_utils.py CHANGED
@@ -2,8 +2,6 @@
2
  import requests
3
 
4
  import config
5
- from logger import Logger
6
- from agent import Agent
7
 
8
 
9
  def getQuestions():
@@ -26,7 +24,7 @@ def getQuestionByPos(i):
26
 
27
  def printQuestions():
28
  for i,question in enumerate(getQuestions()):
29
- print(f"{i+1} ({question['task_id']}): {question['question']} {'(File: ' + question['file_name'] + ')' if question['file_name'] else ''}")
30
 
31
  def submitAnswers(answers):
32
  submissionData = {
@@ -70,11 +68,9 @@ def getTestAnswers():
70
  {"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d", "submitted_answer": "test answer"},
71
  ]
72
 
73
- def submitFirstAnswers(n):
74
  questions = getQuestions()
75
  answers = getTestAnswers()
76
- logger = Logger(config.logLevel,config.logFile)
77
- agent = Agent(logger)
78
  for i in range(n):
79
  response = agent.invoke(questions[i]['question'])
80
  answers[i]["submitted_answer"] = response
@@ -86,6 +82,9 @@ if __name__ == "__main__":
86
  # https://huggingface.co/spaces/agents-course/Students_leaderboard
87
  questions = getQuestions()
88
  printQuestions()
89
- response = submitAnswers(getTestAnswers())
90
- print(response)
 
 
 
91
 
 
2
  import requests
3
 
4
  import config
 
 
5
 
6
 
7
  def getQuestions():
 
24
 
25
def printQuestions():
    # Print one line per question: 1-based position, task id, question text,
    # and the attached file name when the question ships with a file.
    for i,question in enumerate(getQuestions()):
        print(f"{i+1}. ({question['task_id']}): {question['question']} {'(File: ' + question['file_name'] + ')' if question['file_name'] else ''}\n\n")
28
 
29
  def submitAnswers(answers):
30
  submissionData = {
 
68
  {"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d", "submitted_answer": "test answer"},
69
  ]
70
 
71
+ def submitFirstAnswers(n,agent):
72
  questions = getQuestions()
73
  answers = getTestAnswers()
 
 
74
  for i in range(n):
75
  response = agent.invoke(questions[i]['question'])
76
  answers[i]["submitted_answer"] = response
 
82
  # https://huggingface.co/spaces/agents-course/Students_leaderboard
83
  questions = getQuestions()
84
  printQuestions()
85
+ #from logger import Logger
86
+ #logger = Logger(config.logLevel,config.logFile)
87
+ #agent = Agent(logger)
88
+ #response = submitAnswers(getTestAnswers())
89
+ #print(response)
90
 
config.py CHANGED
@@ -3,7 +3,9 @@ baseApiUrl = "https://agents-course-unit4-scoring.hf.space"
3
  questionsUrl = f"{baseApiUrl}/questions"
4
  submitUrl = f"{baseApiUrl}/submit"
5
 
6
- hfMoldel = "meta-llama/Llama-3.3-70B-Instruct"
 
 
7
 
8
  logLevel = 'DEBUG'
9
  logFile = 'log.txt'
 
3
  questionsUrl = f"{baseApiUrl}/questions"
4
  submitUrl = f"{baseApiUrl}/submit"
5
 
6
# Hugging Face model used by the Inference API backend.
# NOTE(review): "hfMoldel" looks like a typo for "hfModel", but other modules
# reference this exact attribute name — rename everywhere at once or keep as-is.
hfMoldel = "microsoft/Phi-4-reasoning-plus" # "meta-llama/Llama-3.3-70B-Instruct"
# MediaWiki search API base; generator=search with gsrlimit=1 returns only the
# top hit, and the caller appends the (URL-encoded) search terms after gsrsearch=.
wikipediaSearchURL = 'https://en.wikipedia.org/w/api.php?action=query&prop=extracts&origin=*&format=json&generator=search&gsrnamespace=0&gsrlimit=1&gsrsearch='
# Base URL for fetching a full article page by title.
wikipediaRetrieveURL = 'https://en.wikipedia.org/wiki/'

logLevel = 'DEBUG'
logFile = 'log.txt'
lc_agent/Agent.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
class Agent:
    """Placeholder for the langchain-based agent.

    Holds references to the shared config module and logger; ``invoke``
    currently returns a canned response rather than answering the question.
    """

    def __init__(self, config, logger):
        """Remember the application config and logger for later use."""
        self.config = config
        self.logger = logger

    def invoke(self, question):
        """Return a stub answer; *question* is ignored for now."""
        return f"This is my response: {self.config.baseApiUrl}"
logger.py CHANGED
@@ -27,6 +27,7 @@ class Logger:
27
  self.__log(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ** CRITICAL ** {message}")
28
 
29
  def __log(self, message):
 
30
  print(message)
31
  if config.logFile:
32
  with open(config.logFile,"a") as fh:
 
27
  self.__log(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ** CRITICAL ** {message}")
28
 
29
  def __log(self, message):
30
+ message = message.strip()
31
  print(message)
32
  if config.logFile:
33
  with open(config.logFile,"a") as fh:
agent.py → pf_agent/agent.py RENAMED
@@ -1,4 +1,7 @@
1
 
 
 
 
2
  from flow import createFlow
3
  from logger import Logger
4
  import config
@@ -21,5 +24,5 @@ if __name__ == "__main__":
21
  agent = Agent(logger)
22
  question = getQuestionByPos(0)
23
  print(question)
24
- #response = agent.invoke(question['question'])
25
- #print(response)
 
1
 
2
+ import os
3
+ os.environ["LOCALLLM"] = "llama3.2"
4
+
5
  from flow import createFlow
6
  from logger import Logger
7
  import config
 
24
  agent = Agent(logger)
25
  question = getQuestionByPos(0)
26
  print(question)
27
+ response = agent.invoke(question['question'])
28
+ print(response)
flow.py → pf_agent/flow.py RENAMED
@@ -1,18 +1,22 @@
1
 
2
  from pocketflow import Flow
3
 
4
- from nodes import Decide, Search, Answer
5
 
6
  def createFlow(logger):
7
  decide = Decide()
8
  decide.setLogger(logger)
9
  search = Search()
10
  search.setLogger(logger)
 
 
11
  answer = Answer()
12
  answer.setLogger(logger)
13
 
14
  decide - "search" >> search
 
15
  decide - "answer" >> answer
16
  search - "decide" >> decide
 
17
 
18
  return Flow(start=decide)
 
1
 
2
  from pocketflow import Flow
3
 
4
+ from nodes import Decide, Search, Wikipedia, Answer
5
 
6
def createFlow(logger):
    # Build the agent's pocketflow graph: Decide routes to a tool node, and
    # every tool node hands control back to Decide until it picks "answer".
    decide = Decide()
    decide.setLogger(logger)
    search = Search()
    search.setLogger(logger)
    wikipedia = Wikipedia()
    wikipedia.setLogger(logger)
    answer = Answer()
    answer.setLogger(logger)

    # pocketflow DSL: `node - "action" >> target` wires the transition taken
    # when the node's post() returns that action string.
    decide - "search" >> search
    decide - "wikipedia" >> wikipedia
    decide - "answer" >> answer
    search - "decide" >> decide
    wikipedia - "decide" >> decide

    return Flow(start=decide)
nodes.py → pf_agent/nodes.py RENAMED
@@ -2,7 +2,7 @@
2
  import yaml
3
  from pocketflow import Node
4
 
5
- from utils import callLLM, callWebSearch
6
 
7
  class Decide(Node):
8
  def setLogger(self,logger):
@@ -17,7 +17,7 @@ class Decide(Node):
17
  question,context = inputs
18
  prompt = f"""
19
  ### CONTEXT
20
- You are a research assistant that can search the web
21
  Question: {question}
22
  Previous research: {context}
23
 
@@ -25,37 +25,47 @@ Previous research: {context}
25
  [1] search
26
  Description: Look up more information on the web
27
  Parameters:
28
- - query (str): what to search for
29
 
30
- [2] answer
 
 
 
 
 
31
  Description: Answer the question with current knowledge
32
  Parameters:
33
  - answer (str): final answer to the question
34
 
 
35
  ### NEXT ACTION
36
  Decide the next action based on the context and available actions.
37
  Return your response in the following format:
38
  ```yaml
39
  thinking: |
40
  <your step-by-step reasoning process>
41
- action: search OR answer
42
  reason: <why you choose this action>
43
- search_query: <specific search query if action is search>
44
  ```
45
  IMPORTANT: Make sure to:
46
  1. Use proper indentation (4 spaces) for all multi-line fields
47
  2. Use the | character for multi-line text fields
48
  3. Keep single-line fields without the | character
49
  """
 
 
50
  response = callLLM(prompt)
51
- self.logger.debug(f"=== CALLING LLM\n{prompt}\n=== LLM RESPONSE\n{response}\n\n==========\n\n")
52
- yaml_str = response.replace("|","") #.split("```yaml")[1].split("```")[0].strip()
53
  decision = yaml.safe_load(yaml_str)
 
 
 
54
  return decision
55
 
56
  def post(self,shared,prep_res,exec_res):
57
- if exec_res["action"] == "search":
58
- shared["search_query"] = exec_res["search_query"]
59
  return exec_res["action"]
60
 
61
 
@@ -64,18 +74,62 @@ class Search(Node):
64
  self.logger = logger
65
 
66
  def prep(self,shared):
67
- return shared["search_query"]
 
 
 
 
 
 
68
 
69
  def exec(self,search_query):
 
 
70
  results = callWebSearch(search_query)
71
- self.logger.debug(f"*** SEARCHING\n{search_query}\n*** SEARCH RESULTS\n{results}\n\n**********\n\n")
72
  return results
73
 
74
  def post(self,shared,prep_res,exec_res):
75
  previous = shared.get("context","")
76
- shared["context"] = f"{previous}\n\nSEARCH: {shared['search_query']}\n\nRESULTS: {exec_res}"
77
  return "decide"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  class Answer(Node):
81
  def setLogger(self,logger):
 
2
  import yaml
3
  from pocketflow import Node
4
 
5
+ from pf_agent.utils import callLLM, callWebSearch, callWikipediaSearch
6
 
7
  class Decide(Node):
8
  def setLogger(self,logger):
 
17
  question,context = inputs
18
  prompt = f"""
19
  ### CONTEXT
20
+ You are a research assistant and I need your help to answer the following question:
21
  Question: {question}
22
  Previous research: {context}
23
 
 
25
  [1] search
26
  Description: Look up more information on the web
27
  Parameters:
28
+ - query (str): keywords to run the search on the web
29
 
30
+ [2] wikipedia
31
+ Description: Look up for more information in the wikipedia in English
32
+ Parameters:
33
+ - query (str): keywords to run the search in the English wikipedia
34
+
35
+ [3] answer
36
  Description: Answer the question with current knowledge
37
  Parameters:
38
  - answer (str): final answer to the question
39
 
40
+
41
  ### NEXT ACTION
42
  Decide the next action based on the context and available actions.
43
  Return your response in the following format:
44
  ```yaml
45
  thinking: |
46
  <your step-by-step reasoning process>
47
+ action: search OR answer OR wikipedia
48
  reason: <why you choose this action>
49
+ parameters: <yaml containing the parameter or parameters required to execute the action selected in yaml format indicating parameter name and value>
50
  ```
51
  IMPORTANT: Make sure to:
52
  1. Use proper indentation (4 spaces) for all multi-line fields
53
  2. Use the | character for multi-line text fields
54
  3. Keep single-line fields without the | character
55
  """
56
+ self.logger.debug(f"=== Calling LLM to DECIDE")
57
+ self.logger.debug(f"Context: {context}")
58
  response = callLLM(prompt)
59
+ yaml_str = response.replace("|","") #.split("```yaml")[1].split("```")[0].strip() #.replace("|","") #
 
60
  decision = yaml.safe_load(yaml_str)
61
+ self.logger.debug("LLM responded")
62
+ self.logger.debug(f"Thinking: {decision['thinking']}")
63
+ self.logger.debug(f"Action: {decision['action']} {decision['parameters']}")
64
  return decision
65
 
66
  def post(self,shared,prep_res,exec_res):
67
+ if exec_res["action"] != "answer":
68
+ shared["parameters"] = exec_res["parameters"]
69
  return exec_res["action"]
70
 
71
 
 
74
  self.logger = logger
75
 
76
  def prep(self,shared):
77
+ if type(shared["parameters"]) == type([]):
78
+ for p in shared["parameters"]:
79
+ if 'query' in p:
80
+ return p['query']
81
+ else:
82
+ return shared["parameters"]["query"]
83
+ return "invalid query"
84
 
85
    def exec(self,search_query):
        """Run *search_query* against the web search tool and return the raw results."""
        self.logger.debug("=== Searching the web")
        self.logger.debug(f"Search query: {search_query}")
        # callWebSearch is the DuckDuckGo wrapper from pf_agent.utils
        results = callWebSearch(search_query)
        self.logger.debug(f"Results: {results}")
        return results
91
 
92
  def post(self,shared,prep_res,exec_res):
93
  previous = shared.get("context","")
94
+ shared["context"] = f"{previous}\n\nSEARCH: {shared['parameters']}\n\nRESULTS: {exec_res}"
95
  return "decide"
96
+
97
class Wikipedia(Node):
    """Flow node that searches the English Wikipedia and summarizes the hit.

    prep  -> (question, query) extracted from the Decide node's parameters
    exec  -> fetches the page text and asks the LLM to distill it w.r.t. the question
    post  -> appends the summary to the shared research context, returns to Decide
    """

    def setLogger(self, logger):
        self.logger = logger

    def prep(self, shared):
        """Return ``(question, query)``; query falls back to a sentinel when missing."""
        params = shared["parameters"]
        # isinstance() instead of `type(x) == type([])` — idiomatic comparison.
        if isinstance(params, list):
            for p in params:
                if 'query' in p:
                    return shared["question"], p['query']
        else:
            return shared["question"], params["query"]
        return shared["question"], "invalid query"

    def exec(self, prepared):
        """Search Wikipedia for the query and let the LLM extract what matters."""
        question, search_query = prepared
        self.logger.debug("=== Searching wikipedia")
        self.logger.debug(f"Search query: {search_query}")
        results = callWikipediaSearch(search_query)
        self.logger.debug(f"Results: {results}")
        # Prompt typos fixed ("formated" -> "formatted", "could me help find it
        # elsewere" -> "could help me find it elsewhere") so the LLM gets clean
        # instructions.
        prompt = f'''You are my research assistant and I need your help to answer the following question: {question}

From my previous research I have found a Wikipedia page that could contain the answer or, at least, useful information for further research.
Take a careful look at it. Ignore any HTML markup you may find. You will find sections, paragraphs, etc. formatted as plain text and tables
as comma-separated values. The answer or any useful information could be anywhere.
If the text contains the answer, give it to me.
If it is not, extract relevant information that could help me find it elsewhere.
Please don't include information not directly related to the question.
It is very important that in any case, you are concise, brief and stick to the question being asked. Find the text below:
{results}'''
        response = callLLM(prompt)
        return response

    def post(self, shared, prep_res, exec_res):
        """Accumulate the Wikipedia summary into the context and return to Decide."""
        previous = shared.get("context", "")
        shared["context"] = f"{previous}\n\nWIKIPEDIA RESULTS: {shared['parameters']}\n\nRESULTS: {exec_res}"
        return "decide"
133
 
134
  class Answer(Node):
135
  def setLogger(self,logger):
pf_agent/utils.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import config
4
+ if "LOCALLLM" in os.environ and os.environ["LOCALLLM"]:
5
+ from ollama import chat as OllamaChat
6
+
7
+ #################################################
8
+
9
+ from langchain_community.tools import DuckDuckGoSearchRun
10
+
11
def callWebSearch(query):
    """Front-end for web search; currently delegates to DuckDuckGo."""
    return DuckDuckGo(query)

def DuckDuckGo(query):
    """Run *query* through langchain's DuckDuckGo tool and return the raw result text."""
    search_tool = DuckDuckGoSearchRun()
    results = search_tool.invoke(query)
    return results
18
+
19
+ #################################################
20
+
21
+ from langchain_community.tools import WikipediaQueryRun
22
+ from langchain_community.utilities import WikipediaAPIWrapper
23
+ import requests
24
+ from bs4 import BeautifulSoup
25
+
26
def callWikipediaSearch(query):
    """Front-end for Wikipedia lookup; currently uses the langchain wrapper."""
    return callWikipediaLangchain(query)

def callWikipediaLangchain(query):
    """Query Wikipedia via langchain's WikipediaQueryRun and return the summary text."""
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    response = wikipedia.run(query)
    return response
33
+
34
def callCustomWikipediaSearch(query):
    """Hand-rolled Wikipedia lookup: top search hit -> fetch page -> plain text.

    Returns the article text with HTML stripped, or a sentinel message when
    the search yields no pages.  Raises requests.HTTPError on HTTP failures.
    """
    from urllib.parse import quote

    # URL-encode the user query; the base URL already ends with `gsrsearch=`.
    searchURL = config.wikipediaSearchURL + quote(query)
    response = requests.get(searchURL, timeout=60)
    response.raise_for_status()
    searchResult = response.json()
    # MediaWiki omits the 'query' key entirely when the search has zero hits;
    # .get() avoids a KeyError and falls through to the sentinel return.
    pages = searchResult.get('query', {}).get('pages', {})
    for pageId in pages:
        if pages[pageId]['index'] == 1:
            page = pages[pageId]['title']
            # Titles may contain spaces/punctuation; quote() keeps the URL
            # valid.  Timeout added so a slow fetch cannot hang the agent.
            response2 = requests.get(config.wikipediaRetrieveURL + quote(page), timeout=60)
            response2.raise_for_status()
            raw_html = response2.text
            soup = BeautifulSoup(raw_html, 'html.parser')
            raw_text = soup.get_text(separator=" ", strip=True)
            return raw_text
    return "No result found in wikipedia. Search elsewhere!!"
61
+
62
+ #################################################
63
+
64
+ from huggingface_hub import InferenceClient
65
+
66
def callLLM(query):
    """Route *query* to the local Ollama model when $LOCALLLM names one,
    otherwise to the Hugging Face Inference API.

    Uses the same truthiness test as the module's conditional `ollama` import
    (`os.environ["LOCALLLM"]` must be non-empty): the previous bare membership
    test picked the local backend for an *empty* LOCALLLM even though the
    OllamaChat import had been skipped, causing a NameError.
    """
    if os.environ.get("LOCALLLM"):
        return callLocalLLM(query)
    else:
        return callHfInferenceClientLLM(query)
71
+
72
def callLocalLLM(query):
    """Send *query* to the Ollama model named by $LOCALLLM and return its reply text."""
    response = OllamaChat(model=os.environ["LOCALLLM"], messages=[ { 'role': 'user', 'content': query } ])
    return response['message']['content']
75
+
76
def callHfInferenceClientLLM(query):
    """Send *query* to the configured HF Inference model; returns the reply text."""
    # NOTE(review): "hfMoldel" (sic) is the attribute's actual name in config.py.
    client = InferenceClient(config.hfMoldel)
    response = client.chat.completions.create(
        messages = [ {"role": "user", "content": query } ],
        stream=False, max_tokens=1024 )
    return response.choices[0].message.content
82
+
83
+ #################################################
84
+
85
if __name__ == "__main__":
    # Ad-hoc manual test harness; forces the local Ollama backend.
    os.environ["LOCALLLM"] = "llama3.2"
    # from ollama import chat as OllamaChat
    # response = callLLM("What is the capital of France?")
    # print(response)
    # response = callWebSearch("who is the president of France")
    # print(response)
    # response = callHfInferenceClientLLM("What is the capital of France?")
    # print(response)
    print(callWikipediaSearch("Mercedes Sosa discography"))
requirements.txt CHANGED
@@ -7,4 +7,6 @@ langchain_huggingface
7
  langchain-community
8
  duckduckgo-search
9
  pocketflow
 
 
10
  #ollama
 
7
  langchain-community
8
  duckduckgo-search
9
  pocketflow
10
+ beautifulsoup4
11
+ wikipedia
12
  #ollama
utils.py DELETED
@@ -1,42 +0,0 @@
1
-
2
- import os
3
-
4
- from huggingface_hub import InferenceClient
5
- from langchain_community.tools import DuckDuckGoSearchRun
6
- import config
7
-
8
-
9
- def callWebSearch(query):
10
- return DuckDuckGo(query)
11
-
12
- def callLLM(query):
13
- if "LOCALLLM" in os.environ:
14
- return callLocalLLM(query)
15
- else:
16
- return callHfInferenceClientLLM(query)
17
-
18
- def DuckDuckGo(query):
19
- search_tool = DuckDuckGoSearchRun()
20
- results = search_tool.invoke(query)
21
- return results
22
-
23
- def callLocalLLM(query):
24
- response = OllamaChat(model=os.environ["LOCALLLM"], messages=[ { 'role': 'user', 'content': query } ])
25
- return response['message']['content']
26
-
27
- def callHfInferenceClientLLM(query):
28
- client = InferenceClient(config.hfMoldel)
29
- response = client.chat.completions.create(
30
- messages = [ {"role": "user", "content": query } ],
31
- stream=False, max_tokens=1024 )
32
- return response.choices[0].message.content
33
-
34
- if __name__ == "__main__":
35
- os.environ["LOCALLLM"] = "llama3.2"
36
- from ollama import chat as OllamaChat
37
- response = callLLM("What is the capital of France?")
38
- print(response)
39
- response = callWebSearch("who is the president of France")
40
- print(response)
41
- response = callHfInferenceClientLLM("What is the capital of France?")
42
- print(response)