jproman committed on
Commit
522e6c3
·
1 Parent(s): 16f1cd8

Refactor to use langchain

Browse files
.gitignore CHANGED
@@ -174,4 +174,5 @@ cython_debug/
174
  .pypirc
175
 
176
  # agent's log files
177
- log.txt
 
 
174
  .pypirc
175
 
176
  # agent's log files
177
+ log.txt
178
+ questions.txt
app.py CHANGED
@@ -5,7 +5,7 @@ import inspect
5
  import pandas as pd
6
 
7
  import config
8
- from agent import Agent
9
  from logger import Logger
10
 
11
 
@@ -178,11 +178,15 @@ if __name__ == "__main__":
178
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
179
 
180
  print("-"*(60 + len(" App Starting ")) + "\n")
181
- # print("\n" + "*"*30 + " Debug area " + "*"*30)
182
- # from utils import callHfInferenceClientLLM
183
- # response = callHfInferenceClientLLM("What is the capital of France?")
184
- # print(response)
185
- # print("\n" + "*"*30 + " Debug area " + "*"*30)
186
-
187
- print("Launching Gradio Interface for Basic Agent Evaluation...")
188
- demo.launch(debug=True, share=False)
 
 
 
 
 
5
  import pandas as pd
6
 
7
  import config
8
+ from lc_agent.Agent import Agent
9
  from logger import Logger
10
 
11
 
 
178
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
179
 
180
  print("-"*(60 + len(" App Starting ")) + "\n")
181
+
182
+ print("\n" + "*"*30 + " Debug area " + "*"*30)
183
+ logger = Logger(config.logLevel,config.logFile)
184
+ agent = Agent(config,logger)
185
+ question = "this is a question"
186
+ print("===",question)
187
+ response = agent.invoke(question)
188
+ print(response)
189
+ print("\n" + "*"*30 + " Debug area " + "*"*30)
190
+
191
+ #print("Launching Gradio Interface for Basic Agent Evaluation...")
192
+ #demo.launch(debug=True, share=False)
assignment_utils.py CHANGED
@@ -2,8 +2,6 @@
2
  import requests
3
 
4
  import config
5
- from logger import Logger
6
- from agent import Agent
7
 
8
 
9
  def getQuestions():
@@ -26,7 +24,7 @@ def getQuestionByPos(i):
26
 
27
  def printQuestions():
28
  for i,question in enumerate(getQuestions()):
29
- print(f"{i+1} ({question['task_id']}): {question['question']} {'(File: ' + question['file_name'] + ')' if question['file_name'] else ''}")
30
 
31
  def submitAnswers(answers):
32
  submissionData = {
@@ -70,11 +68,9 @@ def getTestAnswers():
70
  {"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d", "submitted_answer": "test answer"},
71
  ]
72
 
73
- def submitFirstAnswers(n):
74
  questions = getQuestions()
75
  answers = getTestAnswers()
76
- logger = Logger(config.logLevel,config.logFile)
77
- agent = Agent(logger)
78
  for i in range(n):
79
  response = agent.invoke(questions[i]['question'])
80
  answers[i]["submitted_answer"] = response
@@ -86,6 +82,9 @@ if __name__ == "__main__":
86
  # https://huggingface.co/spaces/agents-course/Students_leaderboard
87
  questions = getQuestions()
88
  printQuestions()
89
- response = submitAnswers(getTestAnswers())
90
- print(response)
 
 
 
91
 
 
2
  import requests
3
 
4
  import config
 
 
5
 
6
 
7
  def getQuestions():
 
24
 
25
def printQuestions():
    # Print one line per question: 1-based position, task id, question text,
    # and the attached file name when the question ships with a file.
    for i,question in enumerate(getQuestions()):
        print(f"{i+1}. ({question['task_id']}): {question['question']} {'(File: ' + question['file_name'] + ')' if question['file_name'] else ''}\n\n")
28
 
29
  def submitAnswers(answers):
30
  submissionData = {
 
68
  {"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d", "submitted_answer": "test answer"},
69
  ]
70
 
71
+ def submitFirstAnswers(n,agent):
72
  questions = getQuestions()
73
  answers = getTestAnswers()
 
 
74
  for i in range(n):
75
  response = agent.invoke(questions[i]['question'])
76
  answers[i]["submitted_answer"] = response
 
82
  # https://huggingface.co/spaces/agents-course/Students_leaderboard
83
  questions = getQuestions()
84
  printQuestions()
85
+ #from logger import Logger
86
+ #logger = Logger(config.logLevel,config.logFile)
87
+ #agent = Agent(logger)
88
+ #response = submitAnswers(getTestAnswers())
89
+ #print(response)
90
 
config.py CHANGED
@@ -3,7 +3,9 @@ baseApiUrl = "https://agents-course-unit4-scoring.hf.space"
3
  questionsUrl = f"{baseApiUrl}/questions"
4
  submitUrl = f"{baseApiUrl}/submit"
5
 
6
- hfMoldel = "meta-llama/Llama-3.3-70B-Instruct"
 
 
7
 
8
  logLevel = 'DEBUG'
9
  logFile = 'log.txt'
 
3
  questionsUrl = f"{baseApiUrl}/questions"
4
  submitUrl = f"{baseApiUrl}/submit"
5
 
6
# Hugging Face model used by the Inference API backend.
# NOTE(review): "hfMoldel" looks like a typo for "hfModel", but other modules
# reference this exact attribute name — rename everywhere at once or keep as-is.
hfMoldel = "microsoft/Phi-4-reasoning-plus" # "meta-llama/Llama-3.3-70B-Instruct"
# MediaWiki search API base; generator=search with gsrlimit=1 returns only the
# top hit, and the caller appends the (URL-encoded) search terms after gsrsearch=.
wikipediaSearchURL = 'https://en.wikipedia.org/w/api.php?action=query&prop=extracts&origin=*&format=json&generator=search&gsrnamespace=0&gsrlimit=1&gsrsearch='
# Base URL for fetching a full article page by title.
wikipediaRetrieveURL = 'https://en.wikipedia.org/wiki/'

logLevel = 'DEBUG'
logFile = 'log.txt'
lc_agent/Agent.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
class Agent:
    """Placeholder for the langchain-based agent.

    Holds references to the shared config module and logger; ``invoke``
    currently returns a canned response rather than answering the question.
    """

    def __init__(self, config, logger):
        """Remember the application config and logger for later use."""
        self.config = config
        self.logger = logger

    def invoke(self, question):
        """Return a stub answer; *question* is ignored for now."""
        return f"This is my response: {self.config.baseApiUrl}"
logger.py CHANGED
@@ -27,6 +27,7 @@ class Logger:
27
  self.__log(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ** CRITICAL ** {message}")
28
 
29
  def __log(self, message):
 
30
  print(message)
31
  if config.logFile:
32
  with open(config.logFile,"a") as fh:
 
27
  self.__log(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ** CRITICAL ** {message}")
28
 
29
  def __log(self, message):
30
+ message = message.strip()
31
  print(message)
32
  if config.logFile:
33
  with open(config.logFile,"a") as fh:
agent.py → pf_agent/agent.py RENAMED
@@ -1,4 +1,7 @@
1
 
 
 
 
2
  from flow import createFlow
3
  from logger import Logger
4
  import config
@@ -21,5 +24,5 @@ if __name__ == "__main__":
21
  agent = Agent(logger)
22
  question = getQuestionByPos(0)
23
  print(question)
24
- #response = agent.invoke(question['question'])
25
- #print(response)
 
1
 
2
+ import os
3
+ os.environ["LOCALLLM"] = "llama3.2"
4
+
5
  from flow import createFlow
6
  from logger import Logger
7
  import config
 
24
  agent = Agent(logger)
25
  question = getQuestionByPos(0)
26
  print(question)
27
+ response = agent.invoke(question['question'])
28
+ print(response)
flow.py → pf_agent/flow.py RENAMED
@@ -1,18 +1,22 @@
1
 
2
  from pocketflow import Flow
3
 
4
- from nodes import Decide, Search, Answer
5
 
6
  def createFlow(logger):
7
  decide = Decide()
8
  decide.setLogger(logger)
9
  search = Search()
10
  search.setLogger(logger)
 
 
11
  answer = Answer()
12
  answer.setLogger(logger)
13
 
14
  decide - "search" >> search
 
15
  decide - "answer" >> answer
16
  search - "decide" >> decide
 
17
 
18
  return Flow(start=decide)
 
1
 
2
  from pocketflow import Flow
3
 
4
+ from nodes import Decide, Search, Wikipedia, Answer
5
 
6
def createFlow(logger):
    # Build the agent's pocketflow graph: Decide routes to a tool node, and
    # every tool node hands control back to Decide until it picks "answer".
    decide = Decide()
    decide.setLogger(logger)
    search = Search()
    search.setLogger(logger)
    wikipedia = Wikipedia()
    wikipedia.setLogger(logger)
    answer = Answer()
    answer.setLogger(logger)

    # pocketflow DSL: `node - "action" >> target` wires the transition taken
    # when the node's post() returns that action string.
    decide - "search" >> search
    decide - "wikipedia" >> wikipedia
    decide - "answer" >> answer
    search - "decide" >> decide
    wikipedia - "decide" >> decide

    return Flow(start=decide)
nodes.py → pf_agent/nodes.py RENAMED
@@ -2,7 +2,7 @@
2
  import yaml
3
  from pocketflow import Node
4
 
5
- from utils import callLLM, callWebSearch
6
 
7
  class Decide(Node):
8
  def setLogger(self,logger):
@@ -17,7 +17,7 @@ class Decide(Node):
17
  question,context = inputs
18
  prompt = f"""
19
  ### CONTEXT
20
- You are a research assistant that can search the web
21
  Question: {question}
22
  Previous research: {context}
23
 
@@ -25,37 +25,47 @@ Previous research: {context}
25
  [1] search
26
  Description: Look up more information on the web
27
  Parameters:
28
- - query (str): what to search for
29
 
30
- [2] answer
 
 
 
 
 
31
  Description: Answer the question with current knowledge
32
  Parameters:
33
  - answer (str): final answer to the question
34
 
 
35
  ### NEXT ACTION
36
  Decide the next action based on the context and available actions.
37
  Return your response in the following format:
38
  ```yaml
39
  thinking: |
40
  <your step-by-step reasoning process>
41
- action: search OR answer
42
  reason: <why you choose this action>
43
- search_query: <specific search query if action is search>
44
  ```
45
  IMPORTANT: Make sure to:
46
  1. Use proper indentation (4 spaces) for all multi-line fields
47
  2. Use the | character for multi-line text fields
48
  3. Keep single-line fields without the | character
49
  """
 
 
50
  response = callLLM(prompt)
51
- self.logger.debug(f"=== CALLING LLM\n{prompt}\n=== LLM RESPONSE\n{response}\n\n==========\n\n")
52
- yaml_str = response.replace("|","") #.split("```yaml")[1].split("```")[0].strip()
53
  decision = yaml.safe_load(yaml_str)
 
 
 
54
  return decision
55
 
56
  def post(self,shared,prep_res,exec_res):
57
- if exec_res["action"] == "search":
58
- shared["search_query"] = exec_res["search_query"]
59
  return exec_res["action"]
60
 
61
 
@@ -64,18 +74,62 @@ class Search(Node):
64
  self.logger = logger
65
 
66
  def prep(self,shared):
67
- return shared["search_query"]
 
 
 
 
 
 
68
 
69
  def exec(self,search_query):
 
 
70
  results = callWebSearch(search_query)
71
- self.logger.debug(f"*** SEARCHING\n{search_query}\n*** SEARCH RESULTS\n{results}\n\n**********\n\n")
72
  return results
73
 
74
  def post(self,shared,prep_res,exec_res):
75
  previous = shared.get("context","")
76
- shared["context"] = f"{previous}\n\nSEARCH: {shared['search_query']}\n\nRESULTS: {exec_res}"
77
  return "decide"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  class Answer(Node):
81
  def setLogger(self,logger):
 
2
  import yaml
3
  from pocketflow import Node
4
 
5
+ from pf_agent.utils import callLLM, callWebSearch, callWikipediaSearch
6
 
7
  class Decide(Node):
8
  def setLogger(self,logger):
 
17
  question,context = inputs
18
  prompt = f"""
19
  ### CONTEXT
20
+ You are a research assistant and I need your help to answer the following question:
21
  Question: {question}
22
  Previous research: {context}
23
 
 
25
  [1] search
26
  Description: Look up more information on the web
27
  Parameters:
28
+ - query (str): keywords to run the search on the web
29
 
30
+ [2] wikipedia
31
+ Description: Look up for more information in the wikipedia in English
32
+ Parameters:
33
+ - query (str): keywords to run the search in the English wikipedia
34
+
35
+ [3] answer
36
  Description: Answer the question with current knowledge
37
  Parameters:
38
  - answer (str): final answer to the question
39
 
40
+
41
  ### NEXT ACTION
42
  Decide the next action based on the context and available actions.
43
  Return your response in the following format:
44
  ```yaml
45
  thinking: |
46
  <your step-by-step reasoning process>
47
+ action: search OR answer OR wikipedia
48
  reason: <why you choose this action>
49
+ parameters: <yaml containing the parameter or parameters required to execute the action selected in yaml format indicating parameter name and value>
50
  ```
51
  IMPORTANT: Make sure to:
52
  1. Use proper indentation (4 spaces) for all multi-line fields
53
  2. Use the | character for multi-line text fields
54
  3. Keep single-line fields without the | character
55
  """
56
+ self.logger.debug(f"=== Calling LLM to DECIDE")
57
+ self.logger.debug(f"Context: {context}")
58
  response = callLLM(prompt)
59
+ yaml_str = response.replace("|","") #.split("```yaml")[1].split("```")[0].strip() #.replace("|","") #
 
60
  decision = yaml.safe_load(yaml_str)
61
+ self.logger.debug("LLM responded")
62
+ self.logger.debug(f"Thinking: {decision['thinking']}")
63
+ self.logger.debug(f"Action: {decision['action']} {decision['parameters']}")
64
  return decision
65
 
66
  def post(self,shared,prep_res,exec_res):
67
+ if exec_res["action"] != "answer":
68
+ shared["parameters"] = exec_res["parameters"]
69
  return exec_res["action"]
70
 
71
 
 
74
  self.logger = logger
75
 
76
  def prep(self,shared):
77
+ if type(shared["parameters"]) == type([]):
78
+ for p in shared["parameters"]:
79
+ if 'query' in p:
80
+ return p['query']
81
+ else:
82
+ return shared["parameters"]["query"]
83
+ return "invalid query"
84
 
85
    def exec(self,search_query):
        """Run *search_query* against the web search tool and return the raw results."""
        self.logger.debug("=== Searching the web")
        self.logger.debug(f"Search query: {search_query}")
        # callWebSearch is the DuckDuckGo wrapper from pf_agent.utils
        results = callWebSearch(search_query)
        self.logger.debug(f"Results: {results}")
        return results
91
 
92
  def post(self,shared,prep_res,exec_res):
93
  previous = shared.get("context","")
94
+ shared["context"] = f"{previous}\n\nSEARCH: {shared['parameters']}\n\nRESULTS: {exec_res}"
95
  return "decide"
96
+
97
class Wikipedia(Node):
    """Flow node that searches the English Wikipedia and summarizes the hit.

    prep  -> (question, query) extracted from the Decide node's parameters
    exec  -> fetches the page text and asks the LLM to distill it w.r.t. the question
    post  -> appends the summary to the shared research context, returns to Decide
    """

    def setLogger(self, logger):
        self.logger = logger

    def prep(self, shared):
        """Return ``(question, query)``; query falls back to a sentinel when missing."""
        params = shared["parameters"]
        # isinstance() instead of `type(x) == type([])` — idiomatic comparison.
        if isinstance(params, list):
            for p in params:
                if 'query' in p:
                    return shared["question"], p['query']
        else:
            return shared["question"], params["query"]
        return shared["question"], "invalid query"

    def exec(self, prepared):
        """Search Wikipedia for the query and let the LLM extract what matters."""
        question, search_query = prepared
        self.logger.debug("=== Searching wikipedia")
        self.logger.debug(f"Search query: {search_query}")
        results = callWikipediaSearch(search_query)
        self.logger.debug(f"Results: {results}")
        # Prompt typos fixed ("formated" -> "formatted", "could me help find it
        # elsewere" -> "could help me find it elsewhere") so the LLM gets clean
        # instructions.
        prompt = f'''You are my research assistant and I need your help to answer the following question: {question}

From my previous research I have found a Wikipedia page that could contain the answer or, at least, useful information for further research.
Take a careful look at it. Ignore any HTML markup you may find. You will find sections, paragraphs, etc. formatted as plain text and tables
as comma-separated values. The answer or any useful information could be anywhere.
If the text contains the answer, give it to me.
If it is not, extract relevant information that could help me find it elsewhere.
Please don't include information not directly related to the question.
It is very important that in any case, you are concise, brief and stick to the question being asked. Find the text below:
{results}'''
        response = callLLM(prompt)
        return response

    def post(self, shared, prep_res, exec_res):
        """Accumulate the Wikipedia summary into the context and return to Decide."""
        previous = shared.get("context", "")
        shared["context"] = f"{previous}\n\nWIKIPEDIA RESULTS: {shared['parameters']}\n\nRESULTS: {exec_res}"
        return "decide"
133
 
134
  class Answer(Node):
135
  def setLogger(self,logger):
pf_agent/utils.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import config
4
+ if "LOCALLLM" in os.environ and os.environ["LOCALLLM"]:
5
+ from ollama import chat as OllamaChat
6
+
7
+ #################################################
8
+
9
+ from langchain_community.tools import DuckDuckGoSearchRun
10
+
11
def callWebSearch(query):
    """Front-end for web search; currently delegates to DuckDuckGo."""
    return DuckDuckGo(query)

def DuckDuckGo(query):
    """Run *query* through langchain's DuckDuckGo tool and return the raw result text."""
    search_tool = DuckDuckGoSearchRun()
    results = search_tool.invoke(query)
    return results
18
+
19
+ #################################################
20
+
21
+ from langchain_community.tools import WikipediaQueryRun
22
+ from langchain_community.utilities import WikipediaAPIWrapper
23
+ import requests
24
+ from bs4 import BeautifulSoup
25
+
26
def callWikipediaSearch(query):
    """Front-end for Wikipedia lookup; currently uses the langchain wrapper."""
    return callWikipediaLangchain(query)

def callWikipediaLangchain(query):
    """Query Wikipedia via langchain's WikipediaQueryRun and return the summary text."""
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    response = wikipedia.run(query)
    return response
33
+
34
def callCustomWikipediaSearch(query):
    """Hand-rolled Wikipedia lookup: top search hit -> fetch page -> plain text.

    Returns the article text with HTML stripped, or a sentinel message when
    the search yields no pages.  Raises requests.HTTPError on HTTP failures.
    """
    from urllib.parse import quote

    # URL-encode the user query; the base URL already ends with `gsrsearch=`.
    searchURL = config.wikipediaSearchURL + quote(query)
    response = requests.get(searchURL, timeout=60)
    response.raise_for_status()
    searchResult = response.json()
    # MediaWiki omits the 'query' key entirely when the search has zero hits;
    # .get() avoids a KeyError and falls through to the sentinel return.
    pages = searchResult.get('query', {}).get('pages', {})
    for pageId in pages:
        if pages[pageId]['index'] == 1:
            page = pages[pageId]['title']
            # Titles may contain spaces/punctuation; quote() keeps the URL
            # valid.  Timeout added so a slow fetch cannot hang the agent.
            response2 = requests.get(config.wikipediaRetrieveURL + quote(page), timeout=60)
            response2.raise_for_status()
            raw_html = response2.text
            soup = BeautifulSoup(raw_html, 'html.parser')
            raw_text = soup.get_text(separator=" ", strip=True)
            return raw_text
    return "No result found in wikipedia. Search elsewhere!!"
61
+
62
+ #################################################
63
+
64
+ from huggingface_hub import InferenceClient
65
+
66
def callLLM(query):
    """Route *query* to the local Ollama model when $LOCALLLM names one,
    otherwise to the Hugging Face Inference API.

    Uses the same truthiness test as the module's conditional `ollama` import
    (`os.environ["LOCALLLM"]` must be non-empty): the previous bare membership
    test picked the local backend for an *empty* LOCALLLM even though the
    OllamaChat import had been skipped, causing a NameError.
    """
    if os.environ.get("LOCALLLM"):
        return callLocalLLM(query)
    else:
        return callHfInferenceClientLLM(query)
71
+
72
def callLocalLLM(query):
    """Send *query* to the Ollama model named by $LOCALLLM and return its reply text."""
    response = OllamaChat(model=os.environ["LOCALLLM"], messages=[ { 'role': 'user', 'content': query } ])
    return response['message']['content']
75
+
76
def callHfInferenceClientLLM(query):
    """Send *query* to the configured HF Inference model; returns the reply text."""
    # NOTE(review): "hfMoldel" (sic) is the attribute's actual name in config.py.
    client = InferenceClient(config.hfMoldel)
    response = client.chat.completions.create(
        messages = [ {"role": "user", "content": query } ],
        stream=False, max_tokens=1024 )
    return response.choices[0].message.content
82
+
83
+ #################################################
84
+
85
if __name__ == "__main__":
    # Ad-hoc manual test harness; forces the local Ollama backend.
    os.environ["LOCALLLM"] = "llama3.2"
    # from ollama import chat as OllamaChat
    # response = callLLM("What is the capital of France?")
    # print(response)
    # response = callWebSearch("who is the president of France")
    # print(response)
    # response = callHfInferenceClientLLM("What is the capital of France?")
    # print(response)
    print(callWikipediaSearch("Mercedes Sosa discography"))
requirements.txt CHANGED
@@ -7,4 +7,6 @@ langchain_huggingface
7
  langchain-community
8
  duckduckgo-search
9
  pocketflow
 
 
10
  #ollama
 
7
  langchain-community
8
  duckduckgo-search
9
  pocketflow
10
+ beautifulsoup4
11
+ wikipedia
12
  #ollama
utils.py DELETED
@@ -1,42 +0,0 @@
1
-
2
- import os
3
-
4
- from huggingface_hub import InferenceClient
5
- from langchain_community.tools import DuckDuckGoSearchRun
6
- import config
7
-
8
-
9
- def callWebSearch(query):
10
- return DuckDuckGo(query)
11
-
12
- def callLLM(query):
13
- if "LOCALLLM" in os.environ:
14
- return callLocalLLM(query)
15
- else:
16
- return callHfInferenceClientLLM(query)
17
-
18
- def DuckDuckGo(query):
19
- search_tool = DuckDuckGoSearchRun()
20
- results = search_tool.invoke(query)
21
- return results
22
-
23
- def callLocalLLM(query):
24
- response = OllamaChat(model=os.environ["LOCALLLM"], messages=[ { 'role': 'user', 'content': query } ])
25
- return response['message']['content']
26
-
27
- def callHfInferenceClientLLM(query):
28
- client = InferenceClient(config.hfMoldel)
29
- response = client.chat.completions.create(
30
- messages = [ {"role": "user", "content": query } ],
31
- stream=False, max_tokens=1024 )
32
- return response.choices[0].message.content
33
-
34
- if __name__ == "__main__":
35
- os.environ["LOCALLLM"] = "llama3.2"
36
- from ollama import chat as OllamaChat
37
- response = callLLM("What is the capital of France?")
38
- print(response)
39
- response = callWebSearch("who is the president of France")
40
- print(response)
41
- response = callHfInferenceClientLLM("What is the capital of France?")
42
- print(response)