Final_Assignment_Template

Sleeping

App Files Files Community

philincloud committed 29 days ago

Commit

6bef82e

verified ·

1 Parent(s): 60c1cc4

Update langgraph_agent.py

Browse files

Files changed (1) hide show

langgraph_agent.py +239 -240

langgraph_agent.py CHANGED Viewed

@@ -1,269 +1,268 @@
-        import os
-        import io
-        import contextlib
-        import pandas as pd
-        from typing import Dict, List, Union
-        import re
-        from PIL import Image as PILImage # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
-        from huggingface_hub import InferenceClient # Keep InferenceClient for other potential HF uses, but describe_image is removed.
-        from langgraph.graph import START, StateGraph, MessagesState
-        from langgraph.prebuilt import tools_condition, ToolNode
-        from langchain_openai import ChatOpenAI
-        from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
-        from langchain_community.document_loaders import WikipediaLoader
-        from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
-        from langchain_google_genai import ChatGoogleGenerativeAI
-        from langchain_core.tools import tool
-        from langchain_google_community import GoogleSearchAPIWrapper
-        @tool
-        def multiply(a: int, b: int) -> int:
-            """Multiply two integers."""
-            return a * b
-        @tool
-        def add(a: int, b: int) -> int:
-            """Add two integers."""
-            return a + b
-        @tool
-        def subtract(a: int, b: int) -> int:
-            """Subtract the second integer from the first."""
-            return a - b
-        @tool
-        def divide(a: int, b: int) -> float:
-            """Divide first integer by second; error if divisor is zero."""
-            if b == 0:
-                raise ValueError("Cannot divide by zero.")
-            return a / b
-        @tool
-        def modulus(a: int, b: int) -> int:
-            """Return the remainder of dividing first integer by second."""
-            return a % b
-        @tool
-        def wiki_search(query: str) -> dict:
-            """Search Wikipedia for a query and return up to 2 documents."""
-            try:
-                docs = WikipediaLoader(query=query, load_max_docs=5, lang="en", doc_content_chars_max=7000).load()
-                if not docs:
-                    return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
-                formatted = "\n\n---\n\n".join(
-                    f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
-                    for d in docs
-                )
-                return {"wiki_results": formatted}
-            except Exception as e:
-                print(f"Error in wiki_search tool: {e}")
-                return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
-        search = GoogleSearchAPIWrapper()
-        @tool
-        def google_web_search(query: str) -> str:
-            """Perform a web search (via Google Custom Search) and return results."""
-            try:
-                return search.run(query)
-            except Exception as e:
-                print(f"Error in google_web_search tool: {e}")
-                return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"
-        # HF_API_TOKEN is no longer directly needed for describe_image as that tool is removed.
-        # But keeping InferenceClient initialization for completeness if other HF tools might be added later.
-        HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-        MODEL = os.getenv("MODEL")
-        HF_INFERENCE_CLIENT = None
-        if HF_API_TOKEN:
-            HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
-        else:
-            print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
-        @tool
-        def read_file_content(file_path: str) -> Dict[str, str]:
-            """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, indicates it's a blob for LLM processing."""
-            try:
-                _, file_extension = os.path.splitext(file_path)
-                file_extension = file_extension.lower()
-                # Prioritize handling of video, audio, and image files for direct LLM processing
-                if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".webm"):
-                    return {"file_type": "video", "file_name": file_path, "file_content": f"Video file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this video content directly as a blob."}
-                elif file_extension == ".mp3":
-                    return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly as a blob."}
-                elif file_extension in (".jpeg", ".jpg", ".png"):
-                    return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly as a blob."}
-                # Handle text and code files
-                elif file_extension in (".txt", ".py"):
-                    with open(file_path, "r", encoding="utf-8") as f:
-                        content = f.read()
-                    return {"file_type": "text/code", "file_name": file_path, "file_content": content}
-                # Handle Excel files
-                elif file_extension == ".xlsx":
-                    df = pd.read_excel(file_path)
-                    content = df.to_string()
-                    return {"file_type": "excel", "file_name": file_path, "file_content": content}
-                else:
-                    return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}
-            except FileNotFoundError:
-                return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
-            except Exception as e:
-                return {"file_error": f"Error reading file {file_path}: {e}"}
-        @tool
-        def python_interpreter(code: str) -> Dict[str, str]:
-            """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message."""
-            old_stdout = io.StringIO()
-            with contextlib.redirect_stdout(old_stdout):
-                try:
-                    exec_globals = {}
-                    exec_locals = {}
-                    exec(code, exec_globals, exec_locals)
-                    output = old_stdout.getvalue()
-                    return {"execution_result": output.strip()}
-                except Exception as e:
-                    return {"execution_error": str(e)}
-        # --- Youtube Tool (Remains the same) ---
-        @tool
-        def Youtube(url: str, question: str) -> Dict[str, str]:
-            """
-            Tells about the YouTube video identified by the given URL, answering a question about it.
-            Note: This is a simulated response. In a real application, this would interact with a YouTube API
-            or a video analysis service to get actual video information and transcripts.
-            """
-            print(f"Youtube called with URL: {url}, Question: {question}")
-            # Placeholder for actual YouTube API call.
-            # In a real scenario, you'd use a library like `google-api-python-client` for YouTube Data API
-            # or a dedicated video transcription/analysis service.
-            # Simulating the previous video content for demonstration
-            if "https://www.youtube.com/watch?v=1htKBjuUWec" in url or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
-                return {
-                    "video_url": url,
-                    "question_asked": question,
-                    "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm.",
-                    "details": {
-                        "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
-                        "00:00:11": "Teal'c takes a large gulp from a black mug",
-                        "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
-                        "00:00:26": "Someone agrees, 'extremely'"
-                    }
-                }
-            else:
-                return {"error": "Invalid or unrecognized YouTube URL.", "url": url}
-        # --- END YOUTUBE TOOL ---
-        API_KEY = os.getenv("GEMINI_API_KEY")
-        HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN") # Kept for potential future HF uses, but not for describe_image
-        GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-        # Update the tools list (removed describe_image and arvix_search)
-        tools = [
-            multiply, add, subtract, divide, modulus,
-            wiki_search,
-            google_web_search,
-            read_file_content,
-            python_interpreter,
-            Youtube,
-        ]
-        with open("prompt.txt", "r", encoding="utf-8") as f:
-            system_prompt = f.read()
-        sys_msg = SystemMessage(content=system_prompt)
-        def build_graph(provider: str = "gemini"):
-            if provider == "gemini":
-                llm = ChatGoogleGenerativeAI(
-                    model=MODEL,
-                    temperature=1.0,
-                    max_retries=2,
-                    api_key=GEMINI_API_KEY,
-                    max_tokens=5000
-                )
-            elif provider == "huggingface":
-                llm = ChatHuggingFace(
-                    llm=HuggingFaceEndpoint(
-                        url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
-                    ),
-                    temperature=0,
-                )
-            else:
-                raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")
-            llm_with_tools = llm.bind_tools(tools)
-            def assistant(state: MessagesState):
-                messages_to_send = [sys_msg] + state["messages"]
-                # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
-                # When read_file_content returns a file_type of "image" or "audio",
-                # the agent should be able to send the actual binary data of that file
-                # as part of the message to the LLM. LangChain's ChatGoogleGenerativeAI
-                # supports this via content parts in HumanMessage.
-                #
-                # For this setup, we're assuming the framework (LangGraph/LangChain)
-                # will correctly handle passing the actual file content when read_file_content
-                # is called and its output indicates a media type.
-                #
-                # A more explicit implementation in the assistant node might look like this
-                # for real binary file handling if the framework doesn't do it implicitly:
-                #
-                # new_messages_to_send = []
-                # for msg in state["messages"]:
-                #    if isinstance(msg, HumanMessage) and msg.tool_calls:
-                #      # If a tool call to read_file_content happened in the previous turn
-                #      # and it returned a media type, we might need to get the file data
-                #      # and append it to the message parts. This logic is complex and
-                #      # depends heavily on how tool outputs are structured and passed.
-                #      # For simplicity in this template, we assume direct handling by the LLM
-                #      # if the tool output indicates media, and the file itself is accessible
-                #      # via the environment.
-                #      pass # Keep original message, tool output will follow
-                #    elif isinstance(msg, HumanMessage) and any(part.get("file_type") in ["image", "audio"] for part in msg.content if isinstance(part, dict)):
-                #      # This is a conceptual example for if the HumanMessage itself contains file data
-                #      # or a reference that needs to be resolved into data.
-                #      # You'd need to load the actual file bytes here.
-                #      # e.g., if msg.content was like: [{"type": "file_reference", "file_path": "image.png"}]
-                #      # with open(msg.content[0]["file_path"], "rb") as f:
-                #      #   file_bytes = f.read()
-                #      # new_messages_to_send.append(
-                #      #     HumanMessage(
-                #      #         content=[
-                #      #             {"type": "text", "text": "Here is the media content:"},
-                #      #             {"type": "image_data" if "image" in msg.content[0]["file_type"] else "audio_data", "data": base64.b64encode(file_bytes).decode('utf-8'), "media_type": "image/png" if "image" in msg.content[0]["file_type"] else "audio/mp3"}
-                #      #         ]
-                #      #     )
-                #      # )
-                #    else:
-                #      new_messages_to_send.append(msg)
-                # llm_response = llm_with_tools.invoke([sys_msg] + new_messages_to_send)
-                # --- END IMPORTANT NOTE ---
-                llm_response = llm_with_tools.invoke(messages_to_send) # For now, keep as is, rely on framework
-                print(f"LLM Raw Response: {llm_response}")
-                return {"messages": [llm_response]}
-            builder = StateGraph(MessagesState)
-            builder.add_node("assistant", assistant)
-            builder.add_node("tools", ToolNode(tools))
-            builder.add_edge(START, "assistant")
-            builder.add_conditional_edges("assistant", tools_condition)
-            builder.add_edge("tools", "assistant")
-            return builder.compile()
-        if __name__ == "__main__":
-            pass

+import os
+import io
+import contextlib
+import pandas as pd
+from typing import Dict, List, Union
+import re
+from PIL import Image as PILImage # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
+from huggingface_hub import InferenceClient # Keep InferenceClient for other potential HF uses, but describe_image is removed.
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition, ToolNode
+from langchain_openai import ChatOpenAI
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.tools import tool
+from langchain_google_community import GoogleSearchAPIWrapper
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two integers."""
+    # Docstring kept verbatim: the @tool decorator exposes it as the tool description.
+    product = a * b
+    return product
+@tool
+def add(a: int, b: int) -> int:
+    """Add two integers."""
+    # Docstring kept verbatim: the @tool decorator exposes it as the tool description.
+    total = a + b
+    return total
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract the second integer from the first."""
+    # Docstring kept verbatim: the @tool decorator exposes it as the tool description.
+    difference = a - b
+    return difference
+@tool
+def divide(a: int, b: int) -> float:
+    """Divide first integer by second; error if divisor is zero."""
+    # Happy path first; a zero divisor is rejected with an explicit ValueError
+    # instead of letting Python raise ZeroDivisionError.
+    if b != 0:
+        return a / b
+    raise ValueError("Cannot divide by zero.")
+@tool
+def modulus(a: int, b: int) -> int:
+    """Return the remainder of dividing first integer by second."""
+    # No zero check here: a zero divisor surfaces as Python's own ZeroDivisionError.
+    remainder = a % b
+    return remainder
+@tool
+def wiki_search(query: str) -> dict:
+    """Search Wikipedia for a query and return up to 5 documents."""
+    # Keep the document count stated in the docstring in sync with load_max_docs:
+    # the docstring is what the model reads when deciding how to use this tool.
+    try:
+        docs = WikipediaLoader(query=query, load_max_docs=5, lang="en", doc_content_chars_max=7000).load()
+        if not docs:
+            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
+        # Each document is tagged with its source and separated by a horizontal rule.
+        formatted = "\n\n---\n\n".join(
+            f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
+            for d in docs
+        )
+        return {"wiki_results": formatted}
+    except Exception as e:
+        # Best-effort tool: report the failure as a result instead of raising,
+        # so the agent loop can continue with another strategy.
+        print(f"Error in wiki_search tool: {e}")
+        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {e}"}
+# Shared search client, created once at import time and reused by the tool below.
+search = GoogleSearchAPIWrapper()
+@tool
+def google_web_search(query: str) -> str:
+    """Perform a web search (via Google Custom Search) and return results."""
+    try:
+        results = search.run(query)
+    except Exception as e:
+        # Failures are logged and handed back as text so the agent can react to them.
+        print(f"Error in google_web_search tool: {e}")
+        return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"
+    return results
+# The describe_image tool is gone, so no tool in this module uses the Hugging Face
+# client; it is still created (when a token is configured) for tools added later.
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+MODEL = os.getenv("MODEL")
+HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN) if HF_API_TOKEN else None
+if HF_INFERENCE_CLIENT is None:
+    print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
+@tool
+def read_file_content(file_path: str) -> Dict[str, str]:
+    """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, indicates it's a blob for LLM processing."""
+    # Extension -> media kind. Media files are never opened here; the tool only
+    # returns a textual notice naming the file.
+    media_kinds = {
+        ".mp4": "video", ".avi": "video", ".mov": "video", ".mkv": "video", ".webm": "video",
+        ".mp3": "audio",
+        ".jpeg": "image", ".jpg": "image", ".png": "image",
+    }
+    try:
+        suffix = os.path.splitext(file_path)[1].lower()
+        kind = media_kinds.get(suffix)
+        if kind is not None:
+            notice = f"{kind.capitalize()} file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this {kind} content directly as a blob."
+            return {"file_type": kind, "file_name": file_path, "file_content": notice}
+        if suffix in (".txt", ".py"):
+            # Plain text and source files are returned verbatim.
+            with open(file_path, "r", encoding="utf-8") as handle:
+                text = handle.read()
+            return {"file_type": "text/code", "file_name": file_path, "file_content": text}
+        if suffix == ".xlsx":
+            # Spreadsheets are rendered as a plain-text table.
+            table = pd.read_excel(file_path).to_string()
+            return {"file_type": "excel", "file_name": file_path, "file_content": table}
+        return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {suffix}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}
+    except FileNotFoundError:
+        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
+    except Exception as e:
+        return {"file_error": f"Error reading file {file_path}: {e}"}
+@tool
+def python_interpreter(code: str) -> Dict[str, str]:
+    """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message."""
+    # SECURITY: exec() runs model-supplied code with the full privileges of this
+    # process (file system, network, environment). Only use in a sandboxed deployment.
+    captured = io.StringIO()
+    # A single shared namespace is required: when exec() receives separate globals
+    # and locals dicts the code runs as if inside a class body, so functions and
+    # comprehensions in the snippet cannot see the snippet's own top-level names.
+    # "__name__" is defined so the common `if __name__ == "__main__":` idiom works.
+    namespace = {"__name__": "__main__"}
+    with contextlib.redirect_stdout(captured):
+        try:
+            exec(code, namespace)
+        except Exception as e:
+            # Report the failure to the agent instead of raising.
+            return {"execution_error": str(e)}
+    return {"execution_result": captured.getvalue().strip()}
+# --- Youtube Tool (simulated: canned data for a single demonstration video) ---
+@tool
+def Youtube(url: str, question: str) -> Dict[str, str]:
+    """
+    Tells about the YouTube video identified by the given URL, answering a question about it.
+    Note: This is a simulated response. In a real application, this would interact with a YouTube API
+    or a video analysis service to get actual video information and transcripts.
+    """
+    print(f"Youtube called with URL: {url}, Question: {question}")
+    # Placeholder for an actual YouTube API call or transcription/analysis service.
+    # Canned data exists for exactly one video; it must not be returned for any
+    # other URL, otherwise the agent would answer from an unrelated transcript.
+    known_video_id = "1htKBjuUWec"
+    if not re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
+        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}
+    if known_video_id not in url:
+        return {"error": "No simulated analysis is available for this video; only one demonstration video is supported.", "url": url}
+    return {
+        "video_url": url,
+        "question_asked": question,
+        "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm.",
+        "details": {
+            "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
+            "00:00:11": "Teal'c takes a large gulp from a black mug",
+            "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
+            "00:00:26": "Someone agrees, 'extremely'"
+        }
+    }
+# --- END YOUTUBE TOOL ---
+# API_KEY and GEMINI_API_KEY are both read from the same environment variable.
+API_KEY = os.getenv("GEMINI_API_KEY")
+# NOTE(review): this rebinds HF_API_TOKEN from a different environment variable
+# (HF_SPACE_TOKEN) after HF_INFERENCE_CLIENT was built - confirm which is intended.
+HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# Tools exposed to the agent (describe_image and arxiv_search were removed).
+tools = [
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+    wiki_search,
+    google_web_search,
+    read_file_content,
+    python_interpreter,
+    Youtube,
+]
+# The system prompt is loaded once at import time from prompt.txt (path relative
+# to the working directory).
+with open("prompt.txt", "r", encoding="utf-8") as f:
+    system_prompt = f.read()
+sys_msg = SystemMessage(content=system_prompt)
+def build_graph(provider: str = "gemini"):
+    """Build and compile the tool-calling agent graph.
+
+    Args:
+        provider: "gemini" (default) or "huggingface"; selects the chat model.
+
+    Returns:
+        The compiled graph: an "assistant" node that calls the model and a
+        "tools" node that executes requested tool calls, looping back to the
+        assistant after every tool run.
+
+    Raises:
+        ValueError: if provider is not one of the supported names.
+    """
+    if provider not in ("gemini", "huggingface"):
+        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")
+    if provider == "gemini":
+        llm = ChatGoogleGenerativeAI(
+            model=MODEL,
+            temperature=1.0,
+            max_retries=2,
+            api_key=GEMINI_API_KEY,
+            max_tokens=5000
+        )
+    else:
+        endpoint = HuggingFaceEndpoint(
+            url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
+        )
+        llm = ChatHuggingFace(
+            llm=endpoint,
+            temperature=0,
+        )
+    llm_with_tools = llm.bind_tools(tools)
+    def assistant(state: MessagesState):
+        """Call the tool-enabled model on the system prompt plus the conversation so far."""
+        # NOTE on media files: read_file_content only returns a textual notice for
+        # image/audio/video files, and nothing in this node attaches the file bytes
+        # to the outgoing messages. To let a multimodal model actually see the
+        # media, this node would have to load the bytes itself and add them to a
+        # HumanMessage as content parts (e.g. base64-encoded data with a media type).
+        conversation = [sys_msg] + state["messages"]
+        reply = llm_with_tools.invoke(conversation)
+        print(f"LLM Raw Response: {reply}")
+        return {"messages": [reply]}
+    # Wiring: START -> assistant; assistant -> tools when tools_condition routes
+    # there; tools -> assistant to continue the loop.
+    graph = StateGraph(MessagesState)
+    graph.add_node("assistant", assistant)
+    graph.add_node("tools", ToolNode(tools))
+    graph.add_edge(START, "assistant")
+    graph.add_conditional_edges("assistant", tools_condition)
+    graph.add_edge("tools", "assistant")
+    return graph.compile()
+if __name__ == "__main__":
+    # Intentionally empty: this module is meant to be imported for build_graph().
+    pass