philincloud committed on
Commit 5798d39 · verified · 1 Parent(s): 3ea579f

Update langgraph_agent.py

Files changed (1)
  1. langgraph_agent.py +52 -57
langgraph_agent.py CHANGED
@@ -5,8 +5,8 @@ import pandas as pd
 from typing import Dict, List, Union
 import re
 
-from PIL import Image as PILImage
-from huggingface_hub import InferenceClient
+from PIL import Image as PILImage  # Keep PIL for potential future use or if other parts depend on it; describe_image is removed.
+from huggingface_hub import InferenceClient  # Keep InferenceClient for other potential HF uses; describe_image is removed.
 
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition, ToolNode
@@ -82,12 +82,14 @@ def arvix_search(query: str) -> dict:
     )
     return {"arvix_results": formatted}
 
+# HF_API_TOKEN is no longer needed for describe_image, as that tool is removed,
+# but the InferenceClient initialization is kept in case other HF tools are added later.
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 HF_INFERENCE_CLIENT = None
 if HF_API_TOKEN:
     HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
 else:
-    print("WARNING: HF_API_TOKEN not set. Image and Audio tools will not function.")
+    print("WARNING: HF_API_TOKEN not set. Any HF tools that require it will not function.")
 
 @tool
 def read_file_content(file_path: str) -> Dict[str, str]:
@@ -105,12 +107,10 @@ def read_file_content(file_path: str) -> Dict[str, str]:
         content = df.to_string()
         return {"file_type": "excel", "file_name": file_path, "file_content": content}
     elif file_extension in (".jpeg", ".jpg", ".png"):
-        return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
+        # For images, we indicate it's an image file and expect the LLM to handle the blob directly.
+        return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly."}
     elif file_extension == ".mp3":
         # For MP3, we indicate it's an audio file and expect the LLM to handle the blob directly.
-        # In a real Langchain setup, you might actually read the bytes here and pass them
-        # as a part of the message content to the LLM if it supports direct binary upload.
-        # For now, this tool simply confirms its type for the agent.
         return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly."}
     else:
         return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
@@ -133,21 +133,6 @@ def python_interpreter(code: str) -> Dict[str, str]:
     except Exception as e:
         return {"execution_error": str(e)}
 
-@tool
-def describe_image(image_path: str) -> Dict[str, str]:
-    """Generates a textual description for an image file (JPEG, JPG, PNG) using an image-to-text model from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set."""
-    if not HF_INFERENCE_CLIENT:
-        return {"error": "Hugging Face API token not configured for image description. Cannot use this tool."}
-    try:
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-        description = HF_INFERENCE_CLIENT.image_to_text(image_bytes)
-        return {"image_description": description, "image_path": image_path}
-    except FileNotFoundError:
-        return {"error": f"Image file not found: {image_path}. Please ensure the file exists."}
-    except Exception as e:
-        return {"error": f"Error describing image {image_path}: {str(e)}"}
-
 # --- Youtube Tool (Remains the same) ---
 @tool
 def Youtube(url: str, question: str) -> Dict[str, str]:
@@ -181,10 +166,10 @@ def Youtube(url: str, question: str) -> Dict[str, str]:
 # --- END YOUTUBE TOOL ---
 
 API_KEY = os.getenv("GEMINI_API_KEY")
-HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")
+HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")  # Kept for potential future HF uses, but not for describe_image.
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
-# Update the tools list (removed transcribe_audio)
+# Update the tools list (removed describe_image)
 tools = [
     multiply, add, subtract, divide, modulus,
     wiki_search,
@@ -192,8 +177,7 @@ tools = [
     arvix_search,
     read_file_content,
     python_interpreter,
-    describe_image,
-    Youtube, # <-- transcribe_audio has been removed
+    Youtube,
 ]
 
 with open("prompt.txt", "r", encoding="utf-8") as f:
@@ -224,38 +208,49 @@ def build_graph(provider: str = "gemini"):
     def assistant(state: MessagesState):
         messages_to_send = [sys_msg] + state["messages"]
 
-        # When sending messages to Gemini, if read_file_content identified an audio file,
-        # you'll need to ensure the actual binary content of the audio file is included
-        # in the message parts for the LLM to process it natively.
-        # This part requires a bit more advanced handling than just text.
-        # Langchain often handles this when you use `tool_code.File(...)` or similar constructs.
-        # For simplicity in this prompt and code example, we're assuming the framework
-        # will correctly pass the file content if `read_file_content` returns an audio type.
-
-        # A more robust implementation would involve modifying the `assistant` node
-        # to explicitly read the file bytes and add them to the message parts
-        # if a file is detected in the input state.
-
-        # Example of how you might include binary content (conceptual, depends on LangChain/API):
+        # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
+        # When read_file_content returns a file_type of "image" or "audio",
+        # the agent should be able to send the actual binary data of that file
+        # as part of the message to the LLM. LangChain's ChatGoogleGenerativeAI
+        # supports this via content parts in HumanMessage.
+        #
+        # For this setup, we're assuming the framework (LangGraph/LangChain)
+        # will correctly pass the actual file content when read_file_content
+        # is called and its output indicates a media type.
+        #
+        # A more explicit implementation in the assistant node might look like this
+        # for real binary file handling if the framework doesn't do it implicitly:
+        #
         # new_messages_to_send = []
-        # for msg in messages_to_send:
-        #     if isinstance(msg, HumanMessage) and "audio file" in msg.content:  # Simplified check
-        #         # Assume you can get the actual file path from the context
-        #         file_path_from_context = "Strawberry pie.mp3"  # Or extract from msg.content
-        #         if os.path.exists(file_path_from_context):
-        #             with open(file_path_from_context, "rb") as f:
-        #                 audio_bytes = f.read()
-        #             new_messages_to_send.append(
-        #                 HumanMessage(
-        #                     content=[
-        #                         {"type": "text", "text": "Here is the audio file:"},
-        #                         {"type": "media", "media_type": "audio/mp3", "data": audio_bytes}
-        #                     ]
-        #                 )
-        #             )
-        #     else:
-        #         new_messages_to_send.append(msg)
-        # llm_response = llm_with_tools.invoke(new_messages_to_send)
+        # for msg in state["messages"]:
+        #     if isinstance(msg, HumanMessage) and msg.tool_calls:
+        #         # If a tool call to read_file_content happened in the previous turn
+        #         # and it returned a media type, we might need to get the file data
+        #         # and append it to the message parts. This logic is complex and
+        #         # depends heavily on how tool outputs are structured and passed.
+        #         # For simplicity in this template, we assume direct handling by the LLM
+        #         # if the tool output indicates media, and the file itself is accessible
+        #         # via the environment.
+        #         pass  # Keep original message; tool output will follow.
+        #     elif isinstance(msg, HumanMessage) and any(part.get("file_type") in ["image", "audio"] for part in msg.content if isinstance(part, dict)):
+        #         # This is a conceptual example for when the HumanMessage itself contains
+        #         # file data or a reference that needs to be resolved into data.
+        #         # You'd need to load the actual file bytes here,
+        #         # e.g. if msg.content were: [{"type": "file_reference", "file_path": "image.png"}]
+        #         # with open(msg.content[0]["file_path"], "rb") as f:
+        #         #     file_bytes = f.read()
+        #         # new_messages_to_send.append(
+        #         #     HumanMessage(
+        #         #         content=[
+        #         #             {"type": "text", "text": "Here is the media content:"},
+        #         #             {"type": "image_data" if "image" in msg.content[0]["file_type"] else "audio_data", "data": base64.b64encode(file_bytes).decode('utf-8'), "media_type": "image/png" if "image" in msg.content[0]["file_type"] else "audio/mp3"}
+        #         #         ]
+        #         #     )
+        #         # )
+        #     else:
+        #         new_messages_to_send.append(msg)
+        # llm_response = llm_with_tools.invoke([sys_msg] + new_messages_to_send)
+        # --- END IMPORTANT NOTE ---
 
         llm_response = llm_with_tools.invoke(messages_to_send)  # For now, keep as is; rely on the framework.
         print(f"LLM Raw Response: {llm_response}")
 