philincloud committed on
Commit
2ea78f5
·
verified ·
1 Parent(s): 1f94fdd

Update langgraph_agent.py

Browse files
Files changed (1) hide show
  1. langgraph_agent.py +57 -27
langgraph_agent.py CHANGED
@@ -1,8 +1,12 @@
1
  import os
2
  import io
3
  import contextlib
4
- import pandas as pd # Added for Excel file handling
5
- from typing import Dict, List, Union # Added for type hinting
 
 
 
 
6
 
7
  from langgraph.graph import START, StateGraph, MessagesState
8
  from langgraph.prebuilt import tools_condition, ToolNode
@@ -45,23 +49,22 @@ def modulus(a: int, b: int) -> int:
45
  def wiki_search(query: str) -> dict:
46
  """Search Wikipedia for a query and return up to 2 documents."""
47
  try:
48
- docs = WikipediaLoader(query=query, load_max_docs=2, lang="en").load() # Added lang="en" for clarity
49
  if not docs:
50
  return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
51
  formatted = "\n\n---\n\n".join(
52
- f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}' # Added .get for safety
53
  for d in docs
54
  )
55
  return {"wiki_results": formatted}
56
  except Exception as e:
57
- # Log the full error for debugging if possible
58
  print(f"Error in wiki_search tool: {e}")
59
  return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
60
 
61
  @tool
62
  def web_search(query: str) -> dict:
63
  """Perform a web search (via Tavily) and return up to 3 results."""
64
- try: # Added try-except block for robustness
65
  docs = TavilySearchResults(max_results=3).invoke(query=query)
66
  formatted = "\n\n---\n\n".join(
67
  f'<Document source="{d.metadata["source"]}"/>\n{d.page_content}'
@@ -82,30 +85,40 @@ def arvix_search(query: str) -> dict:
82
  )
83
  return {"arvix_results": formatted}
84
 
 
 
 
 
 
 
 
 
85
  @tool
86
  def read_file_content(file_path: str) -> Dict[str, str]:
87
  """
88
- Reads the content of a file and returns it.
89
- Supports text (.txt), Python (.py), and Excel (.xlsx) files.
90
- For other file types, returns a message indicating limited support.
91
  """
92
  try:
93
  _, file_extension = os.path.splitext(file_path)
94
- content = ""
95
- if file_extension.lower() in (".txt", ".py"):
 
96
  with open(file_path, "r", encoding="utf-8") as f:
97
  content = f.read()
98
- elif file_extension.lower() == ".xlsx":
99
- # Ensure pandas is installed for this.
100
  df = pd.read_excel(file_path)
101
- content = df.to_string() # Convert Excel to string representation
102
- elif file_extension.lower() == ".mp3":
103
- content = "Audio file provided. Unable to directly process audio. Consider using a transcription service if available."
104
- elif file_extension.lower() == ".png":
105
- content = "Image file provided. Unable to directly process images. Consider using an OCR or image analysis service if available."
 
 
 
106
  else:
107
- content = f"Unsupported file type: {file_extension}. Only .txt, .py, and .xlsx files are fully supported for reading content."
108
- return {"file_content": content, "file_name": file_path}
109
  except FileNotFoundError:
110
  return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
111
  except Exception as e:
@@ -118,10 +131,8 @@ def python_interpreter(code: str) -> Dict[str, str]:
118
  If there's an error during execution, it returns the error message.
119
  """
120
  old_stdout = io.StringIO()
121
- # Redirect stdout to capture print statements
122
  with contextlib.redirect_stdout(old_stdout):
123
  try:
124
- # Create a dictionary to hold the execution scope for exec
125
  exec_globals = {}
126
  exec_locals = {}
127
  exec(code, exec_globals, exec_locals)
@@ -130,6 +141,24 @@ def python_interpreter(code: str) -> Dict[str, str]:
130
  except Exception as e:
131
  return {"execution_error": str(e)}
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  API_KEY = os.getenv("GEMINI_API_KEY")
135
  HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
@@ -139,8 +168,10 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
139
  tools = [
140
  multiply, add, subtract, divide, modulus,
141
  wiki_search, web_search, arvix_search,
142
- read_file_content, # Added new tool
143
- python_interpreter, # Added new tool
 
 
144
  ]
145
 
146
 
@@ -153,7 +184,7 @@ def build_graph(provider: str = "gemini"):
153
  """Build the LangGraph agent with chosen LLM (default: Gemini)."""
154
  if provider == "gemini":
155
  llm = ChatGoogleGenerativeAI(
156
- model= "gemini-2.5-flash-preview-05-20",
157
  temperature=1.0,
158
  max_retries=2,
159
  api_key=GEMINI_API_KEY,
@@ -168,7 +199,7 @@ def build_graph(provider: str = "gemini"):
168
  temperature=0,
169
  )
170
  else:
171
- raise ValueError("Invalid provider. Choose 'openai' or 'huggingface'.")
172
 
173
  llm_with_tools = llm.bind_tools(tools)
174
 
@@ -189,4 +220,3 @@ if __name__ == "__main__":
189
  # This block is intentionally left empty as per user request to remove examples.
190
  # Your agent will interact with the graph by invoking it with messages.
191
  pass
192
-
 
1
  import os
2
  import io
3
  import contextlib
4
+ import pandas as pd
5
+ from typing import Dict, List, Union
6
+
7
+ # New imports for image processing (audio is handled natively by the LLM)
8
+ from PIL import Image as PILImage # Used for type checking/potential future local processing
9
+ from huggingface_hub import InferenceClient
10
 
11
  from langgraph.graph import START, StateGraph, MessagesState
12
  from langgraph.prebuilt import tools_condition, ToolNode
 
49
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 2 documents."""
    try:
        pages = WikipediaLoader(query=query, load_max_docs=2, lang="en").load()
        if not pages:
            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
        # Wrap each page in a pseudo-XML envelope so the LLM can see its origin.
        sections = []
        for page in pages:
            origin = page.metadata.get("source", "N/A")
            sections.append(f'<Document source="{origin}"/>\n{page.page_content}')
        return {"wiki_results": "\n\n---\n\n".join(sections)}
    except Exception as e:
        # Report the failure back to the agent rather than raising,
        # so a bad search does not abort the whole graph run.
        print(f"Error in wiki_search tool: {e}")
        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
63
 
64
  @tool
65
  def web_search(query: str) -> dict:
66
  """Perform a web search (via Tavily) and return up to 3 results."""
67
+ try:
68
  docs = TavilySearchResults(max_results=3).invoke(query=query)
69
  formatted = "\n\n---\n\n".join(
70
  f'<Document source="{d.metadata["source"]}"/>\n{d.page_content}'
 
85
  )
86
  return {"arvix_results": formatted}
87
 
88
# Hugging Face Inference API client used by the image-description tool.
# When HF_API_TOKEN is absent the client stays None and describe_image
# declines to run instead of failing mid-call.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN) if HF_API_TOKEN else None
if HF_INFERENCE_CLIENT is None:
    print("WARNING: HF_API_TOKEN not set. Image tools will not function.")
95
+
96
  @tool
97
  def read_file_content(file_path: str) -> Dict[str, str]:
98
  """
99
+ Reads the content of a file and returns its primary information.
100
+ For text/code/excel, returns content. For media, returns a prompt to use specific tools.
 
101
  """
102
  try:
103
  _, file_extension = os.path.splitext(file_path)
104
+ file_extension = file_extension.lower()
105
+
106
+ if file_extension in (".txt", ".py"):
107
  with open(file_path, "r", encoding="utf-8") as f:
108
  content = f.read()
109
+ return {"file_type": "text/code", "file_name": file_path, "file_content": content}
110
+ elif file_extension == ".xlsx":
111
  df = pd.read_excel(file_path)
112
+ content = df.to_string()
113
+ return {"file_type": "excel", "file_name": file_path, "file_content": content}
114
+ elif file_extension in (".jpeg", ".jpg", ".png"):
115
+ # Indicate that it's an image and needs to be described by a specific tool
116
+ return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
117
+ elif file_extension == ".mp3":
118
+ # Indicate that it's an audio file and the LLM should process it natively
119
+ return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM should process this natively."}
120
  else:
121
+ return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
 
122
  except FileNotFoundError:
123
  return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
124
  except Exception as e:
 
131
  If there's an error during execution, it returns the error message.
132
  """
133
  old_stdout = io.StringIO()
 
134
  with contextlib.redirect_stdout(old_stdout):
135
  try:
 
136
  exec_globals = {}
137
  exec_locals = {}
138
  exec(code, exec_globals, exec_locals)
 
141
  except Exception as e:
142
  return {"execution_error": str(e)}
143
 
144
@tool
def describe_image(image_path: str) -> Dict[str, str]:
    """
    Generates a textual description for an image file (JPEG, JPG, PNG) using an image-to-text model
    from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set.
    """
    # Refuse early when the module-level client was never configured.
    if not HF_INFERENCE_CLIENT:
        return {"error": "Hugging Face API token not configured for image description. Cannot use this tool."}
    try:
        with open(image_path, "rb") as img_file:
            payload = img_file.read()
        caption = HF_INFERENCE_CLIENT.image_to_text(payload)
        return {"image_description": caption, "image_path": image_path}
    except FileNotFoundError:
        return {"error": f"Image file not found: {image_path}. Please ensure the file exists."}
    except Exception as e:
        # Any other failure (read error, API error) is reported, not raised.
        return {"error": f"Error describing image {image_path}: {str(e)}"}
161
+
162
 
163
  API_KEY = os.getenv("GEMINI_API_KEY")
164
  HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
 
168
# Toolbox bound to the LLM: arithmetic, search, file reading, code
# execution, and image description. Order is part of the tool schema
# presented to the model, so it is kept as-is.
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    web_search,
    arvix_search,
    read_file_content,
    python_interpreter,
    describe_image,
]
176
 
177
 
 
184
  """Build the LangGraph agent with chosen LLM (default: Gemini)."""
185
  if provider == "gemini":
186
  llm = ChatGoogleGenerativeAI(
187
+ model="gemini-2.5-flash-preview-05-20", # This model is capable of native audio processing
188
  temperature=1.0,
189
  max_retries=2,
190
  api_key=GEMINI_API_KEY,
 
199
  temperature=0,
200
  )
201
  else:
202
+ raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")
203
 
204
  llm_with_tools = llm.bind_tools(tools)
205
 
 
220
  # This block is intentionally left empty as per user request to remove examples.
221
  # Your agent will interact with the graph by invoking it with messages.
222
  pass