omarequalmars
commited on
Commit
·
a1c1d9a
1
Parent(s):
0519c89
added excel/csv analysis
Browse files- app.py +38 -69
- graph/__pycache__/graph_builder.cpython-313.pyc +0 -0
- nodes/__pycache__/core.cpython-313.pyc +0 -0
- nodes/core.py +10 -55
- requirements.txt +2 -1
- states/__pycache__/state.cpython-313.pyc +0 -0
- tools/__pycache__/__init__.cpython-313.pyc +0 -0
- tools/__pycache__/langchain_tools.cpython-313.pyc +0 -0
- tools/__pycache__/math_tools.cpython-313.pyc +0 -0
- tools/__pycache__/multimodal_tools.cpython-313.pyc +0 -0
- tools/__pycache__/search_tools.cpython-313.pyc +0 -0
- tools/__pycache__/utils.cpython-313.pyc +0 -0
- tools/__pycache__/youtube_tools.cpython-313.pyc +0 -0
- tools/langchain_tools.py +181 -16
- tools/multimodal_tools.py +233 -3
- tools/search_tools.py +8 -8
app.py
CHANGED
@@ -14,35 +14,24 @@ load_dotenv()
|
|
14 |
from graph.graph_builder import graph
|
15 |
from langchain_core.messages import HumanMessage
|
16 |
|
17 |
-
# (Keep Constants as is)
|
18 |
# --- Constants ---
|
19 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
20 |
|
21 |
# --- File Download Helper Function ---
|
22 |
def download_file(task_id: str, api_url: str) -> Optional[str]:
|
23 |
-
"""
|
24 |
-
Download file associated with a task_id from the evaluation API
|
25 |
-
|
26 |
-
Args:
|
27 |
-
task_id: The task ID to download file for
|
28 |
-
api_url: Base API URL
|
29 |
-
|
30 |
-
Returns:
|
31 |
-
str: Local path to downloaded file, or None if failed
|
32 |
-
"""
|
33 |
try:
|
34 |
file_url = f"{api_url}/files/{task_id}"
|
35 |
-
print(f"📁 Downloading file for task {task_id}
|
36 |
|
37 |
response = requests.get(file_url, timeout=30)
|
38 |
response.raise_for_status()
|
39 |
|
40 |
-
#
|
41 |
content_disposition = response.headers.get('Content-Disposition', '')
|
42 |
if 'filename=' in content_disposition:
|
43 |
filename = content_disposition.split('filename=')[1].strip('"')
|
44 |
else:
|
45 |
-
# Fallback filename based on content type
|
46 |
content_type = response.headers.get('Content-Type', '')
|
47 |
if 'image' in content_type:
|
48 |
extension = '.jpg'
|
@@ -61,28 +50,22 @@ def download_file(task_id: str, api_url: str) -> Optional[str]:
|
|
61 |
with open(file_path, 'wb') as f:
|
62 |
f.write(response.content)
|
63 |
|
64 |
-
print(f"✅ File downloaded
|
65 |
return file_path
|
66 |
|
67 |
-
except requests.exceptions.RequestException as e:
|
68 |
-
print(f"❌ Error downloading file for task {task_id}: {e}")
|
69 |
-
return None
|
70 |
except Exception as e:
|
71 |
-
print(f"❌
|
72 |
return None
|
73 |
|
74 |
# --- Your LangGraph Agent Definition ---
|
75 |
-
# ----- THIS IS WHERE YOU BUILD YOUR AGENT ------
|
76 |
class BasicAgent:
|
77 |
def __init__(self):
|
78 |
"""Initialize the LangGraph agent"""
|
79 |
print("LangGraph Agent initialized with multimodal, search, math, and YouTube tools.")
|
80 |
|
81 |
-
# Verify environment variables
|
82 |
if not os.getenv("OPENROUTER_API_KEY"):
|
83 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
84 |
|
85 |
-
# The graph is already compiled and ready to use
|
86 |
self.graph = graph
|
87 |
print("✅ Agent ready with tools: multimodal, search, math, YouTube")
|
88 |
|
@@ -98,17 +81,17 @@ class BasicAgent:
|
|
98 |
str: The final answer (formatted for evaluation)
|
99 |
"""
|
100 |
print(f"🤖 Processing question: {question[:50]}...")
|
101 |
-
if file_path:
|
102 |
-
print(f"📎 Associated file: {file_path}")
|
103 |
|
104 |
try:
|
105 |
-
#
|
106 |
-
|
107 |
-
if file_path:
|
108 |
enhanced_question = f"{question}\n\nFile provided: {file_path}"
|
109 |
-
print(f"
|
|
|
|
|
|
|
110 |
|
111 |
-
# Create initial state with the enhanced
|
112 |
initial_state = {"messages": [HumanMessage(content=enhanced_question)]}
|
113 |
|
114 |
# Run the LangGraph agent
|
@@ -118,20 +101,16 @@ class BasicAgent:
|
|
118 |
final_message = result["messages"][-1]
|
119 |
answer = final_message.content
|
120 |
|
121 |
-
# Clean up the answer for evaluation (
|
122 |
-
# The evaluation system expects just the answer, no explanations
|
123 |
if isinstance(answer, str):
|
124 |
answer = answer.strip()
|
125 |
|
126 |
-
# Remove common prefixes that might interfere with evaluation
|
127 |
prefixes_to_remove = [
|
128 |
"The answer is: ",
|
129 |
"Answer: ",
|
130 |
"The result is: ",
|
131 |
"Result: ",
|
132 |
"The final answer is: ",
|
133 |
-
"Based on the analysis: ",
|
134 |
-
"Based on the file: ",
|
135 |
]
|
136 |
|
137 |
for prefix in prefixes_to_remove:
|
@@ -146,23 +125,13 @@ class BasicAgent:
|
|
146 |
error_msg = f"Error processing question: {str(e)}"
|
147 |
print(f"❌ {error_msg}")
|
148 |
return error_msg
|
149 |
-
finally:
|
150 |
-
# Clean up temporary file if it exists
|
151 |
-
if file_path and os.path.exists(file_path) and tempfile.gettempdir() in file_path:
|
152 |
-
try:
|
153 |
-
os.remove(file_path)
|
154 |
-
print(f"🧹 Cleaned up temporary file: {file_path}")
|
155 |
-
except Exception as e:
|
156 |
-
print(f"⚠️ Could not clean up temporary file: {e}")
|
157 |
|
158 |
-
# Keep the rest of the file unchanged (run_and_submit_all function and Gradio interface)
|
159 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
160 |
"""
|
161 |
Fetches all questions, downloads associated files, runs the BasicAgent on them,
|
162 |
submits all answers, and displays the results.
|
163 |
"""
|
164 |
-
|
165 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
166 |
|
167 |
if profile:
|
168 |
username= f"{profile.username}"
|
@@ -175,14 +144,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
175 |
questions_url = f"{api_url}/questions"
|
176 |
submit_url = f"{api_url}/submit"
|
177 |
|
178 |
-
# 1. Instantiate Agent
|
179 |
try:
|
180 |
agent = BasicAgent()
|
181 |
except Exception as e:
|
182 |
print(f"Error instantiating agent: {e}")
|
183 |
return f"Error initializing agent: {e}", None
|
184 |
|
185 |
-
# In the case of an app running as a hugging Face space, this link points toward your codebase
|
186 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
187 |
print(agent_code)
|
188 |
|
@@ -203,64 +171,67 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
203 |
print(f"An unexpected error occurred fetching questions: {e}")
|
204 |
return f"An unexpected error occurred fetching questions: {e}", None
|
205 |
|
206 |
-
# 3.
|
207 |
results_log = []
|
208 |
answers_payload = []
|
|
|
|
|
209 |
print(f"Running agent on {len(questions_data)} questions...")
|
210 |
|
211 |
for item in questions_data:
|
212 |
task_id = item.get("task_id")
|
213 |
question_text = item.get("question")
|
214 |
-
file_name = item.get("file_name")
|
215 |
|
216 |
if not task_id or question_text is None:
|
217 |
print(f"Skipping item with missing task_id or question: {item}")
|
218 |
continue
|
219 |
|
220 |
-
|
221 |
-
print(f"Question: {question_text[:100]}...")
|
222 |
-
if file_name:
|
223 |
-
print(f"Associated file: {file_name}")
|
224 |
-
|
225 |
-
# ✅ Download file if it exists
|
226 |
downloaded_file_path = None
|
227 |
if file_name:
|
|
|
228 |
downloaded_file_path = download_file(task_id, api_url)
|
229 |
-
if
|
230 |
-
|
231 |
|
232 |
try:
|
233 |
-
#
|
234 |
submitted_answer = agent(question_text, downloaded_file_path)
|
235 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
236 |
results_log.append({
|
237 |
"Task ID": task_id,
|
238 |
"Question": question_text,
|
239 |
-
"File": file_name if file_name else "None",
|
240 |
"Submitted Answer": submitted_answer
|
241 |
})
|
242 |
-
print(f"✅ Task {task_id} completed")
|
243 |
|
244 |
except Exception as e:
|
245 |
print(f"❌ Error running agent on task {task_id}: {e}")
|
246 |
-
error_answer = f"AGENT ERROR: {e}"
|
247 |
results_log.append({
|
248 |
"Task ID": task_id,
|
249 |
"Question": question_text,
|
250 |
-
"
|
251 |
-
"Submitted Answer": error_answer
|
252 |
})
|
253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
if not answers_payload:
|
255 |
print("Agent did not produce any answers to submit.")
|
256 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
257 |
|
258 |
-
# 4. Prepare Submission
|
259 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
260 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
261 |
print(status_update)
|
262 |
|
263 |
-
# 5. Submit
|
264 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
265 |
try:
|
266 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
@@ -303,7 +274,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
303 |
results_df = pd.DataFrame(results_log)
|
304 |
return status_message, results_df
|
305 |
|
306 |
-
# --- Build Gradio Interface
|
307 |
with gr.Blocks() as demo:
|
308 |
gr.Markdown("# LangGraph Agent Evaluation Runner")
|
309 |
gr.Markdown(
|
@@ -320,7 +291,6 @@ with gr.Blocks() as demo:
|
|
320 |
- 🔍 **Search**: Web search using multiple providers (DuckDuckGo, Tavily, SerpAPI)
|
321 |
- 🧮 **Math**: Basic arithmetic, complex calculations, percentages, factorials
|
322 |
- 📺 **YouTube**: Extract captions, get video information
|
323 |
-
- 📁 **File Processing**: Automatically downloads and processes evaluation files
|
324 |
|
325 |
---
|
326 |
**Note:** Processing all questions may take some time as the agent carefully analyzes each question and uses appropriate tools.
|
@@ -341,7 +311,6 @@ with gr.Blocks() as demo:
|
|
341 |
|
342 |
if __name__ == "__main__":
|
343 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
344 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
345 |
space_host_startup = os.getenv("SPACE_HOST")
|
346 |
space_id_startup = os.getenv("SPACE_ID")
|
347 |
|
@@ -361,4 +330,4 @@ if __name__ == "__main__":
|
|
361 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
362 |
|
363 |
print("Launching Gradio Interface for LangGraph Agent Evaluation...")
|
364 |
-
demo.launch(debug=True, share=
|
|
|
14 |
from graph.graph_builder import graph
|
15 |
from langchain_core.messages import HumanMessage
|
16 |
|
|
|
17 |
# --- Constants ---
|
18 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
19 |
|
20 |
# --- File Download Helper Function ---
|
21 |
def download_file(task_id: str, api_url: str) -> Optional[str]:
|
22 |
+
"""Download file associated with a task_id from the evaluation API"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
try:
|
24 |
file_url = f"{api_url}/files/{task_id}"
|
25 |
+
print(f"📁 Downloading file for task {task_id}")
|
26 |
|
27 |
response = requests.get(file_url, timeout=30)
|
28 |
response.raise_for_status()
|
29 |
|
30 |
+
# Get filename from headers or create one
|
31 |
content_disposition = response.headers.get('Content-Disposition', '')
|
32 |
if 'filename=' in content_disposition:
|
33 |
filename = content_disposition.split('filename=')[1].strip('"')
|
34 |
else:
|
|
|
35 |
content_type = response.headers.get('Content-Type', '')
|
36 |
if 'image' in content_type:
|
37 |
extension = '.jpg'
|
|
|
50 |
with open(file_path, 'wb') as f:
|
51 |
f.write(response.content)
|
52 |
|
53 |
+
print(f"✅ File downloaded: {file_path}")
|
54 |
return file_path
|
55 |
|
|
|
|
|
|
|
56 |
except Exception as e:
|
57 |
+
print(f"❌ Error downloading file for task {task_id}: {e}")
|
58 |
return None
|
59 |
|
60 |
# --- Your LangGraph Agent Definition ---
|
|
|
61 |
class BasicAgent:
|
62 |
def __init__(self):
|
63 |
"""Initialize the LangGraph agent"""
|
64 |
print("LangGraph Agent initialized with multimodal, search, math, and YouTube tools.")
|
65 |
|
|
|
66 |
if not os.getenv("OPENROUTER_API_KEY"):
|
67 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
68 |
|
|
|
69 |
self.graph = graph
|
70 |
print("✅ Agent ready with tools: multimodal, search, math, YouTube")
|
71 |
|
|
|
81 |
str: The final answer (formatted for evaluation)
|
82 |
"""
|
83 |
print(f"🤖 Processing question: {question[:50]}...")
|
|
|
|
|
84 |
|
85 |
try:
|
86 |
+
# CRITICAL: Only modify the prompt if there's actually a valid file
|
87 |
+
if file_path and os.path.exists(file_path):
|
|
|
88 |
enhanced_question = f"{question}\n\nFile provided: {file_path}"
|
89 |
+
print(f"📎 Processing with file: {file_path}")
|
90 |
+
else:
|
91 |
+
# Keep EXACTLY the same behavior as before for non-file questions
|
92 |
+
enhanced_question = question
|
93 |
|
94 |
+
# Create initial state with the question (enhanced or original)
|
95 |
initial_state = {"messages": [HumanMessage(content=enhanced_question)]}
|
96 |
|
97 |
# Run the LangGraph agent
|
|
|
101 |
final_message = result["messages"][-1]
|
102 |
answer = final_message.content
|
103 |
|
104 |
+
# Clean up the answer for evaluation (UNCHANGED from original)
|
|
|
105 |
if isinstance(answer, str):
|
106 |
answer = answer.strip()
|
107 |
|
|
|
108 |
prefixes_to_remove = [
|
109 |
"The answer is: ",
|
110 |
"Answer: ",
|
111 |
"The result is: ",
|
112 |
"Result: ",
|
113 |
"The final answer is: ",
|
|
|
|
|
114 |
]
|
115 |
|
116 |
for prefix in prefixes_to_remove:
|
|
|
125 |
error_msg = f"Error processing question: {str(e)}"
|
126 |
print(f"❌ {error_msg}")
|
127 |
return error_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
|
|
129 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
130 |
"""
|
131 |
Fetches all questions, downloads associated files, runs the BasicAgent on them,
|
132 |
submits all answers, and displays the results.
|
133 |
"""
|
134 |
+
space_id = os.getenv("SPACE_ID")
|
|
|
135 |
|
136 |
if profile:
|
137 |
username= f"{profile.username}"
|
|
|
144 |
questions_url = f"{api_url}/questions"
|
145 |
submit_url = f"{api_url}/submit"
|
146 |
|
147 |
+
# 1. Instantiate Agent
|
148 |
try:
|
149 |
agent = BasicAgent()
|
150 |
except Exception as e:
|
151 |
print(f"Error instantiating agent: {e}")
|
152 |
return f"Error initializing agent: {e}", None
|
153 |
|
|
|
154 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
155 |
print(agent_code)
|
156 |
|
|
|
171 |
print(f"An unexpected error occurred fetching questions: {e}")
|
172 |
return f"An unexpected error occurred fetching questions: {e}", None
|
173 |
|
174 |
+
# 3. Process Questions (with minimal changes)
|
175 |
results_log = []
|
176 |
answers_payload = []
|
177 |
+
downloaded_files = [] # Track files for cleanup
|
178 |
+
|
179 |
print(f"Running agent on {len(questions_data)} questions...")
|
180 |
|
181 |
for item in questions_data:
|
182 |
task_id = item.get("task_id")
|
183 |
question_text = item.get("question")
|
184 |
+
file_name = item.get("file_name")
|
185 |
|
186 |
if not task_id or question_text is None:
|
187 |
print(f"Skipping item with missing task_id or question: {item}")
|
188 |
continue
|
189 |
|
190 |
+
# Download file if it exists
|
|
|
|
|
|
|
|
|
|
|
191 |
downloaded_file_path = None
|
192 |
if file_name:
|
193 |
+
print(f"📋 Task {task_id} has file: {file_name}")
|
194 |
downloaded_file_path = download_file(task_id, api_url)
|
195 |
+
if downloaded_file_path:
|
196 |
+
downloaded_files.append(downloaded_file_path) # Track for cleanup
|
197 |
|
198 |
try:
|
199 |
+
# Call agent (behavior unchanged for non-file questions)
|
200 |
submitted_answer = agent(question_text, downloaded_file_path)
|
201 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
202 |
results_log.append({
|
203 |
"Task ID": task_id,
|
204 |
"Question": question_text,
|
|
|
205 |
"Submitted Answer": submitted_answer
|
206 |
})
|
|
|
207 |
|
208 |
except Exception as e:
|
209 |
print(f"❌ Error running agent on task {task_id}: {e}")
|
|
|
210 |
results_log.append({
|
211 |
"Task ID": task_id,
|
212 |
"Question": question_text,
|
213 |
+
"Submitted Answer": f"AGENT ERROR: {e}"
|
|
|
214 |
})
|
215 |
|
216 |
+
# Cleanup downloaded files AFTER all processing is complete
|
217 |
+
for file_path in downloaded_files:
|
218 |
+
try:
|
219 |
+
if os.path.exists(file_path):
|
220 |
+
os.remove(file_path)
|
221 |
+
print(f"🧹 Cleaned up: {file_path}")
|
222 |
+
except Exception as e:
|
223 |
+
print(f"⚠️ Could not clean up {file_path}: {e}")
|
224 |
+
|
225 |
if not answers_payload:
|
226 |
print("Agent did not produce any answers to submit.")
|
227 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
228 |
|
229 |
+
# 4. Prepare Submission (UNCHANGED)
|
230 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
231 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
232 |
print(status_update)
|
233 |
|
234 |
+
# 5. Submit (UNCHANGED)
|
235 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
236 |
try:
|
237 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
|
|
274 |
results_df = pd.DataFrame(results_log)
|
275 |
return status_message, results_df
|
276 |
|
277 |
+
# --- Build Gradio Interface (UNCHANGED) ---
|
278 |
with gr.Blocks() as demo:
|
279 |
gr.Markdown("# LangGraph Agent Evaluation Runner")
|
280 |
gr.Markdown(
|
|
|
291 |
- 🔍 **Search**: Web search using multiple providers (DuckDuckGo, Tavily, SerpAPI)
|
292 |
- 🧮 **Math**: Basic arithmetic, complex calculations, percentages, factorials
|
293 |
- 📺 **YouTube**: Extract captions, get video information
|
|
|
294 |
|
295 |
---
|
296 |
**Note:** Processing all questions may take some time as the agent carefully analyzes each question and uses appropriate tools.
|
|
|
311 |
|
312 |
if __name__ == "__main__":
|
313 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
314 |
space_host_startup = os.getenv("SPACE_HOST")
|
315 |
space_id_startup = os.getenv("SPACE_ID")
|
316 |
|
|
|
330 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
331 |
|
332 |
print("Launching Gradio Interface for LangGraph Agent Evaluation...")
|
333 |
+
demo.launch(debug=True, share=True)
|
graph/__pycache__/graph_builder.cpython-313.pyc
CHANGED
Binary files a/graph/__pycache__/graph_builder.cpython-313.pyc and b/graph/__pycache__/graph_builder.cpython-313.pyc differ
|
|
nodes/__pycache__/core.cpython-313.pyc
CHANGED
Binary files a/nodes/__pycache__/core.cpython-313.pyc and b/nodes/__pycache__/core.cpython-313.pyc differ
|
|
nodes/core.py
CHANGED
@@ -1,82 +1,37 @@
|
|
1 |
-
# nodes/core.py
|
2 |
from states.state import AgentState
|
3 |
import os
|
4 |
from dotenv import load_dotenv
|
5 |
-
from langchain_openai import ChatOpenAI
|
6 |
-
from tools.langchain_tools import
|
7 |
-
extract_text,
|
8 |
-
analyze_image_tool,
|
9 |
-
analyze_audio_tool,
|
10 |
-
add,
|
11 |
-
subtract,
|
12 |
-
multiply,
|
13 |
-
divide,
|
14 |
-
search_tool,
|
15 |
-
extract_youtube_transcript,
|
16 |
-
get_youtube_info,
|
17 |
-
calculate_expression,
|
18 |
-
factorial,
|
19 |
-
square_root,
|
20 |
-
percentage,
|
21 |
-
average
|
22 |
-
)
|
23 |
|
24 |
load_dotenv()
|
25 |
|
26 |
-
# Read your API key from the environment variable
|
27 |
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
28 |
|
29 |
if not openrouter_api_key:
|
30 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
31 |
|
32 |
-
# Initialize OpenRouter ChatOpenAI
|
33 |
chat = ChatOpenAI(
|
34 |
-
model="google/gemini-2.5-pro-preview", #
|
35 |
-
# Alternative models you can use:
|
36 |
-
# model="mistralai/mistral-7b-instruct:free", # Fast, free text model
|
37 |
-
# model="google/gemma-2-9b-it:free", # Google's free model
|
38 |
-
# model="qwen/qwen-2.5-72b-instruct:free", # High-quality free model
|
39 |
-
|
40 |
temperature=0,
|
41 |
max_retries=2,
|
42 |
base_url="https://openrouter.ai/api/v1",
|
43 |
api_key=openrouter_api_key,
|
44 |
default_headers={
|
45 |
-
"HTTP-Referer": "https://your-app.com",
|
46 |
-
"X-Title": "LangGraph Agent",
|
47 |
}
|
48 |
)
|
49 |
|
50 |
-
#
|
51 |
-
tools =
|
52 |
-
extract_text,
|
53 |
-
analyze_image_tool,
|
54 |
-
analyze_audio_tool,
|
55 |
-
extract_youtube_transcript,
|
56 |
-
add,
|
57 |
-
subtract,
|
58 |
-
multiply,
|
59 |
-
divide,
|
60 |
-
search_tool
|
61 |
-
]
|
62 |
-
|
63 |
-
# Extended tools list (if you want more capabilities)
|
64 |
-
extended_tools = tools + [
|
65 |
-
get_youtube_info,
|
66 |
-
calculate_expression,
|
67 |
-
factorial,
|
68 |
-
square_root,
|
69 |
-
percentage,
|
70 |
-
average
|
71 |
-
]
|
72 |
|
73 |
-
# Use core tools by default (matching original), but you can switch to extended_tools
|
74 |
chat_with_tools = chat.bind_tools(tools)
|
75 |
|
76 |
def assistant(state: AgentState):
|
77 |
-
"""
|
78 |
-
Assistant node - maintains the exact same system prompt for evaluation compatibility
|
79 |
-
"""
|
80 |
sys_msg = (
|
81 |
"You are a helpful assistant with access to tools. Understand user requests accurately. "
|
82 |
"Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
|
|
|
1 |
+
# nodes/core.py (Updated to include Excel tool)
|
2 |
from states.state import AgentState
|
3 |
import os
|
4 |
from dotenv import load_dotenv
|
5 |
+
from langchain_openai import ChatOpenAI
|
6 |
+
from tools.langchain_tools import EXTENDED_TOOLS # ✅ Changed from individual imports
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
load_dotenv()
|
9 |
|
|
|
10 |
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
11 |
|
12 |
if not openrouter_api_key:
|
13 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
14 |
|
15 |
+
# Initialize OpenRouter ChatOpenAI
|
16 |
chat = ChatOpenAI(
|
17 |
+
model="google/gemini-2.5-pro-preview", # Tool-compatible model
|
|
|
|
|
|
|
|
|
|
|
18 |
temperature=0,
|
19 |
max_retries=2,
|
20 |
base_url="https://openrouter.ai/api/v1",
|
21 |
api_key=openrouter_api_key,
|
22 |
default_headers={
|
23 |
+
"HTTP-Referer": "https://your-app.com",
|
24 |
+
"X-Title": "LangGraph Agent",
|
25 |
}
|
26 |
)
|
27 |
|
28 |
+
# Use EXTENDED_TOOLS which includes Excel support
|
29 |
+
tools = EXTENDED_TOOLS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
|
|
31 |
chat_with_tools = chat.bind_tools(tools)
|
32 |
|
33 |
def assistant(state: AgentState):
|
34 |
+
"""Assistant node with Excel support"""
|
|
|
|
|
35 |
sys_msg = (
|
36 |
"You are a helpful assistant with access to tools. Understand user requests accurately. "
|
37 |
"Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
|
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ langchain-community
|
|
10 |
gradio
|
11 |
pandas
|
12 |
gradio[oauth]
|
13 |
-
|
|
|
|
10 |
gradio
|
11 |
pandas
|
12 |
gradio[oauth]
|
13 |
+
openpyxl
|
14 |
+
xlrd
|
states/__pycache__/state.cpython-313.pyc
CHANGED
Binary files a/states/__pycache__/state.cpython-313.pyc and b/states/__pycache__/state.cpython-313.pyc differ
|
|
tools/__pycache__/__init__.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/__init__.cpython-313.pyc and b/tools/__pycache__/__init__.cpython-313.pyc differ
|
|
tools/__pycache__/langchain_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/langchain_tools.cpython-313.pyc and b/tools/__pycache__/langchain_tools.cpython-313.pyc differ
|
|
tools/__pycache__/math_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/math_tools.cpython-313.pyc and b/tools/__pycache__/math_tools.cpython-313.pyc differ
|
|
tools/__pycache__/multimodal_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/multimodal_tools.cpython-313.pyc and b/tools/__pycache__/multimodal_tools.cpython-313.pyc differ
|
|
tools/__pycache__/search_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/search_tools.cpython-313.pyc and b/tools/__pycache__/search_tools.cpython-313.pyc differ
|
|
tools/__pycache__/utils.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/utils.cpython-313.pyc and b/tools/__pycache__/utils.cpython-313.pyc differ
|
|
tools/__pycache__/youtube_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/youtube_tools.cpython-313.pyc and b/tools/__pycache__/youtube_tools.cpython-313.pyc differ
|
|
tools/langchain_tools.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
-
# tools/langchain_tools.py
|
2 |
"""
|
3 |
LangChain-compatible tool wrappers for our existing tools
|
|
|
4 |
"""
|
5 |
|
6 |
from langchain_core.tools import tool
|
@@ -11,7 +12,7 @@ from dotenv import load_dotenv
|
|
11 |
# Load environment variables FIRST, before any tool imports
|
12 |
load_dotenv()
|
13 |
|
14 |
-
from .multimodal_tools import MultimodalTools, analyze_transcript as _analyze_transcript
|
15 |
from .search_tools import SearchTools
|
16 |
from .math_tools import MathTools
|
17 |
from .youtube_tools import YouTubeTools
|
@@ -21,7 +22,10 @@ multimodal_tools = MultimodalTools()
|
|
21 |
search_tools = SearchTools()
|
22 |
youtube_tools = YouTubeTools()
|
23 |
|
24 |
-
#
|
|
|
|
|
|
|
25 |
@tool
|
26 |
def extract_text(image_path: str) -> str:
|
27 |
"""Extract text from an image using OCR"""
|
@@ -38,7 +42,16 @@ def analyze_audio_tool(transcript: str, question: str = "Summarize this audio co
|
|
38 |
return multimodal_tools.analyze_audio_transcript(transcript, question)
|
39 |
|
40 |
@tool
|
41 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
"""Search the web for information"""
|
43 |
results = search_tools.search(query, max_results)
|
44 |
if not results:
|
@@ -54,6 +67,44 @@ def search_tool(query: str, max_results: int = 5) -> str:
|
|
54 |
|
55 |
return "\n".join(formatted_results)
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
@tool
|
58 |
def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
59 |
"""Extract transcript/captions from a YouTube video"""
|
@@ -63,6 +114,28 @@ def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
|
63 |
else:
|
64 |
return "No captions available for this video"
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
@tool
|
67 |
def add(a: float, b: float) -> float:
|
68 |
"""Add two numbers"""
|
@@ -85,19 +158,13 @@ def divide(a: float, b: float) -> str:
|
|
85 |
return str(result)
|
86 |
|
87 |
@tool
|
88 |
-
def
|
89 |
-
"""
|
90 |
-
|
91 |
-
if info:
|
92 |
-
return f"Title: {info.get('title', 'Unknown')}\nAuthor: {info.get('author', 'Unknown')}\nDuration: {info.get('length', 0)} seconds\nViews: {info.get('views', 0):,}"
|
93 |
-
else:
|
94 |
-
return "Could not retrieve video information"
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
from .math_tools import calculate_expression as calc_expr
|
100 |
-
return str(calc_expr(expression))
|
101 |
|
102 |
@tool
|
103 |
def factorial(n: int) -> str:
|
@@ -126,3 +193,101 @@ def average(numbers: str) -> str:
|
|
126 |
return str(result)
|
127 |
except Exception as e:
|
128 |
return f"Error parsing numbers: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/langchain_tools.py
|
2 |
"""
|
3 |
LangChain-compatible tool wrappers for our existing tools
|
4 |
+
Complete integration of multimodal, search, math, and YouTube tools
|
5 |
"""
|
6 |
|
7 |
from langchain_core.tools import tool
|
|
|
12 |
# Load environment variables FIRST, before any tool imports
|
13 |
load_dotenv()
|
14 |
|
15 |
+
from .multimodal_tools import MultimodalTools, analyze_transcript as _analyze_transcript, analyze_excel, analyze_python
|
16 |
from .search_tools import SearchTools
|
17 |
from .math_tools import MathTools
|
18 |
from .youtube_tools import YouTubeTools
|
|
|
22 |
search_tools = SearchTools()
|
23 |
youtube_tools = YouTubeTools()
|
24 |
|
25 |
+
# =============================================================================
|
26 |
+
# MULTIMODAL TOOLS
|
27 |
+
# =============================================================================
|
28 |
+
|
29 |
@tool
|
30 |
def extract_text(image_path: str) -> str:
|
31 |
"""Extract text from an image using OCR"""
|
|
|
42 |
return multimodal_tools.analyze_audio_transcript(transcript, question)
|
43 |
|
44 |
@tool
|
45 |
+
def analyze_excel_tool(file_path: str, question: str) -> str:
|
46 |
+
"""Analyze Excel or CSV files to answer questions about the data"""
|
47 |
+
return analyze_excel(file_path, question)
|
48 |
+
|
49 |
+
# =============================================================================
|
50 |
+
# SEARCH TOOLS
|
51 |
+
# =============================================================================
|
52 |
+
|
53 |
+
@tool
|
54 |
+
def search_tool(query: str, max_results: int = 10) -> str:
|
55 |
"""Search the web for information"""
|
56 |
results = search_tools.search(query, max_results)
|
57 |
if not results:
|
|
|
67 |
|
68 |
return "\n".join(formatted_results)
|
69 |
|
70 |
+
@tool
|
71 |
+
def search_news_tool(query: str, max_results: int = 10) -> str:
|
72 |
+
"""Search for news articles about a topic"""
|
73 |
+
results = search_tools.search_news(query, max_results)
|
74 |
+
if not results:
|
75 |
+
return "No news results found"
|
76 |
+
|
77 |
+
# Format results for the LLM
|
78 |
+
formatted_results = []
|
79 |
+
for i, result in enumerate(results, 1):
|
80 |
+
title = result.get('title', 'No title')
|
81 |
+
content = result.get('content', 'No content')
|
82 |
+
url = result.get('url', 'No URL')
|
83 |
+
formatted_results.append(f"{i}. {title}\n{content[:200]}...\nSource: {url}\n")
|
84 |
+
|
85 |
+
return "\n".join(formatted_results)
|
86 |
+
|
87 |
+
@tool
|
88 |
+
def search_academic_tool(query: str, max_results: int = 10) -> str:
|
89 |
+
"""Search for academic research and papers"""
|
90 |
+
results = search_tools.search_academic(query, max_results)
|
91 |
+
if not results:
|
92 |
+
return "No academic results found"
|
93 |
+
|
94 |
+
# Format results for the LLM
|
95 |
+
formatted_results = []
|
96 |
+
for i, result in enumerate(results, 1):
|
97 |
+
title = result.get('title', 'No title')
|
98 |
+
content = result.get('content', 'No content')
|
99 |
+
url = result.get('url', 'No URL')
|
100 |
+
formatted_results.append(f"{i}. {title}\n{content[:200]}...\nSource: {url}\n")
|
101 |
+
|
102 |
+
return "\n".join(formatted_results)
|
103 |
+
|
104 |
+
# =============================================================================
|
105 |
+
# YOUTUBE TOOLS
|
106 |
+
# =============================================================================
|
107 |
+
|
108 |
@tool
|
109 |
def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
110 |
"""Extract transcript/captions from a YouTube video"""
|
|
|
114 |
else:
|
115 |
return "No captions available for this video"
|
116 |
|
117 |
+
@tool
|
118 |
+
def get_youtube_info(url: str) -> str:
|
119 |
+
"""Get information about a YouTube video"""
|
120 |
+
info = youtube_tools.get_video_info(url)
|
121 |
+
if info:
|
122 |
+
return f"Title: {info.get('title', 'Unknown')}\nAuthor: {info.get('author', 'Unknown')}\nDuration: {info.get('length', 0)} seconds\nViews: {info.get('views', 0):,}"
|
123 |
+
else:
|
124 |
+
return "Could not retrieve video information"
|
125 |
+
|
126 |
+
@tool
|
127 |
+
def get_youtube_playlist_info(playlist_url: str) -> str:
|
128 |
+
"""Get information about a YouTube playlist"""
|
129 |
+
info = youtube_tools.get_playlist_info(playlist_url)
|
130 |
+
if info:
|
131 |
+
return f"Playlist: {info.get('title', 'Unknown')}\nVideos: {info.get('video_count', 0)}\nOwner: {info.get('owner', 'Unknown')}"
|
132 |
+
else:
|
133 |
+
return "Could not retrieve playlist information"
|
134 |
+
|
135 |
+
# =============================================================================
|
136 |
+
# MATH TOOLS - Basic Operations
|
137 |
+
# =============================================================================
|
138 |
+
|
139 |
@tool
|
140 |
def add(a: float, b: float) -> float:
|
141 |
"""Add two numbers"""
|
|
|
158 |
return str(result)
|
159 |
|
160 |
@tool
def power(base: float, exponent: float) -> float:
    """Raise *base* to the power *exponent* (delegates to the shared MathTools helper)."""
    result = MathTools.power(base, exponent)
    return result
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
# =============================================================================
|
166 |
+
# MATH TOOLS - Advanced Operations
|
167 |
+
# =============================================================================
|
|
|
|
|
168 |
|
169 |
@tool
|
170 |
def factorial(n: int) -> str:
|
|
|
193 |
return str(result)
|
194 |
except Exception as e:
|
195 |
return f"Error parsing numbers: {str(e)}"
|
196 |
+
|
197 |
+
@tool
def calculate_expression(expression: str) -> str:
    """Safely evaluate a mathematical expression string and return the result as text."""
    # Imported lazily to avoid a circular import at module load time.
    from .math_tools import calculate_expression as _evaluate
    return str(_evaluate(expression))
|
202 |
+
|
203 |
+
@tool
def absolute_value(n: float) -> float:
    """Return the absolute value of *n*."""
    magnitude = MathTools.absolute(n)
    return magnitude
|
207 |
+
|
208 |
+
@tool
def round_number(n: float, decimals: int = 2) -> float:
    """Round *n* to *decimals* decimal places (default: 2)."""
    rounded = MathTools.round_number(n, decimals)
    return rounded
|
212 |
+
|
213 |
+
@tool
def min_value(numbers: str) -> str:
    """Find the minimum value in a comma-separated list of numbers."""
    try:
        # Parse "1, 2.5, -3" style input; any bad token aborts with an error string.
        parsed = [float(token.strip()) for token in numbers.split(',')]
        return str(MathTools.min_value(parsed))
    except Exception as e:
        return f"Error parsing numbers: {str(e)}"
|
222 |
+
|
223 |
+
@tool
def max_value(numbers: str) -> str:
    """Find the maximum value in a comma-separated list of numbers."""
    try:
        # Parse "1, 2.5, -3" style input; any bad token aborts with an error string.
        parsed = [float(token.strip()) for token in numbers.split(',')]
        return str(MathTools.max_value(parsed))
    except Exception as e:
        return f"Error parsing numbers: {str(e)}"
|
232 |
+
|
233 |
+
@tool
def compound_interest(principal: float, rate: float, time: float, compounds_per_year: int = 1) -> str:
    """Calculate compound interest on *principal* at *rate* over *time* years."""
    amount = MathTools.calculate_compound_interest(principal, rate, time, compounds_per_year)
    return str(amount)
|
238 |
+
|
239 |
+
@tool
def solve_quadratic(a: float, b: float, c: float) -> str:
    """Solve the quadratic equation a*x**2 + b*x + c = 0 and return the roots as text."""
    roots = MathTools.solve_quadratic(a, b, c)
    return str(roots)
|
244 |
+
|
245 |
+
@tool
def analyze_python_tool(file_path: str, question: str = "What is the final output of this code?") -> str:
    """Read and analyze a Python source file; may execute the code to answer the question."""
    answer = analyze_python(file_path, question)
    return answer
|
249 |
+
# =============================================================================
|
250 |
+
# TOOL COLLECTIONS FOR EASY IMPORT
|
251 |
+
# =============================================================================
|
252 |
+
|
253 |
+
# Core tools (matching original template)
# Minimal tool set: multimodal analysis (text/image/audio), YouTube
# transcripts, the four basic arithmetic operations, and web search.
CORE_TOOLS = [
    extract_text,
    analyze_image_tool,
    analyze_audio_tool,
    extract_youtube_transcript,
    add,
    subtract,
    multiply,
    divide,
    search_tool
]

# Extended tools with new Excel functionality
# Superset of CORE_TOOLS; adds spreadsheet/Python-file analysis, extra
# search providers, YouTube metadata, and the common math helpers.
EXTENDED_TOOLS = CORE_TOOLS + [
    analyze_excel_tool,  # NEW: Excel/CSV analysis
    analyze_python_tool,
    search_news_tool,
    search_academic_tool,
    get_youtube_info,
    get_youtube_playlist_info,
    calculate_expression,
    factorial,
    square_root,
    percentage,
    average
]

# All available tools
# EXTENDED_TOOLS plus the remaining advanced math operations.
ALL_TOOLS = EXTENDED_TOOLS + [
    power,
    absolute_value,
    round_number,
    min_value,
    max_value,
    compound_interest,
    solve_quadratic
]

# Default export (for backwards compatibility)
# Existing importers of `tools` get the conservative core set.
tools = CORE_TOOLS
|
tools/multimodal_tools.py
CHANGED
@@ -1,6 +1,12 @@
|
|
1 |
# tools/multimodal_tools.py
|
2 |
import requests
|
3 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from typing import Optional, Dict, Any
|
5 |
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger
|
6 |
|
@@ -28,7 +34,7 @@ class MultimodalTools:
|
|
28 |
self.openrouter_url,
|
29 |
headers=self.headers,
|
30 |
json=payload,
|
31 |
-
timeout=
|
32 |
)
|
33 |
response.raise_for_status()
|
34 |
|
@@ -78,7 +84,7 @@ class MultimodalTools:
|
|
78 |
}
|
79 |
],
|
80 |
"temperature": 0,
|
81 |
-
"max_tokens":
|
82 |
}
|
83 |
|
84 |
return self._make_openrouter_request(payload)
|
@@ -128,7 +134,7 @@ class MultimodalTools:
|
|
128 |
}
|
129 |
],
|
130 |
"temperature": 0,
|
131 |
-
"max_tokens":
|
132 |
}
|
133 |
|
134 |
return self._make_openrouter_request(payload)
|
@@ -138,6 +144,219 @@ class MultimodalTools:
|
|
138 |
logger.error(error_msg)
|
139 |
return error_msg
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
def describe_image(self, image_path: str) -> str:
|
142 |
"""Get a detailed description of an image"""
|
143 |
return self.analyze_image(
|
@@ -164,3 +383,14 @@ def analyze_transcript(transcript: str, question: str = "Summarize this content"
|
|
164 |
"""Standalone function to analyze audio transcript"""
|
165 |
tools = MultimodalTools()
|
166 |
return tools.analyze_audio_transcript(transcript, question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# tools/multimodal_tools.py
|
2 |
import requests
|
3 |
import json
|
4 |
+
import pandas as pd
|
5 |
+
import os # ✅ Added for file operations
|
6 |
+
import io # ✅ Added for code execution
|
7 |
+
import contextlib # ✅ Added for code execution
|
8 |
+
import ast # ✅ Added for code validation
|
9 |
+
import traceback # ✅ Added for error handling
|
10 |
from typing import Optional, Dict, Any
|
11 |
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger
|
12 |
|
|
|
34 |
self.openrouter_url,
|
35 |
headers=self.headers,
|
36 |
json=payload,
|
37 |
+
timeout=60
|
38 |
)
|
39 |
response.raise_for_status()
|
40 |
|
|
|
84 |
}
|
85 |
],
|
86 |
"temperature": 0,
|
87 |
+
"max_tokens": 2048
|
88 |
}
|
89 |
|
90 |
return self._make_openrouter_request(payload)
|
|
|
134 |
}
|
135 |
],
|
136 |
"temperature": 0,
|
137 |
+
"max_tokens": 2048
|
138 |
}
|
139 |
|
140 |
return self._make_openrouter_request(payload)
|
|
|
144 |
logger.error(error_msg)
|
145 |
return error_msg
|
146 |
|
147 |
+
def analyze_excel_file(self, file_path: str, question: str) -> str:
|
148 |
+
"""
|
149 |
+
Analyze Excel or CSV file content using AI
|
150 |
+
|
151 |
+
Args:
|
152 |
+
file_path: Path to Excel (.xlsx) or CSV file
|
153 |
+
question: Question about the data
|
154 |
+
|
155 |
+
Returns:
|
156 |
+
AI analysis of the spreadsheet data
|
157 |
+
"""
|
158 |
+
if not validate_file_exists(file_path):
|
159 |
+
return f"Error: File not found at {file_path}"
|
160 |
+
|
161 |
+
try:
|
162 |
+
# Try reading as Excel first, then CSV
|
163 |
+
try:
|
164 |
+
df = pd.read_excel(file_path)
|
165 |
+
except Exception:
|
166 |
+
try:
|
167 |
+
df = pd.read_csv(file_path)
|
168 |
+
except Exception as e:
|
169 |
+
return f"Error reading file: Unable to read as Excel or CSV - {str(e)}"
|
170 |
+
|
171 |
+
# Convert dataframe to text representation for AI analysis
|
172 |
+
data_summary = f"""
|
173 |
+
Data file analysis:
|
174 |
+
- Shape: {df.shape[0]} rows, {df.shape[1]} columns
|
175 |
+
- Columns: {list(df.columns)}
|
176 |
+
|
177 |
+
First few rows:
|
178 |
+
{df.head().to_string()}
|
179 |
+
|
180 |
+
Data types:
|
181 |
+
{df.dtypes.to_string()}
|
182 |
+
|
183 |
+
Summary statistics:
|
184 |
+
{df.describe().to_string()}
|
185 |
+
"""
|
186 |
+
|
187 |
+
payload = {
|
188 |
+
"model": self.text_model,
|
189 |
+
"messages": [
|
190 |
+
{
|
191 |
+
"role": "user",
|
192 |
+
"content": f"Analyze this spreadsheet data and answer the question.\n\n{data_summary}\n\nQuestion: {question}"
|
193 |
+
}
|
194 |
+
],
|
195 |
+
"temperature": 0,
|
196 |
+
"max_tokens": 2048
|
197 |
+
}
|
198 |
+
|
199 |
+
return self._make_openrouter_request(payload)
|
200 |
+
|
201 |
+
except Exception as e:
|
202 |
+
error_msg = f"Error analyzing Excel file: {str(e)}"
|
203 |
+
logger.error(error_msg)
|
204 |
+
return error_msg
|
205 |
+
|
206 |
+
# ✅ NEW METHOD - Added Python code processing
|
207 |
+
def _validate_python_code(self, code: str) -> bool:
|
208 |
+
"""Validate Python code syntax"""
|
209 |
+
try:
|
210 |
+
ast.parse(code)
|
211 |
+
return True
|
212 |
+
except SyntaxError:
|
213 |
+
return False
|
214 |
+
|
215 |
+
    def _execute_python_code(self, code: str) -> str:
        """
        Safely execute Python code and capture output.

        Based on the LlamaIndex SimpleCodeExecutor pattern.

        Args:
            code: Python source text to execute.

        Returns:
            Captured stdout (plus stderr, if any), followed by the value of a
            trailing expression when one exists; a success placeholder when
            nothing was printed; or an ``"Error: ..."`` string on failure.
        """
        # Capture stdout and stderr produced by the executed code.
        stdout = io.StringIO()
        stderr = io.StringIO()
        output = ""
        return_value = None

        # Restricted namespace: only this whitelist of harmless builtins is
        # exposed, so the executed code cannot import modules or open files.
        # NOTE(review): exec-based sandboxing is not a hard security boundary;
        # treat inputs as semi-trusted.
        safe_globals = {
            '__builtins__': {
                'print': print,
                'len': len,
                'str': str,
                'int': int,
                'float': float,
                'list': list,
                'dict': dict,
                'sum': sum,
                'max': max,
                'min': min,
                'abs': abs,
                'round': round,
                'range': range,
                'enumerate': enumerate,
                'zip': zip,
            }
        }
        safe_locals = {}

        try:
            # Execute with captured output
            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
                # Try to detect if there's a return value (last expression)
                try:
                    tree = ast.parse(code)
                    last_node = tree.body[-1] if tree.body else None

                    # If the last statement is an expression, capture its value
                    if isinstance(last_node, ast.Expr):
                        # Split code to add a return value assignment.
                        # NOTE(review): this line-based rewrite assumes the final
                        # expression fits on one unindented last line — a
                        # multi-line or indented trailing expression would break
                        # here and fall through to the plain exec below.
                        lines = code.rstrip().split('\n')
                        last_line = lines[-1]
                        exec_code = '\n'.join(lines[:-1]) + f'\n__result__ = {last_line}'

                        # Execute modified code
                        exec(exec_code, safe_globals, safe_locals)
                        return_value = safe_locals.get('__result__')
                    else:
                        # Normal execution
                        exec(code, safe_globals, safe_locals)
                except:
                    # If parsing or the rewrite fails, just execute the code as is
                    exec(code, safe_globals, safe_locals)

            # Get output
            output = stdout.getvalue()
            if stderr.getvalue():
                output += "\n" + stderr.getvalue()

            # Add return value if it exists
            if return_value is not None:
                output += f"\n\nFinal result: {return_value}"

            return output.strip() if output.strip() else str(return_value) if return_value is not None else "Code executed successfully (no output)"

        except Exception as e:
            # Capture exception information
            error_output = f"Error: {type(e).__name__}: {str(e)}"
            logger.error(f"Code execution error: {error_output}")
            return error_output
|
289 |
+
|
290 |
+
def analyze_python_file(self, file_path: str, question: str = "What is the final output of this code?") -> str:
|
291 |
+
"""
|
292 |
+
Read and analyze Python code file
|
293 |
+
|
294 |
+
Args:
|
295 |
+
file_path: Path to Python (.py) file
|
296 |
+
question: Question about the code
|
297 |
+
|
298 |
+
Returns:
|
299 |
+
Analysis or execution result of the Python code
|
300 |
+
"""
|
301 |
+
if not validate_file_exists(file_path):
|
302 |
+
return f"Error: Python file not found at {file_path}"
|
303 |
+
|
304 |
+
try:
|
305 |
+
# Read the Python file
|
306 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
307 |
+
code_content = f.read()
|
308 |
+
|
309 |
+
if not code_content.strip():
|
310 |
+
return "Error: Python file is empty"
|
311 |
+
|
312 |
+
# Validate syntax
|
313 |
+
if not self._validate_python_code(code_content):
|
314 |
+
return "Error: Python file contains syntax errors"
|
315 |
+
|
316 |
+
# If question asks for output/result, execute the code
|
317 |
+
if any(keyword in question.lower() for keyword in ['output', 'result', 'execute', 'run', 'final']):
|
318 |
+
logger.info(f"Executing Python code from {file_path}")
|
319 |
+
execution_result = self._execute_python_code(code_content)
|
320 |
+
|
321 |
+
# Also provide AI analysis if needed
|
322 |
+
if len(execution_result) < 50: # Short result, add AI analysis
|
323 |
+
payload = {
|
324 |
+
"model": self.text_model,
|
325 |
+
"messages": [
|
326 |
+
{
|
327 |
+
"role": "user",
|
328 |
+
"content": f"Python code:\n``````\n\nExecution result: {execution_result}\n\nQuestion: {question}"
|
329 |
+
}
|
330 |
+
],
|
331 |
+
"temperature": 0,
|
332 |
+
"max_tokens": 1024
|
333 |
+
}
|
334 |
+
|
335 |
+
ai_analysis = self._make_openrouter_request(payload)
|
336 |
+
return f"Execution result: {execution_result}\n\nAnalysis: {ai_analysis}"
|
337 |
+
else:
|
338 |
+
return execution_result
|
339 |
+
else:
|
340 |
+
# Just analyze the code without execution
|
341 |
+
payload = {
|
342 |
+
"model": self.text_model,
|
343 |
+
"messages": [
|
344 |
+
{
|
345 |
+
"role": "user",
|
346 |
+
"content": f"Analyze this Python code and answer the question.\n\nPython code:\n``````\n\nQuestion: {question}"
|
347 |
+
}
|
348 |
+
],
|
349 |
+
"temperature": 0,
|
350 |
+
"max_tokens": 2048
|
351 |
+
}
|
352 |
+
|
353 |
+
return self._make_openrouter_request(payload)
|
354 |
+
|
355 |
+
except Exception as e:
|
356 |
+
error_msg = f"Error analyzing Python file: {str(e)}"
|
357 |
+
logger.error(error_msg)
|
358 |
+
return error_msg
|
359 |
+
|
360 |
def describe_image(self, image_path: str) -> str:
|
361 |
"""Get a detailed description of an image"""
|
362 |
return self.analyze_image(
|
|
|
383 |
"""Standalone function to analyze audio transcript"""
|
384 |
tools = MultimodalTools()
|
385 |
return tools.analyze_audio_transcript(transcript, question)
|
386 |
+
|
387 |
+
def analyze_excel(file_path: str, question: str) -> str:
    """Convenience wrapper: analyze an Excel/CSV file with a fresh MultimodalTools instance."""
    return MultimodalTools().analyze_excel_file(file_path, question)
|
391 |
+
|
392 |
+
# ✅ NEW FUNCTION - Added Python code convenience function
|
393 |
+
def analyze_python(file_path: str, question: str = "What is the final output of this code?") -> str:
    """Convenience wrapper: analyze a Python file with a fresh MultimodalTools instance."""
    return MultimodalTools().analyze_python_file(file_path, question)
|
tools/search_tools.py
CHANGED
@@ -17,7 +17,7 @@ class SearchTools:
|
|
17 |
# Tertiary: SerpAPI (expensive, fallback only)
|
18 |
self.serpapi_key = os.getenv("SERPAPI_KEY")
|
19 |
|
20 |
-
def search_duckduckgo(self, query: str, max_results: int =
|
21 |
"""
|
22 |
Free search using DuckDuckGo Instant Answer API
|
23 |
|
@@ -69,7 +69,7 @@ class SearchTools:
|
|
69 |
logger.error(f"DuckDuckGo search failed: {str(e)}")
|
70 |
return []
|
71 |
|
72 |
-
def search_tavily(self, query: str, max_results: int =
|
73 |
"""
|
74 |
Search using Tavily API (cost-effective)
|
75 |
|
@@ -116,7 +116,7 @@ class SearchTools:
|
|
116 |
logger.error(f"Tavily search failed: {str(e)}")
|
117 |
return []
|
118 |
|
119 |
-
def search_serpapi(self, query: str, max_results: int =
|
120 |
"""
|
121 |
Search using SerpAPI (expensive, fallback only)
|
122 |
|
@@ -162,7 +162,7 @@ class SearchTools:
|
|
162 |
logger.error(f"SerpAPI search failed: {str(e)}")
|
163 |
return []
|
164 |
|
165 |
-
def search(self, query: str, max_results: int =
|
166 |
"""
|
167 |
Comprehensive search using multiple providers with fallback strategy
|
168 |
|
@@ -201,23 +201,23 @@ class SearchTools:
|
|
201 |
logger.error("All search providers failed")
|
202 |
return []
|
203 |
|
204 |
-
def search_news(self, query: str, max_results: int =
|
205 |
"""Search for news articles"""
|
206 |
news_query = f"news {query}"
|
207 |
return self.search(news_query, max_results)
|
208 |
|
209 |
-
def search_academic(self, query: str, max_results: int =
|
210 |
"""Search for academic content"""
|
211 |
academic_query = f"academic research {query} site:scholar.google.com OR site:arxiv.org OR site:researchgate.net"
|
212 |
return self.search(academic_query, max_results)
|
213 |
|
214 |
# Convenience functions
|
215 |
-
def search_web(query: str, max_results: int =
|
216 |
"""Standalone function for web search"""
|
217 |
tools = SearchTools()
|
218 |
return tools.search(query, max_results)
|
219 |
|
220 |
-
def search_news(query: str, max_results: int =
|
221 |
"""Standalone function for news search"""
|
222 |
tools = SearchTools()
|
223 |
return tools.search_news(query, max_results)
|
|
|
17 |
# Tertiary: SerpAPI (expensive, fallback only)
|
18 |
self.serpapi_key = os.getenv("SERPAPI_KEY")
|
19 |
|
20 |
+
def search_duckduckgo(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
21 |
"""
|
22 |
Free search using DuckDuckGo Instant Answer API
|
23 |
|
|
|
69 |
logger.error(f"DuckDuckGo search failed: {str(e)}")
|
70 |
return []
|
71 |
|
72 |
+
def search_tavily(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
73 |
"""
|
74 |
Search using Tavily API (cost-effective)
|
75 |
|
|
|
116 |
logger.error(f"Tavily search failed: {str(e)}")
|
117 |
return []
|
118 |
|
119 |
+
def search_serpapi(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
120 |
"""
|
121 |
Search using SerpAPI (expensive, fallback only)
|
122 |
|
|
|
162 |
logger.error(f"SerpAPI search failed: {str(e)}")
|
163 |
return []
|
164 |
|
165 |
+
def search(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
166 |
"""
|
167 |
Comprehensive search using multiple providers with fallback strategy
|
168 |
|
|
|
201 |
logger.error("All search providers failed")
|
202 |
return []
|
203 |
|
204 |
+
def search_news(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
205 |
"""Search for news articles"""
|
206 |
news_query = f"news {query}"
|
207 |
return self.search(news_query, max_results)
|
208 |
|
209 |
+
def search_academic(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
210 |
"""Search for academic content"""
|
211 |
academic_query = f"academic research {query} site:scholar.google.com OR site:arxiv.org OR site:researchgate.net"
|
212 |
return self.search(academic_query, max_results)
|
213 |
|
214 |
# Convenience functions
|
215 |
+
def search_web(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Module-level helper: run a web search on a fresh SearchTools instance."""
    return SearchTools().search(query, max_results)
|
219 |
|
220 |
+
def search_news(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Module-level helper: run a news search on a fresh SearchTools instance."""
    return SearchTools().search_news(query, max_results)