import os
import io
import contextlib
import re
from typing import Dict, List, Union

import pandas as pd
from PIL import Image as PILImage
from huggingface_hub import InferenceClient
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition, ToolNode
from langchain_openai import ChatOpenAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools import tool
from langchain_google_community import GoogleSearchAPIWrapper


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


@tool
def subtract(a: int, b: int) -> int:
    """Subtract the second integer from the first."""
    return a - b


@tool
def divide(a: int, b: int) -> float:
    """Divide the first integer by the second; raises if the divisor is zero."""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b


@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing the first integer by the second."""
    return a % b


@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 2 documents."""
    try:
        docs = WikipediaLoader(query=query, load_max_docs=2, lang="en").load()
        if not docs:
            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
        formatted = "\n\n---\n\n".join(
            f'<Document source="{d.metadata.get("source", "")}"/>\n{d.page_content}\n</Document>'
            for d in docs
        )
        return {"wiki_results": formatted}
    except Exception as e:
        print(f"Error in wiki_search tool: {e}")
        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}


search = GoogleSearchAPIWrapper()


@tool
def google_web_search(query: str) -> str:
    """Perform a web search (via Google Custom Search) and return results."""
    try:
        return search.run(query)
    except Exception as e:
        print(f"Error in google_web_search tool: {e}")
        return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"


@tool
def arxiv_search(query: str) -> dict:
    """Search arXiv for a query and return up to 3 paper excerpts."""
    try:
        docs = ArxivLoader(query=query, load_max_docs=3).load()
        formatted = "\n\n---\n\n".join(
            f'<Document source="{d.metadata.get("Title", "")}"/>\n{d.page_content[:1000]}\n</Document>'
            for d in docs
        )
        return {"arxiv_results": formatted}
    except Exception as e:
        print(f"Error in arxiv_search tool: {e}")
        return {"arxiv_results": f"Error occurred while searching arXiv for '{query}'. Details: {str(e)}"}


HF_API_TOKEN = os.getenv("HF_API_TOKEN")
HF_INFERENCE_CLIENT = None
if HF_API_TOKEN:
    HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
else:
    print("WARNING: HF_API_TOKEN not set. Image and audio tools will not function.")


@tool
def read_file_content(file_path: str) -> Dict[str, str]:
    """Read a file and return its primary information. Text/code/Excel files
    return their content directly; media files return a hint telling the agent
    which tool to use instead."""
    try:
        _, file_extension = os.path.splitext(file_path)
        file_extension = file_extension.lower()
        if file_extension in (".txt", ".py"):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return {"file_type": "text/code", "file_name": file_path, "file_content": content}
        elif file_extension == ".xlsx":
            df = pd.read_excel(file_path)
            return {"file_type": "excel", "file_name": file_path, "file_content": df.to_string()}
        elif file_extension in (".jpeg", ".jpg", ".png"):
            return {
                "file_type": "image",
                "file_name": file_path,
                "file_content": f"Image file '{file_path}' detected. Use the 'describe_image' tool to get a textual description.",
            }
        elif file_extension == ".mp3":
            # For MP3 we only flag the file type; the agent is expected to hand the
            # audio to a model that accepts it natively (e.g. Gemini). A more robust
            # implementation would read the bytes here and attach them to the
            # message content for the LLM to process directly.
            return {
                "file_type": "audio",
                "file_name": file_path,
                "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly.",
            }
        else:
            return {
                "file_type": "unsupported",
                "file_name": file_path,
                "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, and .mp3 files are recognized.",
            }
    except FileNotFoundError:
        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
    except Exception as e:
        return {"file_error": f"Error reading file {file_path}: {e}"}
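
# Note: functions decorated with @tool become LangChain tools, so they are
# called via .invoke() with a dict of arguments rather than directly. The file
# name below is purely illustrative.
#
#   multiply.invoke({"a": 6, "b": 7})                        # -> 42
#   read_file_content.invoke({"file_path": "example.xlsx"})
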
@tool
def python_interpreter(code: str) -> Dict[str, str]:
    """Execute Python code and return its standard output, or the error message
    if execution fails. Note: exec() runs arbitrary code without sandboxing, so
    only use this tool in a trusted environment."""
    old_stdout = io.StringIO()
    with contextlib.redirect_stdout(old_stdout):
        try:
            # Use a single namespace for globals and locals so that functions
            # defined in the submitted code can see each other.
            exec_namespace = {}
            exec(code, exec_namespace)
            return {"execution_result": old_stdout.getvalue().strip()}
        except Exception as e:
            return {"execution_error": str(e)}


@tool
def describe_image(image_path: str) -> Dict[str, str]:
    """Generate a textual description for an image file (JPEG, JPG, PNG) using an
    image-to-text model from the Hugging Face Inference API. Requires the
    HF_API_TOKEN environment variable to be set."""
    if not HF_INFERENCE_CLIENT:
        return {"error": "Hugging Face API token not configured for image description. Cannot use this tool."}
    try:
        with open(image_path, "rb") as f:
            image_bytes = f.read()
        result = HF_INFERENCE_CLIENT.image_to_text(image_bytes)
        # Recent huggingface_hub versions return an ImageToTextOutput object;
        # older versions return a plain string.
        description = getattr(result, "generated_text", result)
        return {"image_description": description, "image_path": image_path}
    except FileNotFoundError:
        return {"error": f"Image file not found: {image_path}. Please ensure the file exists."}
    except Exception as e:
        return {"error": f"Error describing image {image_path}: {str(e)}"}


# --- YouTube Tool ---
@tool
def Youtube(url: str, question: str) -> Dict[str, str]:
    """Answer a question about the YouTube video at the given URL.

    Note: this is a simulated response. A real application would interact with
    the YouTube Data API or a video analysis/transcription service to get actual
    video information and transcripts."""
    print(f"Youtube called with URL: {url}, Question: {question}")
    # Placeholder for an actual YouTube API call; simulated content for one
    # known video is returned for demonstration.
    if "https://www.youtube.com/watch?v=1htKBjuUWec" in url or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
        return {
            "video_url": url,
            "question_asked": question,
            "video_summary": (
                "The video titled 'Teal'c coffee first time' shows a scene where several "
                "individuals are reacting to a beverage, presumably coffee, that Teal'c is "
                "trying for the first time. Key moments include: a person off-screen remarking, "
                "'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; "
                "and Teal'c reacting strongly to the taste or temperature, with someone asking "
                "'isn't that hot', indicating he finds it very warm."
            ),
            "details": {
                "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
                "00:00:11": "Teal'c takes a large gulp from a black mug",
                "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
                "00:00:26": "Someone agrees, 'extremely'",
            },
        }
    else:
        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}
# --- END YOUTUBE TOOL ---
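
# For real video data instead of the simulation above, one minimal option is the
# third-party youtube-transcript-api package (an assumption: it must be installed
# separately, it covers transcripts only, and the classic interface shown here
# has been superseded by an instance-based API in recent releases).
#
# from youtube_transcript_api import YouTubeTranscriptApi
#
# def fetch_transcript(video_id: str) -> str:
#     """Return the concatenated transcript text for a YouTube video id."""
#     entries = YouTubeTranscriptApi.get_transcript(video_id)
#     return " ".join(entry["text"] for entry in entries)
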
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Tools exposed to the agent (the former transcribe_audio tool has been removed).
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    google_web_search,
    arxiv_search,
    read_file_content,
    python_interpreter,
    describe_image,
    Youtube,
]

with open("prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

sys_msg = SystemMessage(content=system_prompt)
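
# Hypothetical helper sketching how audio bytes could be attached for Gemini,
# assuming the "media" content-block format supported by langchain-google-genai
# (inline data must be base64-encoded). It is illustrative only and is not wired
# into the graph below.
import base64


def attach_audio_message(file_path: str, prompt: str = "Here is the audio file:") -> HumanMessage:
    """Build a multimodal HumanMessage carrying an mp3 file as inline data."""
    with open(file_path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    return HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {"type": "media", "mime_type": "audio/mp3", "data": audio_b64},
        ]
    )
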
def build_graph(provider: str = "gemini"):
    if provider == "gemini":
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-pro-preview-05-06",
            temperature=1.0,
            max_retries=2,
            google_api_key=GEMINI_API_KEY,
            max_output_tokens=5000,
        )
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                # HuggingFaceEndpoint takes a repo id (or endpoint_url), not `url`;
                # HF endpoints also require a strictly positive temperature.
                repo_id="meta-llama/Llama-2-7b-chat-hf",
                temperature=0.01,
            ),
        )
    else:
        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")

    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        messages_to_send = [sys_msg] + state["messages"]
        # If read_file_content flagged an audio file, the actual bytes still need
        # to be attached as a media part for Gemini to process the audio natively
        # (see the attach_audio_message sketch above). For now we rely on the
        # framework to pass along whatever content is already in the messages.
        llm_response = llm_with_tools.invoke(messages_to_send)
        print(f"LLM Raw Response: {llm_response}")
        return {"messages": [llm_response]}

    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
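
# Minimal smoke test, assuming GEMINI_API_KEY is set and prompt.txt exists; the
# question string is only an illustration.
if __name__ == "__main__":
    graph = build_graph(provider="gemini")
    result = graph.invoke({"messages": [HumanMessage(content="What is 6 multiplied by 7?")]})
    for message in result["messages"]:
        message.pretty_print()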