# tools.py

import pandas as pd

from pathlib import Path
import requests
import regex as re
import time
import os
from duckduckgo_search import DDGS
from langchain_core.tools import tool
from langchain_community.document_loaders import ArxivLoader
import arxiv
import fitz  # PyMuPDF
import tempfile

# Base URL of the remote API. Task attachments are requested from
# f"{DEFAULT_API_URL}/files/{task_id}" (see _download_file_for_task).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# Removed complex safety wrapper - keeping things simple

def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Download the file attached to a task and store it locally.

    Issues ``GET {DEFAULT_API_URL}/files/{task_id}`` and, on a 200 response
    with a non-empty body, writes the bytes to ``hf_files/{task_id}.{ext}``
    (the directory is created on demand).

    Note that ``ext`` only chooses the local file name: the request URL does
    not depend on it, so the same remote payload is saved for any ``ext``.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        ext: File extension (without the dot) used for the local copy.

    Returns:
        The local path of the saved file, or an empty string when the server
        returned no file or the download / write failed.
    """
    print("reached _download_file_for_task")
    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        os.makedirs("hf_files", exist_ok=True)
        resp = requests.get(url, timeout=10)
        if resp.status_code == 200 and resp.content:
            with open(local_path, "wb") as f:
                f.write(resp.content)
            # Announce success only once the bytes are actually on disk.
            print(f"\n Downloaded file from {url} to {local_path} \n")
            return local_path
        print(f"No file downloaded from {url} (HTTP {resp.status_code})")
    except (requests.RequestException, OSError) as exc:
        # Best-effort helper: network and filesystem failures are reported
        # (with their cause) and signalled to the caller via "".
        print(f"Error downloading file from {url} to {local_path}: {exc}")

    # If we get here, either no file was served or the download/write failed.
    return ""

@tool
def image_tool(task_id: str) -> str:
    """
    TOOL NAME: Image Analysis Tool

    Purpose: When the user asks about images, photos, or visual content, use this tool to get a description of the image.

    Input: A task_id string that identifies the specific image to analyze.

    Example usage:
    - "What is shown in this image?"
    - "Describe the contents of the picture"
    - "What objects are visible in the photo?"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: download the task's image, POST its bytes to the Hugging Face
    # Inference API for a captioning model, and return the caption.
    # Returns "Image Caption:\n<caption>" on success; every failure is
    # reported as a plain message string instead of raising.

    # Try downloading image with one of the allowed extensions
    for ext in ("png", "jpg", "jpeg"):
        file_path = _download_file_for_task(task_id, ext)
        if file_path and os.path.exists(file_path):
            break
    else:
        return f"Error: Image file for task_id '{task_id}' not found."

    # Read the image bytes
    try:
        with open(file_path, "rb") as f:
            image_bytes = f.read()
    except OSError as e:
        return f"Error reading image: {str(e)}"

    # Load HF token
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN not set in environment."

    # Use a single reliable model
    model = "Salesforce/blip-image-captioning-base"
    headers = {"Authorization": f"Bearer {hf_token}"}

    try:
        # The Inference API expects the image as the raw request body.
        # Passing it via `files=` would wrap it in a multipart/form-data
        # envelope, which the endpoint does not decode as an image.
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=headers,
            data=image_bytes,
            timeout=30,
        )
    except requests.RequestException as e:
        return f"Error calling HuggingFace API: {e}"

    # Parse response
    if response.status_code != 200:
        return f"Error from model ({model}): {response.status_code} - {response.text}"

    try:
        result = response.json()
        # The payload may be a list of result dicts or a single dict.
        if isinstance(result, list) and result:
            caption = result[0].get("generated_text", "").strip()
        elif isinstance(result, dict):
            caption = result.get("generated_text", "").strip()
        else:
            caption = ""
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: unexpected JSON shape.
        return f"Error parsing response: {e}"

    if not caption:
        return "No caption generated by model."

    return f"Image Caption:\n{caption}"


@tool
def excel_tool(task_id: str) -> str:
    """
    TOOL NAME: Excel Data Analysis Tool

    Purpose: When the user asks about data in spreadsheets, tables, or Excel files, use this tool to read and analyze the data.

    Input: A task_id string that identifies the specific Excel file to analyze.

    Example usage:
    - "What data is in this spreadsheet?"
    - "Analyze the Excel file contents"
    - "Show me the data from the table"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: download the task's .xlsx file, load one sheet into a DataFrame
    # and return it as the string form of a list of row dicts.
    # Failures are returned as "Error ..." strings instead of raising.
    print("reached excel_tool")
    sheet = "Sheet1"  # preferred sheet; falls back to the first sheet if absent

    local_xlsx = _download_file_for_task(task_id, "xlsx")
    if not local_xlsx or not os.path.exists(local_xlsx):
        return "Error: Excel file not found for this task."

    try:
        # The context manager closes the workbook handle even when parsing
        # fails, so the downloaded file is never left open.
        with pd.ExcelFile(local_xlsx) as xls:
            target_sheet = sheet if sheet in xls.sheet_names else xls.sheet_names[0]
            df = pd.read_excel(xls, sheet_name=target_sheet)
        records = str(df.to_dict(orient="records"))
    except Exception as e:
        # Deliberately broad: pandas and its Excel engine can raise many
        # unrelated exception types for a malformed or mislabelled file.
        return f"Error reading Excel file: {e}"

    print(f"Excel file read successfully: {records}")
    return records
    

import openai
@tool
def audio_transcriber_tool(task_id: str) -> str:
    """
    TOOL NAME: Audio Transcription Tool

    Purpose: When the user asks about audio files, speech, or wants to know what was said in an audio recording, use this tool.

    Input: A task_id string that identifies the specific audio file to transcribe.

    Example usage:
    - "What is said in this audio file?"
    - "Transcribe the speech from the recording"
    - "Convert the audio to text"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: download the task's audio file, send it to the OpenAI
    # transcription endpoint ("whisper-1") and return the stripped text.
    # Any failure is returned as a message string instead of raising.
    print("reached audio_transcriber_tool")

    # Always download afresh. The generator is lazy, so the extensions are
    # tried in order and no further download is attempted after the first
    # non-empty path.
    downloads = (
        _download_file_for_task(task_id, suffix)
        for suffix in ("mp3", "wav", "m4a")
    )
    audio_path = next((path for path in downloads if path), "")

    if not (audio_path and os.path.exists(audio_path)):
        failure = "Error: No audio file found (download failed)."
        print(failure)
        return failure

    # Send to OpenAI Whisper
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            # Raised inside the try so it is reported like any other failure.
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(audio_path, "rb") as handle:
            print("reached openai.audio.transcriptions.create")
            transcription = openai.audio.transcriptions.create(
                model="whisper-1",
                file=handle,
            )
        transcript = transcription.text.strip()
    except Exception as err:
        transcript = f"Error during transcription: {err}"

    print(f"Transcripted as transcript: {transcript}")
    return transcript
# tools.py

import re
import requests

@tool
def wikipedia_search_tool(wiki_query: str) -> str:
    """
    TOOL NAME: Wikipedia Search Tool

    Purpose: When the user asks about general knowledge, facts, or wants to know about a specific topic, use this tool.

    Input: A string describing the topic to search for on Wikipedia.

    Example usage:
    - "What is the capital of France?"
    - "Find information about quantum computing"
    - "What is the history of the internet?"
    If no valid wiki_query is provided, returns an empty string.
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: search the MediaWiki API for the query, take the top hit and
    # return "Title: ...\n\n<summary>\n\n[END_OF_SEARCH]" built from the REST
    # summary endpoint. Successful and "not found" answers are cached per
    # query; network/unexpected errors are returned as strings, not cached.
    from urllib.parse import quote  # function-scope import keeps the block self-contained

    print("reached wikipedia search tool")

    # --- Simple in-memory cache to avoid repeated look-ups in a single session
    # NOTE(review): after decoration this name refers to the tool object, so
    # the cache is stored as an attribute on it — confirm the installed
    # langchain/pydantic versions allow setting it.
    if not hasattr(wikipedia_search_tool, "_cache"):
        wikipedia_search_tool._cache = {}
    cache = wikipedia_search_tool._cache

    # Tolerate None so the "returns an empty string" contract holds.
    query = (wiki_query or "").strip()
    if not query:
        return ""

    if query in cache:
        print("Returning cached Wikipedia result for query:", query)
        return cache[query]

    try:
        # 1) Use the MediaWiki API to search for page titles matching the query
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "utf8": 1,
        }
        search_resp = requests.get(
            "https://en.wikipedia.org/w/api.php", params=search_params, timeout=10
        )
        search_resp.raise_for_status()
        search_results = search_resp.json().get("query", {}).get("search", [])

        if not search_results:
            result = f"No Wikipedia page found for '{query}'. [END_OF_SEARCH]"
        else:
            # 2) Take the first search result's title
            first_title = search_results[0].get("title", "")
            if not first_title:
                result = "Unexpected format from Wikipedia search. [END_OF_SEARCH]"
            else:
                # 3) Fetch the page summary for that title via the REST
                #    summary endpoint. The title is a single path segment, so
                #    every reserved character ("/", "?", "#", ...) must be
                #    percent-encoded; safe="" makes quote() escape "/" too.
                title_for_url = quote(first_title.replace(" ", "_"), safe="")
                summary_url = (
                    f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
                )
                summary_resp = requests.get(summary_url, timeout=10)
                summary_resp.raise_for_status()
                summary_data = summary_resp.json()

                # 4) Extract either the "extract" field or a fallback message
                summary_text = summary_data.get("extract")
                if not summary_text:
                    summary_text = summary_data.get("description", "No summary available.")

                result = f"Title: {first_title}\n\n{summary_text}\n\n[END_OF_SEARCH]"
                print("Submitted wiki successfully")

        cache[query] = result
        return result

    except requests.exceptions.RequestException as e:
        print("Wikipedia search error: ", e)
        return f"Wikipedia search error: {e} [END_OF_SEARCH]"
    except Exception as e:
        print("Unexpected error in wikipedia_search_tool: ", e)
        return f"Unexpected error in wikipedia_search_tool: {e} [END_OF_SEARCH]"

@tool
def arxiv_search_tool(query: str) -> str:
    """
    TOOL NAME: ArXiv Academic Search Tool

    Purpose: When the user asks for academic research, scientific papers, or technical information, use this tool.

    Input: A string describing the academic topic to search for on ArXiv.

    Example usage:
    - "Find research papers about machine learning"
    - "What are recent studies on climate change?"
    - "Search for papers on quantum computing"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: take the most relevant arXiv hit, download its PDF, extract the
    # text and return the title plus the first 3000 characters of
    # whitespace-normalised text. Failures are returned as strings.
    print("Reached ArXiv tool, with query = ", query)
    try:
        # Search arXiv for the top result
        search = arxiv.Search(query=query, max_results=1, sort_by=arxiv.SortCriterion.Relevance)
        result = next(search.results(), None)

        if not result:
            print("No arXiv result found")
            return "No results found. [END_OF_SEARCH]"

        # Download PDF (bounded wait so a stalled server cannot hang the tool)
        response = requests.get(result.pdf_url, timeout=30)
        response.raise_for_status()

        # Parse the PDF straight from memory: no temp file to reopen by name,
        # and the context manager closes the document when done.
        with fitz.open(stream=response.content, filetype="pdf") as doc:
            text = "".join(page.get_text() for page in doc)

        # Clean and trim text
        text = " ".join(text.split())
        summary = text[:3000] + "..." if len(text) > 3000 else text

        return f"Title: {result.title}\n\nSummary:\n{summary}\n\n[END_OF_SEARCH]"

    except Exception as e:
        # Deliberately broad: search, download and PDF parsing each raise
        # their own library-specific exception types.
        return f"Error fetching arXiv content: {e} [END_OF_SEARCH]"


from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
# Shared chat-model client used by analyze_code_tool. It is constructed once,
# as a side effect of importing this module.
LLM = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.2)

@tool
def analyze_code_tool(task_id: str) -> str:
    """
    TOOL NAME: Code Analysis Tool

    Purpose: When the user asks about code, programming files, or wants to understand what a script does, use this tool.

    Input: A task_id string that identifies the specific code file to analyze.

    Example usage:
    - "What does this Python code do?"
    - "Analyze the code file for bugs"
    - "Explain the functions in this script"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: download the task's .py file, embed its text in a review prompt
    # and return the model's answer. Failures are returned as message strings
    # so the tool behaves like the other tools in this module.
    print("Reached analyze_code_tool")
    if not task_id:
        # Nothing to analyse: answer directly instead of spending an LLM call
        # on a placeholder string.
        return "No code provided."

    path = _download_file_for_task(task_id, "py")
    if not path:
        print("Error: .py file not found for this task.")
        return "Error: .py file not found for this task."

    try:
        code_txt = Path(path).read_text(encoding="utf-8", errors="ignore")
    except OSError as e:
        print(f"Error reading code file: {e}")
        return f"Error reading code file: {e}"

    # Normalise line endings and drop the trailing newline.
    code_sample = "\n".join(code_txt.splitlines())

    prompt = [
        SystemMessage(content="You are a senior Python code reviewer."),
        HumanMessage(content=(
            "Please analyse the following code. "
            "Summarise what it does, list key functions/classes, "
            "and point out any obvious bugs, performance issues or style problems.\n\n"
            # Blank line after the closing fence so the final instruction is
            # not glued onto the fence line.
            f"```python\n{code_sample}\n```\n\n"
            "If you can then find the output of the code and return it in the output."
        ))
    ]
    try:
        return LLM.invoke(prompt).content.strip()
    except Exception as e:
        # Deliberately broad: the client can fail with provider-specific
        # exception types (auth, rate limit, network).
        print(f"Error analysing code: {e}")
        return f"Error analysing code: {e}"


# ─────────────────────────── Math Tools ───────────────────────────────

@tool
def add_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Addition Tool

    Purpose: When the user asks to add numbers or perform addition calculations, use this tool.

    Input: Two numbers (a and b) to add together.

    Example usage:
    - "What is 25 + 17?"
    - "Add 3.14 and 2.86"
    - "Calculate the sum of 100 and 250"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    # Returns a sentence showing both operands and their sum.
    print("Reached add_tool")
    return f"Addition result: {a} + {b} = {a + b}"

@tool
def subtract_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Subtraction Tool

    Purpose: When the user asks to subtract numbers or perform subtraction calculations, use this tool.

    Input: Two numbers (a and b) where b is subtracted from a.

    Example usage:
    - "What is 50 - 23?"
    - "Subtract 15.5 from 40.2"
    - "Calculate 1000 minus 347"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    # Returns a sentence showing both operands and the difference a - b.
    print("Reached subtract_tool")
    return f"Subtraction result: {a} - {b} = {a - b}"

@tool
def multiply_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Multiplication Tool

    Purpose: When the user asks to multiply numbers or perform multiplication calculations, use this tool.

    Input: Two numbers (a and b) to multiply together.

    Example usage:
    - "What is 8 × 7?"
    - "Multiply 12.5 by 4"
    - "Calculate the product of 15 and 20"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    # Returns a sentence showing both operands and their product.
    print("Reached multiply_tool")
    return f"Multiplication result: {a} × {b} = {a * b}"

@tool
def divide_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Division Tool

    Purpose: When the user asks to divide numbers or perform division calculations, use this tool.

    Input: Two numbers (a and b) where a is divided by b.

    Example usage:
    - "What is 100 ÷ 4?"
    - "Divide 75 by 3"
    - "Calculate 144 divided by 12"
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    # Returns a sentence with the quotient a / b, or an error sentence when
    # the divisor is zero (no exception is raised).
    print("Reached divide_tool")
    if b != 0:
        return f"Division result: {a} ÷ {b} = {a / b}"
    return "Division error: Cannot divide by zero"

@tool
def web_search_tool(query: str) -> str:
    """
    TOOL NAME: Web Search Tool

    Purpose: When the user asks for current information, recent news, or topics not covered by Wikipedia, use this tool.

    Input: A string describing what to search for on the web.
    """
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # Flow: run a DuckDuckGo text search (up to 5 results), retrying up to 5
    # times with a 4 second pause on exceptions or rate-limit responses, and
    # return the stringified results followed by "[END_OF_SEARCH]".
    # Only results that are not rate-limited are cached per query.
    print("reached web_search_tool")

    # NOTE(review): after decoration this name refers to the tool object, so
    # the cache is stored as an attribute on it — confirm the installed
    # langchain/pydantic versions allow setting it.
    if not hasattr(web_search_tool, "_cache"):
        web_search_tool._cache = {}
    cache = web_search_tool._cache

    # Tolerate None so a missing argument yields the message below.
    query = (query or "").strip()
    if not query:
        return "No search query provided."

    if query in cache:
        print("Returning cached web search result for query:", query)
        return cache[query]

    ddg = DDGS()
    max_retries = 5
    result_text = ""
    rate_limited = False

    for attempt in range(1, max_retries + 1):
        is_last_attempt = attempt == max_retries
        try:
            result_text = str(ddg.text(query, max_results=5))
        except Exception as e:
            if is_last_attempt:
                return f"Error during DuckDuckGo search: {e} [END_OF_SEARCH]"
            print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue

        rate_limited = "202 Ratelimit" in result_text
        if not rate_limited or is_last_attempt:
            break  # Successful, or out of retries

        print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
        time.sleep(4)

    result_text += "\n\n[END_OF_SEARCH]"
    if rate_limited:
        # Do not cache a throttled response: caching it would make every
        # later identical query fail for the rest of the session.
        print("web_search_tool: still rate limited after retries; result not cached")
    else:
        cache[query] = result_text
        print("Submitted web search successfully")
    return result_text