import asyncio

import requests
from fastmcp import FastMCP

# Server-level instructions, passed to FastMCP as the server's `instructions`.
# Keep this text in sync with the resources and tools registered in this file.
# NOTE(review): the "Get Help" step refers to a `server-help` prompt, but no
# prompt is registered in this file — confirm it exists elsewhere or drop it.
INSTRUCTIONS = """Official French Government URL List MCP Server

This server provides access to official French government domain lists and URL resources through the Model Context Protocol.

## Available Resources

### 1. Official Government URL List (TXT)
- **URI**: `resource://sitesgouv-txt`
- **Source**: https://www.auracom.fr/gouv/sitesgouv.txt
- **Content**: Raw text file containing official French government URLs

### 2. Public Organization Domains (CSV)
- **URI**: `resource://public-org-domains-csv`
- **Source**: https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv
- **Content**: CSV file with public organization domain information

## Available Tools

### 1. check_url_official
- **Input**: `url` (string), with or without protocol (http/https) and trailing slash
- **Output**: `{"url": <input url>, "is_official": true | false}`
- **Purpose**: Checks whether the URL is present in the official government URL list (sitesgouv.txt)

## How to Use

1. **List Available Resources**: Use `resources/list` to see all available resources
2. **Read a Resource**: Use `resources/read` with the resource URI to get the content
3. **Check a URL**: Use `tools/call` with name `check_url_official` and a `url` argument
4. **Get Help**: Use `prompts/get` with name `server-help` for detailed guidance

## Server Details
- **Transport**: SSE (Server-Sent Events)
- **Port**: 7860
- **Framework**: FastMCP 2.0

This server is designed to provide easy access to official French government data through the MCP protocol."""

# Build the FastMCP server object that the resource and tool decorators in
# this module register against; INSTRUCTIONS is handed over as its guidance text.
mcp = FastMCP("Official Gouv Url List MCP Server", instructions=INSTRUCTIONS)

@mcp.resource(
    uri="resource://sitesgouv-txt",
    name="Official Gouv URL List (TXT)",
    description="Exposes the raw content of https://www.auracom.fr/gouv/sitesgouv.txt",
    mime_type="text/plain"
)
async def get_sitesgouv_txt() -> str:
    """Return the official government URL list from auracom.fr as plain text.

    Returns:
        The response body of https://www.auracom.fr/gouv/sitesgouv.txt as
        decoded by ``requests``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.RequestException: On connection failures or when
            the 10-second timeout expires.
    """
    # ``requests`` is a blocking client: calling it directly inside a coroutine
    # would stall the event loop (and every other in-flight MCP request) until
    # the download finishes. Run it in a worker thread instead.
    response = await asyncio.to_thread(
        requests.get, "https://www.auracom.fr/gouv/sitesgouv.txt", timeout=10
    )
    response.raise_for_status()
    return response.text

# The description cites the same `raw` URL that is actually fetched (the
# GitLab `blob` URL serves an HTML page, not the CSV itself).
@mcp.resource(
    uri="resource://public-org-domains-csv",
    name="Public Org Domains (CSV)",
    description="Exposes the raw CSV content of https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv",
    mime_type="text/csv"
)
async def get_public_org_domains_csv() -> str:
    """Return the public-organization domains CSV from adullact.net as text.

    Returns:
        The response body of the ``domains.csv`` raw file as decoded by
        ``requests``; the CSV is returned unparsed.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.RequestException: On connection failures or when
            the 10-second timeout expires.
    """
    url = "https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv"
    # ``requests`` is a blocking client: run it in a worker thread so the
    # event loop keeps serving other MCP requests during the download.
    response = await asyncio.to_thread(requests.get, url, timeout=10)
    response.raise_for_status()
    return response.text

def _normalize_url(raw: str) -> str:
    """Return *raw* trimmed, lowercased, without a leading http(s) scheme or trailing slashes."""
    cleaned = raw.strip().lower()
    # Strip the scheme only when it is a true prefix. Removing "http://"
    # anywhere in the string would let an input such as
    # "www.gouvhttp://ernement.fr" collapse into an official-looking host.
    for scheme in ("https://", "http://"):
        if cleaned.startswith(scheme):
            cleaned = cleaned[len(scheme):]
            break
    return cleaned.rstrip("/")


@mcp.tool(
    name="check_url_official",
    description="Check if a given URL is present in the official French government URL list (sitesgouv.txt)."
)
async def check_url_official(url: str) -> dict:
    """
    Tool: check_url_official
    -------------------------
    Checks if a given URL is present in the official French government URL list (sitesgouv.txt).

    Parameters:
        url (str): The URL to check. The URL can be with or without protocol (http/https), and with or without a trailing slash. Example: "https://www.gouvernement.fr" or "gouvernement.fr".

    Returns:
        dict: A dictionary with the following keys:
            - url (str): The input URL exactly as provided.
            - is_official (bool): True if the normalized URL is present in the official list, False otherwise.
              A blank input is never official.

    Raises:
        requests.exceptions.HTTPError: If the list server answers with an error status.
        requests.exceptions.RequestException: On connection failures or when the 10-second timeout expires.

    Normalization:
        - The tool normalizes both the input URL and every line of the official list by:
            * Trimming surrounding whitespace
            * Lowercasing all characters
            * Removing a leading protocol (http:// or https://) — only at the start of the string
            * Removing any trailing slashes
        - The comparison is an exact match on the normalized strings; no other
          rewriting (such as adding or removing "www.") is performed.

    Example usage:
        >>> await check_url_official("https://www.gouvernement.fr/")
        {"url": "https://www.gouvernement.fr/", "is_official": True}

        >>> await check_url_official("example.com")
        {"url": "example.com", "is_official": False}

    Notes for LLMs:
        - Use this tool to verify if a website is officially recognized as a French government site.
        - The official list is fetched live from https://www.auracom.fr/gouv/sitesgouv.txt for each call.
        - This tool is useful for compliance, trust, or filtering tasks involving French government domains.
    """
    # ``requests`` is a blocking client: run it in a worker thread so the
    # event loop keeps serving other MCP requests during the download.
    response = await asyncio.to_thread(
        requests.get, "https://www.auracom.fr/gouv/sitesgouv.txt", timeout=10
    )
    response.raise_for_status()

    # Normalize every line of the list; entries that normalize to an empty
    # string (blank lines, a bare scheme or slash) are dropped so that an
    # empty input can never be reported as official.
    normalized_official = {
        entry
        for entry in map(_normalize_url, response.text.splitlines())
        if entry
    }
    return {
        "url": url,
        "is_official": _normalize_url(url) in normalized_official
    }

# Script entry point: when this file is executed directly (not imported),
# start the server with the SSE transport on port 7860, bound to 0.0.0.0
# (all network interfaces).
if __name__ == "__main__":
    mcp.run(transport="sse", host="0.0.0.0", port=7860)