kokluch's picture
docs: improve check_url_official tool docstring for LLM usage
6fba1cf
import asyncio

import requests
from fastmcp import FastMCP
# Server-level instructions handed to FastMCP via `instructions=`; MCP clients
# can show this text to the model, so it must only advertise capabilities that
# this file actually registers (two resources and one tool, no prompts).
INSTRUCTIONS = """Official French Government URL List MCP Server
This server provides access to official French government domain lists and URL resources through the Model Context Protocol.
## Available Resources
### 1. Official Government URL List (TXT)
- **URI**: `resource://sitesgouv-txt`
- **Source**: https://www.auracom.fr/gouv/sitesgouv.txt
- **Content**: Raw text file containing official French government URLs
### 2. Public Organization Domains (CSV)
- **URI**: `resource://public-org-domains-csv`
- **Source**: https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv
- **Content**: CSV file with public organization domain information
## Available Tools
### check_url_official
- **Input**: `url` (string), with or without protocol (http/https) and trailing slash
- **Output**: dictionary with `url` (the input as provided) and `is_official` (boolean)
- **Purpose**: Check if a URL is present in the official French government URL list (sitesgouv.txt)
## How to Use
1. **List Available Resources**: Use `resources/list` to see all available resources
2. **Read a Resource**: Use `resources/read` with the resource URI to get the content
3. **Check a URL**: Use `tools/call` with name `check_url_official` and a `url` argument
## Server Details
- **Transport**: SSE (Server-Sent Events)
- **Port**: 7860
- **Framework**: FastMCP 2.0
This server is designed to provide easy access to official French government data through the MCP protocol."""
# Single FastMCP 2.0 server object; the resource and tool decorators in this
# file register their handlers on it.
mcp = FastMCP(
    name="Official Gouv Url List MCP Server",
    instructions=INSTRUCTIONS,
)
@mcp.resource(
    uri="resource://sitesgouv-txt",
    name="Official Gouv URL List (TXT)",
    description="Exposes the raw content of https://www.auracom.fr/gouv/sitesgouv.txt",
    mime_type="text/plain",
)
async def get_sitesgouv_txt() -> str:
    """Fetch the official government URL list from auracom.fr as plain text.

    Returns:
        The body of https://www.auracom.fr/gouv/sitesgouv.txt, as decoded by
        ``requests``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: If the request itself fails, e.g. on a
            connection error or when the 10-second timeout is exceeded.
    """
    # `requests` is synchronous: calling it directly inside a coroutine would
    # block the event loop (and every other connected client) until the HTTP
    # call returns, so the request is run in a worker thread instead.
    response = await asyncio.to_thread(
        requests.get,
        "https://www.auracom.fr/gouv/sitesgouv.txt",
        timeout=10,
    )
    response.raise_for_status()
    return response.text
@mcp.resource(
    uri="resource://public-org-domains-csv",
    name="Public Org Domains (CSV)",
    description="Exposes the raw CSV content of https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/blob/master/domains.csv?ref_type=heads",
    mime_type="text/csv",
)
async def get_public_org_domains_csv() -> str:
    """Fetch the CSV of public organization domains from adullact.net as text.

    Returns:
        The body of the ``domains.csv`` file served from the GitLab "raw"
        endpoint, unparsed.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: If the request itself fails, e.g. on a
            connection error or when the 10-second timeout is exceeded.
    """
    url = "https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv"
    # `requests` is synchronous; run it in a worker thread so the event loop
    # keeps serving other clients while the download is in progress.
    response = await asyncio.to_thread(requests.get, url, timeout=10)
    response.raise_for_status()
    return response.text
def _normalize_url(u: str) -> str:
    """Return *u* trimmed, lowercased, without a leading http(s):// or trailing slashes."""
    u = u.strip().lower()
    # Only a *leading* scheme is removed; an "http://" that appears later in
    # the string (e.g. inside a query parameter) is part of the URL itself.
    for scheme in ("https://", "http://"):
        if u.startswith(scheme):
            u = u[len(scheme):]
            break
    return u.rstrip("/")


@mcp.tool(
    name="check_url_official",
    description="Check if a given URL is present in the official French government URL list (sitesgouv.txt)."
)
async def check_url_official(url: str) -> dict:
    """Check whether a URL is present in the official French government URL list.

    The list (sitesgouv.txt) is fetched live from
    https://www.auracom.fr/gouv/sitesgouv.txt on every call.

    Parameters:
        url: The URL to check, with or without protocol (http/https) and with
            or without a trailing slash, e.g. "https://www.gouvernement.fr"
            or "gouvernement.fr".

    Returns:
        A dictionary with the following keys:
        - url (str): The input URL exactly as provided.
        - is_official (bool): True if the normalized URL equals a normalized
          entry of the official list, False otherwise.

    Raises:
        requests.HTTPError: If the list server answers with an error status.
        requests.RequestException: If the request itself fails, e.g. on a
            connection error or when the 10-second timeout is exceeded.

    Normalization (applied to both the input and every list entry):
        - Surrounding whitespace is stripped
        - All characters are lowercased
        - A leading "http://" or "https://" is removed
        - Trailing slashes are removed
        Nothing else is rewritten: a "www." prefix, a path or a query string
        is compared as-is, so the match is an exact match on the normalized
        string.

    Example results:
        await check_url_official("example.com")
        -> {"url": "example.com", "is_official": False}
        For a URL whose normalized form is in the list, "is_official" is True.

    Notes for LLMs:
        - Use this tool to verify if a website is officially recognized as a
          French government site.
        - This tool is useful for compliance, trust, or filtering tasks
          involving French government domains.
    """
    # `requests` is synchronous; run it in a worker thread so the event loop
    # keeps serving other clients while the list is being downloaded.
    response = await asyncio.to_thread(
        requests.get,
        "https://www.auracom.fr/gouv/sitesgouv.txt",
        timeout=10,
    )
    response.raise_for_status()

    normalized_official = {
        _normalize_url(line) for line in response.text.splitlines() if line.strip()
    }
    # An entry made only of a scheme and/or slashes normalizes to ""; drop it
    # so that an empty or whitespace-only input can never count as official.
    normalized_official.discard("")

    return {
        "url": url,
        "is_official": _normalize_url(url) in normalized_official,
    }
if __name__ == "__main__":
    # Script entry point: serve the MCP server over SSE, listening on every
    # network interface (0.0.0.0) at port 7860.
    mcp.run(
        transport="sse",
        host="0.0.0.0",
        port=7860,
    )