Spaces:

kokluch
/

official-gouv-url-list

Running

App Files Files Community

official-gouv-url-list / app.py

kokluch

docs: improve check_url_official tool docstring for LLM usage

6fba1cf about 1 month ago

raw

history blame contribute delete

4.87 kB

	import asyncio

	import requests
	from fastmcp import FastMCP

	# Server-level instructions handed to FastMCP for MCP clients. Keep this text in
	# sync with what the file actually registers: two resources and one tool. No
	# prompt is registered, so the text must not point clients at one.
	INSTRUCTIONS = """Official French Government URL List MCP Server

	This server provides access to official French government domain lists and URL resources through the Model Context Protocol.

	## Available Resources

	### 1. Official Government URL List (TXT)
	- URI: `resource://sitesgouv-txt`
	- Source: https://www.auracom.fr/gouv/sitesgouv.txt
	- Content: Raw text file containing official French government URLs

	### 2. Public Organization Domains (CSV)
	- URI: `resource://public-org-domains-csv`
	- Source: https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv
	- Content: CSV file with public organization domain information

	## Available Tools

	### check_url_official
	- Input: `url` (str), with or without protocol (http/https) and trailing slash
	- Output: `{"url": <input URL>, "is_official": <bool>}`
	- Purpose: Check if a given URL is present in the official French government URL list (sitesgouv.txt)

	## How to Use

	1. List Available Resources: Use `resources/list` to see all available resources
	2. Read a Resource: Use `resources/read` with the resource URI to get the content
	3. Check a URL: Use `tools/call` with name `check_url_official` and a `url` argument

	## Server Details
	- Transport: SSE (Server-Sent Events)
	- Port: 7860
	- Framework: FastMCP 2.0

	This server is designed to provide easy access to official French government data through the MCP protocol."""

	# Module-level FastMCP server object; the @mcp.resource / @mcp.tool decorators
	# further down register their handlers on this instance.
	mcp = FastMCP("Official Gouv Url List MCP Server", instructions=INSTRUCTIONS)

	@mcp.resource(
	    uri="resource://sitesgouv-txt",
	    name="Official Gouv URL List (TXT)",
	    description="Exposes the raw content of https://www.auracom.fr/gouv/sitesgouv.txt",
	    mime_type="text/plain"
	)
	async def get_sitesgouv_txt() -> str:
	    """Fetch the official government URL list from auracom.fr as plain text.

	    Returns:
	        The response body of sitesgouv.txt, as decoded by ``requests``.

	    Raises:
	        requests.HTTPError: If the server replies with an error status
	            (raised by ``raise_for_status``).
	        requests.RequestException: If the request itself fails, e.g. on a
	            connection error or when the 10-second timeout expires.
	    """
	    # requests is synchronous: calling it directly inside a coroutine would
	    # block the event loop (and every other connected client) until the HTTP
	    # call returns, so run it in a worker thread instead.
	    response = await asyncio.to_thread(
	        requests.get, "https://www.auracom.fr/gouv/sitesgouv.txt", timeout=10
	    )
	    response.raise_for_status()
	    return response.text

	@mcp.resource(
	    uri="resource://public-org-domains-csv",
	    name="Public Org Domains (CSV)",
	    description="Exposes the raw CSV content of https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/blob/master/domains.csv?ref_type=heads",
	    mime_type="text/csv"
	)
	async def get_public_org_domains_csv() -> str:
	    """Fetch the CSV of public organization domains from adullact.net as text.

	    Returns:
	        The response body of domains.csv, as decoded by ``requests``.

	    Raises:
	        requests.HTTPError: If the server replies with an error status
	            (raised by ``raise_for_status``).
	        requests.RequestException: If the request itself fails, e.g. on a
	            connection error or when the 10-second timeout expires.
	    """
	    # The "/-/raw/" endpoint returns the file itself rather than the GitLab
	    # HTML page that the "/-/blob/" URL in the description points to.
	    url = "https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv"
	    # requests is synchronous: run it in a worker thread so the coroutine
	    # does not block the event loop while waiting on the network.
	    response = await asyncio.to_thread(requests.get, url, timeout=10)
	    response.raise_for_status()
	    return response.text

	def _normalize_url(raw: str) -> str:
	    """Return *raw* trimmed, lowercased, without a leading http(s):// or trailing slashes."""
	    cleaned = raw.strip().lower()
	    for scheme in ("https://", "http://"):
	        if cleaned.startswith(scheme):
	            # Remove the scheme only when it is a real prefix. A blanket
	            # str.replace would also rewrite schemes embedded later in the
	            # string (e.g. inside a query parameter).
	            cleaned = cleaned[len(scheme):]
	            break
	    return cleaned.rstrip("/")


	@mcp.tool(
	    name="check_url_official",
	    description="Check if a given URL is present in the official French government URL list (sitesgouv.txt)."
	)
	async def check_url_official(url: str) -> dict:
	    """
	    Tool: check_url_official
	    -------------------------
	    Checks if a given URL is present in the official French government URL list (sitesgouv.txt).

	    Parameters:
	        url (str): The URL to check. The URL can be with or without protocol (http/https), and with or without a trailing slash. Example: "https://www.gouvernement.fr" or "gouvernement.fr".

	    Returns:
	        dict: A dictionary with the following keys:
	            - url (str): The input URL as provided.
	            - is_official (bool): True if the normalized URL is present in the official list, False otherwise.

	    Raises:
	        requests.HTTPError: If the list server replies with an error status.
	        requests.RequestException: If the request fails (connection error or 10-second timeout).

	    Normalization:
	        - The tool normalizes both the input URL and every entry of the official list by:
	            * Trimming surrounding whitespace
	            * Lowercasing all characters
	            * Removing a leading protocol (http:// or https://)
	            * Removing any trailing slashes
	        - This ensures that URLs are compared in a consistent way, regardless of formatting.
	        - The comparison is an exact match on the normalized string; paths and query strings are not stripped.

	    Example usage:
	        >>> await check_url_official("https://www.gouvernement.fr/")
	        {"url": "https://www.gouvernement.fr/", "is_official": True}

	        >>> await check_url_official("example.com")
	        {"url": "example.com", "is_official": False}

	    Notes for LLMs:
	        - Use this tool to verify if a website is officially recognized as a French government site.
	        - The official list is fetched live from https://www.auracom.fr/gouv/sitesgouv.txt for each call.
	        - This tool is useful for compliance, trust, or filtering tasks involving French government domains.
	    """
	    # requests is synchronous: run it in a worker thread so this coroutine
	    # does not block the event loop while the list is downloaded.
	    response = await asyncio.to_thread(
	        requests.get, "https://www.auracom.fr/gouv/sitesgouv.txt", timeout=10
	    )
	    response.raise_for_status()

	    official = {_normalize_url(line) for line in response.text.splitlines()}
	    # Blank lines (and entries that are nothing but a scheme or slashes)
	    # normalize to "", which must never make an empty input look official.
	    official.discard("")

	    return {
	        "url": url,
	        "is_official": _normalize_url(url) in official,
	    }

	if __name__ == "__main__":
	    # Script entry point: serve the MCP server over SSE, listening on all
	    # interfaces at port 7860.
	    mcp.run(
	        transport="sse",
	        host="0.0.0.0",
	        port=7860,
	    )