TDAgentTools / tdagent /tools /get_url_content.py
RodDoSanz's picture
feat: add examples and improve ui
013177f
raw
history blame
2.04 kB
from collections.abc import Sequence
import gradio as gr
import requests
from tdagent.constants import HttpContentType
def get_url_http_content(
    url: str,
    content_type: Sequence[HttpContentType] | None = None,
    timeout: int = 30,
) -> tuple[str, str]:
    """Get the content of a URL using an HTTP GET request.

    Args:
        url: The URL to fetch the content from.
        content_type: If given it should contain the expected
            content types in the response body, either as a sequence
            of content types or as a single comma-separated string.
            The server may not honor the requested content types.
        timeout: Request timeout in seconds. Defaults to 30.

    Returns:
        A pair of strings (content, error_message). If there is an
        error getting content from the URL (malformed URL, connection
        failure, timeout or an HTTP error status) the `content` will
        be empty and `error_message` will contain the error cause.
        Otherwise, `error_message` will be empty and the content will
        be filled with data fetched from the URL.
    """
    headers = {}
    if content_type:
        if isinstance(content_type, str):
            # A bare string is itself a sequence of characters, so joining
            # it directly would produce "h,t,m,l" for "html". Interpret it
            # as a comma-separated list of content types instead.
            accepted = [
                part.strip() for part in content_type.split(",") if part.strip()
            ]
        else:
            accepted = list(content_type)
        if accepted:
            headers["Accept"] = ",".join(accepted)

    try:
        response = requests.get(
            url,
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of every error raised by
        # `requests` (MissingSchema, InvalidURL, ConnectionError, Timeout,
        # HTTPError, ...), so all failures are reported through the
        # documented (content, error_message) contract instead of raising.
        # Fall back to the exception class name if the message is empty so
        # that a failure is never confused with an empty successful body.
        return "", str(err) or type(err).__name__

    return response.text, ""
# Gradio interface exposing `get_url_http_content` as a simple form: two text
# inputs (the URL and the desired content type) and one text output.
# NOTE(review): the wrapped function returns a (content, error_message) pair
# while a single output component is declared here — confirm that the way
# gradio renders that pair is the intended behaviour.
gr_get_url_http_content = gr.Interface(
    fn=get_url_http_content,
    title="Get the content of a URL using an HTTP GET request.",
    description=(
        "Get the content of a URL in one of the specified content types. "
        "The server may not honor the content type and if it fails the "
        "reason should also be returned with the corresponding HTTP "
        "error code. Be wary of retrieving the content of malicious urls."
    ),
    inputs=[
        gr.Textbox(label="url"),
        gr.Textbox(label="content type"),
    ],
    outputs=gr.Text(label="content"),
    # One example row: a value for each input, in the order declared above.
    examples=[["https://google.com", "html"]],
)