# Spaces: Running  (hosting-page status banner captured with this file; not part of the program)
import datetime

import pytz
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

from Gradio_UI import GradioUI
from tools.final_answer import FinalAnswerTool
def webpage_summarizer(url: str) -> str:
    """Extracts and summarizes main content from a webpage

    The page is fetched with a browser-like User-Agent and a 10 second
    timeout, parsed with BeautifulSoup, and reduced to its title plus the
    first five paragraphs longer than 50 characters. The result is capped at
    1500 characters (followed by "..." when truncated).

    Failures are reported in the returned string rather than raised, so the
    caller always gets text back.

    Args:
        url: URL of the webpage to summarize

    Returns:
        str: A summary of the webpage content including title and main text,
            or a message starting with "Error fetching webpage:" /
            "Error processing webpage:" if something went wrong.
    """
    try:
        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch the webpage; raise_for_status turns 4xx/5xx into an exception
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse from the raw bytes instead of response.text: when the server
        # sends no charset, requests falls back to a default that often
        # mangles non-ASCII pages, whereas BeautifulSoup can use the
        # document's own <meta> declaration. A charset explicitly given in
        # the HTTP header is still honoured via from_encoding.
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = response.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(response.content, 'html.parser', from_encoding=declared_encoding)

        # Get the title. get_text() is used rather than .string because
        # .string is None for an empty <title> or one with several child
        # nodes, which previously produced the literal text "None".
        title = soup.title.get_text().strip() if soup.title else ""
        if not title:
            title = "No title found"

        # Remove script and style elements so their source never leaks
        # into the extracted paragraph text
        for element in soup(["script", "style"]):
            element.decompose()

        # Extract text from paragraphs, keeping only substantial ones
        stripped = (p.get_text().strip() for p in soup.find_all('p'))
        text_content = [text for text in stripped if len(text) > 50]

        # Combine the title with the first 5 substantial paragraphs
        summary = f"Title: {title}\n\nContent Summary:\n"
        summary += "\n\n".join(text_content[:5])

        # Limit the total length
        if len(summary) > 1500:
            summary = summary[:1500] + "..."
        return summary
    except requests.RequestException as e:
        # Network-level problems: bad URL, timeout, connection error, HTTP error status
        return f"Error fetching webpage: {str(e)}"
    except Exception as e:
        # Deliberately broad: any parsing failure is reported as text, not raised
        return f"Error processing webpage: {str(e)}"
# Tool instance handed to the agent below alongside the webpage summarizer.
final_answer = FinalAnswerTool()

# Model used by the agent.
# Alternative model_id previously tried here: 'deepseek-ai/DeepSeek-V2.5'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)
# Load prompt templates from prompts.yaml (resolved relative to the working
# directory). The encoding is pinned to UTF-8 so the file is decoded the same
# way regardless of the host's locale default.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Create the agent with the webpage summarizer tool
# NOTE(review): webpage_summarizer is passed here as a tool, but no @tool
# decorator is visible on its definition in this file even though `tool` is
# imported — confirm the function is wrapped as the agent expects.
agent = CodeAgent(
    model=model,
    tools=[final_answer, webpage_summarizer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Launch the Gradio interface (runs at import time; this file is a script)
GradioUI(agent).launch()