Karim's picture
Update app.py
24654af verified
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
from bs4 import BeautifulSoup
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
@tool
def webpage_summarizer(url: str) -> str:
    """Extracts and summarizes main content from a webpage

    Downloads the page with a browser-like User-Agent, then builds a short
    plain-text digest from the page title and the first few substantial
    paragraphs. Failures are reported as an error string instead of being
    raised, so the caller always receives text.

    Args:
        url: URL of the webpage to summarize

    Returns:
        str: A summary of the webpage content including title and main text
    """
    max_paragraphs = 5          # number of paragraphs kept in the summary
    min_paragraph_chars = 50    # paragraphs this short or shorter are skipped
    max_summary_chars = 1500    # hard cap on the returned text (before "...")

    try:
        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch the webpage; raise_for_status turns 4xx/5xx into an exception
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse from the raw bytes rather than response.text: when the
        # Content-Type header carries no charset, Requests assumes ISO-8859-1
        # for text/* bodies, which garbles UTF-8 pages. Handing bytes to
        # BeautifulSoup lets it honour the document's own <meta charset>.
        # A charset that IS declared in the header is still passed through.
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = (
            response.encoding if 'charset' in content_type.lower() else None
        )
        soup = BeautifulSoup(
            response.content, 'html.parser', from_encoding=declared_encoding
        )

        # soup.title.string is None for an empty <title> or one with nested
        # markup, so use get_text() and fall back when nothing is left.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No title found"

        # Remove script and style elements
        for element in soup(["script", "style"]):
            element.decompose()

        # Collect the first substantial paragraphs, stopping as soon as
        # enough have been found.
        text_content = []
        for p in soup.find_all('p'):
            text = p.get_text().strip()
            if len(text) > min_paragraph_chars:
                text_content.append(text)
                if len(text_content) == max_paragraphs:
                    break

        # Combine the content
        summary = f"Title: {title}\n\nContent Summary:\n"
        if text_content:
            summary += "\n\n".join(text_content)
        else:
            # Say so explicitly instead of returning an empty summary body.
            summary += "No substantial paragraph content found."

        # Limit the total length
        if len(summary) > max_summary_chars:
            summary = summary[:max_summary_chars] + "..."
        return summary
    except requests.RequestException as e:
        # Network-level problems: DNS, timeout, bad status, malformed URL.
        return f"Error fetching webpage: {e}"
    except Exception as e:
        # Tool boundary: report any parsing failure as text rather than
        # letting it propagate to the caller.
        return f"Error processing webpage: {e}"
# Instantiate the final-answer tool that is handed to the agent below.
final_answer = FinalAnswerTool()

# Hosted inference model used by the agent.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    #model_id='deepseek-ai/DeepSeek-V2.5',
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Load prompt templates. The encoding is given explicitly so the file is read
# the same way regardless of the platform's locale default.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Create the agent with the webpage summarizer tool
agent = CodeAgent(
    model=model,
    tools=[final_answer, webpage_summarizer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Launch the Gradio interface
GradioUI(agent).launch()