Spaces:
Running
Running
File size: 2,685 Bytes
9b5b26a 1fdb095 9b5b26a c19d193 6aae614 8fe992b 9b5b26a 1fdb095 9b5b26a 3574c03 1fdb095 3574c03 1fdb095 3574c03 1fdb095 3574c03 1fdb095 8c01ffb 1fdb095 8c01ffb 1fdb095 8c01ffb 1fdb095 8c01ffb 1fdb095 8c01ffb 1fdb095 0f1bf05 1fdb095 24654af 1fdb095 861422e 1fdb095 8c01ffb 8fe992b 1fdb095 8c01ffb 861422e 8fe992b 1fdb095 8c01ffb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
from bs4 import BeautifulSoup
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
@tool
def webpage_summarizer(url: str) -> str:
    """Extracts and summarizes main content from a webpage

    Args:
        url: URL of the webpage to summarize

    Returns:
        str: A summary of the webpage content including title and main text,
            or a message starting with "Error" if the page could not be
            fetched or processed
    """
    # Tuning knobs for what counts as content and how much of it is returned.
    min_paragraph_chars = 50   # shorter paragraphs are skipped as not substantial
    max_paragraphs = 5         # only the first few substantial paragraphs are kept
    max_summary_chars = 1500   # hard cap on the returned text

    try:
        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch the webpage; raise_for_status() turns 4xx/5xx responses into
        # a requests exception handled below.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the raw bytes rather than response.text so BeautifulSoup can
        # pick up the charset declared in the document itself instead of
        # relying only on the HTTP header guess made by requests.
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or has child tags,
        # which would render as "Title: None"; get_text() covers both cases.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No title found"

        # Remove script and style elements so their text cannot leak into
        # the extracted paragraphs.
        for element in soup(["script", "style"]):
            element.decompose()

        # Extract text from paragraphs, keeping only substantial ones
        paragraph_texts = (p.get_text().strip() for p in soup.find_all('p'))
        text_content = [
            text for text in paragraph_texts if len(text) > min_paragraph_chars
        ]

        # Combine the content
        summary = f"Title: {title}\n\nContent Summary:\n"
        summary += "\n\n".join(text_content[:max_paragraphs])

        # Limit the total length
        if len(summary) > max_summary_chars:
            summary = summary[:max_summary_chars] + "..."
        return summary
    except requests.RequestException as e:
        # Network failures, timeouts and bad HTTP status codes
        return f"Error fetching webpage: {str(e)}"
    except Exception as e:
        # Deliberate catch-all: the tool reports failures as text to its
        # caller instead of raising.
        return f"Error processing webpage: {str(e)}"
# Tool instance handed to the agent below.
final_answer = FinalAnswerTool()

# Model used by the agent.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    #model_id='deepseek-ai/DeepSeek-V2.5',
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Load prompt templates. The encoding is given explicitly so the file is read
# the same way regardless of the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Create the agent with the webpage summarizer tool
agent = CodeAgent(
    model=model,
    tools=[final_answer, webpage_summarizer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Launch the Gradio interface
GradioUI(agent).launch()