import datetime

import pytz
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    HfApiModel,
    load_tool,
    tool,
)

from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI

# Tuning knobs for webpage_summarizer.
_REQUEST_TIMEOUT_SECONDS = 10   # give up on slow hosts instead of hanging the agent
_MIN_PARAGRAPH_CHARS = 50       # paragraphs at or below this length are skipped
_MAX_PARAGRAPHS = 5             # number of qualifying paragraphs kept
_MAX_SUMMARY_CHARS = 1500       # hard cap on the returned text (before the "...")

# Browser-like User-Agent sent with every request.
_BROWSER_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


# NOTE(review): smolagents' @tool presumably builds the tool description shown
# to the model from this docstring (summary line + Args section) -- keep that
# structure intact when editing; confirm against the smolagents docs.
@tool
def webpage_summarizer(url: str) -> str:
    """Extracts and summarizes main content from a webpage

    Args:
        url: URL of the webpage to summarize

    Returns:
        str: A summary of the webpage content including title and main text
    """
    # Failures are reported as strings rather than raised, so the caller always
    # gets a textual result back from the tool.
    try:
        # Fetch the page; raise_for_status() turns 4xx/5xx replies into
        # requests.RequestException subclasses handled below.
        response = requests.get(
            url, headers=_BROWSER_HEADERS, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # `.string` is None when <title> is empty or has nested tags, which
        # used to produce "Title: None"; get_text() handles both cases.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No title found"

        # Drop script and style elements before extracting text.
        for element in soup(["script", "style"]):
            element.decompose()

        # Keep only substantial paragraphs.
        paragraphs = (p.get_text().strip() for p in soup.find_all('p'))
        text_content = [
            text for text in paragraphs if len(text) > _MIN_PARAGRAPH_CHARS
        ]

        summary = f"Title: {title}\n\nContent Summary:\n"
        if text_content:
            summary += "\n\n".join(text_content[:_MAX_PARAGRAPHS])
        else:
            # Make the empty case explicit instead of returning a bare header.
            summary += "No substantial paragraph content found."

        # Limit the total length.
        if len(summary) > _MAX_SUMMARY_CHARS:
            summary = summary[:_MAX_SUMMARY_CHARS] + "..."

        return summary

    except requests.RequestException as e:
        return f"Error fetching webpage: {str(e)}"
    except Exception as e:
        # Deliberate catch-all: parsing problems are reported as text too.
        return f"Error processing webpage: {str(e)}"


final_answer = FinalAnswerTool()

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    # Alternative model: model_id='deepseek-ai/DeepSeek-V2.5',
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Load prompt templates. The encoding is explicit so the file is read the same
# way regardless of the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Create the agent with the webpage summarizer tool
agent = CodeAgent(
    model=model,
    tools=[final_answer, webpage_summarizer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Launch the Gradio interface
GradioUI(agent).launch()