from smolagents import CodeAgent, HfApiModel, tool
from bs4 import BeautifulSoup
import requests
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI

@tool
def webpage_summarizer(url: str) -> str:
    """Extracts and summarizes main content from a webpage
    Args:
        url: URL of the webpage to summarize
    Returns:
        str: A summary of the webpage content including title and main text
    """
    try:
        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch the webpage
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Parse the HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Get the title
        title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"

        # Remove script and style elements so they don't pollute the text
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Extract text from paragraphs
        paragraphs = soup.find_all('p')
        text_content = []

        for p in paragraphs:
            text = p.get_text().strip()
            if len(text) > 50:  # Only include substantial paragraphs
                text_content.append(text)

        # Combine the content
        summary = f"Title: {title}\n\nContent Summary:\n"
        summary += "\n\n".join(text_content[:5])  # Include first 5 substantial paragraphs

        # Limit the total length
        if len(summary) > 1500:
            summary = summary[:1500] + "..."

        return summary

    except requests.RequestException as e:
        return f"Error fetching webpage: {str(e)}"
    except Exception as e:
        return f"Error processing webpage: {str(e)}"

final_answer = FinalAnswerTool()
model = HfApiModel(
    max_tokens=2096,   # generation cap per completion
    temperature=0.5,   # moderate sampling; deterministic enough for tool-calling code
    # model_id='deepseek-ai/DeepSeek-V2.5',  # alternative model, kept for reference
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
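
# prompt_templates is now a dict of prompt strings consumed by CodeAgent
# (e.g. a "system_prompt" key, per the smolagents template layout).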


# Create the agent with the webpage summarizer tool
agent = CodeAgent(
    model=model,
    tools=[final_answer, webpage_summarizer],
    max_steps=6,               # cap on think/act iterations per query
    verbosity_level=1,         # print a summary of each step
    grammar=None,
    planning_interval=None,    # no periodic re-planning step
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Launch the Gradio interface
GradioUI(agent).launch()
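
# Note (assumption): in the course template, GradioUI.launch() forwards extra
# keyword args to gradio, e.g. GradioUI(agent).launch(share=True) for a public link.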