File size: 2,941 Bytes
0ea0524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from smolagents import Tool
from typing import Any, Optional

class SimpleTool(Tool):
    """Tool that lists the newest arXiv submissions in a subject category.

    The class attributes below (``name``, ``description``, ``inputs``,
    ``output_type``) are the metadata this ``Tool`` subclass declares;
    ``forward`` performs the actual lookup against the arXiv export API.
    """

    name = "fetch_recent_arxiv_papers"
    description = "Fetches the most recent research papers from arXiv for a given category."
    inputs = {
        'category': {
            'type': 'string',
            'description': 'The arXiv subject category. Examples include: - "cs.LG": Machine Learning (Computer Science) - "cs.CL": Computation and Language (NLP) - "cs.CV": Computer Vision - "stat.ML": Machine Learning (Statistics) - "math.OC": Optimization and Control',
        },
        'max_results': {
            'type': 'integer',
            'nullable': True,
            # Kept in sync with the real default in forward()'s signature.
            'description': 'The number of papers to fetch (default is 2).',
        },
    }
    output_type = "array"

    # NOTE: the return annotation uses builtin generics on purpose. Annotations
    # are evaluated when this ``def`` statement runs, i.e. before any import
    # inside the method body, so typing names imported only inside forward()
    # would raise NameError as soon as the module is loaded.
    def forward(self, category: str, max_results: int = 2) -> list[dict]:
        """Fetch the most recent research papers from arXiv for a category.

        Results are requested sorted by submission date, newest first.

        Args:
            category: The arXiv subject category, for example:
                - "cs.LG": Machine Learning (Computer Science)
                - "cs.CL": Computation and Language (NLP)
                - "cs.CV": Computer Vision
                - "stat.ML": Machine Learning (Statistics)
                - "math.OC": Optimization and Control
            max_results: The number of papers to fetch (default is 2).
                ``None`` is accepted, because the input is declared nullable,
                and is treated as the default.

        Returns:
            A list of dictionaries, one per paper, with the keys:
                - title (str)
                - summary (str)
                - link (str): text of the entry's Atom ``id`` element
                - published (str)
                - authors (list of str)
            A text field that is missing or empty in the feed is returned
            as an empty string.

        Raises:
            RuntimeError: If the API answers with a status code other than 200.
            requests.RequestException: If the request itself fails or times out.
            xml.etree.ElementTree.ParseError: If the response body is not
                well-formed XML.
        """
        # Imports stay inside the method, as in the original implementation.
        import requests
        import xml.etree.ElementTree as ET

        default_max_results = 2
        if max_results is None:
            # Nullable input: avoid sending the literal string "None".
            max_results = default_max_results

        # Passing the query through ``params`` lets requests URL-encode the
        # category instead of interpolating it raw into the URL.
        response = requests.get(
            "http://export.arxiv.org/api/query",
            params={
                "search_query": f"cat:{category}",
                "start": 0,
                "max_results": max_results,
                "sortBy": "submittedDate",
                "sortOrder": "descending",
            },
            timeout=30,  # without a timeout the call can block indefinitely
        )
        if response.status_code != 200:
            raise RuntimeError(f"API call failed with status code {response.status_code}")

        # Parse the raw bytes so the XML parser picks the encoding itself
        # rather than relying on the charset requests guessed for ``.text``.
        root = ET.fromstring(response.content)
        ns = {'atom': 'http://www.w3.org/2005/Atom'}

        def text_of(node, tag):
            # findtext() yields "" for a missing or empty element, so a sparse
            # entry cannot trigger AttributeError on ``None.text``.
            return node.findtext(tag, default="", namespaces=ns).strip()

        return [
            {
                'title': text_of(entry, 'atom:title'),
                'summary': text_of(entry, 'atom:summary'),
                'link': text_of(entry, 'atom:id'),
                'published': text_of(entry, 'atom:published'),
                'authors': [
                    text_of(author, 'atom:name')
                    for author in entry.findall('atom:author', ns)
                ],
            }
            for entry in root.findall('atom:entry', ns)
        ]