AI_Research_Assistant / tools /fetch_recent_arxiv_papers.py
MonsieurMory's picture
Upload agent
0ea0524 verified
from typing import Any, Dict, List, Optional

from smolagents import Tool
class SimpleTool(Tool):
    """Tool that lists the newest arXiv submissions in a single subject category.

    The tool queries the arXiv Atom API, sorted by submission date in
    descending order, and returns one plain dictionary per paper.
    """

    name = "fetch_recent_arxiv_papers"
    description = "Fetches the most recent research papers from arXiv for a given category."
    inputs = {
        'category': {
            'type': 'string',
            'description': 'The arXiv subject category. Examples include: - "cs.LG": Machine Learning (Computer Science) - "cs.CL": Computation and Language (NLP) - "cs.CV": Computer Vision - "stat.ML": Machine Learning (Statistics) - "math.OC": Optimization and Control',
        },
        'max_results': {
            'type': 'integer',
            'nullable': True,
            # The text previously claimed a default of 5; the signature's
            # actual default is 2, so the description now matches the code.
            'description': 'The number of papers to fetch (default is 2).',
        },
    }
    output_type = "array"

    def forward(self, category: str, max_results: Optional[int] = 2) -> List[Dict]:
        """
        Fetches the most recent research papers from arXiv for a given category.

        Args:
            category (str): The arXiv subject category. Examples include:
                - "cs.LG": Machine Learning (Computer Science)
                - "cs.CL": Computation and Language (NLP)
                - "cs.CV": Computer Vision
                - "stat.ML": Machine Learning (Statistics)
                - "math.OC": Optimization and Control
            max_results (int, optional): The number of papers to fetch
                (default is 2). ``None`` is treated as the default.

        Returns:
            List[Dict]: A list of dictionaries, each containing information
            about a paper:
                - title (str)
                - summary (str)
                - link (str)
                - published (str)
                - authors (List[str])
            A field missing from the feed is returned as an empty string
            (or an empty list for ``authors``).

        Raises:
            ValueError: If ``category`` is empty or ``max_results`` is negative.
            Exception: If the API answers with a status code other than 200.
        """
        # Imports are kept local to the method, as in the original tool.
        import requests
        import xml.etree.ElementTree as ET

        if not category or not category.strip():
            raise ValueError("category must be a non-empty arXiv category string")
        # The input schema marks max_results as nullable, so None can arrive
        # here; fall back to the documented default instead of sending
        # "max_results=None" to the API.
        if max_results is None:
            max_results = 2
        if max_results < 0:
            raise ValueError("max_results must be a non-negative integer")

        base_url = "http://export.arxiv.org/api/query"
        # Let requests build and URL-encode the query string.
        params = {
            'search_query': f"cat:{category.strip()}",
            'start': 0,
            'max_results': max_results,
            'sortBy': 'submittedDate',
            'sortOrder': 'descending',
        }
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(base_url, params=params, timeout=30)
        if response.status_code != 200:
            raise Exception(f"API call failed with status code {response.status_code}")

        # Parse the raw bytes so the XML parser honours the encoding declared
        # in the document rather than a guessed text decoding.
        root = ET.fromstring(response.content)
        ns = {'atom': 'http://www.w3.org/2005/Atom'}

        def text_of(element, tag):
            # findtext returns the default when the child is absent and ''
            # when it is present but empty, so strip() is always safe here.
            return (element.findtext(tag, default='', namespaces=ns) or '').strip()

        papers = []
        for entry in root.findall('atom:entry', ns):
            authors = []
            for author in entry.findall('atom:author', ns):
                author_name = text_of(author, 'atom:name')
                if author_name:
                    authors.append(author_name)
            papers.append({
                'title': text_of(entry, 'atom:title'),
                'summary': text_of(entry, 'atom:summary'),
                'link': text_of(entry, 'atom:id'),
                'published': text_of(entry, 'atom:published'),
                'authors': authors,
            })
        return papers