Spaces:

MonsieurMory
/

AI_Research_Assistant

Runtime error

App Files Files Community

AI_Research_Assistant / tools /fetch_recent_arxiv_papers.py

MonsieurMory

Upload agent

0ea0524 verified 4 months ago

raw

history blame contribute delete

2.94 kB

	from smolagents import Tool
	from typing import Any, Optional

	class SimpleTool(Tool):
	    """Tool that fetches the most recent arXiv papers for a subject category."""

	    name = "fetch_recent_arxiv_papers"
	    description = "Fetches the most recent research papers from arXiv for a given category."
	    inputs = {
	        'category': {
	            'type': 'string',
	            'description': (
	                'The arXiv subject category. Examples include: '
	                '- "cs.LG": Machine Learning (Computer Science) '
	                '- "cs.CL": Computation and Language (NLP) '
	                '- "cs.CV": Computer Vision '
	                '- "stat.ML": Machine Learning (Statistics) '
	                '- "math.OC": Optimization and Control'
	            ),
	        },
	        'max_results': {
	            'type': 'integer',
	            'nullable': True,
	            # Kept in sync with the default in forward()'s signature.
	            'description': 'The number of papers to fetch (default is 2).',
	        },
	    }
	    output_type = "array"

	    def forward(self, category: str, max_results: Optional[int] = 2) -> list[dict]:
	        """
	        Fetches the most recent research papers from arXiv for a given category.

	        Args:
	            category (str): The arXiv subject category. Examples include:
	                - "cs.LG": Machine Learning (Computer Science)
	                - "cs.CL": Computation and Language (NLP)
	                - "cs.CV": Computer Vision
	                - "stat.ML": Machine Learning (Statistics)
	                - "math.OC": Optimization and Control
	            max_results (int, optional): The number of papers to fetch
	                (default is 2). ``None`` is treated as the default because
	                the input is declared nullable.

	        Returns:
	            list[dict]: One dictionary per paper, newest submission first:
	                - title (str)
	                - summary (str)
	                - link (str)
	                - published (str)
	                - authors (list[str])
	            A field missing from the feed is returned as an empty string
	            (or an empty list for authors).

	        Raises:
	            Exception: If the arXiv API answers with a non-200 status code.
	        """
	        # Imports stay function-local, as in the original file.
	        import requests
	        import xml.etree.ElementTree as ET

	        # The input schema allows null; never send "max_results=None" upstream.
	        if max_results is None:
	            max_results = 2

	        base_url = "http://export.arxiv.org/api/query"
	        # Passing params lets requests URL-encode the category safely.
	        params = {
	            'search_query': f"cat:{category}",
	            'start': 0,
	            'max_results': max_results,
	            'sortBy': 'submittedDate',
	            'sortOrder': 'descending',
	        }

	        # A timeout keeps the tool from hanging forever on a stalled connection.
	        response = requests.get(base_url, params=params, timeout=30)
	        if response.status_code != 200:
	            raise Exception(f"API call failed with status code {response.status_code}")

	        # Parse the raw bytes so the XML parser honours the declared encoding.
	        root = ET.fromstring(response.content)
	        ns = {'atom': 'http://www.w3.org/2005/Atom'}

	        def text_of(element, tag):
	            # findtext returns "" instead of raising when the tag is absent/empty.
	            return element.findtext(tag, default="", namespaces=ns).strip()

	        papers = []
	        for entry in root.findall('atom:entry', ns):
	            author_names = (
	                text_of(author, 'atom:name')
	                for author in entry.findall('atom:author', ns)
	            )
	            papers.append({
	                'title': text_of(entry, 'atom:title'),
	                'summary': text_of(entry, 'atom:summary'),
	                'link': text_of(entry, 'atom:id'),
	                'published': text_of(entry, 'atom:published'),
	                # Skip authors whose name element is missing or blank.
	                'authors': [name for name in author_names if name],
	            })

	        return papers