Spaces:
Runtime error
Runtime error
from typing import Any, Optional | |
from smolagents.tools import Tool | |
from langchain_community.document_loaders import ArxivLoader | |
# import logging
# Configure the logger
# logger = logging.getLogger("smolagent")
# logger.setLevel(logging.INFO)
# if not logger.handlers:
#     # Create a file handler
#     file_handler = logging.FileHandler("agent_tools_logs.txt")
#     file_handler.setLevel(logging.INFO)
#     # Log record format
#     formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#     file_handler.setFormatter(formatter)
#     # Attach the handler to the logger
#     logger.addHandler(file_handler)
class ArxivSearchTool(Tool):
    """Tool that searches arXiv and returns the matching papers as Markdown.

    Each result is rendered as a section with the paper's title, authors,
    publication date, URL and a (possibly truncated) abstract. Failures while
    querying or formatting are reported in the returned string rather than
    raised, so the calling agent always receives text.
    """

    name = "arxiv_search"
    description = "Search arXiv papers based on a query and return relevant papers with their abstracts. Useful for finding scientific papers, research articles, and academic content."
    inputs = {'query': {'type': 'string', 'description': 'The search query to look up papers on arXiv.'}}
    output_type = "string"

    def __init__(self, load_max_docs=3, **kwargs):
        """Create the tool.

        Args:
            load_max_docs: Maximum number of papers requested per query.
            **kwargs: Accepted for call-site compatibility; not used here.

        Raises:
            ImportError: If the `arxiv` package is not installed.
        """
        super().__init__()
        self.load_max_docs = load_max_docs
        try:
            # Imported only to fail fast when the dependency is missing.
            import arxiv  # noqa: F401
        except ImportError as e:
            raise ImportError(
                "You must install package `arxiv` to run this tool: run `pip install arxiv`."
            ) from e
        self.is_initialized = True

    def _ensure_pymupdf(self) -> None:
        """Check that PyMuPDF is importable and expose the `fitz.fitz` alias.

        Raises:
            ImportError: If `pymupdf` / `fitz` cannot be imported.
        """
        try:
            import pymupdf  # noqa: F401
            import fitz

            # Monkey patch: some callers still reference `fitz.fitz`, which is
            # absent in the installed module; alias it to avoid a runtime error.
            if not hasattr(fitz, 'fitz'):
                fitz.fitz = fitz
        except ImportError as e:
            raise ImportError(
                "You must install package `pymupdf` to run this tool: run `pip install pymupdf`."
            ) from e

    def _format_paper(self, doc) -> str:
        """Render one loaded document as a Markdown section.

        Args:
            doc: A document exposing `metadata` (a dict) and `page_content`
                (a string), as returned by `ArxivLoader.load()`.

        Returns:
            The Markdown text for this paper, terminated by a `---` rule.
        """
        metadata = doc.metadata
        title = metadata.get('Title', 'Untitled')
        authors = metadata.get('Authors', 'Unknown Authors')
        published = metadata.get('Published', 'Unknown Date')

        # The previous lookup used only 'Entry Id', which the loader does not
        # appear to emit, so the URL always fell back to '#'. The loader's
        # full-metadata path uses 'entry_id' and its summary path 'Entry ID'
        # (per langchain_community's arXiv wrapper); try each spelling.
        paper_url = '#'
        for key in ('entry_id', 'Entry ID', 'Entry Id'):
            candidate = metadata.get(key)
            if candidate:
                paper_url = candidate
                break

        # Prefer the real abstract ('Summary' metadata) when the loader
        # provides it; page_content holds extracted document text and is only
        # used as a fallback.
        text = metadata.get('Summary') or doc.page_content
        abstract = text[:800] + "..." if len(text) > 800 else text

        return f"## {title}\n\n**Authors:** {authors}\n**Published:** {published}\n**URL:** {paper_url}\n\n**Abstract:**\n{abstract}\n\n---\n\n"

    def forward(self, query: str) -> str:
        """Search arXiv for `query` and return the formatted results.

        Args:
            query: The search query to look up papers on arXiv.

        Returns:
            Markdown with one section per paper, a "No arXiv papers found"
            message when the search is empty, or an "Error searching arXiv"
            message when loading or formatting fails.

        Raises:
            ImportError: If `pymupdf` is not installed.
        """
        self._ensure_pymupdf()
        try:
            # ArxivLoader comes from langchain_community.
            loader = ArxivLoader(
                query=query,
                load_max_docs=self.load_max_docs,
                load_all_available_meta=True,
            )
            docs = loader.load()
            if not docs:
                return f"No arXiv papers found for the query: {query}"
            return "\n".join(self._format_paper(doc) for doc in docs)
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Error searching arXiv: {str(e)}"