Spaces:
Sleeping
Sleeping
File size: 3,035 Bytes
dbf8073 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import json
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from markitdown import MarkItDown
from smolagents import (
tool,
)
# Module-level MarkItDown converter shared by the conversion tools in this file.
md = MarkItDown(enable_plugins=True)  # plugins are enabled here; pass False to disable them
@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return at most 3 results.

    Each hit is rendered as a <Document> element whose body is the first
    1000 characters of the document content; hits are separated by a
    "---" rule.

    Args:
        query: The search query.

    Returns:
        The formatted documents as one string, empty when nothing matched.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    formatted_docs = []
    for doc in search_docs:
        metadata = doc.metadata
        # ArxivLoader documents its metadata as Published/Title/Authors/Summary,
        # so indexing metadata["source"] can raise KeyError. Fall back to the
        # entry id (present when extended metadata is loaded) and then the title.
        source = (
            metadata.get("source")
            or metadata.get("entry_id")
            or metadata.get("Title", "")
        )
        page = metadata.get("page", "")
        # The opening tag must not be self-closing ("/>"), otherwise the
        # trailing </Document> is orphaned and the content sits outside the element.
        formatted_docs.append(
            f'<Document source="{source}" page="{page}">\n'
            f"{doc.page_content[:1000]}\n"
            "</Document>"
        )

    return "\n\n---\n\n".join(formatted_docs)
@tool
def read_excel_content_to_markdown_content(file_location: str) -> str:
    """Convert the content of an Excel file to markdown text.

    The file is handed to the shared MarkItDown converter and the text it
    produces is returned unchanged.

    Args:
        file_location: The path to the Excel file.

    Returns:
        The converted text content.
    """
    return md.convert(file_location).text_content
@tool
def read_pdf_content_to_markdown(file_location: str) -> str:
    """Convert the content of a PDF file to markdown text.

    The file is handed to the shared MarkItDown converter and the text it
    produces is returned unchanged.

    Args:
        file_location: The path to the PDF file.

    Returns:
        The converted text content.
    """
    return md.convert(file_location).text_content
@tool
def get_audio_transcription(file_path: str) -> str:
    """Get the transcription of an audio file given its path.

    The work is delegated entirely to the shared MarkItDown converter; the
    text it produces for the file is returned as is.

    Args:
        file_path: The path of the audio file.

    Returns:
        The text content produced for the audio file.
    """
    return md.convert(file_path).text_content
@tool
def get_python_file_content(file_name: str) -> str:
    """Get the content of a mentioned Python file.

    Args:
        file_name: The name or path of the Python file to read.

    Returns:
        The full text of the file.
    """
    # Python source is UTF-8 by default (PEP 3120); decoding explicitly avoids
    # depending on the platform's locale encoding, which can mis-decode or
    # fail on non-ASCII source files.
    with open(file_name, encoding="utf-8") as source_file:
        return source_file.read()
@tool
def visit_webpage_to_markdown(url: str) -> str:
    """Visit a web page and return its content in markdown format.

    The URL is passed directly to the shared MarkItDown converter.

    Args:
        url: The URL of the web page.

    Returns:
        The converted text content of the page.
    """
    return md.convert(url).text_content
@tool
def extract_markdown_tables_from_markdown_content(markdown_content: str) -> str:
    """Extract the markdown tables from a markdown string as structured JSON.

    Args:
        markdown_content: The markdown string containing the table.

    Returns:
        A JSON string with the tables identified in the content.
    """
    # Imported lazily so the dependency is only needed when this tool runs.
    from mrkdwn_analysis import MarkdownAnalyzer

    table_analyzer = MarkdownAnalyzer.from_string(markdown_content)
    # NOTE(review): the result of analyse() is discarded; the call is kept
    # in case it has side effects that identify_tables() relies on - confirm.
    table_analyzer.analyse()
    tables = table_analyzer.identify_tables()
    return json.dumps(tables)
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return at most 2 results.

    Each hit is rendered as a <Document> element containing the page
    content; hits are separated by a "---" rule.

    Args:
        query: The search query.

    Returns:
        The formatted documents as one string, empty when nothing matched.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()

    formatted_docs = []
    for doc in search_docs:
        # Tolerate a missing "source" entry instead of raising KeyError.
        source = doc.metadata.get("source", "")
        page = doc.metadata.get("page", "")
        # The opening tag must not be self-closing ("/>"), otherwise the
        # trailing </Document> is orphaned and the content sits outside the element.
        formatted_docs.append(
            f'<Document source="{source}" page="{page}">\n'
            f"{doc.page_content}\n"
            "</Document>"
        )

    return "\n\n---\n\n".join(formatted_docs)
|