File size: 3,035 Bytes
dbf8073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import json

from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from markitdown import MarkItDown
from smolagents import (
    tool,
)

# Shared MarkItDown converter reused by the conversion tools in this module;
# it is constructed with plugins enabled.
md = MarkItDown(enable_plugins=True)


@tool
def arvix_search(query: str) -> str:
    """Search arXiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        The matching documents joined by a horizontal-rule separator, each truncated to its first 1000 characters.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    rendered = []
    for doc in search_docs:
        meta = doc.metadata
        # ArxivLoader documents may not carry a "source" key (the loader's
        # usual keys are Published/Title/Authors/Summary, plus "entry_id" when
        # full metadata is requested). Fall back instead of raising KeyError so
        # the search still returns its results.
        source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
        page = meta.get("page", "")
        # Only the first 1000 characters of each paper are kept to bound the
        # size of the tool output.
        rendered.append(
            f'<Document source="{source}" page="{page}"/>\n{doc.page_content[:1000]}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)


@tool
def read_excel_content_to_markdown_content(file_location: str) -> str:
    """Convert the content of an Excel file to markdown and return it.

    Args:
        file_location: The path to the Excel file.

    Returns:
        The text content produced by the MarkItDown conversion.
    """
    # Delegate to the shared module-level converter and unwrap its text.
    return md.convert(file_location).text_content


@tool
def read_pdf_content_to_markdown(file_location: str) -> str:
    """Convert the content of a PDF file to markdown and return it.

    Args:
        file_location: The path to the PDF file.

    Returns:
        The text content produced by the MarkItDown conversion.
    """
    # Delegate to the shared module-level converter and unwrap its text.
    return md.convert(file_location).text_content


@tool
def get_audio_transcription(file_path: str) -> str:
    """Return the transcription of an audio file given its path.

    Args:
        file_path: The path of the audio file.

    Returns:
        The text content produced by the MarkItDown conversion.
    """
    # Delegate to the shared module-level converter and unwrap its text.
    return md.convert(file_path).text_content


@tool
def get_python_file_content(file_name: str) -> str:
    """Get the content of a mentioned Python file.

    Args:
        file_name: The name of the file.

    Returns:
        The full text of the file.
    """
    # Python source is UTF-8 by default, so decode explicitly rather than
    # relying on the platform locale encoding (which differs across systems).
    with open(file_name, "r", encoding="utf-8") as source_file:
        return source_file.read()


@tool
def visit_webpage_to_markdown(url: str) -> str:
    """Fetch a web page and return its content as markdown.

    Args:
        url: The URL of the web page.

    Returns:
        The text content produced by the MarkItDown conversion.
    """
    # The shared converter is handed the URL directly.
    return md.convert(url).text_content


@tool
def extract_markdown_tables_from_markdown_content(markdown_content: str) -> str:
    """Extract the markdown tables from a markdown string and return them as structured json.

    Args:
        markdown_content: The markdown string containing the table.

    Returns:
        A json string with the tables identified in the markdown content.
    """
    # Imported locally so the dependency is only needed when this tool runs.
    from mrkdwn_analysis import MarkdownAnalyzer

    table_analyzer = MarkdownAnalyzer.from_string(markdown_content)
    # Run the analysis pass before asking for the tables; its return value is
    # not used here.
    table_analyzer.analyse()
    tables = table_analyzer.identify_tables()
    return json.dumps(tables)


@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query.

    Returns:
        The matching documents joined by a horizontal-rule separator.
    """
    pages = WikipediaLoader(query=query, load_max_docs=2).load()

    rendered = []
    for page in pages:
        # Each result is wrapped in a Document tag carrying its source and,
        # when present in the metadata, its page value.
        rendered.append(
            f'<Document source="{page.metadata["source"]}" page="{page.metadata.get("page", "")}"/>\n{page.page_content}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)