Final_Assignment_Template

Sleeping

Nicolas PHUNG

feat: Add first version of public agent

dbf8073 about 2 months ago

3.04 kB

	import json

	from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
	from markitdown import MarkItDown
	from smolagents import (
	tool,
	)

	md = MarkItDown(enable_plugins=True) # Set to True to enable plugins


	@tool
	def arvix_search(query: str) -> str:
	"""Search Arxiv for a query and return maximum 3 result.

	Args:
	query: The search query."""
	search_docs = ArxivLoader(query=query, load_max_docs=3).load()
	formatted_search_docs = "\n\n---\n\n".join(
	[
	f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
	for doc in search_docs
	]
	)
	return formatted_search_docs


	@tool
	def read_excel_content_to_markdown_content(file_location: str) -> str:
	"""Read the content of an Excel file and convert it to markdown content.

	Args:
	file_location: The path to the Excel file."""

	result = md.convert(file_location)
	return result.text_content


	@tool
	def read_pdf_content_to_markdown(file_location: str) -> str:
	"""Read the content of a PDF file and convert it to markdown.

	Args:
	file_location: The path to the PDF file."""

	result = md.convert(file_location)
	return result.text_content


	@tool
	def get_audio_transcription(file_path: str) -> str:
	"""Get the transcription of the audio file using the file path.

	Args:
	file_path: The path of the audio file."""

	result = md.convert(file_path)
	return result.text_content


	@tool
	def get_python_file_content(file_name: str) -> str:
	"""Get the content of a mentioned Python file.

	Args:
	file_name: The name of the file."""
	file_path = f"{file_name}"
	with open(file_path, "r") as f:
	content = f.read()
	return content


	@tool
	def visit_webpage_to_markdown(url: str) -> str:
	"""Visit a web page and return its content in markdown format.

	Args:
	url: The URL of the web page."""
	result = md.convert(url)
	return result.text_content


	@tool
	def extract_markdown_tables_from_markdown_content(markdown_content: str) -> str:
	"""Extract and return the markdown tables from a given markdown content string in a structured json format.

	Args:
	markdown_content: The markdown string containing the table."""
	from mrkdwn_analysis import MarkdownAnalyzer

	analyzer = MarkdownAnalyzer.from_string(markdown_content)
	analyzer.analyse()
	return json.dumps(analyzer.identify_tables())


	@tool
	def wiki_search(query: str) -> str:
	"""Search Wikipedia for a query and return maximum 2 results.

	Args:
	query: The search query."""
	search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
	formatted_search_docs = "\n\n---\n\n".join(
	[
	f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
	for doc in search_docs
	]
	)
	return formatted_search_docs