## Arxiv-Research
## Tools Creation

In [2]:
from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun, DuckDuckGoSearchResults
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper, DuckDuckGoSearchAPIWrapper
from langchain.tools import Tool

In [None]:
api_wrapper_wiki = WikipediaAPIWrapper(top_k_results = 5, doc_content_chars_max=250)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
print(wiki.name)
wiki_tool= Tool(
 name = "Wikipedia",
 func = wiki.run,
 description = "This tool uses the Wikipedia API to search for a topic."
)
wiki_tool

wikipedia


Tool(name='Wikipedia', description='This tool uses the Wikipedia API to search for a topic.', func=, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250))>)

In [None]:
api_wrapper_arxiv = ArxivAPIWrapper(top_k_results = 5, doc_content_chars_max=250)
arxiv = ArxivQueryRun(api_wrapper=api_wrapper_arxiv)
print(arxiv.name)
arxiv_tool = Tool(
 name = "arxiv",
 func = arxiv.run,
 description = "Searches arXiv for papers matching the query.",
)
arxiv_tool

arxiv


Tool(name='arxiv', description='Searches arXiv for papers matching the query.', func=, arxiv_exceptions=(, , ), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250))>)

In [5]:
api_wrapper_ddg = DuckDuckGoSearchAPIWrapper(region="us-en", time="d", max_results=10)
ddg = DuckDuckGoSearchResults(api_wrapper=api_wrapper_ddg, source = "news")
print(ddg.name)
ddg_tool = Tool(
 name = "DuckDuckGo_Search",
 func = ddg.run,
 description = "Searches for search queries using the DuckDuckGo Search engine."
)

duckduckgo_results_json


In [7]:
ddg_tools = [ddg_tool]

## Custom Tools

In [10]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
#from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from tqdm.auto import tqdm

import os
from dotenv import load_dotenv
load_dotenv()

True

In [11]:
os.environ["HF_TOKOEN"] = os.getenv("HUGGINGFACE_TOKEN")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [12]:
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()
documents = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
vector_db = FAISS.from_documents(documents,embeddings)
retriever = vector_db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=, search_kwargs={})

In [13]:
from langchain.tools.retriever import create_retriever_tool
retriever_tool = create_retriever_tool(retriever,"langsmith-search", "Search any information about Langsmith")

retriever_tool.name

'langsmith-search'

In [14]:
tools = [wiki_tool, arxiv_tool, ddg_tool, retriever_tool]
tools

[Tool(name='Wikipedia', description='This tool uses the Wikipedia API to search for a topic.', func=, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250))>),
 Tool(name='arxiv', description='Searches arXiv for papers matching the query.', func=, arxiv_exceptions=(, , ), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250))>),
 Tool(name='DuckDuckGo_Search', description='Searches for search queries using the DuckDuckGo Search engine.', func=),
 Tool(name='langsmith-search', description='Search any information about Langsmith', args_schema=, func=functools.partial(, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=, search_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n', response_format='content'), coroutin

## Run all these tools with Agents and LLM models.

## Tools, LLM-->AgentExecutor

In [15]:
import openai
from langchain_groq import ChatGroq

In [16]:
groq_api_key = os.getenv("GROQ_API_KEY")
openai.api_key = os.getenv("OPEN_API_KEY")

llm = ChatGroq(groq_api_key=groq_api_key, model_name = "Llama3-8b-8192")

## Prompt Template

In [17]:
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

## Agents

In [18]:
from langchain.agents import create_openai_tools_agent, AgentExecutor

agent = create_openai_tools_agent(llm,tools,prompt)
agent

RunnableAssign(mapper={
 agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.

## Run Agent

In [19]:
agent_executor = AgentExecutor(agent = agent, tools = ddg_tools, verbose = True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
 agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(

In [20]:
agent_executor.invoke({"input": "Tell me about Langsmith"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith-search` with `{'query': 'Langsmith'}`


[0mlangsmith-search is not a valid tool, try one of [DuckDuckGo_Search].[32;1m[1;3m
Invoking: `DuckDuckGo_Search` with `Langsmith`


[0m[36;1m[1;3msnippet: AI/ML frameworks API integrations (Meta AutoGen AWS Azure Chroma GCP Google Ads Langfuse LangGraph LangSmith LinkedIn Ads) Pinecone Python PyTorch Semantic Kernel TensorFlow Weaviate. Stipend: ₹ 16,667 /month - ₹ 25,000 /month . Batch: 2027, 2026, 2025 ..., title: Zocket hiring Tech Intern [AI, Stipend: 25K/month] - talentd.in, link: https://www.talentd.in/jobs.php?id=1765, snippet: LangSmith 사용하기 (0) 2025.01.07: LangChain에서 Rerank로 RAG 정확도 높이기 (1) 2024.12.29: Llama 3.2 Vision 자동차 번호 인식 (0) 2024.12.24: LangChain 으로 RAG기반 Wiki 검색 채팅창 만들기 (0) 2024.12.24: Python에서 2차원 배열 중 특정 열의 값을 모두 바꾸기 (0) 2024.10.27, title: ollama 의 deepseek 를 python 에서 사용하기, link: https://keistory.tistory.com/1609[0m[32;1m[1;3mIt seem

{'input': 'Tell me about Langsmith',
 'output': 'It seems like the tool call yield a result about Langsmith, which is likely a project or a platform related to AI/ML and natural language processing. The result includes snippets of text, job postings, and links to external websites. It appears that Langsmith is a company or a research initiative that focuses on developing AI/ML frameworks and tools, and it might be related to API integrations and other technical projects. The text snippets also mention LangChain, a project that seems to be related to Langsmith.'}

In [21]:
agent_executor.invoke({"input": "Search Duck Duck go and answer What is Machine learning?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGo_Search` with `What is Machine learning?`


[0m[36;1m[1;3msnippet: The machine learning algorithms are a way to extract hidden patterns from large, laborious datasets. They might play a vital part in making decisions for complicated issues in health problems. In the field of health care, machine learning algorithms are becoming more and more important and are regarded as trustworthy methods for illness ..., title: An Empirical Review of Different Machine Learning Techniques ... - Springer, link: https://link.springer.com/chapter/10.1007/978-981-97-7946-8_19, snippet: Unlock the potential of Machine Learning with our in-depth course. Learn to autonomously identify trends and patterns through cutting-edge automation, enhancing your ability to manage diverse and complex data. Our course provides a deep dive into the practical applications of AI and Machine Learning, focusing on essential methodologies an

{'input': 'Search Duck Duck go and answer What is Machine learning?',
 'output': "It seems like you've already called the tool and got some results! Based on the snippet you provided, Machine learning is a way to extract hidden patterns from large datasets, and it's becoming increasingly important in healthcare. It's also used to identify trends and patterns through automation and is applied in various fields, including disease prediction. The results also mention AI and its potential applications."}

In [22]:
agent_executor.invoke({"input": "search on duck duck go and answer me: Who is Max Verstappen?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGo_Search` with `Who is Max Verstappen?`


[0m[36;1m[1;3msnippet: Max Verstappen has teased a career switch away from Formula 1 in a behind the scenes test with a rival motorsport series. The Dutchman has been linked with a series of team switches over the past year, with both Mercedes and Aston Martin emerging as interested teams to acquire Verstappen's ..., title: Verstappen teases career SWITCH after behind the scenes test, link: https://www.gpfans.com/en/f1-news/1040914/max-verstappen-f1-imsa-career-switch-acura-arx-06/, snippet: Max Verstappen's first appearance in his driver role will be at the F1 75 event on February 18 at the O2 arena to introduce the livery of Red Bull's new 2025 car. Jos Verstappen says fatherhood is ..., title: "Champion of dad jokes": Fans react as Max Verstappen ... - Sportskeeda, link: https://www.sportskeeda.com/f1/news-champion-dad-jokes-fans-react-max-verstappen-cracks-

{'input': 'search on duck duck go and answer me: Who is Max Verstappen?',
 'output': 'I called the tool for tool call id "call_h8p6" and it yielded the following text:\n\nMax Verstappen is a Dutch professional racing driver who currently competes in Formula One for Red Bull Racing. He has been linked with a series of team switches over the past year, including a potential move to Mercedes or Aston Martin. Verstappen has also been known to tease a career switch to other motorsports series, such as the IMSA WeatherTech SportsCar Championship. In addition to his F1 career, Verstappen has also expressed interest in taking on the legendary Le Mans 24 Hours endurance race.'}

In [23]:
agent_executor.invoke({"input": "Tell me about transformer architecture after reading the following research paper (DOI: 1706.03762)?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Wikipedia` with `DOI:1706.03762`


[0mWikipedia is not a valid tool, try one of [DuckDuckGo_Search].[32;1m[1;3m
Invoking: `arxiv` with `DOI:1706.03762`


[0marxiv is not a valid tool, try one of [DuckDuckGo_Search].[32;1m[1;3m
Invoking: `DuckDuckGo_Search` with `Transformer Architecture`


[0m[36;1m[1;3msnippet: In our Transformer architecture [], we customized the embedding layers in both the encoder and decoder to leverage pre-trained word embeddings rather than learning embeddings from scratch.Specifically, we initialized the embedding weights with pre-trained GloVe vectors [] before training the full models.By using these fixed, high-quality embeddings, the Transformer can build on knowledge from ..., title: Mitigating Gender Bias in Hindi to English Neural Machine Translation, link: https://link.springer.com/chapter/10.1007/978-981-97-7946-8_8, snippet: As small as they are, SLMs stem from LLMs in or

{'input': 'Tell me about transformer architecture after reading the following research paper (DOI: 1706.03762)?',
 'output': 'Based on the information gathered from the previous tool calls, I will provide a direct response to your question about the Transformer architecture.\n\nThe Transformer architecture is a type of neural network architecture primarily used for machine translation, language modeling, and text generation tasks. It was introduced in the paper "Attention Is All You Need" by Vaswani et al. in 2017. The architecture is designed to handle sequential data and is particularly well-suited for natural language processing tasks.\n\nThe Transformer architecture is based on self-attention mechanisms, which allow the model to attend to different parts of the input sequence simultaneously and weigh their importance. This is in contrast to traditional recurrent neural network (RNN) architectures, which rely on a sequential processing of the input sequence.\n\nThe architecture cons

In [26]:
agent_executor.invoke({"input": "tell me recent news about Donald trump."})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Wikipedia` with `Donald Trump`


[0mWikipedia is not a valid tool, try one of [DuckDuckGo_Search].[32;1m[1;3m
Invoking: `DuckDuckGo_Search` with `Donald Trump`


[0m[36;1m[1;3msnippet: President Donald Trump speaks during a joint press conference with Japanese Prime Minister Shigeru Ishiba in the East Room at the White House, Feb. 7, 2025 in Washington. Andrew Harnik/Getty Images, title: Trump 2nd term live updates: Musk 'not gaining anything' from DOGE ..., link: https://abcnews.go.com/Politics/live-updates/trump-second-term-live-updates/?id=118389757, snippet: Nearly three weeks into Donald Trump's second term in office, the president is continuing to dramatically reimagine and expand his use of executive authority — and to use that authority to ..., title: Trump's recent actions show his revenge tour isn't slowing down, link: https://www.cnn.com/2025/02/09/politics/trump-actions-settling-scores/index.html

{'input': 'tell me recent news about Donald trump.',
 'output': 'It seems like President Donald Trump has been quite busy since his second term in office. Let me summarize the recent news articles for you:\n\nPresident Trump has been expanding his use of executive authority and settling scores with his political opponents. He made a historic appearance at Super Bowl LIX, becoming the first sitting president to attend the NFL\'s championship game. He also prerecorded an interview with Bret Baier of Fox News Channel that will be aired ahead of the game.\n\nIt\'s interesting to note that President Trump has been using his executive authority to reshape America in his vision. He has been actively working to achieve his goals, and it seems like his "revenge tour" is still ongoing.\n\nI hope this summary helps you stay up-to-date with the latest news about President Trump!'}

In [2]:
import requests

def list_ollama_models(base_url):
 """Fetches the list of available models from the running Ollama instance."""
 try:
 response = requests.get(f"{base_url}/api/tags")
 response.raise_for_status() # Raise an error for HTTP failures
 models = response.json().get("models", [])
 return [model["name"] for model in models]
 except requests.exceptions.RequestException as e:
 return [f"Error: {e}"]

OLLAMA_WSL_IP = "http://127.0.0.1:11434"

# Call this function to check available models
ollama_models = list_ollama_models(OLLAMA_WSL_IP)

# Display models in Streamlit sidebar
print("Available Ollama Models")
print(ollama_models)


Available Ollama Models
['llama3:latest', 'gemma2:2b']
