import logging
import os
from typing import Any, Dict, List

import requests
from langchain_core.tools import tool
from langchain_huggingface import HuggingFacePipeline
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)
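# Note: this module only creates a named logger. Configure handlers in the
# host application, e.g. with logging.basicConfig(level=logging.INFO).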

# Initialize embedding model (free, open-source)
try:
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
except Exception as e:
    logger.error(f"Failed to initialize embedding model: {e}")
    embedder = None

# Global LLM instance, set by initialize_search_tools()
search_llm = None

def initialize_search_tools(llm: HuggingFacePipeline) -> None:
    """Initialize search tools with the provided LLM."""
    global search_llm
    search_llm = llm
    logger.info("Search tools initialized with HuggingFace LLM")
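
# Example (sketch) of wiring up the tools. The model name and generation
# settings below are assumptions; any transformers text-generation model
# wrapped in HuggingFacePipeline should work:
#
#     from transformers import pipeline
#
#     hf_pipe = pipeline("text-generation", model="gpt2", max_new_tokens=64)
#     initialize_search_tools(HuggingFacePipeline(pipeline=hf_pipe))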

@tool
async def search_tool(query: str) -> List[Dict[str, Any]]:
    """Perform a web search using the query."""
    try:
        if not search_llm:
            logger.warning("Search LLM not initialized")
            return [{"content": "Search unavailable", "url": ""}]

        # Refine the query using the LLM. HuggingFacePipeline returns a plain
        # string, while chat models return a message with .content; handle both.
        prompt = f"Refine this search query for better results: {query}"
        response = await search_llm.ainvoke(prompt)
        refined_query = getattr(response, "content", response).strip()

        # Check for a SerpAPI key (free tier available)
        serpapi_key = os.getenv("SERPAPI_API_KEY")
        if serpapi_key:
            try:
                params = {"q": refined_query, "api_key": serpapi_key}
                response = requests.get("https://serpapi.com/search", params=params, timeout=10)
                response.raise_for_status()
                results = response.json().get("organic_results", [])
                return [{"content": r.get("snippet", ""), "url": r.get("link", "")} for r in results]
            except Exception as e:
                logger.warning(f"SerpAPI failed: {e}, falling back to mock search")

        # Mock search if no API key is set or the API call fails
        if embedder:
            # The embedding is computed to mirror a real semantic-search path,
            # but the mock results below do not actually use it.
            query_embedding = embedder.encode(refined_query)
            results = [
                {"content": f"Mock result for {refined_query}", "url": "https://example.com"},
                {"content": f"Another mock result for {refined_query}", "url": "https://example.org"},
            ]
        else:
            results = [{"content": "Embedding model unavailable", "url": ""}]

        logger.info(f"Search results for query '{refined_query}': {len(results)} items")
        return results
    except Exception as e:
        logger.error(f"Error in search_tool: {e}")
        return [{"content": f"Search failed: {str(e)}", "url": ""}]
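
# Usage sketch: @tool wraps the coroutine in a LangChain StructuredTool, so
# call it through .ainvoke with a dict of arguments rather than directly:
#
#     results = await search_tool.ainvoke({"query": "open-source LLM agents"})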

@tool
async def multi_hop_search_tool(query: str, steps: int = 3) -> List[Dict[str, Any]]:
    """Perform a multi-hop search by iteratively refining the query."""
    try:
        if not search_llm:
            logger.warning("Search LLM not initialized")
            return [{"content": "Multi-hop search unavailable", "url": ""}]

        results = []
        current_query = query
        for step in range(steps):
            prompt = f"Based on the query '{current_query}', generate a follow-up question to deepen the search."
            response = await search_llm.ainvoke(prompt)
            next_query = getattr(response, "content", response).strip()
            # search_tool is a StructuredTool, so invoke it asynchronously
            step_results = await search_tool.ainvoke({"query": next_query})
            results.extend(step_results)
            current_query = next_query
            logger.info(f"Multi-hop step {step + 1}: {next_query}")
        return results
    except Exception as e:
        logger.error(f"Error in multi_hop_search_tool: {e}")
        return [{"content": f"Multi-hop search failed: {str(e)}", "url": ""}]
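
# Minimal smoke test, a sketch assuming this file is run as a script. Without
# an initialized LLM, both tools return their "unavailable" placeholders
# rather than raising, so this exercises the fallback paths only.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        print(await search_tool.ainvoke({"query": "test"}))
        print(await multi_hop_search_tool.ainvoke({"query": "test", "steps": 2}))

    asyncio.run(_demo())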