# RAG-Test — utils/llm_response.py
# Author: Nielo47 · commit ef6d407 ("Update space")
import ollama
import faiss
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences # Assuming these are the new names
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt # Assuming these are the new names
# Load the Gemini API key from environment variables for security.
# Make sure the 'GEMINI_API_KEY' environment variable is set on your system.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
    # Warn at import time; generate_gemini_response also guards against a missing key.
    print("AVISO: A variável de ambiente 'GEMINI_API_KEY' não está definida. A API do Gemini pode não funcionar.")
    # WARNING: The 'GEMINI_API_KEY' environment variable is not set. The Gemini API may not work.
def _generate_context_for_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    search_strategy: str = 'multiple'
) -> str:
    """
    Generates relevant context for the input phrase using the RAG system.

    Two retrieval strategies are supported:
      - 'full': retrieves the contexts most relevant to the entire question.
      - 'multiple': segments the question into sentences and retrieves
        contexts per sentence.

    Args:
        input_phrase (str): The user's phrase or question.
        documents (list): Documents/texts from which context is retrieved.
        index (faiss.Index): Pre-built FAISS index over the document embeddings.
        embedder (SentenceTransformer): Embedding model used to vectorize text.
        search_strategy (str, optional): 'full' or 'multiple'.
            Defaults to 'multiple'.

    Returns:
        str: The retrieved context texts joined by newlines; an empty string
        when no context is found.

    Raises:
        ValueError: If `search_strategy` is neither 'full' nor 'multiple'.
    """
    if search_strategy == 'full':
        # k=5 as the default number of contexts for the full-question search.
        retrieved = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        # k_per_sentence=3 contexts retrieved per sentence of the question.
        retrieved = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        # BUG FIX: the previous message told users to pass 'completo'/'multiplo',
        # which the branches above do not accept; list the accepted values.
        raise ValueError(
            f"Estratégia de busca de contexto inválida: '{search_strategy}'. Use 'full' ou 'multiple'."
        )
    # Each hit is an (index, text, distance) tuple; keep only the text.
    return "\n".join(text for _, text, _ in retrieved)
def generate_ollama_response(input_phrase: str, context: str) -> str:
    """
    Generates a response with a locally served Ollama language model.

    The user's phrase and the RAG-retrieved context are combined into an
    ICF-classification prompt, which is sent to the local Ollama server.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The model's answer, or a human-readable error message when the
        Ollama call fails.
    """
    # Build the prompt with detailed ICF instructions.
    prompt_text = icf_classifier_prompt(context, input_phrase)

    # Debug output so the exact prompt sent to the model can be inspected.
    print("\n--- Prompt Gerado para Ollama ---")  # Generated Prompt for Ollama
    print(prompt_text)
    print("--- Fim do Prompt Ollama ---")  # End of Ollama Prompt

    try:
        # 'gemma2:latest' is assumed to be pulled and available locally.
        result = ollama.generate(model='gemma2:latest', prompt=prompt_text)
    except ollama.ResponseError as e:
        print(f"Erro de resposta do Ollama: {e}")  # Ollama response error
        return f"Desculpe, ocorreu um erro ao gerar a resposta com Ollama: {e}"
    except Exception as e:
        print(f"Erro inesperado ao gerar resposta com Ollama: {e}")  # Unexpected error
        return f"Desculpe, ocorreu um erro inesperado: {e}"
    # Fall back to a fixed message when the server returns no 'response' field.
    return result.get('response', 'Nenhuma resposta gerada pelo Ollama.')
def generate_gemini_response(input_phrase: str, context: str) -> str:
    """
    Generates a response using the Google Gemini API.

    Builds a Gemini-specific ICF prompt from the user's phrase and the
    RAG-retrieved context, then requests a completion from the API.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The model's answer, or a human-readable error message when the
        key is missing or the API call fails.
    """
    # Guard clause: without a key the client cannot be created.
    if not GEMINI_API_KEY:
        return "Erro: Chave de API do Gemini não configurada. Por favor, defina a variável de ambiente 'GEMINI_API_KEY'."
        # Error: Gemini API key not configured. Please set the 'GEMINI_API_KEY' environment variable.

    try:
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Build the prompt with detailed ICF instructions.
        prompt = icf_gemini_prompt(context, input_phrase)

        # Debug output so the exact prompt sent to the model can be inspected.
        print("\n--- Prompt Gerado para Gemini ---")  # Generated Prompt for Gemini
        print(prompt)
        print("--- Fim do Prompt Gemini ---")  # End of Gemini Prompt

        # Previously used model: "gemini-2.5-flash-preview-05-20"
        chosen_model = "gemini-2.0-flash-001"
        reply = client.models.generate_content(
            model=chosen_model, contents=prompt
        )
        return reply.text
    except Exception as e:
        print(f"Erro da API do Gemini: {e}. Verifique sua GEMINI_API_KEY e os detalhes do erro.")
        # Heuristic: surface a dedicated message for authentication problems.
        if any(marker in str(e) for marker in ("Authentication", "API key")):
            return "Erro de autenticação com a API do Gemini. Verifique sua chave de API e permissões."
        return f"Desculpe, ocorreu um erro na API do Gemini: {e}"
# Unified entry point: retrieve RAG context, then answer with the chosen LLM.
def generate_response_with_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    llm_choice: str = 'gemini',
    rag_strategy: str = 'multiple'
) -> str:
    """
    Main function to generate a response using an LLM (Ollama or Gemini),
    grounded in context retrieved via RAG.

    Args:
        input_phrase (str): The user's original phrase or question.
        documents (list): Documents from which context is retrieved.
        index (faiss.Index): Pre-built FAISS index over the embeddings.
        embedder (SentenceTransformer): The embedding model.
        llm_choice (str, optional): 'ollama' or 'gemini'. Defaults to 'gemini'.
        rag_strategy (str, optional): 'full' or 'multiple'. Defaults to 'multiple'.

    Returns:
        str: The LLM's answer, or a human-readable error message.
    """
    # Step 1: retrieve the supporting context.
    try:
        retrieved_context = _generate_context_for_llm(
            input_phrase, documents, index, embedder, search_strategy=rag_strategy
        )
    except ValueError as e:
        # Invalid strategy: surface the helper's own message.
        return str(e)
    except Exception as e:
        return f"Erro ao recuperar contexto: {e}"  # Error retrieving context

    if not retrieved_context:
        return "Não foi possível encontrar contexto relevante para a sua pergunta. Por favor, reformule ou forneça mais detalhes."
        # Could not find relevant context for your question. Please rephrase or provide more details.

    # Step 2: dispatch to the chosen LLM backend.
    backends = {
        'ollama': generate_ollama_response,
        'gemini': generate_gemini_response,
    }
    backend = backends.get(llm_choice.lower())
    if backend is None:
        return f"Erro: Escolha de LLM inválida ('{llm_choice}'). Opções válidas são 'ollama' ou 'gemini'."
        # Error: Invalid LLM choice. Valid options are 'ollama' or 'gemini'.
    return backend(input_phrase, retrieved_context)