# RAG-Test — utils/llm_response.py
# Author: Nielo47 · commit ef6d407 ("Update space")
import ollama
import faiss
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences # Assuming these are the new names
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt # Assuming these are the new names
# Load the Gemini API key from environment variables for security.
# Make sure the 'GEMINI_API_KEY' environment variable is set on your system.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
    # Warn at import time; generate_gemini_response also guards against a missing key.
    print("AVISO: A variável de ambiente 'GEMINI_API_KEY' não está definida. A API do Gemini pode não funcionar.")
    # WARNING: The 'GEMINI_API_KEY' environment variable is not set. The Gemini API may not work.
def _generate_context_for_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    search_strategy: str = 'multiple'
) -> str:
    """
    Generates relevant context for the input phrase using the RAG system.

    Two retrieval strategies are supported:
      - 'full': retrieves the contexts most relevant to the entire question.
      - 'multiple': segments the question into sentences and retrieves
        contexts per sentence.

    Args:
        input_phrase (str): The user's phrase or question.
        documents (list): Documents/texts from which context is retrieved.
        index (faiss.Index): Pre-built FAISS index over the document embeddings.
        embedder (SentenceTransformer): Embedding model used to vectorize text.
        search_strategy (str, optional): 'full' or 'multiple'.
            Defaults to 'multiple'.

    Returns:
        str: The retrieved context texts joined by newlines; an empty string
        when no context is found.

    Raises:
        ValueError: If `search_strategy` is neither 'full' nor 'multiple'.
    """
    if search_strategy == 'full':
        # k=5 as the default number of contexts for the full-question search.
        retrieved = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        # k_per_sentence=3 contexts retrieved per sentence of the question.
        retrieved = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        # BUG FIX: the previous message told users to pass 'completo'/'multiplo',
        # which the branches above do not accept; list the accepted values.
        raise ValueError(
            f"Estratégia de busca de contexto inválida: '{search_strategy}'. Use 'full' ou 'multiple'."
        )
    # Each hit is an (index, text, distance) tuple; keep only the text.
    return "\n".join(text for _, text, _ in retrieved)
def generate_ollama_response(input_phrase: str, context: str) -> str:
    """
    Generates a response with a locally served Ollama language model.

    The user's phrase and the RAG-retrieved context are combined into an
    ICF-classification prompt, which is sent to the local Ollama server.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The model's answer, or a human-readable error message when the
        Ollama call fails.
    """
    # Build the prompt with detailed ICF instructions.
    prompt_text = icf_classifier_prompt(context, input_phrase)

    # Debug output so the exact prompt sent to the model can be inspected.
    print("\n--- Prompt Gerado para Ollama ---")  # Generated Prompt for Ollama
    print(prompt_text)
    print("--- Fim do Prompt Ollama ---")  # End of Ollama Prompt

    try:
        # 'gemma2:latest' is assumed to be pulled and available locally.
        result = ollama.generate(model='gemma2:latest', prompt=prompt_text)
    except ollama.ResponseError as e:
        print(f"Erro de resposta do Ollama: {e}")  # Ollama response error
        return f"Desculpe, ocorreu um erro ao gerar a resposta com Ollama: {e}"
    except Exception as e:
        print(f"Erro inesperado ao gerar resposta com Ollama: {e}")  # Unexpected error
        return f"Desculpe, ocorreu um erro inesperado: {e}"
    # Fall back to a fixed message when the server returns no 'response' field.
    return result.get('response', 'Nenhuma resposta gerada pelo Ollama.')
def generate_gemini_response(input_phrase: str, context: str) -> str:
    """
    Generates a response using the Google Gemini API.

    Builds a Gemini-specific ICF prompt from the user's phrase and the
    RAG-retrieved context, then requests a completion from the API.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The model's answer, or a human-readable error message when the
        key is missing or the API call fails.
    """
    # Guard clause: without a key the client cannot be created.
    if not GEMINI_API_KEY:
        return "Erro: Chave de API do Gemini não configurada. Por favor, defina a variável de ambiente 'GEMINI_API_KEY'."
        # Error: Gemini API key not configured. Please set the 'GEMINI_API_KEY' environment variable.

    try:
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Build the prompt with detailed ICF instructions.
        prompt = icf_gemini_prompt(context, input_phrase)

        # Debug output so the exact prompt sent to the model can be inspected.
        print("\n--- Prompt Gerado para Gemini ---")  # Generated Prompt for Gemini
        print(prompt)
        print("--- Fim do Prompt Gemini ---")  # End of Gemini Prompt

        # Previously used model: "gemini-2.5-flash-preview-05-20"
        chosen_model = "gemini-2.0-flash-001"
        reply = client.models.generate_content(
            model=chosen_model, contents=prompt
        )
        return reply.text
    except Exception as e:
        print(f"Erro da API do Gemini: {e}. Verifique sua GEMINI_API_KEY e os detalhes do erro.")
        # Heuristic: surface a dedicated message for authentication problems.
        if any(marker in str(e) for marker in ("Authentication", "API key")):
            return "Erro de autenticação com a API do Gemini. Verifique sua chave de API e permissões."
        return f"Desculpe, ocorreu um erro na API do Gemini: {e}"
# Unified entry point: retrieve RAG context, then answer with the chosen LLM.
def generate_response_with_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    llm_choice: str = 'gemini',
    rag_strategy: str = 'multiple'
) -> str:
    """
    Main function to generate a response using an LLM (Ollama or Gemini),
    grounded in context retrieved via RAG.

    Args:
        input_phrase (str): The user's original phrase or question.
        documents (list): Documents from which context is retrieved.
        index (faiss.Index): Pre-built FAISS index over the embeddings.
        embedder (SentenceTransformer): The embedding model.
        llm_choice (str, optional): 'ollama' or 'gemini'. Defaults to 'gemini'.
        rag_strategy (str, optional): 'full' or 'multiple'. Defaults to 'multiple'.

    Returns:
        str: The LLM's answer, or a human-readable error message.
    """
    # Step 1: retrieve the supporting context.
    try:
        retrieved_context = _generate_context_for_llm(
            input_phrase, documents, index, embedder, search_strategy=rag_strategy
        )
    except ValueError as e:
        # Invalid strategy: surface the helper's own message.
        return str(e)
    except Exception as e:
        return f"Erro ao recuperar contexto: {e}"  # Error retrieving context

    if not retrieved_context:
        return "Não foi possível encontrar contexto relevante para a sua pergunta. Por favor, reformule ou forneça mais detalhes."
        # Could not find relevant context for your question. Please rephrase or provide more details.

    # Step 2: dispatch to the chosen LLM backend.
    backends = {
        'ollama': generate_ollama_response,
        'gemini': generate_gemini_response,
    }
    backend = backends.get(llm_choice.lower())
    if backend is None:
        return f"Erro: Escolha de LLM inválida ('{llm_choice}'). Opções válidas são 'ollama' ou 'gemini'."
        # Error: Invalid LLM choice. Valid options are 'ollama' or 'gemini'.
    return backend(input_phrase, retrieved_context)