import os

import faiss
import ollama
from dotenv import load_dotenv
from google import genai
from google.genai import types
from sentence_transformers import SentenceTransformer

from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt

# Load the Gemini API key from environment variables for security.
# Make sure the 'GEMINI_API_KEY' environment variable is set on your system.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
    print("WARNING: The 'GEMINI_API_KEY' environment variable is not set. The Gemini API may not work.")


def _generate_context_for_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    search_strategy: str = 'multiple'
) -> str:
    """
    Generates relevant context for the input phrase using the RAG system.

    This function allows choosing between two context search strategies:
        'full': Searches for the most relevant contexts for the entire question.
        'multiple': Segments the question into sentences and searches for
            multiple contexts, ensuring uniqueness.

    Args:
        input_phrase (str): The user's phrase or question for which context
            will be generated.
        documents (list): A list of strings representing the documents/texts
            from which context will be retrieved.
        index (faiss.Index): The pre-built FAISS index for similarity search
            over document embeddings.
        embedder (SentenceTransformer): The embedding model used to convert
            text into vectors.
        search_strategy (str, optional): The context search strategy to use.
            Either 'full' or 'multiple'. Defaults to 'multiple'.

    Returns:
        str: A string containing the retrieved contexts, joined by newlines.
            Returns an empty string if no context is found.

    Raises:
        ValueError: If the provided search strategy is invalid.
    """
    retrieved_contexts_with_distance = []

    if search_strategy == 'full':
        # k=5 as the default number of contexts for a full-query search.
        retrieved_contexts_with_distance = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        # k_per_sentence=3 contexts retrieved for each sentence of the question.
        retrieved_contexts_with_distance = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        raise ValueError(
            f"Invalid context search strategy: '{search_strategy}'. Use 'full' or 'multiple'."
        )

    # Extract only the document text from the list of (index, text, distance) tuples.
    context_texts = [text for _, text, _ in retrieved_contexts_with_distance]
    context_string = "\n".join(context_texts)
    return context_string
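
# --- Illustrative sketch (not called by this module) ---------------------------
# The retriever above expects a (documents, index, embedder) triple. This is a
# minimal way to build one, assuming the 'all-MiniLM-L6-v2' model and an
# in-memory corpus; the real project presumably builds its index elsewhere.
def _build_demo_index(documents: list) -> tuple[faiss.Index, SentenceTransformer]:
    """Builds a flat L2 FAISS index over sentence embeddings (demo only)."""
    import numpy as np

    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # Encode the corpus into an (n_docs, dim) float32 matrix, as FAISS expects.
    embeddings = embedder.encode(documents, convert_to_numpy=True).astype('float32')
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embedder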
def generate_ollama_response(input_phrase: str, context: str) -> str:
    """
    Generates a response using the Ollama language model locally.

    Constructs a detailed prompt with the user's input phrase and the
    retrieved context to guide the model in generating an informed response
    about the ICF.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): A string containing the relevant context retrieved from RAG.

    Returns:
        str: The response generated by the Ollama model.

    Raises:
        ollama.ResponseError: If there is an error communicating with the Ollama server.
        Exception: For other unexpected errors during response generation.
    """
    # Prompt with detailed instructions about the ICF.
    prompt_text = icf_classifier_prompt(context, input_phrase)

    print("\n--- Prompt Generated for Ollama ---")
    print(prompt_text)
    print("--- End of Ollama Prompt ---")

    try:
        # Assumes 'gemma2:latest' (or a similar appropriate model) is available in Ollama.
        response_data = ollama.generate(model='gemma2:latest', prompt=prompt_text)
        return response_data.get('response', 'No response generated by Ollama.')
    except ollama.ResponseError as e:
        print(f"Ollama response error: {e}")
        return f"Sorry, an error occurred while generating the response with Ollama: {e}"
    except Exception as e:
        print(f"Unexpected error while generating a response with Ollama: {e}")
        return f"Sorry, an unexpected error occurred: {e}"


def generate_gemini_response(input_phrase: str, context: str) -> str:
    """
    Generates a response using the Google Gemini API.

    Connects to the Gemini API, constructs a model-specific prompt, and sends
    the request to obtain a response based on the user's phrase and context.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): A string containing the relevant context retrieved from RAG.

    Returns:
        str: The response generated by the Gemini model.
    """
    if not GEMINI_API_KEY:
        return (
            "Error: Gemini API key not configured. "
            "Please set the 'GEMINI_API_KEY' environment variable."
        )

    try:
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Prompt with detailed instructions about the ICF.
        gemini_prompt_text = icf_gemini_prompt(context, input_phrase)

        print("\n--- Prompt Generated for Gemini ---")
        print(gemini_prompt_text)
        print("--- End of Gemini Prompt ---")

        # Request configuration for the Gemini model.
        # model_name = "gemini-2.5-flash-preview-05-20"
        model_name = "gemini-2.0-flash-001"

        api_response = client.models.generate_content(
            model=model_name,
            contents=gemini_prompt_text
        )
        return api_response.text
    except Exception as e:
        print(f"Gemini API error: {e}. Check your GEMINI_API_KEY and the error details.")
        if "Authentication" in str(e) or "API key" in str(e):
            return "Authentication error with the Gemini API. Check your API key and permissions."
        return f"Sorry, an error occurred in the Gemini API: {e}"
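
# --- Illustrative sketch (not called by this module) ---------------------------
# A streaming variant of generate_ollama_response, assuming the same
# 'gemma2:latest' model and the same dict-like chunks the ollama client
# returns with stream=True; useful when partial output should be displayed
# as it arrives instead of waiting for the full response.
def generate_ollama_response_stream(input_phrase: str, context: str):
    """Yields partial response chunks from Ollama as they are produced (demo only)."""
    prompt_text = icf_classifier_prompt(context, input_phrase)
    try:
        # stream=True turns the call into a generator of partial responses.
        for chunk in ollama.generate(model='gemma2:latest', prompt=prompt_text, stream=True):
            yield chunk.get('response', '')
    except ollama.ResponseError as e:
        yield f"Sorry, an error occurred while generating the response with Ollama: {e}"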

# Unified function to generate a response, allowing the choice of LLM.
def generate_response_with_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    llm_choice: str = 'gemini',
    rag_strategy: str = 'multiple'
) -> str:
    """
    Main function to generate a response using an LLM (Ollama or Gemini),
    based on context retrieved via RAG.

    Args:
        input_phrase (str): The user's original phrase or question.
        documents (list): A list of strings representing the documents from
            which context will be retrieved.
        index (faiss.Index): The pre-built FAISS index for similarity search
            over embeddings.
        embedder (SentenceTransformer): The embedding model.
        llm_choice (str, optional): The LLM to use ('ollama' or 'gemini').
            Defaults to 'gemini'.
        rag_strategy (str, optional): The context search strategy
            ('full' or 'multiple'). Defaults to 'multiple'.

    Returns:
        str: The response generated by the LLM.
    """
    # 1. Retrieve the context.
    retrieved_context = ""
    try:
        retrieved_context = _generate_context_for_llm(
            input_phrase, documents, index, embedder, search_strategy=rag_strategy
        )
    except ValueError as e:
        # Return the error message for an invalid strategy.
        return str(e)
    except Exception as e:
        return f"Error retrieving context: {e}"

    if not retrieved_context:
        return (
            "Could not find relevant context for your question. "
            "Please rephrase it or provide more details."
        )

    # 2. Generate the response using the chosen LLM.
    if llm_choice.lower() == 'ollama':
        return generate_ollama_response(input_phrase, retrieved_context)
    elif llm_choice.lower() == 'gemini':
        return generate_gemini_response(input_phrase, retrieved_context)
    else:
        return f"Error: Invalid LLM choice ('{llm_choice}'). Valid options are 'ollama' or 'gemini'."
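
# --- Illustrative usage (sketch) ------------------------------------------------
# The tiny corpus below is a placeholder built with the demo helper above; the
# real project presumably loads its own ICF corpus and pre-built index.
if __name__ == '__main__':
    demo_documents = [
        "ICF code b280 refers to sensation of pain.",
        "ICF code d450 refers to walking.",
    ]
    demo_index, demo_embedder = _build_demo_index(demo_documents)
    answer = generate_response_with_llm(
        "The patient reports difficulty walking long distances.",
        demo_documents,
        demo_index,
        demo_embedder,
        llm_choice='gemini',
        rag_strategy='multiple',
    )
    print(answer)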