<b>Text-Optimizer (Evaluator-Optimizer-pattern)</b>

In [None]:
# Start with imports - ask ChatGPT to explain any package you don't recognize
import json
import os
import re

from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

<b>Refreshing environment variables from the .env file</b>
<br>

In [14]:
# Re-read the .env file; override=True lets its values replace variables
# that are already present in the process environment.
load_dotenv(override=True)

# Either lookup yields None when the variable is not defined.
groq_api_key = os.environ.get("GROQ_API_KEY")
open_api_key = os.environ.get("OPENAI_API_KEY")

API Key Validator

In [None]:
from openai import api_key


def api_key_checker(api_key):
    """Print whether an API key is set, revealing only its first eight characters.

    Args:
        api_key: The key string to inspect; None or an empty string counts as unset.
    """
    if not api_key:
        print("API Key not set")
        return

    visible_prefix = api_key[:8]
    print(f"API Key exists and begins {visible_prefix}")

# Report the status of both provider keys; only an eight-character prefix of each is printed.
api_key_checker(groq_api_key)
api_key_checker(open_api_key)   

## Helper Functions

### 1. `llm_optimizer` (for refining the prompted text) - GROQ
- **Purpose**: Generates optimized versions of text based on evaluator feedback
- **System Message**: "You are a helpful assistant that refines text based on evaluator feedback." (followed by a strict instruction to answer in three lines or fewer)

### 2. `llm_evaluator` (for judging the llm_optimizer's output) - OpenAI
- **Purpose**: Evaluates the quality of LLM responses using another LLM as a judge
- **Quality Threshold**: Requires score ≥ 0.7 for acceptance

### 3. `optimize_prompt_runner` (runner)
- **Purpose**: Iteratively optimizes prompts using LLM feedback loop
- **Process**:
  1. LLM optimizer generates improved version
  2. LLM evaluator assesses quality and line count
  3. If accepted, process stops; if not, feedback used for next iteration
- **Max Iterations**: 5 attempts by default

In [16]:
def generate_llm_response(provider, system_msg, user_msg, temperature=0.7):
    """Send one system + user exchange to the chosen provider and return the reply text.

    Both providers are reached through the OpenAI client; Groq is addressed via
    its OpenAI-compatible base URL.

    Args:
        provider: "groq" or "openai".
        system_msg: Content of the system message.
        user_msg: Content of the user message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The reply text with surrounding whitespace removed, or an empty string
        when the API returns no text content.

    Raises:
        ValueError: If the provider is not supported or its API key is not set.
    """
    if provider == "groq":
        provider_key = groq_api_key
        base_url = "https://api.groq.com/openai/v1"
        model = "llama-3.3-70b-versatile"
    elif provider == "openai":
        provider_key = open_api_key
        base_url = None  # None keeps the client's default endpoint
        model = "gpt-4o-mini"
    else:
        raise ValueError(f"Unsupported provider: {provider}")

    # Fail fast on a missing key. When api_key is None the OpenAI client falls
    # back to the OPENAI_API_KEY environment variable, which for "groq" would
    # send the OpenAI key to Groq's endpoint.
    if not provider_key:
        raise ValueError(f"API key for provider '{provider}' is not set")

    client = OpenAI(api_key=provider_key, base_url=base_url)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        temperature=temperature,
    )

    # message.content is optional in the API schema; guard against None.
    content = response.choices[0].message.content
    return (content or "").strip()

def llm_optimizer(provider, prompt, feedback=None):
    """Ask the LLM for a concise answer, or for a refinement driven by feedback.

    Args:
        provider: Provider name forwarded to generate_llm_response.
        prompt: The text to answer (no feedback) or to refine (with feedback).
        feedback: Evaluator feedback to address; when falsy, the prompt is sent as-is.

    Returns:
        The text returned by generate_llm_response.
    """
    if feedback:
        request = f"Refine this text to address the feedback: '{feedback}'\n\nText:\n{prompt}"
    else:
        request = prompt

    instructions = "You are a helpful assistant that refines text based on evaluator feedback. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct"
    return generate_llm_response(provider, instructions, request, temperature=0.7)


def _extract_score(evaluation_text):
    """Return the first number following the word "score" if it lies in 0-1, else None."""
    match = re.search(r"score\W*(\d*\.?\d+)", evaluation_text, re.IGNORECASE)
    if match is None:
        return None
    score = float(match.group(1))
    # A value outside the requested 0-1 scale means the reply ignored the format.
    return score if 0.0 <= score <= 1.0 else None


def llm_evaluator(provider, prompt, response, min_score=0.7):
    """Judge a response with another LLM and decide whether it is acceptable.

    The evaluator is asked to begin its reply with "Score: <number>"; that
    number is parsed and compared against min_score. A substring check for
    "Score: 0.7" / "Score: 1" would reject passing scores such as 0.8 or 0.95.

    Args:
        provider: Provider name forwarded to generate_llm_response.
        prompt: The prompt the response was written for.
        response: The candidate text to evaluate.
        min_score: Lowest score (on a 0-1 scale) that counts as accepted.

    Returns:
        A tuple (is_accepted, feedback). feedback is None when accepted,
        otherwise the evaluator's full reply. A reply with no parsable 0-1
        score is treated as not accepted.
    """
    evaluator_system_message = "You are a strict evaluator judging the quality of LLM outputs."

    # Each fragment ends with a separator so the sentences do not run together.
    evaluation_prompt = (
        "Evaluate the following response to the prompt. More concise language is better. "
        "CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct. "
        "Start your reply with 'Score: <number>' using a score from 0 to 1. "
        f"If under {min_score}, explain what must be improved.\n\n"
        f"Prompt: {prompt}\n\nResponse: {response}"
    )

    # temperature=0 keeps the judgement as repeatable as the API allows.
    evaluation_result = generate_llm_response(
        provider, evaluator_system_message, evaluation_prompt, temperature=0
    )

    quality_score = _extract_score(evaluation_result)
    is_accepted = quality_score is not None and quality_score >= min_score

    feedback = None if is_accepted else evaluation_result
    return is_accepted, feedback

def optimize_prompt_runner(prompt, provider="groq", max_iterations=5, evaluator_provider="openai"):
    """Run the optimizer/evaluator loop until a draft is accepted or attempts run out.

    Each iteration asks llm_optimizer for a draft, has llm_evaluator judge it
    against the original prompt, and on rejection feeds the draft and the
    evaluator's feedback into the next iteration. Progress is printed.

    Args:
        prompt: The original prompt; also the reference the evaluator judges against.
        provider: Provider used by the optimizer.
        max_iterations: Maximum number of optimize/evaluate rounds.
        evaluator_provider: Provider used by the evaluator.

    Returns:
        The accepted draft, or the most recent draft when no draft is accepted
        (the unchanged prompt if max_iterations is 0).
    """
    current_text = prompt
    previous_feedback = None

    for iteration in range(max_iterations):
        print(f"\n🔄 Iteration {iteration + 1}")

        # Step 1: generate a draft from the current text and any prior feedback.
        optimized_text = llm_optimizer(provider, current_text, previous_feedback)
        print(f"🧠 Optimized: {optimized_text}\n")

        # Step 2: always judge against the original prompt, not the evolving draft.
        is_accepted, evaluation_feedback = llm_evaluator(evaluator_provider, prompt, optimized_text)

        if is_accepted:
            print("✅ Accepted by evaluator")
            return optimized_text

        print(f"❌ Feedback: {evaluation_feedback}\n")
        # Step 3: the rejected draft and its feedback seed the next iteration.
        current_text = optimized_text
        previous_feedback = evaluation_feedback

    print("⚠️ Max iterations reached.")
    return current_text


Testing the Evaluator-Optimizer

In [None]:
# Run the optimize/evaluate loop on a sample prompt (Groq as optimizer) and show the result.
prompt = "Summarize faiss vector search"
final_output = optimize_prompt_runner(prompt, provider="groq")
print(f"🎯 Final Output: {final_output}")