#!/usr/bin/env python3
"""
DSPy Director Bake-Off: A Beginner's Guide to DSPy Programming
This script demonstrates core DSPy concepts through a fun example:
comparing how different movie directors would approach filming the same video idea.
Key DSPy Concepts Demonstrated:
1. Signatures - Define input/output interfaces for LLM tasks
2. Modules - Combine multiple signatures into complex workflows
3. Structured Output - Use Pydantic models for reliable data extraction
4. Async Processing - Handle multiple LLM calls efficiently
5. Chain of Thought - Enable reasoning for complex decisions
Author: DSPy Learning Example
"""
# Standard library imports
import os
import sys
import asyncio
from typing import List, Optional
# Third-party imports
import dspy # The main DSPy framework for LLM programming
from dotenv import load_dotenv # For loading environment variables from .env file
from pydantic import BaseModel, Field # For structured data validation
# Load environment variables from .env file (contains API keys)
load_dotenv()
# Global variable to store our configured language model
# This will be set up once and reused throughout the application
lm = None
# ==========================================================================
# SECTION 1: LLM SETUP AND CONFIGURATION
# ==========================================================================
def setup_dspy_provider():
"""
Configure DSPy with an available LLM provider.
DSPy supports many providers (OpenAI, Anthropic, OpenRouter, etc.)
This function tries to connect to OpenRouter, which provides access
to many different models through a single API.
Returns:
str: The name of the provider that was successfully configured
"""
global lm # We'll modify the global lm variable
# Check if we have an OpenRouter API key in our environment variables
if os.getenv('OPENROUTER_API_KEY'):
        print("✅ Configuring DSPy with OpenRouter...")
# Create a DSPy Language Model object
# Format: "provider/model_name"
# Here we use a free model from Moonshot AI via OpenRouter
lm = dspy.LM(
model="openrouter/moonshotai/kimi-k2:free",
api_key=os.getenv('OPENROUTER_API_KEY')
)
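        # Hedged aside: other providers follow the same dspy.LM pattern via
        # LiteLLM-style model strings. For example (illustrative only, not used here):
        #   lm = dspy.LM(model="openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))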
# Configure DSPy to use this language model globally
dspy.configure(lm=lm)
return "openrouter"
else:
print("❌ No OpenRouter API key found in environment variables.")
print("Please add OPENROUTER_API_KEY to your .env file")
sys.exit(1)
# ==========================================================================
# SECTION 2: DATA STRUCTURES (PYDANTIC MODELS)
# ==========================================================================
class DirectorCut(BaseModel):
"""
    🎬 PYDANTIC MODEL: Structured Data for Cinematic Prompts
This is a Pydantic model that defines the structure of our data.
Pydantic ensures that the LLM returns data in exactly the format we expect.
Think of this as a "template" that the LLM must fill out completely.
Each field has a description that helps the LLM understand what to generate.
Why use Pydantic with DSPy?
- Guarantees consistent output format
- Automatic validation of LLM responses
- Type safety for your Python code
- Clear documentation of expected data structure
"""
# Basic information (echoed from input)
director: str = Field(
...,
description="The director sent as part of input, echoed in the output."
)
video_idea: str = Field(
...,
description="The video idea sent as part of input, echoed in the output."
)
# The seven components of a cinematic prompt
# Each field uses Field(...) where ... means "required"
subject_description: str = Field(
...,
description="A detailed description of the main subject or character."
)
action_description: str = Field(
...,
description="A description of the specific action the subject is performing."
)
setting_description: str = Field(
...,
description="A rich description of the environment, location, and time of day."
)
cinematic_style: str = Field(
...,
description="The overall visual style or medium (e.g., 'Photorealistic, 8K')."
)
shot_and_framing: str = Field(
...,
description="The specific camera shot type and framing (e.g., 'Medium shot')."
)
camera_movement: str = Field(
...,
description="The movement of the camera during the shot (e.g., 'Slow dolly shot')."
)
lighting_and_color: str = Field(
...,
description="The lighting style and color palette that sets the mood."
)
def assemble_prompt(self) -> str:
"""
Combines all cinematic components into a single, formatted prompt.
This method takes all the individual pieces and creates a complete
prompt that could be used with video generation AI models.
Returns:
str: A complete, formatted cinematic prompt
"""
# Collect all the cinematic components (excluding director and video_idea)
components = [
self.subject_description,
self.action_description,
self.setting_description,
self.cinematic_style,
self.shot_and_framing,
self.camera_movement,
self.lighting_and_color,
]
# Join components with commas, filtering out empty strings
prompt_string = ", ".join(filter(None, [c.strip() for c in components if c]))
# Return empty string if no components
if not prompt_string:
return ""
# Capitalize first letter and add period
return prompt_string[0].upper() + prompt_string[1:] + "."
def pretty_print(self):
"""
Displays the director's interpretation in a nice format.
This is a helper method to make the output more readable
when we're testing or debugging our code.
"""
print(f"--- Director {self.director} ---")
print(f"{self.assemble_prompt()}")
print("--------------------------------")
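# Hedged usage sketch: constructing a DirectorCut by hand shows what assemble_prompt()
# produces. In the real workflow the LLM fills in these fields; the values below are
# purely illustrative.
#
#   cut = DirectorCut(
#       director="Example Director",
#       video_idea="A lighthouse keeper greets the dawn.",
#       subject_description="a weathered lighthouse keeper in a wool coat",
#       action_description="climbing the spiral staircase as the sun rises",
#       setting_description="a rocky Atlantic coastline at first light",
#       cinematic_style="Photorealistic, 8K",
#       shot_and_framing="Wide establishing shot",
#       camera_movement="Slow crane shot rising with the keeper",
#       lighting_and_color="Soft golden-hour light with cool blue shadows",
#   )
#   print(cut.assemble_prompt())
#   # -> "A weathered lighthouse keeper in a wool coat, climbing the spiral staircase ..."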
class ResultClass:
"""
    📦 SIMPLE DATA CONTAINER: Holds All Results
This is a simple class to package up all our results.
We could use a Pydantic model here too, but since this is just
for internal use (not LLM output), a simple class works fine.
"""
def __init__(self, additional_director, director_ideas, director_ranks):
self.additional_director = additional_director # The AI-suggested director
self.director_ideas = director_ideas # List of all DirectorCut objects
self.director_ranks = director_ranks # Ranking results from the judge
# ==========================================================================
# SECTION 3: DSPY SIGNATURES (THE HEART OF DSPY)
# ==========================================================================
"""
🔥 WHAT ARE DSPY SIGNATURES?
DSPy Signatures are like "function signatures" but for LLM tasks.
They define:
1. What inputs the LLM should expect
2. What outputs the LLM should produce
3. The task description (in the docstring)
Think of them as contracts between your code and the LLM.
The LLM will try to fulfill this contract every time.
Key Components:
- InputField: Data going INTO the LLM
- OutputField: Data coming OUT of the LLM
- Docstring: Instructions for the LLM about what to do
"""
class FindDirector(dspy.Signature):
"""
    🎯 SIGNATURE 1: Find Additional Director
This signature asks the LLM to suggest one additional director
who would be perfect for the given video idea, but isn't already
in the user's list.
This demonstrates how DSPy can handle creative, open-ended tasks
where there's no single "correct" answer.
"""
# === INPUTS ===
video_idea = dspy.InputField(
desc="A simple, high-level user idea or concept for a video."
)
director_list: List[str] = dspy.InputField(
desc="The names of directors the user wants to use."
)
# === OUTPUTS ===
    additional_director: str = dspy.OutputField(
        desc="The best additional director for the requested video idea, who is not already in the provided director list."
)
class GenerateDirectorCut(dspy.Signature):
"""
    🎬 SIGNATURE 2: Generate Cinematic Interpretation
This is the core signature that transforms a simple video idea
into a detailed cinematic prompt in the style of a specific director.
Notice how the output is a Pydantic model (DirectorCut).
DSPy will automatically ensure the LLM returns data in exactly
that structure, with all required fields filled out.
This demonstrates DSPy's structured output capabilities.
"""
# === INPUTS ===
video_idea = dspy.InputField(
desc="A simple, high-level user idea or concept for a video."
)
director = dspy.InputField(
desc="The name of the director to generate a cinematic prompt for (optional).",
default=None,
optional=True
)
# === OUTPUTS ===
director_cut: DirectorCut = dspy.OutputField(
desc="A structured object containing all seven deconstructed cinematic aspects."
)
class DirectorJudge(dspy.Signature):
"""
βš–οΈ SIGNATURE 3: Judge and Rank Director Ideas
This signature handles the complex task of comparing multiple
creative interpretations and ranking them objectively.
Notice this takes a List[DirectorCut] as input and returns
both rankings AND an explanation. This shows how DSPy can
handle complex, multi-part outputs.
This demonstrates DSPy's ability to handle reasoning tasks.
"""
# === INPUTS ===
director_ideas: List[DirectorCut] = dspy.InputField(
desc="A list of director interpretations to be ranked"
)
# === OUTPUTS ===
director_rankings: List[int] = dspy.OutputField(
        desc="Ranks 1 through N (1 = best), one per director idea, in the same order as director_ideas."
)
explanation: str = dspy.OutputField(
        desc="Explain why the ranking was given and the winner selected. Format the response with clear sections for each director using HTML: <h4> tags for director names with their rank, <p> tags for paragraphs, and <br> tags for line breaks. Make it well-structured and easy to read."
)
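# Hedged usage sketch: a Signature is never called directly; it is handed to a
# predictor, which turns it into an LLM call (this is exactly what the module in
# Section 4 does). Illustrative values only:
#   find = dspy.Predict(FindDirector)
#   suggestion = find(video_idea="A heist set in a library",
#                     director_list=["Quentin Tarantino"])
#   print(suggestion.additional_director)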
# ==========================================================================
# SECTION 4: DSPY MODULE (COMBINING SIGNATURES INTO WORKFLOWS)
# ==========================================================================
class DirectorBakeOff(dspy.Module):
"""
πŸ—οΈ DSPY MODULE: The Complete Workflow
A DSPy Module combines multiple Signatures into a complete workflow.
Think of it like a class that orchestrates several LLM calls to solve
a complex problem.
This module demonstrates:
1. How to combine multiple signatures
2. Different types of DSPy predictors (Predict vs ChainOfThought)
3. Async processing for efficiency
4. Complex workflow orchestration
The workflow:
1. Find an additional director suggestion
2. Generate cinematic interpretations for all directors (in parallel)
3. Judge and rank all interpretations
4. Return the best result with explanations
"""
def __init__(self):
"""
Initialize the module with three different DSPy predictors.
Notice the different types:
- dspy.Predict: Basic prediction (fast, direct)
- dspy.ChainOfThought: Reasoning-enabled prediction (slower, more thoughtful)
"""
        super().__init__()  # Initialize the dspy.Module base class
        # Basic predictor for finding additional director
self.findDirector = dspy.Predict(FindDirector)
# Basic predictor for generating director cuts
self.genDirectorCut = dspy.Predict(GenerateDirectorCut)
# Chain-of-thought predictor for complex ranking decisions
# This will make the LLM "think step by step" before ranking
self.directorJudge = dspy.ChainOfThought(DirectorJudge)
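        # Hedged note: dspy.ChainOfThought typically extends the signature with an
        # intermediate reasoning output, so the judge's prediction usually exposes
        # that reasoning (e.g. a `reasoning` attribute in recent DSPy versions)
        # alongside director_rankings and explanation.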
async def aforward(self, video_idea: str, directors: List[str] = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]):
"""
        🚀 ASYNC FORWARD: The Main Workflow
This is where the magic happens! This method orchestrates the entire
director bake-off process using multiple LLM calls.
Key DSPy concepts demonstrated:
1. Sequential LLM calls (find director first)
2. Parallel LLM calls (generate all director cuts simultaneously)
3. Complex data flow between signatures
4. Async processing for efficiency
Args:
video_idea: The user's video concept
directors: List of director names to compare
Returns:
ResultClass: Complete results including rankings and explanations
"""
# === STEP 1: Display user input ===
        print("\n🎬 User-Requested Directors:")
for director in directors:
print(f" - {director}")
# === STEP 2: Find additional director suggestion ===
        print("\n🤖 Finding AI-suggested director...")
additional_director_result = self.findDirector(
video_idea=video_idea,
director_list=directors
)
        additional_director = additional_director_result.additional_director
print(f" ✨ DSPy Suggested Director: {additional_director}")
# === STEP 3: Generate director interpretations (IN PARALLEL!) ===
        print("\n⚡ Generating director interpretations in parallel...")
# Combine user directors + AI suggestion
all_directors = directors + [additional_director]
# Use asyncio.gather to run multiple LLM calls simultaneously
# This is much faster than calling them one by one!
director_ideas = await asyncio.gather(
*[self.genDirectorCut.acall(video_idea=video_idea, director=director)
for director in all_directors]
)
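        # For comparison, a sequential version would simply await each call in turn:
        #   director_ideas = [await self.genDirectorCut.acall(video_idea=video_idea, director=d)
        #                     for d in all_directors]
        # asyncio.gather lets those requests overlap instead of running back to back.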
# Display all generated ideas
        print("\n🎭 Generated Director Ideas:")
for idea in director_ideas:
idea.director_cut.pretty_print()
# === STEP 4: Judge and rank all interpretations ===
        print("\n⚖️ Judging and ranking director ideas...")
# Extract just the DirectorCut objects for judging
director_cuts = [idea.director_cut for idea in director_ideas]
# Use Chain-of-Thought for complex ranking decision
director_ranks = self.directorJudge(director_ideas=director_cuts)
# === STEP 5: Display rankings ===
        print("\n🏆 Director Rankings:")
for rank, idea in zip(director_ranks.director_rankings, director_ideas):
print(f" Rank {rank}: {idea.director_cut.director}")
# === STEP 6: Find and display the winner ===
best_rank = min(director_ranks.director_rankings)
best_index = director_ranks.director_rankings.index(best_rank)
best_idea = director_ideas[best_index]
        print("\n🥇 WINNER - Best Ranked Director Idea:")
best_idea.director_cut.pretty_print()
        print("\n💭 Judge's Reasoning:")
print(f" {director_ranks.explanation}")
print("=" * 50)
# === STEP 7: Return complete results ===
return ResultClass(
additional_director=additional_director,
director_ideas=director_ideas,
director_ranks=director_ranks
)
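# Hedged usage sketch: once an LM has been configured (see setup_dspy_provider), the
# module can be driven directly; this is essentially what run_bake_off() below does:
#   result = asyncio.run(DirectorBakeOff().aforward("A chef discovers a magic ingredient."))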
# ==========================================================================
# SECTION 5: MAIN FUNCTIONS AND ENTRY POINT
# ==========================================================================
def run_bake_off(video_idea: str, directors: Optional[str] = None) -> ResultClass:
"""
    🎯 MAIN FUNCTION: Easy-to-use interface for the Director Bake-Off
This function provides a simple interface that handles:
1. LLM setup and configuration
2. Input validation and parsing
3. Running the complete workflow
4. Error handling
This is the function that external code (like our Gradio interface)
calls to use our DSPy system.
Args:
video_idea: A description of the video concept
directors: Comma-separated string of director names (optional)
Returns:
ResultClass: Complete results from the bake-off
"""
    print("🚀 Running Director Bake-Off...")
# === STEP 1: Ensure LLM is configured ===
global lm
if not lm:
provider = setup_dspy_provider()
        print(f" ✅ DSPy configured with {provider} provider.")
# === STEP 2: Parse and validate director input ===
if not (isinstance(directors, str) and directors.strip()):
# Use default directors if none provided
directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]
        print(" 📝 Using default directors")
else:
# Parse comma-separated string into list
directors = [d.strip() for d in directors.split(",") if d.strip()]
if not directors:
# Fallback to defaults if parsing failed
directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]
            print(" 📝 Parsing failed, using default directors")
# === STEP 3: Create and run the bake-off ===
bake_off = DirectorBakeOff()
result_class = asyncio.run(bake_off.aforward(video_idea=video_idea, directors=directors))
return result_class
# ==========================================================================
# SECTION 6: SCRIPT ENTRY POINT
# ==========================================================================
if __name__ == "__main__":
"""
    🎬 DEMO: Run the script directly to see it in action!
This section only runs when you execute this file directly
(not when it's imported as a module).
Try running: python director_bake_off.py
"""
    print("🎭 DSPy Director Bake-Off Demo")
print("=" * 40)
# Run with a sample video idea
sample_idea = "A futuristic cityscape with flying cars and neon lights."
    print(f"📝 Sample Video Idea: {sample_idea}")
result = run_bake_off(sample_idea)
    print("\n🎉 Demo completed! Check the output above to see how each director")
print(" would approach filming this futuristic cityscape.")
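    # To compare your own directors, pass a comma-separated string (names here are
    # just examples):
    #   run_bake_off(sample_idea, directors="Wes Anderson, Greta Gerwig, Denis Villeneuve")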