Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
DSPy Director Bake-Off: A Beginner's Guide to DSPy Programming

This script demonstrates core DSPy concepts through a fun example:
comparing how different movie directors would approach filming the same video idea.

Key DSPy Concepts Demonstrated:
1. Signatures - Define input/output interfaces for LLM tasks
2. Modules - Combine multiple signatures into complex workflows
3. Structured Output - Use Pydantic models for reliable data extraction
4. Async Processing - Handle multiple LLM calls efficiently
5. Chain of Thought - Enable reasoning for complex decisions

Author: DSPy Learning Example
"""
# Standard library imports
import asyncio
import os
import sys
from typing import List, Optional

# Third-party imports
import dspy  # The main DSPy framework for LLM programming
from dotenv import load_dotenv  # For loading environment variables from .env file
from pydantic import BaseModel, Field  # For structured data validation
# Load environment variables from a .env file so that os.getenv() can see
# settings such as OPENROUTER_API_KEY (read by setup_dspy_provider()).
load_dotenv()

# Module-level handle to the configured language model.
# It stays None until setup_dspy_provider() assigns a dspy.LM instance;
# run_bake_off() checks it so the provider is only configured once.
lm = None
# ========================================================================== | |
# SECTION 1: LLM SETUP AND CONFIGURATION | |
# ========================================================================== | |
def setup_dspy_provider(model: str = "openrouter/moonshotai/kimi-k2:free") -> str:
    """
    Configure DSPy to talk to a model served through OpenRouter.

    Reads OPENROUTER_API_KEY from the environment, builds a ``dspy.LM`` for
    ``model``, stores it in the module-level ``lm`` variable and registers it
    globally with ``dspy.configure``.

    Args:
        model: Model identifier handed to ``dspy.LM`` in the
            "provider/model_name" form. Defaults to the model this script
            was originally written against.

    Returns:
        str: The name of the configured provider (always "openrouter").

    Raises:
        SystemExit: With exit code 1 when OPENROUTER_API_KEY is unset or empty.
    """
    global lm  # The configured model is shared through the module-level variable

    # Read the key once so the check and the value handed to dspy.LM cannot differ.
    api_key = os.getenv('OPENROUTER_API_KEY')
    if not api_key:
        # NOTE(review): the leading glyphs in these console messages look like
        # mis-decoded emoji; they are kept exactly as found.
        print("β No OpenRouter API key found in environment variables.")
        print("Please add OPENROUTER_API_KEY to your .env file")
        sys.exit(1)

    print("β Configuring DSPy with OpenRouter...")
    lm = dspy.LM(model=model, api_key=api_key)

    # Make this language model the default for every DSPy predictor.
    dspy.configure(lm=lm)
    return "openrouter"
# ========================================================================== | |
# SECTION 2: DATA STRUCTURES (PYDANTIC MODELS) | |
# ========================================================================== | |
class DirectorCut(BaseModel):
    """
    π¬ PYDANTIC MODEL: Structured Data for Cinematic Prompts
    This is a Pydantic model that defines the structure of our data.
    Pydantic ensures that the LLM returns data in exactly the format we expect.
    Think of this as a "template" that the LLM must fill out completely.
    Each field has a description that helps the LLM understand what to generate.
    Why use Pydantic with DSPy?
    - Guarantees consistent output format
    - Automatic validation of LLM responses
    - Type safety for your Python code
    - Clear documentation of expected data structure
    """
    # NOTE(review): the class docstring and the field descriptions are kept
    # verbatim because Pydantic can expose them in the model's JSON schema,
    # which presumably reaches the LLM - confirm before rewording them.

    # Values echoed back from the request.
    director: str = Field(
        ...,
        description="The director sent as part of input, echoed in the output."
    )
    video_idea: str = Field(
        ...,
        description="The video idea sent as part of input, echoed in the output."
    )

    # The seven building blocks of the cinematic prompt; `...` marks each as required.
    subject_description: str = Field(
        ...,
        description="A detailed description of the main subject or character."
    )
    action_description: str = Field(
        ...,
        description="A description of the specific action the subject is performing."
    )
    setting_description: str = Field(
        ...,
        description="A rich description of the environment, location, and time of day."
    )
    cinematic_style: str = Field(
        ...,
        description="The overall visual style or medium (e.g., 'Photorealistic, 8K')."
    )
    shot_and_framing: str = Field(
        ...,
        description="The specific camera shot type and framing (e.g., 'Medium shot')."
    )
    camera_movement: str = Field(
        ...,
        description="The movement of the camera during the shot (e.g., 'Slow dolly shot')."
    )
    lighting_and_color: str = Field(
        ...,
        description="The lighting style and color palette that sets the mood."
    )

    def assemble_prompt(self) -> str:
        """
        Build one prompt sentence out of the seven cinematic components.

        The director and video idea are not part of the sentence. Components
        that are empty or whitespace-only are skipped; the rest are stripped
        and joined with ", ".

        Returns:
            str: The joined text with its first character upper-cased and a
            trailing period appended, or "" when every component is blank.
        """
        pieces = []
        for raw in (
            self.subject_description,
            self.action_description,
            self.setting_description,
            self.cinematic_style,
            self.shot_and_framing,
            self.camera_movement,
            self.lighting_and_color,
        ):
            if not raw:
                continue
            text = raw.strip()
            if text:
                pieces.append(text)

        if not pieces:
            return ""

        sentence = ", ".join(pieces)
        return f"{sentence[:1].upper()}{sentence[1:]}."

    def pretty_print(self):
        """
        Print the director's name followed by the assembled prompt.

        Intended as a console helper for testing and debugging.
        """
        header = f"--- Director {self.director} ---"
        print(header)
        print(self.assemble_prompt())
        print("--------------------------------")
class ResultClass:
    """
    Plain container bundling everything a bake-off run produces.

    It is only used inside the program (it is never parsed from LLM output),
    so an ordinary class is enough and no validation is performed.

    Attributes:
        additional_director: Name of the extra director suggested by the LLM.
        director_ideas: The generation results, in the order they were
            requested. As filled in by DirectorBakeOff.aforward these are the
            prediction objects whose ``director_cut`` attribute holds the
            DirectorCut, not bare DirectorCut instances.
        director_ranks: The judge's result; DirectorBakeOff.aforward reads
            ``director_rankings`` and ``explanation`` from it.
    """

    def __init__(self, additional_director, director_ideas, director_ranks):
        self.additional_director = additional_director
        self.director_ideas = director_ideas
        self.director_ranks = director_ranks

    def __repr__(self) -> str:
        # A readable repr makes logged or printed results debuggable.
        return (
            f"{type(self).__name__}("
            f"additional_director={self.additional_director!r}, "
            f"director_ideas={self.director_ideas!r}, "
            f"director_ranks={self.director_ranks!r})"
        )
# ========================================================================== | |
# SECTION 3: DSPY SIGNATURES (THE HEART OF DSPY) | |
# ========================================================================== | |
"""
WHAT ARE DSPY SIGNATURES?

DSPy Signatures are like "function signatures", but for LLM tasks.
They define:
1. What inputs the LLM should expect
2. What outputs the LLM should produce
3. The task description (in the docstring)

Think of them as contracts between your code and the LLM.
The LLM will try to fulfill this contract every time.

Key Components:
- InputField: Data going INTO the LLM
- OutputField: Data coming OUT of the LLM
- Docstring: Instructions for the LLM about what to do
"""
class FindDirector(dspy.Signature):
    """
    π― SIGNATURE 1: Find Additional Director
    This signature asks the LLM to suggest one additional director
    who would be perfect for the given video idea, but isn't already
    in the user's list.
    This demonstrates how DSPy can handle creative, open-ended tasks
    where there's no single "correct" answer.
    """
    # NOTE(review): DSPy is expected to pass a Signature's docstring to the
    # model as the task instructions, so the docstring above is left
    # untouched - confirm before rewording it.

    # === INPUTS ===
    # The user's video concept (declared without a type annotation).
    video_idea = dspy.InputField(
        desc="A simple, high-level user idea or concept for a video."
    )
    # Directors the user already picked; the suggestion must not repeat them.
    director_list: List[str] = dspy.InputField(
        desc="The names of directors the user wants to use."
    )

    # === OUTPUTS ===
    # NOTE: the field name is misspelled ("additonal"). DirectorBakeOff.aforward
    # reads the result under exactly this name, so any rename has to change
    # both places at once.
    additonal_director: str = dspy.OutputField(
        desc="The best possible director based on the wanted video idea, that is not already in the provided director list."
    )
class GenerateDirectorCut(dspy.Signature):
    """
    π¬ SIGNATURE 2: Generate Cinematic Interpretation
    This is the core signature that transforms a simple video idea
    into a detailed cinematic prompt in the style of a specific director.
    Notice how the output is a Pydantic model (DirectorCut).
    DSPy will automatically ensure the LLM returns data in exactly
    that structure, with all required fields filled out.
    This demonstrates DSPy's structured output capabilities.
    """
    # NOTE(review): DSPy is expected to pass a Signature's docstring to the
    # model as the task instructions, so the docstring above is left
    # untouched - confirm before rewording it.

    # === INPUTS ===
    # The user's video concept (declared without a type annotation).
    video_idea = dspy.InputField(
        desc="A simple, high-level user idea or concept for a video."
    )
    # Director whose style should be imitated. It is declared with a None
    # default, although DirectorBakeOff.aforward always supplies a name.
    # NOTE(review): `optional=True` does not appear to be a documented
    # dspy.InputField argument - verify it has the intended effect.
    director = dspy.InputField(
        desc="The name of the director to generate a cinematic prompt for (optional).",
        default=None,
        optional=True
    )

    # === OUTPUTS ===
    # Structured result: one DirectorCut with the seven prompt components.
    director_cut: DirectorCut = dspy.OutputField(
        desc="A structured object containing all seven deconstructed cinematic aspects."
    )
class DirectorJudge(dspy.Signature):
    """
    βοΈ SIGNATURE 3: Judge and Rank Director Ideas
    This signature handles the complex task of comparing multiple
    creative interpretations and ranking them objectively.
    Notice this takes a List[DirectorCut] as input and returns
    both rankings AND an explanation. This shows how DSPy can
    handle complex, multi-part outputs.
    This demonstrates DSPy's ability to handle reasoning tasks.
    """
    # NOTE(review): DSPy is expected to pass a Signature's docstring to the
    # model as the task instructions, so the docstring above is left
    # untouched - confirm before rewording it.

    # === INPUTS ===
    # Every interpretation to compare, in the order it was generated.
    director_ideas: List[DirectorCut] = dspy.InputField(
        desc="A list of director interpretations to be ranked"
    )

    # === OUTPUTS ===
    # The output fields use DSPy's own `desc=` keyword, matching the other
    # signatures in this file (they previously used `description=`).
    # DirectorBakeOff.aforward pairs the ranks with the ideas by position.
    director_rankings: List[int] = dspy.OutputField(
        desc="Rank between 1, 2, 3 ... N where 1 is best"
    )
    # Free-text justification; the description asks the model for HTML markup.
    explanation: str = dspy.OutputField(
        desc="Explain why the ranking was given and the winner selected. Format your response with clear sections for each director using HTML formatting: use <h4> tags for director names with their rank, <p> tags for paragraphs, and <br> tags for line breaks. Make it well-structured and easy to read."
    )
# ========================================================================== | |
# SECTION 4: DSPY MODULE (COMBINING SIGNATURES INTO WORKFLOWS) | |
# ========================================================================== | |
class DirectorBakeOff(dspy.Module):
    """
    DSPy module that runs the complete director bake-off workflow.

    It chains three predictors:
    1. FindDirector (dspy.Predict) - suggests one extra director.
    2. GenerateDirectorCut (dspy.Predict) - produces one DirectorCut per
       director; these calls are awaited together with asyncio.gather.
    3. DirectorJudge (dspy.ChainOfThought) - ranks the interpretations and
       explains the ranking.

    Progress and results are printed to stdout as the workflow runs.
    """

    def __init__(self):
        """Create the three predictors used by the workflow."""
        # Let dspy.Module set up its own state before attributes are added.
        super().__init__()
        # Plain predictors: one direct LLM call each.
        self.findDirector = dspy.Predict(FindDirector)
        self.genDirectorCut = dspy.Predict(GenerateDirectorCut)
        # Chain-of-thought predictor for the ranking step.
        self.directorJudge = dspy.ChainOfThought(DirectorJudge)

    async def aforward(self, video_idea: str, directors: Optional[List[str]] = None):
        """
        Run the bake-off for one video idea.

        Args:
            video_idea: The user's video concept.
            directors: Director names to compare. When omitted (None), the
                three built-in default directors are used.

        Returns:
            ResultClass: The suggested director, the generation results (one
            per director, each exposing ``director_cut``) and the judge's
            result (``director_rankings`` and ``explanation``).

        Raises:
            ValueError: If the judge returns no rankings, so no winner can be
                chosen.
        """
        # A None sentinel replaces the former mutable list default.
        if directors is None:
            directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]

        # NOTE(review): the leading glyphs in the console messages below look
        # like mis-decoded emoji; they are kept exactly as found.

        # === STEP 1: Display user input ===
        print("\n㪠User Wanted Directors:")
        for director in directors:
            print(f" - {director}")

        # === STEP 2: Find additional director suggestion ===
        print("\nπ€ Finding AI-suggested director...")
        # Awaited through .acall, like the generation step below, so the
        # event loop is not blocked while the model answers.
        additional_director_result = await self.findDirector.acall(
            video_idea=video_idea,
            director_list=directors,
        )
        # The attribute name carries the spelling used by FindDirector.
        additional_director = additional_director_result.additonal_director
        print(f" β¨ DSPy Suggested Director: {additional_director}")

        # === STEP 3: Generate director interpretations (in parallel) ===
        print("\nβ‘ Generating director interpretations in parallel...")
        all_directors = list(directors) + [additional_director]
        director_ideas = await asyncio.gather(
            *[
                self.genDirectorCut.acall(video_idea=video_idea, director=director)
                for director in all_directors
            ]
        )

        print("\nπ Generated Director Ideas:")
        for idea in director_ideas:
            idea.director_cut.pretty_print()

        # === STEP 4: Judge and rank all interpretations ===
        print("\nβοΈ Judging and ranking director ideas...")
        director_cuts = [idea.director_cut for idea in director_ideas]
        director_ranks = await self.directorJudge.acall(director_ideas=director_cuts)

        # === STEP 5: Display rankings ===
        print("\nπ Director Rankings:")
        # Ranks are matched to ideas by position. Pairing them once means a
        # judge answer that is longer or shorter than the idea list cannot
        # produce an out-of-range index when the winner is looked up.
        ranked_ideas = list(zip(director_ranks.director_rankings, director_ideas))
        for rank, idea in ranked_ideas:
            print(f" Rank {rank}: {idea.director_cut.director}")

        # === STEP 6: Find and display the winner ===
        if not ranked_ideas:
            raise ValueError(
                "The judge returned no rankings for the generated director ideas."
            )
        # min() keeps the first entry among equal ranks, i.e. the earliest idea.
        _, best_idea = min(ranked_ideas, key=lambda pair: pair[0])
        print("\nπ₯ WINNER - Best Ranked Director Idea:")
        best_idea.director_cut.pretty_print()
        print("\nπ Judge's Reasoning:")
        print(f" {director_ranks.explanation}")
        print("=" * 50)

        # === STEP 7: Return complete results ===
        return ResultClass(
            additional_director=additional_director,
            director_ideas=director_ideas,
            director_ranks=director_ranks,
        )
# ========================================================================== | |
# SECTION 5: MAIN FUNCTIONS AND ENTRY POINT | |
# ========================================================================== | |
def run_bake_off(video_idea: str, directors: Optional[str] = None) -> ResultClass:
    """
    Synchronous entry point for the Director Bake-Off.

    Configures the language model on first use, turns the comma-separated
    ``directors`` string into a list and runs DirectorBakeOff.aforward to
    completion with asyncio.run.

    Args:
        video_idea: A description of the video concept.
        directors: Comma-separated director names. When it is None, not a
            string, blank, or contains no names after splitting, the three
            default directors are used instead.

    Returns:
        ResultClass: Complete results from the bake-off.

    Raises:
        SystemExit: Raised by setup_dspy_provider() when no API key is set.
    """
    global lm

    # NOTE(review): the leading glyphs in the console messages below look
    # like mis-decoded emoji; they are kept exactly as found.
    print("π Running Director Bake-Off...")

    # === STEP 1: Ensure LLM is configured ===
    # Compare against None explicitly rather than relying on the truthiness
    # of the LM object.
    if lm is None:
        provider = setup_dspy_provider()
        print(f" β DSPy configured with {provider} provider.")

    # === STEP 2: Parse and validate director input ===
    default_directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]
    if isinstance(directors, str) and directors.strip():
        # Split on commas and drop entries that are empty after stripping.
        director_names = [name.strip() for name in directors.split(",") if name.strip()]
        if not director_names:
            # e.g. the input consisted of commas and whitespace only
            director_names = default_directors
            print(" π Parsing failed, using default directors")
    else:
        director_names = default_directors
        print(" π Using default directors")

    # === STEP 3: Create and run the bake-off ===
    bake_off = DirectorBakeOff()
    return asyncio.run(
        bake_off.aforward(video_idea=video_idea, directors=director_names)
    )
# ========================================================================== | |
# SECTION 6: SCRIPT ENTRY POINT | |
# ========================================================================== | |
if __name__ == "__main__":
    # Demo: runs only when this file is executed directly, not when it is
    # imported as a module. Try: python director_bake_off.py
    sample_idea = "A futuristic cityscape with flying cars and neon lights."

    print("π DSPy Director Bake-Off Demo")
    print("=" * 40)
    print(f"π Sample Video Idea: {sample_idea}")

    # Run the complete workflow with the default directors.
    result = run_bake_off(sample_idea)

    print("\nπ Demo completed! Check the output above to see how each director")
    print(" would approach filming this futuristic cityscape.")