File size: 18,809 Bytes
f842afa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
#!/usr/bin/env python3
"""
DSPy Director Bake-Off: A Beginner's Guide to DSPy Programming

This script demonstrates core DSPy concepts through a fun example:
comparing how different movie directors would approach filming the same video idea.

Key DSPy Concepts Demonstrated:
1. Signatures - Define input/output interfaces for LLM tasks
2. Modules - Combine multiple signatures into complex workflows
3. Structured Output - Use Pydantic models for reliable data extraction
4. Async Processing - Handle multiple LLM calls efficiently
5. Chain of Thought - Enable reasoning for complex decisions

Author: DSPy Learning Example
"""

# Standard library imports
import os
import sys
import asyncio
from typing import List

# Third-party imports
import dspy                    # The main DSPy framework for LLM programming
from dotenv import load_dotenv # For loading environment variables from .env file
from pydantic import BaseModel, Field  # For structured data validation

# Populate os.environ from a local .env file before anything reads the API key;
# setup_dspy_provider() below looks up OPENROUTER_API_KEY via os.getenv.
load_dotenv()

# Module-level handle to the configured language model.
# It starts as None, is assigned by setup_dspy_provider(), and run_bake_off()
# checks it so the provider is only configured on the first call.
lm = None


# ==========================================================================
# SECTION 1: LLM SETUP AND CONFIGURATION
# ==========================================================================

def setup_dspy_provider():
    """
    Configure DSPy to use OpenRouter as its language-model provider.

    Reads OPENROUTER_API_KEY from the environment, builds a ``dspy.LM`` for
    the ``openrouter/moonshotai/kimi-k2:free`` model, stores it in the
    module-level ``lm`` variable and registers it with ``dspy.configure``.

    Returns:
        str: The provider name, always "openrouter" on success.

    Raises:
        SystemExit: With exit code 1 when OPENROUTER_API_KEY is missing or
            empty (a hint is printed first).
    """
    global lm  # The configured model is published through the module global

    # Read the key once so the check and the LM constructor see the same value
    api_key = os.getenv('OPENROUTER_API_KEY')

    if not api_key:
        print("❌ No OpenRouter API key found in environment variables.")
        print("Please add OPENROUTER_API_KEY to your .env file")
        sys.exit(1)

    print("✅ Configuring DSPy with OpenRouter...")

    # Model identifier format: "provider/model_name"
    lm = dspy.LM(
        model="openrouter/moonshotai/kimi-k2:free",
        api_key=api_key,
    )

    # Make this model the default for every DSPy predictor
    dspy.configure(lm=lm)
    return "openrouter"


# ==========================================================================
# SECTION 2: DATA STRUCTURES (PYDANTIC MODELS)
# ==========================================================================

class DirectorCut(BaseModel):
    """
    🎬 PYDANTIC MODEL: Structured Data for Cinematic Prompts
    
    This is a Pydantic model that defines the structure of our data.
    Pydantic ensures that the LLM returns data in exactly the format we expect.
    
    Think of this as a "template" that the LLM must fill out completely.
    Each field has a description that helps the LLM understand what to generate.
    
    Why use Pydantic with DSPy?
    - Guarantees consistent output format
    - Automatic validation of LLM responses
    - Type safety for your Python code
    - Clear documentation of expected data structure
    """

    # NOTE(review): the class docstring above is left untouched on purpose;
    # presumably it ends up in the schema description shown to the model.

    # --- Inputs echoed back by the model --------------------------------
    director: str = Field(
        ...,
        description="The director sent as part of input, echoed in the output."
    )
    video_idea: str = Field(
        ...,
        description="The video idea sent as part of input, echoed in the output."
    )

    # --- The seven cinematic components (all required: Field(...)) -------
    subject_description: str = Field(
        ...,
        description="A detailed description of the main subject or character."
    )
    action_description: str = Field(
        ...,
        description="A description of the specific action the subject is performing."
    )
    setting_description: str = Field(
        ...,
        description="A rich description of the environment, location, and time of day."
    )
    cinematic_style: str = Field(
        ...,
        description="The overall visual style or medium (e.g., 'Photorealistic, 8K')."
    )
    shot_and_framing: str = Field(
        ...,
        description="The specific camera shot type and framing (e.g., 'Medium shot')."
    )
    camera_movement: str = Field(
        ...,
        description="The movement of the camera during the shot (e.g., 'Slow dolly shot')."
    )
    lighting_and_color: str = Field(
        ...,
        description="The lighting style and color palette that sets the mood."
    )

    def assemble_prompt(self) -> str:
        """
        Build one sentence-like prompt out of the seven cinematic components.

        The components are stripped of surrounding whitespace, blank ones are
        dropped, and the rest are joined with ", ". The first character of the
        result is upper-cased and a trailing period is appended. The echoed
        ``director`` and ``video_idea`` fields are not part of the prompt.

        Returns:
            str: The assembled prompt, or "" when every component is blank.
        """
        raw_parts = (
            self.subject_description,
            self.action_description,
            self.setting_description,
            self.cinematic_style,
            self.shot_and_framing,
            self.camera_movement,
            self.lighting_and_color,
        )

        # Keep only the parts that still have content after trimming
        kept = []
        for part in raw_parts:
            if not part:
                continue
            trimmed = part.strip()
            if trimmed:
                kept.append(trimmed)

        if not kept:
            return ""

        sentence = ", ".join(kept)
        return f"{sentence[0].upper()}{sentence[1:]}."

    def pretty_print(self):
        """
        Print the director name, the assembled prompt and a separator line.

        Intended as a console helper for demos and debugging.
        """
        print(
            f"--- Director {self.director} ---",
            self.assemble_prompt(),
            "--------------------------------",
            sep="\n",
        )


class ResultClass:
    """
    Plain container bundling everything one bake-off run produced.

    It performs no validation and simply stores the three values it is given.
    A plain class is used because this object is only passed around inside
    the application and is never produced by the LLM.

    Attributes:
        additional_director: The extra director suggested by the model.
        director_ideas: The per-director generation results. As built by
            DirectorBakeOff.aforward, each item exposes ``.director_cut``.
        director_ranks: The judge's result, carrying the rankings and the
            explanation.
    """
    def __init__(self, additional_director, director_ideas, director_ranks):
        # Store all three values unchanged, in declaration order
        (
            self.additional_director,
            self.director_ideas,
            self.director_ranks,
        ) = (additional_director, director_ideas, director_ranks)


# ==========================================================================
# SECTION 3: DSPY SIGNATURES (THE HEART OF DSPY)
# ==========================================================================

"""
🔥 WHAT ARE DSPY SIGNATURES?

DSPy Signatures are like "function signatures" but for LLM tasks.
They define:
1. What inputs the LLM should expect
2. What outputs the LLM should produce
3. The task description (in the docstring)

Think of them as contracts between your code and the LLM.
The LLM will try to fulfill this contract every time.

Key Components:
- InputField: Data going INTO the LLM
- OutputField: Data coming OUT of the LLM
- Docstring: Instructions for the LLM about what to do
"""

class FindDirector(dspy.Signature):
    """
    🎯 SIGNATURE 1: Find Additional Director
    
    This signature asks the LLM to suggest one additional director
    who would be perfect for the given video idea, but isn't already
    in the user's list.
    
    This demonstrates how DSPy can handle creative, open-ended tasks
    where there's no single "correct" answer.
    """
    # NOTE: for a DSPy signature the docstring above doubles as the task
    # instructions given to the LLM, so it is part of runtime behavior and is
    # deliberately left as-is.
    # NOTE(review): its leading symbol looks mis-encoded (probably an emoji).
    
    # === INPUTS ===
    # Free-text concept supplied by the user (no annotation given here).
    video_idea = dspy.InputField(
        desc="A simple, high-level user idea or concept for a video."
    )
    # Directors the user already picked; the suggestion must not repeat them.
    director_list: List[str] = dspy.InputField(
        desc="The names of directors the user wants to use."
    )

    # === OUTPUTS ===
    # NOTE: "additonal" is misspelled, but DirectorBakeOff.aforward reads the
    # result through this exact attribute name (`.additonal_director`), so the
    # field and that access must be renamed together, never one without the other.
    additonal_director: str = dspy.OutputField(
        desc="The best possible director based on the wanted video idea, that is not already in the provided director list."
    )


class GenerateDirectorCut(dspy.Signature):
    """
    🎬 SIGNATURE 2: Generate Cinematic Interpretation
    
    This is the core signature that transforms a simple video idea
    into a detailed cinematic prompt in the style of a specific director.
    
    Notice how the output is a Pydantic model (DirectorCut).
    DSPy will automatically ensure the LLM returns data in exactly
    that structure, with all required fields filled out.
    
    This demonstrates DSPy's structured output capabilities.
    """
    # NOTE: for a DSPy signature the docstring above doubles as the task
    # instructions given to the LLM, so it is part of runtime behavior and is
    # deliberately left as-is.
    # NOTE(review): its leading symbol looks mis-encoded (probably an emoji).
    
    # === INPUTS ===
    # Free-text concept supplied by the user.
    video_idea = dspy.InputField(
        desc="A simple, high-level user idea or concept for a video."
    )
    # Director whose style should be imitated. Declared with a None default;
    # DirectorBakeOff.aforward always passes a name explicitly.
    # NOTE(review): `optional=True` is not used by any other field in this
    # file - confirm that DSPy/pydantic accept this keyword without warnings.
    director = dspy.InputField(
        desc="The name of the director to generate a cinematic prompt for (optional).",
        default=None,
        optional=True
    )

    # === OUTPUTS ===
    # Structured result; callers read it as `<prediction>.director_cut`.
    director_cut: DirectorCut = dspy.OutputField(
        desc="A structured object containing all seven deconstructed cinematic aspects."
    )


class DirectorJudge(dspy.Signature):
    """
    βš–οΈ SIGNATURE 3: Judge and Rank Director Ideas
    
    This signature handles the complex task of comparing multiple
    creative interpretations and ranking them objectively.
    
    Notice this takes a List[DirectorCut] as input and returns
    both rankings AND an explanation. This shows how DSPy can
    handle complex, multi-part outputs.
    
    This demonstrates DSPy's ability to handle reasoning tasks.
    """
    # NOTE: for a DSPy signature the docstring above doubles as the task
    # instructions given to the LLM, so it is part of runtime behavior and is
    # deliberately left as-is.
    # NOTE(review): its leading symbol looks mis-encoded (probably an emoji).
    
    # === INPUTS ===
    # All interpretations to compare, in the order they were generated.
    director_ideas: List[DirectorCut] = dspy.InputField(
        desc="A list of director interpretations to be ranked"
    )
    
    # === OUTPUTS ===
    # One rank per idea; DirectorBakeOff.aforward pairs rank i with idea i,
    # so the list is expected to be positionally aligned with `director_ideas`.
    # NOTE(review): these two fields pass `description=` while the other
    # signatures in this file pass `desc=` - confirm DSPy treats both the same.
    director_rankings: List[int] = dspy.OutputField(
        description="Rank between 1, 2, 3 ... N where 1 is best"
    )
    # HTML-formatted justification of the ranking (see the instruction text).
    explanation: str = dspy.OutputField(
        description="Explain why the ranking was given and the winner selected. Format your response with clear sections for each director using HTML formatting: use <h4> tags for director names with their rank, <p> tags for paragraphs, and <br> tags for line breaks. Make it well-structured and easy to read."
    )


# ==========================================================================
# SECTION 4: DSPY MODULE (COMBINING SIGNATURES INTO WORKFLOWS)
# ==========================================================================

class DirectorBakeOff(dspy.Module):
    """
    DSPy module that runs the complete director bake-off workflow.

    A DSPy Module combines several Signatures into one workflow. This one
    chains three predictors:

    1. ``findDirector``   (dspy.Predict)        - suggest one extra director
    2. ``genDirectorCut`` (dspy.Predict)        - one cinematic interpretation
       per director, all requested concurrently
    3. ``directorJudge``  (dspy.ChainOfThought) - rank the interpretations and
       explain the ranking

    The result is returned as a ``ResultClass``.
    """

    def __init__(self):
        """
        Create the three predictors used by the workflow.

        - dspy.Predict: plain prediction for the two generation steps
        - dspy.ChainOfThought: reasoning-enabled prediction for the judge
        """
        # Let dspy.Module set up its own internal state first
        super().__init__()

        # Basic predictor for finding an additional director
        self.findDirector = dspy.Predict(FindDirector)

        # Basic predictor for generating director cuts
        self.genDirectorCut = dspy.Predict(GenerateDirectorCut)

        # Chain-of-thought predictor so the judge reasons before ranking
        self.directorJudge = dspy.ChainOfThought(DirectorJudge)

    async def aforward(self, video_idea: str, directors: List[str] = None):
        """
        Run the whole bake-off for one video idea.

        Steps:
        1. Ask the model for one additional director not in ``directors``.
        2. Generate a DirectorCut for every director concurrently.
        3. Let the judge rank all interpretations.
        4. Print the ideas, the ranking, the winner and the judge's reasoning.

        Args:
            video_idea: The user's video concept.
            directors: Director names to compare. When omitted (None), the
                three default directors are used. An explicitly passed empty
                list is respected.

        Returns:
            ResultClass: The suggested director, the generation results (each
            exposing ``.director_cut``) and the judge's result.

        Raises:
            ValueError: If the judge returns no ranking that can be paired
                with a generated idea.
        """
        # A None default avoids sharing one mutable list between calls
        if directors is None:
            directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]

        # === STEP 1: Display user input ===
        print("\n🎬 User Wanted Directors:")
        for director in directors:
            print(f"   - {director}")

        # === STEP 2: Find additional director suggestion ===
        print("\n🤖 Finding AI-suggested director...")
        # Awaiting the async entry point keeps the event loop free, matching
        # how the generation step below already calls its predictor.
        additional_director_result = await self.findDirector.acall(
            video_idea=video_idea,
            director_list=directors,
        )
        # The attribute spelling matches the output field on FindDirector
        additional_director = additional_director_result.additonal_director
        print(f"   ✨ DSPy Suggested Director: {additional_director}")

        # === STEP 3: Generate director interpretations (in parallel) ===
        print("\n⚡ Generating director interpretations in parallel...")

        # User directors plus the AI suggestion (works for any iterable)
        all_directors = [*directors, additional_director]

        # asyncio.gather runs the calls concurrently and keeps input order
        director_ideas = await asyncio.gather(
            *[
                self.genDirectorCut.acall(video_idea=video_idea, director=director)
                for director in all_directors
            ]
        )

        print("\n🎭 Generated Director Ideas:")
        for idea in director_ideas:
            idea.director_cut.pretty_print()

        # === STEP 4: Judge and rank all interpretations ===
        print("\n⚖️ Judging and ranking director ideas...")

        # The judge only needs the structured DirectorCut objects
        director_cuts = [idea.director_cut for idea in director_ideas]
        director_ranks = await self.directorJudge.acall(director_ideas=director_cuts)

        # === STEP 5: Display rankings ===
        rankings = director_ranks.director_rankings
        if len(rankings) != len(director_ideas):
            # The model's list is free-form; warn instead of silently
            # pairing a rank with the wrong (or a missing) idea.
            print(
                f"   Warning: judge returned {len(rankings)} rankings for "
                f"{len(director_ideas)} ideas; unmatched entries are ignored."
            )

        # Rank i belongs to idea i; zip drops anything without a partner
        ranked_ideas = list(zip(rankings, director_ideas))
        if not ranked_ideas:
            raise ValueError("The judge returned no rankings to select a winner from.")

        print("\n🏆 Director Rankings:")
        for rank, idea in ranked_ideas:
            print(f"   Rank {rank}: {idea.director_cut.director}")

        # === STEP 6: Find and display the winner ===
        # Lowest rank wins; on ties min() keeps the earliest idea
        _, best_idea = min(ranked_ideas, key=lambda pair: pair[0])

        print("\n🥇 WINNER - Best Ranked Director Idea:")
        best_idea.director_cut.pretty_print()

        print("\n💭 Judge's Reasoning:")
        print(f"   {director_ranks.explanation}")
        print("=" * 50)

        # === STEP 7: Return complete results ===
        return ResultClass(
            additional_director=additional_director,
            director_ideas=director_ideas,
            director_ranks=director_ranks,
        )


# ==========================================================================
# SECTION 5: MAIN FUNCTIONS AND ENTRY POINT
# ==========================================================================

def run_bake_off(video_idea: str, directors: str = None) -> ResultClass:
    """
    Synchronous entry point for the Director Bake-Off.

    Handles, in order:
    1. Configuring the LLM provider on first use (module-level ``lm``)
    2. Turning the comma-separated ``directors`` string into a list
    3. Running ``DirectorBakeOff.aforward`` to completion with ``asyncio.run``

    Args:
        video_idea: A description of the video concept.
        directors: Comma-separated director names. If it is None, not a
            string, blank, or contains only commas/whitespace, the three
            default directors are used instead.

    Returns:
        ResultClass: Complete results from the bake-off.

    Note:
        ``asyncio.run`` starts its own event loop, so this function must be
        called from synchronous code.
    """
    print("🚀 Running Director Bake-Off...")

    # === STEP 1: Ensure LLM is configured ===
    # `lm` is only read here; setup_dspy_provider() is what assigns it.
    if not lm:
        provider = setup_dspy_provider()
        print(f"   ✅ DSPy configured with {provider} provider.")

    # === STEP 2: Parse and validate director input ===
    default_directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]

    if not (isinstance(directors, str) and directors.strip()):
        # Nothing usable was supplied
        director_names = default_directors
        print("   📝 Using default directors")
    else:
        # Split on commas and drop blank entries such as "a,,b" or trailing commas
        director_names = [name.strip() for name in directors.split(",") if name.strip()]
        if not director_names:
            # Input such as ", ," contains no actual names
            director_names = default_directors
            print("   📝 Parsing failed, using default directors")

    # === STEP 3: Create and run the bake-off ===
    bake_off = DirectorBakeOff()
    return asyncio.run(
        bake_off.aforward(video_idea=video_idea, directors=director_names)
    )


# ==========================================================================
# SECTION 6: SCRIPT ENTRY POINT
# ==========================================================================

if __name__ == "__main__":
    # Demo: run the script directly to see the bake-off in action.
    #
    # This section only runs when the file is executed directly
    # (not when it is imported as a module).
    #
    # Try running: python director_bake_off.py
    print("🎭 DSPy Director Bake-Off Demo")
    print("=" * 40)

    # Run with a sample video idea and the default directors
    sample_idea = "A futuristic cityscape with flying cars and neon lights."
    print(f"📝 Sample Video Idea: {sample_idea}")

    # The workflow prints its own progress; the result is kept for inspection
    result = run_bake_off(sample_idea)

    print("\n🎉 Demo completed! Check the output above to see how each director")
    print("   would approach filming this futuristic cityscape.")