File size: 3,719 Bytes
53ea588
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""Acknowledgment processor that provides verbal feedback during conversation pauses.

A simple processor that adds natural conversational acknowledgments when the user stops speaking, in case
the LLM or RAG takes a long time to respond, helping to create more engaging interactions.
"""

import random

from loguru import logger
from pipecat.frames.frames import Frame, TTSSpeakFrame, UserStoppedSpeakingFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class AcknowledgmentProcessor(FrameProcessor):
    """Processor that sends configurable acknowledgment responses during conversation pauses.

    This processor enhances conversation flow by sending occasional acknowledgment words
    (like "Hmmm" or "Let me think") when users pause speaking. It only works when 2-phase
    End-of-Utterance (EOU) detection is disabled.

    Input Frames:
        - UserStoppedSpeakingFrame (passed through): Indicates when a user has stopped speaking.
          The frame itself is always forwarded in its original direction.

    Output Frames:
        - TTSSpeakFrame: Contains the acknowledgment text to be spoken. Pushed downstream,
          ahead of the triggering UserStoppedSpeakingFrame, and only when the random draw
          falls below ``filler_probability``.

    Args:
        filler_words (list[str]): List of acknowledgment phrases to use.
            Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
        filler_probability (float): Probability (0.0 to 1.0) of sending an
            acknowledgment when a pause is detected. Defaults to 0.5.
    """

    def __init__(self, filler_words=None, filler_probability=0.5, **kwargs):
        """Initialize the acknowledgment processor.

        Args:
            filler_words (list[str]): List of acknowledgment phrases to use.
                Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
            filler_probability (float, optional): Probability (0.0 to 1.0) of sending an
                acknowledgment when a pause is detected. Defaults to 0.5.
            **kwargs: Additional arguments passed to the parent FrameProcessor.

        Raises:
            ValueError: If filler_probability is not between 0.0 and 1.0
            ValueError: If filler_words is empty or None
        """
        super().__init__(**kwargs)

        # Fail fast on misconfiguration instead of crashing later inside the
        # pipeline when random.choice() is handed None or an empty sequence.
        if not filler_words:
            raise ValueError("filler_words must be a non-empty list of acknowledgment phrases")
        # The chained comparison is also False for NaN, so NaN is rejected too.
        if not 0.0 <= filler_probability <= 1.0:
            raise ValueError(f"filler_probability must be between 0.0 and 1.0, got {filler_probability}")

        # Keep a private copy so later mutation of the caller's list cannot
        # leave the processor with nothing to choose from.
        self.filler_words = list(filler_words)
        self.filler_probability = filler_probability

    async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
        """Process incoming frames and generate acknowledgments when appropriate.

        Every frame is forwarded unchanged in its original direction. When the frame
        is a UserStoppedSpeakingFrame, a TTSSpeakFrame with a randomly chosen filler
        phrase is additionally pushed downstream first, with probability
        ``filler_probability``.

        Args:
            frame (Frame): The incoming frame to process
            direction (FrameDirection): The direction the frame is traveling

        Returns:
            None
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserStoppedSpeakingFrame):
            # random.random() is in [0.0, 1.0), so 0.0 never fires and 1.0 always does.
            if random.random() < self.filler_probability:
                filler = random.choice(self.filler_words)
                logger.debug(f"User stopped speaking, sending filler word: {filler}")
                await self.push_frame(TTSSpeakFrame(filler), FrameDirection.DOWNSTREAM)
            else:
                # Skip silently: pushing an empty TTSSpeakFrame would ask the TTS
                # stage to speak nothing and logged a misleading "sending" message.
                logger.debug("User stopped speaking, skipping filler word")

        await self.push_frame(frame, direction)