fciannella's picture
Working with service run on 7860
53ea588
raw
history blame
3.72 kB
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License
"""Acknowledgment processor that provides verbal feedback during conversation pauses.
A simple processor that adds natural conversational acknowledgments when the user stops speaking, so that the
conversation stays engaging while the LLM or RAG is taking a long time to respond.
"""
import random
from loguru import logger
from pipecat.frames.frames import Frame, TTSSpeakFrame, UserStoppedSpeakingFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class AcknowledgmentProcessor(FrameProcessor):
    """Processor that sends configurable acknowledgment responses during conversation pauses.

    This processor enhances conversation flow by sending occasional acknowledgment words
    (like "Hmmm" or "Let me think") when users pause speaking. It only works when 2-phase
    End-of-Utterance (EOU) detection is disabled.

    Input Frames:
        - UserStoppedSpeakingFrame: Indicates that a user has stopped speaking. It triggers
          a TTSSpeakFrame and is then forwarded unchanged, like every other frame.

    Output Frames:
        - TTSSpeakFrame: Contains the acknowledgment text to be spoken, or an empty string
          when no acknowledgment phrase was selected.

    Args:
        filler_words (list[str]): List of acknowledgment phrases to use.
            Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
            When None or empty, no phrase is ever selected.
        filler_probability (float): Probability (0.0 to 1.0) of sending an
            acknowledgment when a pause is detected. Defaults to 0.5.
    """

    def __init__(self, filler_words=None, filler_probability=0.5, **kwargs):
        """Initialize the acknowledgment processor.

        Args:
            filler_words (list[str], optional): List of acknowledgment phrases to use.
                Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
                None (the default) or an empty list is accepted and means that no phrase
                is ever selected.
            filler_probability (float, optional): Probability (0.0 to 1.0) of sending an
                acknowledgment when a pause is detected. Defaults to 0.5.
            **kwargs: Additional arguments passed to the parent FrameProcessor.

        Raises:
            ValueError: If filler_probability is not between 0.0 and 1.0.
        """
        # Reject an impossible probability up front instead of silently behaving as
        # "always" (> 1.0) or "never" (< 0.0). NaN fails this comparison as well.
        if not 0.0 <= filler_probability <= 1.0:
            raise ValueError(
                f"filler_probability must be between 0.0 and 1.0, got {filler_probability!r}"
            )

        super().__init__(**kwargs)
        self.filler_words = filler_words
        self.filler_probability = filler_probability

    async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
        """Process incoming frames and generate acknowledgments when appropriate.

        For a UserStoppedSpeakingFrame, a TTSSpeakFrame is pushed downstream first: it
        carries a randomly chosen phrase with probability ``filler_probability`` and an
        empty string otherwise. Every incoming frame is then forwarded in its original
        direction.

        Args:
            frame (Frame): The incoming frame to process
            direction (FrameDirection): The direction the frame is traveling

        Returns:
            None
        """
        await super().process_frame(frame, direction)

        # If user is present and they've just stopped speaking, send a filler TTSSpeakFrame
        if isinstance(frame, UserStoppedSpeakingFrame):
            # random.choice() raises on None or an empty sequence, so a phrase is only
            # drawn when there is something to pick from; otherwise the empty filler
            # is used, exactly as when the probability roll fails.
            if self.filler_words and random.random() < self.filler_probability:
                filler = random.choice(self.filler_words)
            else:
                filler = ""

            logger.debug(f"User stopped speaking, sending filler word: {filler}")
            await self.push_frame(TTSSpeakFrame(filler), FrameDirection.DOWNSTREAM)

        await self.push_frame(frame, direction)