Spaces:

nvidia
/

voice-agent-examples

Running

App Files Files Community

voice-agent-examples / src /nvidia_pipecat /processors /acknowledgment.py

fciannella

Working with service run on 7860

53ea588 about 1 month ago

raw

history blame

3.72 kB

	# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: BSD 2-Clause License

	"""Acknowledgment processor that provides verbal feedback during conversation pauses.

	A simple processor that adds natural conversational acknowledgments when the user stops speaking, filling the gap
	while the LLM or RAG may be taking a long time to respond and helping to create more engaging interactions.
	"""

	import random

	from loguru import logger
	from pipecat.frames.frames import Frame, TTSSpeakFrame, UserStoppedSpeakingFrame
	from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


	class AcknowledgmentProcessor(FrameProcessor):
	    """Processor that sends configurable acknowledgment responses during conversation pauses.

	    This processor enhances conversation flow by sending occasional acknowledgment words
	    (like "Hmmm" or "Let me think") when users pause speaking. It only works when 2-phase
	    End-of-Utterance (EOU) detection is disabled.

	    Input Frames:
	        - UserStoppedSpeakingFrame (forwarded): Indicates when a user has stopped speaking.
	          Like every other frame, it is passed on unchanged after processing.

	    Output Frames:
	        - TTSSpeakFrame: Contains the acknowledgment text to be spoken. When the random draw
	          decides to skip the acknowledgment, a TTSSpeakFrame with empty text is pushed instead.

	    Args:
	        filler_words (list[str]): Non-empty list of acknowledgment phrases to use.
	            Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
	        filler_probability (float): Probability (0.0 to 1.0) of sending an
	            acknowledgment when a pause is detected. Defaults to 0.5.
	    """

	    def __init__(self, filler_words=None, filler_probability: float = 0.5, **kwargs):
	        """Initialize the acknowledgment processor.

	        Args:
	            filler_words (list[str]): Non-empty list of acknowledgment phrases to use.
	                Each phrase should be a short, natural acknowledgment (e.g., "Hmmm", "Let me think").
	            filler_probability (float, optional): Probability (0.0 to 1.0) of sending an
	                acknowledgment when a pause is detected. Defaults to 0.5.
	            **kwargs: Additional arguments passed to the parent FrameProcessor.

	        Raises:
	            ValueError: If filler_probability is not between 0.0 and 1.0
	            ValueError: If filler_words is empty or None
	        """
	        # Fail fast at construction time: random.choice() on None or an empty
	        # sequence would otherwise raise in the middle of a live conversation.
	        if not filler_words:
	            raise ValueError("filler_words must be a non-empty list of acknowledgment phrases")
	        # The chained comparison also rejects NaN, which compares False on both sides.
	        if not 0.0 <= filler_probability <= 1.0:
	            raise ValueError(f"filler_probability must be between 0.0 and 1.0, got {filler_probability}")

	        super().__init__(**kwargs)
	        self.filler_words = filler_words
	        self.filler_probability = filler_probability

	    async def process_frame(self, frame: Frame, direction: FrameDirection):
	        """Process incoming frames and generate acknowledgments when appropriate.

	        On a UserStoppedSpeakingFrame, a TTSSpeakFrame is pushed downstream before the
	        original frame is forwarded. Its text is a randomly chosen filler phrase with
	        probability ``filler_probability`` and an empty string otherwise. Every incoming
	        frame, including UserStoppedSpeakingFrame, is then forwarded in its own direction.

	        Args:
	            frame (Frame): The incoming frame to process
	            direction (FrameDirection): The direction the frame is traveling

	        Returns:
	            None
	        """
	        await super().process_frame(frame, direction)

	        # The user has just stopped speaking: decide whether to speak a filler phrase.
	        if isinstance(frame, UserStoppedSpeakingFrame):
	            if random.random() < self.filler_probability:
	                filler = random.choice(self.filler_words)
	            else:
	                # NOTE(review): an empty TTSSpeakFrame is still sent when the filler is
	                # skipped; presumably intentional for the downstream TTS stage - confirm.
	                filler = ""
	            logger.debug(f"User stopped speaking, sending filler word: {filler}")
	            await self.push_frame(TTSSpeakFrame(filler), FrameDirection.DOWNSTREAM)

	        # Always pass the original frame along so the rest of the pipeline sees it.
	        await self.push_frame(frame, direction)