# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

"""Gesture provider processor.

A frame processor that automatically manages facial expressions for the ACE avatar
based on conversation events and speaking states. Helps create more natural
interactions by adding contextual facial gestures during conversations.

For available facial gestures, see the ACE Animgraph documentation:
https://docs.nvidia.com/ace/animation-graph-microservice/latest/default-animation-graph.html
"""

import random

from loguru import logger
from pipecat.frames.frames import (
    BotStartedSpeakingFrame,
    BotStoppedSpeakingFrame,
    Frame,
    StartInterruptionFrame,
    UserStoppedSpeakingFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

from nvidia_pipecat.frames.action import StartFacialGestureBotActionFrame


class FacialGestureProviderProcessor(FrameProcessor):
    """Manages automated facial gestures for the ACE avatar during conversations.

    This processor monitors conversation state changes and triggers appropriate
    facial expressions in response to events such as the user finishing speaking
    or an interruption occurring. Configurable randomization keeps the gestures
    feeling natural rather than mechanical.

    Input Frames:
        - UserStoppedSpeakingFrame (consumed): Emitted when the user finishes speaking
        - StartInterruptionFrame (consumed): Emitted when the conversation is interrupted
        - BotStartedSpeakingFrame (consumed): Indicates the bot began speaking
        - BotStoppedSpeakingFrame (consumed): Indicates the bot finished speaking

    Output Frames:
        - StartFacialGestureBotActionFrame: Triggers a facial expression on the avatar

    Args:
        user_stopped_speaking_gesture (str): Facial gesture to trigger when the user
            stops speaking. See the ACE Animgraph docs for available gestures.
            Defaults to "Taunt".
        start_interruption_gesture (str): Facial gesture to trigger during
            interruptions. See the ACE Animgraph docs for available gestures.
            Defaults to "Pensive".
        probability (float): Probability (0.0 to 1.0) that a gesture is triggered
            for any given event. Used to make the behavior less predictable.
            Defaults to 0.5.
        **kwargs: Additional arguments passed to the parent FrameProcessor.

    Typical usage example:
        >>> processor = FacialGestureProviderProcessor(
        ...     user_stopped_speaking_gesture="Smile",
        ...     start_interruption_gesture="Concerned",
        ...     probability=0.75,
        ... )
    """

    def __init__(
        self,
        user_stopped_speaking_gesture="Taunt",
        start_interruption_gesture="Pensive",
        probability=0.5,
        **kwargs,
    ):
        """Initialize the facial gesture provider.

        Args:
            user_stopped_speaking_gesture (str): Facial gesture to trigger when the user
                stops speaking. See the ACE Animgraph docs for available gestures.
                Defaults to "Taunt".
            start_interruption_gesture (str): Facial gesture to trigger during
                interruptions. See the ACE Animgraph docs for available gestures.
                Defaults to "Pensive".
            probability (float): Probability (0.0 to 1.0) that a gesture is triggered
                for any given event. Used to make the behavior less predictable.
                Defaults to 0.5.
            **kwargs: Additional arguments passed to the parent FrameProcessor.
        """
        super().__init__(**kwargs)
        self.user_stopped_speaking_gesture = user_stopped_speaking_gesture
        self.start_interruption_gesture = start_interruption_gesture
        self._bot_speaking = False
        self.probability = probability

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process an incoming frame and trigger facial gestures if appropriate.

        Monitors conversation state changes and randomly triggers the configured
        facial gestures based on the probability setting.

        Args:
            frame (Frame): The incoming frame to process.
            direction (FrameDirection): The direction the frame is traveling.

        Returns:
            None
        """
        await super().process_frame(frame, direction)

        new_frame: Frame | None = None
        frame_direction: FrameDirection | None = None

        if isinstance(frame, UserStoppedSpeakingFrame):
            if random.random() < self.probability:
                logger.info("User stopped speaking gesture provider")
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.user_stopped_speaking_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
        elif isinstance(frame, StartInterruptionFrame):
            logger.info("Start interruption frame gesture provider")
            if self._bot_speaking and random.random() < self.probability:
                new_frame = StartFacialGestureBotActionFrame(facial_gesture=self.start_interruption_gesture)
                frame_direction = FrameDirection.DOWNSTREAM
            self._bot_speaking = False
        elif isinstance(frame, BotStartedSpeakingFrame):
            self._bot_speaking = True
        elif isinstance(frame, BotStoppedSpeakingFrame):
            self._bot_speaking = False

        # Push the facial gesture frame after the incoming frame so that a
        # StartInterruptionFrame does not delete it when the frame queues are reset.
        await self.push_frame(frame, direction)
        if new_frame is not None and frame_direction is not None:
            await self.push_frame(new_frame, frame_direction)
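

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the library API). It runs
# the processor in a two-stage pipeline with a hypothetical logging sink so
# the emitted StartFacialGestureBotActionFrame can be observed; in a real ACE
# deployment the downstream processor would be the animation service client.
# Assumes a recent pipecat API (Pipeline, PipelineTask.queue_frames,
# PipelineRunner.run).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    from pipecat.frames.frames import EndFrame
    from pipecat.pipeline.pipeline import Pipeline
    from pipecat.pipeline.runner import PipelineRunner
    from pipecat.pipeline.task import PipelineTask

    class _GestureLoggingSink(FrameProcessor):
        """Hypothetical sink that logs any facial gesture frames it receives."""

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, StartFacialGestureBotActionFrame):
                logger.info(f"Facial gesture triggered: {frame.facial_gesture}")
            await self.push_frame(frame, direction)

    async def _demo():
        # probability=1.0 so the demo triggers a gesture deterministically.
        provider = FacialGestureProviderProcessor(probability=1.0)
        task = PipelineTask(Pipeline([provider, _GestureLoggingSink()]))
        # Simulate the user finishing an utterance, then end the pipeline.
        await task.queue_frames([UserStoppedSpeakingFrame(), EndFrame()])
        await PipelineRunner().run(task)

    asyncio.run(_demo())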