---
# Docker Compose stack for the "voice-agent" project.
#
# Four model-serving containers pulled from nvcr.io (TTS, ASR, LLM, SLM) plus
# the Python application that talks to them. Each model service reserves a
# different GPU through `device_ids` ('0'..'3'), and all of them share the
# `nim_cache` named volume mounted at /opt/nim/.cache.
#
# NVIDIA_API_KEY must be set in the environment (or an .env file) of the shell
# running `docker compose`; it is interpolated into every service below.
name: voice-agent

services:
  # Text-to-speech service; reserved on GPU 0.
  # HTTP 9000 and gRPC 50051 inside the container, published on 19000 / 50151.
  riva-tts-magpie:
    image: nvcr.io/nim/nvidia/magpie-tts-multilingual:latest
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=9000
      - NIM_GRPC_API_PORT=50051
    ports:
      - "19000:9000"
      - "50151:50051"
    volumes:
      - nim_cache:/opt/nim/.cache
    shm_size: 8GB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]

  # Speech-recognition service; reserved on GPU 1.
  # HTTP 9001 and gRPC 50052 inside the container, published on 19001 / 50152.
  # NOTE(review): the image comes from the `nvstaging` registry namespace and
  # this service sets no `shm_size`, unlike the other model services - confirm
  # both are intentional.
  riva-asr-parakeet:
    image: nvcr.io/nvstaging/riva/riva-speech:main_decoder_partials.26684117-linux-x86_64
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=9001
      - NIM_GRPC_API_PORT=50052
      - NIM_TAGS_SELECTOR=mode=str,vad=silero
    ports:
      - "19001:9001"
      - "50152:50052"
    volumes:
      - nim_cache:/opt/nim/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['1']
              capabilities: [gpu]

  # Large language model service; reserved on GPU 2.
  # HTTP 8000 inside the container, published on host port 18000.
  nvidia-llm:
    image: nvcr.io/nim/meta/llama-3.1-8b-instruct:latest
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=8000
    ports:
      - "18000:8000"
    volumes:
      - nim_cache:/opt/nim/.cache
    shm_size: 16GB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['2']
              capabilities: [gpu]

  # Small language model service; reserved on GPU 3.
  # HTTP 8000 inside the container, published on host port 8080.
  nvidia-slm:
    image: nvcr.io/nim/google/gemma-2-2b-instruct:1.4.0
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=8000
    ports:
      - "8080:8000"
    volumes:
      - nim_cache:/opt/nim/.cache
    shm_size: 8GB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['3']
              capabilities: [gpu]

  # Application container, built from the repository two levels above this
  # file. It reaches the model services over the compose network, so the URLs
  # below use service names and container-side ports, not the published host
  # ports.
  python-app:
    build:
      context: ../../
      dockerfile: examples/speech_planner/Dockerfile
    ports:
      - "8100:8100"
    environment:
      - NVIDIA_API_KEY=${NVIDIA_API_KEY}
      # Endpoints: gRPC ports match NIM_GRPC_API_PORT of the ASR/TTS services;
      # HTTP ports match NIM_HTTP_API_PORT of the LLM/SLM services.
      - RIVA_ASR_URL=riva-asr-parakeet:50052
      - RIVA_TTS_URL=riva-tts-magpie:50051
      - NVIDIA_LLM_URL=http://nvidia-llm:8000/v1
      - NVIDIA_SLM_URL=http://nvidia-slm:8000/v1
      # Model identifiers requested from each service.
      - RIVA_ASR_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble
      - RIVA_TTS_MODEL=magpie_tts_ensemble-Magpie-Multilingual
      - NVIDIA_LLM_MODEL=meta/llama-3.1-8b-instruct
      - NVIDIA_SLM_MODEL=google/gemma-2-2b-instruct
      - RIVA_ASR_LANGUAGE=en-US
      - RIVA_TTS_LANGUAGE=en-US
      - RIVA_TTS_VOICE_ID=Magpie-Multilingual.EN-US.Sofia
      # Set this only if using a zero-shot TTS model with a custom audio
      # prompt; it is deliberately left as an empty string otherwise.
      - ZERO_SHOT_AUDIO_PROMPT=
      - STATIC_DIR=/app/static
    volumes:
      # Host ../static is mounted at the path STATIC_DIR points to.
      - ../static:/app/static
    # Controls start order only; no health checks are declared, so the model
    # services may still be loading when the app starts.
    depends_on:
      - riva-tts-magpie
      - riva-asr-parakeet
      - nvidia-llm
      - nvidia-slm

# Named volume mounted by every model service at /opt/nim/.cache.
volumes:
  nim_cache: