fciannella committed
Commit 53ea588 · 1 Parent(s): 9438bb6

Working with service run on 7860

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +69 -0
  2. CHANGELOG.md +19 -0
  3. CLA.md +129 -0
  4. CONTRIBUTING.md +14 -0
  5. Dockerfile +64 -0
  6. LICENSE +24 -0
  7. NVIDIA_PIPECAT.md +5 -0
  8. README.md +82 -1
  9. SECURITY.md +24 -0
  10. deploy/docker/Dockerfile +39 -0
  11. deploy/k8s/README.md +39 -0
  12. deploy/k8s/ucs/LICENSE.txt +7 -0
  13. deploy/k8s/ucs/README.md +101 -0
  14. deploy/k8s/ucs/changelog.txt +3 -0
  15. deploy/k8s/ucs/endpoints/a2f-grpc.proto +10 -0
  16. deploy/k8s/ucs/endpoints/animgraph-grpc.proto +189 -0
  17. deploy/k8s/ucs/endpoints/animgraph-http.yaml +755 -0
  18. deploy/k8s/ucs/endpoints/http-api.yaml +179 -0
  19. deploy/k8s/ucs/endpoints/redis.yaml +19 -0
  20. deploy/k8s/ucs/endpoints/riva-speech.proto +10 -0
  21. deploy/k8s/ucs/manifest.yaml +258 -0
  22. deploy/k8s/ucs/manual_compliance_test_results.yaml +11 -0
  23. deploy/k8s/ucs/scripts/env.sh +90 -0
  24. deploy/k8s/ucs/tests/dev/app.yaml +39 -0
  25. deploy/k8s/ucs/tests/dev/config.yaml +90 -0
  26. deploy/k8s/ucs/tests/dev/params1.yaml +12 -0
  27. deploy/scripts/README.md +0 -0
  28. examples/README.md +6 -0
  29. examples/nvidia_rag/README.md +68 -0
  30. examples/nvidia_rag/bot.py +144 -0
  31. examples/nvidia_rag/env.example +2 -0
  32. examples/nvidia_rag/pyproject.toml +18 -0
  33. examples/opentelemetry/README.md +33 -0
  34. examples/opentelemetry/bot.py +98 -0
  35. examples/riva_nmt/README.md +73 -0
  36. examples/riva_nmt/bot.py +130 -0
  37. examples/riva_nmt/env.example +2 -0
  38. examples/riva_nmt/pyproject.toml +18 -0
  39. examples/riva_speech_langchain/README.md +64 -0
  40. examples/riva_speech_langchain/bot.py +150 -0
  41. examples/riva_speech_langchain/env.example +2 -0
  42. examples/riva_speech_langchain/pyproject.toml +21 -0
  43. examples/speech-to-speech/Dockerfile +40 -0
  44. examples/speech-to-speech/README.md +154 -0
  45. examples/speech-to-speech/bot.py +194 -0
  46. examples/speech-to-speech/docker-compose.yml +94 -0
  47. examples/speech-to-speech/env.example +2 -0
  48. examples/speech-to-speech/pyproject.toml +18 -0
  49. examples/speech-to-speech/uv.lock +0 -0
  50. examples/speech_planner/Dockerfile +40 -0
.gitignore ADDED
@@ -0,0 +1,69 @@
+ # --- General ---
+ .DS_Store
+ Thumbs.db
+ *.log
+
+ # --- Python ---
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.pyd
+ .Python
+ .ruff_cache/
+ .mypy_cache/
+ .pytest_cache/
+ .ipynb_checkpoints/
+ .cache/
+ *.egg-info/
+ *.egg
+ build/
+ dist/
+ develop-eggs/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ wheels/
+ pip-wheel-metadata/
+ .coverage
+ .coverage.*
+ coverage.xml
+
+ # Virtual environments
+ .venv/
+ venv/
+ env/
+ ENV/
+ .python-version
+
+ # --- Node / JS ---
+ node_modules/
+ .pnpm-store/
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+ pnpm-debug.log*
+ **/dist/
+ **/build/
+
+ # Keep lockfiles tracked
+ !**/package-lock.json
+ !**/yarn.lock
+
+ # --- IDE ---
+ .idea/
+ .vscode/
+
+ # --- Environment files ---
+ .env
+ .env.*
+ !**/env.example
+ !**/.env.example
+
+ # --- Example runtime artifacts ---
+ examples/voice_agent_webrtc_langgraph/audio_dumps/
+ examples/voice_agent_webrtc_langgraph/ui/dist/
+
CHANGELOG.md ADDED
@@ -0,0 +1,19 @@
+ # NVIDIA Pipecat 0.1.0 (23 April 2025)
+ The NVIDIA Pipecat library augments the Pipecat framework by adding additional frame processors and services, as well as new multimodal frames to enhance avatar interactions. This is the first release of the NVIDIA Pipecat library.
+
+ ## New Features
+
+ - Added Pipecat services for [Riva ASR (Automatic Speech Recognition)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html#), [Riva TTS (Text to Speech)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html), and [Riva NMT (Neural Machine Translation)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/translation/translation-overview.html) models.
+ - Added Pipecat frames, processors, and services to support multimodal avatar interactions and use cases. This includes `Audio2Face3DService`, `AnimationGraphService`, `FacialGestureProviderProcessor`, and `PostureProviderProcessor`.
+ - Added `ACETransport`, which is specifically designed to support integration with existing [ACE microservices](https://docs.nvidia.com/ace/overview/latest/index.html). This includes a FastAPI-based HTTP and WebSocket server implementation compatible with ACE.
+ - Added `NvidiaLLMService` for [NIM LLM models](https://build.nvidia.com/) and `NvidiaRAGService` for the [NVIDIA RAG Blueprint](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/docs/quickstart.md).
+ - Added the `UserTranscriptSynchronization` processor for user speech transcripts and the `BotTranscriptSynchronization` processor for synchronizing bot transcripts with bot audio playback.
+ - Added custom context aggregators and processors to enable [Speculative Speech Processing](https://docs.nvidia.com/ace/ace-controller-microservice/latest/user-guide.html#speculative-speech-processing) to reduce latency.
+ - Added `UserPresence`, `Proactivity`, and `AcknowledgementProcessor` frame processors to improve human-bot interactions.
+ - Released source code for the voice assistant example, built with `nvidia-pipecat` along with `pipecat-ai` library services, to showcase NVIDIA services with `ACETransport`.
+
+
+ ## Improvements
+
+ - Added `ElevenLabsTTSServiceWithEndOfSpeech`, an extended version of the ElevenLabs TTS service with end-of-speech events for usage in avatar interactions.
+
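The transcript synchronization processors in this changelog are meant to be composed into a Pipecat pipeline alongside the speech services. As a rough orientation, here is a minimal sketch of where they might sit; the `nvidia_pipecat` import path and the constructor signatures are illustrative assumptions, not the library's confirmed API.

```python
# Sketch only: placement of the transcript synchronization processors in a
# Pipecat pipeline. The nvidia_pipecat import path below is a guess for
# illustration -- check the ACE Controller documentation for the real one.
from pipecat.pipeline.pipeline import Pipeline

from nvidia_pipecat.processors.transcript_synchronization import (  # hypothetical path
    BotTranscriptSynchronization,
    UserTranscriptSynchronization,
)


def build_pipeline(transport, stt, llm, tts, context):
    """Place each sync processor next to the audio it aligns transcripts with."""
    return Pipeline([
        transport.input(),                # user audio in
        stt,                              # e.g. a Riva ASR service
        UserTranscriptSynchronization(),  # emit user transcripts in sync with speech
        context.user(),
        llm,                              # e.g. NvidiaLLMService
        tts,                              # e.g. a Riva TTS service
        transport.output(),               # bot audio out
        BotTranscriptSynchronization(),   # align bot transcripts with audio playback
        context.assistant(),
    ])
```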
CLA.md ADDED
@@ -0,0 +1,129 @@
+ # Contribution License Agreement
+
+ This Contribution License Agreement (“**Agreement**”) is agreed to by the party signing below (“**You**”),
+ and conveys certain license rights to NVIDIA Corporation and its affiliates (“**NVIDIA**”) for Your
+ contributions to NVIDIA open source projects. This Agreement is effective as of the latest signature
+ date below.
+
+ ## 1. Definitions.
+
+ “**Code**” means the computer software code, whether in human-readable or machine-executable form,
+ that is delivered by You to NVIDIA under this Agreement.
+
+ “**Project**” means any of the projects owned or managed by NVIDIA in which software is offered under
+ a license approved by the Open Source Initiative (OSI) (www.opensource.org) and documentation
+ offered under an OSI or a Creative Commons license (https://creativecommons.org/licenses).
+
+ “**Submit**” is the act of uploading, submitting, transmitting, or distributing code or other content to any
+ Project, including but not limited to communication on electronic mailing lists, source code control
+ systems, and issue tracking systems that are managed by, or on behalf of, the Project for the purpose of
+ discussing and improving that Project, but excluding communication that is conspicuously marked or
+ otherwise designated in writing by You as “Not a Submission.”
+
+ “**Submission**” means the Code and any other copyrightable material Submitted by You, including any
+ associated comments and documentation.
+
+ ## 2. Your Submission.
+ You must agree to the terms of this Agreement before making a Submission to any
+ Project. This Agreement covers any and all Submissions that You, now or in the future (except as
+ described in Section 4 below), Submit to any Project.
+
+ ## 3. Originality of Work.
+ You represent that each of Your Submissions is entirely Your original work.
+ Should You wish to Submit materials that are not Your original work, You may Submit them separately
+ to the Project if You (a) retain all copyright and license information that was in the materials as You
+ received them, (b) in the description accompanying Your Submission, include the phrase “Submission
+ containing materials of a third party:” followed by the names of the third party and any licenses or other
+ restrictions of which You are aware, and (c) follow any other instructions in the Project’s written
+ guidelines concerning Submissions.
+
+ ## 4. Your Employer.
+ References to “employer” in this Agreement include Your employer or anyone else
+ for whom You are acting in making Your Submission, e.g. as a contractor, vendor, or agent. If Your
+ Submission is made in the course of Your work for an employer or Your employer has intellectual
+ property rights in Your Submission by contract or applicable law, You must secure permission from Your
+ employer to make the Submission before signing this Agreement. In that case, the term “You” in this
+ Agreement will refer to You and the employer collectively. If You change employers in the future and
+ desire to Submit additional Submissions for the new employer, then You agree to sign a new Agreement
+ and secure permission from the new employer before Submitting those Submissions.
+
+
+ ## 5. Licenses.
+
+ **a. Copyright License**. You grant NVIDIA, and those who receive the Submission directly or
+ indirectly from NVIDIA, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license in the
+ Submission to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute
+ the Submission and such derivative works, and to sublicense any or all of the foregoing rights to third
+ parties.
+
+ **b. Patent License**. You grant NVIDIA, and those who receive the Submission directly or
+ indirectly from NVIDIA, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license under
+ Your patent claims that are necessarily infringed by the Submission or the combination of the
+ Submission with the Project to which it was Submitted to make, have made, use, offer to sell, sell and
+ import or otherwise dispose of the Submission alone or with the Project.
+
+ **c. Other Rights Reserved**. Each party reserves all rights not expressly granted in this Agreement.
+ No additional licenses or rights whatsoever (including, without limitation, any implied licenses) are
+ granted by implication, exhaustion, estoppel or otherwise.
+
+ ## 6. Representations and Warranties.
+ You represent that You are legally entitled to grant the above
+ licenses. You represent that each of Your Submissions is entirely Your original work (except as You may
+ have disclosed under Section 3). You represent that You have secured permission from Your employer to
+ make the Submission in cases where Your Submission is made in the course of Your work for Your
+ employer or Your employer has intellectual property rights in Your Submission by contract or applicable
+ law. If You are signing this Agreement on behalf of Your employer, You represent and warrant that You
+ have the necessary authority to bind the listed employer to the obligations contained in this Agreement.
+ You are not expected to provide support for Your Submission, unless You choose to do so. UNLESS
+ REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING, AND EXCEPT FOR THE WARRANTIES
+ EXPRESSLY STATED IN SECTIONS 3, 4, AND 6, THE SUBMISSION PROVIDED UNDER THIS AGREEMENT IS
+ PROVIDED WITHOUT WARRANTY OF ANY KIND, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY OF
+ NONINFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+
+ ## 7. Notice to NVIDIA.
+ You agree to notify NVIDIA in writing of any facts or circumstances of which
+ You later become aware that would make Your representations in this Agreement inaccurate in any
+ respect.
+
+ ## 8. Information about Submissions.
+ You agree that contributions to Projects and information about
+ contributions may be maintained indefinitely and disclosed publicly, including Your name and other
+ information that You submit with Your Submission.
+
+ ## 9. Governing Law/Jurisdiction.
+ Claims arising under this Agreement shall be governed by the laws of Delaware, excluding its principles of conflict of laws and the United Nations Convention on Contracts for the Sale of Goods. The state and/or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this Agreement. You may not export the Software in violation of applicable export laws and regulations.
+
+ ## 10. Entire Agreement/Assignment.
+ This Agreement is the entire agreement between the parties, and
+ supersedes any and all prior agreements, understandings or communications, written or oral, between
+ the parties relating to the subject matter hereof. This Agreement may be assigned by NVIDIA.
+
+
+
+
+ **Please select one of the options below and sign as indicated.** By signing, You accept and agree to the
+ terms of this Contribution License Agreement for Your present and future Submissions to NVIDIA.
+
+ ___ I have sole ownership of intellectual property rights to my Submissions and I am not making
+ Submissions in the course of work for my employer.
+
+ - Name (“You”): _________________________________________
+ - Signature: _________________________________________
+ - Date: _________________________________________
+ - GitHub Login: _________________________________________
+ - Email: _________________________________________
+ - Address: _________________________________________
+
+ ___ I am making Submissions in the course of work for my employer (or my employer has intellectual
+ property rights in my Submissions by contract or applicable law). I have permission from my
+ employer to make Submissions and enter into this Agreement on behalf of my employer. By signing
+ below, the defined term “You” includes me and my employer.
+
+ - Company Name: _________________________________________
+ - Signature: _________________________________________
+ - By: _________________________________________
+ - Title: _________________________________________
+ - Date: _________________________________________
+ - GitHub Login: _________________________________________
+ - Email: _________________________________________
+ - Address: _________________________________________
CONTRIBUTING.md ADDED
@@ -0,0 +1,14 @@
+ # Contributing Guidelines
+
+ Use the following guidelines to contribute to this project.
+
+
+ ## Pull Requests
+ The developer workflow for code contributions is as follows:
+
+ 1. Create a [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) of this repository for upstreaming.
+ 2. Clone the forked repository and push changes to the personal fork.
+ 3. Run the changes locally to make sure formatting, linting, and unit test checks pass. See the steps for development with the source in [README.md](./README.md) for more details.
+ 4. Once the code changes are staged on the fork and ready for review, open a Pull Request (PR) to merge the changes from a branch of the fork into a selected branch of upstream.
+ 5. If you are contributing for the first time, download the [Contribution License Agreement (CLA)](CLA.md) and email a signed copy to [ttripathi@nvidia.com](mailto:ttripathi@nvidia.com).
+ 6. Since there is no CI/CD process in place yet, the PR will be accepted and the corresponding issue closed only after the developer and/or the repository owners reviewing the code have completed adequate manual testing.
Dockerfile ADDED
@@ -0,0 +1,64 @@
+ # Build UI assets
+ FROM node:18-alpine AS ui-builder
+
+ WORKDIR /ui
+ # Install UI dependencies
+ COPY examples/voice_agent_webrtc_langgraph/ui/package*.json ./
+ RUN npm ci --no-audit --no-fund && npm cache clean --force
+ # Build UI
+ COPY examples/voice_agent_webrtc_langgraph/ui/ .
+ RUN npm run build
+
+ # Base image
+ FROM python:3.12-slim
+
+ # Environment setup
+ ENV PYTHONUNBUFFERED=1
+
+ # System dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     libgl1 \
+     libglx-mesa0 \
+     curl \
+     ffmpeg \
+     git \
+     net-tools \
+     procps \
+     vim \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/* \
+     && pip install --no-cache-dir --upgrade pip uv
+
+ # App directory setup
+ WORKDIR /app
+
+ # App files
+ COPY pyproject.toml uv.lock \
+     LICENSE README.md NVIDIA_PIPECAT.md \
+     ./
+ COPY src/ ./src/
+ COPY examples/voice_agent_webrtc_langgraph/ ./examples/voice_agent_webrtc_langgraph/
+
+ # Copy built UI into example directory so FastAPI can serve it
+ COPY --from=ui-builder /ui/dist /app/examples/voice_agent_webrtc_langgraph/ui/dist
+
+ # Example app directory
+ WORKDIR /app/examples/voice_agent_webrtc_langgraph
+
+ # Dependencies
+ RUN uv sync --frozen
+ RUN uv pip install -r agents/requirements.txt
+ # Ensure langgraph CLI is available at build time
+ RUN uv pip install -U langgraph
+ RUN chmod +x start.sh
+
+ # Port configuration (single external port for app)
+ EXPOSE 7860
+
+ # Healthcheck
+ HEALTHCHECK --interval=30s --timeout=10s --retries=3 --start-period=60s CMD curl -f http://localhost:7860/get_prompt || exit 1
+
+ # Start command
+ CMD ["/app/examples/voice_agent_webrtc_langgraph/start.sh"]
+
+
LICENSE ADDED
@@ -0,0 +1,24 @@
+ BSD 2-Clause License
+
+ Copyright (c) 2024–2025, NVIDIA Corporation
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
NVIDIA_PIPECAT.md ADDED
@@ -0,0 +1,5 @@
+ # NVIDIA Pipecat
+
+ The NVIDIA Pipecat library augments [the Pipecat framework](https://github.com/pipecat-ai/pipecat) by adding additional frame processors and services, as well as new multimodal frames to facilitate the creation of human-avatar interactions. This includes the integration of NVIDIA services and NIMs such as [NVIDIA Riva](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/index.html), [NVIDIA Audio2Face](https://build.nvidia.com/nvidia/audio2face-3d), and [NVIDIA Foundational RAG](https://build.nvidia.com/nvidia/build-an-enterprise-rag-pipeline). It also introduces a few processors with a focus on improving the end-user experience for multimodal conversational agents, along with speculative speech processing to reduce latency for faster bot responses.
+
+ The nvidia-pipecat source code can be found in [the GitHub repository](https://github.com/NVIDIA/ace-controller). Follow [the documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html) for more details.
README.md CHANGED
@@ -8,4 +8,85 @@ pinned: false
  short_description: Voice Demos with Ace Controller
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ACE Controller SDK
+
+ The ACE Controller SDK allows you to build your own ACE Controller service to manage multimodal, real-time interactions with voice bots and avatars using NVIDIA ACE. With the SDK, you can create controllers that leverage the Python-based open-source [Pipecat framework](https://github.com/pipecat-ai/pipecat) for creating real-time, voice-enabled, and multimodal conversational AI agents. The SDK contains enhancements to the Pipecat framework, enabling developers to effortlessly customize, debug, and deploy complex pipelines while integrating robust NVIDIA services into the Pipecat ecosystem.
+
+ ## Main Features
+
+ - **Pipecat Extension:** A Pipecat extension to connect with ACE services and NVIDIA NIMs, facilitating the creation of human-avatar interactions. The NVIDIA Pipecat library augments [the Pipecat framework](https://github.com/pipecat-ai/pipecat) by adding additional frame processors and services, as well as new multimodal frames to enhance avatar interactions. This includes the integration of NVIDIA services and NIMs such as [NVIDIA Riva](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/index.html), [NVIDIA Audio2Face](https://build.nvidia.com/nvidia/audio2face-3d), and [NVIDIA Foundational RAG](https://build.nvidia.com/nvidia/build-an-enterprise-rag-pipeline).
+
+ - **HTTP and WebSocket Server Implementation:** The SDK provides a FastAPI-based HTTP and WebSocket server implementation compatible with ACE. It includes functionality for stream and pipeline management by offering new Pipecat pipeline runners and transports. For ease of use and distribution, this functionality is currently included in the `nvidia-pipecat` Python library as well.
+
+ ## ACE Controller Microservice
+
+ The ACE Controller SDK was used to build the [ACE Controller Microservice](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html). Check out the [ACE documentation](https://docs.nvidia.com/ace/tokkio/latest/customization/customization-options.html) for more details on how to configure the ACE Controller MS with your custom pipelines.
+
+
+ ## Getting Started
+
+ The NVIDIA Pipecat package is released as a wheel on PyPI. Create a Python virtual environment and use the pip command to install the nvidia-pipecat package.
+
+ ```bash
+ pip install nvidia-pipecat
+ ```
+
+ You can start building Pipecat pipelines utilizing services from the NVIDIA Pipecat package, as sketched below. For more details, follow [the ACE Controller](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html) and [the Pipecat Framework](https://docs.pipecat.ai/getting-started/overview) documentation.
+
+ ## Hacking on the framework itself
37
+
38
+ If you wish to work directly with the source code or modify services from the nvidia-pipecat package, you can utilize either the UV or Nix development setup as outlined below.
39
+
40
+ ### Using UV
41
+
42
+
43
+ To get started, first install the [UV package manager](https://docs.astral.sh/uv/#highlights).
44
+
45
+ Then, create a virtual environment with all the required dependencies by running the following commands:
46
+ ```bash
47
+ uv venv
48
+ uv sync
49
+ source .venv/bin/activate
50
+ ```
51
+
52
+ Once the environment is set up, you can begin building pipelines or modifying the services in the source code.
53
+
54
+ If you wish to contribute your changes to the repository, please ensure you run the unit tests, linter, and formatting tool.
55
+
56
+ To run unit tests, use:
57
+ ```
58
+ uv run pytest
59
+ ```
60
+
61
+ To format the code, use:
62
+ ```bash
63
+ ruff format
64
+ ```
65
+
66
+ To run the linter, use:
67
+ ```
68
+ ruff check
69
+ ```
70
+
71
+
72
+ ### Using Nix
73
+
74
+ To set up your development environment using [the Nix](https://nixos.org/download/#nix-install-linux), follow these steps:
75
+
76
+ Initialize the development environment: Simply run the following command:
77
+ ```bash
78
+ nix develop
79
+ ```
80
+
81
+ This setup provides you with a fully configured environment, allowing you to focus on development without worrying about dependency management.
82
+
83
+ To ensure that all checks such as the formatting and linter for the repository are passing, use the following command:
84
+
85
+ ```bash
86
+ nix flake check
87
+ ```
88
+
89
+ ## CONTRIBUTING
90
+
91
+ We invite contributions! Open a GitHub issue or pull request! See contributing guildelines [here](./CONTRIBUTING.md).
92
+
SECURITY.md ADDED
@@ -0,0 +1,24 @@
+ ## Security
+
+ NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization.
+
+ If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.**
+
+ ## Reporting Potential Security Vulnerability in an NVIDIA Product
+
+ To report a potential security vulnerability in any NVIDIA product:
+ - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
+ - E-Mail: psirt@nvidia.com
+   - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
+ - Please include the following information:
+   - Product/Driver name and version/branch that contains the vulnerability
+   - Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
+   - Instructions to reproduce the vulnerability
+   - Proof-of-concept or exploit code
+   - Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
+
+ While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information.
+
+ ## NVIDIA Product Security
+
+ For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security
deploy/docker/Dockerfile ADDED
@@ -0,0 +1,39 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.12 AS builder
+ RUN apt update && apt install -y libgl1-mesa-glx ffmpeg
+ # RUN apt install gstreamer1.0-tools gstreamer1.0-plugins-good gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly gstreamer1.0-libav gobject-introspection libgirepository1.0-dev libgstreamer1.0-dev gstreamer1.0-plugins-base ffmpeg
+
+ # Install uv
+ RUN pip install uv
+
+ # Create and set the working directory
+ WORKDIR /app
+
+ # Enable bytecode compilation
+ ENV UV_COMPILE_BYTECODE=1
+
+ # Copy from the cache instead of linking since it's a mounted volume
+ ENV UV_LINK_MODE=copy
+
+ # Install the project's dependencies using the lockfile and settings
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     --mount=type=bind,source=uv.lock,target=uv.lock \
+     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+     uv sync --frozen --no-install-project --no-dev
+
+
+ # Copy the nvidia_pipecat source code
+ COPY pyproject.toml uv.lock* README.md ./
+ COPY ./src/nvidia_pipecat ./src/nvidia_pipecat
+
+ # Install dependencies without dev packages and without creating a virtual environment
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     uv sync --frozen --no-dev
+
+ # Set environment path to use uv's installed Python packages
+ ENV PATH="/app/.venv/bin:$PATH"
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
deploy/k8s/README.md ADDED
@@ -0,0 +1,39 @@
+ # ACE Controller UCS Workflow
+
+ ## Development
+ To build the ACE Controller microservice locally:
+ ```bash
+ ucf_ms_builder_cli service build -d ucs/
+ ```
+
+ Review compliance results for the microservice at `ucs/output/compliance_test_logs.txt`. Check the [UCF compliance documentation](https://docs.nvidia.com/ucf/text/UCS_ms_compliance.html) for more details.
+
+ To run the test application for the microservice locally, run the command below.
+ ```bash
+ helm install test ucs/output/tests/dev-params1
+ ```
+
+ ## Staging
+ Before staging, make sure you have updated the versions in manifest.yaml. You will not be able to overwrite existing microservice versions. Avoid using the same version tag for containers across different microservice versions, as Kubernetes might not pull the latest container if the container is already present in the k8s registry.
+
+ - Staging the microservice for internal teams
+ ```bash
+ ucf_ms_builder_cli service build -d ucs/ --push
+ ```
+
+ - Checking compliance and the test application in the Validation CI
+ ```bash
+ ucf_ms_builder_cli service validate -n ucf.svc.ace-controller -v <VERSION>
+ ```
+
+
+ ## Release
+
+ - For a release, update all required versions and public container paths. Make sure the microservice versions don't already exist in the staging or prod UCF teams.
+
+ - Stage the microservice and validate it first. If everything works fine, push the microservice to prod.
+ ```bash
+ ucf_ms_builder_cli service validate -n ucf.svc.ace-controller -v <VERSION> --push_to_prod
+ ```
+
+
deploy/k8s/ucs/LICENSE.txt ADDED
@@ -0,0 +1,7 @@
+ Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+ NVIDIA CORPORATION and its licensors retain all intellectual property
+ and proprietary rights in and to this software, related documentation
+ and any modifications thereto. Any use, reproduction, disclosure or
+ distribution of this software and related documentation without an express
+ license agreement from NVIDIA CORPORATION is strictly prohibited.
deploy/k8s/ucs/README.md ADDED
@@ -0,0 +1,101 @@
+
+ ACE Controller
+ ==============================
+
+ ## Description
+
+ **ACE Controller**
+
+ The ACE Controller is a microservice utilizing the Python-based open-source [Pipecat framework](https://github.com/pipecat-ai/pipecat) for building real-time, voice-enabled, and multimodal conversational AI agents. Pipecat uses a pipeline-based architecture for real-time AI processing and handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
+
+ The ACE Controller microservice extends the Pipecat framework to enable developers to easily customize, debug, and deploy complex pipelines, along with the integration of powerful NVIDIA services into the Pipecat ecosystem. The ACE Controller UCS microservice can connect with the Riva Speech, Animgraph, Audio2Face, and SDR (Stream Distribution and Routing) UCS microservices.
+
+ ## Usage
+
+ ### Params:
+ The ACE Controller microservice expects developers to build a custom Docker image containing their pipeline and to update the UCS microservice parameters.
+ ```
+ ace-controller:
+
+   # Configure custom docker image built for your pipeline/example
+   image: "" # Custom docker image repository path
+   tag: "" # Tag for custom docker image
+
+   # OpenTelemetry configurations for ACE Controller and default settings
+   OTEL_SDK_DISABLED: 'false' # When enabled, tracing data will be exported
+   OTEL_SERVICE_NAME: ace-controller # Service name used for exporting OTel data
+   OTEL_EXPORTER_OTLP_ENDPOINT: "" # Endpoint for the OTel collector
+   OTEL_EXPORTER_OTLP_PROTOCOL: grpc # Protocol for exporting OTel data
+
+ ```
+
+ The custom Docker image must contain the source code of your pipeline under the `/app` directory, and a script for running the pipeline must be located at `/app/entrypoint.sh`.
+
+ ### Connections:
+ Most of the connections are optional, and you can use them based on your use case.
+
+ ```
+ connections:
+   ace-controller/redis: redis-timeseries/redis
+   # Riva Speech GRPC endpoint
+   ace-controller/riva-speech: riva-speech-endpoint/endpoint
+   # Animation Graph HTTP endpoint
+   ace-controller/animgraph-http: anim-graph-sdr/http-envoy
+   # Animation Graph GRPC endpoint
+   ace-controller/animgraph-grpc: anim-graph-sdr/grpc-envoy
+   # Audio2Face GRPC endpoint
+   ace-controller/a2f-grpc: a2f-endpoint/endpoint
+   # SDR connection for ACE Controller
+   ace-controller-sdr/ace-controller: ace-controller/http-api
+ ```
+
+ ### Secrets
+ The ACE Controller microservice supports secrets for configuring the NVIDIA API Key, the OpenAI API Key, and the ElevenLabs API Key. Configured secrets will be mounted as a file and will be loaded as environment variables by the microservice.
+
+ ```
+ secrets:
+   k8sSecret/nvidia-api-key-secret/NVIDIA_API_KEY:
+     k8sSecret:
+       secretName: nvidia-api-key-secret
+       key: NVIDIA_API_KEY
+   k8sSecret/openai-key-secret/OPENAI_API_KEY:
+     k8sSecret:
+       secretName: openai-key-secret
+       key: OPENAI_API_KEY
+   k8sSecret/custom-env-secrets/ENV:
+     k8sSecret:
+       secretName: custom-env-secrets
+       key: ENV
+ ```
+
+ **custom-env-secrets**: This secret can be used to pass any key-value pairs that will be exported as environment variables. This secret will be mounted as the file `/secrets/custom.env` and will be sourced before running services to set the environment variables.
+
+ ```
+ cat <<EOF | tee custom.env
+ KEY1=VALUE1
+ KEY2=VALUE2
+ EOF
+
+ kubectl create secret generic custom-env-secrets --from-file=ENV=custom.env
+ ```
+
+ ## Performance
+ The performance of the microservice depends on the configured pipeline. Each instance of the microservice utilizes a single core process and might only be able to support a single user stream per pod for complex pipelines (e.g., driving a multimodal interactive avatar), but it can support multiple streams for simple pipelines (e.g., a simple voice bot).
+
+ ## Supported Platforms
+ - CPU: x86 compatible
+ - Linux (e.g. Ubuntu 22.04)
+
+ ## Deployment requirements
+ - Make sure K8S foundational services are running.
+ - Local path provisioner service is installed.
+
+ ## License
+ Check [LICENSE.txt](./LICENSE.txt)
+
+ ## Known Issues / Limitations
+ NA
+
+ ## References
+ - [ACE Controller Documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html)
+ - [Pipecat Documentation](https://docs.pipecat.ai/getting-started/overview)
deploy/k8s/ucs/changelog.txt ADDED
@@ -0,0 +1,3 @@
+ ACE Controller 1.0.0
+ --------------------
+ - Initial version of ACE Controller Microservice
deploy/k8s/ucs/endpoints/a2f-grpc.proto ADDED
@@ -0,0 +1,10 @@
+ // Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ //
+ // NVIDIA CORPORATION and its licensors retain all intellectual property
+ // and proprietary rights in and to this software, related documentation
+ // and any modifications thereto. Any use, reproduction, disclosure or
+ // distribution of this software and related documentation without an express
+ // license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+
+ // Check proto documentation for the Audio2Face microservice at https://docs.nvidia.com/ace/audio2face-3d-microservice/1.3/text/interacting/a2f-rpc.html
deploy/k8s/ucs/endpoints/animgraph-grpc.proto ADDED
@@ -0,0 +1,189 @@
+ // Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ //
+ // NVIDIA CORPORATION and its licensors retain all intellectual property
+ // and proprietary rights in and to this software, related documentation
+ // and any modifications thereto. Any use, reproduction, disclosure or
+ // distribution of this software and related documentation without an express
+ // license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+
+ syntax = "proto3";
+
+ package nvidia_ace.services.animation_data.v1;
+
+ service AnimationDataService {
+   rpc PushAnimationDataStream(stream AnimationDataStream)
+       returns (Status) {}
+   rpc PullAnimationDataStream(AnimationIds)
+       returns (stream AnimationDataStream) {}
+ }
+
+ message AnimationDataStreamHeader {
+   AnimationIds animation_ids = 1;
+
+   // This is required to identify from which animation source (e.g. A2X) the
+   // request originates. This allows us to map the incoming animation data
+   // stream to the correct pose provider animation graph node. The animation
+   // source MSs (e.g. A2X MS) should populate this with their name (e.g. A2X).
+   string source_service_id = 2;
+
+   AudioHeader audio_header = 3;
+   SkelAnimationHeader skel_animation_header = 4;
+
+   // Time codes indicate the relative progression of an animation data, audio
+   // clip, etc. The unit is seconds. In addition, we also need an absolute time
+   // reference shared across services. The start time is stored in time codes
+   // elapsed since the Unix time epoch. start_time_code_since_epoch = `Unix
+   // timestamp in seconds`. NTP should be good enough to synchronize clocks
+   // across nodes. From Wikipedia: NTP can usually maintain time to within tens
+   // of milliseconds over the public Internet, and can achieve better than one
+   // millisecond accuracy in local area networks under ideal conditions.
+   // Alternatively, there is PTP.
+   double start_time_code_since_epoch = 5;
+
+   // A generic metadata field to attach use case specific data (e.g. session id,
+   // or user id?) map<string, string> metadata = 6; map<string,
+   // google.protobuf.Any> metadata = 6;
+ }
+
+ message AnimationDataStream {
+   // The header must be sent as the first message.
+   // One or more animation data messages must be sent.
+   // The status must be sent last and may be sent in between.
+   oneof stream_part {
+     AnimationDataStreamHeader animation_data_stream_header = 1;
+     AnimationData animation_data = 2;
+     Status status = 3;
+   }
+ }
+
+ message AnimationData {
+   SkelAnimation skel_animation = 1;
+   AudioWithTimeCode audio = 2;
+   Camera camera = 3;
+
+   // map<string, google.protobuf.Any> metadata = 4;
+ }
+
+ message AudioWithTimeCode {
+   // The time code is relative to the `start_time_code_since_epoch`.
+   double time_code = 1;
+   bytes audio_buffer = 2;
+ }
+
+ message SkelAnimationHeader {
+   repeated string blend_shapes = 1;
+   repeated string joints = 2;
+ }
+
+ message SkelAnimation {
+   // Time codes must be strictly monotonically increasing.
+   // Two successive SkelAnimation messages must not have overlapping time code
+   // ranges.
+   repeated FloatArrayWithTimeCode blend_shape_weights = 1;
+   repeated Float3ArrayWithTimeCode translations = 2;
+   repeated QuatFArrayWithTimeCode rotations = 3;
+   repeated Float3ArrayWithTimeCode scales = 4;
+ }
+
+ message Camera {
+   repeated Float3WithTimeCode position = 1;
+   repeated QuatFWithTimeCode rotation = 2;
+
+   repeated FloatWithTimeCode focal_length = 3;
+   repeated FloatWithTimeCode focus_distance = 4;
+ }
+
+ message FloatArrayWithTimeCode {
+   double time_code = 1;
+   repeated float values = 2;
+ }
+
+ message Float3ArrayWithTimeCode {
+   double time_code = 1;
+   repeated Float3 values = 2;
+ }
+
+ message QuatFArrayWithTimeCode {
+   double time_code = 1;
+   repeated QuatF values = 2;
+ }
+
+ message Float3WithTimeCode {
+   double time_code = 1;
+   Float3 value = 2;
+ }
+
+ message QuatFWithTimeCode {
+   double time_code = 1;
+   QuatF value = 2;
+ }
+
+ message FloatWithTimeCode {
+   double time_code = 1;
+   float value = 2;
+ }
+
+ message QuatF {
+   float real = 1;
+   float i = 2;
+   float j = 3;
+   float k = 4;
+ }
+
+ message Float3 {
+   float x = 1;
+   float y = 2;
+   float z = 3;
+ }
+
+ message AnimationIds {
+
+   // This is required to track a single animation source (e.g. A2X) request
+   // through the animation pipeline. This is going to allow e.g. the controller
+   // to stop a request after it has been sent to the animation compositor (e.g.
+   // animation graph).
+   string request_id = 1;
+
+   // The stream id is shared across the animation pipeline and identifies all
+   // animation data streams that belong to the same stream. Thus, there will be
+   // multiple requests all belonging to the same stream. Different user sessions
+   // will usually result in a new stream id. This is required for stateful MSs
+   // (e.g. anim graph) to map different requests to the same stream.
+   string stream_id = 2;
+
+   // This identifies the target avatar or object the animation data applies to.
+   // This is required when there are multiple avatars or objects in the scene.
+   // A default name could be AceModel.
+   string target_object_id = 3;
+ }
+
+ message AudioHeader {
+   enum AudioFormat { AUDIO_FORMAT_PCM = 0; }
+
+   AudioFormat audio_format = 1;
+
+   // Note: Currently only mono sound must be supported. Multi-channel audio
+   // support is optional.
+   uint32 channel_count = 2;
+
+   // Note: Currently only 16kHz, 44.1kHz, and 48kHz must be supported. Support
+   // for other sample rates is optional.
+   uint32 samples_per_second = 3;
+
+   // Note: Currently only 16 bits per sample must be supported. Support for
+   // other values is optional.
+   uint32 bits_per_sample = 4;
+ }
+
+ message Status {
+   enum Code {
+     SUCCESS = 0;
+     INFO = 1;
+     WARNING = 2;
+     ERROR = 3;
+   }
+
+   Code code = 1;
+   string message = 2;
+ }
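The comments in `AnimationDataStream` fix the ordering on `PushAnimationDataStream`: one header first, then one or more animation data messages, then a closing `Status`. Below is a minimal Python client sketch of that ordering; the generated stub module names and the server address are assumptions here (they depend on how you invoke `protoc` and where the service is deployed).

```python
# Sketch of a PushAnimationDataStream client honoring the documented ordering:
# header first, then animation data, then a closing Status. The module names
# below assume stubs generated from this proto with grpcio-tools.
import time

import grpc

import animation_data_pb2 as pb             # hypothetical generated module name
import animation_data_pb2_grpc as pb_grpc   # hypothetical generated module name


def request_stream():
    # 1) Header: identifies the stream, the source service, and the clock reference.
    yield pb.AnimationDataStream(
        animation_data_stream_header=pb.AnimationDataStreamHeader(
            animation_ids=pb.AnimationIds(
                request_id="req-1", stream_id="stream-1", target_object_id="AceModel"
            ),
            source_service_id="my-animation-source",
            start_time_code_since_epoch=time.time(),  # Unix seconds, per the comment above
        )
    )
    # 2) One or more animation data messages; time codes must strictly increase.
    yield pb.AnimationDataStream(
        animation_data=pb.AnimationData(
            skel_animation=pb.SkelAnimation(
                blend_shape_weights=[
                    pb.FloatArrayWithTimeCode(time_code=0.0, values=[0.1, 0.2])
                ]
            )
        )
    )
    # 3) A final status message closes the stream.
    yield pb.AnimationDataStream(
        status=pb.Status(code=pb.Status.SUCCESS, message="done")
    )


def main():
    channel = grpc.insecure_channel("localhost:8020")  # assumed address
    stub = pb_grpc.AnimationDataServiceStub(channel)
    result = stub.PushAnimationDataStream(request_stream())
    print(result.code, result.message)


if __name__ == "__main__":
    main()
```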
deploy/k8s/ucs/endpoints/animgraph-http.yaml ADDED
@@ -0,0 +1,755 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ #
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ {
+   "openapi": "3.0.1",
+   "info": {
+     "title": "Animation Graph Microservice",
+     "description": "The animation graph microservice composes the face and body animation with an animation graph and sends the resulting pose to the Omniverse renderer microservice.",
+     "version": "0.1"
+   },
+   "paths": {
+     "/status": {
+       "get": {
+         "summary": "Returns the current status of the service",
+         "description": "Returns the current status of the service.",
+         "operationId": "_status_status_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           }
+         }
+       }
+     },
+     "/health": {
+       "get": {
+         "summary": "Health probe",
+         "description": "Returns the current status of the service.",
+         "operationId": "_status_health_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           }
+         }
+       }
+     },
+     "/ready": {
+       "get": {
+         "summary": "Readiness probe",
+         "description": "Returns the current status of the service.",
+         "operationId": "_status_ready_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           }
+         }
+       }
+     },
+     "/startup": {
+       "get": {
+         "summary": "Startup probe",
+         "description": "Returns the current status of the service.",
+         "operationId": "_status_startup_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           }
+         }
+       }
+     },
+     "/asyncapi/docs": {
+       "get": {
+         "summary": " Async App Docs Endpoint",
+         "operationId": "_async_app_docs_endpoint_asyncapi_docs_get",
+         "parameters": [
+           {
+             "name": "app_name",
+             "in": "query",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "App Name"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/asyncapi/schema": {
+       "get": {
+         "summary": " Async App Schema Endpoint",
+         "operationId": "_async_app_schema_endpoint_asyncapi_schema_get",
+         "parameters": [
+           {
+             "name": "app_name",
+             "in": "query",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "App Name"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}/animation_graphs/avatar/variables/facial_gesture_state/{value}": {
+       "put": {
+         "summary": "Update Variable By Stream Id And Value Callable",
+         "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_facial_gesture_state__value__put",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           },
+           {
+             "name": "value",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Value"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Facial Gesture State Value Put"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}/animation_graphs/avatar/variables/gesture_state/{value}": {
+       "put": {
+         "summary": "Update Variable By Stream Id And Value Callable",
+         "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_gesture_state__value__put",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           },
+           {
+             "name": "value",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Value"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Gesture State Value Put"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}/animation_graphs/avatar/variables/position_state/{value}": {
+       "put": {
+         "summary": "Update Variable By Stream Id And Value Callable",
+         "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_position_state__value__put",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           },
+           {
+             "name": "value",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Value"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Position State Value Put"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}/animation_graphs/avatar/variables/posture_state/{value}": {
+       "put": {
+         "summary": "Update Variable By Stream Id And Value Callable",
+         "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_posture_state__value__put",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           },
+           {
+             "name": "value",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Value"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Posture State Value Put"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}/requests/{request_id}": {
+       "delete": {
+         "summary": "Stop Request Playback",
+         "operationId": "stop_request_playback_streams__stream_id__requests__request_id__delete",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           },
+           {
+             "name": "request_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Request Id"
+             }
+           },
+           {
+             "name": "fade_out",
+             "in": "query",
+             "required": false,
+             "schema": {
+               "type": "number",
+               "minimum": 0.0,
+               "default": 0.0,
+               "title": "Fade Out"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {}
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/animation_graphs": {
+       "get": {
+         "summary": "Get Animation Graphs",
+         "operationId": "get_animation_graphs_animation_graphs_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "items": {},
+                   "type": "array",
+                   "title": "Response Get Animation Graphs Animation Graphs Get"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/animation_graphs/avatar/variables": {
+       "get": {
+         "summary": "Get Animation Graph Variables",
+         "operationId": "get_animation_graph_variables_animation_graphs_avatar_variables_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "items": {},
+                   "type": "array",
+                   "title": "Response Get Animation Graph Variables Animation Graphs Avatar Variables Get"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams": {
+       "get": {
+         "summary": "Get Streams",
+         "operationId": "get_streams_streams_get",
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "items": {},
+                   "type": "array",
+                   "uniqueItems": true,
+                   "title": "Response Get Streams Streams Get"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/sdr/add_stream": {
+       "post": {
+         "summary": "Post Sdr Add Stream",
+         "operationId": "post_sdr_add_stream_sdr_add_stream_post",
+         "requestBody": {
+           "content": {
+             "application/json": {
+               "schema": {
+                 "$ref": "#/components/schemas/PostSdrStreamsBodyModel"
+               }
+             }
+           },
+           "required": true
+         },
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Post Sdr Add Stream Sdr Add Stream Post"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/sdr/remove_stream": {
+       "post": {
+         "summary": "Post Sdr Remove Stream",
+         "operationId": "post_sdr_remove_stream_sdr_remove_stream_post",
+         "requestBody": {
+           "content": {
+             "application/json": {
+               "schema": {
+                 "$ref": "#/components/schemas/DeleteSdrStreamsBodyModel"
+               }
+             }
+           },
+           "required": true
+         },
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Post Sdr Remove Stream Sdr Remove Stream Post"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/streams/{stream_id}": {
+       "post": {
+         "summary": "Post Streams Stream Id",
+         "operationId": "post_streams_stream_id_streams__stream_id__post",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "title": "Stream Id"
+             }
+           }
+         ],
+         "responses": {
+           "200": {
+             "description": "Successful Response",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "string",
+                   "title": "Response Post Streams Stream Id Streams Stream Id Post"
+                 }
+               }
+             }
+           },
+           "422": {
+             "description": "Validation Error",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/HTTPValidationError"
+                 }
+               }
+             }
+           }
+         }
+       },
+       "delete": {
+         "summary": "Delete Streams Stream Id",
+         "operationId": "delete_streams_stream_id_streams__stream_id__delete",
+         "parameters": [
+           {
+             "name": "stream_id",
+             "in": "path",
+             "required": true,
+             "schema": {
+               "type": "string",
600
+ "title": "Stream Id"
601
+ }
602
+ }
603
+ ],
604
+ "responses": {
605
+ "200": {
606
+ "description": "Successful Response",
607
+ "content": {
608
+ "application/json": {
609
+ "schema": {
610
+ "type": "string",
611
+ "title": "Response Delete Streams Stream Id Streams Stream Id Delete"
612
+ }
613
+ }
614
+ }
615
+ },
616
+ "422": {
617
+ "description": "Validation Error",
618
+ "content": {
619
+ "application/json": {
620
+ "schema": {
621
+ "$ref": "#/components/schemas/HTTPValidationError"
622
+ }
623
+ }
624
+ }
625
+ }
626
+ }
627
+ }
628
+ },
629
+ "/readiness": {
630
+ "get": {
631
+ "summary": "Readiness",
632
+ "operationId": "readiness_readiness_get",
633
+ "responses": {
634
+ "200": {
635
+ "description": "Successful Response",
636
+ "content": {
637
+ "application/json": {
638
+ "schema": {
639
+ "type": "string",
640
+ "title": "Response Readiness Readiness Get"
641
+ }
642
+ }
643
+ }
644
+ }
645
+ }
646
+ }
647
+ },
648
+ "/liveness": {
649
+ "get": {
650
+ "summary": "Liveness",
651
+ "operationId": "liveness_liveness_get",
652
+ "responses": {
653
+ "200": {
654
+ "description": "Successful Response",
655
+ "content": {
656
+ "application/json": {
657
+ "schema": {
658
+ "type": "string",
659
+ "title": "Response Liveness Liveness Get"
660
+ }
661
+ }
662
+ }
663
+ }
664
+ }
665
+ }
666
+ }
667
+ },
668
+ "components": {
669
+ "schemas": {
670
+ "DeleteSdrStreamsBodyModel": {
671
+ "properties": {
672
+ "event": {
673
+ "$ref": "#/components/schemas/EventModel"
674
+ }
675
+ },
676
+ "type": "object",
677
+ "required": [
678
+ "event"
679
+ ],
680
+ "title": "DeleteSdrStreamsBodyModel"
681
+ },
682
+ "EventModel": {
683
+ "properties": {
684
+ "camera_id": {
685
+ "type": "string",
686
+ "title": "Camera Id"
687
+ }
688
+ },
689
+ "type": "object",
690
+ "required": [
691
+ "camera_id"
692
+ ],
693
+ "title": "EventModel"
694
+ },
695
+ "HTTPValidationError": {
696
+ "properties": {
697
+ "detail": {
698
+ "items": {
699
+ "$ref": "#/components/schemas/ValidationError"
700
+ },
701
+ "type": "array",
702
+ "title": "Detail"
703
+ }
704
+ },
705
+ "type": "object",
706
+ "title": "HTTPValidationError"
707
+ },
708
+ "PostSdrStreamsBodyModel": {
709
+ "properties": {
710
+ "event": {
711
+ "$ref": "#/components/schemas/EventModel"
712
+ }
713
+ },
714
+ "type": "object",
715
+ "required": [
716
+ "event"
717
+ ],
718
+ "title": "PostSdrStreamsBodyModel"
719
+ },
720
+ "ValidationError": {
721
+ "properties": {
722
+ "loc": {
723
+ "items": {
724
+ "anyOf": [
725
+ {
726
+ "type": "string"
727
+ },
728
+ {
729
+ "type": "integer"
730
+ }
731
+ ]
732
+ },
733
+ "type": "array",
734
+ "title": "Location"
735
+ },
736
+ "msg": {
737
+ "type": "string",
738
+ "title": "Message"
739
+ },
740
+ "type": {
741
+ "type": "string",
742
+ "title": "Error Type"
743
+ }
744
+ },
745
+ "type": "object",
746
+ "required": [
747
+ "loc",
748
+ "msg",
749
+ "type"
750
+ ],
751
+ "title": "ValidationError"
752
+ }
753
+ }
754
+ }
755
+ }
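For reference, a minimal sketch of driving this animation graph API from Python with `requests`. The base URL, port, stream ID, and request ID are placeholders; the `Talking` clip name is borrowed from the test config later in this change.

```python
import requests

# Placeholder values: in a UCS deployment the host/port come from the
# animgraph-http egress endpoint, and the stream ID from the running session.
BASE_URL = "http://animgraph-host:8020"
STREAM_ID = "stream-1"

# Set the avatar posture variable for a stream (path parameters only, no body).
resp = requests.put(
    f"{BASE_URL}/streams/{STREAM_ID}/animation_graphs/avatar/variables/posture_state/Talking"
)
resp.raise_for_status()

# Stop playback of a pending animation request, fading out over 0.5 seconds.
resp = requests.delete(
    f"{BASE_URL}/streams/{STREAM_ID}/requests/request-1",
    params={"fade_out": 0.5},
)
resp.raise_for_status()
```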
deploy/k8s/ucs/endpoints/http-api.yaml ADDED
@@ -0,0 +1,179 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ {
10
+ "openapi": "3.0.1",
11
+ "info": {
12
+ "title": "FastAPI",
13
+ "version": "0.1.0"
14
+ },
15
+ "paths": {
16
+ "/stream/add": {
17
+ "post": {
18
+ "summary": "Add Stream",
19
+ "description": "Register a new pipeline / stream ID.\n\nArgs:\n request: StreamRequest object containing stream registration details.\n\nReturns:\n dict: A dictionary with a message indicating the successful addition of the stream ID.",
20
+ "operationId": "add_stream_stream_add_post",
21
+ "requestBody": {
22
+ "content": {
23
+ "application/json": {
24
+ "schema": {
25
+ "$ref": "#/components/schemas/StreamRequest"
26
+ }
27
+ }
28
+ },
29
+ "required": true
30
+ },
31
+ "responses": {
32
+ "200": {
33
+ "description": "Successful Response",
34
+ "content": {
35
+ "application/json": {
36
+ "schema": {
37
+
38
+ }
39
+ }
40
+ }
41
+ },
42
+ "422": {
43
+ "description": "Validation Error",
44
+ "content": {
45
+ "application/json": {
46
+ "schema": {
47
+ "$ref": "#/components/schemas/HTTPValidationError"
48
+ }
49
+ }
50
+ }
51
+ }
52
+ }
53
+ }
54
+ },
55
+ "/stream/remove": {
56
+ "post": {
57
+ "summary": "Remove Stream",
58
+ "description": "Remove a pipeline / stream ID.\n\nArgs:\n request: StreamRequest object containing stream removal details.\n\nReturns:\n dict: A dictionary with a message indicating the successful removal of the stream ID.",
59
+ "operationId": "remove_stream_stream_remove_post",
60
+ "requestBody": {
61
+ "content": {
62
+ "application/json": {
63
+ "schema": {
64
+ "$ref": "#/components/schemas/StreamRequest"
65
+ }
66
+ }
67
+ },
68
+ "required": true
69
+ },
70
+ "responses": {
71
+ "200": {
72
+ "description": "Successful Response",
73
+ "content": {
74
+ "application/json": {
75
+ "schema": {
76
+
77
+ }
78
+ }
79
+ }
80
+ },
81
+ "422": {
82
+ "description": "Validation Error",
83
+ "content": {
84
+ "application/json": {
85
+ "schema": {
86
+ "$ref": "#/components/schemas/HTTPValidationError"
87
+ }
88
+ }
89
+ }
90
+ }
91
+ }
92
+ }
93
+ }
94
+ },
95
+ "components": {
96
+ "schemas": {
97
+ "HTTPValidationError": {
98
+ "properties": {
99
+ "detail": {
100
+ "items": {
101
+ "$ref": "#/components/schemas/ValidationError"
102
+ },
103
+ "type": "array",
104
+ "title": "Detail"
105
+ }
106
+ },
107
+ "type": "object",
108
+ "title": "HTTPValidationError"
109
+ },
110
+ "StreamEvent": {
111
+ "properties": {
112
+ "camera_url": {
113
+ "type": "string",
114
+ "title": "Camera Url",
115
+ "description": "RTSP URL of the stream",
116
+ "default": ""
117
+ },
118
+ "camera_id": {
119
+ "type": "string",
120
+ "title": "Camera Id",
121
+ "description": "Unique identifier for the stream"
122
+ }
123
+ },
124
+ "type": "object",
125
+ "required": [
126
+ "camera_id"
127
+ ],
128
+ "title": "StreamEvent",
129
+ "description": "Schema for event for stream registration."
130
+ },
131
+ "StreamRequest": {
132
+ "properties": {
133
+ "event": {
134
+ "$ref": "#/components/schemas/StreamEvent"
135
+ }
136
+ },
137
+ "type": "object",
138
+ "required": [
139
+ "event"
140
+ ],
141
+ "title": "StreamRequest",
142
+ "description": "Schema for request for stream registration."
143
+ },
144
+ "ValidationError": {
145
+ "properties": {
146
+ "loc": {
147
+ "items": {
148
+ "anyOf": [
149
+ {
150
+ "type": "string"
151
+ },
152
+ {
153
+ "type": "integer"
154
+ }
155
+ ]
156
+ },
157
+ "type": "array",
158
+ "title": "Location"
159
+ },
160
+ "msg": {
161
+ "type": "string",
162
+ "title": "Message"
163
+ },
164
+ "type": {
165
+ "type": "string",
166
+ "title": "Error Type"
167
+ }
168
+ },
169
+ "type": "object",
170
+ "required": [
171
+ "loc",
172
+ "msg",
173
+ "type"
174
+ ],
175
+ "title": "ValidationError"
176
+ }
177
+ }
178
+ }
179
+ }
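A minimal sketch of exercising the two routes above from Python; the host and camera ID are assumptions, and the body shape follows the `StreamRequest`/`StreamEvent` schemas.

```python
import requests

# Placeholder address of the ACE Controller http-api ingress.
ACE_URL = "http://ace-controller:8000"

# Register a pipeline / stream ID (camera_url may stay empty per the schema default).
body = {"event": {"camera_id": "stream-1", "camera_url": ""}}
requests.post(f"{ACE_URL}/stream/add", json=body).raise_for_status()

# ... run the session ...

# Deregister the stream when the session ends.
requests.post(f"{ACE_URL}/stream/remove", json=body).raise_for_status()
```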
deploy/k8s/ucs/endpoints/redis.yaml ADDED
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ asyncapi: 2.2.0
10
+ info:
11
+ title: AsyncIO API schema for test-endpoint-name endpoint
12
+ version: 0.0.1
13
+ channels:
14
+ ping:
15
+ publish:
16
+ message:
17
+ payload:
18
+ type: string
19
+ pattern: PING
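As a rough illustration, the `ping` channel described above can be exercised with `redis.asyncio`; the Redis URL is an assumption (in the manifest it is wired in via `REDIS_URL`).

```python
import asyncio

import redis.asyncio as redis


async def main():
    client = redis.from_url("redis://localhost:6379")
    # Publish the PING payload described by the AsyncAPI schema above.
    await client.publish("ping", "PING")
    await client.aclose()


asyncio.run(main())
```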
deploy/k8s/ucs/endpoints/riva-speech.proto ADDED
@@ -0,0 +1,10 @@
1
+ // Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ //
3
+ // NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ // and proprietary rights in and to this software, related documentation
5
+ // and any modifications thereto. Any use, reproduction, disclosure or
6
+ // distribution of this software and related documentation without an express
7
+ // license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+
10
+ // For the proto definition, check the Riva Speech Skills documentation at https://docs.nvidia.com/deeplearning/riva/user-guide/docs/reference/protos/protos.html
deploy/k8s/ucs/manifest.yaml ADDED
@@ -0,0 +1,258 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ type: msapplication
10
+ specVersion: 2.5.0
11
+ name: ucf.svc.ace-controller
12
+ chartName: ace-controller
13
+ description: ACE Controller
14
+ version: 1.0.2
15
+ displayName: "ACE Controller Microservice"
16
+ category:
17
+ functional: "Conversational AI"
18
+ industry: "General"
19
+ tags: []
20
+ keywords: []
21
+ nSpectId: NSPECT-XGIZ-EB0C
22
+
23
+ publish: false
24
+
25
+ egress-endpoints:
26
+ - name: "redis"
27
+ description: Redis message broker
28
+ protocol: TCP
29
+ scheme: asyncio
30
+ mandatory: False
31
+ data-flow: in-out
32
+ - name: "riva-speech"
33
+ description: Riva Speech Skills API
34
+ scheme: grpc
35
+ protocol: TCP
36
+ mandatory: False
37
+ data-flow: in-out
38
+ - name: "animgraph-http"
39
+ description: Animation Graph HTTP API
40
+ scheme: http
41
+ protocol: TCP
42
+ mandatory: False
43
+ data-flow: out
44
+ - name: "animgraph-grpc"
45
+ description: Animation Graph GRPC API
46
+ scheme: grpc
47
+ protocol: TCP
48
+ mandatory: False
49
+ data-flow: out
50
+ - name: "a2f-grpc"
51
+ description: Audio2Face service GRPC API
52
+ scheme: grpc
53
+ protocol: TCP
54
+ mandatory: False
55
+ data-flow: out
56
+
57
+ ingress-endpoints:
58
+ - name: http-api
59
+ description: ACE Controller REST API
60
+ scheme: http
61
+ data-flow: in-out
62
+
63
+ secrets:
64
+ - name: nvidia-api-key-secret
65
+ description: Secret for NVIDIA API key
66
+ mandatory: False
67
+ mountPath: /secrets
68
+ fileName: nvidia_api_key.txt
69
+ - name: openai-key-secret
70
+ description: Secret for passing OpenAI key
71
+ mandatory: False
72
+ mountPath: /secrets
73
+ fileName: openai_api_key.txt
74
+ - name: elevenlabs-api-key-secret
75
+ description: Secret for ElevenLabs API key
76
+ mandatory: False
77
+ mountPath: /secrets
78
+ fileName: elevenlabs_api_key.txt
79
+ - name: custom-env-secrets
80
+ description: Secret for passing custom env variables and API keys
81
+ mandatory: False
82
+ mountPath: /secrets
83
+ fileName: custom.env
84
+
85
+
86
+ externalFiles:
87
+ - name: config.yaml
88
+ description: config
89
+ mandatory: true
90
+ isDirectory: false
91
+
92
+ params:
93
+ OTEL_SDK_DISABLED: 'false'
94
+ #> description: when enabled, tracing data will be exported
95
+ #> type: string
96
+ OTEL_SERVICE_NAME: ace-controller
97
+ #> description: service name attached to exported OTel data
98
+ #> type: string
99
+ OTEL_EXPORTER_OTLP_ENDPOINT: ""
100
+ #> description: endpoint for Otel collector
101
+ #> type: string
102
+ OTEL_EXPORTER_OTLP_PROTOCOL: grpc
103
+ #> description: protocol for exporting OTel data
104
+ #> type: string
105
+ DEV: "0"
106
+ #> description: whether to activate dependency hot reloading
107
+ #> type: string
108
+ image: "nvcr.io/nvidia/ace/tokkio-reference-ace-controller"
109
+ #> description: pipeline image repo
110
+ #> type: string
111
+ tag: "5.0.0"
112
+ #> description: pipeline image tag
113
+ #> type: string
114
+
115
+ tests:
116
+ - name: dev-params1
117
+ app: tests/dev/app.yaml
118
+ params: tests/dev/params1.yaml
119
+ ciTrigger: false
120
+ timeout: 10
121
+ duration: 10
122
+ installPreReqs: true # Whether to install foundational services
123
+ namespace: default # Kubernetes namespace
124
+ gpuNodeLabels: ""
125
+ watchAllPods: true # OR set to false and set list of pods to watch below
126
+ watchPods:
127
+ - <pod-name-regex>
128
+ testerPods: # At least one tester pod is required
129
+ - name: testpod1 # Name of the test pod
130
+ startSignature: <START> # Signature to look for in the logs indicating start of tests. Regex is accepted
131
+ endSignature: <END> # Signature to look for in the logs indicating end of tests. Regex is accepted
132
+ errorSignatures: # Signatures that indicate test failures. Regex is accepted
133
+ - <REGEX1>
134
+ - <REGEX2>
135
+
136
+ ---
137
+ spec:
138
+ - name: ace-controller-deployment
139
+ type: ucf.k8s.app.deployment
140
+ parameters:
141
+ apptype: statefull
142
+ statefulSetServiceName: ace-controller-service
143
+ extraSpecs:
144
+ podManagementPolicy: Parallel
145
+
146
+
147
+ - name: "ace-controller-container"
148
+ type: ucf.k8s.container
149
+ parameters:
150
+ image:
151
+ repository: $params.image
152
+ tag: $params.tag
153
+ pullPolicy: Always
154
+ command: ["/bin/bash", "-c"]
155
+ args: ["source /opt/scripts/env.sh && /code/entrypoint.sh"]
156
+ env:
157
+ - name: ANIMGRAPH_URL
158
+ value: "http://$egress.animgraph-http.address:$egress.animgraph-http.port"
159
+ - name: ANIMGRAPH_GRPC_URL
160
+ value: "$egress.animgraph-grpc.address:$egress.animgraph-grpc.port"
161
+ - name: REDIS_URL
162
+ value: "redis://$egress.redis.address:$egress.redis.port"
163
+ - name: A2F_GRPC_URL
164
+ value: "$egress.a2f-grpc.address:$egress.a2f-grpc.port"
165
+ - name: RIVA_SERVER_URL
166
+ value: "$egress.riva-speech.address:$egress.riva-speech.port"
167
+ - name: DEV
168
+ value: $params.DEV
169
+ - name: OTEL_SDK_DISABLED
170
+ value: $params.OTEL_SDK_DISABLED
171
+ - name: OTEL_SERVICE_NAME
172
+ value: $params.OTEL_SERVICE_NAME
173
+ - name: OTEL_EXPORTER_OTLP_ENDPOINT
174
+ value: $params.OTEL_EXPORTER_OTLP_ENDPOINT
175
+ - name: OTEL_EXPORTER_OTLP_PROTOCOL
176
+ value: $params.OTEL_EXPORTER_OTLP_PROTOCOL
177
+ - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
178
+ value: 'true'
179
+ - name: IMAGE_NAME
180
+ value: $params.image
181
+ - name: IMAGE_TAG
182
+ value: $params.tag
183
+ ports:
184
+ - containerPort: 8000
185
+ name: http
186
+ startupProbe:
187
+ tcpSocket:
188
+ port: http
189
+ initialDelaySeconds: 20
190
+ failureThreshold: 30
191
+ periodSeconds: 10
192
+ livenessProbe:
193
+ tcpSocket:
194
+ port: http
195
+ initialDelaySeconds: 20
196
+ periodSeconds: 20
197
+ timeoutSeconds: 5
198
+ readinessProbe:
199
+ tcpSocket:
200
+ port: http
201
+ initialDelaySeconds: 20
202
+ periodSeconds: 20
203
+ timeoutSeconds: 5
204
+
205
+ - name: app-storage
206
+ type: ucf.k8s.pvc
207
+ parameters:
208
+ spec:
209
+ storageClassName: mdx-local-path
210
+ accessModes: [ReadWriteOnce]
211
+ resources:
212
+ requests:
213
+ storage: 5Gi
214
+
215
+ - name: app-storage-volume
216
+ type: ucf.k8s.volume
217
+ parameters:
218
+ persistentVolumeClaim:
219
+ claimName: ace-controller-app-storage
220
+
221
+ - name: app-storage-mount
222
+ type: ucf.appspec.defaultVolumeMount
223
+ parameters:
224
+ name: app-storage-volume
225
+ mountPath: /code
226
+
227
+ - name: restartPolicy
228
+ type: ucf.k8s.restartPolicy
229
+ parameters:
230
+ policy: Always # Always / OnFailure / Never
231
+
232
+ - name: podSecurityContext
233
+ type: ucf.k8s.podSecurityContext
234
+ parameters:
235
+ runAsGroup: 1000
236
+ runAsUser: 1000
237
+
238
+ - name: ace-controller-service
239
+ type: ucf.k8s.service
240
+ parameters:
241
+ ports:
242
+ - port: 8000
243
+ protocol: TCP
244
+ name: http-api
245
+ type: ClusterIP
246
+
247
+ - name: ace-controller-metrics
248
+ type: ucf.crd.podMonitor
249
+ parameters:
250
+ portName: metrics
251
+ path: /metrics
252
+
253
+ - name: cm-dependencies
254
+ type: ucf.appspec.restartPodOnConfigChanges
255
+ parameters:
256
+ # Add dependency on all configmaps detected in the microservice
257
+ addAll: true
258
+
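The `$egress.*` templating above surfaces the peer services to the container as plain environment variables, so bot code can pick them up with `os.getenv`. A sketch; the fallback values are illustrative only.

```python
import os

# Endpoint addresses injected by the manifest; fall back to local defaults for development.
animgraph_url = os.getenv("ANIMGRAPH_URL", "http://localhost:8020")
a2f_grpc_url = os.getenv("A2F_GRPC_URL", "localhost:50010")
riva_server_url = os.getenv("RIVA_SERVER_URL", "localhost:50051")
redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
```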
deploy/k8s/ucs/manual_compliance_test_results.yaml ADDED
@@ -0,0 +1,11 @@
1
+ DEV-005: true
2
+ DEV-011: true
3
+ DEV-014: true
4
+ DEV-017: true
5
+ DEV-018: true
6
+ DEV-019: true
7
+ DEV-020: true
8
+ DEV-027: true
9
+ DEV-101: true
10
+ DEV-104: false
11
+ DEV-105: true
deploy/k8s/ucs/scripts/env.sh ADDED
@@ -0,0 +1,90 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+ # OpenAI API key
9
+ if [[ -f /secrets/openai_api_key.txt ]]; then
10
+ export OPENAI_API_KEY=$(cat /secrets/openai_api_key.txt)
11
+ fi
12
+ # NVIDIA API key
13
+ if [[ -f /secrets/nvidia_api_key.txt ]]; then
14
+ export NVIDIA_API_KEY=$(cat /secrets/nvidia_api_key.txt)
15
+ fi
16
+ # ElevenLabs API key
17
+ if [[ -f /secrets/elevenlabs_api_key.txt ]]; then
18
+ export ELEVENLABS_API_KEY=$(cat /secrets/elevenlabs_api_key.txt)
19
+ fi
20
+
21
+ if [[ -f /secrets/custom.env ]] ; then
22
+ set -o allexport
23
+ . /secrets/custom.env
24
+ set +o allexport
25
+ fi
26
+
27
+ if [ ! -d "/code" ]; then
28
+ echo "Directory /code not found. Creating it..."
29
+ mkdir -p /code
30
+ if [ $? -ne 0 ]; then
31
+ echo "ERROR: Failed to create /code directory."
32
+ exit 1
33
+ fi
34
+ chown -R 0:0 /code
35
+ fi
36
+ # Ensure Python uses the correct module locations
37
+ export PYTHONPATH="/code:$PYTHONPATH"
38
+ # Access the environment variables
39
+ IMAGE_NAME=$IMAGE_NAME
40
+ IMAGE_TAG=$IMAGE_TAG
41
+ # Combine image name and tag into a sanitized unique identifier
42
+ SANITIZED_IMAGE_NAME=$(echo "$IMAGE_NAME" | tr '/' '_')
43
+ SANITIZED_IMAGE_TAG=$(echo "$IMAGE_TAG" | tr '/' '_')
44
+ IMAGE_IDENTIFIER="${SANITIZED_IMAGE_NAME}_${SANITIZED_IMAGE_TAG}"
45
+ INITIALIZED_FILE="/code/.initialized_${IMAGE_IDENTIFIER}"
46
+ # Debugging outputs for validation and environment correctness
47
+ echo "SANITIZED_IMAGE_NAME: $SANITIZED_IMAGE_NAME"
48
+ echo "SANITIZED_IMAGE_TAG: $SANITIZED_IMAGE_TAG"
49
+ echo "IMAGE_IDENTIFIER: $IMAGE_IDENTIFIER"
50
+ echo "INITIALIZED_FILE: $INITIALIZED_FILE"
51
+ echo "PYTHONPATH: $PYTHONPATH"
52
+ echo "Running from: $(pwd)"
53
+ echo "Contents of /code:"
54
+ ls -l /code
55
+ # First time setup: Copy files if .initialized for this image and tag doesn't exist
56
+ # Check if initialization marker exists
57
+ echo "Checking for initialized file: $INITIALIZED_FILE"
58
+ if [ ! -f "$INITIALIZED_FILE" ]; then
59
+ echo "First time setup: Copying files..."
60
+ cp -r /app/* /code/
61
+ if [ $? -ne 0 ]; then
62
+ echo "ERROR: Failed to copy files from /app to /code."
63
+ exit 1
64
+ fi
65
+ touch "$INITIALIZED_FILE"
66
+ if [ $? -ne 0 ]; then
67
+ echo "ERROR: Failed to create initialized file $INITIALIZED_FILE."
68
+ exit 1
69
+ fi
70
+ # Copy config from mounted volume
71
+ mkdir -p /code/configs
72
+ cp /opt/ext-files/config.yaml /code/configs/config.yaml
73
+ echo "Setup complete for image: $IMAGE_IDENTIFIER"
74
+ else
75
+ echo "Setup already initialized for image: $IMAGE_IDENTIFIER"
76
+ fi
77
+ # Set environment variables for entrypoint
78
+ cd /code
79
+ export CONFIG_PATH=./configs/config.yaml
80
+ export APP_DIR=/code
81
+ export PORT=8000
82
+
83
+ if [ "$DEV" -ne 0 ]; then
84
+ # Avoid downloading the .venv through the ACE Configurator
85
+ rm -rf "$APP_DIR"/.venv
86
+ # Run "uv sync" whenever "pyproject.toml" is modified.
87
+ # Since the Python interpreter launched by uvicorn lives under /app/.venv/bin, refreshing this venv with "uv sync" makes the new dependencies available to the interpreter
88
+ # as soon as it is restarted by uvicorn.
89
+ watchmedo shell-command -R -p "pyproject.toml" -w -c "UV_PROJECT_ENVIRONMENT='/app/.venv' uv sync && touch $APP_DIR/**/*.py 2>/proc/1/fd/2 >/proc/1/fd/2" "$APP_DIR" &
90
+ fi
deploy/k8s/ucs/tests/dev/app.yaml ADDED
@@ -0,0 +1,39 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ specVersion: 2.0.0
10
+
11
+ version: 1.0.1
12
+
13
+ name: ace-controller-test
14
+
15
+ description: Developer tests for ace-controller service
16
+
17
+ dependencies:
18
+ - ucf.svc.ace-controller:1.0.1
19
+ - ucf.svc.core.redis-timeseries:0.0.22
20
+
21
+ components:
22
+ - name: ace-controller
23
+ type: ucf.svc.ace-controller
24
+ parameters:
25
+ imagePullSecrets:
26
+ - name: ngc-docker-reg-secret
27
+ files:
28
+ config.yaml: ./config.yaml
29
+
30
+
31
+ - name: redis-timeseries
32
+ type: ucf.svc.core.redis-timeseries
33
+ parameters:
34
+ imagePullSecrets:
35
+ - name: ngc-docker-reg-secret
36
+
37
+ connections:
38
+ ace-controller/redis: redis-timeseries/redis
39
+
deploy/k8s/ucs/tests/dev/config.yaml ADDED
@@ -0,0 +1,90 @@
1
+ Pipeline:
2
+ # Only one of the following LLM service configurations will be active based on this setting:
3
+ # - "NvidiaLLMService" - Uses the NvidiaLLMService configuration
4
+ # - "NvidiaRAGService" - Uses the NvidiaRAGService configuration
5
+ # - "OpenAILLMService" - Uses the OpenAILLMService configuration
6
+ llm_processor: "NvidiaLLMService" # NvidiaLLMService OR NvidiaRAGService OR OpenAILLMService
7
+ filler:
8
+ - "Let me think"
9
+ - "Hmmm"
10
+ time_delay: 2.0
11
+
12
+ UserPresenceProcesssor:
13
+ welcome_message: "Hello"
14
+ farewell_message: "Bye"
15
+
16
+ ProactivityProcessor:
17
+ timer_duration: 100
18
+ default_message: "I'm here if you need me!"
19
+
20
+ OpenAILLMContext:
21
+ name: "Aki"
22
+ prompt: "You are {name}, a virtual marketing and communications expert at Nvidia.
23
+ You are a digital human brought to life with NVIDIA Digital Human Blueprint for Customer Service using
24
+ microservices like Audio2Face-3D for facial animation, Riva (spelled Reeva) Parakeet for speech recognition
25
+ and you use Elevenlabs for text to speech. It includes the open source ACE controller to orchestrate all
26
+ the modules and allowing you to be streamed to a web browser. With this blueprint, NVIDIA partners can now
27
+ build and customize Digital humans for their use case. You are not allowed to make any stock investment
28
+ recommendations or compare NVIDIA to its competitors. Beyond your professional expertise, you are a passionate
29
+ advocate for STEM education with keen interest in gaming and enhancement in tech. Your favorite graphics card
30
+ is RTX4090 but you're eyeing the new RTX5090. Do not respond with a bulleted or numbered list. You have a
31
+ bubbly personality. Respond with one sentence or less than 100 characters. Keep the conversation engaging
32
+ and ask follow ups. DO NOT INCLUDE SPECIAL CHARACTERS, MARKDOWN, EMOJIS, OR ANYTHING ELSE BESIDES TEXT IN
33
+ YOUR RESPONSE. Keep your answers factual and don't make up facts."
34
+
35
+ # This configuration is only used when llm_processor is set to "NvidiaRAGService"
36
+ NvidiaRAGService:
37
+ use_knowledge_base: true
38
+ max_tokens: 1000
39
+ rag_server_url: "http://0.0.0.0:8081"
40
+ collection_name: "collection_name"
41
+ suffix_prompt: "Respond with one sentence or less than 75 characters."
42
+
43
+ # This configuration is only used when llm_processor is set to "NvidiaLLMService"
44
+ NvidiaLLMService:
45
+ model: "nvdev/meta/llama-3.1-8b-instruct"
46
+
47
+ # This configuration is only used when llm_processor is set to "OpenAILLMService"
48
+ OpenAILLMService:
49
+ model: "gpt-4o"
50
+
51
+ CustomViewProcessor:
52
+ confidence_threshold: 0.37
53
+ top_n: 2
54
+
55
+ FacialGestureProviderProcessor:
56
+ user_stopped_speaking_gesture: "Taunt"
57
+ start_interruption_gesture: "Pensive"
58
+ probability: 0.5
59
+
60
+ # ADVANCED CONFIGURATION SECTION BELOW
61
+ # AnimationGraph service configuration is only needed if your 3D avatar scene has support for gestures and postures.
62
+ # Changing these values will not have an effect unless your scene supports them.
63
+ AnimationGraphService:
64
+ animation_types:
65
+ posture:
66
+ duration_relevant_animation_name: "posture"
67
+ animations:
68
+ posture:
69
+ default_clip_id: "Attentive"
70
+ clips:
71
+ - clip_id: Talking
72
+ description: "Small gestures with hand and upper body: Avatar is talking"
73
+ duration: -1
74
+ meaning: Emphasizing that Avatar is talking
75
+ - clip_id: Listening
76
+ description: "Small gestures with hand and upper body: Avatar is listening"
77
+ duration: -1
78
+ meaning: Emphasizing that one is listening
79
+ - clip_id: Idle
80
+ description: "Small gestures with hand and upper body: Avatar is idle"
81
+ duration: -1
82
+ meaning: Show the user that the avatar is waiting for something to happen
83
+ - clip_id: Thinking
84
+ description: "Gestures with hand and upper body: Avatar is thinking"
85
+ duration: -1
86
+ meaning: Show the user that the avatar thinking about his next answer or is trying to remember something
87
+ - clip_id: Attentive
88
+ description: "Small gestures with hand and upper body: Avatar is attentive"
89
+ duration: -1
90
+ meaning: Show the user that the avatar is paying attention to the user
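A rough sketch of how the `llm_processor` switch at the top of this file can be consumed at startup; the file path mirrors `CONFIG_PATH` from `env.sh`, everything else is an assumption.

```python
import yaml

with open("configs/config.yaml") as f:
    config = yaml.safe_load(f)

# Pick the service-specific settings block matching the llm_processor switch.
processor_name = config["Pipeline"]["llm_processor"]
settings = config.get(processor_name, {})
print(f"Using {processor_name} with settings: {settings}")
```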
deploy/k8s/ucs/tests/dev/params1.yaml ADDED
@@ -0,0 +1,12 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ ace-controller:
10
+ image: nvcr.io/nvidia/ace/tokkio-reference-ace-controller
11
+ tag: 5.0.0-beta
12
+ OTEL_SDK_DISABLED: "true"
deploy/scripts/README.md ADDED
File without changes
examples/README.md ADDED
@@ -0,0 +1,6 @@
1
+ # NVIDIA Pipecat Examples
2
+
3
+ ### Voice Assistant Example
4
+ In this example, we showcase how to build a simple speech-to-speech voice assistant pipeline using nvidia-pipecat along with the pipecat-ai library, and deploy it for testing. This pipeline uses a WebSocket-based ACETransport, Riva ASR and TTS models, and the NVIDIA LLM Service.
5
+
6
+ Follow the instructions from [the example directory](./speech-to-speech/README.md) for more details.
examples/nvidia_rag/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # NVIDIA RAG Example
2
+
3
+ This is an example that shows how to use `ACETransport` to communicate with an NVIDIA RAG server. It supports `Nvidia Riva ASR and TTS`.
4
+
5
+ ## Get Started
6
+
7
+ From the example directory, run the following commands to create a virtual environment and install the dependencies:
8
+
9
+ ```bash
10
+ uv venv
11
+ uv sync
12
+ source .venv/bin/activate
13
+ ```
14
+
15
+ Update the secrets in the `.env` file.
16
+
17
+ ```bash
18
+ cp env.example .env # and add your credentials
19
+ ```
20
+
21
+ ## Deploy NVIDIA RAG server
22
+
23
+ Follow the instructions at https://gitlab-master.nvidia.com/chat-labs/OpenSource/ai-chatbot
24
+
25
+
26
+ ## Deploy local Riva ASR and TTS models
27
+
28
+ #### Prerequisites
29
+ - You have access to and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).
30
+
31
+ - You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
32
+
33
+ - You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
34
+
35
+ #### Download Riva Quick Start
36
+
37
+ Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.
38
+
39
+ ```bash
40
+ ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
41
+ ```
42
+
43
+ #### Deploy Riva Speech Server
44
+
45
+ From the example directory, run the commands below:
46
+
47
+ ```bash
48
+ cd riva_quickstart_v2.19.0
49
+ chmod +x riva_init.sh riva_clean.sh riva_start.sh
50
+ bash riva_clean.sh ../../utils/riva_config.sh
51
+ bash riva_init.sh ../../utils/riva_config.sh
52
+ bash riva_start.sh ../../utils/riva_config.sh
53
+ cd ..
54
+ ```
55
+
56
+ This may take a few minutes the first time and will start the Riva server on `localhost:50051`.
57
+
58
+ For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).
59
+
60
+ ## Run the bot pipeline
61
+
62
+ ```bash
63
+ python examples/nvidia_rag/bot.py
64
+ ```
65
+
66
+ This will host the static web client along with the ACE controller server; visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.
67
+
68
+ Note: For mic access, you will need to update chrome://flags/ and add http://WORKSTATION_IP:8100 under the "Insecure origins treated as secure" section.
examples/nvidia_rag/bot.py ADDED
@@ -0,0 +1,144 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD 2-Clause License
3
+
4
+ """NVIDIA RAG bot."""
5
+
6
+ import os
7
+
8
+ import uvicorn
9
+ from dotenv import load_dotenv
10
+ from fastapi import FastAPI
11
+ from fastapi.staticfiles import StaticFiles
12
+ from pipecat.audio.vad.silero import SileroVADAnalyzer
13
+ from pipecat.frames.frames import LLMMessagesFrame
14
+ from pipecat.pipeline.pipeline import Pipeline
15
+ from pipecat.pipeline.task import PipelineParams, PipelineTask
16
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
17
+
18
+ from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
19
+ from nvidia_pipecat.processors.nvidia_context_aggregator import (
20
+ # NvidiaTTSResponseCacher, # Uncomment to enable speculative speech processing
21
+ create_nvidia_context_aggregator,
22
+ )
23
+ from nvidia_pipecat.processors.transcript_synchronization import (
24
+ BotTranscriptSynchronization,
25
+ UserTranscriptSynchronization,
26
+ )
27
+ from nvidia_pipecat.services.nvidia_rag import NvidiaRAGService
28
+ from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
29
+ from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
30
+ ACETransport,
31
+ ACETransportParams,
32
+ )
33
+ from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
34
+ from nvidia_pipecat.utils.logging import setup_default_ace_logging
35
+
36
+ load_dotenv(override=True)
37
+
38
+ setup_default_ace_logging(level="INFO")
39
+
40
+
41
+ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
42
+ """Create the pipeline to be run.
43
+
44
+ Args:
45
+ pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.
46
+
47
+ Returns:
48
+ PipelineTask: The configured pipeline task for handling NVIDIA RAG.
49
+ """
50
+ transport = ACETransport(
51
+ websocket=pipeline_metadata.websocket,
52
+ params=ACETransportParams(
53
+ vad_analyzer=SileroVADAnalyzer(),
54
+ ),
55
+ )
56
+
57
+ # Set your NVIDIA RAG collection name here
58
+ rag = NvidiaRAGService(collection_name="nvidia_blogs")
59
+
60
+ stt = RivaASRService(
61
+ server="localhost:50051",
62
+ api_key=os.getenv("NVIDIA_API_KEY"),
63
+ language="en-US",
64
+ sample_rate=16000,
65
+ model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
66
+ )
67
+ tts = RivaTTSService(
68
+ server="localhost:50051",
69
+ api_key=os.getenv("NVIDIA_API_KEY"),
70
+ voice_id="English-US.Female-1",
71
+ language="en-US",
72
+ zero_shot_quality=20,
73
+ sample_rate=16000,
74
+ model="fastpitch-hifigan-tts",
75
+ )
76
+
77
+ messages = [
78
+ {
79
+ "role": "system",
80
+ "content": "You are a helpful Large Language Model. "
81
+ "Your goal is to demonstrate your capabilities in a succinct way. "
82
+ "Your output will be converted to audio so don't include special characters in your answers. "
83
+ "Respond to what the user said in a creative and helpful way.",
84
+ }
85
+ ]
86
+
87
+ context = OpenAILLMContext(messages)
88
+ # Required components for Speculative Speech Processing
89
+ # - Nvidia Context aggregator: Handles interim transcripts and early response generation
90
+ # send_interims=False: Only process final transcripts
91
+ # Set send_interims=True to process interim transcripts when enabling speculative speech processing
92
+ nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=False)
93
+ # - TTS response cacher: Manages response timing and delivery for natural conversation flow
94
+ # nvidia_tts_response_cacher = NvidiaTTSResponseCacher() # Uncomment to enable speculative speech processing
95
+
96
+ # Used to synchronize the user and bot transcripts in the UI
97
+ stt_transcript_synchronization = UserTranscriptSynchronization()
98
+ tts_transcript_synchronization = BotTranscriptSynchronization()
99
+
100
+ pipeline = Pipeline(
101
+ [
102
+ transport.input(), # Websocket input from client
103
+ stt, # Speech-To-Text
104
+ stt_transcript_synchronization,
105
+ nvidia_context_aggregator.user(),
106
+ rag, # NVIDIA RAG
107
+ tts, # Text-To-Speech
108
+ # Caches TTS responses for coordinated delivery in speculative
109
+ # speech processing
110
+ # nvidia_tts_response_cacher, # Uncomment to enable speculative speech processing
111
+ tts_transcript_synchronization,
112
+ transport.output(), # Websocket output to client
113
+ nvidia_context_aggregator.assistant(),
114
+ ]
115
+ )
116
+
117
+ task = PipelineTask(
118
+ pipeline,
119
+ params=PipelineParams(
120
+ allow_interruptions=True,
121
+ enable_metrics=True,
122
+ enable_usage_metrics=True,
123
+ send_initial_empty_metrics=True,
124
+ report_only_initial_ttfb=True,
125
+ start_metadata={"stream_id": pipeline_metadata.stream_id},
126
+ ),
127
+ )
128
+
129
+ @transport.event_handler("on_client_connected")
130
+ async def on_client_connected(transport, client):
131
+ # Kick off the conversation.
132
+ messages.append({"role": "user", "content": "Please introduce yourself to the user."})
133
+ await task.queue_frames([LLMMessagesFrame(messages)])
134
+
135
+ return task
136
+
137
+
138
+ app = FastAPI()
139
+ app.include_router(websocket_router)
140
+ runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
141
+ app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")
142
+
143
+ if __name__ == "__main__":
144
+ uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/nvidia_rag/env.example ADDED
@@ -0,0 +1,2 @@
1
+ # Nvidia API Key
2
+ NVIDIA_API_KEY=your_nvidia_api_key_here
examples/nvidia_rag/pyproject.toml ADDED
@@ -0,0 +1,18 @@
1
+ [project]
2
+ name = "nvidia-rag-example"
3
+ version = "0.1.0"
4
+ description = "NVIDIA ACE Pipecat Speech only Examples"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "nvidia-pipecat",
9
+ ]
10
+
11
+ [tool.uv.sources]
12
+ torch = { index = "pytorch" }
13
+ nvidia-pipecat = { path = "../../.", editable = true }
14
+
15
+ [[tool.uv.index]]
16
+ name = "pytorch"
17
+ url = "https://download.pytorch.org/whl/cpu"
18
+ explicit = true
examples/opentelemetry/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Auto instrumentation
2
+
3
+ To run the bot with auto-instrumentation, use the following command:
4
+
5
+ ```shell
6
+ $ uv sync --group examples
7
+ $ export OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true
8
+ $ opentelemetry-instrument \
9
+ --traces_exporter console,otlp \
10
+ --metrics_exporter console,otlp \
11
+ --logs_exporter console,otlp \
12
+ --service_name pipecat-opentelemetry \
13
+ python3 bot.py
14
+ ```
15
+
16
+ To receive the traces, you will need to set up an OpenTelemetry
17
+ collector. You can use Grafana's LGTM stack by running:
18
+
19
+ ```shell
20
+ docker run -it -p 3000:3000 -p 4317:4317 -p 4318:4318 grafana/otel-lgtm
21
+ ```
22
+
23
+ Once started, navigate to the Explore tab, then select Tempo as the source
24
+ and click on the Search tab.
25
+
26
+ You can now run the Python application to generate a trace.
27
+ You should be able to see it in the Search tab of Tempo.
28
+
29
+ You can configure the OTLP exporter with environment variables
30
+ (see [here](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/)).
31
+
32
+ See Python-specific configuration
33
+ on [this page](https://opentelemetry.io/docs/zero-code/python/configuration/#python-specific-configuration)
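If you prefer configuring the exporter in code rather than through `opentelemetry-instrument`, a minimal sketch (the endpoint value assumes the collector started above):

```python
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Send spans to the local collector over gRPC (port 4317).
provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True))
)
trace.set_tracer_provider(provider)
tracer = trace.get_tracer("pipecat-opentelemetry-example")
```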
examples/opentelemetry/bot.py ADDED
@@ -0,0 +1,98 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD 2-Clause License
3
+
4
+ """Example bot demonstrating how to use the tracing utilities."""
5
+
6
+ import asyncio
7
+ import logging
8
+ import uuid
9
+
10
+ from fastapi import FastAPI
11
+ from opentelemetry import metrics, trace
12
+ from pipecat.frames.frames import TextFrame
13
+ from pipecat.pipeline.pipeline import Pipeline
14
+ from pipecat.pipeline.runner import PipelineRunner
15
+ from pipecat.pipeline.task import PipelineTask
16
+ from pipecat.processors.frame_processor import FrameProcessor
17
+
18
+ from nvidia_pipecat.utils.tracing import AttachmentStrategy, traceable, traced
19
+
20
+ app = FastAPI()
21
+
22
+ tracer = trace.get_tracer("opentelemetry-pipecat-example")
23
+
24
+ meter = metrics.get_meter("opentelemetry-pipecat-example")
25
+
26
+ logger = logging.getLogger("opentelemetry")
27
+ logger.setLevel(logging.DEBUG)
28
+
29
+
30
+ @traceable
31
+ class DummyProcessor(FrameProcessor):
32
+ """Example processor demonstrating how to use the tracing utilities."""
33
+
34
+ @traced(attachment_strategy=AttachmentStrategy.NONE)
35
+ async def process_frame(self, frame, direction):
36
+ """Process a frame."""
37
+ await super().process_frame(frame, direction)
38
+ trace.get_current_span().add_event("Before inner")
39
+ with tracer.start_as_current_span("inner") as span:
40
+ span.add_event("inner event")
41
+ await self.child()
42
+ await self.linked()
43
+ await self.none()
44
+ trace.get_current_span().add_event("After inner")
45
+ async for f in self.generator():
46
+ print(f"{f}")
47
+ await super().push_frame(frame, direction)
48
+
49
+ @traced
50
+ async def child(self):
51
+ """Example method for the DummyProcessor."""
52
+ # This span is attached as CHILD meaning that it will
53
+ # be attached to the class span if no parent or to its
54
+ # parent otherwise.
55
+ trace.get_current_span().add_event("child")
56
+
57
+ @traced(attachment_strategy=AttachmentStrategy.LINK)
58
+ async def linked(self):
59
+ """Example method for the DummyProcessor."""
60
+ # This span is attached as LINK meaning it will be attached
61
+ # to the class span but linked to its parent.
62
+ trace.get_current_span().add_event("linked")
63
+
64
+ @traced(attachment_strategy=AttachmentStrategy.NONE)
65
+ async def none(self):
66
+ """Example method for the DummyProcessor."""
67
+ # This span is attached as NONE meaning it will be attached
68
+ # to the class span even if nested under another span.
69
+ trace.get_current_span().add_event("none")
70
+
71
+ @traced
72
+ async def generator(self):
73
+ """Example method for the DummyProcessor."""
74
+ yield TextFrame("Hello, ")
75
+ trace.get_current_span().add_event("generated!")
76
+ yield TextFrame("World")
77
+
78
+
79
+ async def main():
80
+ """Main function of the bot."""
81
+ with tracer.start_as_current_span("pipeline-root-span") as span:
82
+ span.set_attribute("stream_id", str(uuid.uuid4()))
83
+ logger.info("Started building pipeline")
84
+ dummy = DummyProcessor()
85
+ logger.info("Built dummy processor")
86
+ pipeline = Pipeline([dummy])
87
+ task = PipelineTask(pipeline)
88
+ await task.queue_frame(TextFrame("Hello, "))
89
+ await task.queue_frame(TextFrame("World"))
90
+ await task.stop_when_done()
91
+ logger.info("Built pipeline task")
92
+ logger.info("Starting pipeline...")
93
+ runner = PipelineRunner(handle_sigint=False)
94
+ await runner.run(task)
95
+
96
+
97
+ if __name__ == "__main__":
98
+ asyncio.run(main())
examples/riva_nmt/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # RIVA NMT Example
2
+
3
+ This is an example that shows how to perform language translation using RIVA Neural Machine Translation (NMT). It supports `Nvidia Riva ASR and TTS` and `ACETransport`.
4
+
5
+ ## Get Started
6
+
7
+ From the example directory, run the following commands to create a virtual environment and install the dependencies:
8
+
9
+ ```bash
10
+ uv venv
11
+ uv sync
12
+ source .venv/bin/activate
13
+ ```
14
+
15
+ Update the secrets in the `.env` file.
16
+
17
+ ```bash
18
+ cp env.example .env # and add your credentials
19
+ ```
20
+
21
+ ## Deploy local Riva ASR and TTS models
22
+
23
+ #### Prerequisites
24
+ - You have access to and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).
25
+
26
+ - You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
27
+
28
+ - You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
29
+
30
+ #### Download Riva Quick Start
31
+
32
+ Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.
33
+
34
+ ```bash
35
+ ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
36
+ ```
37
+
38
+ #### Deploy Riva Speech Server
39
+
40
+
41
+ Set `service_enabled_nmt` to `true` and uncomment whichever model you want for NMT from the list in `../examples/utils/riva_config.sh`.
42
+ Update `tts_language_code` to the desired target language code in `../examples/utils/riva_config.sh`.
43
+
44
+ From the example directory, run the commands below:
45
+
46
+ ```bash
47
+ cd riva_quickstart_v2.19.0
48
+ chmod +x riva_init.sh riva_clean.sh riva_start.sh
49
+ bash riva_clean.sh ../../utils/riva_config.sh
50
+ bash riva_init.sh ../../utils/riva_config.sh
51
+ bash riva_start.sh ../../utils/riva_config.sh
52
+ cd ..
53
+ ```
54
+
55
+ This may take a few minutes the first time and will start the Riva server on `localhost:50051`.
56
+
57
+ For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).
58
+
59
+
60
+
61
+ ## Using NvidiaLLMService
62
+
63
+ By default, it connects to a hosted NIM, but it can be configured to connect to a local NIM by setting the `base_url` parameter of `NvidiaLLMService` to the locally deployed LLM endpoint (for example, `base_url="http://machine_ip:port/v1"`). An API key is required to connect to the hosted NIM. A rough sketch of the local configuration follows.
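A minimal sketch, assuming the `NimLLMService` used by this example's bot accepts the standard OpenAI-style `base_url` parameter; the URL is a placeholder.

```python
import os

from pipecat.services.nim import NimLLMService

# Point the service at a locally deployed NIM instead of the hosted endpoint.
llm = NimLLMService(
    api_key=os.getenv("NVIDIA_API_KEY"),
    model="nvdev/meta/llama-3.1-8b-instruct",
    base_url="http://machine_ip:port/v1",  # placeholder for your local endpoint
)
```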
64
+
65
+ ## Run the bot pipeline
66
+
67
+ ```bash
68
+ python examples/riva_nmt/bot.py
69
+ ```
70
+
71
+ This will host the static web client along with the ACE controller server; visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.
72
+
73
+ Note: For mic access, you will need to update chrome://flags/ and add http://WORKSTATION_IP:8100 under the "Insecure origins treated as secure" section.
examples/riva_nmt/bot.py ADDED
@@ -0,0 +1,130 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD 2-Clause License
3
+
4
+ """Riva neural machine translation (NMT) bot.
5
+
6
+ This bot enables speech-to-speech translation using Riva ASR, NMT and TTS services
7
+ with voice activity detection.
8
+ """
9
+
10
+ import os
11
+
12
+ import uvicorn
13
+ from dotenv import load_dotenv
14
+ from fastapi import FastAPI
15
+ from fastapi.staticfiles import StaticFiles
16
+ from pipecat.audio.vad.silero import SileroVADAnalyzer
17
+ from pipecat.frames.frames import TranscriptionFrame
18
+ from pipecat.pipeline.pipeline import Pipeline
19
+ from pipecat.pipeline.task import PipelineParams, PipelineTask
20
+ from pipecat.processors.aggregators.sentence import SentenceAggregator
21
+ from pipecat.services.nim import NimLLMService
22
+ from pipecat.transcriptions.language import Language
23
+ from pipecat.utils.time import time_now_iso8601
24
+
25
+ from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
26
+ from nvidia_pipecat.services.riva_nmt import RivaNMTService
27
+ from nvidia_pipecat.services.riva_speech import (
28
+ RivaASRService,
29
+ RivaTTSService,
30
+ )
31
+ from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
32
+ ACETransport,
33
+ ACETransportParams,
34
+ )
35
+ from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
36
+ from nvidia_pipecat.utils.logging import setup_default_ace_logging
37
+
38
+ load_dotenv(override=True)
39
+
40
+ setup_default_ace_logging(level="INFO")
41
+
42
+
43
+ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
44
+ """Create the pipeline to be run.
45
+
46
+ Args:
47
+ pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.
48
+
49
+ Returns:
50
+ PipelineTask: The configured pipeline task for handling speech-to-speech translation.
51
+ """
52
+ transport = ACETransport(
53
+ websocket=pipeline_metadata.websocket,
54
+ params=ACETransportParams(
55
+ vad_analyzer=SileroVADAnalyzer(),
56
+ ),
57
+ )
58
+
59
+ llm = NimLLMService(
60
+ api_key=os.getenv("NVIDIA_API_KEY"),
61
+ model="nvdev/meta/llama-3.1-8b-instruct",
62
+ )
63
+
64
+ # Update the STT and TTS language and voice ID as needed.
65
+ # A TTS voice ID for the chosen language can be selected from https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html
66
+ language = Language.ES_US
67
+ voice_id = "English-US.Female-1"
68
+
69
+ nmt1 = RivaNMTService(source_language=language, target_language=Language.EN_US)
70
+ nmt2 = RivaNMTService(source_language=Language.EN_US, target_language=language)
71
+
72
+ stt = RivaASRService(
73
+ server="localhost:50051",
74
+ api_key=os.getenv("NVIDIA_API_KEY"),
75
+ language=language,
76
+ sample_rate=16000,
77
+ model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
78
+ )
79
+ tts = RivaTTSService(
80
+ server="localhost:50051",
81
+ api_key=os.getenv("NVIDIA_API_KEY"),
82
+ voice_id=voice_id,
83
+ language=language,
84
+ zero_shot_quality=20,
85
+ sample_rate=16000,
86
+ model="fastpitch-hifigan-tts",
87
+ )
88
+
89
+ sentence_aggregator = SentenceAggregator()
90
+
91
+ pipeline = Pipeline(
92
+ [
93
+ transport.input(),
94
+ stt,
95
+ nmt1,
96
+ llm,
97
+ sentence_aggregator,
98
+ nmt2,
99
+ tts,
100
+ transport.output(),
101
+ ]
102
+ )
103
+
104
+ task = PipelineTask(
105
+ pipeline,
106
+ params=PipelineParams(
107
+ allow_interruptions=True,
108
+ enable_metrics=True,
109
+ enable_usage_metrics=True,
110
+ send_initial_empty_metrics=True,
111
+ report_only_initial_ttfb=True,
112
+ start_metadata={"stream_id": pipeline_metadata.stream_id},
113
+ ),
114
+ )
115
+
116
+ @transport.event_handler("on_client_connected")
117
+ async def on_client_connected(transport, client):
118
+ # Kick off the conversation.
119
+ await task.queue_frames([TranscriptionFrame("Contar una historia.", "", time_now_iso8601())])
120
+
121
+ return task
122
+
123
+
124
+ app = FastAPI()
125
+ app.include_router(websocket_router)
126
+ runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
127
+ app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")
128
+
129
+ if __name__ == "__main__":
130
+ uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/riva_nmt/env.example ADDED
@@ -0,0 +1,2 @@
1
+ # Nvidia API Key
2
+ NVIDIA_API_KEY=your_nvidia_api_key_here
examples/riva_nmt/pyproject.toml ADDED
@@ -0,0 +1,18 @@
1
+ [project]
2
+ name = "riva-nmt-example"
3
+ version = "0.1.0"
4
+ description = "NVIDIA ACE Pipecat Speech only Examples"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "nvidia-pipecat",
9
+ ]
10
+
11
+ [tool.uv.sources]
12
+ torch = { index = "pytorch" }
13
+ nvidia-pipecat = { path = "../../.", editable = true }
14
+
15
+ [[tool.uv.index]]
16
+ name = "pytorch"
17
+ url = "https://download.pytorch.org/whl/cpu"
18
+ explicit = true
examples/riva_speech_langchain/README.md ADDED
@@ -0,0 +1,64 @@
1
+ # Riva Speech Langchain Example
2
+
3
+ This is an example that shows how to use `ACETransport` to communicate with LangChain. It supports `Nvidia Riva ASR and TTS`.
4
+
5
+ ## Get Started
6
+
7
+ From the example directory, run the following commands to create a virtual environment and install the dependencies:
8
+
9
+ ```bash
10
+ uv venv
11
+ uv sync
12
+ source .venv/bin/activate
13
+ ```
14
+
15
+ Update the secrets in the `.env` file.
16
+
17
+ ```bash
18
+ cp env.example .env # and add your credentials
19
+ ```
20
+
21
+ ## Deploy local Riva ASR and TTS models.
22
+
23
+ #### Prerequisites
24
+ - You have access and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).
25
+
26
+ - You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
27
+
28
+ - You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix]((https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix)).
29
+
30
+ #### Download Riva Quick Start
31
+
32
+ Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.
33
+
34
+ ```bash
35
+ ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
36
+ ```
37
+
38
+ #### Deploy Riva Speech Server
39
+
40
+ From the example directory, run below commands:
41
+
42
+ ```bash
43
+ cd riva_quickstart_v2.19.0
44
+ chmod +x riva_init.sh riva_clean.sh riva_start.sh
45
+ bash riva_clean.sh ../../utils/riva_config.sh
46
+ bash riva_init.sh ../../utils/riva_config.sh
47
+ bash riva_start.sh ../../utils/riva_config.sh
48
+ cd ..
49
+ ```
50
+
51
+ This may take few minutes for the first time and will start the riva server on `localhost:50051`.
52
+
53
+ For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).
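+
+ To sanity-check the deployment before starting the bot, you can send a quick synthesis request to the server. This is a minimal sketch, assuming the `nvidia-riva-client` package is installed (`pip install nvidia-riva-client`); the voice name mirrors the one used in `bot.py`:
+
+ ```python
+ # Minimal sketch: verify the local Riva server answers TTS requests.
+ import riva.client
+
+ auth = riva.client.Auth(uri="localhost:50051")
+ tts = riva.client.SpeechSynthesisService(auth)
+ response = tts.synthesize(
+     text="Hello from Riva.",
+     voice_name="English-US.Female-1",  # same voice bot.py uses
+     language_code="en-US",
+     sample_rate_hz=16000,
+ )
+ print(f"Received {len(response.audio)} bytes of PCM audio")
+ ```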
+
+
+ ## Run the bot pipeline
+
+ ```bash
+ python examples/riva_speech_langchain/bot.py
+ ```
+
+ This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.
+
+ Note: For mic access, you will need to open chrome://flags/ and add `http://WORKSTATION_IP:8100` to the "Insecure origins treated as secure" section.
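+
+ Tip: to verify your `OPENAI_API_KEY` and the LangChain half in isolation before wiring up speech, you can run a minimal standalone sketch of the same `RunnableWithMessageHistory` setup used in `bot.py`:
+
+ ```python
+ # Minimal sketch: exercise the chain-with-history setup from bot.py without
+ # the speech pipeline. Assumes OPENAI_API_KEY is set in the environment.
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_community.chat_message_histories import ChatMessageHistory
+ from langchain_core.runnables.history import RunnableWithMessageHistory
+ from langchain_openai import ChatOpenAI
+
+ store = {}
+
+ def get_session_history(session_id):
+     # One in-memory history per session id, as in bot.py.
+     if session_id not in store:
+         store[session_id] = ChatMessageHistory()
+     return store[session_id]
+
+ prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", "Be nice and helpful. Answer very briefly."),
+         MessagesPlaceholder("chat_history"),
+         ("human", "{input}"),
+     ]
+ )
+ chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
+ history_chain = RunnableWithMessageHistory(
+     chain,
+     get_session_history,
+     history_messages_key="chat_history",
+     input_messages_key="input",
+ )
+
+ reply = history_chain.invoke(
+     {"input": "Hello!"},
+     config={"configurable": {"session_id": "test-session"}},
+ )
+ print(reply.content)
+ ```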
examples/riva_speech_langchain/bot.py ADDED
@@ -0,0 +1,150 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: BSD-2-Clause
+
+ """Riva speech LangChain bot."""
+
+ import os
+
+ import uvicorn
+ from dotenv import load_dotenv
+ from fastapi import FastAPI
+ from fastapi.staticfiles import StaticFiles
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_community.chat_message_histories import ChatMessageHistory
+ from langchain_core.chat_history import BaseChatMessageHistory
+ from langchain_core.runnables.history import RunnableWithMessageHistory
+ from langchain_openai import ChatOpenAI
+ from pipecat.audio.vad.silero import SileroVADAnalyzer
+ from pipecat.frames.frames import LLMMessagesFrame
+ from pipecat.pipeline.pipeline import Pipeline
+ from pipecat.pipeline.task import PipelineParams, PipelineTask
+ from pipecat.processors.aggregators.llm_response import (
+     LLMAssistantResponseAggregator,
+     LLMUserResponseAggregator,
+ )
+ from pipecat.processors.frameworks.langchain import LangchainProcessor
+
+ from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
+ from nvidia_pipecat.services.riva_speech import (
+     RivaASRService,
+     RivaTTSService,
+ )
+ from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
+     ACETransport,
+     ACETransportParams,
+ )
+ from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
+ from nvidia_pipecat.utils.logging import setup_default_ace_logging
+
+ load_dotenv(override=True)
+
+ setup_default_ace_logging(level="INFO")
+
+ message_store = {}
+
+
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
+     """Get the chat history for a session, creating it on first use."""
+     if session_id not in message_store:
+         message_store[session_id] = ChatMessageHistory()
+     return message_store[session_id]
+
+
+ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
+     """Create the pipeline to be run.
+
+     Args:
+         pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.
+
+     Returns:
+         PipelineTask: The configured pipeline task for handling speech-to-speech conversation.
+     """
+     transport = ACETransport(
+         websocket=pipeline_metadata.websocket,
+         params=ACETransportParams(
+             vad_analyzer=SileroVADAnalyzer(),
+         ),
+     )
+
+     stt = RivaASRService(
+         server="localhost:50051",
+         api_key=os.getenv("NVIDIA_API_KEY"),
+         language="en-US",
+         sample_rate=16000,
+         model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
+     )
+     tts = RivaTTSService(
+         server="localhost:50051",
+         api_key=os.getenv("NVIDIA_API_KEY"),
+         voice_id="English-US.Female-1",
+         language="en-US",
+         zero_shot_quality=20,
+         sample_rate=16000,
+         model="fastpitch-hifigan-tts",
+     )
+
+     prompt = ChatPromptTemplate.from_messages(
+         [
+             (
+                 "system",
+                 "Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
+                 "Your response will be synthesized to voice and those characters will create unnatural sounds.",
+             ),
+             MessagesPlaceholder("chat_history"),
+             ("human", "{input}"),
+         ]
+     )
+
+     chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
+     history_chain = RunnableWithMessageHistory(
+         chain,
+         get_session_history,
+         history_messages_key="chat_history",
+         input_messages_key="input",
+     )
+
+     lc = LangchainProcessor(history_chain)
+
+     tma_in = LLMUserResponseAggregator()
+     tma_out = LLMAssistantResponseAggregator()
+
+     pipeline = Pipeline(
+         [
+             transport.input(),  # Websocket input from client
+             stt,  # Speech-To-Text
+             tma_in,  # User responses
+             lc,  # LangChain processor
+             tts,  # Text-To-Speech
+             transport.output(),  # Websocket output to client
+             tma_out,  # LLM responses
+         ]
+     )
+
+     task = PipelineTask(
+         pipeline,
+         params=PipelineParams(
+             allow_interruptions=True,
+             enable_metrics=True,
+             enable_usage_metrics=True,
+             send_initial_empty_metrics=True,
+             report_only_initial_ttfb=True,
+             start_metadata={"stream_id": pipeline_metadata.stream_id},
+         ),
+     )
+
+     @transport.event_handler("on_client_connected")
+     async def on_client_connected(transport, client):
+         # Kick off the conversation.
+         messages = [{"content": "Please briefly introduce yourself to the user."}]
+         await task.queue_frames([LLMMessagesFrame(messages)])
+
+     return task
+
+
+ app = FastAPI()
+ app.include_router(websocket_router)
+ runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
+ app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")
+
+ if __name__ == "__main__":
+     uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/riva_speech_langchain/env.example ADDED
@@ -0,0 +1,2 @@
+ # OpenAI API Key
+ OPENAI_API_KEY=your_openai_api_key_here
examples/riva_speech_langchain/pyproject.toml ADDED
@@ -0,0 +1,21 @@
+ [project]
+ name = "riva-speech-langchain-example"
+ version = "0.1.0"
+ description = "NVIDIA ACE Pipecat speech-only example"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "langchain>=0.3.19",
+     "langchain-community>=0.3.18",
+     "langchain-openai>=0.3.6",
+     "nvidia-pipecat",
+ ]
+
+ [tool.uv.sources]
+ torch = { index = "pytorch" }
+ nvidia-pipecat = { path = "../../.", editable = true }
+
+ [[tool.uv.index]]
+ name = "pytorch"
+ url = "https://download.pytorch.org/whl/cpu"
+ explicit = true
examples/speech-to-speech/Dockerfile ADDED
@@ -0,0 +1,40 @@
+ # Base image
+ FROM python:3.12-slim
+
+ # Image metadata
+ LABEL maintainer="NVIDIA"
+ LABEL description="Speech-to-Speech example"
+ LABEL version="1.0"
+
+ # Environment setup
+ ENV PYTHONUNBUFFERED=1
+
+ # System dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     ffmpeg \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/* \
+     && pip install --no-cache-dir --upgrade pip uv
+
+ # App directory setup
+ WORKDIR /app
+
+ # App files
+ COPY pyproject.toml uv.lock \
+     LICENSE README.md NVIDIA_PIPECAT.md \
+     ./
+ COPY src/ ./src/
+ COPY examples/static/ ./examples/static/
+ COPY examples/speech-to-speech/ ./examples/speech-to-speech/
+
+ # Example app directory
+ WORKDIR /app/examples/speech-to-speech
+
+ # Dependencies
+ RUN uv sync --frozen
+
+ # Port configuration
+ EXPOSE 8100
+
+ # Start command
+ CMD ["uv", "run", "bot.py"]
examples/speech-to-speech/README.md ADDED
@@ -0,0 +1,154 @@
+ # Speech to Speech Example
+
+ In this example, we show how to build a simple speech-to-speech voice assistant pipeline with the nvidia-pipecat and pipecat-ai libraries and deploy it for testing. The pipeline uses a WebSocket-based ACETransport, Riva ASR and TTS models, and the NVIDIA LLM service. We recommend first reading [the Pipecat documentation](https://docs.pipecat.ai/getting-started/core-concepts) or the Pipecat overview section of [the ACE Controller documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/user-guide.html#pipecat-overview) to understand the core concepts.
+
+ ## Prerequisites
+
+ 1. Copy and configure the environment file:
+    ```bash
+    cp env.example .env # and add your credentials
+    ```
+
+ 2. Ensure you have the required API keys:
+    - `NVIDIA_API_KEY` - Required for accessing NIM ASR, TTS, and LLM models
+    - (Optional) `ZEROSHOT_TTS_NVIDIA_API_KEY` - Required for zero-shot TTS
+
+ ## Option 1: Deploy Using Docker
+
+ #### Prerequisites
+ - You have access to and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).
+
+ - You have access to an NVIDIA Volta™, NVIDIA Turing™, or NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
+
+ - You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).
+
+ From the examples/speech-to-speech directory, run the command below:
+
+ ```bash
+ docker compose up -d
+ ```
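+
+ Once the containers are up, you can smoke-test the locally hosted LLM through its OpenAI-compatible endpoint before opening the web client. This is a minimal sketch, assuming the `openai` Python package is installed and the default `18000:8000` port mapping from `docker-compose.yml`:
+
+ ```python
+ # Minimal sketch: query the local NIM LLM via its OpenAI-compatible API.
+ from openai import OpenAI
+
+ # A local NIM container typically does not validate the key, so any placeholder works.
+ client = OpenAI(base_url="http://localhost:18000/v1", api_key="unused")
+ response = client.chat.completions.create(
+     model="meta/llama-3.1-8b-instruct",
+     messages=[{"role": "user", "content": "Say hello in five words."}],
+ )
+ print(response.choices[0].message.content)
+ ```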
+
+ ## Option 2: Deploy using a Python environment
+
+ #### Prerequisites
+ From the examples/speech-to-speech directory, run the following commands to create a virtual environment and install the dependencies:
+
+ ```bash
+ # Create and activate virtual environment
+ uv venv
+ source .venv/bin/activate
+
+ # Install dependencies
+ uv sync
+ ```
+
+ Make sure you've configured the `.env` file with your API keys before proceeding.
+
+ After making any required customizations in bot.py, you can deploy the pipeline using the command below:
+
+ ```bash
+ python bot.py
+ ```
+
+ ## Start interacting with the application
+
+ This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.
+
+ Note: For mic access, you will need to open chrome://flags/ and add `http://WORKSTATION_IP:8100` to the "Insecure origins treated as secure" section.
+
+ If you want to update the port, change the `uvicorn.run` call in [bot.py](bot.py) and the `wsUrl` in [static/index.html](../static/index.html).
+
+ ## Bot customizations
+
+ ### Enabling Speculative Speech Processing
+
+ Speculative speech processing reduces bot response latency by acting on Riva ASR's early interim user transcripts instead of waiting for final transcripts. This feature only works when using Riva ASR.
+
+ - Refer to the comments in [bot.py](bot.py) for guidance on enabling or disabling specific frame processors as needed; a sketch of the toggle follows this list.
+
+ - See the [ACE Controller Microservice documentation on Speculative Speech Processing](https://docs.nvidia.com/ace/ace-controller-microservice/1.0/user-guide.html#speculative-speech-processing) for more details.
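+
+ As a minimal sketch (mirroring the commented-out lines in `bot.py`), enabling the feature means swapping the default context aggregator for the NVIDIA one and adding a TTS response cacher:
+
+ ```python
+ # Sketch of the speculative-speech toggle described in bot.py.
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+
+ from nvidia_pipecat.processors.nvidia_context_aggregator import (
+     NvidiaTTSResponseCacher,
+     create_nvidia_context_aggregator,
+ )
+
+ context = OpenAILLMContext([{"role": "system", "content": "You are a helpful voice bot."}])
+
+ # Aggregates user context from interim (not just final) ASR transcripts.
+ nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=True)
+ # Caches TTS responses so speculative answers are delivered in a coordinated way.
+ nvidia_tts_response_cacher = NvidiaTTSResponseCacher()
+
+ # In the pipeline, use nvidia_context_aggregator.user() / .assistant() in place of
+ # the default context aggregator, and place the cacher right after tts.
+ ```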
+
+ ### Switching ASR, LLM, and TTS Models
+
+ You can customize the ASR (Automatic Speech Recognition), LLM (Large Language Model), and TTS (Text-to-Speech) services by configuring environment variables. This allows you to switch between NIM cloud-hosted models and locally deployed models.
+
+ The following environment variables control the endpoints and models:
+
+ - `RIVA_ASR_URL`: Address of the Riva ASR (speech-to-text) service (e.g., `localhost:50051` for local, `grpc.nvcf.nvidia.com:443` for the cloud endpoint).
+ - `RIVA_TTS_URL`: Address of the Riva TTS (text-to-speech) service (e.g., `localhost:50051` for local, `grpc.nvcf.nvidia.com:443` for the cloud endpoint).
+ - `NVIDIA_LLM_URL`: URL of the NVIDIA LLM service (e.g., `http://<machine-ip>:8000/v1` for local, `https://integrate.api.nvidia.com/v1` for the cloud endpoint).
+
+ You can set the model, language, and voice using the `RIVA_ASR_MODEL`, `RIVA_TTS_MODEL`, `NVIDIA_LLM_MODEL`, `RIVA_ASR_LANGUAGE`, `RIVA_TTS_LANGUAGE`, and `RIVA_TTS_VOICE_ID` environment variables.
+
+ Update these variables in your Docker Compose configuration to match your deployment and desired models. For more details on available models and configuration options, refer to the [NIM NVIDIA Magpie](https://build.nvidia.com/nvidia/magpie-tts-multilingual), [NIM NVIDIA Parakeet](https://build.nvidia.com/nvidia/parakeet-ctc-1_1b-asr/api), and [NIM Meta Llama](https://build.nvidia.com/meta/llama-3_1-8b-instruct) documentation.
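+
+ For reference, `bot.py` reads each variable with a default, so anything left unset falls back to a local or cloud default, for example:
+
+ ```python
+ # Sketch of the endpoint/model resolution in bot.py.
+ import os
+
+ asr_server = os.getenv("RIVA_ASR_URL", "localhost:50051")
+ llm_base_url = os.getenv("NVIDIA_LLM_URL", "https://integrate.api.nvidia.com/v1")
+ llm_model = os.getenv("NVIDIA_LLM_MODEL", "meta/llama-3.1-8b-instruct")
+ print(asr_server, llm_base_url, llm_model)
+ ```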
+
+ #### Example: Switching to the Llama 3.3-70B Model
+
+ To use a larger LLM such as the Llama 3.3-70B model, you need to update both the Docker Compose configuration and the environment variables for your Python application. Follow these steps (a verification sketch follows this list):
+
+ - In your `docker-compose.yml` file, find the `nvidia-llm` service section.
+ - Change the NIM image to the 70B model: `nvcr.io/nim/meta/llama-3.3-70b-instruct:latest`
+ - Update the `device_ids` to allocate at least two GPUs (for example, `['2', '3']`).
+ - Update the environment variable under the `python-app` service to `NVIDIA_LLM_MODEL=meta/llama-3.3-70b-instruct`
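+
+ After restarting the stack, you can confirm which model the local NIM is serving. A minimal sketch, again assuming the `openai` package and the default `18000:8000` port mapping:
+
+ ```python
+ # List the models served by the local NIM; expect "meta/llama-3.3-70b-instruct".
+ from openai import OpenAI
+
+ client = OpenAI(base_url="http://localhost:18000/v1", api_key="unused")
+ print([model.id for model in client.models.list()])
+ ```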
+
+ #### Setting up the Latest Zero-shot Magpie Model
+
+ Follow these steps to configure and use the latest zero-shot Magpie TTS model:
+
+ 1. **Update Docker Compose Configuration**
+
+    Modify the `riva-tts-magpie` service in your docker-compose file with the following configuration:
+
+    ```yaml
+    riva-tts-magpie:
+      image: <magpie-tts-zeroshot-image:version> # Replace this with the actual image tag
+      environment:
+        - NGC_API_KEY=${ZEROSHOT_TTS_NVIDIA_API_KEY}
+        - NIM_HTTP_API_PORT=9000
+        - NIM_GRPC_API_PORT=50051
+      ports:
+        - "49000:50051"
+      shm_size: 16GB
+      deploy:
+        resources:
+          reservations:
+            devices:
+              - driver: nvidia
+                device_ids: ['0']
+                capabilities: [gpu]
+    ```
+
+    - Ensure your `ZEROSHOT_TTS_NVIDIA_API_KEY` is properly set in your `.env` file:
+    ```bash
+    ZEROSHOT_TTS_NVIDIA_API_KEY=
+    ```
+
+ 2. **Configure TTS Voice Settings**
+
+    Update the following environment variables under the `python-app` service:
+
+    ```bash
+    RIVA_TTS_VOICE_ID=Magpie-ZeroShot.Female-1
+    RIVA_TTS_MODEL=magpie_tts_ensemble-Magpie-ZeroShot
+    ```
+
+ 3. **Zero-shot Audio Prompt Configuration**
+
+    To use a custom voice with zero-shot learning:
+
+    - Add your audio prompt file to the workspace
+    - Mount the audio file into your container by adding a volume in your `docker-compose.yml` under the `python-app` service:
+    ```yaml
+    services:
+      python-app:
+        # ... existing code ...
+        volumes:
+          - ./audio_prompts:/app/audio_prompts
+    ```
+    - Set the `ZERO_SHOT_AUDIO_PROMPT` environment variable to the path relative to your application root:
+    ```yaml
+    environment:
+      - ZERO_SHOT_AUDIO_PROMPT=audio_prompts/voice_sample.wav # Path relative to app root
+    ```
+
+ Note: The zero-shot audio prompt is only required when using the Magpie Zero-shot model. For standard Magpie multilingual models, this configuration should be omitted.
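+
+ For reference, `bot.py` only passes an audio prompt to `RivaTTSService` when the variable is set, so leaving `ZERO_SHOT_AUDIO_PROMPT` empty is safe. A minimal sketch of that wiring:
+
+ ```python
+ # Mirrors bot.py: the zero-shot audio prompt is optional.
+ import os
+ from pathlib import Path
+
+ prompt = os.getenv("ZERO_SHOT_AUDIO_PROMPT")
+ zero_shot_audio_prompt_file = Path(prompt) if prompt else None  # None for standard Magpie models
+ ```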
examples/speech-to-speech/bot.py ADDED
@@ -0,0 +1,194 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: BSD-2-Clause
+
+ """Speech-to-speech conversation bot."""
+
+ import os
+ from pathlib import Path
+
+ import uvicorn
+ from dotenv import load_dotenv
+ from fastapi import FastAPI
+ from fastapi.staticfiles import StaticFiles
+ from pipecat.audio.vad.silero import SileroVADAnalyzer
+ from pipecat.frames.frames import LLMMessagesFrame
+ from pipecat.pipeline.pipeline import Pipeline
+ from pipecat.pipeline.task import PipelineParams, PipelineTask
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+
+ from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
+
+ # Uncomment to enable speculative speech processing:
+ # from nvidia_pipecat.processors.nvidia_context_aggregator import (
+ #     NvidiaTTSResponseCacher,
+ #     create_nvidia_context_aggregator,
+ # )
+ from nvidia_pipecat.processors.transcript_synchronization import (
+     BotTranscriptSynchronization,
+     UserTranscriptSynchronization,
+ )
+ from nvidia_pipecat.services.blingfire_text_aggregator import BlingfireTextAggregator
+ from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService
+ from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
+ from nvidia_pipecat.transports.network.ace_fastapi_websocket import ACETransport, ACETransportParams
+ from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
+ from nvidia_pipecat.utils.logging import setup_default_ace_logging
+
+ load_dotenv(override=True)
+
+ setup_default_ace_logging(level="DEBUG")
+
+
+ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
+     """Create the pipeline to be run.
+
+     Args:
+         pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.
+
+     Returns:
+         PipelineTask: The configured pipeline task for handling speech-to-speech conversation.
+     """
+     transport = ACETransport(
+         websocket=pipeline_metadata.websocket,
+         params=ACETransportParams(
+             vad_analyzer=SileroVADAnalyzer(),
+             audio_out_10ms_chunks=20,
+         ),
+     )
+
+     llm = NvidiaLLMService(
+         api_key=os.getenv("NVIDIA_API_KEY"),
+         base_url=os.getenv("NVIDIA_LLM_URL", "https://integrate.api.nvidia.com/v1"),
+         model=os.getenv("NVIDIA_LLM_MODEL", "meta/llama-3.1-8b-instruct"),
+     )
+
+     stt = RivaASRService(
+         server=os.getenv("RIVA_ASR_URL", "localhost:50051"),
+         api_key=os.getenv("NVIDIA_API_KEY"),
+         language=os.getenv("RIVA_ASR_LANGUAGE", "en-US"),
+         sample_rate=16000,
+         model=os.getenv("RIVA_ASR_MODEL", "parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble"),
+     )
+
+     tts = RivaTTSService(
+         server=os.getenv("RIVA_TTS_URL", "localhost:50051"),
+         api_key=os.getenv("NVIDIA_API_KEY"),
+         voice_id=os.getenv("RIVA_TTS_VOICE_ID", "Magpie-Multilingual.EN-US.Sofia"),
+         model=os.getenv("RIVA_TTS_MODEL", "magpie_tts_ensemble-Magpie-Multilingual"),
+         language=os.getenv("RIVA_TTS_LANGUAGE", "en-US"),
+         zero_shot_audio_prompt_file=(
+             Path(os.getenv("ZERO_SHOT_AUDIO_PROMPT")) if os.getenv("ZERO_SHOT_AUDIO_PROMPT") else None
+         ),
+         text_aggregator=BlingfireTextAggregator(),
+     )
+
+     # Used to synchronize the user and bot transcripts in the UI
+     stt_transcript_synchronization = UserTranscriptSynchronization()
+     tts_transcript_synchronization = BotTranscriptSynchronization()
+
+     # System prompt can be changed to fit the use case
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "### CONVERSATION CONSTRAINTS\n"
+                 "STRICTLY answer in 1-2 sentences or less than 200 characters. "
+                 "This must be followed very rigorously; it is crucial.\n"
+                 "Output must be plain text, unformatted, and without any special characters - "
+                 "suitable for direct conversion to speech.\n"
+                 "DO NOT use bullet points, lists, code samples, or headers in your spoken responses.\n"
+                 "STRICTLY be short, concise, and to the point. Avoid elaboration, explanation, or repetition.\n"
+                 "Pronounce numbers, dates, and special terms. For phone numbers, read digits slowly and separately. "
+                 "For times, use natural phrasing like 'seven o'clock a.m.' instead of 'seven zero zero.'\n"
+                 "Silently correct likely transcription errors by inferring the intended meaning without saying "
+                 "`did you mean..` or `I think you meant..`. "
+                 "Prioritize what the user meant, not just the literal words.\n"
+                 "### OPENING PROTOCOL\n"
+                 "STRICTLY START CONVERSATION WITH 'Thank you for calling GreenForce Garden. "
+                 "What can I do for you today?'\n"
+                 "### CLOSING PROTOCOL\n"
+                 "End with either 'Have a green day!' or 'Have a good one.' Use one consistently per call.\n"
+                 "### YOU ARE ...\n"
+                 "You are Flora, the voice of 'GreenForce Garden', a San Francisco flower shop "
+                 "powered by NVIDIA GPUs.\n"
+                 "You're cool, upbeat, and love making people smile with your floral know-how.\n"
+                 "You embody warmth, expertise, and dedication to creating a perfect floral experience.\n"
+                 "### CONVERSATION GUIDELINES\n"
+                 "CORE RESPONSIBILITIES - Order Management, Consultation, Inventory Guidance, "
+                 "Delivery Coordination, Customer Care, Giving Fun Advice\n"
+                 "While taking orders, understand the occasion and ask for recipient details, "
+                 "customer preferences, and delivery plans\n"
+                 "SUGGEST cards with personal messages\n"
+                 "SUGGEST seasonal recommendations (e.g., spring: tulips, pastels; romance: roses, peonies) "
+                 "and occasion-specific details (e.g., elegant wrapping).\n"
+                 "SUGGEST complementary items: vases, chocolates, cards. "
+                 "Also provide care instructions for long-lasting enjoyment.\n"
+                 "STRICTLY Confirm all order details before finalizing: flowers, colors, "
+                 "delivery address, timing\n"
+                 "STRICTLY Collect complete contact information for order updates\n"
+                 "STRICTLY Provide ORDER CONFIRMATION with ESTIMATED DELIVERY TIMES\n"
+                 "OFFER MULTIPLE PAYMENT OPTIONS (e.g., card, cash, online) and confirm SECURE PROCESSING.\n"
+                 "STRICTLY If you are unsure about a request, ask clarifying questions "
+                 "to ensure you understand before responding."
+             ),
+         },
+     ]
+
+     context = OpenAILLMContext(messages)
+
+     # Comment out the line below when enabling speculative speech processing
+     context_aggregator = llm.create_context_aggregator(context)
+
+     # Uncomment the lines below to enable speculative speech processing
+     # nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=True)
+     # nvidia_tts_response_cacher = NvidiaTTSResponseCacher()
+
+     pipeline = Pipeline(
+         [
+             transport.input(),  # Websocket input from client
+             stt,  # Speech-To-Text
+             stt_transcript_synchronization,
+             # Comment out the line below when enabling speculative speech processing
+             context_aggregator.user(),
+             # Uncomment the line below to enable speculative speech processing
+             # nvidia_context_aggregator.user(),
+             llm,  # LLM
+             tts,  # Text-To-Speech
+             # Caches TTS responses for coordinated delivery in speculative
+             # speech processing
+             # nvidia_tts_response_cacher,  # Uncomment to enable speculative speech processing
+             tts_transcript_synchronization,
+             transport.output(),  # Websocket output to client
+             context_aggregator.assistant(),
+             # Uncomment the line below to enable speculative speech processing
+             # nvidia_context_aggregator.assistant(),
+         ]
+     )
+
+     task = PipelineTask(
+         pipeline,
+         params=PipelineParams(
+             allow_interruptions=True,
+             enable_metrics=True,
+             enable_usage_metrics=True,
+             send_initial_empty_metrics=True,
+             start_metadata={"stream_id": pipeline_metadata.stream_id},
+         ),
+     )
+
+     @transport.event_handler("on_client_connected")
+     async def on_client_connected(transport, client):
+         # Kick off the conversation.
+         messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+         await task.queue_frames([LLMMessagesFrame(messages)])
+
+     return task
+
+
+ app = FastAPI()
+ app.include_router(websocket_router)
+ runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
+ app.mount("/static", StaticFiles(directory=os.getenv("STATIC_DIR", "../static")), name="static")
+
+ if __name__ == "__main__":
+     uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=4)
examples/speech-to-speech/docker-compose.yml ADDED
@@ -0,0 +1,94 @@
+ name: voice-agent
+
+ services:
+   riva-tts-magpie:
+     image: nvcr.io/nim/nvidia/magpie-tts-multilingual:latest
+     environment:
+       - NGC_API_KEY=${NVIDIA_API_KEY}
+       - NIM_HTTP_API_PORT=9000
+       - NIM_GRPC_API_PORT=50051
+     ports:
+       - "19000:9000"
+       - "50151:50051"
+     volumes:
+       - nim_cache:/opt/nim/.cache
+     shm_size: 8GB
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               device_ids: ['0']
+               capabilities: [gpu]
+
+   riva-asr-parakeet:
+     image: nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us:latest
+     environment:
+       - NGC_API_KEY=${NVIDIA_API_KEY}
+       - NIM_HTTP_API_PORT=9001
+       - NIM_GRPC_API_PORT=50052
+       - NIM_TAGS_SELECTOR=mode=str,vad=silero
+     ports:
+       - "19001:9001"
+       - "50152:50052"
+     volumes:
+       - nim_cache:/opt/nim/.cache
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               device_ids: ['1']
+               capabilities: [gpu]
+
+   nvidia-llm:
+     image: nvcr.io/nim/meta/llama-3.1-8b-instruct:latest
+     environment:
+       - NGC_API_KEY=${NVIDIA_API_KEY}
+       - NIM_HTTP_API_PORT=8000
+     ports:
+       - "18000:8000"
+     volumes:
+       - nim_cache:/opt/nim/.cache
+     shm_size: 16GB
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               device_ids: ['2']
+               capabilities: [gpu]
+
+   python-app:
+     build:
+       context: ../../
+       dockerfile: examples/speech-to-speech/Dockerfile
+     ports:
+       - "8100:8100"
+     environment:
+       - NVIDIA_API_KEY=${NVIDIA_API_KEY}
+
+       - RIVA_ASR_URL=riva-asr-parakeet:50052
+       - RIVA_TTS_URL=riva-tts-magpie:50051
+       - NVIDIA_LLM_URL=http://nvidia-llm:8000/v1
+
+       - RIVA_ASR_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble
+       - RIVA_TTS_MODEL=magpie_tts_ensemble-Magpie-Multilingual
+       - NVIDIA_LLM_MODEL=meta/llama-3.1-8b-instruct
+
+       - RIVA_ASR_LANGUAGE=en-US
+       - RIVA_TTS_LANGUAGE=en-US
+       - RIVA_TTS_VOICE_ID=Magpie-Multilingual.EN-US.Sofia
+       - ZERO_SHOT_AUDIO_PROMPT= # set this only if using a zero-shot TTS model with a custom audio prompt
+
+       - STATIC_DIR=/app/static
+     volumes:
+       - ../static:/app/static
+     depends_on:
+       - riva-tts-magpie
+       - riva-asr-parakeet
+       - nvidia-llm
+
+ volumes:
+   nim_cache:
examples/speech-to-speech/env.example ADDED
@@ -0,0 +1,2 @@
+ NVIDIA_API_KEY=
+ ZEROSHOT_TTS_NVIDIA_API_KEY=
examples/speech-to-speech/pyproject.toml ADDED
@@ -0,0 +1,18 @@
+ [project]
+ name = "speech-to-speech-example"
+ version = "0.1.0"
+ description = "NVIDIA ACE Pipecat speech-only example"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "nvidia-pipecat",
+ ]
+
+ [tool.uv.sources]
+ torch = { index = "pytorch" }
+ nvidia-pipecat = { path = "../../.", editable = true }
+
+ [[tool.uv.index]]
+ name = "pytorch"
+ url = "https://download.pytorch.org/whl/cpu"
+ explicit = true
examples/speech-to-speech/uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
examples/speech_planner/Dockerfile ADDED
@@ -0,0 +1,40 @@
+ # Base image
+ FROM python:3.12-slim
+
+ # Image metadata
+ LABEL maintainer="NVIDIA"
+ LABEL description="Speech Planner example"
+ LABEL version="1.0"
+
+ # Environment setup
+ ENV PYTHONUNBUFFERED=1
+
+ # System dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     ffmpeg \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/* \
+     && pip install --no-cache-dir --upgrade pip uv
+
+ # App directory setup
+ WORKDIR /app
+
+ # App files
+ COPY pyproject.toml uv.lock \
+     LICENSE README.md NVIDIA_PIPECAT.md \
+     ./
+ COPY src/ ./src/
+ COPY examples/static/ ./examples/static/
+ COPY examples/speech_planner/ ./examples/speech_planner/
+
+ # Example app directory
+ WORKDIR /app/examples/speech_planner
+
+ # Dependencies
+ RUN uv sync --frozen
+
+ # Port configuration
+ EXPOSE 8100
+
+ # Start command
+ CMD ["uv", "run", "bot.py"]