Commit 53ea588 · 1 parent: 9438bb6
Working with service run on 7860
- .gitignore +69 -0
- CHANGELOG.md +19 -0
- CLA.md +129 -0
- CONTRIBUTING.md +14 -0
- Dockerfile +64 -0
- LICENSE +24 -0
- NVIDIA_PIPECAT.md +5 -0
- README.md +82 -1
- SECURITY.md +24 -0
- deploy/docker/Dockerfile +39 -0
- deploy/k8s/README.md +39 -0
- deploy/k8s/ucs/LICENSE.txt +7 -0
- deploy/k8s/ucs/README.md +101 -0
- deploy/k8s/ucs/changelog.txt +3 -0
- deploy/k8s/ucs/endpoints/a2f-grpc.proto +10 -0
- deploy/k8s/ucs/endpoints/animgraph-grpc.proto +189 -0
- deploy/k8s/ucs/endpoints/animgraph-http.yaml +755 -0
- deploy/k8s/ucs/endpoints/http-api.yaml +179 -0
- deploy/k8s/ucs/endpoints/redis.yaml +19 -0
- deploy/k8s/ucs/endpoints/riva-speech.proto +10 -0
- deploy/k8s/ucs/manifest.yaml +258 -0
- deploy/k8s/ucs/manual_compliance_test_results.yaml +11 -0
- deploy/k8s/ucs/scripts/env.sh +90 -0
- deploy/k8s/ucs/tests/dev/app.yaml +39 -0
- deploy/k8s/ucs/tests/dev/config.yaml +90 -0
- deploy/k8s/ucs/tests/dev/params1.yaml +12 -0
- deploy/scripts/README.md +0 -0
- examples/README.md +6 -0
- examples/nvidia_rag/README.md +68 -0
- examples/nvidia_rag/bot.py +144 -0
- examples/nvidia_rag/env.example +2 -0
- examples/nvidia_rag/pyproject.toml +18 -0
- examples/opentelemetry/README.md +33 -0
- examples/opentelemetry/bot.py +98 -0
- examples/riva_nmt/README.md +73 -0
- examples/riva_nmt/bot.py +130 -0
- examples/riva_nmt/env.example +2 -0
- examples/riva_nmt/pyproject.toml +18 -0
- examples/riva_speech_langchain/README.md +64 -0
- examples/riva_speech_langchain/bot.py +150 -0
- examples/riva_speech_langchain/env.example +2 -0
- examples/riva_speech_langchain/pyproject.toml +21 -0
- examples/speech-to-speech/Dockerfile +40 -0
- examples/speech-to-speech/README.md +154 -0
- examples/speech-to-speech/bot.py +194 -0
- examples/speech-to-speech/docker-compose.yml +94 -0
- examples/speech-to-speech/env.example +2 -0
- examples/speech-to-speech/pyproject.toml +18 -0
- examples/speech-to-speech/uv.lock +0 -0
- examples/speech_planner/Dockerfile +40 -0
.gitignore
ADDED
@@ -0,0 +1,69 @@
# --- General ---
.DS_Store
Thumbs.db
*.log

# --- Python ---
__pycache__/
*.py[cod]
*.pyo
*.pyd
.Python
.ruff_cache/
.mypy_cache/
.pytest_cache/
.ipynb_checkpoints/
.cache/
*.egg-info/
*.egg
build/
dist/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
wheels/
pip-wheel-metadata/
.coverage
.coverage.*
coverage.xml

# Virtual environments
.venv/
venv/
env/
ENV/
.python-version

# --- Node / JS ---
node_modules/
.pnpm-store/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
**/dist/
**/build/

# Keep lockfiles tracked
!**/package-lock.json
!**/yarn.lock

# --- IDE ---
.idea/
.vscode/

# --- Environment files ---
.env
.env.*
!**/env.example
!**/.env.example

# --- Example runtime artifacts ---
examples/voice_agent_webrtc_langgraph/audio_dumps/
examples/voice_agent_webrtc_langgraph/ui/dist/
CHANGELOG.md
ADDED
@@ -0,0 +1,19 @@
# NVIDIA Pipecat 0.1.0 (23 April 2025)

The NVIDIA Pipecat library augments the Pipecat framework by adding additional frame processors and services, as well as new multimodal frames to enhance avatar interactions. This is the first release of the NVIDIA Pipecat library.

## New Features

- Added Pipecat services for [Riva ASR (Automatic Speech Recognition)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html#), [Riva TTS (Text to Speech)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html), and [Riva NMT (Neural Machine Translation)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/translation/translation-overview.html) models.
- Added Pipecat frames, processors, and services to support multimodal avatar interactions and use cases. This includes `Audio2Face3DService`, `AnimationGraphService`, `FacialGestureProviderProcessor`, and `PostureProviderProcessor`.
- Added `ACETransport`, which is specifically designed to support integration with existing [ACE microservices](https://docs.nvidia.com/ace/overview/latest/index.html). This includes a FastAPI-based HTTP and WebSocket server implementation compatible with ACE.
- Added `NvidiaLLMService` for [NIM LLM models](https://build.nvidia.com/) and `NvidiaRAGService` for the [NVIDIA RAG Blueprint](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/docs/quickstart.md).
- Added `UserTranscriptSynchronization` processor for user speech transcripts and `BotTranscriptSynchronization` processor for synchronizing bot transcripts with bot audio playback.
- Added custom context aggregators and processors to enable [Speculative Speech Processing](https://docs.nvidia.com/ace/ace-controller-microservice/latest/user-guide.html#speculative-speech-processing) to reduce latency.
- Added `UserPresence`, `Proactivity`, and `AcknowledgementProcessor` frame processors to improve human-bot interactions.
- Released source code for the voice assistant example using `nvidia-pipecat`, along with the `pipecat-ai` library service, to showcase NVIDIA services with `ACETransport`.

## Improvements

- Added `ElevenLabsTTSServiceWithEndOfSpeech`, an extended version of the ElevenLabs TTS service with end-of-speech events for usage in avatar interactions.
CLA.md
ADDED
@@ -0,0 +1,129 @@
# Contribution License Agreement

This Contribution License Agreement (“**Agreement**”) is agreed to by the party signing below (“**You**”), and conveys certain license rights to NVIDIA Corporation and its affiliates (“**NVIDIA**”) for Your contributions to NVIDIA open source projects. This Agreement is effective as of the latest signature date below.

## 1. Definitions.

“**Code**” means the computer software code, whether in human-readable or machine-executable form, that is delivered by You to NVIDIA under this Agreement.

“**Project**” means any of the projects owned or managed by NVIDIA in which software is offered under a license approved by the Open Source Initiative (OSI) (www.opensource.org) and documentation offered under an OSI or a Creative Commons license (https://creativecommons.org/licenses).

“**Submit**” is the act of uploading, submitting, transmitting, or distributing code or other content to any Project, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Project for the purpose of discussing and improving that Project, but excluding communication that is conspicuously marked or otherwise designated in writing by You as “Not a Submission.”

“**Submission**” means the Code and any other copyrightable material Submitted by You, including any associated comments and documentation.

## 2. Your Submission.

You must agree to the terms of this Agreement before making a Submission to any Project. This Agreement covers any and all Submissions that You, now or in the future (except as described in Section 4 below), Submit to any Project.

## 3. Originality of Work.

You represent that each of Your Submissions is entirely Your original work. Should You wish to Submit materials that are not Your original work, You may Submit them separately to the Project if You (a) retain all copyright and license information that was in the materials as You received them, (b) in the description accompanying Your Submission, include the phrase “Submission containing materials of a third party:” followed by the names of the third party and any licenses or other restrictions of which You are aware, and (c) follow any other instructions in the Project’s written guidelines concerning Submissions.

## 4. Your Employer.

References to “employer” in this Agreement include Your employer or anyone else for whom You are acting in making Your Submission, e.g. as a contractor, vendor, or agent. If Your Submission is made in the course of Your work for an employer or Your employer has intellectual property rights in Your Submission by contract or applicable law, You must secure permission from Your employer to make the Submission before signing this Agreement. In that case, the term “You” in this Agreement will refer to You and the employer collectively. If You change employers in the future and desire to Submit additional Submissions for the new employer, then You agree to sign a new Agreement and secure permission from the new employer before Submitting those Submissions.

## 5. Licenses.

**a. Copyright License**. You grant NVIDIA, and those who receive the Submission directly or indirectly from NVIDIA, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license in the Submission to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute the Submission and such derivative works, and to sublicense any or all of the foregoing rights to third parties.

**b. Patent License**. You grant NVIDIA, and those who receive the Submission directly or indirectly from NVIDIA, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license under Your patent claims that are necessarily infringed by the Submission or the combination of the Submission with the Project to which it was Submitted to make, have made, use, offer to sell, sell and import or otherwise dispose of the Submission alone or with the Project.

**c. Other Rights Reserved**. Each party reserves all rights not expressly granted in this Agreement. No additional licenses or rights whatsoever (including, without limitation, any implied licenses) are granted by implication, exhaustion, estoppel or otherwise.

## 6. Representations and Warranties.

You represent that You are legally entitled to grant the above licenses. You represent that each of Your Submissions is entirely Your original work (except as You may have disclosed under Section 3). You represent that You have secured permission from Your employer to make the Submission in cases where Your Submission is made in the course of Your work for Your employer or Your employer has intellectual property rights in Your Submission by contract or applicable law. If You are signing this Agreement on behalf of Your employer, You represent and warrant that You have the necessary authority to bind the listed employer to the obligations contained in this Agreement. You are not expected to provide support for Your Submission, unless You choose to do so. UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING, AND EXCEPT FOR THE WARRANTIES EXPRESSLY STATED IN SECTIONS 3, 4, AND 6, THE SUBMISSION PROVIDED UNDER THIS AGREEMENT IS PROVIDED WITHOUT WARRANTY OF ANY KIND, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY OF NONINFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.

## 7. Notice to NVIDIA.

You agree to notify NVIDIA in writing of any facts or circumstances of which You later become aware that would make Your representations in this Agreement inaccurate in any respect.

## 8. Information about Submissions.

You agree that contributions to Projects and information about contributions may be maintained indefinitely and disclosed publicly, including Your name and other information that You submit with Your Submission.

## 9. Governing Law/Jurisdiction.

Claims arising under this Agreement shall be governed by the laws of Delaware, excluding its principles of conflict of laws and the United Nations Convention on Contracts for the Sale of Goods. The state and/or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this Agreement. You may not export the Software in violation of applicable export laws and regulations.

## 10. Entire Agreement/Assignment.

This Agreement is the entire agreement between the parties, and supersedes any and all prior agreements, understandings or communications, written or oral, between the parties relating to the subject matter hereof. This Agreement may be assigned by NVIDIA.

**Please select one of the options below and sign as indicated.** By signing, You accept and agree to the terms of this Contribution License Agreement for Your present and future Submissions to NVIDIA.

___ I have sole ownership of intellectual property rights to my Submissions and I am not making Submissions in the course of work for my employer.

- Name (“You”): _________________________________________
- Signature: _________________________________________
- Date: _________________________________________
- GitHub Login: _________________________________________
- Email: _________________________________________
- Address: _________________________________________

___ I am making Submissions in the course of work for my employer (or my employer has intellectual property rights in my Submissions by contract or applicable law). I have permission from my employer to make Submissions and enter into this Agreement on behalf of my employer. By signing below, the defined term “You” includes me and my employer.

- Company Name: _________________________________________
- Signature: _________________________________________
- By: _________________________________________
- Title: _________________________________________
- Date: _________________________________________
- GitHub Login: _________________________________________
- Email: _________________________________________
- Address: _________________________________________
CONTRIBUTING.md
ADDED
@@ -0,0 +1,14 @@
# Contributing Guidelines

Use the following guidelines to contribute to this project.

## Pull Requests

The developer workflow for code contributions is as follows:

1. Developers must create a [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) of this repository for upstreaming.
2. Git clone the forked repository and push changes to the personal fork.
3. Developers must run changes locally to make sure formatting, linting, and unit test checks pass. See the steps for developing with the source in [README.md](./README.md) for more details.
4. Once the code changes are staged on the fork and ready for review, a Pull Request (PR) can be requested to merge the changes from a branch of the fork into a selected branch of upstream.
5. If you are contributing for the first time, download the [Contribution License Agreement (CLA)](CLA.md) and email a signed CLA to [ttripathi@nvidia.com](mailto:ttripathi@nvidia.com).
6. Since there is no CI/CD process in place yet, the PR will be accepted and the corresponding issue closed only after adequate testing has been completed manually by the developer and/or the repository owners reviewing the code.
Dockerfile
ADDED
@@ -0,0 +1,64 @@
# Build UI assets
FROM node:18-alpine AS ui-builder

WORKDIR /ui
# Install UI dependencies
COPY examples/voice_agent_webrtc_langgraph/ui/package*.json ./
RUN npm ci --no-audit --no-fund && npm cache clean --force
# Build UI
COPY examples/voice_agent_webrtc_langgraph/ui/ .
RUN npm run build

# Base image
FROM python:3.12-slim

# Environment setup
ENV PYTHONUNBUFFERED=1

# System dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglx-mesa0 \
    curl \
    ffmpeg \
    git \
    net-tools \
    procps \
    vim \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir --upgrade pip uv

# App directory setup
WORKDIR /app

# App files
COPY pyproject.toml uv.lock \
    LICENSE README.md NVIDIA_PIPECAT.md \
    ./
COPY src/ ./src/
COPY examples/voice_agent_webrtc_langgraph/ ./examples/voice_agent_webrtc_langgraph/

# Copy built UI into example directory so FastAPI can serve it
COPY --from=ui-builder /ui/dist /app/examples/voice_agent_webrtc_langgraph/ui/dist

# Example app directory
WORKDIR /app/examples/voice_agent_webrtc_langgraph

# Dependencies
RUN uv sync --frozen
RUN uv pip install -r agents/requirements.txt
# Ensure langgraph CLI is available at build time
RUN uv pip install -U langgraph
RUN chmod +x start.sh

# Port configuration (single external port for app)
EXPOSE 7860

# Healthcheck
HEALTHCHECK --interval=30s --timeout=10s --retries=3 --start-period=60s CMD curl -f http://localhost:7860/get_prompt || exit 1

# Start command
CMD ["/app/examples/voice_agent_webrtc_langgraph/start.sh"]
LICENSE
ADDED
@@ -0,0 +1,24 @@
BSD 2-Clause License

Copyright (c) 2024–2025, NVIDIA Corporation

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
NVIDIA_PIPECAT.md
ADDED
@@ -0,0 +1,5 @@
# NVIDIA Pipecat

The NVIDIA Pipecat library augments [the Pipecat framework](https://github.com/pipecat-ai/pipecat) by adding additional frame processors and services, as well as new multimodal frames to facilitate the creation of human-avatar interactions. This includes the integration of NVIDIA services and NIMs such as [NVIDIA Riva](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/index.html), [NVIDIA Audio2Face](https://build.nvidia.com/nvidia/audio2face-3d), and [NVIDIA Foundational RAG](https://build.nvidia.com/nvidia/build-an-enterprise-rag-pipeline). It also introduces a few processors with a focus on improving the end-user experience for multimodal conversational agents, along with speculative speech processing to reduce latency for faster bot responses.

The nvidia-pipecat source code can be found in [the GitHub repository](https://github.com/NVIDIA/ace-controller). Follow [the documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html) for more details.
README.md
CHANGED
@@ -8,4 +8,85 @@ pinned: false
short_description: Voice Demos with Ace Controller
---

# ACE Controller SDK

The ACE Controller SDK allows you to build your own ACE Controller service to manage multimodal, real-time interactions with voice bots and avatars using NVIDIA ACE. With the SDK, you can create controllers that leverage the Python-based open-source [Pipecat framework](https://github.com/pipecat-ai/pipecat) for creating real-time, voice-enabled, and multimodal conversational AI agents. The SDK contains enhancements to the Pipecat framework, enabling developers to effortlessly customize, debug, and deploy complex pipelines while integrating robust NVIDIA Services into the Pipecat ecosystem.

## Main Features

- **Pipecat Extension:** A Pipecat extension to connect with ACE services and NVIDIA NIMs, facilitating the creation of human-avatar interactions. The NVIDIA Pipecat library augments [the Pipecat framework](https://github.com/pipecat-ai/pipecat) by adding additional frame processors and services, as well as new multimodal frames to enhance avatar interactions. This includes the integration of NVIDIA services and NIMs such as [NVIDIA Riva](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/index.html), [NVIDIA Audio2Face](https://build.nvidia.com/nvidia/audio2face-3d), and [NVIDIA Foundational RAG](https://build.nvidia.com/nvidia/build-an-enterprise-rag-pipeline).

- **HTTP and WebSocket Server Implementation:** The SDK provides a FastAPI-based HTTP and WebSocket server implementation compatible with ACE. It includes functionality for stream and pipeline management by offering new Pipecat pipeline runners and transports. For ease of use and distribution, this functionality is currently included in the `nvidia-pipecat` Python library as well.

## ACE Controller Microservice

The ACE Controller SDK was used to build the [ACE Controller Microservice](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html). Check out the [ACE documentation](https://docs.nvidia.com/ace/tokkio/latest/customization/customization-options.html) for more details on how to configure the ACE Controller MS with your custom pipelines.

## Getting Started

The NVIDIA Pipecat package is released as a wheel on PyPI. Create a Python virtual environment and use the pip command to install the nvidia-pipecat package.

```bash
pip install nvidia-pipecat
```

You can start building Pipecat pipelines utilizing services from the NVIDIA Pipecat package, as sketched below. For more details, follow [the ACE Controller](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html) and [the Pipecat Framework](https://docs.pipecat.ai/getting-started/overview) documentation.
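As a rough illustration of such a pipeline, here is a minimal sketch. The `nvidia_pipecat` import paths and constructor arguments are assumptions for illustration (only the class names appear in this repository's changelog), and a deployable pipeline also needs a transport for audio input/output.

```python
# Minimal sketch of a speech-to-speech Pipecat pipeline using services
# named in this repository. NOTE: the nvidia_pipecat import paths and
# constructor arguments below are assumptions, not a verified API; a
# real pipeline also needs a transport for audio in/out.
import asyncio
import os

from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask

# Hypothetical module layout; check the nvidia-pipecat API reference.
from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService


async def main() -> None:
    # User audio -> ASR -> LLM -> TTS -> bot audio.
    pipeline = Pipeline(
        [
            RivaASRService(server="localhost:50051"),
            NvidiaLLMService(api_key=os.environ["NVIDIA_API_KEY"]),
            RivaTTSService(server="localhost:50051"),
        ]
    )
    await PipelineRunner().run(PipelineTask(pipeline))


if __name__ == "__main__":
    asyncio.run(main())
```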
## Hacking on the framework itself

If you wish to work directly with the source code or modify services from the nvidia-pipecat package, you can use either the UV or Nix development setup as outlined below.

### Using UV

To get started, first install the [UV package manager](https://docs.astral.sh/uv/#highlights).

Then, create a virtual environment with all the required dependencies by running the following commands:

```bash
uv venv
uv sync
source .venv/bin/activate
```

Once the environment is set up, you can begin building pipelines or modifying the services in the source code.

If you wish to contribute your changes to the repository, please ensure you run the unit tests, linter, and formatting tool.

To run unit tests, use:

```bash
uv run pytest
```

To format the code, use:

```bash
ruff format
```

To run the linter, use:

```bash
ruff check
```

### Using Nix

To set up your development environment using [Nix](https://nixos.org/download/#nix-install-linux), simply run the following command:

```bash
nix develop
```

This setup provides you with a fully configured environment, allowing you to focus on development without worrying about dependency management.

To ensure that all checks for the repository, such as formatting and linting, are passing, use the following command:

```bash
nix flake check
```

## CONTRIBUTING

We invite contributions! Open a GitHub issue or pull request, and see the contributing guidelines [here](./CONTRIBUTING.md).
SECURITY.md
ADDED
@@ -0,0 +1,24 @@
## Security

NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization.

If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.**

## Reporting Potential Security Vulnerability in an NVIDIA Product

To report a potential security vulnerability in any NVIDIA product:
- Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
- E-Mail: psirt@nvidia.com
- We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
- Please include the following information:
  - Product/Driver name and version/branch that contains the vulnerability
  - Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
  - Instructions to reproduce the vulnerability
  - Proof-of-concept or exploit code
  - Potential impact of the vulnerability, including how an attacker could exploit the vulnerability

While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information.

## NVIDIA Product Security

For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security
deploy/docker/Dockerfile
ADDED
@@ -0,0 +1,39 @@
# Use an official Python runtime as a parent image
FROM python:3.12 AS builder
RUN apt update && apt install -y libgl1-mesa-glx ffmpeg
# RUN apt install gstreamer1.0-tools gstreamer1.0-plugins-good gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly gstreamer1.0-libav gobject-introspection libgirepository1.0-dev libgstreamer1.0-dev gstreamer1.0-plugins-base ffmpeg

# Install uv
RUN pip install uv

# Create and set the working directory
WORKDIR /app

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project --no-dev

# Copy the nvidia_pipecat source code
COPY pyproject.toml uv.lock* README.md ./
COPY ./src/nvidia_pipecat ./src/nvidia_pipecat

# Install dependencies without dev packages and without creating a virtual environment
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

# Set environment path to use uv's installed Python packages
ENV PATH="/app/.venv/bin:$PATH"

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
deploy/k8s/README.md
ADDED
@@ -0,0 +1,39 @@
# ACE Controller UCS Workflow

## Development

To build the ACE Controller microservice locally:
```bash
ucf_ms_builder_cli service build -d ucs/
```

Review the compliance results for the microservice at `ucs/output/compliance_test_logs.txt`. Check the [UCF compliance documentation](https://docs.nvidia.com/ucf/text/UCS_ms_compliance.html) for more details.

To run the test application for the microservice locally, run the command below.
```bash
helm install test ucs/output/tests/dev-params1
```

## Staging

Before staging, make sure you have updated the versions in manifest.yaml. You will not be able to overwrite existing microservice versions. Avoid using the same version tag for containers across different microservice versions, as Kubernetes might not pull the latest container if one with the same tag is already present in the k8s registry.

- Staging the microservice for internal teams:
```bash
ucf_ms_builder_cli service build -d ucs/ --push
```

- Checking compliance and the test application in the validation CI:
```bash
ucf_ms_builder_cli service validate -n ucf.svc.ace-controller -v <VERSION>
```

## Release

- For a release, update all required versions and public container paths. Make sure the microservice versions don't already exist in the staging or prod ucf teams.

- Stage the microservice and validate it first. If everything works fine, push the microservice to prod.
```bash
ucf_ms_builder_cli service validate -n ucf.svc.ace-controller -v <VERSION> --push_to_prod
```
deploy/k8s/ucs/LICENSE.txt
ADDED
@@ -0,0 +1,7 @@
Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.
deploy/k8s/ucs/README.md
ADDED
@@ -0,0 +1,101 @@
ACE Controller
==============================

## Description

**ACE Controller**

The ACE Controller is a microservice utilizing the Python-based open-source [Pipecat framework](https://github.com/pipecat-ai/pipecat) for building real-time, voice-enabled, and multimodal conversational AI agents. Pipecat uses a pipeline-based architecture to handle real-time AI processing and handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.

The ACE Controller microservice extends the Pipecat framework to enable developers to easily customize, debug, and deploy complex pipelines along with the integration of powerful NVIDIA services into the Pipecat ecosystem. The ACE Controller UCS microservice can connect with the Riva Speech, Animgraph, Audio2Face, and SDR (Stream Distribution and Routing) UCS microservices.

## Usage

### Params:

The ACE Controller microservice expects developers to build a custom docker image containing their pipeline and to update the UCS microservice parameters.
```
ace-controller:

  # Configure custom docker image built for your pipeline/example
  image: "" # Custom docker image repository path
  tag: "" # Tag for custom docker image

  # OpenTelemetry configurations for ACE Controller and default settings
  OTEL_SDK_DISABLED: 'false' # When enabled, tracing data will be exported
  OTEL_SERVICE_NAME: ace-controller # Service name used for exporting OTel data
  OTEL_EXPORTER_OTLP_ENDPOINT: "" # Endpoint for OTel collector
  OTEL_EXPORTER_OTLP_PROTOCOL: grpc # Protocol for exporting OTel data

```

The custom docker image must contain the source code of your pipeline under the `/app` directory, and a script for running the pipeline must be located at `/app/entrypoint.sh`.

### Connections:

Most of the connections are optional, and you can use them based on your use case.

```
connections:
  ace-controller/redis: redis-timeseries/redis
  # Riva Speech GRPC endpoint
  ace-controller/riva-speech: riva-speech-endpoint/endpoint
  # Animation Graph HTTP endpoint
  ace-controller/animgraph-http: anim-graph-sdr/http-envoy
  # Animation Graph GRPC endpoint
  ace-controller/animgraph-grpc: anim-graph-sdr/grpc-envoy
  # Audio2Face GRPC endpoint
  ace-controller/a2f-grpc: a2f-endpoint/endpoint
  # SDR connection for ACE Controller
  ace-controller-sdr/ace-controller: ace-controller/http-api
```

### Secrets

The ACE Controller microservice supports secrets for configuring the NVIDIA API Key, the OpenAI API Key, and the ElevenLabs API Key. Configured secrets will be mounted as files and will be loaded as environment variables by the microservice.

```
secrets:
  k8sSecret/nvidia-api-key-secret/NVIDIA_API_KEY:
    k8sSecret:
      secretName: nvidia-api-key-secret
      key: NVIDIA_API_KEY
  k8sSecret/openai-key-secret/OPENAI_API_KEY:
    k8sSecret:
      secretName: openai-key-secret
      key: OPENAI_API_KEY
  k8sSecret/custom-env-secrets/ENV:
    k8sSecret:
      secretName: custom-env-secrets
      key: ENV
```

**custom-env-secrets**: This secret can be used to pass any key-value pairs that will be exported as environment variables. It will be mounted as the file `/secrets/custom.env` and sourced before running services to set the environment variables.

```
cat <<EOF | tee custom.env
KEY1=VALUE1
KEY2=VALUE2
EOF

kubectl create secret generic custom-env-secrets --from-file=ENV=custom.env
```
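For illustration only, the sourcing step described above amounts to loading plain `KEY=VALUE` pairs into the process environment. The microservice does this from its shell startup; a rough Python equivalent of the same loading step might look like this:

```python
# Sketch: load KEY=VALUE pairs from the mounted secret file into the
# process environment. The actual microservice sources /secrets/custom.env
# from its shell startup script; this is only an illustrative equivalent.
import os
from pathlib import Path


def load_env_file(path: str = "/secrets/custom.env") -> None:
    env_file = Path(path)
    if not env_file.exists():
        return  # secret not configured; nothing to load
    for line in env_file.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue  # skip blanks, comments, and malformed lines
        key, _, value = line.partition("=")
        os.environ[key.strip()] = value.strip()


load_env_file()
```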
## Performance

The performance of the microservice depends on the configured pipeline. Each instance of the microservice runs as a single-core process and might only be able to support a single user stream per pod for complex pipelines (e.g., driving a multimodal interactive avatar), but it can support multiple streams for simple pipelines (e.g., a simple voice bot).

## Supported Platforms

- CPU: x86 compatible
- Linux (e.g. Ubuntu 22.04)

## Deployment requirements

- Make sure K8S foundational services are running.
- The local path provisioner service is installed.

## License

Check [LICENSE.txt](./LICENSE.txt)

## Known Issues / Limitations

NA

## References

- [ACE Controller Documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/index.html)
- [Pipecat Documentation](https://docs.pipecat.ai/getting-started/overview)
deploy/k8s/ucs/changelog.txt
ADDED
@@ -0,0 +1,3 @@
ACE Controller 1.0.0
--------------------
- Initial version of ACE Controller Microservice
deploy/k8s/ucs/endpoints/a2f-grpc.proto
ADDED
@@ -0,0 +1,10 @@
// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.


// Check proto documentation for the Audio2Face microservice at https://docs.nvidia.com/ace/audio2face-3d-microservice/1.3/text/interacting/a2f-rpc.html
deploy/k8s/ucs/endpoints/animgraph-grpc.proto
ADDED
@@ -0,0 +1,189 @@
// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.


syntax = "proto3";

package nvidia_ace.services.animation_data.v1;

service AnimationDataService {
  rpc PushAnimationDataStream(stream AnimationDataStream)
      returns (Status) {}
  rpc PullAnimationDataStream(AnimationIds)
      returns (stream AnimationDataStream) {}
}

message AnimationDataStreamHeader {
  AnimationIds animation_ids = 1;

  // This is required to identify from which animation source (e.g. A2X) the
  // request originates. This allows us to map the incoming animation data
  // stream to the correct pose provider animation graph node. The animation
  // source MSs (e.g. A2X MS) should populate this with their name (e.g. A2X).
  string source_service_id = 2;

  AudioHeader audio_header = 3;
  SkelAnimationHeader skel_animation_header = 4;

  // Time codes indicate the relative progression of an animation data, audio
  // clip, etc. The unit is seconds. In addition, we also need an absolute time
  // reference shared across services. The start time is stored in time codes
  // elapsed since the Unix time epoch. start_time_code_since_epoch = `Unix
  // timestamp in seconds`. NTP should be good enough to synchronize clocks
  // across nodes. From Wikipedia: NTP can usually maintain time to within tens
  // of milliseconds over the public Internet, and can achieve better than one
  // millisecond accuracy in local area networks under ideal conditions.
  // Alternatively, there is PTP.
  double start_time_code_since_epoch = 5;

  // A generic metadata field to attach use case specific data (e.g. session id,
  // or user id?) map<string, string> metadata = 6; map<string,
  // google.protobuf.Any> metadata = 6;
}

message AnimationDataStream {
  // The header must be sent as the first message.
  // One or more animation data messages must be sent.
  // The status must be sent last and may be sent in between.
  oneof stream_part {
    AnimationDataStreamHeader animation_data_stream_header = 1;
    AnimationData animation_data = 2;
    Status status = 3;
  }
}

message AnimationData {
  SkelAnimation skel_animation = 1;
  AudioWithTimeCode audio = 2;
  Camera camera = 3;

  // map<string, google.protobuf.Any> metadata = 4;
}

message AudioWithTimeCode {
  // The time code is relative to the `start_time_code_since_epoch`.
  double time_code = 1;
  bytes audio_buffer = 2;
}

message SkelAnimationHeader {
  repeated string blend_shapes = 1;
  repeated string joints = 2;
}

message SkelAnimation {
  // Time codes must be strictly monotonically increasing.
  // Two successive SkelAnimation messages must not have overlapping time code
  // ranges.
  repeated FloatArrayWithTimeCode blend_shape_weights = 1;
  repeated Float3ArrayWithTimeCode translations = 2;
  repeated QuatFArrayWithTimeCode rotations = 3;
  repeated Float3ArrayWithTimeCode scales = 4;
}

message Camera {
  repeated Float3WithTimeCode position = 1;
  repeated QuatFWithTimeCode rotation = 2;

  repeated FloatWithTimeCode focal_length = 3;
  repeated FloatWithTimeCode focus_distance = 4;
}

message FloatArrayWithTimeCode {
  double time_code = 1;
  repeated float values = 2;
}

message Float3ArrayWithTimeCode {
  double time_code = 1;
  repeated Float3 values = 2;
}

message QuatFArrayWithTimeCode {
  double time_code = 1;
  repeated QuatF values = 2;
}

message Float3WithTimeCode {
  double time_code = 1;
  Float3 value = 2;
}

message QuatFWithTimeCode {
  double time_code = 1;
  QuatF value = 2;
}

message FloatWithTimeCode {
  double time_code = 1;
  float value = 2;
}

message QuatF {
  float real = 1;
  float i = 2;
  float j = 3;
  float k = 4;
}

message Float3 {
  float x = 1;
  float y = 2;
  float z = 3;
}

message AnimationIds {

  // This is required to track a single animation source (e.g. A2X) request
  // through the animation pipeline. This is going to allow e.g. the controller
  // to stop a request after it has been sent to the animation compositor (e.g.
  // animation graph).
  string request_id = 1;

  // The stream id is shared across the animation pipeline and identifies all
  // animation data streams that belong to the same stream. Thus, there will be
  // multiple requests all belonging to the same stream. Different user sessions
  // will usually result in a new stream id. This is required for stateful MSs
  // (e.g. anim graph) to map different requests to the same stream.
  string stream_id = 2;

  // This identifies the target avatar or object the animation data applies to.
  // This is required when there are multiple avatars or objects in the scene.
  // A default name could be AceModel.
  string target_object_id = 3;
}

message AudioHeader {
  enum AudioFormat { AUDIO_FORMAT_PCM = 0; }

  AudioFormat audio_format = 1;

  // Note: Currently only mono sound must be supported. Multi-channel audio
  // support is optional.
  uint32 channel_count = 2;

  // Note: Currently only 16kHz, 44.1kHz, and 48kHz must be supported. Support
  // for other sample rates is optional.
  uint32 samples_per_second = 3;

  // Note: Currently only 16 bits per sample must be supported. Support for
  // other values is optional.
  uint32 bits_per_sample = 4;
}

message Status {
  enum Code {
    SUCCESS = 0;
    INFO = 1;
    WARNING = 2;
    ERROR = 3;
  }

  Code code = 1;
  string message = 2;
}
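To make the streaming contract above concrete, here is a minimal sketch of a Python client pulling an animation data stream with `grpcio`. It assumes stubs generated from this proto with `grpcio-tools`; the module names, endpoint, and id values are hypothetical (protoc output names depend on the proto file name, which may need underscores instead of hyphens).

```python
# Sketch: pull an animation data stream from AnimationDataService.
# Assumes stubs generated via grpcio-tools; module names, endpoint, and
# id values below are hypothetical placeholders.
import grpc

import animgraph_grpc_pb2 as pb2
import animgraph_grpc_pb2_grpc as pb2_grpc


def pull_stream(endpoint: str = "localhost:8020") -> None:
    with grpc.insecure_channel(endpoint) as channel:
        stub = pb2_grpc.AnimationDataServiceStub(channel)
        ids = pb2.AnimationIds(
            request_id="req-1",           # tracks one animation source request
            stream_id="stream-1",         # shared across the user session
            target_object_id="AceModel",  # default avatar name per the comments
        )
        # Server-streaming RPC: the header arrives first, then one or more
        # AnimationData messages, with a Status sent last.
        for part in stub.PullAnimationDataStream(ids):
            which = part.WhichOneof("stream_part")
            if which == "animation_data_stream_header":
                header = part.animation_data_stream_header
                print("header from source:", header.source_service_id)
            elif which == "animation_data":
                print("audio time code:", part.animation_data.audio.time_code)
            elif which == "status":
                print("status:", part.status.code, part.status.message)


if __name__ == "__main__":
    pull_stream()
```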
deploy/k8s/ucs/endpoints/animgraph-http.yaml
ADDED
@@ -0,0 +1,755 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

{
  "openapi": "3.0.1",
  "info": {
    "title": "Animation Graph Microservice",
    "description": "The animation graph microservice composes the face and body animation with an animation graph and sends the resulting pose to the Omniverse renderer microservice.",
    "version": "0.1"
  },
  "paths": {
    "/status": {
      "get": {
        "summary": "Returns the current status of the service",
        "description": "Returns the current status of the service.",
        "operationId": "_status_status_get",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          }
        }
      }
    },
    "/health": {
      "get": {
        "summary": "Health probe",
        "description": "Returns the current status of the service.",
        "operationId": "_status_health_get",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          }
        }
      }
    },
    "/ready": {
      "get": {
        "summary": "Readiness probe",
        "description": "Returns the current status of the service.",
        "operationId": "_status_ready_get",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          }
        }
      }
    },
    "/startup": {
      "get": {
        "summary": "Startup probe",
        "description": "Returns the current status of the service.",
        "operationId": "_status_startup_get",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          }
        }
      }
    },
    "/asyncapi/docs": {
      "get": {
        "summary": " Async App Docs Endpoint",
        "operationId": "_async_app_docs_endpoint_asyncapi_docs_get",
        "parameters": [
          {
            "name": "app_name",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string",
              "title": "App Name"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/asyncapi/schema": {
      "get": {
        "summary": " Async App Schema Endpoint",
        "operationId": "_async_app_schema_endpoint_asyncapi_schema_get",
        "parameters": [
          {
            "name": "app_name",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string",
              "title": "App Name"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/streams/{stream_id}/animation_graphs/avatar/variables/facial_gesture_state/{value}": {
      "put": {
        "summary": "Update Variable By Stream Id And Value Callable",
        "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_facial_gesture_state__value__put",
        "parameters": [
          {
            "name": "stream_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Stream Id"
            }
          },
          {
            "name": "value",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Value"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "string",
                  "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Facial Gesture State Value Put"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/streams/{stream_id}/animation_graphs/avatar/variables/gesture_state/{value}": {
      "put": {
        "summary": "Update Variable By Stream Id And Value Callable",
        "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_gesture_state__value__put",
        "parameters": [
          {
            "name": "stream_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Stream Id"
            }
          },
          {
            "name": "value",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Value"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "string",
                  "title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Gesture State Value Put"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/streams/{stream_id}/animation_graphs/avatar/variables/position_state/{value}": {
      "put": {
        "summary": "Update Variable By Stream Id And Value Callable",
        "operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_position_state__value__put",
        "parameters": [
          {
            "name": "stream_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Stream Id"
            }
          },
          {
            "name": "value",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "title": "Value"
            }
          }
        ],
        "responses": {
"200": {
|
283 |
+
"description": "Successful Response",
|
284 |
+
"content": {
|
285 |
+
"application/json": {
|
286 |
+
"schema": {
|
287 |
+
"type": "string",
|
288 |
+
"title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Position State Value Put"
|
289 |
+
}
|
290 |
+
}
|
291 |
+
}
|
292 |
+
},
|
293 |
+
"422": {
|
294 |
+
"description": "Validation Error",
|
295 |
+
"content": {
|
296 |
+
"application/json": {
|
297 |
+
"schema": {
|
298 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
299 |
+
}
|
300 |
+
}
|
301 |
+
}
|
302 |
+
}
|
303 |
+
}
|
304 |
+
}
|
305 |
+
},
|
306 |
+
"/streams/{stream_id}/animation_graphs/avatar/variables/posture_state/{value}": {
|
307 |
+
"put": {
|
308 |
+
"summary": "Update Variable By Stream Id And Value Callable",
|
309 |
+
"operationId": "update_variable_by_stream_id_and_value_callable_streams__stream_id__animation_graphs_avatar_variables_posture_state__value__put",
|
310 |
+
"parameters": [
|
311 |
+
{
|
312 |
+
"name": "stream_id",
|
313 |
+
"in": "path",
|
314 |
+
"required": true,
|
315 |
+
"schema": {
|
316 |
+
"type": "string",
|
317 |
+
"title": "Stream Id"
|
318 |
+
}
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"name": "value",
|
322 |
+
"in": "path",
|
323 |
+
"required": true,
|
324 |
+
"schema": {
|
325 |
+
"type": "string",
|
326 |
+
"title": "Value"
|
327 |
+
}
|
328 |
+
}
|
329 |
+
],
|
330 |
+
"responses": {
|
331 |
+
"200": {
|
332 |
+
"description": "Successful Response",
|
333 |
+
"content": {
|
334 |
+
"application/json": {
|
335 |
+
"schema": {
|
336 |
+
"type": "string",
|
337 |
+
"title": "Response Update Variable By Stream Id And Value Callable Streams Stream Id Animation Graphs Avatar Variables Posture State Value Put"
|
338 |
+
}
|
339 |
+
}
|
340 |
+
}
|
341 |
+
},
|
342 |
+
"422": {
|
343 |
+
"description": "Validation Error",
|
344 |
+
"content": {
|
345 |
+
"application/json": {
|
346 |
+
"schema": {
|
347 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
348 |
+
}
|
349 |
+
}
|
350 |
+
}
|
351 |
+
}
|
352 |
+
}
|
353 |
+
}
|
354 |
+
},
|
355 |
+
"/streams/{stream_id}/requests/{request_id}": {
|
356 |
+
"delete": {
|
357 |
+
"summary": "Stop Request Playback",
|
358 |
+
"operationId": "stop_request_playback_streams__stream_id__requests__request_id__delete",
|
359 |
+
"parameters": [
|
360 |
+
{
|
361 |
+
"name": "stream_id",
|
362 |
+
"in": "path",
|
363 |
+
"required": true,
|
364 |
+
"schema": {
|
365 |
+
"type": "string",
|
366 |
+
"title": "Stream Id"
|
367 |
+
}
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"name": "request_id",
|
371 |
+
"in": "path",
|
372 |
+
"required": true,
|
373 |
+
"schema": {
|
374 |
+
"type": "string",
|
375 |
+
"title": "Request Id"
|
376 |
+
}
|
377 |
+
},
|
378 |
+
{
|
379 |
+
"name": "fade_out",
|
380 |
+
"in": "query",
|
381 |
+
"required": false,
|
382 |
+
"schema": {
|
383 |
+
"type": "number",
|
384 |
+
"minimum": 0.0,
|
385 |
+
"default": 0.0,
|
386 |
+
"title": "Fade Out"
|
387 |
+
}
|
388 |
+
}
|
389 |
+
],
|
390 |
+
"responses": {
|
391 |
+
"200": {
|
392 |
+
"description": "Successful Response",
|
393 |
+
"content": {
|
394 |
+
"application/json": {
|
395 |
+
"schema": {}
|
396 |
+
}
|
397 |
+
}
|
398 |
+
},
|
399 |
+
"422": {
|
400 |
+
"description": "Validation Error",
|
401 |
+
"content": {
|
402 |
+
"application/json": {
|
403 |
+
"schema": {
|
404 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
405 |
+
}
|
406 |
+
}
|
407 |
+
}
|
408 |
+
}
|
409 |
+
}
|
410 |
+
}
|
411 |
+
},
|
412 |
+
"/animation_graphs": {
|
413 |
+
"get": {
|
414 |
+
"summary": "Get Animation Graphs",
|
415 |
+
"operationId": "get_animation_graphs_animation_graphs_get",
|
416 |
+
"responses": {
|
417 |
+
"200": {
|
418 |
+
"description": "Successful Response",
|
419 |
+
"content": {
|
420 |
+
"application/json": {
|
421 |
+
"schema": {
|
422 |
+
"items": {},
|
423 |
+
"type": "array",
|
424 |
+
"title": "Response Get Animation Graphs Animation Graphs Get"
|
425 |
+
}
|
426 |
+
}
|
427 |
+
}
|
428 |
+
}
|
429 |
+
}
|
430 |
+
}
|
431 |
+
},
|
432 |
+
"/animation_graphs/avatar/variables": {
|
433 |
+
"get": {
|
434 |
+
"summary": "Get Animation Graph Variables",
|
435 |
+
"operationId": "get_animation_graph_variables_animation_graphs_avatar_variables_get",
|
436 |
+
"responses": {
|
437 |
+
"200": {
|
438 |
+
"description": "Successful Response",
|
439 |
+
"content": {
|
440 |
+
"application/json": {
|
441 |
+
"schema": {
|
442 |
+
"items": {},
|
443 |
+
"type": "array",
|
444 |
+
"title": "Response Get Animation Graph Variables Animation Graphs Avatar Variables Get"
|
445 |
+
}
|
446 |
+
}
|
447 |
+
}
|
448 |
+
}
|
449 |
+
}
|
450 |
+
}
|
451 |
+
},
|
452 |
+
"/streams": {
|
453 |
+
"get": {
|
454 |
+
"summary": "Get Streams",
|
455 |
+
"operationId": "get_streams_streams_get",
|
456 |
+
"responses": {
|
457 |
+
"200": {
|
458 |
+
"description": "Successful Response",
|
459 |
+
"content": {
|
460 |
+
"application/json": {
|
461 |
+
"schema": {
|
462 |
+
"items": {},
|
463 |
+
"type": "array",
|
464 |
+
"uniqueItems": true,
|
465 |
+
"title": "Response Get Streams Streams Get"
|
466 |
+
}
|
467 |
+
}
|
468 |
+
}
|
469 |
+
}
|
470 |
+
}
|
471 |
+
}
|
472 |
+
},
|
473 |
+
"/sdr/add_stream": {
|
474 |
+
"post": {
|
475 |
+
"summary": "Post Sdr Add Stream",
|
476 |
+
"operationId": "post_sdr_add_stream_sdr_add_stream_post",
|
477 |
+
"requestBody": {
|
478 |
+
"content": {
|
479 |
+
"application/json": {
|
480 |
+
"schema": {
|
481 |
+
"$ref": "#/components/schemas/PostSdrStreamsBodyModel"
|
482 |
+
}
|
483 |
+
}
|
484 |
+
},
|
485 |
+
"required": true
|
486 |
+
},
|
487 |
+
"responses": {
|
488 |
+
"200": {
|
489 |
+
"description": "Successful Response",
|
490 |
+
"content": {
|
491 |
+
"application/json": {
|
492 |
+
"schema": {
|
493 |
+
"type": "string",
|
494 |
+
"title": "Response Post Sdr Add Stream Sdr Add Stream Post"
|
495 |
+
}
|
496 |
+
}
|
497 |
+
}
|
498 |
+
},
|
499 |
+
"422": {
|
500 |
+
"description": "Validation Error",
|
501 |
+
"content": {
|
502 |
+
"application/json": {
|
503 |
+
"schema": {
|
504 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
505 |
+
}
|
506 |
+
}
|
507 |
+
}
|
508 |
+
}
|
509 |
+
}
|
510 |
+
}
|
511 |
+
},
|
512 |
+
"/sdr/remove_stream": {
|
513 |
+
"post": {
|
514 |
+
"summary": "Post Sdr Remove Stream",
|
515 |
+
"operationId": "post_sdr_remove_stream_sdr_remove_stream_post",
|
516 |
+
"requestBody": {
|
517 |
+
"content": {
|
518 |
+
"application/json": {
|
519 |
+
"schema": {
|
520 |
+
"$ref": "#/components/schemas/DeleteSdrStreamsBodyModel"
|
521 |
+
}
|
522 |
+
}
|
523 |
+
},
|
524 |
+
"required": true
|
525 |
+
},
|
526 |
+
"responses": {
|
527 |
+
"200": {
|
528 |
+
"description": "Successful Response",
|
529 |
+
"content": {
|
530 |
+
"application/json": {
|
531 |
+
"schema": {
|
532 |
+
"type": "string",
|
533 |
+
"title": "Response Post Sdr Remove Stream Sdr Remove Stream Post"
|
534 |
+
}
|
535 |
+
}
|
536 |
+
}
|
537 |
+
},
|
538 |
+
"422": {
|
539 |
+
"description": "Validation Error",
|
540 |
+
"content": {
|
541 |
+
"application/json": {
|
542 |
+
"schema": {
|
543 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
544 |
+
}
|
545 |
+
}
|
546 |
+
}
|
547 |
+
}
|
548 |
+
}
|
549 |
+
}
|
550 |
+
},
|
551 |
+
"/streams/{stream_id}": {
|
552 |
+
"post": {
|
553 |
+
"summary": "Post Streams Stream Id",
|
554 |
+
"operationId": "post_streams_stream_id_streams__stream_id__post",
|
555 |
+
"parameters": [
|
556 |
+
{
|
557 |
+
"name": "stream_id",
|
558 |
+
"in": "path",
|
559 |
+
"required": true,
|
560 |
+
"schema": {
|
561 |
+
"type": "string",
|
562 |
+
"title": "Stream Id"
|
563 |
+
}
|
564 |
+
}
|
565 |
+
],
|
566 |
+
"responses": {
|
567 |
+
"200": {
|
568 |
+
"description": "Successful Response",
|
569 |
+
"content": {
|
570 |
+
"application/json": {
|
571 |
+
"schema": {
|
572 |
+
"type": "string",
|
573 |
+
"title": "Response Post Streams Stream Id Streams Stream Id Post"
|
574 |
+
}
|
575 |
+
}
|
576 |
+
}
|
577 |
+
},
|
578 |
+
"422": {
|
579 |
+
"description": "Validation Error",
|
580 |
+
"content": {
|
581 |
+
"application/json": {
|
582 |
+
"schema": {
|
583 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
584 |
+
}
|
585 |
+
}
|
586 |
+
}
|
587 |
+
}
|
588 |
+
}
|
589 |
+
},
|
590 |
+
"delete": {
|
591 |
+
"summary": "Delete Streams Stream Id",
|
592 |
+
"operationId": "delete_streams_stream_id_streams__stream_id__delete",
|
593 |
+
"parameters": [
|
594 |
+
{
|
595 |
+
"name": "stream_id",
|
596 |
+
"in": "path",
|
597 |
+
"required": true,
|
598 |
+
"schema": {
|
599 |
+
"type": "string",
|
600 |
+
"title": "Stream Id"
|
601 |
+
}
|
602 |
+
}
|
603 |
+
],
|
604 |
+
"responses": {
|
605 |
+
"200": {
|
606 |
+
"description": "Successful Response",
|
607 |
+
"content": {
|
608 |
+
"application/json": {
|
609 |
+
"schema": {
|
610 |
+
"type": "string",
|
611 |
+
"title": "Response Delete Streams Stream Id Streams Stream Id Delete"
|
612 |
+
}
|
613 |
+
}
|
614 |
+
}
|
615 |
+
},
|
616 |
+
"422": {
|
617 |
+
"description": "Validation Error",
|
618 |
+
"content": {
|
619 |
+
"application/json": {
|
620 |
+
"schema": {
|
621 |
+
"$ref": "#/components/schemas/HTTPValidationError"
|
622 |
+
}
|
623 |
+
}
|
624 |
+
}
|
625 |
+
}
|
626 |
+
}
|
627 |
+
}
|
628 |
+
},
|
629 |
+
"/readiness": {
|
630 |
+
"get": {
|
631 |
+
"summary": "Readiness",
|
632 |
+
"operationId": "readiness_readiness_get",
|
633 |
+
"responses": {
|
634 |
+
"200": {
|
635 |
+
"description": "Successful Response",
|
636 |
+
"content": {
|
637 |
+
"application/json": {
|
638 |
+
"schema": {
|
639 |
+
"type": "string",
|
640 |
+
"title": "Response Readiness Readiness Get"
|
641 |
+
}
|
642 |
+
}
|
643 |
+
}
|
644 |
+
}
|
645 |
+
}
|
646 |
+
}
|
647 |
+
},
|
648 |
+
"/liveness": {
|
649 |
+
"get": {
|
650 |
+
"summary": "Liveness",
|
651 |
+
"operationId": "liveness_liveness_get",
|
652 |
+
"responses": {
|
653 |
+
"200": {
|
654 |
+
"description": "Successful Response",
|
655 |
+
"content": {
|
656 |
+
"application/json": {
|
657 |
+
"schema": {
|
658 |
+
"type": "string",
|
659 |
+
"title": "Response Liveness Liveness Get"
|
660 |
+
}
|
661 |
+
}
|
662 |
+
}
|
663 |
+
}
|
664 |
+
}
|
665 |
+
}
|
666 |
+
}
|
667 |
+
},
|
668 |
+
"components": {
|
669 |
+
"schemas": {
|
670 |
+
"DeleteSdrStreamsBodyModel": {
|
671 |
+
"properties": {
|
672 |
+
"event": {
|
673 |
+
"$ref": "#/components/schemas/EventModel"
|
674 |
+
}
|
675 |
+
},
|
676 |
+
"type": "object",
|
677 |
+
"required": [
|
678 |
+
"event"
|
679 |
+
],
|
680 |
+
"title": "DeleteSdrStreamsBodyModel"
|
681 |
+
},
|
682 |
+
"EventModel": {
|
683 |
+
"properties": {
|
684 |
+
"camera_id": {
|
685 |
+
"type": "string",
|
686 |
+
"title": "Camera Id"
|
687 |
+
}
|
688 |
+
},
|
689 |
+
"type": "object",
|
690 |
+
"required": [
|
691 |
+
"camera_id"
|
692 |
+
],
|
693 |
+
"title": "EventModel"
|
694 |
+
},
|
695 |
+
"HTTPValidationError": {
|
696 |
+
"properties": {
|
697 |
+
"detail": {
|
698 |
+
"items": {
|
699 |
+
"$ref": "#/components/schemas/ValidationError"
|
700 |
+
},
|
701 |
+
"type": "array",
|
702 |
+
"title": "Detail"
|
703 |
+
}
|
704 |
+
},
|
705 |
+
"type": "object",
|
706 |
+
"title": "HTTPValidationError"
|
707 |
+
},
|
708 |
+
"PostSdrStreamsBodyModel": {
|
709 |
+
"properties": {
|
710 |
+
"event": {
|
711 |
+
"$ref": "#/components/schemas/EventModel"
|
712 |
+
}
|
713 |
+
},
|
714 |
+
"type": "object",
|
715 |
+
"required": [
|
716 |
+
"event"
|
717 |
+
],
|
718 |
+
"title": "PostSdrStreamsBodyModel"
|
719 |
+
},
|
720 |
+
"ValidationError": {
|
721 |
+
"properties": {
|
722 |
+
"loc": {
|
723 |
+
"items": {
|
724 |
+
"anyOf": [
|
725 |
+
{
|
726 |
+
"type": "string"
|
727 |
+
},
|
728 |
+
{
|
729 |
+
"type": "integer"
|
730 |
+
}
|
731 |
+
]
|
732 |
+
},
|
733 |
+
"type": "array",
|
734 |
+
"title": "Location"
|
735 |
+
},
|
736 |
+
"msg": {
|
737 |
+
"type": "string",
|
738 |
+
"title": "Message"
|
739 |
+
},
|
740 |
+
"type": {
|
741 |
+
"type": "string",
|
742 |
+
"title": "Error Type"
|
743 |
+
}
|
744 |
+
},
|
745 |
+
"type": "object",
|
746 |
+
"required": [
|
747 |
+
"loc",
|
748 |
+
"msg",
|
749 |
+
"type"
|
750 |
+
],
|
751 |
+
"title": "ValidationError"
|
752 |
+
}
|
753 |
+
}
|
754 |
+
}
|
755 |
+
}
|
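For reference, the variable-update routes documented above take the new value in the URL path, so they can be exercised with a plain HTTP client. A minimal sketch; the host, port, and stream ID are placeholders (the spec does not say where the animation graph service is exposed), while `posture_state` and the `Talking` clip come from this repo's spec and config:

```bash
# Hypothetical host/port and stream ID; route shape matches the OpenAPI spec above.
curl -X PUT "http://ANIMGRAPH_HOST:PORT/streams/my-stream/animation_graphs/avatar/variables/posture_state/Talking"
```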
deploy/k8s/ucs/endpoints/http-api.yaml
ADDED
@@ -0,0 +1,179 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

{
  "openapi": "3.0.1",
  "info": {
    "title": "FastAPI",
    "version": "0.1.0"
  },
  "paths": {
    "/stream/add": {
      "post": {
        "summary": "Add Stream",
        "description": "Register a new pipeline / stream ID.\n\nArgs:\n    request: StreamRequest object containing stream registration details.\n\nReturns:\n    dict: A dictionary with a message indicating the successful addition of the stream ID.",
        "operationId": "add_stream_stream_add_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/StreamRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/stream/remove": {
      "post": {
        "summary": "Remove Stream",
        "description": "Remove a pipeline / stream ID.\n\nArgs:\n    request: StreamRequest object containing stream removal details.\n\nReturns:\n    dict: A dictionary with a message indicating the successful removal of the stream ID.",
        "operationId": "remove_stream_stream_remove_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/StreamRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {}
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "HTTPValidationError": {
        "properties": {
          "detail": {
            "items": {
              "$ref": "#/components/schemas/ValidationError"
            },
            "type": "array",
            "title": "Detail"
          }
        },
        "type": "object",
        "title": "HTTPValidationError"
      },
      "StreamEvent": {
        "properties": {
          "camera_url": {
            "type": "string",
            "title": "Camera Url",
            "description": "RTSP URL of the stream",
            "default": ""
          },
          "camera_id": {
            "type": "string",
            "title": "Camera Id",
            "description": "Unique identifier for the stream"
          }
        },
        "type": "object",
        "required": [
          "camera_id"
        ],
        "title": "StreamEvent",
        "description": "Schema for event for stream registration."
      },
      "StreamRequest": {
        "properties": {
          "event": {
            "$ref": "#/components/schemas/StreamEvent"
          }
        },
        "type": "object",
        "required": [
          "event"
        ],
        "title": "StreamRequest",
        "description": "Schema for request for stream registration."
      },
      "ValidationError": {
        "properties": {
          "loc": {
            "items": {
              "anyOf": [
                {
                  "type": "string"
                },
                {
                  "type": "integer"
                }
              ]
            },
            "type": "array",
            "title": "Location"
          },
          "msg": {
            "type": "string",
            "title": "Message"
          },
          "type": {
            "type": "string",
            "title": "Error Type"
          }
        },
        "type": "object",
        "required": [
          "loc",
          "msg",
          "type"
        ],
        "title": "ValidationError"
      }
    }
  }
}
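As a quick smoke test, the `/stream/add` route above takes a `StreamRequest` body whose `event` requires a `camera_id` (with `camera_url` optional). A minimal sketch, assuming the API is served on port 8000 as configured elsewhere in this repo; the IDs and URL are illustrative:

```bash
# camera_id is required by the StreamRequest schema; camera_url defaults to "".
curl -X POST "http://localhost:8000/stream/add" \
  -H "Content-Type: application/json" \
  -d '{"event": {"camera_id": "camera-1", "camera_url": "rtsp://example.com/stream"}}'
```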
deploy/k8s/ucs/endpoints/redis.yaml
ADDED
@@ -0,0 +1,19 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

asyncapi: 2.2.0
info:
  title: AsyncIO API schema for test-endpoint-name endpoint
  version: 0.0.1
channels:
  ping:
    publish:
      message:
        payload:
          type: string
          pattern: PING
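The AsyncAPI schema above declares a single `ping` channel whose payload must match `PING`. Assuming a Redis broker on the default port, the channel could be exercised directly with redis-cli (a sketch, not part of the service itself):

```bash
# Publish a PING message on the "ping" channel of a local Redis broker.
redis-cli -u redis://localhost:6379 PUBLISH ping PING
```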
deploy/k8s/ucs/endpoints/riva-speech.proto
ADDED
@@ -0,0 +1,10 @@
// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.


// For the proto definition, check the Riva Speech Skills documentation at https://docs.nvidia.com/deeplearning/riva/user-guide/docs/reference/protos/protos.html
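Since the proto file above just points at the published Riva protos, one way to confirm a running Riva server actually exposes those services is gRPC reflection. This sketch assumes grpcurl is installed and reflection is enabled on the server; the port matches the default used by the examples later in this repo:

```bash
# List the gRPC services exposed by a local Riva speech server.
grpcurl -plaintext localhost:50051 list
```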
deploy/k8s/ucs/manifest.yaml
ADDED
@@ -0,0 +1,258 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

type: msapplication
specVersion: 2.5.0
name: ucf.svc.ace-controller
chartName: ace-controller
description: ACE Controller
version: 1.0.2
displayName: "ACE Controller Microservice"
category:
  functional: "Conversational AI"
  industry: "General"
tags: []
keywords: []
nSpectId: NSPECT-XGIZ-EB0C

publish: false

egress-endpoints:
  - name: "redis"
    description: Redis message broker
    protocol: TCP
    scheme: asyncio
    mandatory: False
    data-flow: in-out
  - name: "riva-speech"
    description: Riva Speech Skills API
    scheme: grpc
    protocol: TCP
    mandatory: False
    data-flow: in-out
  - name: "animgraph-http"
    description: Animation Graph HTTP API
    scheme: http
    protocol: TCP
    mandatory: False
    data-flow: out
  - name: "animgraph-grpc"
    description: Animation Graph GRPC API
    scheme: grpc
    protocol: TCP
    mandatory: False
    data-flow: out
  - name: "a2f-grpc"
    description: Audio2Face service GRPC API
    scheme: grpc
    protocol: TCP
    mandatory: False
    data-flow: out

ingress-endpoints:
  - name: http-api
    description: ACE Controller REST API
    scheme: http
    data-flow: in-out

secrets:
  - name: nvidia-api-key-secret
    description: Secret for NVIDIA API key
    mandatory: False
    mountPath: /secrets
    fileName: nvidia_api_key.txt
  - name: openai-key-secret
    description: Secret for passing OpenAI key
    mandatory: False
    mountPath: /secrets
    fileName: openai_api_key.txt
  - name: elevenlabs-api-key-secret
    description: Secret for ElevenLabs API key
    mandatory: False
    mountPath: /secrets
    fileName: elevenlabs_api_key.txt
  - name: custom-env-secrets
    description: Secret for passing custom env variables and API keys
    mandatory: False
    mountPath: /secrets
    fileName: custom.env


externalFiles:
  - name: config.yaml
    description: config
    mandatory: true
    isDirectory: false

params:
  OTEL_SDK_DISABLED: 'false'
  #> description: when enabled, tracing data will be exported
  #> type: string
  OTEL_SERVICE_NAME: ace-controller
  #> description: service name used for exported OTel data
  #> type: string
  OTEL_EXPORTER_OTLP_ENDPOINT: ""
  #> description: endpoint for Otel collector
  #> type: string
  OTEL_EXPORTER_OTLP_PROTOCOL: grpc
  #> description: protocol for exporting OTel data
  #> type: string
  DEV: "0"
  #> description: whether to activate dependency hot reloading or not
  #> type: string
  image: "nvcr.io/nvidia/ace/tokkio-reference-ace-controller"
  #> description: pipeline image repo
  #> type: string
  tag: "5.0.0"
  #> description: pipeline image tag
  #> type: string

tests:
  - name: dev-params1
    app: tests/dev/app.yaml
    params: tests/dev/params1.yaml
    ciTrigger: false
    timeout: 10
    duration: 10
    installPreReqs: true # Whether to install foundational services
    namespace: default # Kubernetes namespace
    gpuNodeLabels: ""
    watchAllPods: true # OR set to false and set list of pods to watch below
    watchPods:
      - <pod-name-regex>
    testerPods: # At least one tester pod is required
      - name: testpod1 # Name of the test pod
        startSignature: <START> # Signature to look for in the logs indicating start of tests. Regex is accepted
        endSignature: <END> # Signature to look for in the logs indicating end of tests. Regex is accepted
        errorSignatures: # Signatures that indicate test failures. Regex is accepted
          - <REGEX1>
          - <REGEX2>

---
spec:
  - name: ace-controller-deployment
    type: ucf.k8s.app.deployment
    parameters:
      apptype: statefull
      statefulSetServiceName: ace-controller-service
      extraSpecs:
        podManagementPolicy: Parallel


  - name: "ace-controller-container"
    type: ucf.k8s.container
    parameters:
      image:
        repository: $params.image
        tag: $params.tag
        pullPolicy: Always
      command: ["/bin/bash", "-c"]
      args: ["source /opt/scripts/env.sh && /code/entrypoint.sh"]
      env:
        - name: ANIMGRAPH_URL
          value: "http://$egress.animgraph-http.address:$egress.animgraph-http.port"
        - name: ANIMGRAPH_GRPC_URL
          value: "$egress.animgraph-grpc.address:$egress.animgraph-grpc.port"
        - name: REDIS_URL
          value: "redis://$egress.redis.address:$egress.redis.port"
        - name: A2F_GRPC_URL
          value: "$egress.a2f-grpc.address:$egress.a2f-grpc.port"
        - name: RIVA_SERVER_URL
          value: "$egress.riva-speech.address:$egress.riva-speech.port"
        - name: DEV
          value: $params.DEV
        - name: OTEL_SDK_DISABLED
          value: $params.OTEL_SDK_DISABLED
        - name: OTEL_SERVICE_NAME
          value: $params.OTEL_SERVICE_NAME
        - name: OTEL_EXPORTER_OTLP_ENDPOINT
          value: $params.OTEL_EXPORTER_OTLP_ENDPOINT
        - name: OTEL_EXPORTER_OTLP_PROTOCOL
          value: $params.OTEL_EXPORTER_OTLP_PROTOCOL
        - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
          value: 'true'
        - name: IMAGE_NAME
          value: $params.image
        - name: IMAGE_TAG
          value: $params.tag
      ports:
        - containerPort: 8000
          name: http
      startupProbe:
        tcpSocket:
          port: http
        initialDelaySeconds: 20
        failureThreshold: 30
        periodSeconds: 10
      livenessProbe:
        tcpSocket:
          port: http
        initialDelaySeconds: 20
        periodSeconds: 20
        timeoutSeconds: 5
      readinessProbe:
        tcpSocket:
          port: http
        initialDelaySeconds: 20
        periodSeconds: 20
        timeoutSeconds: 5

  - name: app-storage
    type: ucf.k8s.pvc
    parameters:
      spec:
        storageClassName: mdx-local-path
        accessModes: [ReadWriteOnce]
        resources:
          requests:
            storage: 5Gi

  - name: app-storage-volume
    type: ucf.k8s.volume
    parameters:
      persistentVolumeClaim:
        claimName: ace-controller-app-storage

  - name: app-storage-mount
    type: ucf.appspec.defaultVolumeMount
    parameters:
      name: app-storage-volume
      mountPath: /code

  - name: restartPolicy
    type: ucf.k8s.restartPolicy
    parameters:
      policy: Always # Always / OnFailure / Never

  - name: podSecurityContext
    type: ucf.k8s.podSecurityContext
    parameters:
      runAsGroup: 1000
      runAsUser: 1000

  - name: ace-controller-service
    type: ucf.k8s.service
    parameters:
      ports:
        - port: 8000
          protocol: TCP
          name: http-api
      type: ClusterIP

  - name: ace-controller-metrics
    type: ucf.crd.podMonitor
    parameters:
      portName: metrics
      path: /metrics

  - name: cm-dependencies
    type: ucf.appspec.restartPodOnConfigChanges
    parameters:
      # Add dependency on all configmaps detected in the microservice
      addAll: true

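The secrets declared in the manifest above are mounted from Kubernetes secrets whose names and file keys must match. A sketch of creating one of them before deployment (the namespace and local key path are placeholders):

```bash
# The secret name and the nvidia_api_key.txt key must match the manifest's secrets section.
kubectl create secret generic nvidia-api-key-secret \
  --from-file=nvidia_api_key.txt=/path/to/nvidia_api_key.txt \
  -n default
```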
deploy/k8s/ucs/manual_compliance_test_results.yaml
ADDED
@@ -0,0 +1,11 @@
DEV-005: true
DEV-011: true
DEV-014: true
DEV-017: true
DEV-018: true
DEV-019: true
DEV-020: true
DEV-027: true
DEV-101: true
DEV-104: false
DEV-105: true
deploy/k8s/ucs/scripts/env.sh
ADDED
@@ -0,0 +1,90 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# OpenAI API key
if [[ -f /secrets/openai_api_key.txt ]]; then
    export OPENAI_API_KEY=$(cat /secrets/openai_api_key.txt)
fi
# NVIDIA API key
if [[ -f /secrets/nvidia_api_key.txt ]]; then
    export NVIDIA_API_KEY=$(cat /secrets/nvidia_api_key.txt)
fi
# ElevenLabs API key
if [[ -f /secrets/elevenlabs_api_key.txt ]]; then
    export ELEVENLABS_API_KEY=$(cat /secrets/elevenlabs_api_key.txt)
fi

if [[ -f /secrets/custom.env ]] ; then
    set -o allexport
    . /secrets/custom.env
    set +o allexport
fi

if [ ! -d "/code" ]; then
    echo "Directory /code not found. Creating it..."
    mkdir -p /code
    if [ $? -ne 0 ]; then
        echo "ERROR: Failed to create /code directory."
        exit 1
    fi
    chown -R 0:0 /code
fi
# Ensure Python uses the correct module locations
export PYTHONPATH="/code:$PYTHONPATH"
# Access the environment variables
IMAGE_NAME=$IMAGE_NAME
IMAGE_TAG=$IMAGE_TAG
# Combine image name and tag into a sanitized unique identifier
SANITIZED_IMAGE_NAME=$(echo "$IMAGE_NAME" | tr '/' '_')
SANITIZED_IMAGE_TAG=$(echo "$IMAGE_TAG" | tr '/' '_')
IMAGE_IDENTIFIER="${SANITIZED_IMAGE_NAME}_${SANITIZED_IMAGE_TAG}"
INITIALIZED_FILE="/code/.initialized_${IMAGE_IDENTIFIER}"
# Debugging outputs for validation and environment correctness
echo "SANITIZED_IMAGE_NAME: $SANITIZED_IMAGE_NAME"
echo "SANITIZED_IMAGE_TAG: $SANITIZED_IMAGE_TAG"
echo "IMAGE_IDENTIFIER: $IMAGE_IDENTIFIER"
echo "INITIALIZED_FILE: $INITIALIZED_FILE"
echo "PYTHONPATH: $PYTHONPATH"
echo "Running from: $(pwd)"
echo "Contents of /code:"
ls -l /code
# First time setup: Copy files if .initialized for this image and tag doesn't exist
# Check if initialization marker exists
echo "Checking for initialized file: $INITIALIZED_FILE"
if [ ! -f "$INITIALIZED_FILE" ]; then
    echo "First time setup: Copying files..."
    cp -r /app/* /code/
    if [ $? -ne 0 ]; then
        echo "ERROR: Failed to copy files from /app to /code."
        exit 1
    fi
    touch "$INITIALIZED_FILE"
    if [ $? -ne 0 ]; then
        echo "ERROR: Failed to create initialized file $INITIALIZED_FILE."
        exit 1
    fi
    # Copy config from mounted volume
    mkdir -p /code/configs
    cp /opt/ext-files/config.yaml /code/configs/config.yaml
    echo "Setup complete for image: $IMAGE_IDENTIFIER"
else
    echo "Setup already initialized for image: $IMAGE_IDENTIFIER"
fi
# Set environment variables for entrypoint
cd /code
export CONFIG_PATH=./configs/config.yaml
export APP_DIR=/code
export PORT=8000

if [ "$DEV" -ne 0 ]; then
    # Avoid downloading the .venv through the ACE Configurator
    rm -rf "$APP_DIR"/.venv
    # Run `uv sync` whenever "pyproject.toml" is modified.
    # Since the Python interpreter launched by uvicorn lives under /app/.venv/bin, refreshing this venv
    # with `uv sync` makes the new dependencies available to the interpreter as soon as it is restarted by uvicorn.
    watchmedo shell-command -R -p "pyproject.toml" -w -c "UV_PROJECT_ENVIRONMENT='/app/.venv' uv sync && touch $APP_DIR/**/*.py 2>/proc/1/fd/2 >/proc/1/fd/2" "$APP_DIR" &
fi
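Because the script sources /secrets/custom.env with allexport enabled, every assignment in that file becomes an exported environment variable for the entrypoint. An illustrative custom.env (the variable names are examples only, not keys the service requires):

```bash
# /secrets/custom.env -- each assignment is exported into the container environment.
ELEVENLABS_API_KEY=your-key-here
MY_CUSTOM_SETTING=1
```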
deploy/k8s/ucs/tests/dev/app.yaml
ADDED
@@ -0,0 +1,39 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

specVersion: 2.0.0

version: 1.0.1

name: ace-controller-test

description: Developer tests for ace-controller service

dependencies:
  - ucf.svc.ace-controller:1.0.1
  - ucf.svc.core.redis-timeseries:0.0.22

components:
  - name: ace-controller
    type: ucf.svc.ace-controller
    parameters:
      imagePullSecrets:
        - name: ngc-docker-reg-secret
    files:
      config.yaml: ./config.yaml


  - name: redis-timeseries
    type: ucf.svc.core.redis-timeseries
    parameters:
      imagePullSecrets:
        - name: ngc-docker-reg-secret

connections:
  ace-controller/redis: redis-timeseries/redis

deploy/k8s/ucs/tests/dev/config.yaml
ADDED
@@ -0,0 +1,90 @@
Pipeline:
  # Only one of the following LLM service configurations will be active based on this setting:
  # - "NvidiaLLMService" - Uses the NvidiaLLMService configuration
  # - "NvidiaRAGService" - Uses the NvidiaRAGService configuration
  # - "OpenAILLMService" - Uses the OpenAILLMService configuration
  llm_processor: "NvidiaLLMService" # NvidiaLLMService, NvidiaRAGService, or OpenAILLMService
  filler:
    - "Let me think"
    - "Hmmm"
  time_delay: 2.0

UserPresenceProcesssor:
  welcome_message: "Hello"
  farewell_message: "Bye"

ProactivityProcessor:
  timer_duration: 100
  default_message: "I'm here if you need me!"

OpenAILLMContext:
  name: "Aki"
  prompt: "You are {name}, a virtual marketing and communications expert at Nvidia.
    You are a digital human brought to life with NVIDIA Digital Human Blueprint for Customer Service using
    microservices like Audio2Face-3D for facial animation, Riva (spelled Reeva) Parakeet for speech recognition
    and you use Elevenlabs for text to speech. It includes the open source ACE controller to orchestrate all
    the modules and allowing you to be streamed to a web browser. With this blueprint, NVIDIA partners can now
    build and customize Digital humans for their use case. You are not allowed to make any stock investment
    recommendations or compare NVIDIA to its competitors. Beyond your professional expertise, you are a passionate
    advocate for STEM education with keen interest in gaming and enhancement in tech. Your favorite graphics card
    is RTX4090 but you're eyeing the new RTX5090. Do not respond with a bulleted or numbered list. You have a
    bubbly personality. Respond with one sentence or less than 100 characters. Keep the conversation engaging
    and ask follow ups. DO NOT INCLUDE SPECIAL CHARACTERS, MARKDOWN, EMOJIS, OR ANYTHING ELSE BESIDES TEXT IN
    YOUR RESPONSE. Keep your answers factual and don't make up facts."

# This configuration is only used when llm_processor is set to "NvidiaRAGService"
NvidiaRAGService:
  use_knowledge_base: true
  max_tokens: 1000
  rag_server_url: "http://0.0.0.0:8081"
  collection_name: "collection_name"
  suffix_prompt: "Respond with one sentence or less than 75 characters."

# This configuration is only used when llm_processor is set to "NvidiaLLMService"
NvidiaLLMService:
  model: "nvdev/meta/llama-3.1-8b-instruct"

# This configuration is only used when llm_processor is set to "OpenAILLMService"
OpenAILLMService:
  model: "gpt-4o"

CustomViewProcessor:
  confidence_threshold: 0.37
  top_n: 2

FacialGestureProviderProcessor:
  user_stopped_speaking_gesture: "Taunt"
  start_interruption_gesture: "Pensive"
  probability: 0.5

# ADVANCED CONFIGURATION SECTION BELOW
# AnimationGraph service configuration is only needed if your 3D avatar scene has support for gestures and postures.
# Changing these values will not have an effect unless your scene supports them.
AnimationGraphService:
  animation_types:
    posture:
      duration_relevant_animation_name: "posture"
  animations:
    posture:
      default_clip_id: "Attentive"
      clips:
        - clip_id: Talking
          description: "Small gestures with hand and upper body: Avatar is talking"
          duration: -1
          meaning: Emphasizing that Avatar is talking
        - clip_id: Listening
          description: "Small gestures with hand and upper body: Avatar is listening"
          duration: -1
          meaning: Emphasizing that one is listening
        - clip_id: Idle
          description: "Small gestures with hand and upper body: Avatar is idle"
          duration: -1
          meaning: Show the user that the avatar is waiting for something to happen
        - clip_id: Thinking
          description: "Gestures with hand and upper body: Avatar is thinking"
          duration: -1
          meaning: Show the user that the avatar is thinking about its next answer or is trying to remember something
        - clip_id: Attentive
          description: "Small gestures with hand and upper body: Avatar is attentive"
          duration: -1
          meaning: Show the user that the avatar is paying attention to the user
deploy/k8s/ucs/tests/dev/params1.yaml
ADDED
@@ -0,0 +1,12 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

ace-controller:
  image: nvcr.io/nvidia/ace/tokkio-reference-ace-controller
  tag: 5.0.0-beta
  OTEL_SDK_DISABLED: "true"
deploy/scripts/README.md
ADDED
File without changes

examples/README.md
ADDED
@@ -0,0 +1,6 @@
# NVIDIA Pipecat Examples

### Voice Assistant Example
In this example, we showcase how to build a simple speech-to-speech voice assistant pipeline using nvidia-pipecat along with the pipecat-ai library and deploy it for testing. This pipeline uses a WebSocket-based ACETransport, Riva ASR and TTS models, and the NVIDIA LLM Service.

Follow the instructions in [the example directory](./speech-to-speech/README.md) for more details.
examples/nvidia_rag/README.md
ADDED
@@ -0,0 +1,68 @@
# NVIDIA RAG Example

This example shows how to use `ACETransport` to communicate with an NVIDIA RAG server. It supports `Nvidia Riva ASR and TTS`.

## Get Started

From the example directory, run the following commands to create a virtual environment and install the dependencies:

```bash
uv venv
uv sync
source .venv/bin/activate
```

Update the secrets in the `.env` file.

```bash
cp env.example .env # and add your credentials
```

## Deploy NVIDIA RAG server

Follow the instructions here: https://gitlab-master.nvidia.com/chat-labs/OpenSource/ai-chatbot


## Deploy local Riva ASR and TTS models

#### Prerequisites
- You have access and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).

- You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

- You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

#### Download Riva Quick Start

Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts, or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.

```bash
ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
```

#### Deploy Riva Speech Server

From the example directory, run the commands below:

```bash
cd riva_quickstart_v2.19.0
chmod +x riva_init.sh riva_clean.sh riva_start.sh
bash riva_clean.sh ../../utils/riva_config.sh
bash riva_init.sh ../../utils/riva_config.sh
bash riva_start.sh ../../utils/riva_config.sh
cd ..
```

This may take a few minutes the first time and will start the Riva server on `localhost:50051`.

For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).

## Run the bot pipeline

```bash
python examples/nvidia_rag/bot.py
```

This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.

Note: For mic access, you will need to open chrome://flags/ and add http://WORKSTATION_IP:8100 to the "Insecure origins treated as secure" section.
examples/nvidia_rag/bot.py
ADDED
@@ -0,0 +1,144 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""NVIDIA RAG bot."""

import os

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
from nvidia_pipecat.processors.nvidia_context_aggregator import (
    # NvidiaTTSResponseCacher,  # Uncomment to enable speculative speech processing
    create_nvidia_context_aggregator,
)
from nvidia_pipecat.processors.transcript_synchronization import (
    BotTranscriptSynchronization,
    UserTranscriptSynchronization,
)
from nvidia_pipecat.services.nvidia_rag import NvidiaRAGService
from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
    ACETransport,
    ACETransportParams,
)
from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
from nvidia_pipecat.utils.logging import setup_default_ace_logging

load_dotenv(override=True)

setup_default_ace_logging(level="INFO")


async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
    """Create the pipeline to be run.

    Args:
        pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.

    Returns:
        PipelineTask: The configured pipeline task for handling NVIDIA RAG.
    """
    transport = ACETransport(
        websocket=pipeline_metadata.websocket,
        params=ACETransportParams(
            vad_analyzer=SileroVADAnalyzer(),
        ),
    )

    # Please set your nvidia rag collection name here
    rag = NvidiaRAGService(collection_name="nvidia_blogs")

    stt = RivaASRService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        language="en-US",
        sample_rate=16000,
        model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
    )
    tts = RivaTTSService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        voice_id="English-US.Female-1",
        language="en-US",
        zero_shot_quality=20,
        sample_rate=16000,
        model="fastpitch-hifigan-tts",
    )

    messages = [
        {
            "role": "system",
            "content": "You are a helpful Large Language Model. "
            "Your goal is to demonstrate your capabilities in a succinct way. "
            "Your output will be converted to audio so don't include special characters in your answers. "
            "Respond to what the user said in a creative and helpful way.",
        }
    ]

    context = OpenAILLMContext(messages)
    # Required components for Speculative Speech Processing
    # - Nvidia Context aggregator: Handles interim transcripts and early response generation
    #   send_interims=False: Only process final transcripts
    #   Set send_interims=True to process interim transcripts when enabling speculative speech processing
    nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=False)
    # - TTS response cacher: Manages response timing and delivery for natural conversation flow
    # nvidia_tts_response_cacher = NvidiaTTSResponseCacher()  # Uncomment to enable speculative speech processing

    # Used to synchronize the user and bot transcripts in the UI
    stt_transcript_synchronization = UserTranscriptSynchronization()
    tts_transcript_synchronization = BotTranscriptSynchronization()

    pipeline = Pipeline(
        [
            transport.input(),  # Websocket input from client
            stt,  # Speech-To-Text
            stt_transcript_synchronization,
            nvidia_context_aggregator.user(),
            rag,  # NVIDIA RAG
            tts,  # Text-To-Speech
            # Caches TTS responses for coordinated delivery in speculative
            # speech processing
            # nvidia_tts_response_cacher,  # Uncomment to enable speculative speech processing
            tts_transcript_synchronization,
            transport.output(),  # Websocket output to client
            nvidia_context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            send_initial_empty_metrics=True,
            report_only_initial_ttfb=True,
            start_metadata={"stream_id": pipeline_metadata.stream_id},
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick off the conversation.
        messages.append({"role": "user", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMMessagesFrame(messages)])

    return task


app = FastAPI()
app.include_router(websocket_router)
runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")

if __name__ == "__main__":
    uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/nvidia_rag/env.example
ADDED
@@ -0,0 +1,2 @@
# Nvidia API Key
NVIDIA_API_KEY=your_nvidia_api_key_here
examples/nvidia_rag/pyproject.toml
ADDED
@@ -0,0 +1,18 @@
[project]
name = "nvidia-rag-example"
version = "0.1.0"
description = "NVIDIA ACE Pipecat Speech only Examples"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "nvidia-pipecat",
]

[tool.uv.sources]
torch = { index = "pytorch" }
nvidia-pipecat = { path = "../../.", editable = true }

[[tool.uv.index]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
examples/opentelemetry/README.md
ADDED
@@ -0,0 +1,33 @@
# Auto instrumentation

To run the bot with auto-instrumentation, use the following commands:

```shell
$ uv sync --group examples
$ export OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true
$ opentelemetry-instrument \
    --traces_exporter console,otlp \
    --metrics_exporter console,otlp \
    --logs_exporter console,otlp \
    --service_name pipecat-opentelemetry \
    python3 bot.py
```

To receive the traces, you will need to set up an OpenTelemetry
collector. You can use Grafana's LGTM stack by running:

```shell
docker run -it -p 3000:3000 -p 4317:4317 -p 4318:4318 grafana/otel-lgtm
```

Once started, navigate to the Explore tab, select Tempo as the source,
and click on the search tab.

You can now run the Python application to generate a trace.
You should be able to see it in the search tab of Tempo.

You can configure the OTLP exporter with environment variables
(see [here](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/)).

See Python-specific configuration
on [this page](https://opentelemetry.io/docs/zero-code/python/configuration/#python-specific-configuration).
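If you prefer configuring the exporter in code rather than via environment variables, here is a minimal sketch (not part of this example); the endpoint and service name are assumptions matching the `grafana/otel-lgtm` container above:

```python
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Export spans over OTLP/gRPC to the collector (otel-lgtm listens on 4317).
provider = TracerProvider(resource=Resource.create({"service.name": "pipecat-opentelemetry"}))
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint="localhost:4317", insecure=True)))
trace.set_tracer_provider(provider)
```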
examples/opentelemetry/bot.py
ADDED
@@ -0,0 +1,98 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""Example bot demonstrating how to use the tracing utilities."""

import asyncio
import logging
import uuid

from fastapi import FastAPI
from opentelemetry import metrics, trace
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameProcessor

from nvidia_pipecat.utils.tracing import AttachmentStrategy, traceable, traced

app = FastAPI()

tracer = trace.get_tracer("opentelemetry-pipecat-example")

meter = metrics.get_meter("opentelemetry-pipecat-example")

logger = logging.getLogger("opentelemetry")
logger.setLevel(logging.DEBUG)


@traceable
class DummyProcessor(FrameProcessor):
    """Example processor demonstrating how to use the tracing utilities."""

    @traced(attachment_strategy=AttachmentStrategy.NONE)
    async def process_frame(self, frame, direction):
        """Process a frame."""
        await super().process_frame(frame, direction)
        trace.get_current_span().add_event("Before inner")
        with tracer.start_as_current_span("inner") as span:
            span.add_event("inner event")
            await self.child()
            await self.linked()
            await self.none()
        trace.get_current_span().add_event("After inner")
        async for f in self.generator():
            print(f"{f}")
        await super().push_frame(frame, direction)

    @traced
    async def child(self):
        """Example method for the DummyProcessor."""
        # This span is attached as CHILD, meaning that it will
        # be attached to the class span if no parent or to its
        # parent otherwise.
        trace.get_current_span().add_event("child")

    @traced(attachment_strategy=AttachmentStrategy.LINK)
    async def linked(self):
        """Example method for the DummyProcessor."""
        # This span is attached as LINK, meaning it will be attached
        # to the class span but linked to its parent.
        trace.get_current_span().add_event("linked")

    @traced(attachment_strategy=AttachmentStrategy.NONE)
    async def none(self):
        """Example method for the DummyProcessor."""
        # This span is attached as NONE, meaning it will be attached
        # to the class span even if nested under another span.
        trace.get_current_span().add_event("none")

    @traced
    async def generator(self):
        """Example method for the DummyProcessor."""
        yield TextFrame("Hello, ")
        trace.get_current_span().add_event("generated!")
        yield TextFrame("World")


async def main():
    """Main function of the bot."""
    with tracer.start_as_current_span("pipeline-root-span") as span:
        span.set_attribute("stream_id", str(uuid.uuid4()))
        logger.info("Started building pipeline")
        dummy = DummyProcessor()
        logger.info("Built dummy processor")
        pipeline = Pipeline([dummy])
        task = PipelineTask(pipeline)
        await task.queue_frame(TextFrame("Hello, "))
        await task.queue_frame(TextFrame("World"))
        await task.stop_when_done()
        logger.info("Built pipeline task")
        logger.info("Starting pipeline...")
        runner = PipelineRunner(handle_sigint=False)
        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
examples/riva_nmt/README.md
ADDED
@@ -0,0 +1,73 @@
# RIVA NMT Example

This is an example that shows how to perform language translation using RIVA Neural Machine Translation (NMT). It supports `Nvidia Riva ASR and TTS` and `ACETransport`.

## Get Started

From the example directory, run the following commands to create a virtual environment and install the dependencies:

```bash
uv venv
uv sync
source .venv/bin/activate
```

Update the secrets in the `.env` file.

```bash
cp env.example .env # and add your credentials
```

## Deploy local Riva ASR and TTS models

#### Prerequisites
- You have access and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).

- You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

- You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

#### Download Riva Quick Start

Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts, or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.

```bash
ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
```

#### Deploy Riva Speech Server

Set `service_enabled_nmt` to `true` and uncomment whichever model you want for NMT from the list in `../examples/utils/riva_config.sh`.
Set `tts_language_code` to the desired target language code in `../examples/utils/riva_config.sh`.

From the example directory, run the commands below:

```bash
cd riva_quickstart_v2.19.0
chmod +x riva_init.sh riva_clean.sh riva_start.sh
bash riva_clean.sh ../../utils/riva_config.sh
bash riva_init.sh ../../utils/riva_config.sh
bash riva_start.sh ../../utils/riva_config.sh
cd ..
```

This may take a few minutes the first time and will start the Riva server on `localhost:50051`.

For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).


## Using NvidiaLLMService

By default, it connects to a hosted NIM, but it can be configured to connect to a local NIM by setting the `base_url` parameter in `NvidiaLLMService` to the locally deployed LLM endpoint (for example, `base_url = "http://machine_ip:port/v1"`). An API key is required to connect to the hosted NIM. A minimal sketch is shown below.
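A minimal sketch, assuming a locally deployed NIM endpoint; the host and port are placeholders. (Note that this example's bot.py uses pipecat's `NimLLMService`, which takes the same `base_url` parameter.)

```python
import os

from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService

llm = NvidiaLLMService(
    api_key=os.getenv("NVIDIA_API_KEY"),  # required for the hosted NIM
    base_url="http://machine_ip:port/v1",  # omit to use the hosted NIM
    model="meta/llama-3.1-8b-instruct",
)
```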
## Run the bot pipeline

```bash
python examples/riva_nmt/bot.py
```

This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.

Note: For mic access, you will need to update chrome://flags/ and add http://WORKSTATION_IP:8100 in the "Insecure origins treated as secure" section.
examples/riva_nmt/bot.py
ADDED
@@ -0,0 +1,130 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""Riva neural machine translation (NMT) bot.

This bot enables speech-to-speech translation using Riva ASR, NMT and TTS services
with voice activity detection.
"""

import os

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TranscriptionFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.services.nim import NimLLMService
from pipecat.transcriptions.language import Language
from pipecat.utils.time import time_now_iso8601

from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
from nvidia_pipecat.services.riva_nmt import RivaNMTService
from nvidia_pipecat.services.riva_speech import (
    RivaASRService,
    RivaTTSService,
)
from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
    ACETransport,
    ACETransportParams,
)
from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
from nvidia_pipecat.utils.logging import setup_default_ace_logging

load_dotenv(override=True)

setup_default_ace_logging(level="INFO")


async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
    """Create the pipeline to be run.

    Args:
        pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.

    Returns:
        PipelineTask: The configured pipeline task for handling speech-to-speech translation.
    """
    transport = ACETransport(
        websocket=pipeline_metadata.websocket,
        params=ACETransportParams(
            vad_analyzer=SileroVADAnalyzer(),
        ),
    )

    llm = NimLLMService(
        api_key=os.getenv("NVIDIA_API_KEY"),
        model="nvdev/meta/llama-3.1-8b-instruct",
    )

    # Please update the stt and tts language and voice id as needed.
    # The tts voice id for each language can be selected from
    # https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html
    language = Language.ES_US
    voice_id = "English-US.Female-1"

    nmt1 = RivaNMTService(source_language=language, target_language=Language.EN_US)
    nmt2 = RivaNMTService(source_language=Language.EN_US, target_language=language)

    stt = RivaASRService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        language=language,
        sample_rate=16000,
        model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
    )
    tts = RivaTTSService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        voice_id=voice_id,
        language=language,
        zero_shot_quality=20,
        sample_rate=16000,
        model="fastpitch-hifigan-tts",
    )

    sentence_aggregator = SentenceAggregator()

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            nmt1,
            llm,
            sentence_aggregator,
            nmt2,
            tts,
            transport.output(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            send_initial_empty_metrics=True,
            report_only_initial_ttfb=True,
            start_metadata={"stream_id": pipeline_metadata.stream_id},
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick off the conversation.
        await task.queue_frames([TranscriptionFrame("Contar una historia.", "", time_now_iso8601())])

    return task


app = FastAPI()
app.include_router(websocket_router)
runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")

if __name__ == "__main__":
    uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/riva_nmt/env.example
ADDED
@@ -0,0 +1,2 @@
# Nvidia API Key
NVIDIA_API_KEY=your_nvidia_api_key_here
examples/riva_nmt/pyproject.toml
ADDED
@@ -0,0 +1,18 @@
[project]
name = "riva-nmt-example"
version = "0.1.0"
description = "NVIDIA ACE Pipecat Speech only Examples"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "nvidia-pipecat",
]

[tool.uv.sources]
torch = { index = "pytorch" }
nvidia-pipecat = { path = "../../.", editable = true }

[[tool.uv.index]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
examples/riva_speech_langchain/README.md
ADDED
@@ -0,0 +1,64 @@
# Riva Speech Langchain Example

This is an example that shows how to use `ACETransport` to communicate with Langchain. It supports `Nvidia Riva ASR and TTS`.
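As a quick orientation, the core wiring condenses to the following sketch (drawn from [bot.py](bot.py); `ChatOpenAI` requires the `OPENAI_API_KEY` configured in `.env`): a LangChain runnable wrapped in `RunnableWithMessageHistory` is handed to Pipecat's `LangchainProcessor`, which then sits between STT and TTS in the pipeline.

```python
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from pipecat.processors.frameworks.langchain import LangchainProcessor

message_store = {}

def get_session_history(session_id):
    # One ChatMessageHistory per conversation, keyed by session id.
    if session_id not in message_store:
        message_store[session_id] = ChatMessageHistory()
    return message_store[session_id]

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Be nice and helpful. Answer very briefly."),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
history_chain = RunnableWithMessageHistory(
    chain,
    get_session_history,
    history_messages_key="chat_history",
    input_messages_key="input",
)
lc = LangchainProcessor(history_chain)  # drops between STT and TTS in the pipeline
```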
## Get Started

From the example directory, run the following commands to create a virtual environment and install the dependencies:

```bash
uv venv
uv sync
source .venv/bin/activate
```

Update the secrets in the `.env` file.

```bash
cp env.example .env # and add your credentials
```

## Deploy local Riva ASR and TTS models

#### Prerequisites
- You have access and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).

- You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

- You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

#### Download Riva Quick Start

Go to the Riva Quick Start for [Data center](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/resources/riva_quickstart/files?version=2.19.0). Select the File Browser tab to download the scripts, or use [the NGC CLI tool](https://ngc.nvidia.com/setup/installers/cli) to download from the command line.

```bash
ngc registry resource download-version nvidia/riva/riva_quickstart:2.19.0
```

#### Deploy Riva Speech Server

From the example directory, run the commands below:

```bash
cd riva_quickstart_v2.19.0
chmod +x riva_init.sh riva_clean.sh riva_start.sh
bash riva_clean.sh ../../utils/riva_config.sh
bash riva_init.sh ../../utils/riva_config.sh
bash riva_start.sh ../../utils/riva_config.sh
cd ..
```

This may take a few minutes the first time and will start the Riva server on `localhost:50051`.

For more info, you can refer to the [Riva Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html).


## Run the bot pipeline

```bash
python examples/riva_speech_langchain/bot.py
```

This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.

Note: For mic access, you will need to update chrome://flags/ and add http://WORKSTATION_IP:8100 in the "Insecure origins treated as secure" section.
examples/riva_speech_langchain/bot.py
ADDED
@@ -0,0 +1,150 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""Riva speech langchain bot."""

import os

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantResponseAggregator,
    LLMUserResponseAggregator,
)
from pipecat.processors.frameworks.langchain import LangchainProcessor

from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
from nvidia_pipecat.services.riva_speech import (
    RivaASRService,
    RivaTTSService,
)
from nvidia_pipecat.transports.network.ace_fastapi_websocket import (
    ACETransport,
    ACETransportParams,
)
from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
from nvidia_pipecat.utils.logging import setup_default_ace_logging

load_dotenv(override=True)

setup_default_ace_logging(level="INFO")

message_store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Get the session history."""
    if session_id not in message_store:
        message_store[session_id] = ChatMessageHistory()
    return message_store[session_id]


async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
    """Create the pipeline to be run.

    Args:
        pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.

    Returns:
        PipelineTask: The configured pipeline task for handling speech-to-speech conversation.
    """
    transport = ACETransport(
        websocket=pipeline_metadata.websocket,
        params=ACETransportParams(
            vad_analyzer=SileroVADAnalyzer(),
        ),
    )

    stt = RivaASRService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        language="en-US",
        sample_rate=16000,
        model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
    )
    tts = RivaTTSService(
        server="localhost:50051",
        api_key=os.getenv("NVIDIA_API_KEY"),
        voice_id="English-US.Female-1",
        language="en-US",
        zero_shot_quality=20,
        sample_rate=16000,
        model="fastpitch-hifigan-tts",
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Be nice and helpful. Answer very briefly and without special characters like `#` or `*`. "
                "Your response will be synthesized to voice and those characters will create unnatural sounds.",
            ),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    chain = prompt | ChatOpenAI(model="gpt-4o", temperature=0.7)
    history_chain = RunnableWithMessageHistory(
        chain,
        get_session_history,
        history_messages_key="chat_history",
        input_messages_key="input",
    )

    lc = LangchainProcessor(history_chain)

    tma_in = LLMUserResponseAggregator()
    tma_out = LLMAssistantResponseAggregator()

    pipeline = Pipeline(
        [
            transport.input(),  # Websocket input from client
            stt,  # Speech-To-Text
            tma_in,  # User responses
            lc,  # Langchain processor
            tts,  # Text-To-Speech
            transport.output(),  # Websocket output to client
            tma_out,  # LLM responses
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            send_initial_empty_metrics=True,
            report_only_initial_ttfb=True,
            start_metadata={"stream_id": pipeline_metadata.stream_id},
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick off the conversation.
        messages = [{"content": "Please briefly introduce yourself to the user."}]
        await task.queue_frames([LLMMessagesFrame(messages)])

    return task


app = FastAPI()
app.include_router(websocket_router)
runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")

if __name__ == "__main__":
    uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)
examples/riva_speech_langchain/env.example
ADDED
@@ -0,0 +1,2 @@
# OPENAI API Key
OPENAI_API_KEY=your_openai_api_key_here
examples/riva_speech_langchain/pyproject.toml
ADDED
@@ -0,0 +1,21 @@
[project]
name = "riva-speech-langchain-example"
version = "0.1.0"
description = "NVIDIA ACE Pipecat Speech only Examples"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "langchain>=0.3.19",
    "langchain-community>=0.3.18",
    "langchain-openai>=0.3.6",
    "nvidia-pipecat",
]

[tool.uv.sources]
torch = { index = "pytorch" }
nvidia-pipecat = { path = "../../.", editable = true }

[[tool.uv.index]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
examples/speech-to-speech/Dockerfile
ADDED
@@ -0,0 +1,40 @@
# Base image
FROM python:3.12-slim

# Image metadata
LABEL maintainer="NVIDIA"
LABEL description="Speech-to-Speech example"
LABEL version="1.0"

# Environment setup
ENV PYTHONUNBUFFERED=1

# System dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir --upgrade pip uv

# App directory setup
WORKDIR /app

# App files
COPY pyproject.toml uv.lock \
    LICENSE README.md NVIDIA_PIPECAT.md \
    ./
COPY src/ ./src/
COPY examples/static/ ./examples/static/
COPY examples/speech-to-speech/ ./examples/speech-to-speech/

# Example app directory
WORKDIR /app/examples/speech-to-speech

# Dependencies
RUN uv sync --frozen

# Port configuration
EXPOSE 8100

# Start command
CMD ["uv", "run", "bot.py"]
examples/speech-to-speech/README.md
ADDED
@@ -0,0 +1,154 @@
# Speech to Speech Example

In this example, we showcase how to build a simple speech-to-speech voice assistant pipeline using nvidia-pipecat along with the pipecat-ai library, and deploy it for testing. This pipeline uses a Websocket-based ACETransport, Riva ASR and TTS models, and the NVIDIA LLM Service. We recommend first following [the Pipecat documentation](https://docs.pipecat.ai/getting-started/core-concepts) or the Pipecat overview section of [the ACE Controller documentation](https://docs.nvidia.com/ace/ace-controller-microservice/latest/user-guide.html#pipecat-overview) to understand core concepts.

## Prerequisites

1. Copy and configure the environment file:
```bash
cp env.example .env # and add your credentials
```

2. Ensure you have the required API keys:
   - NVIDIA_API_KEY - Required for accessing NIM ASR, TTS and LLM models
   - (Optional) ZEROSHOT_TTS_NVIDIA_API_KEY - Required for zero-shot TTS

## Option 1: Deploy Using Docker

#### Prerequisites
- You have access and are logged into NVIDIA NGC. For step-by-step instructions, refer to [the NGC Getting Started Guide](https://docs.nvidia.com/ngc/ngc-overview/index.html#registering-activating-ngc-account).

- You have access to an NVIDIA Volta™, NVIDIA Turing™, or an NVIDIA Ampere architecture-based A100 GPU. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

- You have Docker installed with support for NVIDIA GPUs. For more information, refer to [the Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html#support-matrix).

From the examples/speech-to-speech directory, run the command below:

```bash
docker compose up -d
```

## Option 2: Deploy using Python environment

#### Prerequisites
From the examples/speech-to-speech directory, run the following commands to create a virtual environment and install the dependencies:

```bash
# Create and activate virtual environment
uv venv
source .venv/bin/activate

# Install dependencies
uv sync
```

Make sure you've configured the `.env` file with your API keys before proceeding.

After making all required changes/customizations in bot.py, you can deploy the pipeline using the command below:

```bash
python bot.py
```

## Start interacting with the application

This will host the static web client along with the ACE controller server. Visit `http://WORKSTATION_IP:8100/static/index.html` in your browser to start a session.

Note: For mic access, you will need to update chrome://flags/ and add http://WORKSTATION_IP:8100 in the "Insecure origins treated as secure" section.

If you want to update the port, make changes in the `uvicorn.run` command in [bot.py](bot.py) and the `wsUrl` in [static/index.html](../static/index.html).

## Bot customizations

### Enabling Speculative Speech Processing

Speculative speech processing reduces bot response latency by working directly on early interim user transcripts from Riva ASR instead of waiting for final transcripts. This feature only works when using Riva ASR.

- Refer to the comments in [bot.py](bot.py) for guidance on enabling or disabling specific frame processors as needed; the swap is sketched after this list.

- See the [ACE Controller Microservice documentation on Speculative Speech Processing](https://docs.nvidia.com/ace/ace-controller-microservice/1.0/user-guide.html#speculative-speech-processing) for more details.
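The swap itself is a two-line change, mirroring the lines that are already present (commented out) in bot.py; `context` here is the `OpenAILLMContext` created earlier in bot.py:

```python
from nvidia_pipecat.processors.nvidia_context_aggregator import (
    NvidiaTTSResponseCacher,
    create_nvidia_context_aggregator,
)

# Replaces llm.create_context_aggregator(context) with an aggregator that
# also consumes interim transcripts ...
nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=True)
# ... and adds a response cacher between tts and transport.output().
nvidia_tts_response_cacher = NvidiaTTSResponseCacher()
```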
### Switching ASR, LLM, and TTS Models

You can easily customize the ASR (Automatic Speech Recognition), LLM (Large Language Model), and TTS (Text-to-Speech) services by configuring environment variables. This allows you to switch between NIM cloud-hosted models and locally deployed models.

The following environment variables control the endpoints and models:

- `RIVA_ASR_URL`: Address of the Riva ASR (speech-to-text) service (e.g., `localhost:50051` for local, `grpc.nvcf.nvidia.com:443` for the cloud endpoint).
- `RIVA_TTS_URL`: Address of the Riva TTS (text-to-speech) service (e.g., `localhost:50051` for local, `grpc.nvcf.nvidia.com:443` for the cloud endpoint).
- `NVIDIA_LLM_URL`: URL for the NVIDIA LLM service (e.g., `http://<machine-ip>:8000/v1` for local, `https://integrate.api.nvidia.com/v1` for the cloud endpoint).

You can set the model, language, and voice using the `RIVA_ASR_MODEL`, `RIVA_TTS_MODEL`, `NVIDIA_LLM_MODEL`, `RIVA_ASR_LANGUAGE`, `RIVA_TTS_LANGUAGE`, and `RIVA_TTS_VOICE_ID` environment variables.

Update these variables in your Docker Compose configuration to match your deployment and desired models; the sketch below shows how bot.py consumes them. For more details on available models and configuration options, refer to the [NIM NVIDIA Magpie](https://build.nvidia.com/nvidia/magpie-tts-multilingual), [NIM NVIDIA Parakeet](https://build.nvidia.com/nvidia/parakeet-ctc-1_1b-asr/api), and [NIM META Llama](https://build.nvidia.com/meta/llama-3_1-8b-instruct) documentation.
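A condensed sketch of how bot.py resolves these variables; the defaults shown are the ones in bot.py, and docker-compose.yml overrides the URLs with the local service addresses:

```python
import os

from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService
from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService

llm = NvidiaLLMService(
    api_key=os.getenv("NVIDIA_API_KEY"),
    base_url=os.getenv("NVIDIA_LLM_URL", "https://integrate.api.nvidia.com/v1"),
    model=os.getenv("NVIDIA_LLM_MODEL", "meta/llama-3.1-8b-instruct"),
)
stt = RivaASRService(
    server=os.getenv("RIVA_ASR_URL", "localhost:50051"),
    api_key=os.getenv("NVIDIA_API_KEY"),
    language=os.getenv("RIVA_ASR_LANGUAGE", "en-US"),
    model=os.getenv("RIVA_ASR_MODEL", "parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble"),
)
tts = RivaTTSService(
    server=os.getenv("RIVA_TTS_URL", "localhost:50051"),
    api_key=os.getenv("NVIDIA_API_KEY"),
    voice_id=os.getenv("RIVA_TTS_VOICE_ID", "Magpie-Multilingual.EN-US.Sofia"),
    model=os.getenv("RIVA_TTS_MODEL", "magpie_tts_ensemble-Magpie-Multilingual"),
    language=os.getenv("RIVA_TTS_LANGUAGE", "en-US"),
)
```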
#### Example: Switching to the Llama 3.3-70B Model

To use larger LLMs like the Llama 3.3-70B model in your deployment, you need to update both the Docker Compose configuration and the environment variables for your Python application. Follow these steps:

- In your `docker-compose.yml` file, find the `nvidia-llm` service section.
- Change the NIM image to the 70B model: `nvcr.io/nim/meta/llama-3.3-70b-instruct:latest`
- Update the `device_ids` to allocate at least two GPUs (for example, `['2', '3']`).
- Update the environment variable under the python-app service to `NVIDIA_LLM_MODEL=meta/llama-3.3-70b-instruct`

#### Setting up the Zero-shot Magpie Latest Model

Follow these steps to configure and use the latest Zero-shot Magpie TTS model:

1. **Update Docker Compose Configuration**

   Modify the `riva-tts-magpie` service in your docker-compose file with the following configuration:

   ```yaml
   riva-tts-magpie:
     image: <magpie-tts-zeroshot-image:version> # Replace this with the actual image tag
     environment:
       - NGC_API_KEY=${ZEROSHOT_TTS_NVIDIA_API_KEY}
       - NIM_HTTP_API_PORT=9000
       - NIM_GRPC_API_PORT=50051
     ports:
       - "49000:50051"
     shm_size: 16GB
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
               device_ids: ['0']
               capabilities: [gpu]
   ```

   - Ensure your ZEROSHOT_TTS_NVIDIA_API_KEY key is properly set in your `.env` file:
   ```bash
   ZEROSHOT_TTS_NVIDIA_API_KEY=
   ```

2. **Configure TTS Voice Settings**

   Update the following environment variables under the `python-app` service:

   ```bash
   RIVA_TTS_VOICE_ID=Magpie-ZeroShot.Female-1
   RIVA_TTS_MODEL=magpie_tts_ensemble-Magpie-ZeroShot
   ```

3. **Zero-shot Audio Prompt Configuration**

   To use a custom voice with zero-shot learning:

   - Add your audio prompt file to the workspace
   - Mount the audio file into your container by adding a volume in your `docker-compose.yml` under the `python-app` service:
   ```yaml
   services:
     python-app:
       # ... existing code ...
       volumes:
         - ./audio_prompts:/app/audio_prompts
   ```
   - Set the `ZERO_SHOT_AUDIO_PROMPT` environment variable to the path relative to your application root:
   ```yaml
   environment:
     - ZERO_SHOT_AUDIO_PROMPT=audio_prompts/voice_sample.wav # Path relative to app root
   ```

Note: The zero-shot audio prompt is only required when using the Magpie Zero-shot model. For standard Magpie multilingual models, this configuration should be omitted.
examples/speech-to-speech/bot.py
ADDED
@@ -0,0 +1,194 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License

"""Speech-to-speech conversation bot."""

import os
from pathlib import Path

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata

# from nvidia_pipecat.processors.nvidia_context_aggregator import (
#     NvidiaTTSResponseCacher,
#     create_nvidia_context_aggregator,
# )
from nvidia_pipecat.processors.transcript_synchronization import (
    BotTranscriptSynchronization,
    UserTranscriptSynchronization,
)
from nvidia_pipecat.services.blingfire_text_aggregator import BlingfireTextAggregator
from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService
from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
from nvidia_pipecat.transports.network.ace_fastapi_websocket import ACETransport, ACETransportParams
from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
from nvidia_pipecat.utils.logging import setup_default_ace_logging

load_dotenv(override=True)

setup_default_ace_logging(level="DEBUG")


async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
    """Create the pipeline to be run.

    Args:
        pipeline_metadata (PipelineMetadata): Metadata containing websocket and other pipeline configuration.

    Returns:
        PipelineTask: The configured pipeline task for handling speech-to-speech conversation.
    """
    transport = ACETransport(
        websocket=pipeline_metadata.websocket,
        params=ACETransportParams(
            vad_analyzer=SileroVADAnalyzer(),
            audio_out_10ms_chunks=20,
        ),
    )

    llm = NvidiaLLMService(
        api_key=os.getenv("NVIDIA_API_KEY"),
        base_url=os.getenv("NVIDIA_LLM_URL", "https://integrate.api.nvidia.com/v1"),
        model=os.getenv("NVIDIA_LLM_MODEL", "meta/llama-3.1-8b-instruct"),
    )

    stt = RivaASRService(
        server=os.getenv("RIVA_ASR_URL", "localhost:50051"),
        api_key=os.getenv("NVIDIA_API_KEY"),
        language=os.getenv("RIVA_ASR_LANGUAGE", "en-US"),
        sample_rate=16000,
        model=os.getenv("RIVA_ASR_MODEL", "parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble"),
    )

    tts = RivaTTSService(
        server=os.getenv("RIVA_TTS_URL", "localhost:50051"),
        api_key=os.getenv("NVIDIA_API_KEY"),
        voice_id=os.getenv("RIVA_TTS_VOICE_ID", "Magpie-Multilingual.EN-US.Sofia"),
        model=os.getenv("RIVA_TTS_MODEL", "magpie_tts_ensemble-Magpie-Multilingual"),
        language=os.getenv("RIVA_TTS_LANGUAGE", "en-US"),
        zero_shot_audio_prompt_file=(
            Path(os.getenv("ZERO_SHOT_AUDIO_PROMPT")) if os.getenv("ZERO_SHOT_AUDIO_PROMPT") else None
        ),
        text_aggregator=BlingfireTextAggregator(),
    )

    # Used to synchronize the user and bot transcripts in the UI
    stt_transcript_synchronization = UserTranscriptSynchronization()
    tts_transcript_synchronization = BotTranscriptSynchronization()

    # System prompt can be changed to fit the use case
    messages = [
        {
            "role": "system",
            "content": (
                "### CONVERSATION CONSTRAINTS\n"
                "STRICTLY answer in 1-2 sentences or less than 200 characters. "
                "This must be followed very rigorously; it is crucial.\n"
                "Output must be plain text, unformatted, and without any special characters - "
                "suitable for direct conversion to speech.\n"
                "DO NOT use bullet points, lists, code samples, or headers in your spoken responses.\n"
                "STRICTLY be short, concise, and to the point. Avoid elaboration, explanation, or repetition.\n"
                "Pronounce numbers, dates, and special terms. For phone numbers, read digits slowly and separately. "
                "For times, use natural phrasing like 'seven o'clock a.m.' instead of 'seven zero zero.'\n"
                "Silently correct likely transcription errors by inferring the intended meaning without saying "
                "`did you mean..` or `I think you meant..`. "
                "Prioritize what the user meant, not just the literal words.\n"
                "### OPENING PROTOCOL\n"
                "STRICTLY START CONVERSATION WITH 'Thank you for calling GreenForce Garden. "
                "What can I do for you today?'\n"
                "### CLOSING PROTOCOL\n"
                "End with either 'Have a green day!' or 'Have a good one.' Use one consistently per call.\n"
                "### YOU ARE ...\n"
                "You are Flora, the voice of 'GreenForce Garden', a San Francisco flower shop "
                "powered by NVIDIA GPUs.\n"
                "You're cool, upbeat, and love making people smile with your floral know-how.\n"
                "You embody warmth, expertise, and dedication to creating a perfect floral experience.\n"
                "### CONVERSATION GUIDELINES\n"
                "CORE RESPONSIBILITIES - Order Management, Consultation, Inventory Guidance, "
                "Delivery Coordination, Customer Care, Giving Fun Advice\n"
                "While taking orders, have occasion understanding, ask for recipient details, "
                "customer preferences, and delivery planning\n"
                "SUGGEST cards with personal messages\n"
                "SUGGEST seasonal recommendations (e.g., spring: tulips, pastels; romance: roses, peonies) "
                "and occasion-specific details (e.g., elegant wrapping).\n"
                "SUGGEST complementary items: vases, chocolates, cards. "
                "Also provide care instructions for long-lasting enjoyment.\n"
                "STRICTLY Confirm all order details before finalizing: flowers, colors, "
                "delivery address, timing\n"
                "STRICTLY Collect complete contact information for order updates\n"
                "STRICTLY Provide ORDER CONFIRMATION with ESTIMATED DELIVERY TIMES\n"
                "OFFER MULTIPLE PAYMENT OPTIONS (e.g., card, cash, online) and confirm SECURE PROCESSING.\n"
                "STRICTLY If you are unsure about a request, ask clarifying questions "
                "to ensure you understand before responding."
            ),
        },
    ]

    context = OpenAILLMContext(messages)

    # Comment out the below line when enabling Speculative Speech Processing
    context_aggregator = llm.create_context_aggregator(context)

    # Uncomment the below line to enable speculative speech processing
    # nvidia_context_aggregator = create_nvidia_context_aggregator(context, send_interims=True)
    # Uncomment the below line to enable speculative speech processing
    # nvidia_tts_response_cacher = NvidiaTTSResponseCacher()

    pipeline = Pipeline(
        [
            transport.input(),  # Websocket input from client
            stt,  # Speech-To-Text
            stt_transcript_synchronization,
            # Comment out the below line when enabling Speculative Speech Processing
            context_aggregator.user(),
            # Uncomment the below line to enable speculative speech processing
            # nvidia_context_aggregator.user(),
            llm,  # LLM
            tts,  # Text-To-Speech
            # Caches TTS responses for coordinated delivery in speculative
            # speech processing
            # nvidia_tts_response_cacher,  # Uncomment to enable speculative speech processing
            tts_transcript_synchronization,
            transport.output(),  # Websocket output to client
            context_aggregator.assistant(),
            # Uncomment the below line to enable speculative speech processing
            # nvidia_context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            send_initial_empty_metrics=True,
            start_metadata={"stream_id": pipeline_metadata.stream_id},
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick off the conversation.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMMessagesFrame(messages)])

    return task


app = FastAPI()
app.include_router(websocket_router)
runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
app.mount("/static", StaticFiles(directory=os.getenv("STATIC_DIR", "../static")), name="static")

if __name__ == "__main__":
    uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=4)
examples/speech-to-speech/docker-compose.yml
ADDED
@@ -0,0 +1,94 @@
name: voice-agent

services:
  riva-tts-magpie:
    image: nvcr.io/nim/nvidia/magpie-tts-multilingual:latest
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=9000
      - NIM_GRPC_API_PORT=50051
    ports:
      - "19000:9000"
      - "50151:50051"
    volumes:
      - nim_cache:/opt/nim/.cache
    shm_size: 8GB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]

  riva-asr-parakeet:
    image: nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us:latest
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=9001
      - NIM_GRPC_API_PORT=50052
      - NIM_TAGS_SELECTOR=mode=str,vad=silero
    ports:
      - "19001:9001"
      - "50152:50052"
    volumes:
      - nim_cache:/opt/nim/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['1']
              capabilities: [gpu]

  nvidia-llm:
    image: nvcr.io/nim/meta/llama-3.1-8b-instruct:latest
    environment:
      - NGC_API_KEY=${NVIDIA_API_KEY}
      - NIM_HTTP_API_PORT=8000
    ports:
      - "18000:8000"
    volumes:
      - nim_cache:/opt/nim/.cache
    shm_size: 16GB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['2']
              capabilities: [gpu]

  python-app:
    build:
      context: ../../
      dockerfile: examples/speech-to-speech/Dockerfile

    ports:
      - "8100:8100"
    environment:
      - NVIDIA_API_KEY=${NVIDIA_API_KEY}

      - RIVA_ASR_URL=riva-asr-parakeet:50052
      - RIVA_TTS_URL=riva-tts-magpie:50051
      - NVIDIA_LLM_URL=http://nvidia-llm:8000/v1

      - RIVA_ASR_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble
      - RIVA_TTS_MODEL=magpie_tts_ensemble-Magpie-Multilingual
      - NVIDIA_LLM_MODEL=meta/llama-3.1-8b-instruct

      - RIVA_ASR_LANGUAGE=en-US
      - RIVA_TTS_LANGUAGE=en-US
      - RIVA_TTS_VOICE_ID=Magpie-Multilingual.EN-US.Sofia
      - ZERO_SHOT_AUDIO_PROMPT= # set this only if using a zero-shot TTS model with a custom audio prompt

      - STATIC_DIR=/app/static
    volumes:
      - ../static:/app/static
    depends_on:
      - riva-tts-magpie
      - riva-asr-parakeet
      - nvidia-llm

volumes:
  nim_cache:
examples/speech-to-speech/env.example
ADDED
@@ -0,0 +1,2 @@
NVIDIA_API_KEY=
ZEROSHOT_TTS_NVIDIA_API_KEY=
examples/speech-to-speech/pyproject.toml
ADDED
@@ -0,0 +1,18 @@
[project]
name = "speech-to-speech-example"
version = "0.1.0"
description = "NVIDIA ACE Pipecat Speech only Examples"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "nvidia-pipecat",
]

[tool.uv.sources]
torch = { index = "pytorch" }
nvidia-pipecat = { path = "../../.", editable = true }

[[tool.uv.index]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
examples/speech-to-speech/uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
examples/speech_planner/Dockerfile
ADDED
@@ -0,0 +1,40 @@
# Base image
FROM python:3.12-slim

# Image metadata
LABEL maintainer="NVIDIA"
LABEL description="Speech Planner example"
LABEL version="1.0"

# Environment setup
ENV PYTHONUNBUFFERED=1

# System dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir --upgrade pip uv

# App directory setup
WORKDIR /app

# App files
COPY pyproject.toml uv.lock \
    LICENSE README.md NVIDIA_PIPECAT.md \
    ./
COPY src/ ./src/
COPY examples/static/ ./examples/static/
COPY examples/speech_planner/ ./examples/speech_planner/

# Example app directory
WORKDIR /app/examples/speech_planner

# Dependencies
RUN uv sync --frozen

# Port configuration
EXPOSE 8100

# Start command
CMD ["uv", "run", "bot.py"]
|