Spaces:
Running
Running
File size: 7,272 Bytes
53ea588 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# Identity and descriptive metadata for the ACE Controller microservice.
type: msapplication
specVersion: 2.5.0
name: ucf.svc.ace-controller
chartName: ace-controller
description: ACE Controller
version: 1.0.2
displayName: 'ACE Controller Microservice'
# Classification fields; tags and keywords are intentionally empty lists.
category:
  functional: 'Conversational AI'
  industry: 'General'
tags: []
keywords: []
nSpectId: NSPECT-XGIZ-EB0C
publish: false
# Outbound endpoints this microservice may connect to. Every entry is optional
# (mandatory: false). The names are referenced from the container env section
# as $egress.<name>.address / $egress.<name>.port.
egress-endpoints:
- name: "redis"
  description: Redis message broker
  scheme: asyncio
  protocol: TCP
  mandatory: false
  data-flow: in-out
- name: "riva-speech"
  description: Riva Speech Skills API
  scheme: grpc
  protocol: TCP
  mandatory: false
  data-flow: in-out
- name: "animgraph-http"
  description: Animation Graph HTTP API
  scheme: http
  protocol: TCP
  mandatory: false
  data-flow: out
- name: "animgraph-grpc"
  description: Animation Graph GRPC API
  scheme: grpc
  protocol: TCP
  mandatory: false
  data-flow: out
- name: "a2f-grpc"
  description: Audio2Face service GRPC API
  scheme: grpc
  protocol: TCP
  mandatory: false
  data-flow: out
# Inbound endpoints exposed by this microservice. The endpoint name is the same
# string as the port name declared on the ace-controller-service component.
ingress-endpoints:
- name: http-api
  description: ACE Controller REST API
  scheme: http
  data-flow: in-out
# Optional secrets. Each one is mounted under /secrets as a separate file, so
# the fileName values must stay unique.
secrets:
- name: nvidia-api-key-secret
  description: Secret for NVIDIA API key
  mandatory: false
  mountPath: /secrets
  fileName: nvidia_api_key.txt
- name: openai-key-secret
  description: Secret for passing OpenAI key
  mandatory: false
  mountPath: /secrets
  fileName: openai_api_key.txt
- name: elevenlabs-api-key-secret
  description: Secret for ElevenLabs API key
  mandatory: false
  mountPath: /secrets
  fileName: elevenlabs_api_key.txt
- name: custom-env-secrets
  description: Secret for passing custom env variables and API keys
  mandatory: false
  mountPath: /secrets
  fileName: custom.env
# Files that must be supplied from outside the microservice package.
# config.yaml is required and is a single file, not a directory.
externalFiles:
- name: config.yaml
  description: config
  mandatory: true
  isDirectory: false
# Deployment-time parameters. Each `#>` annotation pair describes the parameter
# directly above it. All values are strings; the container component forwards
# them as environment variables via $params.<name>.
params:
  OTEL_SDK_DISABLED: 'false'
  #> description: set to true to disable the OpenTelemetry SDK; when false, tracing data will be exported
  #> type: string
  OTEL_SERVICE_NAME: ace-controller
  #> description: service name reported with exported OTel data
  #> type: string
  OTEL_EXPORTER_OTLP_ENDPOINT: ""
  #> description: endpoint for OTel collector
  #> type: string
  OTEL_EXPORTER_OTLP_PROTOCOL: grpc
  #> description: protocol for exporting OTel data
  #> type: string
  DEV: "0"
  #> description: whether to activate dependencies hot reloading or not
  #> type: string
  image: "nvcr.io/nvidia/ace/tokkio-reference-ace-controller"
  #> description: pipeline image repo
  #> type: string
  tag: "5.0.0"
  #> description: pipeline image tag
  #> type: string
# Test definitions. Values in angle brackets are unfilled template placeholders
# and must be replaced before the test can be used.
tests:
- name: dev-params1
  app: tests/dev/app.yaml
  params: tests/dev/params1.yaml
  ciTrigger: false
  timeout: 10
  duration: 10
  # Whether to install foundational services
  installPreReqs: true
  # Kubernetes namespace
  namespace: default
  gpuNodeLabels: ""
  # Either watch every pod, or set this to false and list the pods to watch
  # under watchPods.
  watchAllPods: true
  watchPods:
  - <pod-name-regex>
  # At least one tester pod is required
  testerPods:
  # Name of the test pod
  - name: testpod1
    # Signature to look for in the logs indicating start of tests (regex accepted)
    startSignature: <START>
    # Signature to look for in the logs indicating end of tests (regex accepted)
    endSignature: <END>
    # Signatures that indicate test failures (regex accepted)
    errorSignatures:
    - <REGEX1>
    - <REGEX2>
---
spec:
# Workload definition for the controller pod.
# statefulSetServiceName refers to the ace-controller-service component
# declared later in this spec list.
- name: ace-controller-deployment
  type: ucf.k8s.app.deployment
  parameters:
    # NOTE(review): the spelling `statefull` is kept exactly as found; confirm
    # it is the value the deployment component expects before "correcting" it.
    apptype: statefull
    statefulSetServiceName: ace-controller-service
    extraSpecs:
      podManagementPolicy: Parallel
# Main application container.
# - Image repository and tag come from the `image` and `tag` params.
# - Service URLs are assembled from the $egress.<endpoint> placeholders.
# - All three probes are TCP connect checks against the named port `http`.
- name: 'ace-controller-container'
  type: ucf.k8s.container
  parameters:
    image:
      repository: $params.image
      tag: $params.tag
      pullPolicy: Always
    # Source the environment script, then start the entrypoint, in one shell.
    command:
    - '/bin/bash'
    - '-c'
    args:
    - 'source /opt/scripts/env.sh && /code/entrypoint.sh'
    env:
    # Downstream service addresses.
    - name: ANIMGRAPH_URL
      value: 'http://$egress.animgraph-http.address:$egress.animgraph-http.port'
    - name: ANIMGRAPH_GRPC_URL
      value: '$egress.animgraph-grpc.address:$egress.animgraph-grpc.port'
    - name: REDIS_URL
      value: 'redis://$egress.redis.address:$egress.redis.port'
    - name: A2F_GRPC_URL
      value: '$egress.a2f-grpc.address:$egress.a2f-grpc.port'
    - name: RIVA_SERVER_URL
      value: '$egress.riva-speech.address:$egress.riva-speech.port'
    # Values forwarded unchanged from the params section.
    - name: DEV
      value: $params.DEV
    - name: OTEL_SDK_DISABLED
      value: $params.OTEL_SDK_DISABLED
    - name: OTEL_SERVICE_NAME
      value: $params.OTEL_SERVICE_NAME
    - name: OTEL_EXPORTER_OTLP_ENDPOINT
      value: $params.OTEL_EXPORTER_OTLP_ENDPOINT
    - name: OTEL_EXPORTER_OTLP_PROTOCOL
      value: $params.OTEL_EXPORTER_OTLP_PROTOCOL
    # Fixed value, not parameterized.
    - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
      value: 'true'
    - name: IMAGE_NAME
      value: $params.image
    - name: IMAGE_TAG
      value: $params.tag
    ports:
    - containerPort: 8000
      name: http
    # With failureThreshold 30 and periodSeconds 10, startup may take roughly
    # 300 s beyond the 20 s initial delay before the probe gives up.
    startupProbe:
      tcpSocket:
        port: http
      initialDelaySeconds: 20
      failureThreshold: 30
      periodSeconds: 10
    livenessProbe:
      tcpSocket:
        port: http
      initialDelaySeconds: 20
      periodSeconds: 20
      timeoutSeconds: 5
    readinessProbe:
      tcpSocket:
        port: http
      initialDelaySeconds: 20
      periodSeconds: 20
      timeoutSeconds: 5
# Persistent volume claim: 5Gi, ReadWriteOnce, storage class mdx-local-path.
- name: app-storage
  type: ucf.k8s.pvc
  parameters:
    spec:
      storageClassName: mdx-local-path
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 5Gi
# Pod volume backed by a persistent volume claim.
# NOTE(review): the claim name looks like the chart name joined with the
# app-storage component name; confirm that is how the claim gets named.
- name: app-storage-volume
  type: ucf.k8s.volume
  parameters:
    persistentVolumeClaim:
      claimName: ace-controller-app-storage
# Mounts the app-storage-volume volume at /code.
# NOTE(review): the container args in this file run /code/entrypoint.sh, so
# the mounted volume covers the directory holding the entrypoint; confirm the
# volume is expected to provide (or not hide) that script.
- name: app-storage-mount
  type: ucf.appspec.defaultVolumeMount
  parameters:
    name: app-storage-volume
    mountPath: /code
# Pod restart policy. Accepted values: Always / OnFailure / Never.
- name: restartPolicy
  type: ucf.k8s.restartPolicy
  parameters:
    policy: Always
# Run the pod's containers as UID 1000 / GID 1000.
- name: podSecurityContext
  type: ucf.k8s.podSecurityContext
  parameters:
    runAsUser: 1000
    runAsGroup: 1000
# ClusterIP service exposing TCP port 8000 under the port name http-api, the
# same name used by the ingress endpoint declared in this manifest.
- name: ace-controller-service
  type: ucf.k8s.service
  parameters:
    ports:
    - name: http-api
      port: 8000
      protocol: TCP
    type: ClusterIP
# Pod monitor that scrapes /metrics on the pod port named `metrics`.
# NOTE(review): the container component in this file declares a single port
# named `http` (8000) and no port named `metrics`; confirm a `metrics` port
# actually exists on the pod, otherwise nothing will be scraped.
- name: ace-controller-metrics
  type: ucf.crd.podMonitor
  parameters:
    portName: metrics
    path: /metrics
# Restart the pod when configuration changes.
- name: cm-dependencies
  type: ucf.appspec.restartPodOnConfigChanges
  parameters:
    # Depend on every configmap detected in the microservice rather than
    # listing them individually.
    addAll: true
|