nekomeowww committed
Commit 2160235 · 1 Parent(s): 56c75a3

release: build 60371bd7e5a9f74f023fc8c57fca5cced4b0e47b
.gitattributes CHANGED
@@ -1,3 +1,5 @@
+
+ # Default
  *.7z filter=lfs diff=lfs merge=lfs -text
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,70 @@
  ---
- title: Conversational Webgpu
- emoji: ⚡
- colorFrom: yellow
- colorTo: green
+ emoji: "\U0001F4AC"
  sdk: static
  pinned: false
+ license: mit
+ title: Realtime Conversational WebGPU (Vue)
+ colorFrom: purple
+ colorTo: indigo
+ models:
+ - HuggingFaceTB/SmolLM2-1.7B-Instruct
+ - onnx-community/whisper-base
+ - onnx-community/silero-vad
+ short_description: Yet another Realtime Conversational WebGPU
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <h1 align="center">Realtime Conversational WebGPU (Vue)</h1>
+
+ <p align="center">
+ [<a href="https://conversational-webgpu-vue.netlify.app/">Try it</a>]
+ </p>
+
+ > Heavily inspired by [WebGPU Video Object Detection - a Hugging Face Space by WebML Community](https://huggingface.co/spaces/webml-community/webgpu-video-object-detection)
+
+ # Realtime Conversational WebGPU
+
+ ## Getting Started
+
+ Follow the steps below to set up and run the application.
+
+ ### 1. Clone the Repository
+
+ Clone the examples repository from GitHub:
+
+ ```sh
+ git clone https://github.com/proj-airi/webai-examples.git
+ ```
+
+ ### 2. Navigate to the Project Directory
+
+ Change your working directory to the `conversational-webgpu` folder:
+
+ ```sh
+ cd apps/conversational-webgpu
+ ```
+
+ ### 3. Install Dependencies
+
+ Install the necessary dependencies using npm:
+
+ ```sh
+ npm i
+ ```
+
+ ### 4. Run the Development Server
+
+ Start the development server:
+
+ ```sh
+ npm run dev
+ ```
+
+ The application should now be running locally. Open your browser and go to `http://localhost:5175` to see it in action.
+
+ ## Acknowledgements
+
+ Many thanks to the WebML Community for everything they have done.
+
+ > [Source code](https://huggingface.co/spaces/webml-community/conversational-webgpu)
+
+ > [UI inspiration](https://app.sesame.com/)
assets/index-DGmKQH7N.js ADDED
The diff for this file is too large to render.
 
assets/index-cAxkOY9l.css ADDED
@@ -0,0 +1 @@
+ @keyframes ripple-314fbe9f{0%{transform:scale(1);opacity:.7}to{transform:scale(2);opacity:0}}.embla[data-v-314fbe9f]{position:relative;overflow:hidden}.embla[data-v-314fbe9f]:before,.embla[data-v-314fbe9f]:after{content:"";position:absolute;top:0;bottom:0;width:48px;z-index:1;pointer-events:none}.embla-edge-disabled.embla[data-v-314fbe9f]:before,.embla-edge-disabled.embla[data-v-314fbe9f]:after{display:none}.embla[data-v-314fbe9f]:before{left:-24px;background:linear-gradient(to right,#ffffff 32px,transparent)}.embla[data-v-314fbe9f]:after{right:-24px;background:linear-gradient(to left,#ffffff 32px,transparent)}.dark .embla[data-v-314fbe9f]:before{left:-24px;background:linear-gradient(to right,#121212 32px,transparent)}.dark .embla[data-v-314fbe9f]:after{right:-24px;background:linear-gradient(to left,#121212 32px,transparent)}.fade-enter-active[data-v-314fbe9f],.fade-leave-active[data-v-314fbe9f]{transition:opacity .5s ease}.fade-enter-from[data-v-314fbe9f],.fade-leave-to[data-v-314fbe9f]{opacity:0}.fade-enter-to[data-v-314fbe9f],.fade-leave-from[data-v-314fbe9f]{opacity:1}.fade-scale-enter-active[data-v-314fbe9f],.fade-scale-leave-active[data-v-314fbe9f]{transition:all .2s ease-in-out}.fade-scale-enter-from[data-v-314fbe9f],.fade-scale-leave-to[data-v-314fbe9f]{opacity:0;transform:scale(.8)}.fade-scale-enter-to[data-v-314fbe9f],.fade-scale-leave-from[data-v-314fbe9f]{opacity:1;transform:scale(1)}*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:var(--un-default-border-color, #e5e7eb)}:before,:after{--un-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier 
New,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}:root{--bg-color-light: rgb(255 255 255);--bg-color-dark: rgb(18 18 18);--bg-color: var(--bg-color-light)}html,body,#app{height:100%;margin:0;padding:0;overscroll-behavior:none}html{background:var(--bg-color);transition:all .3s ease-in-out}html.dark{--bg-color: var(--bg-color-dark);color-scheme:dark}*,:before,:after{--un-rotate:0;--un-rotate-x:0;--un-rotate-y:0;--un-rotate-z:0;--un-scale-x:1;--un-scale-y:1;--un-scale-z:1;--un-skew-x:0;--un-skew-y:0;--un-translate-x:0;--un-translate-y:0;--un-translate-z:0;--un-pan-x: ;--un-pan-y: ;--un-pinch-zoom: ;--un-scroll-snap-strictness:proximity;--un-ordinal: ;--un-slashed-zero: ;--un-numeric-figure: ;--un-numeric-spacing: ;--un-numeric-fraction: ;--un-border-spacing-x:0;--un-border-spacing-y:0;--un-ring-offset-shadow:0 0 rgb(0 0 0 / 0);--un-ring-shadow:0 0 rgb(0 0 0 / 0);--un-shadow-inset: ;--un-shadow:0 0 rgb(0 0 0 / 0);--un-ring-inset: ;--un-ring-offset-width:0px;--un-ring-offset-color:#fff;--un-ring-width:0px;--un-ring-color:rgb(147 197 253 / .5);--un-blur: ;--un-brightness: ;--un-contrast: ;--un-drop-shadow: ;--un-grayscale: ;--un-hue-rotate: ;--un-invert: ;--un-saturate: ;--un-sepia: ;--un-backdrop-blur: ;--un-backdrop-brightness: ;--un-backdrop-contrast: ;--un-backdrop-grayscale: ;--un-backdrop-hue-rotate: ;--un-backdrop-invert: ;--un-backdrop-opacity: ;--un-backdrop-saturate: ;--un-backdrop-sepia: }::backdrop{--un-rotate:0;--un-rotate-x:0;--un-rotate-y:0;--un-rotate-z:0;--un-scale-x:1;--un-scale-y:1;--un-scale-z:1;--un-skew-x:0;--un-skew-y:0;--un-translate-x:0;--un-translate-y:0;--un-translate-z:0;--un-pan-x: ;--un-pan-y: ;--un-pinch-zoom: ;--un-scroll-snap-strictness:proximity;--un-ordinal: ;--un-slashed-zero: ;--un-numeric-figure: ;--un-numeric-spacing: ;--un-numeric-fraction: ;--un-border-spacing-x:0;--un-border-spacing-y:0;--un-ring-offset-shadow:0 0 rgb(0 0 0 / 0);--un-ring-shadow:0 0 rgb(0 0 0 / 0);--un-shadow-inset: ;--un-shadow:0 0 rgb(0 0 0 / 0);--un-ring-inset: 
;--un-ring-offset-width:0px;--un-ring-offset-color:#fff;--un-ring-width:0px;--un-ring-color:rgb(147 197 253 / .5);--un-blur: ;--un-brightness: ;--un-contrast: ;--un-drop-shadow: ;--un-grayscale: ;--un-hue-rotate: ;--un-invert: ;--un-saturate: ;--un-sepia: ;--un-backdrop-blur: ;--un-backdrop-brightness: ;--un-backdrop-contrast: ;--un-backdrop-grayscale: ;--un-backdrop-hue-rotate: ;--un-backdrop-invert: ;--un-backdrop-opacity: ;--un-backdrop-saturate: ;--un-backdrop-sepia: }@font-face{font-family:DM Mono;font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmmono/v15/aFTU7PB1QTsUX8KYthSQBK6PYK3EXw.woff2) format("woff2");unicode-range:U+0100-02BA,U+02BD-02C5,U+02C7-02CC,U+02CE-02D7,U+02DD-02FF,U+0304,U+0308,U+0329,U+1D00-1DBF,U+1E00-1E9F,U+1EF2-1EFF,U+2020,U+20A0-20AB,U+20AD-20C0,U+2113,U+2C60-2C7F,U+A720-A7FF}@font-face{font-family:DM Mono;font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmmono/v15/aFTU7PB1QTsUX8KYthqQBK6PYK0.woff2) format("woff2");unicode-range:U+0000-00FF,U+0131,U+0152-0153,U+02BB-02BC,U+02C6,U+02DA,U+02DC,U+0304,U+0308,U+0329,U+2000-206F,U+20AC,U+2122,U+2191,U+2193,U+2212,U+2215,U+FEFF,U+FFFD}@font-face{font-family:DM Sans;font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmsans/v16/rP2tp2ywxg089UriI5-g4vlH9VoD8CmcqZG40F9JadbnoEwAopxRR232RmYJp8I5zzw.woff2) format("woff2");unicode-range:U+0100-02BA,U+02BD-02C5,U+02C7-02CC,U+02CE-02D7,U+02DD-02FF,U+0304,U+0308,U+0329,U+1D00-1DBF,U+1E00-1E9F,U+1EF2-1EFF,U+2020,U+20A0-20AB,U+20AD-20C0,U+2113,U+2C60-2C7F,U+A720-A7FF}@font-face{font-family:DM Sans;font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmsans/v16/rP2tp2ywxg089UriI5-g4vlH9VoD8CmcqZG40F9JadbnoEwAopxRSW32RmYJp8I5.woff2) format("woff2");unicode-range:U+0000-00FF,U+0131,U+0152-0153,U+02BB-02BC,U+02C6,U+02DA,U+02DC,U+0304,U+0308,U+0329,U+2000-206F,U+20AC,U+2122,U+2191,U+2193,U+2212,U+2215,U+FEFF,U+FFFD}@font-face{font-family:"DM Serif Display";font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmserifdisplay/v16/-nFnOHM81r4j6k0gjAW3mujVU2B2G_5x0vrx52jJ3Q.woff2) format("woff2");unicode-range:U+0100-02BA,U+02BD-02C5,U+02C7-02CC,U+02CE-02D7,U+02DD-02FF,U+0304,U+0308,U+0329,U+1D00-1DBF,U+1E00-1E9F,U+1EF2-1EFF,U+2020,U+20A0-20AB,U+20AD-20C0,U+2113,U+2C60-2C7F,U+A720-A7FF}@font-face{font-family:"DM Serif Display";font-style:normal;font-weight:400;font-display:swap;src:url(https://fonts.gstatic.com/s/dmserifdisplay/v16/-nFnOHM81r4j6k0gjAW3mujVU2B2G_Bx0vrx52g.woff2) format("woff2");unicode-range:U+0000-00FF,U+0131,U+0152-0153,U+02BB-02BC,U+02C6,U+02DA,U+02DC,U+0304,U+0308,U+0329,U+2000-206F,U+20AC,U+2122,U+2191,U+2193,U+2212,U+2215,U+FEFF,U+FFFD}.i-solar\:end-call-rounded-bold,[i-solar\:end-call-rounded-bold=""]{--un-icon:url("data:image/svg+xml;utf8,%3Csvg viewBox='0 0 24 24' width='1.2em' height='1.2em' xmlns='http://www.w3.org/2000/svg' %3E%3Cpath fill='currentColor' d='m5.607 16.897l1.34-.38C8.156 16.174 9 14.983 9 13.618c0 0 0-1.654 3-1.654s3 1.654 3 1.654c0 1.365.844 2.556 2.053 2.9l1.34.38C20.218 17.414 22 15.91 22 13.85c0-1.237-.277-2.477-1.083-3.347C19.56 9.04 16.807 7 12 7s-7.56 2.039-8.917 3.503C2.277 11.373 2 12.613 2 13.85c0 2.06 1.782 3.565 3.607 3.047'/%3E%3C/svg%3E");-webkit-mask:var(--un-icon) no-repeat;mask:var(--un-icon) no-repeat;-webkit-mask-size:100% 100%;mask-size:100% 
100%;background-color:currentColor;color:inherit;width:1.2em;height:1.2em}.i-solar\:phone-bold,[i-solar\:phone-bold=""]{--un-icon:url("data:image/svg+xml;utf8,%3Csvg viewBox='0 0 24 24' width='1.2em' height='1.2em' xmlns='http://www.w3.org/2000/svg' %3E%3Cpath fill='currentColor' d='m16.556 12.906l-.455.453s-1.083 1.076-4.038-1.862s-1.872-4.014-1.872-4.014l.286-.286c.707-.702.774-1.83.157-2.654L9.374 2.86C8.61 1.84 7.135 1.705 6.26 2.575l-1.57 1.56c-.433.432-.723.99-.688 1.61c.09 1.587.808 5 4.812 8.982c4.247 4.222 8.232 4.39 9.861 4.238c.516-.048.964-.31 1.325-.67l1.42-1.412c.96-.953.69-2.588-.538-3.255l-1.91-1.039c-.806-.437-1.787-.309-2.417.317'/%3E%3C/svg%3E");-webkit-mask:var(--un-icon) no-repeat;mask:var(--un-icon) no-repeat;-webkit-mask-size:100% 100%;mask-size:100% 100%;background-color:currentColor;color:inherit;width:1.2em;height:1.2em}.i-svg-spinners\:3-dots-bounce,[i-svg-spinners\:3-dots-bounce=""]{--un-icon:url("data:image/svg+xml;utf8,%3Csvg viewBox='0 0 24 24' width='1.2em' height='1.2em' xmlns='http://www.w3.org/2000/svg' %3E%3Ccircle cx='4' cy='12' r='3' fill='currentColor'%3E%3Canimate id='svgSpinners3DotsBounce0' attributeName='cy' begin='0;svgSpinners3DotsBounce1.end+0.25s' calcMode='spline' dur='0.6s' keySplines='.33,.66,.66,1;.33,0,.66,.33' values='12;6;12'/%3E%3C/circle%3E%3Ccircle cx='12' cy='12' r='3' fill='currentColor'%3E%3Canimate attributeName='cy' begin='svgSpinners3DotsBounce0.begin+0.1s' calcMode='spline' dur='0.6s' keySplines='.33,.66,.66,1;.33,0,.66,.33' values='12;6;12'/%3E%3C/circle%3E%3Ccircle cx='20' cy='12' r='3' fill='currentColor'%3E%3Canimate id='svgSpinners3DotsBounce1' attributeName='cy' begin='svgSpinners3DotsBounce0.begin+0.2s' calcMode='spline' dur='0.6s' keySplines='.33,.66,.66,1;.33,0,.66,.33' values='12;6;12'/%3E%3C/circle%3E%3C/svg%3E");-webkit-mask:var(--un-icon) no-repeat;mask:var(--un-icon) no-repeat;-webkit-mask-size:100% 100%;mask-size:100% 100%;background-color:currentColor;color:inherit;width:1.2em;height:1.2em}.prose :where(h1,h2,h3,h4,h5,h6):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-headings);font-weight:600;line-height:1.25}.prose :where(a):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-links);text-decoration:underline;font-weight:500}.prose :where(a code):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-links)}.prose :where(p,ul,ol,pre):not(:where(.not-prose,.not-prose *)){margin:1em 0;line-height:1.75}.prose :where(blockquote):not(:where(.not-prose,.not-prose *)){margin:1em 0;padding-left:1em;font-style:italic;border-left:.25em solid var(--un-prose-borders)}.prose :where(h1):not(:where(.not-prose,.not-prose *)){margin:1rem 0;font-size:2.25em}.prose :where(h2):not(:where(.not-prose,.not-prose *)){margin:1.75em 0 .5em;font-size:1.75em}.prose :where(h3):not(:where(.not-prose,.not-prose *)){margin:1.5em 0 .5em;font-size:1.375em}.prose :where(h4):not(:where(.not-prose,.not-prose *)){margin:1em 0;font-size:1.125em}.prose :where(img,video):not(:where(.not-prose,.not-prose *)){max-width:100%}.prose :where(figure,picture):not(:where(.not-prose,.not-prose *)){margin:1em 0}.prose :where(figcaption):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-captions);font-size:.875em}.prose :where(code):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-code);font-size:.875em;font-weight:600;font-family:DM Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.prose :where(:not(pre)>code):not(:where(.not-prose,.not-prose *)):before,.prose 
:where(:not(pre)>code):not(:where(.not-prose,.not-prose *)):after{content:"`"}.prose :where(pre):not(:where(.not-prose,.not-prose *)){padding:1.25rem 1.5rem;overflow-x:auto;border-radius:.375rem}.prose :where(pre,code):not(:where(.not-prose,.not-prose *)){white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-moz-hyphens:none;hyphens:none;background:transparent}.prose :where(pre code):not(:where(.not-prose,.not-prose *)){font-weight:inherit}.prose :where(ol,ul):not(:where(.not-prose,.not-prose *)){padding-left:1.25em}.prose :where(ol):not(:where(.not-prose,.not-prose *)){list-style-type:decimal}.prose :where(ol[type=A]):not(:where(.not-prose,.not-prose *)){list-style-type:upper-alpha}.prose :where(ol[type=a]):not(:where(.not-prose,.not-prose *)){list-style-type:lower-alpha}.prose :where(ol[type=A s]):not(:where(.not-prose,.not-prose *)){list-style-type:upper-alpha}.prose :where(ol[type=a s]):not(:where(.not-prose,.not-prose *)){list-style-type:lower-alpha}.prose :where(ol[type=I]):not(:where(.not-prose,.not-prose *)){list-style-type:upper-roman}.prose :where(ol[type=i]):not(:where(.not-prose,.not-prose *)){list-style-type:lower-roman}.prose :where(ol[type=I s]):not(:where(.not-prose,.not-prose *)){list-style-type:upper-roman}.prose :where(ol[type=i s]):not(:where(.not-prose,.not-prose *)){list-style-type:lower-roman}.prose :where(ol[type="1"]):not(:where(.not-prose,.not-prose *)){list-style-type:decimal}.prose :where(ul):not(:where(.not-prose,.not-prose *)){list-style-type:disc}.prose :where(ol>li):not(:where(.not-prose,.not-prose *))::marker,.prose :where(ul>li):not(:where(.not-prose,.not-prose *))::marker,.prose :where(summary):not(:where(.not-prose,.not-prose *))::marker{color:var(--un-prose-lists)}.prose :where(hr):not(:where(.not-prose,.not-prose *)){margin:2em 0;border:1px solid var(--un-prose-hr)}.prose :where(table):not(:where(.not-prose,.not-prose *)){display:block;margin:1em 0;border-collapse:collapse;overflow-x:auto}.prose :where(tr):not(:where(.not-prose,.not-prose *)):nth-child(2n){background:var(--un-prose-bg-soft)}.prose :where(td,th):not(:where(.not-prose,.not-prose *)){border:1px solid var(--un-prose-borders);padding:.625em 1em}.prose :where(abbr):not(:where(.not-prose,.not-prose *)){cursor:help}.prose :where(kbd):not(:where(.not-prose,.not-prose *)){color:var(--un-prose-code);border:1px solid;padding:.25rem .5rem;font-size:.875em;border-radius:.25rem}.prose :where(details):not(:where(.not-prose,.not-prose *)){margin:1em 0;padding:1.25rem 1.5rem;background:var(--un-prose-bg-soft)}.prose :where(summary):not(:where(.not-prose,.not-prose *)){cursor:pointer;font-weight:600}.prose{color:var(--un-prose-body);max-width:65ch}.pointer-events-none{pointer-events:none}.absolute{position:absolute}.relative{position:relative}.inset-0{top:0;right:0;bottom:0;left:0}.z-10{z-index:10}.m-auto{margin:auto}.-mt-4{margin-top:-1rem}.mb-4{margin-bottom:1rem}.hidden{display:none}.aspect-square{aspect-ratio:1/1}.h-100dvh{height:100dvh}.h-32{height:8rem}.h-50{height:12.5rem}.h-80{height:20rem}.h-full,[h-full=""]{height:100%}.min-w-0{min-width:0}.w-100dvw{width:100dvw}.w-120{width:30rem}.w-140{width:35rem}.w-32{width:8rem}.w-fit,[w-fit=""]{width:fit-content}.w-full{width:100%}.flex,[flex=""]{display:flex}.flex-\[0_0_80\%\]{flex:0 0 80%}.flex-shrink-0,.shrink-0{flex-shrink:0}.grow-0{flex-grow:0}.basis-full{flex-basis:100%}.flex-col,[flex-col=""]{flex-direction:column}.transform{transform:translate(var(--un-translate-x)) 
translateY(var(--un-translate-y)) translateZ(var(--un-translate-z)) rotate(var(--un-rotate)) rotateX(var(--un-rotate-x)) rotateY(var(--un-rotate-y)) rotate(var(--un-rotate-z)) skew(var(--un-skew-x)) skewY(var(--un-skew-y)) scaleX(var(--un-scale-x)) scaleY(var(--un-scale-y)) scaleZ(var(--un-scale-z))}@keyframes ping{0%{transform:scale(1);opacity:1}75%,to{transform:scale(2);opacity:0}}.animate-ping{animation:ping 1s cubic-bezier(0,0,.2,1) infinite}.cursor-pointer{cursor:pointer}.items-center,[items-center=""]{align-items:center}.justify-center,[justify-center=""]{justify-content:center}.justify-between,[justify-between=""]{justify-content:space-between}.gap-2,[gap-2=""]{gap:.5rem}.gap-4{gap:1rem}.overflow-hidden{overflow:hidden}.border-2{border-width:2px}.border-cyan-200{--un-border-opacity:1;border-color:rgb(165 243 252 / var(--un-border-opacity))}.dark .dark\:border-cyan-500{--un-border-opacity:1;border-color:rgb(6 182 212 / var(--un-border-opacity))}.rounded-full{border-radius:9999px}.rounded-lg,[rounded-lg=""]{border-radius:.5rem}.rounded-xl,[rounded-xl=""]{border-radius:.75rem}.bg-cyan-200{--un-bg-opacity:1;background-color:rgb(165 243 252 / var(--un-bg-opacity))}.bg-cyan-300{--un-bg-opacity:1;background-color:rgb(103 232 249 / var(--un-bg-opacity))}.bg-red-200{--un-bg-opacity:1;background-color:rgb(254 202 202 / var(--un-bg-opacity))}.bg-red-300{--un-bg-opacity:1;background-color:rgb(252 165 165 / var(--un-bg-opacity))}.dark .dark\:bg-cyan-600{--un-bg-opacity:1;background-color:rgb(8 145 178 / var(--un-bg-opacity))}.dark .dark\:bg-cyan-800{--un-bg-opacity:1;background-color:rgb(21 94 117 / var(--un-bg-opacity))}.dark .dark\:bg-red-400{--un-bg-opacity:1;background-color:rgb(248 113 113 / var(--un-bg-opacity))}.dark [bg~="dark:cyan-950"]{--un-bg-opacity:1;background-color:rgb(8 51 68 / var(--un-bg-opacity))}[bg~=cyan-50]{--un-bg-opacity:1;background-color:rgb(236 254 255 / var(--un-bg-opacity))}.dark [bg~="dark:hover:cyan-900"]:hover{--un-bg-opacity:1;background-color:rgb(22 78 99 / var(--un-bg-opacity))}[bg~="hover:cyan-100"]:hover{--un-bg-opacity:1;background-color:rgb(207 250 254 / var(--un-bg-opacity))}.p-4,[p-4=""]{padding:1rem}.px-1,[px-1=""]{padding-left:.25rem;padding-right:.25rem}.px-16{padding-left:4rem;padding-right:4rem}.px-4,[px-4=""]{padding-left:1rem;padding-right:1rem}.px-8{padding-left:2rem;padding-right:2rem}.py-1,[py-1=""]{padding-top:.25rem;padding-bottom:.25rem}.py-2,[py-2=""]{padding-top:.5rem;padding-bottom:.5rem}.pl-0{padding-left:0}.pt-4{padding-top:1rem}.text-center{text-align:center}.text-left{text-align:left}.text-2xl,[text-2xl=""]{font-size:1.5rem;line-height:2rem}.text-lg,[text-lg=""]{font-size:1.125rem;line-height:1.75rem}.text-sm,[text-sm=""]{font-size:.875rem;line-height:1.25rem}.dark .dark\:text-white,.dark [text~="dark:white"]{--un-text-opacity:1;color:rgb(255 255 255 / var(--un-text-opacity))}.dark [text~="dark:cyan-500"]{--un-text-opacity:1;color:rgb(6 182 212 / var(--un-text-opacity))}.text-gray-700{--un-text-opacity:1;color:rgb(55 65 81 / var(--un-text-opacity))}.text-red-700{--un-text-opacity:1;color:rgb(185 28 28 / var(--un-text-opacity))}[text~=black]{--un-text-opacity:1;color:rgb(0 0 0 / var(--un-text-opacity))}[text~=cyan-400]{--un-text-opacity:1;color:rgb(34 211 238 / var(--un-text-opacity))}[text~=red-400]{--un-text-opacity:1;color:rgb(248 113 113 / var(--un-text-opacity))}[text~="hover:red-300"]:hover{--un-text-opacity:1;color:rgb(252 165 165 / var(--un-text-opacity))}[text~="active:red-400"]:active{--un-text-opacity:1;color:rgb(248 113 113 
/ var(--un-text-opacity))}.font-sans{font-family:DM Sans,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}.opacity-0{opacity:0}.opacity-75{opacity:.75}.shadow-inner{--un-shadow:inset 0 2px 4px 0 var(--un-shadow-color, rgb(0 0 0 / .05));box-shadow:var(--un-ring-offset-shadow),var(--un-ring-shadow),var(--un-shadow)}.outline-none,[outline-none=""]{outline:2px solid transparent;outline-offset:2px}.transition{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-transform{transition-property:transform;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}[transition~=all]{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300,[transition~=duration-300]{transition-duration:.3s}.duration-500,[transition~=duration-500]{transition-duration:.5s}.ease,.ease-in-out,[transition~=ease-in-out]{transition-timing-function:cubic-bezier(.4,0,.2,1)}.ease-out{transition-timing-function:cubic-bezier(0,0,.2,1)}
assets/play-worklet-CqUYQx_r.js ADDED
@@ -0,0 +1 @@
+ var c=Object.defineProperty;var l=(s,e,t)=>e in s?c(s,e,{enumerable:!0,configurable:!0,writable:!0,value:t}):s[e]=t;var n=(s,e,t)=>l(s,typeof e!="symbol"?e+"":e,t);class d extends AudioWorkletProcessor{constructor(){super();n(this,"bufferQueue",[]);n(this,"currentChunkOffset",0);n(this,"hadData",!1);this.bufferQueue=[],this.currentChunkOffset=0,this.hadData=!1,this.port.onmessage=t=>{const r=t.data;r instanceof Float32Array?(this.hadData=!0,this.bufferQueue.push(r)):r==="stop"&&(this.bufferQueue=[],this.currentChunkOffset=0)}}process(t,r){const f=r[0][0];if(!f)return!0;const h=f.length;let u=0;for(this.hadData&&this.bufferQueue.length===0&&(this.port.postMessage({type:"playback_ended"}),this.hadData=!1);u<h;)if(this.bufferQueue.length>0){const a=this.bufferQueue[0],o=a.length-this.currentChunkOffset,i=Math.min(o,h-u);f.set(a.subarray(this.currentChunkOffset,this.currentChunkOffset+i),u),this.currentChunkOffset+=i,u+=i,this.currentChunkOffset>=a.length&&(this.bufferQueue.shift(),this.currentChunkOffset=0)}else f.fill(0,u),u=h;return!0}}registerProcessor("buffered-audio-worklet-processor",d);
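
Note: the worklet above is minified build output; the committed asset is what actually ships. For reference, here is a readable TypeScript sketch of the same processor logic. The class and variable names are reconstructions, not the original source, and the AudioWorklet globals (`AudioWorkletProcessor`, `registerProcessor`) are assumed available as they are in the worklet scope.

```ts
// Readable sketch of the minified buffered-audio-worklet-processor above.
class BufferedAudioWorkletProcessor extends AudioWorkletProcessor {
  bufferQueue: Float32Array[] = [] // queued TTS audio chunks
  currentChunkOffset = 0 // read position within the head chunk
  hadData = false // whether any audio has been queued since the last drain

  constructor() {
    super()
    this.port.onmessage = (event: MessageEvent) => {
      const data = event.data
      if (data instanceof Float32Array) {
        this.hadData = true
        this.bufferQueue.push(data) // enqueue a new audio chunk
      }
      else if (data === 'stop') {
        this.bufferQueue = [] // drop any pending audio
        this.currentChunkOffset = 0
      }
    }
  }

  process(_inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
    const channel = outputs[0][0]
    if (!channel)
      return true

    // When the queue drains after playback, notify the main thread once.
    if (this.hadData && this.bufferQueue.length === 0) {
      this.port.postMessage({ type: 'playback_ended' })
      this.hadData = false
    }

    let written = 0
    while (written < channel.length) {
      if (this.bufferQueue.length > 0) {
        // Copy as much of the head chunk as fits into the output frame
        const chunk = this.bufferQueue[0]
        const available = chunk.length - this.currentChunkOffset
        const toCopy = Math.min(available, channel.length - written)
        channel.set(chunk.subarray(this.currentChunkOffset, this.currentChunkOffset + toCopy), written)
        this.currentChunkOffset += toCopy
        written += toCopy
        if (this.currentChunkOffset >= chunk.length) {
          this.bufferQueue.shift() // finished this chunk
          this.currentChunkOffset = 0
        }
      }
      else {
        channel.fill(0, written) // underrun: emit silence
        written = channel.length
      }
    }
    return true // keep the processor alive
  }
}

registerProcessor('buffered-audio-worklet-processor', BufferedAudioWorkletProcessor)
```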
assets/vad-processor-0sEQXaXZ.js ADDED
@@ -0,0 +1 @@
+ let s=0;const r=new Float32Array(512);class l extends AudioWorkletProcessor{process(o,n,f){const e=o[0][0];if(!e)return!1;if(e.length>512)this.port.postMessage({buffer:e});else{const t=512-s;e.length>=t?(r.set(e.subarray(0,t),s),this.port.postMessage({buffer:r}),r.fill(0),r.set(e.subarray(t),0),s=e.length-t):(r.set(e,s),s+=e.length)}return!0}}registerProcessor("vad-processor",l);
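
The minified worklet above batches microphone audio into 512-sample frames before posting it. The main-thread glue lives in `assets/index-DGmKQH7N.js`, which is too large to render in this diff, so the wiring below is only a sketch: the module paths, function name, and the 16 kHz sample rate are assumptions (the app's real `INPUT_SAMPLE_RATE` constant is not shown here), while the processor names and message shapes come from the worklet sources in this commit.

```ts
// Hypothetical main-thread wiring for the two worklets committed above.
async function setupAudio(worker: Worker) {
  const ctx = new AudioContext({ sampleRate: 16000 }) // silero-vad is typically run at 16 kHz
  await ctx.audioWorklet.addModule('/assets/vad-processor-0sEQXaXZ.js')
  await ctx.audioWorklet.addModule('/assets/play-worklet-CqUYQx_r.js')

  // Microphone -> vad-processor -> worker, in 512-sample frames
  const mic = await navigator.mediaDevices.getUserMedia({ audio: true })
  const source = ctx.createMediaStreamSource(mic)
  const vadNode = new AudioWorkletNode(ctx, 'vad-processor')
  vadNode.port.onmessage = (event: MessageEvent<{ buffer: Float32Array }>) => {
    worker.postMessage({ type: 'audio', buffer: event.data.buffer })
  }
  source.connect(vadNode)

  // Worker TTS output -> buffered playback processor -> speakers.
  // Float32Array chunks from the worker get posted to playbackNode.port;
  // the processor reports back when its queue drains.
  const playbackNode = new AudioWorkletNode(ctx, 'buffered-audio-worklet-processor')
  playbackNode.port.onmessage = (event: MessageEvent<{ type: string }>) => {
    if (event.data.type === 'playback_ended')
      worker.postMessage({ type: 'playback_ended' }) // unblocks mic audio in the worker
  }
  playbackNode.connect(ctx.destination)

  return { ctx, playbackNode }
}
```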
assets/worker-yoCrhISy.ts ADDED
@@ -0,0 +1,465 @@
+ import type {
+   AutomaticSpeechRecognitionPipeline,
+   CausalLMOutputWithPast,
+   GPT2Tokenizer,
+   LlamaForCausalLM,
+   PreTrainedModel,
+   StoppingCriteriaList,
+ } from '@huggingface/transformers'
+ import type { Device, DType } from '@xsai-transformers/shared/types'
+ import type { GenerateOptions } from 'kokoro-js'
+ import type {
+   WorkerMessageEventError,
+   WorkerMessageEventInfo,
+   WorkerMessageEventOutput,
+   WorkerMessageEventProgress,
+   WorkerMessageEventSetVoiceResponse,
+   WorkerMessageEventStatus,
+ } from '../types/worker'
+
+ import {
+   // VAD
+   AutoModel,
+
+   AutoModelForCausalLM,
+   // LLM
+   AutoTokenizer,
+   InterruptableStoppingCriteria,
+   pipeline,
+
+   // Speech recognition
+   Tensor,
+   TextStreamer,
+ } from '@huggingface/transformers'
+ import { isWebGPUSupported } from 'gpuu/webgpu'
+ import { KokoroTTS, TextSplitterStream } from 'kokoro-js'
+
+ import {
+   EXIT_THRESHOLD,
+   INPUT_SAMPLE_RATE,
+   MAX_BUFFER_DURATION,
+   MAX_NUM_PREV_BUFFERS,
+   MIN_SILENCE_DURATION_SAMPLES,
+   MIN_SPEECH_DURATION_SAMPLES,
+   SPEECH_PAD_SAMPLES,
+   SPEECH_THRESHOLD,
+ } from '../constants'
+
+ interface Message {
+   role: 'system' | 'user' | 'assistant'
+   content: string
+ }
+
+ type Voices = GenerateOptions['voice']
+ export type PretrainedConfig = NonNullable<Parameters<typeof AutoModel.from_pretrained>[1]>['config']
+
+ const whisperDtypeMap: Record<Device, DType> = {
+   webgpu: {
+     encoder_model: 'fp32',
+     decoder_model_merged: 'fp32',
+   },
+   wasm: {
+     encoder_model: 'fp32',
+     decoder_model_merged: 'q8',
+   },
+ }
+
+ const model_id = 'onnx-community/Kokoro-82M-v1.0-ONNX'
+ let voice: Voices | undefined
+ let silero_vad: PreTrainedModel
+ let transcriber: AutomaticSpeechRecognitionPipeline
+ let tts: KokoroTTS
+
+ const SYSTEM_MESSAGE: Message = {
+   role: 'system',
+   content:
+     'You\'re a helpful and conversational voice assistant. Keep your responses short, clear, and casual.',
+ }
+ let messages: Message[] = [SYSTEM_MESSAGE]
+ let past_key_values_cache: any = null
+ let stopping_criteria: InterruptableStoppingCriteria | null = null
+
+ // Global audio buffer to store incoming audio
+ const BUFFER = new Float32Array(MAX_BUFFER_DURATION * INPUT_SAMPLE_RATE)
+ let bufferPointer = 0
+
+ // Initial state for VAD
+ const sr = new Tensor('int64', [INPUT_SAMPLE_RATE], [])
+ let state = new Tensor('float32', new Float32Array(2 * 1 * 128), [2, 1, 128])
+
+ // Whether we are in the process of adding audio to the buffer
+ let isRecording = false
+ let isPlaying = false // Whether TTS playback is currently in progress
+
+ let tokenizer: GPT2Tokenizer
+ let llm: LlamaForCausalLM
+
+ const prevBuffers: Float32Array[] = []
+
+ export async function loadModels() {
+   tts = await KokoroTTS.from_pretrained(model_id, {
+     dtype: 'fp32',
+     device: 'webgpu',
+   })
+
+   const device = 'webgpu'
+   globalThis.postMessage({ type: 'info', data: { message: `Using device: "${device}"` } } satisfies WorkerMessageEventInfo)
+   globalThis.postMessage({ type: 'info', data: { message: 'Loading models...', duration: 'until_next' } } satisfies WorkerMessageEventInfo)
+
+   // Load models
+   silero_vad = await AutoModel.from_pretrained(
+     'onnx-community/silero-vad',
+     {
+       config: { model_type: 'custom' } as PretrainedConfig,
+       dtype: 'fp32', // Full-precision
+       progress_callback: progress => globalThis.postMessage({ type: 'progress', data: { message: progress } } satisfies WorkerMessageEventProgress),
+     },
+   ).catch((error: Error) => {
+     globalThis.postMessage({ type: 'error', data: { error, message: error.message } } satisfies WorkerMessageEventError<Error>)
+     throw error
+   })
+
+   transcriber = await pipeline(
+     'automatic-speech-recognition',
+     'onnx-community/whisper-base', // or "onnx-community/moonshine-base-ONNX",
+     {
+       device,
+       dtype: whisperDtypeMap[device as keyof typeof whisperDtypeMap],
+       progress_callback: progress => globalThis.postMessage({ type: 'progress', data: { message: progress } } satisfies WorkerMessageEventProgress),
+     },
+   ).catch((error: Error) => {
+     globalThis.postMessage({ type: 'error', data: { error, message: error.message } } satisfies WorkerMessageEventError<Error>)
+     throw error
+   })
+
+   await transcriber(new Float32Array(INPUT_SAMPLE_RATE)) // Compile shaders
+
+   llm = await AutoModelForCausalLM.from_pretrained(
+     'HuggingFaceTB/SmolLM2-1.7B-Instruct',
+     {
+       dtype: await isWebGPUSupported() ? 'q4f16' : 'int8',
+       device: await isWebGPUSupported() ? 'webgpu' : 'wasm',
+       progress_callback: progress => globalThis.postMessage({ type: 'progress', data: { message: progress } } satisfies WorkerMessageEventProgress),
+     },
+   ).catch((error: Error) => {
+     globalThis.postMessage({ type: 'error', data: { error, message: error.message } } satisfies WorkerMessageEventError<Error>)
+     throw error
+   })
+
+   tokenizer = await AutoTokenizer.from_pretrained(
+     'HuggingFaceTB/SmolLM2-1.7B-Instruct',
+   ).catch((error: Error) => {
+     globalThis.postMessage({ type: 'error', data: { error, message: error.message } } satisfies WorkerMessageEventError<Error>)
+     throw error
+   })
+
+   await llm.generate({ ...tokenizer('x'), max_new_tokens: 1 }) // Compile shaders
+
+   globalThis.postMessage({
+     type: 'status',
+     data: {
+       status: 'ready',
+       message: 'Ready!',
+       voices: tts.voices,
+     },
+   } as WorkerMessageEventStatus)
+ }
+
+ loadModels()
+
+ /**
+  * Perform Voice Activity Detection (VAD)
+  * @param buffer The new audio buffer
+  * @returns `true` if the buffer is speech, `false` otherwise.
+  */
+ async function vad(buffer?: Float32Array): Promise<boolean> {
+   if (!buffer) {
+     // Possibly closed or interrupted
+     return false
+   }
+
+   const input = new Tensor('float32', buffer, [1, buffer.length])
+
+   const { stateN, output } = await silero_vad({ input, sr, state })
+   state = stateN // Update state
+
+   const isSpeech = output.data[0]
+
+   // Use heuristics to determine if the buffer is speech or not
+   return (
+     // Case 1: We are above the threshold (definitely speech)
+     isSpeech > SPEECH_THRESHOLD
+     // Case 2: We are in the process of recording, and the probability is above the negative (exit) threshold
+     || (isRecording && isSpeech >= EXIT_THRESHOLD)
+   )
+ }
+
+ interface SpeechData {
+   start: number
+   end: number
+   duration: number
+ }
+
+ type BatchEncodingItem = number[] | number[][] | Tensor
+ /**
+  * Holds the output of the tokenizer's call function.
+  */
+ interface BatchEncoding {
+   /**
+    * List of token ids to be fed to a model.
+    */
+   input_ids: BatchEncodingItem
+   /**
+    * List of indices specifying which tokens should be attended to by the model.
+    */
+   attention_mask: BatchEncodingItem
+   /**
+    * List of token type ids to be fed to a model.
+    */
+   token_type_ids?: BatchEncodingItem
+ }
+
+ /**
+  * Transcribe the audio buffer, then generate and speak a response
+  * @param buffer The audio buffer
+  * @param _data Additional data
+  */
+ async function speechToSpeech(buffer: Float32Array, _data: SpeechData): Promise<void> {
+   isPlaying = true
+
+   // 1. Transcribe the audio from the user
+   const result = await transcriber(buffer)
+   const text = (result as { text: string }).text.trim()
+
+   if (['', '[BLANK_AUDIO]'].includes(text)) {
+     // If the transcription is empty or blank audio, we skip the rest of the processing
+     return
+   }
+
+   messages.push({ role: 'user', content: text })
+
+   // Set up text-to-speech streaming
+   const splitter = new TextSplitterStream()
+   const stream = tts!.stream(splitter, { voice });
+   (async () => {
+     for await (const { text, audio } of stream) {
+       globalThis.postMessage({ type: 'output', data: { text, result: audio } } satisfies WorkerMessageEventOutput)
+     }
+   })()
+
+   // 2. Generate a response using the LLM
+   const inputs = tokenizer.apply_chat_template(messages, {
+     add_generation_prompt: true,
+     return_dict: true,
+   }) as BatchEncoding
+
+   const streamer = new TextStreamer(tokenizer, {
+     skip_prompt: true,
+     skip_special_tokens: true,
+     callback_function: (text: string) => {
+       splitter.push(text)
+     },
+     token_callback_function: () => {},
+   })
+
+   stopping_criteria = new InterruptableStoppingCriteria()
+   type GenerationFunctionParameters = Parameters<typeof llm.generate>[0] & Record<string, any>
+
+   const generatedRes = await llm.generate({
+     ...inputs,
+     past_key_values: past_key_values_cache,
+     do_sample: false, // TODO: do_sample: true is bugged (invalid data location on top-k sample)
+     max_new_tokens: 1024,
+     streamer,
+     stopping_criteria: stopping_criteria as unknown as StoppingCriteriaList,
+     return_dict_in_generate: true,
+   } as GenerationFunctionParameters)
+
+   const { past_key_values, sequences } = generatedRes as CausalLMOutputWithPast & { sequences: Tensor }
+   past_key_values_cache = past_key_values
+
+   // Finally, close the stream to signal that no more text will be added.
+   splitter.close()
+
+   const decoded = tokenizer.batch_decode(
+     // TODO: fix null as any
+     sequences.slice(null, [(inputs.input_ids as Tensor).dims[1], null as any]),
+     { skip_special_tokens: true },
+   )
+
+   messages.push({ role: 'assistant', content: decoded[0] })
+ }
+
+ // Track the number of samples after the last speech chunk
+ let postSpeechSamples = 0
+ function resetAfterRecording(offset = 0): void {
+   globalThis.postMessage({
+     type: 'status',
+     data: {
+       status: 'recording_end',
+       message: 'Transcribing...',
+       duration: 'until_next',
+     },
+   } satisfies WorkerMessageEventStatus)
+
+   BUFFER.fill(0, offset)
+   bufferPointer = offset
+   isRecording = false
+   postSpeechSamples = 0
+ }
+
+ function dispatchForTranscriptionAndResetAudioBuffer(overflow?: Float32Array): void {
+   // Get start and end time of the speech segment, minus the padding
+   const now = Date.now()
+   const end
+     = now - ((postSpeechSamples + SPEECH_PAD_SAMPLES) / INPUT_SAMPLE_RATE) * 1000
+   const start = end - (bufferPointer / INPUT_SAMPLE_RATE) * 1000
+   const duration = end - start
+   const overflowLength = overflow?.length ?? 0
+
+   // Copy the recorded audio (including the speech padding) out of the global buffer
+   const buffer = BUFFER.slice(0, bufferPointer + SPEECH_PAD_SAMPLES)
+
+   const prevLength = prevBuffers.reduce((acc, b) => acc + b.length, 0)
+   const paddedBuffer = new Float32Array(prevLength + buffer.length)
+   let offset = 0
+   for (const prev of prevBuffers) {
+     paddedBuffer.set(prev, offset)
+     offset += prev.length
+   }
+   paddedBuffer.set(buffer, offset)
+   speechToSpeech(paddedBuffer, { start, end, duration })
+
+   // Set overflow (if present) and reset the rest of the audio buffer
+   if (overflow) {
+     BUFFER.set(overflow, 0)
+   }
+   resetAfterRecording(overflowLength)
+ }
+
+ globalThis.onmessage = async (event: MessageEvent) => {
+   const { type, buffer } = event.data
+
+   // Refuse new audio while playing back
+   if (type === 'audio' && isPlaying)
+     return
+
+   switch (type) {
+     case 'start_call': {
+       const name = tts!.voices[voice ?? 'af_heart']?.name ?? 'Heart'
+       greet(`Hey there, my name is ${name}! How can I help you today?`)
+       return
+     }
+     case 'end_call':
+       messages = [SYSTEM_MESSAGE]
+       past_key_values_cache = null
+       break
+     case 'interrupt':
+       stopping_criteria?.interrupt()
+       return
+     case 'set_voice':
+       voice = event.data.voice
+
+       globalThis.postMessage({
+         type: 'set_voice_response',
+         data: {
+           ok: true,
+         },
+       } satisfies WorkerMessageEventSetVoiceResponse)
+
+       return
+     case 'playback_ended':
+       isPlaying = false
+       return
+   }
+
+   const wasRecording = isRecording // Save current state
+   const isSpeech = await vad(buffer)
+
+   if (!wasRecording && !isSpeech) {
+     // We are not recording, and the buffer is not speech,
+     // so we will probably discard it. We insert it into a
+     // FIFO queue with a maximum size of MAX_NUM_PREV_BUFFERS
+     if (prevBuffers.length >= MAX_NUM_PREV_BUFFERS) {
+       // If the queue is full, we discard the oldest buffer
+       prevBuffers.shift()
+     }
+     prevBuffers.push(buffer)
+     return
+   }
+
+   const remaining = BUFFER.length - bufferPointer
+   if (buffer.length >= remaining) {
+     // The buffer is larger than (or equal to) the remaining space in the global buffer,
+     // so we perform transcription and copy the overflow to the global buffer
+     BUFFER.set(buffer.subarray(0, remaining), bufferPointer)
+     bufferPointer += remaining
+
+     // Dispatch the audio buffer
+     const overflow = buffer.subarray(remaining)
+     dispatchForTranscriptionAndResetAudioBuffer(overflow)
+     return
+   }
+   else {
+     // The buffer is smaller than the remaining space in the global buffer,
+     // so we copy it to the global buffer
+     BUFFER.set(buffer, bufferPointer)
+     bufferPointer += buffer.length
+   }
+
+   if (isSpeech) {
+     if (!isRecording) {
+       // Indicate start of recording
+       globalThis.postMessage({
+         type: 'status',
+         data: {
+           status: 'recording_start',
+           message: 'Listening...',
+           duration: 'until_next',
+         },
+       } satisfies WorkerMessageEventStatus)
+     }
+
+     // Start or continue recording
+     isRecording = true
+     postSpeechSamples = 0 // Reset the post-speech samples
+
+     return
+   }
+
+   postSpeechSamples += buffer.length
+
+   // At this point we're confident that we were recording (wasRecording === true), but the latest buffer is not speech.
+   // So, we check whether we have reached the end of the current audio chunk.
+   if (postSpeechSamples < MIN_SILENCE_DURATION_SAMPLES) {
+     // There was a short pause, but not long enough to consider the end of a speech chunk
+     // (e.g., the speaker took a breath), so we continue recording
+     return
+   }
+
+   if (bufferPointer < MIN_SPEECH_DURATION_SAMPLES) {
+     // The entire buffer (including the new chunk) is smaller than the minimum
+     // duration of a speech chunk, so we can safely discard the buffer.
+     resetAfterRecording()
+     return
+   }
+
+   dispatchForTranscriptionAndResetAudioBuffer()
+ }
+
+ function greet(text: string): void {
+   isPlaying = true
+
+   const splitter = new TextSplitterStream()
+   const stream = tts!.stream(splitter, { voice });
+
+   (async () => {
+     for await (const { text: chunkText, audio } of stream) {
+       globalThis.postMessage({ type: 'output', data: { text: chunkText, result: audio } } satisfies WorkerMessageEventOutput)
+     }
+   })()
+
+   splitter.push(text)
+   splitter.close()
+   messages.push({ role: 'assistant', content: text })
+ }
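
For completeness, a sketch of how a main-thread caller might drive this worker. The worker URL and the surrounding UI logic are hypothetical (the real app code is in the unrendered bundle), but the message types below match the handler above.

```ts
// Hypothetical caller-side usage of the speech-to-speech worker.
const worker = new Worker(new URL('./worker', import.meta.url), { type: 'module' })

worker.onmessage = (event: MessageEvent) => {
  const { type, data } = event.data
  switch (type) {
    case 'status':
      // data.status is 'ready' | 'recording_start' | 'recording_end';
      // on 'ready', data.voices lists the available Kokoro voices.
      if (data.status === 'ready')
        worker.postMessage({ type: 'start_call' })
      break
    case 'output':
      // data.text is a sentence chunk; data.result is its TTS audio,
      // to be forwarded to the buffered playback worklet.
      break
    case 'error':
      console.error(data.message)
      break
  }
}

// Other messages the worker understands (see the switch statement above):
worker.postMessage({ type: 'set_voice', voice: 'af_heart' })
worker.postMessage({ type: 'interrupt' })
worker.postMessage({ type: 'end_call' })
```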
favicon-96x96.png ADDED
favicon.svg ADDED
index.html CHANGED
@@ -1,19 +1,24 @@
  <!doctype html>
- <html>
-   <head>
-     <meta charset="utf-8" />
-     <meta name="viewport" content="width=device-width" />
-     <title>My static Space</title>
-     <link rel="stylesheet" href="style.css" />
-   </head>
-   <body>
-     <div class="card">
-       <h1>Welcome to your static Space!</h1>
-       <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-       <p>
-         Also don't forget to check the
-         <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-       </p>
-     </div>
-   </body>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8" />
+     <title>Realtime Conversational WebGPU (Vue)</title>
+     <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=0" />
+     <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96" />
+     <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
+     <script>
+       ;(function () {
+         const prefersDark = window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches
+         const setting = localStorage.getItem('vueuse-color-scheme') || 'auto'
+         if (setting === 'dark' || (prefersDark && setting !== 'light'))
+           document.documentElement.classList.toggle('dark', true)
+       })()
+     </script>
+     <script type="module" crossorigin src="/assets/index-DGmKQH7N.js"></script>
+     <link rel="stylesheet" crossorigin href="/assets/index-cAxkOY9l.css">
+   </head>
+   <body class="font-sans">
+     <div id="app"></div>
+     <noscript> This website requires JavaScript to function properly. Please enable JavaScript to continue. </noscript>
+   </body>
  </html>
style.css DELETED
@@ -1,28 +0,0 @@
- body {
-   padding: 2rem;
-   font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
- }
-
- h1 {
-   font-size: 16px;
-   margin-top: 0;
- }
-
- p {
-   color: rgb(107, 114, 128);
-   font-size: 15px;
-   margin-bottom: 10px;
-   margin-top: 5px;
- }
-
- .card {
-   max-width: 620px;
-   margin: 0 auto;
-   padding: 16px;
-   border: 1px solid lightgray;
-   border-radius: 16px;
- }
-
- .card p:last-child {
-   margin-bottom: 0;
- }