Merge branch 'dev'

Browse files

* dev:
filter out messages whose pending text is empty
add whisper large v3 model

Files changed (7) hide show

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/analytics/coremldata.bin +3 -0
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/coremldata.bin +3 -0
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/metadata.json +68 -0
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/model.mil +0 -0
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/weights/weight.bin +3 -0
moyoyo_asr_models/ggml-large-v3-turbo-q5_0.bin +3 -0
transcribe/strategy.py +5 -4

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:311e822db8601dd4f6051f276975a410f77290e20058815f0bbc2d3fe6339f86
+size 243

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53adfc091caf04e1f1cf9f42215860bd1f9481d2e0116a0b71e78b9e87003045
+size 319

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,68 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 32,
+      "Gelu" : 34,
+      "LayerNorm" : 65,
+      "Transpose" : 33,
+      "Softmax" : 640,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 65,
+      "Einsum" : 1280,
+      "ExpandDims" : 1,
+      "Split" : 96,
+      "Conv" : 194
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.1.0",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 128 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_large_v3_turbo",
+    "method" : "predict"
+  }
+]

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcc450fb244d55335f6df82a41558de1b07d44acaf67c7b7b3040da44f94bdd3
+size 1273969152

moyoyo_asr_models/ggml-large-v3-turbo-q5_0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:394221709cd5ad1f40c46e6031ca61bce88931e6e088c188294c6d5a55ffa7e2
+size 574041195

transcribe/strategy.py CHANGED Viewed

@@ -312,10 +312,11 @@ class TranscriptStabilityAnalyzer:
                 context=self._transcript_buffer.latest_paragraph,
                 is_end_sentence=True
             )
-            yield TranscriptResult(
-                seg_id=self._transcript_buffer.get_seg_id(),
-                context=self._transcript_buffer.pending_text,
-            )
         else:
             yield TranscriptResult(
                 seg_id=self._transcript_buffer.get_seg_id(),

                 context=self._transcript_buffer.latest_paragraph,
                 is_end_sentence=True
             )
+            if (context := self._transcript_buffer.current_not_commit_text.strip()):
+                yield TranscriptResult(
+                    seg_id=self._transcript_buffer.get_seg_id(),
+                    context=context,
+                )
         else:
             yield TranscriptResult(
                 seg_id=self._transcript_buffer.get_seg_id(),