Xin Zhang committed on
Commit
5cd2109
·
2 Parent(s): 9e17d35 b2b3b92

Merge branch 'dev'

Browse files

* dev:
filter out messages whose pending text is empty
add whisper large v3 model

moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311e822db8601dd4f6051f276975a410f77290e20058815f0bbc2d3fe6339f86
3
+ size 243
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53adfc091caf04e1f1cf9f42215860bd1f9481d2e0116a0b71e78b9e87003045
3
+ size 319
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1500 × 1280)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1500, 1280]",
13
+ "name" : "output",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 6,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Concat" : 32,
23
+ "Gelu" : 34,
24
+ "LayerNorm" : 65,
25
+ "Transpose" : 33,
26
+ "Softmax" : 640,
27
+ "Squeeze" : 1,
28
+ "Cast" : 2,
29
+ "Add" : 65,
30
+ "Einsum" : 1280,
31
+ "ExpandDims" : 1,
32
+ "Split" : 96,
33
+ "Conv" : 194
34
+ },
35
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
36
+ "isUpdatable" : "0",
37
+ "availability" : {
38
+ "macOS" : "12.0",
39
+ "tvOS" : "15.0",
40
+ "visionOS" : "1.0",
41
+ "watchOS" : "8.0",
42
+ "iOS" : "15.0",
43
+ "macCatalyst" : "15.0"
44
+ },
45
+ "modelType" : {
46
+ "name" : "MLModelType_mlProgram"
47
+ },
48
+ "userDefinedMetadata" : {
49
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
50
+ "com.github.apple.coremltools.source" : "torch==2.1.0",
51
+ "com.github.apple.coremltools.version" : "8.0"
52
+ },
53
+ "inputSchema" : [
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Float32",
58
+ "formattedType" : "MultiArray (Float32 1 × 128 × 3000)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 128, 3000]",
61
+ "name" : "logmel_data",
62
+ "type" : "MultiArray"
63
+ }
64
+ ],
65
+ "generatedClassName" : "coreml_encoder_large_v3_turbo",
66
+ "method" : "predict"
67
+ }
68
+ ]
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
moyoyo_asr_models/ggml-large-v3-turbo-encoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcc450fb244d55335f6df82a41558de1b07d44acaf67c7b7b3040da44f94bdd3
3
+ size 1273969152
moyoyo_asr_models/ggml-large-v3-turbo-q5_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394221709cd5ad1f40c46e6031ca61bce88931e6e088c188294c6d5a55ffa7e2
3
+ size 574041195
transcribe/strategy.py CHANGED
@@ -312,10 +312,11 @@ class TranscriptStabilityAnalyzer:
312
  context=self._transcript_buffer.latest_paragraph,
313
  is_end_sentence=True
314
  )
315
- yield TranscriptResult(
316
- seg_id=self._transcript_buffer.get_seg_id(),
317
- context=self._transcript_buffer.pending_text,
318
- )
 
319
  else:
320
  yield TranscriptResult(
321
  seg_id=self._transcript_buffer.get_seg_id(),
 
312
  context=self._transcript_buffer.latest_paragraph,
313
  is_end_sentence=True
314
  )
315
+ if (context := self._transcript_buffer.current_not_commit_text.strip()):
316
+ yield TranscriptResult(
317
+ seg_id=self._transcript_buffer.get_seg_id(),
318
+ context=context,
319
+ )
320
  else:
321
  yield TranscriptResult(
322
  seg_id=self._transcript_buffer.get_seg_id(),