add text length threshold

Files changed (4) hide show

config.py CHANGED Viewed

@@ -21,6 +21,8 @@ console_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s
 console_handler.setFormatter(console_formatter)
 logging.getLogger().addHandler(console_handler)
 BASE_DIR = pathlib.Path(__file__).parent
 MODEL_DIR = BASE_DIR / "moyoyo_asr_models"

 console_handler.setFormatter(console_formatter)
 logging.getLogger().addHandler(console_handler)
+# 文字输出长度阈值
+TEXT_THREHOLD = 16
 BASE_DIR = pathlib.Path(__file__).parent
 MODEL_DIR = BASE_DIR / "moyoyo_asr_models"

tests/test_whisper_cpp.py CHANGED Viewed

@@ -3,7 +3,7 @@ import config
 import soundfile
 from pywhispercpp.utils import to_timestamp
-mel, _, = soundfile.read("/Users/david/Samples/Audio/en/sample-10.wav")
 # mel, _, = soundfile.read(f"{config.ASSERT_DIR}/jfk.flac")
 models_dir = config.MODEL_DIR.as_posix()
@@ -19,7 +19,7 @@ model = Model(
               no_context=True
               )
 print(mel.shape, mel.dtype) # (160000,) float64
-segments = model.transcribe(mel[:, 0],
                             # initial_prompt="",# 'The following is an English sentence.', # "以下是简体中文句子。"
                             language='en',
                             # initial_prompt="以下是简体中文句子。",

 import soundfile
 from pywhispercpp.utils import to_timestamp
+mel, _, = soundfile.read("test/6_before_cut_56640.wav")
 # mel, _, = soundfile.read(f"{config.ASSERT_DIR}/jfk.flac")
 models_dir = config.MODEL_DIR.as_posix()
               no_context=True
               )
 print(mel.shape, mel.dtype) # (160000,) float64
+segments = model.transcribe(mel,
                             # initial_prompt="",# 'The following is an English sentence.', # "以下是简体中文句子。"
                             language='en',
                             # initial_prompt="以下是简体中文句子。",

transcribe/pipelines/pipe_translate.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from .base import MetaItem, BasePipe, Segment
 from llama_cpp import Llama
 from ..helpers.translator import QwenTranslator
-from config import LLM_MODEL_PATH, LLM_SYS_PROMPT_EN, LLM_SYS_PROMPT_ZH, LLM_LARGE_MODEL_PATH
 class TranslatePipe(BasePipe):
@@ -16,8 +16,12 @@ class TranslatePipe(BasePipe):
     def process(self, in_data: MetaItem) -> MetaItem:
         context = in_data.transcribe_content
-        result = self.translator.translate(
-            context, src_lang=in_data.source_language, dst_lang=in_data.destination_language)
         in_data.translate_content = result
         return in_data

 from .base import MetaItem, BasePipe, Segment
 from llama_cpp import Llama
 from ..helpers.translator import QwenTranslator
+from config import LLM_MODEL_PATH, LLM_SYS_PROMPT_EN, LLM_SYS_PROMPT_ZH, LLM_LARGE_MODEL_PATH, ALL_MARKERS
 class TranslatePipe(BasePipe):
     def process(self, in_data: MetaItem) -> MetaItem:
         context = in_data.transcribe_content
+        all_punctuatioin = all([ch in ALL_MARKERS for ch in context])
+        if all_punctuatioin:
+            result = ""
+        else:
+            result = self.translator.translate(
+                context, src_lang=in_data.source_language, dst_lang=in_data.destination_language)
         in_data.translate_content = result
         return in_data

transcribe/strategy.py CHANGED Viewed

@@ -8,7 +8,7 @@ from typing import List, Tuple, Optional, Deque, Any, Iterator,Literal
 from config import SENTENCE_END_MARKERS, ALL_MARKERS,SENTENCE_END_PATTERN,REGEX_MARKERS, PAUSEE_END_PATTERN,SAMPLE_RATE
 from enum import Enum
 import wordninja
 import re
 logger = logging.getLogger("TranscriptionStrategy")
@@ -199,7 +199,7 @@ class TranscriptBuffer:
         count = 0
         current_sentences = []
-        while len(self._sentences) and count < 20:
             item = self._sentences.popleft()
             current_sentences.append(item)
             if self._separator:
@@ -265,10 +265,10 @@ class TranscriptBuffer:
                 self.update_pending_text(stable_str)
                 self.commit_line()
-            current_text_len =  len(self.current_not_commit_text.split(self._separator)) if self._separator else len(self.current_not_commit_text)
             # current_text_len = len(self.current_not_commit_text.split(self._separator))
             self.update_pending_text(remaining_string)
-            if current_text_len >= 20:
                 self.commit_paragraph()
                 self._current_seg_id += 1
                 return True

 from config import SENTENCE_END_MARKERS, ALL_MARKERS,SENTENCE_END_PATTERN,REGEX_MARKERS, PAUSEE_END_PATTERN,SAMPLE_RATE
 from enum import Enum
 import wordninja
+import config
 import re
 logger = logging.getLogger("TranscriptionStrategy")
         count = 0
         current_sentences = []
+        while len(self._sentences): # and count < 20:
             item = self._sentences.popleft()
             current_sentences.append(item)
             if self._separator:
                 self.update_pending_text(stable_str)
                 self.commit_line()
+            current_text_len = len(self.current_not_commit_text.split(self._separator)) if self._separator else len(self.current_not_commit_text)
             # current_text_len = len(self.current_not_commit_text.split(self._separator))
             self.update_pending_text(remaining_string)
+            if current_text_len >= config.TEXT_THREHOLD:
                 self.commit_paragraph()
                 self._current_seg_id += 1
                 return True