qqwjq1981 committed on
Commit
46034f5
·
verified ·
1 Parent(s): c0f8674

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -639,7 +639,6 @@ def extract_ocr_subtitles_parallel(video_path, transcription_json, interval_sec=
639
  logger.info(f"✅ OCR extraction completed: {len(ocr_results)} frames successful, {ocr_failures} frames failed.")
640
  return ocr_results
641
 
642
-
643
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
644
  collapsed = []
645
  current = None
@@ -654,17 +653,19 @@ def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
654
  sim = fuzz.ratio(current["text"], text)
655
  if sim >= text_similarity_threshold:
656
  current["end"] = time
 
657
  else:
 
 
 
658
  collapsed.append(current)
659
  current = {"start": time, "end": time, "text": text}
660
  if current:
661
  collapsed.append(current)
662
 
663
- # Log collapsed OCR summary
664
  logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
665
  for idx, seg in enumerate(collapsed):
666
  logger.debug(f"[OCR Collapsed {idx}] {seg['start']:.2f}s - {seg['end']:.2f}s: {seg['text'][:50]}...")
667
-
668
  return collapsed
669
 
670
  def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
 
639
  logger.info(f"✅ OCR extraction completed: {len(ocr_results)} frames successful, {ocr_failures} frames failed.")
640
  return ocr_results
641
 
 
642
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
643
  collapsed = []
644
  current = None
 
653
  sim = fuzz.ratio(current["text"], text)
654
  if sim >= text_similarity_threshold:
655
  current["end"] = time
656
+ logger.debug(f"MERGED: Current end extended to {time:.2f}s for text: '{current['text'][:50]}...' (Similarity: {sim})")
657
  else:
658
+ logger.debug(f"NOT MERGING (Similarity: {sim} < Threshold: {text_similarity_threshold}):")
659
+ logger.debug(f" Previous segment: {current['start']:.2f}s - {current['end']:.2f}s: '{current['text'][:50]}...'")
660
+ logger.debug(f" New segment: {time:.2f}s: '{text[:50]}...'")
661
  collapsed.append(current)
662
  current = {"start": time, "end": time, "text": text}
663
  if current:
664
  collapsed.append(current)
665
 
 
666
  logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
667
  for idx, seg in enumerate(collapsed):
668
  logger.debug(f"[OCR Collapsed {idx}] {seg['start']:.2f}s - {seg['end']:.2f}s: {seg['text'][:50]}...")
 
669
  return collapsed
670
 
671
  def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):