Update app.py
app.py CHANGED
@@ -639,7 +639,6 @@ def extract_ocr_subtitles_parallel(video_path, transcription_json, interval_sec=
     logger.info(f"✅ OCR extraction completed: {len(ocr_results)} frames successful, {ocr_failures} frames failed.")
     return ocr_results
 
-
 def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
     collapsed = []
     current = None
@@ -654,17 +653,19 @@ def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
         sim = fuzz.ratio(current["text"], text)
         if sim >= text_similarity_threshold:
             current["end"] = time
+            logger.debug(f"MERGED: Current end extended to {time:.2f}s for text: '{current['text'][:50]}...' (Similarity: {sim})")
         else:
+            logger.debug(f"NOT MERGING (Similarity: {sim} < Threshold: {text_similarity_threshold}):")
+            logger.debug(f"  Previous segment: {current['start']:.2f}s - {current['end']:.2f}s: '{current['text'][:50]}...'")
+            logger.debug(f"  New segment: {time:.2f}s: '{text[:50]}...'")
             collapsed.append(current)
             current = {"start": time, "end": time, "text": text}
     if current:
         collapsed.append(current)
 
-    # Log collapsed OCR summary
     logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
     for idx, seg in enumerate(collapsed):
         logger.debug(f"[OCR Collapsed {idx}] {seg['start']:.2f}s - {seg['end']:.2f}s: {seg['text'][:50]}...")
-
     return collapsed
 
 def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
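For reference, below is a minimal, self-contained sketch of the collapse step this commit instruments. The loop body and the OCR entry format lie outside the hunks, so the iteration over ocr_json, the {"time": ..., "text": ...} entry shape, and the rapidfuzz import are assumptions; only the similarity check, the segment bookkeeping, and the new debug logging are taken from the diff above.

# Sketch of collapse_ocr_subtitles, assuming ocr_json is a time-ordered list of
# {"time": float, "text": str} entries and fuzz comes from rapidfuzz (assumption;
# the actual imports and loop structure are not shown in the diff).
import logging
from rapidfuzz import fuzz

logger = logging.getLogger(__name__)

def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
    collapsed = []
    current = None
    for entry in ocr_json:  # assumed entry format
        time, text = entry["time"], entry["text"]
        if current is None:
            current = {"start": time, "end": time, "text": text}
            continue
        sim = fuzz.ratio(current["text"], text)
        if sim >= text_similarity_threshold:
            # Near-identical on-screen text: extend the current segment's end time.
            current["end"] = time
            logger.debug(f"MERGED: Current end extended to {time:.2f}s (Similarity: {sim})")
        else:
            # Text changed: close the current segment and start a new one.
            logger.debug(f"NOT MERGING (Similarity: {sim} < Threshold: {text_similarity_threshold})")
            collapsed.append(current)
            current = {"start": time, "end": time, "text": text}
    if current:
        collapsed.append(current)
    logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
    return collapsed

# Example: frames OCR'd once per second; near-identical text collapses into one segment.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    frames = [
        {"time": 0.0, "text": "Hello world"},
        {"time": 1.0, "text": "Hello world!"},          # ratio ~96 -> merged into the first segment
        {"time": 2.0, "text": "A different subtitle"},  # below threshold -> new segment
    ]
    print(collapse_ocr_subtitles(frames))

The added logger.debug calls make this merge/split decision visible per frame, which is useful when tuning text_similarity_threshold against noisy OCR output.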