Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -638,30 +638,37 @@ def post_edit_transcribed_segments(transcription_json, video_path,
|
|
638 |
updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
|
639 |
|
640 |
merged_segments.append(updated_entry)
|
641 |
-
|
642 |
# Step 4: Insert unused OCR segments (Phase 2)
|
643 |
inserted_segments = []
|
644 |
for ocr_idx, ocr in enumerate(collapsed_ocr):
|
645 |
-
if ocr_idx
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
665 |
|
666 |
# Step 5: Combine and sort
|
667 |
final_segments = merged_segments + inserted_segments
|
|
|
638 |
updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
|
639 |
|
640 |
merged_segments.append(updated_entry)
|
641 |
+
|
642 |
# Step 4: Insert unused OCR segments (Phase 2)
|
643 |
inserted_segments = []
|
644 |
for ocr_idx, ocr in enumerate(collapsed_ocr):
|
645 |
+
if ocr_idx in used_ocr_indices:
|
646 |
+
continue
|
647 |
+
|
648 |
+
# Check for fuzzy duplicates in WhisperX
|
649 |
+
duplicate = False
|
650 |
+
for whisper_seg in transcription_json:
|
651 |
+
if abs(ocr["start"] - whisper_seg["start"]) < time_tolerance or abs(ocr["end"] - whisper_seg["end"]) < time_tolerance:
|
652 |
+
sim = fuzz.ratio(ocr["text"], whisper_seg["text"])
|
653 |
+
if sim >= text_similarity_threshold:
|
654 |
+
duplicate = True
|
655 |
+
break
|
656 |
+
|
657 |
+
if duplicate:
|
658 |
+
logger.debug(f"🟡 Skipping near-duplicate OCR: '{ocr['text']}'")
|
659 |
+
continue
|
660 |
+
|
661 |
+
# Infer speaker from nearest WhisperX entry
|
662 |
+
nearby = sorted(transcription_json, key=lambda x: abs(x["start"] - ocr["start"]))
|
663 |
+
speaker_guess = nearby[0].get("speaker", "unknown") if nearby else "unknown"
|
664 |
+
|
665 |
+
inserted_segment = {
|
666 |
+
"start": ocr["start"],
|
667 |
+
"end": ocr["end"],
|
668 |
+
"text": ocr["text"],
|
669 |
+
"speaker": speaker_guess
|
670 |
+
}
|
671 |
+
inserted_segments.append(inserted_segment)
|
672 |
|
673 |
# Step 5: Combine and sort
|
674 |
final_segments = merged_segments + inserted_segments
|