qqwjq1981 committed on
Commit
548c12a
·
verified ·
1 Parent(s): 7d826f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -638,30 +638,37 @@ def post_edit_transcribed_segments(transcription_json, video_path,
638
  updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
639
 
640
  merged_segments.append(updated_entry)
641
-
642
  # Step 4: Insert unused OCR segments (Phase 2)
643
  inserted_segments = []
644
  for ocr_idx, ocr in enumerate(collapsed_ocr):
645
- if ocr_idx not in used_ocr_indices:
646
- # Try to assign the speaker based on nearby merged segments
647
- nearby_speakers = []
648
- for seg in merged_segments:
649
- if abs(seg["start"] - ocr["start"]) <= 2.0 or abs(seg["end"] - ocr["end"]) <= 2.0:
650
- if "speaker" in seg:
651
- nearby_speakers.append(seg["speaker"])
652
-
653
- if nearby_speakers:
654
- assigned_speaker = nearby_speakers[0] # Take the first nearby speaker
655
- else:
656
- assigned_speaker = "SPEAKER_00"
657
-
658
- inserted_segment = {
659
- "start": ocr["start"],
660
- "end": ocr["end"],
661
- "text": ocr["text"],
662
- "speaker": assigned_speaker
663
- }
664
- inserted_segments.append(inserted_segment)
 
 
 
 
 
 
 
665
 
666
  # Step 5: Combine and sort
667
  final_segments = merged_segments + inserted_segments
 
638
  updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
639
 
640
  merged_segments.append(updated_entry)
641
+
642
  # Step 4: Insert unused OCR segments (Phase 2)
643
  inserted_segments = []
644
  for ocr_idx, ocr in enumerate(collapsed_ocr):
645
+ if ocr_idx in used_ocr_indices:
646
+ continue
647
+
648
+ # Check for fuzzy duplicates in WhisperX
649
+ duplicate = False
650
+ for whisper_seg in transcription_json:
651
+ if abs(ocr["start"] - whisper_seg["start"]) < time_tolerance or abs(ocr["end"] - whisper_seg["end"]) < time_tolerance:
652
+ sim = fuzz.ratio(ocr["text"], whisper_seg["text"])
653
+ if sim >= text_similarity_threshold:
654
+ duplicate = True
655
+ break
656
+
657
+ if duplicate:
658
+ logger.debug(f"🟡 Skipping near-duplicate OCR: '{ocr['text']}'")
659
+ continue
660
+
661
+ # Infer speaker from nearest WhisperX entry
662
+ nearby = sorted(transcription_json, key=lambda x: abs(x["start"] - ocr["start"]))
663
+ speaker_guess = nearby[0].get("speaker", "unknown") if nearby else "unknown"
664
+
665
+ inserted_segment = {
666
+ "start": ocr["start"],
667
+ "end": ocr["end"],
668
+ "text": ocr["text"],
669
+ "speaker": speaker_guess
670
+ }
671
+ inserted_segments.append(inserted_segment)
672
 
673
  # Step 5: Combine and sort
674
  final_segments = merged_segments + inserted_segments