qqwjq1981 committed on
Commit
c34772c
·
verified ·
1 Parent(s): b83a564

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -482,7 +482,7 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
482
  )
483
 
484
  return original_segments
485
-
486
  def ocr_frame_worker(args):
487
  frame_idx, frame_time, frame = args
488
  ocr = PaddleOCR(use_angle_cls=True, lang="ch") # Initialize OCR inside worker
@@ -491,7 +491,17 @@ def ocr_frame_worker(args):
491
  combined_text = " ".join(texts).strip()
492
  return {"time": frame_time, "text": combined_text}
493
 
494
- def extract_ocr_subtitles_parallel(video_path, interval_sec=0.5, num_workers=4):
 
 
 
 
 
 
 
 
 
 
495
  cap = cv2.VideoCapture(video_path)
496
  fps = cap.get(cv2.CAP_PROP_FPS)
497
  frames = []
@@ -501,7 +511,8 @@ def extract_ocr_subtitles_parallel(video_path, interval_sec=0.5, num_workers=4):
501
  while success:
502
  if frame_idx % int(fps * interval_sec) == 0:
503
  frame_time = frame_idx / fps
504
- frames.append((frame_idx, frame_time, frame.copy()))
 
505
  success, frame = cap.read()
506
  frame_idx += 1
507
  cap.release()
@@ -517,6 +528,7 @@ def extract_ocr_subtitles_parallel(video_path, interval_sec=0.5, num_workers=4):
517
  ocr_results.append(result)
518
  except Exception as e:
519
  print(f"⚠️ OCR failed for a frame: {e}")
 
520
  return ocr_results
521
 
522
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
@@ -550,9 +562,13 @@ def post_edit_transcribed_segments(transcription_json, video_path,
550
  use OCR subtitles to post-correct and safely insert missing captions.
551
  """
552
 
553
- # Step 1: Extract OCR subtitles
554
- ocr_json = extract_ocr_subtitles_parallel(video_path, interval_sec=interval_sec, num_workers=num_workers)
555
-
 
 
 
 
556
  # Step 2: Collapse repetitive OCR
557
  collapsed_ocr = collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90)
558
 
 
482
  )
483
 
484
  return original_segments
485
+
486
  def ocr_frame_worker(args):
487
  frame_idx, frame_time, frame = args
488
  ocr = PaddleOCR(use_angle_cls=True, lang="ch") # Initialize OCR inside worker
 
491
  combined_text = " ".join(texts).strip()
492
  return {"time": frame_time, "text": combined_text}
493
 
494
def frame_is_in_audio_segments(frame_time: float, audio_segments, tolerance: float = 0.2) -> bool:
    """
    Report whether ``frame_time`` lies inside any audio segment.

    Each segment's window is widened by ``tolerance`` on both sides, and the
    comparison is inclusive at both ends, so a frame exactly ``tolerance``
    before a segment's start (or after its end) still counts as a match.

    Args:
        frame_time: Timestamp of the video frame to test.
        audio_segments: Iterable of mappings that each provide ``"start"``
            and ``"end"`` values comparable with ``frame_time``. ``None`` or
            an empty iterable is treated as "no segments".
        tolerance: Slack added before each start and after each end, in the
            same unit as ``frame_time``.

    Returns:
        True if at least one widened segment contains ``frame_time``,
        otherwise False.

    Raises:
        KeyError: If a segment lacks the ``"start"`` or ``"end"`` key.
    """
    # No segments means there is nothing the frame could fall inside; this
    # also avoids a TypeError from iterating over None.
    if not audio_segments:
        return False

    # any() stops at the first matching segment, like an early return would.
    return any(
        (segment["start"] - tolerance) <= frame_time <= (segment["end"] + tolerance)
        for segment in audio_segments
    )
503
+
504
+ def extract_ocr_subtitles_parallel(video_path, transcription_json, interval_sec=0.5, num_workers=4):
505
  cap = cv2.VideoCapture(video_path)
506
  fps = cap.get(cv2.CAP_PROP_FPS)
507
  frames = []
 
511
  while success:
512
  if frame_idx % int(fps * interval_sec) == 0:
513
  frame_time = frame_idx / fps
514
+ if frame_is_in_audio_segments(frame_time, transcription_json):
515
+ frames.append((frame_idx, frame_time, frame.copy()))
516
  success, frame = cap.read()
517
  frame_idx += 1
518
  cap.release()
 
528
  ocr_results.append(result)
529
  except Exception as e:
530
  print(f"⚠️ OCR failed for a frame: {e}")
531
+
532
  return ocr_results
533
 
534
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
 
562
  use OCR subtitles to post-correct and safely insert missing captions.
563
  """
564
 
565
+ # Step 1: Extract OCR subtitles (only near audio segments)
566
+ ocr_json = extract_ocr_subtitles_parallel(
567
+ video_path,
568
+ transcription_json,
569
+ interval_sec=interval_sec,
570
+ num_workers=num_workers
571
+ )
572
  # Step 2: Collapse repetitive OCR
573
  collapsed_ocr = collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90)
574