qqwjq1981 committed (verified)
Commit c6f940f · 1 Parent(s): 548c12a

Update app.py
Files changed (1): app.py (+54 -1)
app.py CHANGED
@@ -494,6 +494,58 @@ def init_ocr_model():
     if ocr_model is None:
         ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")
 
+def find_best_subtitle_region(frame, ocr_model, region_height_ratio=0.35, num_strips=5, min_conf=0.5):
+    """
+    Automatically identifies the best subtitle region in a video frame using OCR confidence.
+
+    Parameters:
+    - frame: full video frame (BGR np.ndarray)
+    - ocr_model: a loaded PaddleOCR model
+    - region_height_ratio: portion of the image height to scan (from the bottom up)
+    - num_strips: how many horizontal strips to evaluate
+    - min_conf: minimum average confidence to consider a region valid
+
+    Returns:
+    - crop_region: the cropped image region with the highest OCR confidence
+    - region_box: (y_start, y_end) of the region in the original frame
+    """
+    height, width, _ = frame.shape
+    region_height = int(height * region_height_ratio)
+    base_y_start = height - region_height
+    strip_height = region_height // num_strips
+
+    best_score = -1
+    best_crop = None
+    best_bounds = (0, height)
+
+    for i in range(num_strips):
+        y_start = base_y_start + i * strip_height
+        y_end = y_start + strip_height
+        strip = frame[y_start:y_end, :]
+
+        try:
+            result = ocr_model.ocr(strip, cls=True)
+            if not result or not result[0]:
+                continue
+
+            total_score = sum(line[1][1] for line in result[0])
+            avg_score = total_score / len(result[0])
+
+            if avg_score > best_score:
+                best_score = avg_score
+                best_crop = strip
+                best_bounds = (y_start, y_end)
+
+        except Exception as e:
+            continue  # Fail silently on OCR issues
+
+    if best_score >= min_conf and best_crop is not None:
+        return best_crop, best_bounds
+    else:
+        # Fallback to center-bottom strip
+        fallback_y = height - int(height * 0.2)
+        return frame[fallback_y:, :], (fallback_y, height)
+
 def ocr_frame_worker(args):
     frame_idx, frame_time, frame = args
 
@@ -509,7 +561,8 @@ def ocr_frame_worker(args):
     frame = frame.astype(np.uint8)
 
     try:
-        result = ocr_model.ocr(frame, cls=True)
+        subtitle_crop, _ = find_best_subtitle_region(frame, ocr_model)
+        result = ocr_model.ocr(subtitle_crop, cls=True)
         texts = [line[1][0] for line in result[0]] if result[0] else []
         combined_text = " ".join(texts).strip()
         return {"time": frame_time, "text": combined_text}