qqwjq1981 committed
Commit a12a54f · verified · 1 Parent(s): f96fc4d
Update app.py
Files changed (1):
  1. app.py +11 -13
app.py CHANGED
@@ -448,6 +448,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height
        logger.error(f"❌ Failed to create subtitle clip: {e}")
        return None

+
def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    """
    Aligns speech segments using quadratic programming. If optimization fails,
@@ -458,6 +459,9 @@ def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    d = np.array(generated_durations)
    m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])

+   if N == 0 or len(generated_durations) == 0:
+       logger.warning("⚠️ Alignment skipped: empty segments or durations.")
+       return original_segments  # or raise an error, depending on your app logic
    try:
        s = cp.Variable(N)
        objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
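For readers unfamiliar with the cvxpy pattern this function wraps, here is a minimal, self-contained sketch of the quadratic-programming alignment. Only the variable and objective lines mirror the diff; the constraints, the solve call, and the write-back of start/end times are assumptions about the elided body, not the app's actual code.

import cvxpy as cp
import numpy as np

def align_segments_sketch(original_segments, generated_durations, total_duration):
    # d: generated clip durations, m: midpoints of the original segments
    d = np.array(generated_durations)
    m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
    N = len(d)

    s = cp.Variable(N)  # optimized start time of each segment
    # Keep each new midpoint (s_i + d_i / 2) close to the original midpoint m_i
    objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
    # Assumed constraints: segments stay inside the video and do not overlap
    constraints = [s >= 0, s + d <= total_duration]
    constraints += [s[i + 1] >= s[i] + d[i] for i in range(N - 1)]

    cp.Problem(objective, constraints).solve()
    if s.value is None:
        return original_segments  # solver failed: fall back to original timing

    for i, seg in enumerate(original_segments):
        seg['start'] = float(s.value[i])
        seg['end'] = float(s.value[i] + d[i])
    return original_segments

The guard added in this commit simply short-circuits before any of this runs when there is nothing to align, which avoids constructing a zero-length cvxpy problem.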
@@ -520,7 +524,6 @@ def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    )

    return original_segments
-
# ocr_model = None
# ocr_lock = threading.Lock()

@@ -1091,18 +1094,13 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
    # text_clips = [clip for _, clip, _, _ in results if clip]
    # generated_durations = [dur for _, _, _, dur in results if dur > 0]

-   # Generate text_clips / generated_durations with result_map
-   ordered_idx = sorted(result_map.keys())
-   text_clips = [
-       result_map[i][0] for i in ordered_idx
-       if result_map[i][0]
-   ]
-   generated_durations = [
-       result_map[i][2] for i in ordered_idx
-       if result_map[i][2] > 0
-   ]
-
-
+   # Sort and filter together
+   results.sort(key=lambda x: x[0])
+   filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
+
+   translated_json = [entry for entry, _, _, _ in filtered]
+   generated_durations = [dur for _, _, _, dur in filtered]
+
    # Align using optimization (modifies translated_json in-place)
    if generated_durations:
        translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
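The replacement block drops the result_map indirection and keeps translated_json and generated_durations index-aligned after discarding segments with no usable duration. A small illustration with invented data (the (i, txt, aud, dur) tuple layout follows the diff; the sample values are hypothetical):

# Hypothetical per-segment results: (index, text_clip, audio_clip, duration)
results = [(2, "clip2", "aud2", 0.0), (0, "clip0", "aud0", 1.8), (1, "clip1", "aud1", 2.4)]
translated_json = [{"text": "a"}, {"text": "b"}, {"text": "c"}]

results.sort(key=lambda x: x[0])                 # restore original segment order
filtered = [(translated_json[i], txt, aud, dur)  # pair each result with its segment
            for i, txt, aud, dur in results if dur > 0]

translated_json = [entry for entry, _, _, _ in filtered]
generated_durations = [dur for _, _, _, dur in filtered]

assert len(translated_json) == len(generated_durations)  # 1:1 for the alignment step
# -> the segment at index 2 (duration 0.0) is dropped from both lists

Keeping the two lists the same length is what lets solve_optimal_alignment pair each duration with its segment without further bookkeeping.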
 