qqwjq1981 committed
Commit a12a54f · verified · 1 Parent(s): f96fc4d
Update app.py
Files changed (1):
  1. app.py +11 -13
app.py CHANGED
@@ -448,6 +448,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_height
        logger.error(f"❌ Failed to create subtitle clip: {e}")
        return None

+
def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    """
    Aligns speech segments using quadratic programming. If optimization fails,
@@ -458,6 +459,9 @@ def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    d = np.array(generated_durations)
    m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])

+   if N == 0 or len(generated_durations) == 0:
+       logger.warning("⚠️ Alignment skipped: empty segments or durations.")
+       return original_segments  # or raise an error, depending on your app logic
    try:
        s = cp.Variable(N)
        objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
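For readers unfamiliar with the cvxpy pattern this function wraps, here is a minimal, self-contained sketch of the quadratic-programming alignment. Only the variable and objective lines mirror the diff; the constraints, the solve call, and the write-back of start/end times are assumptions about the elided body, not the app's actual code.

import cvxpy as cp
import numpy as np

def align_segments_sketch(original_segments, generated_durations, total_duration):
    # d: generated clip durations, m: midpoints of the original segments
    d = np.array(generated_durations)
    m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
    N = len(d)

    s = cp.Variable(N)  # optimized start time of each segment
    # Keep each new midpoint (s_i + d_i / 2) close to the original midpoint m_i
    objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
    # Assumed constraints: segments stay inside the video and do not overlap
    constraints = [s >= 0, s + d <= total_duration]
    constraints += [s[i + 1] >= s[i] + d[i] for i in range(N - 1)]

    cp.Problem(objective, constraints).solve()
    if s.value is None:
        return original_segments  # solver failed: fall back to original timing

    for i, seg in enumerate(original_segments):
        seg['start'] = float(s.value[i])
        seg['end'] = float(s.value[i] + d[i])
    return original_segments

The guard added in this commit simply short-circuits before any of this runs when there is nothing to align, which avoids constructing a zero-length cvxpy problem.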
@@ -520,7 +524,6 @@ def solve_optimal_alignment(original_segments, generated_durations, total_duration):
    )

    return original_segments
-
# ocr_model = None
# ocr_lock = threading.Lock()

@@ -1091,18 +1094,13 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
    # text_clips = [clip for _, clip, _, _ in results if clip]
    # generated_durations = [dur for _, _, _, dur in results if dur > 0]

-   # Generate text_clips / generated_durations with result_map
-   ordered_idx = sorted(result_map.keys())
-   text_clips = [
-       result_map[i][0] for i in ordered_idx
-       if result_map[i][0]
-   ]
-   generated_durations = [
-       result_map[i][2] for i in ordered_idx
-       if result_map[i][2] > 0
-   ]
-
-
+   # Sort and filter together
+   results.sort(key=lambda x: x[0])
+   filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
+
+   translated_json = [entry for entry, _, _, _ in filtered]
+   generated_durations = [dur for _, _, _, dur in filtered]
+
    # Align using optimization (modifies translated_json in-place)
    if generated_durations:
        translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
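The replacement block drops the result_map indirection and keeps translated_json and generated_durations index-aligned after discarding segments with no usable duration. A small illustration with invented data (the (i, txt, aud, dur) tuple layout follows the diff; the sample values are hypothetical):

# Hypothetical per-segment results: (index, text_clip, audio_clip, duration)
results = [(2, "clip2", "aud2", 0.0), (0, "clip0", "aud0", 1.8), (1, "clip1", "aud1", 2.4)]
translated_json = [{"text": "a"}, {"text": "b"}, {"text": "c"}]

results.sort(key=lambda x: x[0])                 # restore original segment order
filtered = [(translated_json[i], txt, aud, dur)  # pair each result with its segment
            for i, txt, aud, dur in results if dur > 0]

translated_json = [entry for entry, _, _, _ in filtered]
generated_durations = [dur for _, _, _, dur in filtered]

assert len(translated_json) == len(generated_durations)  # 1:1 for the alignment step
# -> the segment at index 2 (duration 0.0) is dropped from both lists

Keeping the two lists the same length is what lets solve_optimal_alignment pair each duration with its segment without further bookkeeping.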
 