Update app.py
app.py CHANGED
@@ -448,6 +448,7 @@ def create_subtitle_clip_pil(text, start_time, end_time, video_width, video_heig
         logger.error(f"❌ Failed to create subtitle clip: {e}")
         return None
 
+
 def solve_optimal_alignment(original_segments, generated_durations, total_duration):
     """
     Aligns speech segments using quadratic programming. If optimization fails,
@@ -458,6 +459,9 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
     d = np.array(generated_durations)
     m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
 
+    if N == 0 or len(generated_durations) == 0:
+        logger.warning("⚠️ Alignment skipped: empty segments or durations.")
+        return original_segments  # or raise an error, depending on your app logic
     try:
         s = cp.Variable(N)
         objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
@@ -520,7 +524,6 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
         )
 
     return original_segments
-
 # ocr_model = None
 # ocr_lock = threading.Lock()
 
@@ -1091,18 +1094,13 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
     # text_clips = [clip for _, clip, _, _ in results if clip]
     # generated_durations = [dur for _, _, _, dur in results if dur > 0]
 
-    #
-
-
-
-
-    ]
-    generated_durations = [
-        result_map[i][2] for i in ordered_idx
-        if result_map[i][2] > 0
-    ]
-
-
+    # Sort and filter together
+    results.sort(key=lambda x: x[0])
+    filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
+
+    translated_json = [entry for entry, _, _, _ in filtered]
+    generated_durations = [dur for _, _, _, dur in filtered]
+
     # Align using optimization (modifies translated_json in-place)
     if generated_durations:
         translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
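The guard added in the @@ -458,6 +459,9 @@ hunk returns early instead of handing a zero-length problem to the solver. For context, the objective on the unchanged lines keeps each clip's midpoint s_i + d_i/2 close to the original segment midpoint m_i. Below is a minimal runnable sketch of that setup; the guard and the objective match the diff, while the bound and non-overlap constraints and the write-back of s.value are assumptions about parts the hunks do not show.

import numpy as np
import cvxpy as cp

def align_sketch(original_segments, generated_durations, total_duration):
    # Mirror the new guard: bail out on empty inputs instead of building a QP.
    N = len(original_segments)
    if N == 0 or len(generated_durations) == 0:
        return original_segments

    d = np.array(generated_durations)
    m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])

    s = cp.Variable(N)  # optimized start time of each generated clip
    # Keep each clip's midpoint s_i + d_i/2 near the original midpoint m_i.
    objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
    # Assumed constraints: stay inside the video and keep clips in order.
    constraints = [s[0] >= 0, s[N - 1] + d[N - 1] <= total_duration]
    constraints += [s[i] + d[i] <= s[i + 1] for i in range(N - 1)]
    cp.Problem(objective, constraints).solve()

    if s.value is None:  # solver failed; keep the original timings
        return original_segments
    for seg, start, dur in zip(original_segments, s.value, d):
        seg['start'], seg['end'] = float(start), float(start + dur)
    return original_segments

Since sum_squares of an affine expression is convex and the constraints are linear, this stays an ordinary QP, and a failed solve can fall through to the untouched segments, which appears to be the fallback the docstring describes.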
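The rewrite in the @@ -1091,18 +1094,13 @@ hunk builds translated_json and generated_durations from the same filtered sequence, so the two lists stay index-aligned when they reach solve_optimal_alignment. A self-contained illustration with hypothetical worker tuples of the form (index, text_clip, audio, duration), where a duration of 0 marks a failed generation:

# Hypothetical results from out-of-order workers; dur == 0.0 means generation failed.
results = [(2, "txt2", "aud2", 1.5), (0, "txt0", "aud0", 2.0), (1, "txt1", "aud1", 0.0)]
translated_json = [{"text": "a"}, {"text": "b"}, {"text": "c"}]

# Same logic as the added lines: restore segment order, then drop failures
# while keeping each surviving segment paired with its duration.
results.sort(key=lambda x: x[0])
filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]

translated_json = [entry for entry, _, _, _ in filtered]
generated_durations = [dur for _, _, _, dur in filtered]

assert len(translated_json) == len(generated_durations)
print(generated_durations)  # [2.0, 1.5] -> segment "b" and its duration were both dropped

Filtering only the durations, as the removed result_map code appears to have done, lets the two lists drift out of step whenever a clip fails; deriving both from filtered keeps the aligner's inputs consistent.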