Spaces:

namnh2002
/

video-summarization_timesformer

Running

App Files Files Community

nam_nguyenhoai_AI committed on Jun 7, 2024

Commit

48858f9

1 Parent(s): 3d8c5e9

fix bugs and update src

Browse files

Files changed (2) hide show

app.py +18 -18
utils.py +2 -2

app.py CHANGED Viewed

@@ -6,13 +6,15 @@ import numpy as np
 from utils import *
 from algorithm import *
-def make_video(video_path, outdir='./summarized_video',encoder='Kmeans'):
-    if encoder not in ["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"]:
-        encoder = "Kmeans"
-    # nen them vao cac truong hop mo hinh khac
-    margin_width = 50
-    model, processor, device = load_model()
     # total_params = sum(param.numel() for param in model.parameters())
     # print('Total parameters: {:.2f}M'.format(total_params / 1e6))
@@ -35,8 +37,7 @@ def make_video(video_path, outdir='./summarized_video',encoder='Kmeans'):
         frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
         frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))
         #length = int(raw_video.get(cv2.CAP_PROP_FRAME_COUNT))
-        output_width = frame_width * 2 + margin_width
         filename = os.path.basename(filename)
         # Find the size to resize
@@ -55,13 +56,8 @@ def make_video(video_path, outdir='./summarized_video',encoder='Kmeans'):
         frames = []
         features = []
-        # output_path = os.path.join(outdir, filename[:filename.rfind('.')] + '_video_depth.mp4')
         with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
             output_path = tmpfile.name
-        #out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"avc1"), frame_rate, (output_width, frame_height))
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, frame_rate, (output_width, frame_height))
-        # count=0
         while raw_video.isOpened():
             ret, raw_frame = raw_video.read()
@@ -102,13 +98,15 @@ def make_video(video_path, outdir='./summarized_video',encoder='Kmeans'):
         print("Shape of each clip: ", features[0].shape)
         selected_frames = []
-        if encoder == "Kmeans":
             selected_frames = kmeans(number_of_clusters, features)
-        elif encoder == "Sum of Squared Difference 01":
             selected_frames = tt01(features, 400)
         else:
             selected_frames = tt02(features, 400)
         video_writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, (frames[0].shape[1], frames[0].shape[0]))
         for idx in selected_frames:
             video_writer.write(frames[idx])
@@ -144,16 +142,18 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         input_video = gr.Video(label="Input Video")
         algorithm_type = gr.Dropdown(["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"], type="value", label='Algorithm')
     submit = gr.Button("Submit")
     processed_video = gr.Video(label="Summarized Video")
-    def on_submit(uploaded_video,algorithm_type):
         # Process the video and get the path of the output video
-        output_video_path = make_video(uploaded_video,encoder=algorithm_type)
         return output_video_path
     submit.click(on_submit, inputs=[input_video, algorithm_type], outputs=processed_video)
 if __name__ == '__main__':
-    demo.queue().launch()

 from utils import *
 from algorithm import *
+def make_video(video_path, outdir='./summarized_video', algorithm='Kmeans', model_version='K600'):
+    if algorithm not in ["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"]:
+        algorithm = "Kmeans"
+    if model_version not in ["K600", "K400", "SSv2"]:
+        model_version = "K600"
+    # TODO: should add handling for other model variants here
+    model, processor, device = load_model(model_version)
     # total_params = sum(param.numel() for param in model.parameters())
     # print('Total parameters: {:.2f}M'.format(total_params / 1e6))
         frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
         frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))
         #length = int(raw_video.get(cv2.CAP_PROP_FRAME_COUNT))
         filename = os.path.basename(filename)
         # Find the size to resize
         frames = []
         features = []
         with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
             output_path = tmpfile.name
         while raw_video.isOpened():
             ret, raw_frame = raw_video.read()
         print("Shape of each clip: ", features[0].shape)
         selected_frames = []
+        if algorithm == "Kmeans":
             selected_frames = kmeans(number_of_clusters, features)
+        elif algorithm == "Sum of Squared Difference 01":
             selected_frames = tt01(features, 400)
         else:
             selected_frames = tt02(features, 400)
+        print("Selected frame: ", selected_frames)
         video_writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, (frames[0].shape[1], frames[0].shape[0]))
         for idx in selected_frames:
             video_writer.write(frames[idx])
     with gr.Row():
         input_video = gr.Video(label="Input Video")
         algorithm_type = gr.Dropdown(["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"], type="value", label='Algorithm')
+        model_type = gr.Dropdown(["K600", "K400", "SSv2"], type="value", label='Model Type')
     submit = gr.Button("Submit")
     processed_video = gr.Video(label="Summarized Video")
+    def on_submit(uploaded_video, algorithm_type, model_type):
         # Process the video and get the path of the output video
+        output_video_path = make_video(uploaded_video, encoder=algorithm_type, model_version= model_type)
         return output_video_path
     submit.click(on_submit, inputs=[input_video, algorithm_type], outputs=processed_video)
 if __name__ == '__main__':
+    demo.queue().launch(share=True)

utils.py CHANGED Viewed

@@ -52,10 +52,10 @@ def to_video(selected_frames, frames, output_path, video_fps):
     video_writer.release()
     print("Completed summarizing the video (wait for a moment to load).")
-def load_model():
     try:
         DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-        model = TimesformerModel.from_pretrained("facebook/timesformer-base-finetuned-k600").to(DEVICE).eval()
         processor=VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
         return model, processor, DEVICE

     video_writer.release()
     print("Completed summarizing the video (wait for a moment to load).")
+def load_model(model_version):
     try:
         DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+        model = TimesformerModel.from_pretrained(f"facebook/timesformer-base-finetuned-{model_version}").to(DEVICE).eval()
         processor=VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
         return model, processor, DEVICE