import os
import sys
sys.path.append("../")

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"
import decord
import PIL, PIL.Image
import librosa
from IPython.display import Markdown, display
import pandas as pd

from util import *

css = """
<style>
    body {
        font-family: 'Arial', serif;
        margin: 0;
        padding: 0;
        color: black;
    }
    .header {
        display: flex;
        align-items: center;
        justify-content: center;
        margin-top: 5px;
        color: black;
    }
    .footer {
        display: flex;
        align-items: center;
        justify-content: center;
        margin-top: 5px;
    }
    .image {
        margin-right: 20px;
    }
    .content {
        text-align: center;
        color: black;
    }
    .title {
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 10px;
    }
    .authors {
        color: #4a90e2;
        font-size: 1.05em;
        margin: 10px 0;
    }
    .affiliations {
        font-size: 1em;
        margin-bottom: 20px;
    }
    .buttons {
        display: flex;
        justify-content: center;
        gap: 10px;
    }
    .button {
        background-color: #545758;
        text-decoration: none;
        padding: 8px 16px;
        border-radius: 5px;
        font-size: 1.05em;
    }
    .button:hover {
        background-color: #333;
    }
</style>
"""

header = css + """
<div class="header">
    <!-- <div class="image">
        <img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
    </div> -->
    <div class="content">
        <img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
        <div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
        <div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
        <div class="authors">
            <a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
            <a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
            <a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
            <a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>
        </div>
        <div class="affiliations">
            <sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
        </div>
        <div class="buttons">
| <a href="#" style="color: #92eaff;" class="button">arXiv</a> | |
| <a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">π Project</a> | |
| <a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;"> Code</a> | |
| <a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">π€ Data</a> | |
| <a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">π€ Models</a> | |
| <a href="#" style="color: #92eaff;" class="button">π― Demo</a> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
footer = css + """
<div class="header" style="justify-content: left;">
    <div class="content" style="font-size: 16px;">
        Please give us a ⭐ on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
        Tips to get better results:
        <br><br>
        <ol style="text-align: left; font-size: 14px; margin-left: 30px">
            <li>The first example may take up to 30-60 seconds to process since the model is also being loaded.</li>
            <li>
                If you provide a link, it may take a few seconds to download the video from YouTube.
                Note that the entire video will be used.
                If the sound of pouring is not clear, the results will be random.
            </li>
            <li>Although the model is somewhat robust to noise, make sure there is not so much background noise that the pouring becomes inaudible.</li>
            <li>Note that the video frames are not used during inference; the displayed frame is only for reference.</li>
        </ol>
    </div>
</div>
"""

from download_youtube import download_youtube_video_ytdlp


# Local fallback downloader (currently unused; `process_input` below calls
# `download_youtube_video_ytdlp` instead).
def download_from_youtube(
    video_id,
    save_dir="/tmp/",
    convert_to_mp4=False,
):
    """
    Downloads a full YouTube video given its ID.

    Args:
        video_id (str): YouTube video ID.
        save_dir (str): Directory to save the video.
        convert_to_mp4 (bool): Whether to convert the video to mp4 format.

    The saved video is at: {save_dir}/{video_id}.<ext> (mp4 if convert_to_mp4 is True).
    """
    from subprocess import call

    print("Downloading video from YouTube...")
    print("Video ID:", video_id)

    command = [
        "yt-dlp",
        "-o", "'{}%(id)s.%(ext)s'".format(save_dir),
        "--cookies ./youtube_cookies.txt",
        "--verbose",
        "--force-overwrites",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    try:
        call(" ".join(command), shell=True)
    except Exception as e:
        print(e)
        raise IOError("Failed to download YouTube video.")

    # Find the downloaded file (extension depends on the source format)
    from glob import glob
    saved_filepath = glob(os.path.join(save_dir, f"{video_id}.*"))[0]
    print("Saved file:", saved_filepath)

    if convert_to_mp4:
        ext = saved_filepath.split(".")[-1]
        if ext != "mp4":
            # Convert to mp4 using ffmpeg (replace only the trailing extension)
            to_save = saved_filepath[: -len(ext)] + "mp4"
            command = "ffmpeg -y -i {} {}".format(saved_filepath, to_save)
            call(command, shell=True)
            return to_save
    return saved_filepath
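
# Example (sketch, not executed here): the fallback above could be wired in if
# the primary downloader fails; "<video_id>" is a placeholder, not a real ID.
#
#     try:
#         video_path = download_youtube_video_ytdlp("<video_id>", save_dir="/tmp/")
#     except Exception:
#         video_path = download_from_youtube("<video_id>", save_dir="/tmp/", convert_to_mp4=True)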


def configure_input():
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link to a video. "
        "Note that the entire video will be used.",
    )
    video_input = gr.Video(label="Upload Video", height=520)
    youtube_link = gr.Textbox(label="YouTube Link", value=None)
    gr.Markdown(
        "Note: YouTube downloads often fail because the video is not public or YouTube asks for a sign-in. "
        "We recommend downloading the video on your own machine and uploading it here. "
        "Alternatively, you can clone the repository and run the demo locally, which allows signing in.",
    )
    return [video_input, youtube_link]


# video_backend = "decord"
video_backend = "torchvision"


def get_predictions(video_path):
    model = load_model()
    frame = load_frame(video_path, video_backend=video_backend)
    S = load_spectrogram(video_path)
    audio = load_audio_tensor(video_path)
    z_audio, y_audio = get_model_output(audio, model)
    image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
    return image, df_show, tsne_image


def get_video_id_from_url(url):
    import re

    if "v=" in url:
        video_id = re.findall(r"v=([a-zA-Z0-9_-]+)", url)
    elif "youtu.be" in url:
        video_id = re.findall(r"youtu\.be/([a-zA-Z0-9_-]+)", url)
    elif "shorts" in url:
        video_id = re.findall(r"shorts/([a-zA-Z0-9_-]+)", url)
    else:
        raise ValueError("Invalid YouTube URL")

    print("Video URL:", url)
    print("Video ID:", video_id)

    if len(video_id) > 0:
        return video_id[0]
    else:
        raise ValueError("Invalid YouTube URL")
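
# Expected behaviour (illustrative, not executed; "abc123XYZ_-" is a placeholder ID):
#   get_video_id_from_url("https://www.youtube.com/watch?v=abc123XYZ_-")  -> "abc123XYZ_-"
#   get_video_id_from_url("https://youtu.be/abc123XYZ_-")                 -> "abc123XYZ_-"
#   get_video_id_from_url("https://www.youtube.com/shorts/abc123XYZ_-")   -> "abc123XYZ_-"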


note = """
**Note**: Estimates of the radius (and hence also the height) depend on an accurate wavelength
estimate at the very end of pouring; if $\\lambda(T)$ is off, these estimates will be off too.

$$
H = l(0) = \\frac{\\lambda(0) - \\lambda(T)}{4} \\quad \\text{and} \\quad R = \\frac{\\lambda(T)}{4\\beta}
$$
"""


# Example usage in a Gradio interface
def process_input(video, youtube_link):
    provided_video = video is not None

    if youtube_link is None:
        provided_link = False
    elif isinstance(youtube_link, str):
        provided_link = len(youtube_link) > 0
    else:
        raise ValueError(f"Invalid type of link {youtube_link}.")

    if provided_video and provided_link:
        raise ValueError("Please provide either a video file or a YouTube link, not both.")

    if provided_video:
        print(video)

        # # Load model globally
        # model = load_model()

        # The input is a video file path
        video_path = video

        # Get predictions
        image, df_show, tsne_image = get_predictions(video_path)

        return image, df_show, gr.Markdown(note), tsne_image
    else:
        print(provided_link)
        assert provided_link, \
            "YouTube Link cannot be empty if no video is provided."

        video_id = get_video_id_from_url(youtube_link)
        print("Video ID:", video_id)
        video_path = download_youtube_video_ytdlp(
            video_id, save_dir="/tmp/",
        )

        # Get predictions
        image, df_show, tsne_image = get_predictions(video_path)

        # Add the YouTube link to the note
        local_note = f"{note}\n\nYou can watch the original video here: "\
            f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"

        return image, df_show, gr.Markdown(local_note), tsne_image


def configure_outputs():
    image_wide = gr.Image(label="Estimated pitch")
    dataframe = gr.DataFrame(label="Estimated physical properties")
    image_tsne = gr.Image(label="TSNE of features", width=300)
    markdown = gr.Markdown(label="Note")
    return [image_wide, dataframe, markdown, image_tsne]


# Configure pre-defined examples
examples = [
    ["./media_assets/example_video.mp4", None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None],
    # Shows that it works with background noise
    ["./media_assets/l74zJHCZ9uA.webm", None],
    # Shows that it works with a slightly differently shaped container
    ["./media_assets/LpRPV0hIymU.webm", None],
    ["./media_assets/k-HnMsS36J8.webm", None],
    # [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
    # [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
    # [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
]


# Define Gradio interface
# Note: `custom_css` is not defined in this file; it is expected to come from
# the star-import of `util` above.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:
    # Add the header
    gr.HTML(header)

    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )

    # Add the footer
    gr.HTML(footer)


# Launch the interface
demo.launch(allowed_paths=["."], share=True)