File size: 9,025 Bytes

6ff2047

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Super Wav2Lip\n",
        "\n",
        "This Colab project is based on [Wav2Lip-GFPGAN](https://github.com/ajay-sainy/Wav2Lip-GFPGAN), but updates the requirements.txt (to function properly) and updates Colab file for ease of use."
      ],
      "metadata": {
        "id": "JHXiEHrtVVxl"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 1. Installation\n",
        "\n",
        "Run this block to install the necessary dependencies."
      ],
      "metadata": {
        "id": "VBrG_nOFpmWj"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!git clone https://github.com/indianajson/wav2lip-HD.git\n",
        "basePath = \"/content/wav2lip-HD\"\n",
        "%cd {basePath}\n",
        "\n",
        "wav2lipFolderName = 'Wav2Lip-master'\n",
        "gfpganFolderName = 'GFPGAN-master'\n",
        "wav2lipPath = basePath + '/' + wav2lipFolderName\n",
        "gfpganPath = basePath + '/' + gfpganFolderName\n",
        "\n",
        "!wget 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth' -O {wav2lipPath}'/face_detection/detection/sfd/s3fd.pth'\n",
        "\n",
        "!wget 'https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA' -O {wav2lipPath}'/checkpoints/wav2lip_gan.pth'\n",
        "#!wget 'https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?e=TBFBVW' -O {wav2lipPath}'/checkpoints/wav2lip.pth'\n",
        "\n",
        "!gdown https://drive.google.com/uc?id=1fQtBSYEyuai9MjBOF8j7zZ4oQ9W2N64q --output {wav2lipPath}'/checkpoints/'\n",
        "\n",
        "!pip install -r requirements.txt\n",
        "!pip install -U librosa==0.8.1 # The process will fail without downgrading librosa\n",
        "!mkdir inputs\n",
        "\n",
        "!cd $gfpganFolderName && python setup.py develop\n",
        "!wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P {gfpganFolderName}'/experiments/pretrained_models'\n",
        "\n",
        "%cd {basePath}\n",
        "\n",
        "from IPython.display import clear_output\n",
        "clear_output()\n",
        "\n",
        "print(\"Installation complete.\")"
      ],
      "metadata": {
        "id": "YhFe3CJGAIiV"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 2. Synchronize Video and Speech"
      ],
      "metadata": {
        "id": "G0BMWoIyX2dc"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "EqX_2YtkUjRI",
        "cellView": "form"
      },
      "outputs": [],
      "source": [
        "#@markdown Adjust the parameters below then run the code block to synthesize the speech onto your video file:\n",
        "\n",
        "import os\n",
        "outputPath = basePath+'/outputs' \n",
        "inputAudio = 'audio.mp3' #@param{type:\"string\"}\n",
        "inputAudioPath = basePath + '/inputs/' + inputAudio  \n",
        "inputVideo = 'video.mov' #@param{type:\"string\"}\n",
        "inputVideoPath = basePath + '/inputs/'+inputVideo\n",
        "lipSyncedOutputPath = basePath + '/outputs/result.mp4'  \n",
        "model = \"wav2lip\" #@param [\"wav2lip\", \"wav2lip_gan\"] {type:\"string\"}\n",
        "\n",
        "\n",
        "if not os.path.exists(outputPath):\n",
        "  os.makedirs(outputPath)\n",
        "\n",
        "from IPython.display import clear_output\n",
        "clear_output()\n",
        "\n",
        "!cd $wav2lipFolderName && python inference.py \\\n",
        "--checkpoint_path checkpoints/{model}.pth \\\n",
        "--face {inputVideoPath} \\\n",
        "--audio {inputAudioPath} \\\n",
        "--outfile {lipSyncedOutputPath}\n",
        "\n",
        "\n",
        "\n",
        "#print(\"Video synthesis complete.\")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 3. Boost the Resolution of the Synthesized Video\n",
        "\n"
      ],
      "metadata": {
        "id": "oAIIanxYqFcE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import cv2\n",
        "from tqdm import tqdm\n",
        "from os import path\n",
        "\n",
        "import os\n",
        "\n",
        "inputVideoPath = outputPath+'/result.mp4'\n",
        "unProcessedFramesFolderPath = outputPath+'/frames'\n",
        "\n",
        "if not os.path.exists(unProcessedFramesFolderPath):\n",
        "  os.makedirs(unProcessedFramesFolderPath)\n",
        "\n",
        "vidcap = cv2.VideoCapture(inputVideoPath)\n",
        "numberOfFrames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
        "fps = vidcap.get(cv2.CAP_PROP_FPS)\n",
        "print(\"FPS: \", fps, \"Frames: \", numberOfFrames)\n",
        "\n",
        "for frameNumber in tqdm(range(numberOfFrames)):\n",
        "    _,image = vidcap.read()\n",
        "    cv2.imwrite(path.join(unProcessedFramesFolderPath, str(frameNumber).zfill(4)+'.jpg'), image)\n",
        "\n",
        "\n",
        "!cd $gfpganFolderName && \\\n",
        "  python inference_gfpgan.py -i $unProcessedFramesFolderPath -o $outputPath -v 1.3 -s 2 --only_center_face --bg_upsampler None\n",
        "\n",
        "import os\n",
        "restoredFramesPath = outputPath + '/restored_imgs/'\n",
        "processedVideoOutputPath = outputPath\n",
        "\n",
        "dir_list = os.listdir(restoredFramesPath)\n",
        "dir_list.sort()\n",
        "\n",
        "import cv2\n",
        "import numpy as np\n",
        "\n",
        "#Get FPS of original video for writer\n",
        "inputVideoPath = outputPath+'/result.mp4'\n",
        "vidcap = cv2.VideoCapture(inputVideoPath)\n",
        "fps = vidcap.get(cv2.CAP_PROP_FPS)\n",
        "print(\"The video is \"+str(fps)+\" FPS.\")\n",
        "\n",
        "batch = 0\n",
        "batchSize = 1300\n",
        "from tqdm import tqdm\n",
        "for i in tqdm(range(0, len(dir_list), batchSize)):\n",
        "  img_array = []\n",
        "  start, end = i, i+batchSize\n",
        "  print(\"processing \", start, end, end=\"\\r\")\n",
        "  for filename in  tqdm(dir_list[start:end]):\n",
        "      filename = restoredFramesPath+filename;\n",
        "      img = cv2.imread(filename)\n",
        "      if img is None:\n",
        "        continue\n",
        "      height, width, layers = img.shape\n",
        "      size = (width,height)\n",
        "      img_array.append(img)\n",
        "  out = cv2.VideoWriter(processedVideoOutputPath+'/output_'+str(batch).zfill(4)+'.mp4',cv2.VideoWriter_fourcc(*'DIVX'), fps, size)\n",
        "  batch = batch + 1\n",
        " \n",
        "  for i in range(len(img_array)):\n",
        "    out.write(img_array[i])\n",
        "  out.release()\n",
        "\n",
        "from IPython.display import clear_output\n",
        "clear_output()\n",
        "\n",
        "print(\"Video upscaling complete.\")"
      ],
      "metadata": {
        "id": "XibzGPIVJfvP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 4. Clear Cached Files\n",
        "\n",
        "Run this block once you've downloaded your final video file. This will empty /inputs and /outputs, so you can start again, fresh.\n"
      ],
      "metadata": {
        "id": "ajuOpF9t0psE"
      }
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "J8JhzA9t_xrS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%cd /content/wav2lip-HD/\n",
        "\n",
        "#@markdown Choose whether to remove both inputs and outputs, or just one of the two. You may want to preserve inputs if you are only changing one of the two inputs. \n",
        "\n",
        "removeInputs = True #@param {type:\"boolean\"}\n",
        "removeOutputs = True #@param {type:\"boolean\"}\n",
        "\n",
        "if removeInputs == True:\n",
        "  %rm inputs/*\n",
        "if removeOutputs == True:\n",
        "  %rm outputs/frames/*\n",
        "  %rm outputs/restored_imgs/*\n",
        "  %rm outputs/*\n",
        "\n",
        "\n",
        "from IPython.display import clear_output\n",
        "clear_output()\n",
        "\n",
        "print(\"Cleared cached files.\")\n"
      ],
      "metadata": {
        "id": "o2trJyiC0mRs",
        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}