{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup Environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "if os.path.basename(os.getcwd()) == \"notebooks\":\n", " os.chdir(\"..\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Install Dependencies" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "execution": { "iopub.execute_input": "2025-07-08T09:44:37.937172Z", "iopub.status.busy": "2025-07-08T09:44:37.936893Z", "iopub.status.idle": "2025-07-08T09:49:21.948303Z", "shell.execute_reply": "2025-07-08T09:49:21.947471Z", "shell.execute_reply.started": "2025-07-08T09:44:37.937153Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.8/410.8 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.9/72.9 MB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0mm00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:04\u001b[0mm\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:02\u001b[0mm\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m 
\u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCloning into 'diffusers'...\n", "remote: Enumerating objects: 98145, done.\u001b[K\n", "remote: Counting objects: 100% (222/222), done.\u001b[K\n", "remote: Compressing objects: 100% (161/161), done.\u001b[K\n", "remote: Total 98145 (delta 107), reused 91 (delta 52), pack-reused 97923 (from 3)\u001b[K\n", "Receiving objects: 100% (98145/98145), 73.31 MiB | 22.71 MiB/s, done.\n", "Resolving deltas: 100% (72392/72392), done.\n", "/kaggle/working/diffusers\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n", " Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Building editable for diffusers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" ] } ], "source": [ "# !pip install -q git+https://github.com/huggingface/diffusers.git\n", "!pip install -q peft==0.15.0 bitsandbytes\n", "!git clone https://github.com/huggingface/diffusers\n", "%cd diffusers\n", "!pip install -e . 
-q" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "trusted": true }, "outputs": [], "source": [ "# !pip install git+https://github.com/huggingface/diffusers" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T09:49:21.950216Z", "iopub.status.busy": "2025-07-08T09:49:21.949956Z", "iopub.status.idle": "2025-07-08T09:49:21.955584Z", "shell.execute_reply": "2025-07-08T09:49:21.954816Z", "shell.execute_reply.started": "2025-07-08T09:49:21.950192Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/kaggle/working/diffusers/examples/controlnet\n" ] } ], "source": [ "%cd examples/controlnet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Training" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.idle": "2025-07-08T12:05:34.302241Z", "shell.execute_reply": "2025-07-08T12:05:34.301446Z", "shell.execute_reply.started": "2025-07-08T09:50:31.884323Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Steps: 96%|████▊| 18000/18750 [52:01<1:18:01, 6.24s/it, loss=0.301, lr=0.0002]Configuration saved in ./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/checkpoint-18000/controlnet/config.json\n", "Model weights saved in ./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/checkpoint-18000/controlnet/diffusion_pytorch_model.safetensors\n", "Steps: 99%|████▉| 18500/18750 [1:44:08<26:02, 6.25s/it, loss=0.338, lr=0.0002]Configuration saved in ./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/checkpoint-18500/controlnet/config.json\n", "Model weights saved in ./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/checkpoint-18500/controlnet/diffusion_pytorch_model.safetensors\n", "Steps: 100%|█████| 18750/18750 [2:10:12<00:00, 6.26s/it, loss=0.416, lr=0.0002]Configuration saved in ./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/config.json\n", "Model weights saved in 
./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet/diffusion_pytorch_model.safetensors\n", "Steps: 100%|█████| 18750/18750 [2:10:16<00:00, 6.25s/it, loss=0.416, lr=0.0002]\n" ] } ], "source": [ "# Resume from the newest checkpoint found under --output_dir. A pinned path such as\n", "# checkpoint-17500 stops existing once --checkpoints_total_limit=2 rotates it out\n", "# (the log of this run already shows checkpoint-18000 and checkpoint-18500 being\n", "# written), so a later re-run of this cell could not resume from it.\n", "!accelerate launch train_controlnet.py \\\n", "    --pretrained_model_name_or_path=\"stabilityai/stable-diffusion-2-1\" \\\n", "    --resume_from_checkpoint=\"latest\" \\\n", "    --output_dir=\"./ckpts/Stable-Diffusion-2.1-Openpose-ControlNet\" \\\n", "    --dataset_name=\"HighCWu/open_pose_controlnet_subset\" \\\n", "    --resolution=512 \\\n", "    --learning_rate=2e-4 \\\n", "    --train_batch_size=4 \\\n", "    --gradient_accumulation_steps=2 \\\n", "    --gradient_checkpointing \\\n", "    --use_8bit_adam \\\n", "    --num_train_epochs=50 \\\n", "    --mixed_precision=\"fp16\" \\\n", "    --checkpoints_total_limit=2 \\\n", "    --checkpointing_steps=500 \\\n", "    --validation_steps=100\n", "    # --image_column \\\n", "    # --conditioning_image_column \\\n", "    # --caption_column \\\n", "    # --max_train_steps=10000\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:15:50.516724Z", "iopub.status.busy": "2025-07-08T12:15:50.515955Z", "iopub.status.idle": "2025-07-08T12:17:09.002224Z", "shell.execute_reply": "2025-07-08T12:17:09.001257Z", "shell.execute_reply.started": "2025-07-08T12:15:50.516697Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip install -q controlnet-aux" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:17:09.004232Z", "iopub.status.busy": "2025-07-08T12:17:09.003985Z", "iopub.status.idle": "2025-07-08T12:17:09.009019Z", "shell.execute_reply": "2025-07-08T12:17:09.008057Z", "shell.execute_reply.started": "2025-07-08T12:17:09.004203Z" }, "trusted": true }, "outputs": [], "source": [ "import cv2\n", "from PIL import Image\n", "import numpy as np\n", "# from diffusers.utils import load_image\n", "from PIL import Image\n", "import PIL\n", "import requests" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:17:09.010551Z", "iopub.status.busy": 
"2025-07-08T12:17:09.010007Z", "iopub.status.idle": "2025-07-08T12:17:43.288449Z", "shell.execute_reply": "2025-07-08T12:17:43.287691Z", "shell.execute_reply.started": "2025-07-08T12:17:09.010526Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b294b69e673b4cbf84e080d236bf8158", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Fetching 63 files: 0%| | 0/63 [00:00 PIL.Image.Image:\n", " \"\"\"\n", " Loads `image` to a PIL Image.\n", "\n", " Args:\n", " image (`str` or `PIL.Image.Image`):\n", " The image to convert to the PIL Image format.\n", " convert_method (Callable[[PIL.Image.Image], PIL.Image.Image], *optional*):\n", " A conversion method to apply to the image after loading it. When set to `None` the image will be converted\n", " \"RGB\".\n", "\n", " Returns:\n", " `PIL.Image.Image`:\n", " A PIL Image.\n", " \"\"\"\n", " if isinstance(image, str):\n", " if image.startswith(\"http://\") or image.startswith(\"https://\"):\n", " image = PIL.Image.open(requests.get(image, stream=True, timeout=200).raw)\n", " elif os.path.isfile(image):\n", " image = PIL.Image.open(image)\n", " else:\n", " raise ValueError(\n", " f\"Incorrect path or URL. URLs must start with `http://` or `https://`, and {image} is not a valid path.\"\n", " )\n", " elif isinstance(image, PIL.Image.Image):\n", " image = image\n", " else:\n", " raise ValueError(\n", " \"Incorrect format used for the image. 
Should be a URL linking to an image, a local path, or a PIL image.\"\n", " )\n", "\n", " image = PIL.ImageOps.exif_transpose(image)\n", "\n", " if convert_method is not None:\n", " image = convert_method(image)\n", " else:\n", " image = image.convert(\"RGB\")\n", "\n", " return image" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:17:43.297604Z", "iopub.status.busy": "2025-07-08T12:17:43.297363Z", "iopub.status.idle": "2025-07-08T12:17:51.415654Z", "shell.execute_reply": "2025-07-08T12:17:51.414909Z", "shell.execute_reply.started": "2025-07-08T12:17:43.297580Z" }, "trusted": true }, "outputs": [], "source": [ "def image_grid(imgs, rows, cols):\n", "    \"\"\"Tile PIL images into a single `rows` x `cols` RGB grid.\n", "\n", "    Images are pasted left to right, top to bottom. The cell size is taken\n", "    from the first image; other images are pasted at their cell origin\n", "    without being resized.\n", "\n", "    Args:\n", "        imgs: Sequence of PIL images; its length must equal `rows * cols`.\n", "        rows: Number of grid rows.\n", "        cols: Number of grid columns.\n", "\n", "    Returns:\n", "        A new RGB image of size `(cols * w, rows * h)`, where `(w, h)` is\n", "        the size of `imgs[0]`.\n", "\n", "    Raises:\n", "        AssertionError: If `len(imgs) != rows * cols`.\n", "    \"\"\"\n", "    assert len(imgs) == rows * cols, f\"expected {rows * cols} images, got {len(imgs)}\"\n", "\n", "    w, h = imgs[0].size\n", "    grid = Image.new(\"RGB\", size=(cols * w, rows * h))\n", "\n", "    for i, img in enumerate(imgs):\n", "        # Row-major placement: index i lands in row i // cols, column i % cols.\n", "        row, col = divmod(i, cols)\n", "        grid.paste(img, box=(col * w, row * h))\n", "    return grid\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2025-07-08T12:13:08.387428Z", "iopub.status.idle": "2025-07-08T12:13:08.387805Z", "shell.execute_reply": "2025-07-08T12:13:08.387633Z", "shell.execute_reply.started": "2025-07-08T12:13:08.387617Z" }, "trusted": true }, "outputs": [], "source": [ "urls = \"yoga1.jpeg\", \"yoga2.jpeg\", \"yoga3.jpeg\", \"yoga4.jpeg\"\n", "imgs = [\n", " load_image(\"https://huggingface.co/datasets/YiYiXu/controlnet-testing/resolve/main/\" + url) \n", " for url in urls\n", "]\n", "\n", "image_grid(imgs, 2, 2)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:17:51.417162Z", "iopub.status.busy": "2025-07-08T12:17:51.416851Z", "iopub.status.idle": "2025-07-08T12:18:15.234196Z", "shell.execute_reply": "2025-07-08T12:18:15.233323Z", "shell.execute_reply.started": "2025-07-08T12:17:51.417138Z" }, "trusted": true }, "outputs": [ { 
"name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/controlnet_aux/mediapipe_face/mediapipe_face_common.py:7: UserWarning: The module 'mediapipe' is not installed. The package will have limited functionality. Please install it using the command: pip install 'mediapipe'\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers\n", " warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.layers\", FutureWarning)\n", "/usr/local/lib/python3.11/dist-packages/timm/models/registry.py:4: FutureWarning: Importing from timm.models.registry is deprecated, please import via timm.models\n", " warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.models\", FutureWarning)\n", "/usr/local/lib/python3.11/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_5m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_5m_224. This is because the name being registered conflicts with an existing name. Please check if this is not expected.\n", " return register_model(fn_wrapper)\n", "/usr/local/lib/python3.11/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_11m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_11m_224. This is because the name being registered conflicts with an existing name. Please check if this is not expected.\n", " return register_model(fn_wrapper)\n", "/usr/local/lib/python3.11/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_224. This is because the name being registered conflicts with an existing name. 
Please check if this is not expected.\n", " return register_model(fn_wrapper)\n", "/usr/local/lib/python3.11/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_384 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_384. This is because the name being registered conflicts with an existing name. Please check if this is not expected.\n", " return register_model(fn_wrapper)\n", "/usr/local/lib/python3.11/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_512 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_512. This is because the name being registered conflicts with an existing name. Please check if this is not expected.\n", " return register_model(fn_wrapper)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8fe6d8083b7d4535af03ef53d62ba556", "version_major": 2, "version_minor": 0 }, "text/plain": [ "body_pose_model.pth: 0%| | 0.00/209M [00:00" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from controlnet_aux import OpenposeDetector\n", "\n", "model = OpenposeDetector.from_pretrained(\"lllyasviel/ControlNet\")\n", "\n", "poses = [model(img) for img in imgs]\n", "image_grid(poses, 2, 2)\n", "# poses" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-07-08T12:18:15.235687Z", "iopub.status.busy": "2025-07-08T12:18:15.235222Z", "iopub.status.idle": "2025-07-08T12:19:10.969606Z", "shell.execute_reply": "2025-07-08T12:19:10.968652Z", "shell.execute_reply.started": "2025-07-08T12:18:15.235659Z" }, "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-07-08 12:18:18.767595: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin 
cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1751977098.965819 35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1751977099.020166 35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3a5530ab5704dddbee38d77619485ef", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model_index.json: 0%| | 0.00/537 [00:00" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One prompt, negative prompt and seeded generator per pose image, so the batch\n", "# size follows `poses` instead of repeating a hard-coded 4.\n", "SEED = 2\n", "prompt = \"a man is doing yoga\"\n", "negative_prompt = \"monochrome, lowres, bad anatomy, worst quality, low quality\"\n", "num_images = len(poses)\n", "\n", "# Every generator gets the same seed, as in the original cell.\n", "generator = [torch.Generator(device=\"cpu\").manual_seed(SEED) for _ in range(num_images)]\n", "output = pipe(\n", "    [prompt] * num_images,\n", "    poses,\n", "    negative_prompt=[negative_prompt] * num_images,\n", "    generator=generator,\n", "    num_inference_steps=20,\n", ")\n", "# image_grid asserts rows * cols == number of images; adjust the layout if `poses` changes.\n", "image_grid(output.images, 2, 2)\n" ] } ], "metadata": { "kaggle": { "accelerator": "gpu", "dataSources": [], "dockerImageVersionId": 31041, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 4 }