Spaces:

weathon
/

VSF

Running on Zero

VSF

File size: 6,200 Bytes

a67076d

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b53f5b58",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e3c8361d88484d9984ecd3746399940f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d82356db6da94c5cb78b8f5681f87304",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5052153c971542aa9dc0a9561f040bc5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import torch\n",
    "from diffusers import AutoencoderKLWan\n",
    "from pipeline import WanPipeline\n",
    "from diffusers.utils import export_to_video\n",
    "\n",
    "model_id = \"Wan-AI/Wan2.1-T2V-1.3B-Diffusers\"\n",
    "vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=\"vae\", torch_dtype=torch.float32)\n",
    "pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)\n",
    "pipe.load_lora_weights(\n",
    "    \"Kijai/WanVideo_comfy\",\n",
    "    weight_name=\"Wan21_CausVid_bidirect2_T2V_1_3B_lora_rank32.safetensors\",\n",
    "    adapter_name=\"lora\"\n",
    ") \n",
    "pipe = pipe.to(\"cuda\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "cc3c8947",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n",
      "True\n",
      "2 510\n",
      "32760\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`num_frames - 1` has to be divisible by 4. Rounding to the nearest number.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2f9f2e39d9584aab8cc0f27e4f061dc1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/12 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'vsf.mp4'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from processor import WanAttnProcessor2_0\n",
    "\n",
    "# prompt = \"A chef cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The cat is wearing a chef suit\"\n",
    "# neg_prompt = \"chef hat\"\n",
    "prompt = \"A cessna flying over a snowy mountain landscape, with a clear blue sky and fluffy white clouds. The plane is flying at a low altitude, casting a shadow on the snow-covered ground below. The mountains are rugged and steep, with patches of evergreen trees visible in the foreground.\"\n",
    "neg_prompt = \"trees\"\n",
    "\n",
    "neg_prompt_embeds, _ = pipe.encode_prompt(\n",
    "    prompt=neg_prompt,\n",
    "    padding=False,\n",
    "    do_classifier_free_guidance=False,\n",
    ")\n",
    "\n",
    "pos_prompt_embeds, _ = pipe.encode_prompt( \n",
    "    prompt=prompt,\n",
    "    do_classifier_free_guidance=False, \n",
    "    max_sequence_length=512 - neg_prompt_embeds.shape[1],\n",
    ")\n",
    "pipe.set_adapters(\"lora\", 0.5)\n",
    "\n",
    "\n",
    "\n",
    "neg_len = neg_prompt_embeds.shape[1]\n",
    "pos_len = pos_prompt_embeds.shape[1]\n",
    "print(neg_len, pos_len)\n",
    "height = 480\n",
    "width = 832\n",
    "frames = 81\n",
    "\n",
    "img_len = (height//8) * (width//8) * 3 * (frames // 4 + 1) // 12\n",
    "print(img_len)\n",
    "mask = torch.zeros((1, img_len, pos_len+neg_len)).cuda()\n",
    "mask[:, :, -neg_len:] = -torch.inf # this should be negative -torch.inf #\n",
    "# mask[:, :, -neg_len:] = -0.2 # this should be negative -torch.inf #\n",
    "\n",
    "for block in pipe.transformer.blocks:\n",
    "    block.attn2.processor = WanAttnProcessor2_0(scale=1.7, neg_prompt_length=neg_len, attn_mask=mask)\n",
    "# should we still do exploation in space \n",
    "\n",
    "prompt_embeds = torch.cat([pos_prompt_embeds, neg_prompt_embeds], dim=1)\n",
    "\n",
    "output = pipe(\n",
    "    prompt_embeds=prompt_embeds,\n",
    "    # prompt_embeds=pos_prompt_embeds,\n",
    "    negative_prompt=neg_prompt,\n",
    "    height=height,\n",
    "    width=width,\n",
    "    num_frames=frames + 1,\n",
    "    num_inference_steps=12,\n",
    "    guidance_scale=0.0, \n",
    "    generator=torch.Generator(device=\"cuda\").manual_seed(42),\n",
    ").frames[0]\n",
    "export_to_video(output, \"vsf.mp4\", fps=15)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "neg",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}