This pull request also adds the ability to extend an existing video and to use an end frame.

#2
.gitattributes CHANGED
@@ -2,4 +2,9 @@
2
  *.bin filter=lfs diff=lfs merge=lfs -text
3
  *.mp4 filter=lfs diff=lfs merge=lfs -text
4
  *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.pth filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
2
  *.bin filter=lfs diff=lfs merge=lfs -text
3
  *.mp4 filter=lfs diff=lfs merge=lfs -text
4
  *.pt filter=lfs diff=lfs merge=lfs -text
5
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ img_examples/Example1.png filter=lfs diff=lfs merge=lfs -text
6
+ img_examples/Example2.webp filter=lfs diff=lfs merge=lfs -text
7
+ img_examples/Example3.jpg filter=lfs diff=lfs merge=lfs -text
8
+ img_examples/Example4.webp filter=lfs diff=lfs merge=lfs -text
9
+ img_examples/Example5.png filter=lfs diff=lfs merge=lfs -text
10
+ img_examples/Example6.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -4,8 +4,18 @@ emoji: 🎬
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.29.0
8
  app_file: app.py
9
- pinned: false
10
- license: mit
 
 
 
 
 
 
 
 
11
  ---
 
 
 
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.29.1
8
  app_file: app.py
9
+ license: apache-2.0
10
+ short_description: Text-to-Video/Image-to-Video/Video extender (timed prompt)
11
+ tags:
12
+ - Image-to-Video
13
+ - Image-2-Video
14
+ - Img-to-Vid
15
+ - Img-2-Vid
16
+ - language models
17
+ - LLMs
18
+ suggested_hardware: zero-a10g
19
  ---
20
+
21
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
app_lora.py ADDED
The diff for this file is too large to render. See raw diff
 
diffusers_helper/bucket_tools.py CHANGED
@@ -15,6 +15,79 @@ bucket_options = {
15
  (864, 448),
16
  (960, 416),
17
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
 
20
 
@@ -26,5 +99,5 @@ def find_nearest_bucket(h, w, resolution=640):
26
  if metric <= min_metric:
27
  min_metric = metric
28
  best_bucket = (bucket_h, bucket_w)
 
29
  return best_bucket
30
-
 
15
  (864, 448),
16
  (960, 416),
17
  ],
18
+ 672: [
19
+ (480, 864),
20
+ (512, 832),
21
+ (544, 768),
22
+ (576, 704),
23
+ (608, 672),
24
+ (640, 640),
25
+ (672, 608),
26
+ (704, 576),
27
+ (768, 544),
28
+ (832, 512),
29
+ (864, 480),
30
+ ],
31
+ 704: [
32
+ (480, 960),
33
+ (512, 864),
34
+ (544, 832),
35
+ (576, 768),
36
+ (608, 704),
37
+ (640, 672),
38
+ (672, 640),
39
+ (704, 608),
40
+ (768, 576),
41
+ (832, 544),
42
+ (864, 512),
43
+ (960, 480),
44
+ ],
45
+ 768: [
46
+ (512, 960),
47
+ (544, 864),
48
+ (576, 832),
49
+ (608, 768),
50
+ (640, 704),
51
+ (672, 672),
52
+ (704, 640),
53
+ (768, 608),
54
+ (832, 576),
55
+ (864, 544),
56
+ (960, 512),
57
+ ],
58
+ 832: [
59
+ (544, 960),
60
+ (576, 864),
61
+ (608, 832),
62
+ (640, 768),
63
+ (672, 704),
64
+ (704, 672),
65
+ (768, 640),
66
+ (832, 608),
67
+ (864, 576),
68
+ (960, 544),
69
+ ],
70
+ 864: [
71
+ (576, 960),
72
+ (608, 864),
73
+ (640, 832),
74
+ (672, 768),
75
+ (704, 704),
76
+ (768, 672),
77
+ (832, 640),
78
+ (864, 608),
79
+ (960, 576),
80
+ ],
81
+ 960: [
82
+ (608, 960),
83
+ (640, 864),
84
+ (672, 832),
85
+ (704, 768),
86
+ (768, 704),
87
+ (832, 672),
88
+ (864, 640),
89
+ (960, 608),
90
+ ],
91
  }
92
 
93
 
 
99
  if metric <= min_metric:
100
  min_metric = metric
101
  best_bucket = (bucket_h, bucket_w)
102
+ print("The resolution of the generated video will be " + str(best_bucket))
103
  return best_bucket
 
img_examples/Example1.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a906a1d14d1699f67ca54865c7aa5857e55246f4ec63bbaf3edcf359e73bebd1
3
+ size 240647
img_examples/Example1.png ADDED

Git LFS Details

  • SHA256: a057c160bcf3ecfa41d150ec9550423f87efefb9a9793420fea382760daff98b
  • Pointer size: 131 Bytes
  • Size of remote file: 513 kB
img_examples/Example2.webp ADDED

Git LFS Details

  • SHA256: 736480a5f8d043eacad5758f0e80b427aabfa4d98839769615ee61f3fda9f77e
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
img_examples/Example3.jpg ADDED

Git LFS Details

  • SHA256: b1a9be93d2f117d687e08c91c043e67598bdb7c44f5c932f18a3026790fb82fa
  • Pointer size: 131 Bytes
  • Size of remote file: 208 kB
img_examples/Example4.webp ADDED

Git LFS Details

  • SHA256: dd4e7ef35f4cfc8d44ff97f38b68ba7cc248ad5b54c89f8525f5046508f7c4a3
  • Pointer size: 131 Bytes
  • Size of remote file: 119 kB
img_examples/Example5.png ADDED

Git LFS Details

  • SHA256: b6a7b7521a2ffe77f60a78bb52013c1ef73bfcefbd809f45cfdeef804aee8906
  • Pointer size: 131 Bytes
  • Size of remote file: 431 kB
img_examples/Example6.png ADDED

Git LFS Details

  • SHA256: 59e76d165d9bece1775302a7e4032f31b28545937726d42f41b0c67aae9d4143
  • Pointer size: 131 Bytes
  • Size of remote file: 721 kB
requirements.txt CHANGED
@@ -1,18 +1,23 @@
1
- accelerate==1.6.0
2
- diffusers==0.33.1
3
- transformers==4.46.2
4
- sentencepiece==0.2.0
5
- pillow==11.1.0
6
- av==12.1.0
7
- numpy==1.26.2
8
- scipy==1.12.0
9
- requests==2.31.0
10
- torchsde==0.2.6
11
- torch>=2.0.0
12
- torchvision
13
- torchaudio
14
- einops
15
- opencv-contrib-python
16
- safetensors
17
- huggingface_hub
18
- spaces
 
 
 
 
 
 
1
+ accelerate==1.7.0
2
+ diffusers==0.33.1
3
+ transformers==4.52.4
4
+ sentencepiece==0.2.0
5
+ pillow==11.2.1
6
+ av==12.1.0
7
+ numpy==1.26.2
8
+ scipy==1.12.0
9
+ requests==2.32.4
10
+ torchsde==0.2.6
11
+ torch>=2.0.0
12
+ torchvision
13
+ torchaudio
14
+ einops
15
+ opencv-contrib-python
16
+ safetensors
17
+ huggingface_hub
18
+ decord
19
+ imageio_ffmpeg==0.6.0
20
+ sageattention==1.0.6
21
+ xformers==0.0.29.post3
22
+ bitsandbytes==0.46.0
23
+ pillow-heif==0.22.0