danieldk (HF Staff) committed commit 2fafa6a (1 parent: 0c3eb4e)

Build (aarch64-linux)

Files changed (21)
  1. build/torch28-cxx11-cu126-aarch64-linux/activation/__init__.py +57 -0
  2. build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  3. build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  4. build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  5. build/torch28-cxx11-cu126-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so +3 -0
  6. build/torch28-cxx11-cu126-aarch64-linux/activation/_ops.py +9 -0
  7. build/torch28-cxx11-cu126-aarch64-linux/activation/layers.py +128 -0
  8. build/torch28-cxx11-cu128-aarch64-linux/activation/__init__.py +57 -0
  9. build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  10. build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  11. build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  12. build/torch28-cxx11-cu128-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so +3 -0
  13. build/torch28-cxx11-cu128-aarch64-linux/activation/_ops.py +9 -0
  14. build/torch28-cxx11-cu128-aarch64-linux/activation/layers.py +128 -0
  15. build/torch28-cxx11-cu129-aarch64-linux/activation/__init__.py +57 -0
  16. build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  17. build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  18. build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  19. build/torch28-cxx11-cu129-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so +3 -0
  20. build/torch28-cxx11-cu129-aarch64-linux/activation/_ops.py +9 -0
  21. build/torch28-cxx11-cu129-aarch64-linux/activation/layers.py +128 -0
build/torch28-cxx11-cu126-aarch64-linux/activation/__init__.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+
+ from ._ops import ops
+
+ from . import layers
+
+
+ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.silu_and_mul(out, x)
+     return out
+
+
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.mul_and_silu(out, x)
+     return out
+
+
+ def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_and_mul(out, x)
+     return out
+
+
+ def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_tanh_and_mul(out, x)
+     return out
+
+
+ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
+     ops.fatrelu_and_mul(out, x, threshold)
+     return out
+
+
+ def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_fast(out, x)
+     return out
+
+
+ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_new(out, x)
+     return out
+
+
+ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_quick(out, x)
+     return out
+
+
+ __all__ = [
+     "silu_and_mul",
+     "gelu_and_mul",
+     "gelu_tanh_and_mul",
+     "fatrelu_and_mul",
+     "gelu_fast",
+     "gelu_new",
+     "gelu_quick",
+     "layers",
+ ]
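For context, each wrapper above runs the corresponding CUDA kernel in place on a caller-provided output tensor and returns that same tensor. A minimal usage sketch, assuming the built package is importable as "activation" (name taken from the directory layout above), a CUDA device is available, and the half-width output convention from layers.py below:

import torch
import activation  # top-level import name assumed from the package directory above

# x packs two projections along its last dimension; out receives silu(x1) * x2.
x = torch.randn(8, 2 * 128, device="cuda", dtype=torch.float16)
out = torch.empty(8, 128, device="cuda", dtype=torch.float16)
activation.silu_and_mul(out, x)  # fills out in place and also returns it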
build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB)
build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (539 Bytes)
build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB)
build/torch28-cxx11-cu126-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02b62f5d045f370c3fb7c0e7ef458165feb987fba186b8cb9aee55c735a82e93
+ size 2699928
build/torch28-cxx11-cu126-aarch64-linux/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
+ import torch
+ from . import _activation_0c3eb4e_dirty
+ ops = torch.ops._activation_0c3eb4e_dirty
+
+ def add_op_namespace_prefix(op_name: str):
+     """
+     Prefix op by namespace.
+     """
+     return f"_activation_0c3eb4e_dirty::{op_name}"
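add_op_namespace_prefix simply builds the fully qualified "namespace::op" string for this build's op namespace. A small illustrative sketch (the import path is assumed from the build layout):

from activation._ops import add_op_namespace_prefix  # import path assumed from the build layout

qualified = add_op_namespace_prefix("silu_and_mul")
# qualified == "_activation_0c3eb4e_dirty::silu_and_mul", the "namespace::op" form
# that torch.library utilities expect when referring to a custom op by name.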
build/torch28-cxx11-cu126-aarch64-linux/activation/layers.py ADDED
@@ -0,0 +1,128 @@
+ import torch
+ import torch.nn as nn
+
+ from ._ops import ops
+
+
+ class SiluAndMul(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.silu_and_mul(out, x)
+         return out
+
+
+ class MulAndSilu(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.mul_and_silu(out, x)
+         return out
+
+
+ class GeluAndMul(nn.Module):
+     """An activation function for GeGLU.
+
+     The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+         return: (batch_size, seq_len, d) or (num_tokens, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_and_mul(out, x)
+         return out
+
+
+ class GeluTanhAndMul(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_tanh_and_mul(out, x)
+         return out
+
+
+ class FatreluAndMul(nn.Module):
+     """An activation function for FATReLU.
+
+     The function computes x -> FATReLU(x[:d]) * x[d:] where
+     d = x.shape[-1] // 2.
+     This is used in openbmb/MiniCPM-S-1B-sft.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def __init__(self, threshold: float = 0.0):
+         super().__init__()
+         self.threshold = threshold
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.fatrelu_and_mul(out, x, self.threshold)
+         return out
+
+
+ class FastGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_fast(out, x)
+         return out
+
+
+ class NewGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_new(out, x)
+         return out
+
+
+ class QuickGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_quick(out, x)
+         return out
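The module wrappers allocate their own output (with the last dimension halved for the *AndMul variants), so they can be dropped directly into a model definition. A short illustrative sketch, assuming a CUDA device and the "activation" import name from the build layout; the cu128 and cu129 builds below ship identical Python sources:

import torch
from activation.layers import SiluAndMul  # import path assumed from the build layout

layer = SiluAndMul()
x = torch.randn(4, 16, 2 * 64, device="cuda", dtype=torch.float16)
y = layer(x)  # computes silu(x[..., :64]) * x[..., 64:]
assert y.shape == (4, 16, 64)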
build/torch28-cxx11-cu128-aarch64-linux/activation/__init__.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+
+ from ._ops import ops
+
+ from . import layers
+
+
+ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.silu_and_mul(out, x)
+     return out
+
+
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.mul_and_silu(out, x)
+     return out
+
+
+ def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_and_mul(out, x)
+     return out
+
+
+ def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_tanh_and_mul(out, x)
+     return out
+
+
+ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
+     ops.fatrelu_and_mul(out, x, threshold)
+     return out
+
+
+ def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_fast(out, x)
+     return out
+
+
+ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_new(out, x)
+     return out
+
+
+ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_quick(out, x)
+     return out
+
+
+ __all__ = [
+     "silu_and_mul",
+     "gelu_and_mul",
+     "gelu_tanh_and_mul",
+     "fatrelu_and_mul",
+     "gelu_fast",
+     "gelu_new",
+     "gelu_quick",
+     "layers",
+ ]
build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB)
build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (539 Bytes)
build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB)
build/torch28-cxx11-cu128-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f7fe0a00eaf2e228f237ee3058ac9eb2c6fbc4927b1276d0f566bb05bb043b9
+ size 3683080
build/torch28-cxx11-cu128-aarch64-linux/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
+ import torch
+ from . import _activation_0c3eb4e_dirty
+ ops = torch.ops._activation_0c3eb4e_dirty
+
+ def add_op_namespace_prefix(op_name: str):
+     """
+     Prefix op by namespace.
+     """
+     return f"_activation_0c3eb4e_dirty::{op_name}"
build/torch28-cxx11-cu128-aarch64-linux/activation/layers.py ADDED
@@ -0,0 +1,128 @@
+ import torch
+ import torch.nn as nn
+
+ from ._ops import ops
+
+
+ class SiluAndMul(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.silu_and_mul(out, x)
+         return out
+
+
+ class MulAndSilu(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.mul_and_silu(out, x)
+         return out
+
+
+ class GeluAndMul(nn.Module):
+     """An activation function for GeGLU.
+
+     The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+         return: (batch_size, seq_len, d) or (num_tokens, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_and_mul(out, x)
+         return out
+
+
+ class GeluTanhAndMul(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_tanh_and_mul(out, x)
+         return out
+
+
+ class FatreluAndMul(nn.Module):
+     """An activation function for FATReLU.
+
+     The function computes x -> FATReLU(x[:d]) * x[d:] where
+     d = x.shape[-1] // 2.
+     This is used in openbmb/MiniCPM-S-1B-sft.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def __init__(self, threshold: float = 0.0):
+         super().__init__()
+         self.threshold = threshold
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.fatrelu_and_mul(out, x, self.threshold)
+         return out
+
+
+ class FastGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_fast(out, x)
+         return out
+
+
+ class NewGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_new(out, x)
+         return out
+
+
+ class QuickGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_quick(out, x)
+         return out
build/torch28-cxx11-cu129-aarch64-linux/activation/__init__.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+
+ from ._ops import ops
+
+ from . import layers
+
+
+ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.silu_and_mul(out, x)
+     return out
+
+
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.mul_and_silu(out, x)
+     return out
+
+
+ def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_and_mul(out, x)
+     return out
+
+
+ def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_tanh_and_mul(out, x)
+     return out
+
+
+ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
+     ops.fatrelu_and_mul(out, x, threshold)
+     return out
+
+
+ def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_fast(out, x)
+     return out
+
+
+ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_new(out, x)
+     return out
+
+
+ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+     ops.gelu_quick(out, x)
+     return out
+
+
+ __all__ = [
+     "silu_and_mul",
+     "gelu_and_mul",
+     "gelu_tanh_and_mul",
+     "fatrelu_and_mul",
+     "gelu_fast",
+     "gelu_new",
+     "gelu_quick",
+     "layers",
+ ]
build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB)
build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (539 Bytes)
build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB)
build/torch28-cxx11-cu129-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9f6a161283a05672bb3ed442990cf8a5ce553625bb482cd31ce514e07cfcf0a
+ size 3684504
build/torch28-cxx11-cu129-aarch64-linux/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
+ import torch
+ from . import _activation_0c3eb4e_dirty
+ ops = torch.ops._activation_0c3eb4e_dirty
+
+ def add_op_namespace_prefix(op_name: str):
+     """
+     Prefix op by namespace.
+     """
+     return f"_activation_0c3eb4e_dirty::{op_name}"
build/torch28-cxx11-cu129-aarch64-linux/activation/layers.py ADDED
@@ -0,0 +1,128 @@
+ import torch
+ import torch.nn as nn
+
+ from ._ops import ops
+
+
+ class SiluAndMul(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.silu_and_mul(out, x)
+         return out
+
+
+ class MulAndSilu(nn.Module):
+     """An activation function for SwiGLU.
+
+     The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.mul_and_silu(out, x)
+         return out
+
+
+ class GeluAndMul(nn.Module):
+     """An activation function for GeGLU.
+
+     The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+     Shapes:
+         x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+         return: (batch_size, seq_len, d) or (num_tokens, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_and_mul(out, x)
+         return out
+
+
+ class GeluTanhAndMul(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.gelu_tanh_and_mul(out, x)
+         return out
+
+
+ class FatreluAndMul(nn.Module):
+     """An activation function for FATReLU.
+
+     The function computes x -> FATReLU(x[:d]) * x[d:] where
+     d = x.shape[-1] // 2.
+     This is used in openbmb/MiniCPM-S-1B-sft.
+
+     Shapes:
+         x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+         return: (num_tokens, d) or (batch_size, seq_len, d)
+     """
+
+     can_torch_compile: bool = True
+
+     def __init__(self, threshold: float = 0.0):
+         super().__init__()
+         self.threshold = threshold
+
+     def forward(self, x: torch.Tensor):
+         d = x.shape[-1] // 2
+         output_shape = x.shape[:-1] + (d,)
+         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+         ops.fatrelu_and_mul(out, x, self.threshold)
+         return out
+
+
+ class FastGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_fast(out, x)
+         return out
+
+
+ class NewGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_new(out, x)
+         return out
+
+
+ class QuickGELU(nn.Module):
+     can_torch_compile: bool = True
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         out = torch.empty_like(x)
+         ops.gelu_quick(out, x)
+         return out