Build (x86_64-linux)
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +5 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
- build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +49 -0
build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out
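The new top-level mul_and_silu wrapper follows the same pattern as the existing silu_and_mul and gelu_and_mul helpers: the caller allocates the output tensor and the wrapper forwards both tensors to the compiled op. A minimal usage sketch, assuming the built package is importable as activation (the import name, shapes, and dtype below are illustrative, not part of this diff; the semantics follow the MulAndSilu docstring added in layers.py further down):

import torch

import activation  # assumed import name for this built package

# x packs both halves along the last dimension: (num_tokens, 2 * d)
x = torch.randn(32, 2 * 1024, device="cuda", dtype=torch.float16)
out = torch.empty(32, 1024, device="cuda", dtype=torch.float16)  # caller-allocated (num_tokens, d)

activation.mul_and_silu(out, x)  # writes x[:, :d] * silu(x[:, d:]) into out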
build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9b6ba32ecc6fc898df3b0cebee85e9afc6881749fe58142280f051ca3332d913
+size 2546864
build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"
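_ops.py is regenerated so that the Python shim imports the ABI3 extension under its new build hash and resolves the matching torch.ops namespace; add_op_namespace_prefix then yields fully qualified operator names for that namespace. A rough consumption sketch, assuming an installed build importable as activation (import path, shapes, and dtype are illustrative):

import torch

from activation._ops import add_op_namespace_prefix, ops  # assumed import path

x = torch.randn(8, 2 * 128, device="cuda", dtype=torch.float16)
out = torch.empty(8, 128, device="cuda", dtype=torch.float16)
ops.silu_and_mul(out, x)  # dispatches to the kernel the .so registers on import

# Fully qualified schema name for this build, e.g. for torch.library utilities:
assert add_op_namespace_prefix("silu_and_mul") == "_activation_be5bedb::silu_and_mul"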
build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
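The added docstrings pin down the operand order: SiluAndMul applies silu to the first half and multiplies by the second, while the new MulAndSilu multiplies the first half by silu of the second. A pure-PyTorch reference of those documented semantics, useful for spot-checking a kernel build (the _ref helper names are ours, not part of the package):

import torch
import torch.nn.functional as F


def silu_and_mul_ref(x: torch.Tensor) -> torch.Tensor:
    # silu(x[..., :d]) * x[..., d:], with d = x.shape[-1] // 2
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]


def mul_and_silu_ref(x: torch.Tensor) -> torch.Tensor:
    # x[..., :d] * silu(x[..., d:]) -- the ordering used by the new MulAndSilu
    d = x.shape[-1] // 2
    return x[..., :d] * F.silu(x[..., d:])


# Hypothetical check against the kernel-backed layer (requires a CUDA build):
# from activation.layers import MulAndSilu
# x = torch.randn(4, 16, 2 * 128, device="cuda", dtype=torch.float16)
# torch.testing.assert_close(MulAndSilu()(x), mul_and_silu_ref(x), rtol=1e-3, atol=1e-3)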
build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:331dcb3900d5e47a11d3577cdbac54f15a0b6e14910239293323c1d9e4eb9f49
+size 2616928

build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1ce11492b9675a44afb3b896ed80e425f2a47e29481c4aad9c4a6ac59520f011
+size 2621472

build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:434bd1ae43b7cbdb10d86b82da9a237ec05ef9d9fb4fc15cdc9096d3d5ed3fa7
+size 2539352

build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53ddfb42466bfe01feb98348f5c2d6beefd589aeb3dec4c5c36609e11a6bde4c
+size 2605136

build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e364773259dc1b91f3c0d3b076da83c5a9c6ee18ffdace30315c602dffd1dabe
-size 2502264

build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py
CHANGED
@@ -23,7 +23,8 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
-class MulAndSilu(CustomOp):
+
+class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.
 
     The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
@@ -37,11 +38,12 @@ class MulAndSilu(CustomOp):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
-        output_shape =
+        output_shape = x.shape[:-1] + (d,)
         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-
+        ops.mul_and_silu(out, x)
         return out
 
+
 class GeluAndMul(nn.Module):
     """An activation function for GeGLU.
 
build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac7174352dea307231f308c84ca32ee001cdbcefd976de860e76501c52aae591
+size 2613776

build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7ac88cc0d3c65ab283d20608f3a097be29ee572e7856f10f8d7919536efd95b4
-size 2506808

build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
    def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332
+size 2546984

build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e4f9e647eea40d3d3801d5ee57d4917e4c2e8dbfd87cdfebdc40b1b0a1c571fe
-size 2448184

build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699
+size 2621536

build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a2b72ff2a0f2253e4dfe028842b5f15cabf2647d7812bf4662a2de510ca0c489
-size 2518632

build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899
+size 3565432

build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f4590c852899e4c11ddb74cfad61e26b07490a91f3c09e0fb0874a3fcc1f533e
-size 3331456

build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):