diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..c1e52a91b4fa56b4ff39c854b33497b094135599 --- /dev/null +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6ba32ecc6fc898df3b0cebee85e9afc6881749fe58142280f051ca3332d913 +size 2546864 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 0603eccc9144bee8f9704c4236947e42c905096d..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b925dc27b6a9afd5b6d11e454275222c531a92f7ca27958ac81a78c580665e4d -size 2448088 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..f45a6ffcf3f11e3b24919496e213a61acb258d2a --- /dev/null +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331dcb3900d5e47a11d3577cdbac54f15a0b6e14910239293323c1d9e4eb9f49 +size 2616928 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 494cce9f6166100fdb10f021911228b1cbfa2bdd..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cfdbe510752b57a8dc4671f744bb0a2da5b1646e0b9a19fec02f1505ba044c8c -size 2509960 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..12f5777398872e7a3d93ab936e42ade8eeec3213 --- /dev/null +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce11492b9675a44afb3b896ed80e425f2a47e29481c4aad9c4a6ac59520f011 +size 2621472 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index d18a35d3e459fa1ecfc1ca166e55cb6ac118a6bb..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70e544ad6448a5576d26147f48403f3e9e593f4a2e24167dc8acb81ce3b7932e -size 2518600 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..056de26936949cc36baf3caa9c4212d730da81f7 --- /dev/null +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434bd1ae43b7cbdb10d86b82da9a237ec05ef9d9fb4fc15cdc9096d3d5ed3fa7 +size 2539352 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 13989de7ff0a055c8e40e1e1f4d0a9ed9197c1fa..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60fd224c33657558f03be5be57cc8d35ade23225b1abd71557b170c8a7010cd1 -size 2440576 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py index 
6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..c31190f8f2be87dbb5d5a9c497c68cea2258fded --- /dev/null +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ddfb42466bfe01feb98348f5c2d6beefd589aeb3dec4c5c36609e11a6bde4c +size 2605136 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 76e8710b2a6d75f17d1c40c2ba116c096791c815..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e364773259dc1b91f3c0d3b076da83c5a9c6ee18ffdace30315c602dffd1dabe -size 2502264 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py index 403cc5bbda3495c3c06a976e26dc5839ecc495b5..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py @@ -23,7 +23,8 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out -class MulAndSilu(CustomOp): + +class MulAndSilu(nn.Module): """An activation function for SwiGLU. The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
@@ -37,11 +38,12 @@ class MulAndSilu(CustomOp): def forward(self, x: torch.Tensor) -> torch.Tensor: d = x.shape[-1] // 2 - output_shape = (x.shape[:-1] + (d, )) + output_shape = x.shape[:-1] + (d,) out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - self.mul_and_silu(out, x) + ops.mul_and_silu(out, x) return out + class GeluAndMul(nn.Module): """An activation function for GeGLU. diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..516f085e9ac787a2454fb78975dbaec25d2a6576 --- /dev/null +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7174352dea307231f308c84ca32ee001cdbcefd976de860e76501c52aae591 +size 2613776 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 92433b08d2ef878a9d6fc7dfd5281051412ea0b3..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ac88cc0d3c65ab283d20608f3a097be29ee572e7856f10f8d7919536efd95b4 -size 2506808 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..0110324ade19f59f705c61d5c21912c958e92e96 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb +ops = torch.ops._activation_be5bedb def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5155b241dff8af4302230c3ae23518cb41efa185 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53b5508fec27cd0ece00b9b018694ba8da40c5ba Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7752cad4c2a06746b1a68c3637c7baef00bb5ddc Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git 
a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..7d5463c37b3f4a3dec8b15df1a13168019fb26e3 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332 +size 2546984 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 16e9df58edb8b6fe6885a0ed783306390db853b1..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4f9e647eea40d3d3801d5ee57d4917e4c2e8dbfd87cdfebdc40b1b0a1c571fe -size 2448184 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..745e06b31cb5b9718d3b85236f4cc257459070d7 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b1fcc2dcde514cab92d358380824ca24616cd0b Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..665e89cb27b58c9caff761de28b7f6574cc2140e Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4602c567b14a674c4a56d0e1cf8ef073fbc50beb Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..94c38d99b9593469317fe894be35b069017b493e --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699 +size 2621536 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index 085ef09ae9488945275424ee7a507f0289143ed8..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:a2b72ff2a0f2253e4dfe028842b5f15cabf2647d7812bf4662a2de510ca0c489 -size 2518632 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..745e06b31cb5b9718d3b85236f4cc257459070d7 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: return out +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_and_mul(out, x) return out diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe2206ed48c6e6b877620ac3db87af6ee49ddf07 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a940427d39d1a12a0806315d03b02bdfed65a3d Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..725246ac4c8d6c4374d8250ea67f759a871b1c38 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e5c17e44367c005d1c9f8d6b391be8d49079b2fc --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899 +size 3565432 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so deleted file mode 100755 index ea1a9f1b610a4e3ca23afc5e13c26c3e0ef7758b..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4590c852899e4c11ddb74cfad61e26b07490a91f3c09e0fb0874a3fcc1f533e -size 3331456 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py index 6cfb9cfa80b63852c1a9a8641b25616ce4caffd8..745e06b31cb5b9718d3b85236f4cc257459070d7 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +++ 
b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_e99cc09_dirty -ops = torch.ops._activation_e99cc09_dirty +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_e99cc09_dirty::{op_name}" \ No newline at end of file + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py index dea45935f51421e8ee87b05430c2e95840cb4ef8..45b31181ffb80509a85d729a7f7ee86fc2cf014a 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py @@ -5,6 +5,15 @@ from ._ops import ops class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -15,7 +24,36 @@ class SiluAndMul(nn.Module): return out +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + can_torch_compile: bool = True def forward(self, x: torch.Tensor): @@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module): class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + can_torch_compile: bool = True def __init__(self, threshold: float = 0.0): diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,57 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "layers", +] diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5263d294bc5bc421b98d31436c896bbc244d0771 Binary files /dev/null and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb753a567265e3db8b71afceb9a4442139a6aea7 Binary files /dev/null and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dd25df0a6c63b7315d2c0d9f4b3894ff1626fc8 Binary files /dev/null and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..40900ff2070ff72eb665fdd5fd78f12d3a287cd9 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88e87951b92ea55313ef79a34d284cb2a23713d3bdafee735caa4fc955b9dcb +size 2610616 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..745e06b31cb5b9718d3b85236f4cc257459070d7 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..45b31181ffb80509a85d729a7f7ee86fc2cf014a --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,57 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "layers", +] diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aedb284c8147a243ebfc99ec94000b62ae672077 Binary files /dev/null and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ae3e6d861e600db32e9024ae7db059642f35a3f Binary files /dev/null and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..51baab3cf4e592a2b8bed4cea0e9228a559b399d Binary files /dev/null and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..8b1ece63bdec0e63013816dae6bce9a87068f88e --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf784c7ab178c476fc6268efe820b1948c7c5b8f049c046c851b03067da5dd59 +size 3558616 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..745e06b31cb5b9718d3b85236f4cc257459070d7 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..45b31181ffb80509a85d729a7f7ee86fc2cf014a --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py @@ -0,0 +1,57 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "layers", +] diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01d30fced2b5392d0f6f4e6454cbe7d782a14daa Binary files /dev/null and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75b0e5f83e10b053d8584f2607d9a9f3009d45dc Binary files /dev/null and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6ed035d206ae523160771021be45010f234687e Binary files /dev/null and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..33fb245664d9daef5b07440b390db2c19ef404f1 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7cca3169eea8cbd67c61706d102548e49aadc936f8c2943efef3e7c4c0ee0d +size 3592400 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..745e06b31cb5b9718d3b85236f4cc257459070d7 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_be5bedb_dirty +ops = torch.ops._activation_be5bedb_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..45b31181ffb80509a85d729a7f7ee86fc2cf014a --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out
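
Since this patch only swaps the prebuilt `.so` binaries and replicates the same `layers.py` across build variants, the sketch below shows how the newly exposed MulAndSilu path could be exercised against an eager PyTorch reference. This is a minimal smoke test, not part of the diff: the `activation` import name, the availability of a CUDA device, the tensor shapes, and the fp16 tolerances are all assumptions.

# Hypothetical smoke test for the GLU-style layers in layers.py.
# Assumes the built package is importable as `activation` and a CUDA
# device is present; neither is guaranteed by this diff.
import torch
import torch.nn.functional as F

from activation import layers


def reference_silu_and_mul(x: torch.Tensor) -> torch.Tensor:
    # Eager reference: silu(x[..., :d]) * x[..., d:], with d = last dim // 2.
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]


def reference_mul_and_silu(x: torch.Tensor) -> torch.Tensor:
    # Eager reference: x[..., :d] * silu(x[..., d:]).
    d = x.shape[-1] // 2
    return x[..., :d] * F.silu(x[..., d:])


def main() -> None:
    # Shapes and dtype are illustrative only.
    x = torch.randn(8, 2 * 128, dtype=torch.float16, device="cuda")

    torch.testing.assert_close(
        layers.SiluAndMul()(x), reference_silu_and_mul(x), rtol=1e-2, atol=1e-3
    )
    torch.testing.assert_close(
        layers.MulAndSilu()(x), reference_mul_and_silu(x), rtol=1e-2, atol=1e-3
    )
    print("silu_and_mul / mul_and_silu match the eager reference")


if __name__ == "__main__":
    main()

As the docstrings above note, MulAndSilu differs from SiluAndMul only in which half of the projected tensor passes through SiLU before the elementwise product, so the same shape conventions and tolerance check apply to both layers.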