Build (x86_64-linux)
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
- build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
- build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +5 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
- build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +5 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
- build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +49 -0
build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out
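The new top-level mul_and_silu wrapper follows the same pattern as the existing silu_and_mul and gelu_and_mul helpers: the caller allocates the output tensor and the wrapper forwards both tensors to the compiled op. A minimal usage sketch, assuming the built package is importable as activation (the import name, shapes, and dtype below are illustrative, not part of this diff; the semantics follow the MulAndSilu docstring added in layers.py further down):

import torch

import activation  # assumed import name for this built package

# x packs both halves along the last dimension: (num_tokens, 2 * d)
x = torch.randn(32, 2 * 1024, device="cuda", dtype=torch.float16)
out = torch.empty(32, 1024, device="cuda", dtype=torch.float16)  # caller-allocated (num_tokens, d)

activation.mul_and_silu(out, x)  # writes x[:, :d] * silu(x[:, d:]) into out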
build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9b6ba32ecc6fc898df3b0cebee85e9afc6881749fe58142280f051ca3332d913
+size 2546864
build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"
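_ops.py is regenerated so that the Python shim imports the ABI3 extension under its new build hash and resolves the matching torch.ops namespace; add_op_namespace_prefix then yields fully qualified operator names for that namespace. A rough consumption sketch, assuming an installed build importable as activation (import path, shapes, and dtype are illustrative):

import torch

from activation._ops import add_op_namespace_prefix, ops  # assumed import path

x = torch.randn(8, 2 * 128, device="cuda", dtype=torch.float16)
out = torch.empty(8, 128, device="cuda", dtype=torch.float16)
ops.silu_and_mul(out, x)  # dispatches to the kernel the .so registers on import

# Fully qualified schema name for this build, e.g. for torch.library utilities:
assert add_op_namespace_prefix("silu_and_mul") == "_activation_be5bedb::silu_and_mul"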
build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
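The added docstrings pin down the operand order: SiluAndMul applies silu to the first half and multiplies by the second, while the new MulAndSilu multiplies the first half by silu of the second. A pure-PyTorch reference of those documented semantics, useful for spot-checking a kernel build (the _ref helper names are ours, not part of the package):

import torch
import torch.nn.functional as F


def silu_and_mul_ref(x: torch.Tensor) -> torch.Tensor:
    # silu(x[..., :d]) * x[..., d:], with d = x.shape[-1] // 2
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]


def mul_and_silu_ref(x: torch.Tensor) -> torch.Tensor:
    # x[..., :d] * silu(x[..., d:]) -- the ordering used by the new MulAndSilu
    d = x.shape[-1] // 2
    return x[..., :d] * F.silu(x[..., d:])


# Hypothetical check against the kernel-backed layer (requires a CUDA build):
# from activation.layers import MulAndSilu
# x = torch.randn(4, 16, 2 * 128, device="cuda", dtype=torch.float16)
# torch.testing.assert_close(MulAndSilu()(x), mul_and_silu_ref(x), rtol=1e-3, atol=1e-3)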
build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:331dcb3900d5e47a11d3577cdbac54f15a0b6e14910239293323c1d9e4eb9f49
+size 2616928

build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1ce11492b9675a44afb3b896ed80e425f2a47e29481c4aad9c4a6ac59520f011
+size 2621472

build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:434bd1ae43b7cbdb10d86b82da9a237ec05ef9d9fb4fc15cdc9096d3d5ed3fa7
+size 2539352

build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53ddfb42466bfe01feb98348f5c2d6beefd589aeb3dec4c5c36609e11a6bde4c
+size 2605136

build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e364773259dc1b91f3c0d3b076da83c5a9c6ee18ffdace30315c602dffd1dabe
-size 2502264

build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py
CHANGED
@@ -23,7 +23,8 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
-class MulAndSilu(CustomOp):
+
+class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.
 
     The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
@@ -37,11 +38,12 @@ class MulAndSilu(CustomOp):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
-        output_shape =
+        output_shape = x.shape[:-1] + (d,)
         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-
+        ops.mul_and_silu(out, x)
         return out
 
+
 class GeluAndMul(nn.Module):
     """An activation function for GeGLU.
 
build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac7174352dea307231f308c84ca32ee001cdbcefd976de860e76501c52aae591
+size 2613776

build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7ac88cc0d3c65ab283d20608f3a097be29ee572e7856f10f8d7919536efd95b4
-size 2506808

build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb
+ops = torch.ops._activation_be5bedb
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb::{op_name}"

build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
    def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332
+size 2546984

build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e4f9e647eea40d3d3801d5ee57d4917e4c2e8dbfd87cdfebdc40b1b0a1c571fe
-size 2448184

build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699
+size 2621536

build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a2b72ff2a0f2253e4dfe028842b5f15cabf2647d7812bf4662a2de510ca0c489
-size 2518632

build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py
CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 
 
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.mul_and_silu(out, x)
+    return out
+
+
 def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_and_mul(out, x)
     return out

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (2.5 kB).

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
ADDED
Binary file (538 Bytes).

build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc
ADDED
Binary file (6.92 kB).

build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899
+size 3565432

build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f4590c852899e4c11ddb74cfad61e26b07490a91f3c09e0fb0874a3fcc1f533e
-size 3331456

build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import
-ops = torch.ops.
+from . import _activation_be5bedb_dirty
+ops = torch.ops._activation_be5bedb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_activation_be5bedb_dirty::{op_name}"

build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py
CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
 
 
 class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
         return out
 
 
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
 class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
     can_torch_compile: bool = True
 
     def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
 
 
 class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
     can_torch_compile: bool = True
 
     def __init__(self, threshold: float = 0.0):