danieldk HF Staff committed on
Commit
0c3eb4e
·
1 Parent(s): be5bedb

Build (x86_64-linux)

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
  2. build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
  3. build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
  4. build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
  5. build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +5 -0
  6. build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
  7. build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +3 -3
  8. build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +49 -0
  9. build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
  10. build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
  11. build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
  12. build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
  13. build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +5 -0
  14. build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} +2 -2
  15. build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +3 -3
  16. build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +49 -0
  17. build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +5 -0
  18. build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
  19. build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
  20. build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +3 -3
  21. build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +5 -3
  22. build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +5 -0
  23. build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so +3 -0
  24. build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
  25. build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +3 -3
  26. build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +49 -0
  27. build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +5 -0
  28. build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  29. build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  30. build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  31. build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
  32. build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
  33. build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
  34. build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +49 -0
  35. build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +5 -0
  36. build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  37. build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  38. build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  39. build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
  40. build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
  41. build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
  42. build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +49 -0
  43. build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +5 -0
  44. build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
  45. build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
  46. build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
  47. build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +3 -0
  48. build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so +0 -3
  49. build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
  50. build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +49 -0
build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx11-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b925dc27b6a9afd5b6d11e454275222c531a92f7ca27958ac81a78c580665e4d
3
- size 2448088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6ba32ecc6fc898df3b0cebee85e9afc6881749fe58142280f051ca3332d913
3
+ size 2546864
build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx11-cu124-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfdbe510752b57a8dc4671f744bb0a2da5b1646e0b9a19fec02f1505ba044c8c
3
- size 2509960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331dcb3900d5e47a11d3577cdbac54f15a0b6e14910239293323c1d9e4eb9f49
3
+ size 2616928
build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx11-cu126-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e544ad6448a5576d26147f48403f3e9e593f4a2e24167dc8acb81ce3b7932e
3
- size 2518600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce11492b9675a44afb3b896ed80e425f2a47e29481c4aad9c4a6ac59520f011
3
+ size 2621472
build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx98-cu118-x86_64-linux/activation/{_activation_e99cc09_dirty.abi3.so → _activation_be5bedb.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60fd224c33657558f03be5be57cc8d35ade23225b1abd71557b170c8a7010cd1
3
- size 2440576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:434bd1ae43b7cbdb10d86b82da9a237ec05ef9d9fb4fc15cdc9096d3d5ed3fa7
3
+ size 2539352
build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_be5bedb.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53ddfb42466bfe01feb98348f5c2d6beefd589aeb3dec4c5c36609e11a6bde4c
3
+ size 2605136
build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e364773259dc1b91f3c0d3b076da83c5a9c6ee18ffdace30315c602dffd1dabe
3
- size 2502264
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py CHANGED
@@ -23,7 +23,8 @@ class SiluAndMul(nn.Module):
23
  ops.silu_and_mul(out, x)
24
  return out
25
 
26
- class MulAndSilu(CustomOp):
 
27
  """An activation function for SwiGLU.
28
 
29
  The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
@@ -37,11 +38,12 @@ class MulAndSilu(CustomOp):
37
 
38
  def forward(self, x: torch.Tensor) -> torch.Tensor:
39
  d = x.shape[-1] // 2
40
- output_shape = (x.shape[:-1] + (d, ))
41
  out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
42
- self.mul_and_silu(out, x)
43
  return out
44
 
 
45
  class GeluAndMul(nn.Module):
46
  """An activation function for GeGLU.
47
 
 
23
  ops.silu_and_mul(out, x)
24
  return out
25
 
26
+
27
+ class MulAndSilu(nn.Module):
28
  """An activation function for SwiGLU.
29
 
30
  The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
 
38
 
39
  def forward(self, x: torch.Tensor) -> torch.Tensor:
40
  d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
  out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
  return out
45
 
46
+
47
  class GeluAndMul(nn.Module):
48
  """An activation function for GeGLU.
49
 
build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_be5bedb.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7174352dea307231f308c84ca32ee001cdbcefd976de860e76501c52aae591
3
+ size 2613776
build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ac88cc0d3c65ab283d20608f3a097be29ee572e7856f10f8d7919536efd95b4
3
- size 2506808
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb
3
+ ops = torch.ops._activation_be5bedb
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb::{op_name}"
build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB). View file
 
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (538 Bytes). View file
 
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB). View file
 
build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332
3
+ size 2546984
build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4f9e647eea40d3d3801d5ee57d4917e4c2e8dbfd87cdfebdc40b1b0a1c571fe
3
- size 2448184
 
 
 
 
build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb_dirty
3
+ ops = torch.ops._activation_be5bedb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb_dirty::{op_name}"
build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB). View file
 
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (538 Bytes). View file
 
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB). View file
 
build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699
3
+ size 2621536
build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2b72ff2a0f2253e4dfe028842b5f15cabf2647d7812bf4662a2de510ca0c489
3
- size 2518632
 
 
 
 
build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb_dirty
3
+ ops = torch.ops._activation_be5bedb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb_dirty::{op_name}"
build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):
build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py CHANGED
@@ -10,6 +10,11 @@ def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
10
  return out
11
 
12
 
 
 
 
 
 
13
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
14
  ops.gelu_and_mul(out, x)
15
  return out
 
10
  return out
11
 
12
 
13
+ def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
14
+ ops.mul_and_silu(out, x)
15
+ return out
16
+
17
+
18
  def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
19
  ops.gelu_and_mul(out, x)
20
  return out
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (2.5 kB). View file
 
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (538 Bytes). View file
 
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc ADDED
Binary file (6.92 kB). View file
 
build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899
3
+ size 3565432
build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e99cc09_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4590c852899e4c11ddb74cfad61e26b07490a91f3c09e0fb0874a3fcc1f533e
3
- size 3331456
 
 
 
 
build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e99cc09_dirty
3
- ops = torch.ops._activation_e99cc09_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e99cc09_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_be5bedb_dirty
3
+ ops = torch.ops._activation_be5bedb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_be5bedb_dirty::{op_name}"
build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py CHANGED
@@ -5,6 +5,15 @@ from ._ops import ops
5
 
6
 
7
  class SiluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
8
  can_torch_compile: bool = True
9
 
10
  def forward(self, x: torch.Tensor):
@@ -15,7 +24,36 @@ class SiluAndMul(nn.Module):
15
  return out
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class GeluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
19
  can_torch_compile: bool = True
20
 
21
  def forward(self, x: torch.Tensor):
@@ -38,6 +76,17 @@ class GeluTanhAndMul(nn.Module):
38
 
39
 
40
  class FatreluAndMul(nn.Module):
 
 
 
 
 
 
 
 
 
 
 
41
  can_torch_compile: bool = True
42
 
43
  def __init__(self, threshold: float = 0.0):
 
5
 
6
 
7
  class SiluAndMul(nn.Module):
8
+ """An activation function for SwiGLU.
9
+
10
+ The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
11
+
12
+ Shapes:
13
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
14
+ return: (num_tokens, d) or (batch_size, seq_len, d)
15
+ """
16
+
17
  can_torch_compile: bool = True
18
 
19
  def forward(self, x: torch.Tensor):
 
24
  return out
25
 
26
 
27
+ class MulAndSilu(nn.Module):
28
+ """An activation function for SwiGLU.
29
+
30
+ The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
31
+
32
+ Shapes:
33
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
34
+ return: (num_tokens, d) or (batch_size, seq_len, d)
35
+ """
36
+
37
+ can_torch_compile: bool = True
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ d = x.shape[-1] // 2
41
+ output_shape = x.shape[:-1] + (d,)
42
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
43
+ ops.mul_and_silu(out, x)
44
+ return out
45
+
46
+
47
  class GeluAndMul(nn.Module):
48
+ """An activation function for GeGLU.
49
+
50
+ The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
51
+
52
+ Shapes:
53
+ x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
54
+ return: (batch_size, seq_len, d) or (num_tokens, d)
55
+ """
56
+
57
  can_torch_compile: bool = True
58
 
59
  def forward(self, x: torch.Tensor):
 
76
 
77
 
78
  class FatreluAndMul(nn.Module):
79
+ """An activation function for FATReLU.
80
+
81
+ The function computes x -> FATReLU(x[:d]) * x[d:] where
82
+ d = x.shape[-1] // 2.
83
+ This is used in openbmb/MiniCPM-S-1B-sft.
84
+
85
+ Shapes:
86
+ x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
87
+ return: (num_tokens, d) or (batch_size, seq_len, d)
88
+ """
89
+
90
  can_torch_compile: bool = True
91
 
92
  def __init__(self, threshold: float = 0.0):