# Build configuration (TOML) for the "moe" kernel package.
# NOTE(review): the table layout ([general] / [torch] / [kernel.*]) looks like a
# kernel-builder build.toml -- confirm which tool consumes this file.

# Package-wide settings.
[general]
name = "moe"
# NOTE(review): presumably false because the package ships compiled kernels
# rather than pure Python -- confirm against the build tool's documentation.
universal = false

# Settings for the Torch extension/binding layer.
[torch]
# Include path list; "." is presumably resolved relative to this file -- confirm.
include = ["."]
# NOTE(review): presumably the file extensions of Python-package files to
# bundle (here .py and .json) -- confirm against the build tool's documentation.
pyext = [
    "py",
    "json",
]
# Source and header files listed for the binding layer.
src = [
    "core/scalar_type.hpp",
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]

# Kernel "moe-marlin": the sources under marlin-moe/ plus shared core/ headers.
[kernel.moe-marlin]
backend = "cuda"
# Only this kernel lists cuda-capabilities explicitly; the other kernel tables
# in this file omit the key.
# NOTE(review): presumably the compute capabilities to compile for -- confirm.
cuda-capabilities = [
    "8.0",
    "8.6",
    "8.7",
    "8.9",
    "9.0",
    "10.0",
    "10.1",
    "12.0",
]
depends = ["torch"]
include = ["."]
src = [
    "core/exception.hpp",
    "core/scalar_type.hpp",
    "marlin-moe/marlin_moe_ops.cu",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu",
    "marlin-moe/marlin_kernels/marlin_moe_kernel.h",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu",
    "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h",
]

# Kernel "activation": the sources under activation/.
[kernel.activation]
backend = "cuda"
depends = ["torch"]
# NOTE(review): the helper headers here are listed under activation/, whereas
# [kernel.fp8] and [kernel.moe] list root-level cuda_compat.h and
# dispatch_utils.h -- confirm that both copies exist and are intended.
src = [
    "activation/activation_kernels.cu",
    "activation/cuda_compat.h",
    "activation/dispatch_utils.h",
]

# Kernel "fp8": the sources under fp8/ (including fp8/amd/ headers) plus the
# root-level compatibility/dispatch headers.
[kernel.fp8]
backend = "cuda"
depends = ["torch"]
include = ["."]
src = [
    "cuda_compat.h",
    "dispatch_utils.h",
    "fp8/amd/hip_float8.h",
    "fp8/amd/hip_float8_impl.h",
    "fp8/common.cu",
    "fp8/common.cuh",
    "fp8/vectorization.cuh",
]

# Kernel "moe": the sources under moe/ plus the root-level
# compatibility/dispatch headers.
[kernel.moe]
backend = "cuda"
depends = ["torch"]
# NOTE(review): unlike [kernel.fp8], this table has no `include = ["."]` even
# though it lists root-level headers -- confirm the sources resolve them.
src = [
    "cuda_compat.h",
    "dispatch_utils.h",
    "moe/moe_align_sum_kernels.cu",
    "moe/moe_wna16.cu",
    "moe/moe_wna16_utils.h",
    "moe/topk_softmax_kernels.cu",
]