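// AMX buffer type for the ggml CPU backend. Weights stored in this buffer type
// are repacked into the tiled layout expected by the Intel AMX mat-mul kernels,
// and matrix multiplications on them are dispatched through tensor_traits below.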
// AMX type traits
namespace ggml::cpu::amx {

class tensor_traits : public ggml::cpu::tensor_traits {
    bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override {
        size = ggml_backend_amx_desired_wsize(op);
        return true;
    }

    bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT) {
            ggml_backend_amx_mul_mat(params, op);
            return true;
        }
        return false;
    }
};

static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {
    static tensor_traits traits;
    return &traits;
}

}  // namespace ggml::cpu::amx
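// NOTE: work_size() and compute_forward() are the hooks through which the CPU
// backend runs the AMX kernels: init_tensor() below stores a pointer to the
// singleton tensor_traits in tensor->extra, and the CPU backend retrieves it
// via extra_buffer_type::get_tensor_traits() when scheduling GGML_OP_MUL_MAT.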
// AMX buffer interface
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}

static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *) (buffer->context);
}

static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                  uint8_t value, size_t offset, size_t size) {
    memset((char *) tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                               const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *) tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}
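// NOTE: set_tensor is intentionally asymmetric: for types with AMX kernels the
// incoming row-major data is repacked into the tiled AMX layout, so the buffer
// contents can no longer be read back byte-for-byte. This is presumably why
// get_tensor and cpy_tensor are left as nullptr in the interface below.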
/*
// need to figure out what to do with buffer->extra
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
    memcpy(data, (const char *) tensor->data + offset, size);

    GGML_UNUSED(buffer);
}

static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
    if (ggml_backend_buffer_is_host(src->buffer)) {
        if (qtype_has_amx_kernels(src->type)) {
            ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst));
        } else {
            memcpy(dst->data, src->data, ggml_nbytes(src));
        }
        return true;
    }
    return false;

    GGML_UNUSED(buffer);
}
*/
static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}

static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer   = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base      = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor   = */ ggml_backend_amx_buffer_init_tensor,
    /* .memset_tensor = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor    = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor    = */ nullptr,
    /* .cpy_tensor    = */ nullptr,
    /* .clear         = */ ggml_backend_amx_buffer_clear,
    /* .reset         = */ nullptr,
};
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}

static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = ggml_aligned_malloc(size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}

static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}
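// NOTE: allocation goes through ggml_aligned_malloc so that the base address
// already satisfies the TENSOR_ALIGNMENT reported by get_alignment above.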
namespace ggml::cpu::amx {

class extra_buffer_type : ggml::cpu::extra_buffer_type {
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        // handle only 2d gemm for now
        auto is_contiguous_2d = [](const struct ggml_tensor * t) {
            return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
        };

        if (op->op == GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) &&  // src0 must be contiguous
            is_contiguous_2d(op->src[1]) &&                               // src1 must be contiguous
            op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_amx_buffer_type() &&
            op->ne[0] % (TILE_N * 2) == 0 &&  // out_features must be a multiple of TILE_N * 2
            (qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == GGML_TYPE_F16))) {
            // src1 must be a host buffer
            if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
                return false;
            }
            // src1 must be float32
            if (op->src[1]->type == GGML_TYPE_F32) {
                return true;
            }
        }
        return false;
    }

    ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer &&
            op->src[0]->buffer->buft == ggml_backend_amx_buffer_type()) {
            return (ggml::cpu::tensor_traits *) op->src[0]->extra;
        }
        return nullptr;
    }
};

}  // namespace ggml::cpu::amx
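// NOTE: the TILE_N * 2 requirement above presumably reflects the kernel's
// register blocking (two output tiles of TILE_N columns per block); src1 is
// further required to be f32 in a host buffer so the kernel can read the
// activations directly.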
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}
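// NOTE: the repacked AMX layout may need a different size than the row-major
// ggml_nbytes() of the tensor, so the buffer type reports its own alloc size.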
static bool ggml_amx_init() {
#if defined(__gnu_linux__)
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
        fprintf(stderr, "AMX is not ready to be used!\n");
        return false;
    }
    return true;
#elif defined(_WIN32)
    return true;
#endif
}
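// NOTE: on Linux the kernel disables the (large) AMX tile-data state by default;
// the ARCH_REQ_XCOMP_PERM request above asks for the XTILEDATA feature to be
// enabled for this process, and AMX instructions fault if it is not granted.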
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
            /* .get_name       = */ ggml_backend_amx_buffer_type_get_name,
            /* .alloc_buffer   = */ ggml_backend_amx_buffer_type_alloc_buffer,
            /* .get_alignment  = */ ggml_backend_amx_buffer_type_get_alignment,
            /* .get_max_size   = */ nullptr,  // defaults to SIZE_MAX
            /* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
            /* .is_host        = */ nullptr,
        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
    };

    if (!ggml_amx_init()) {
        return nullptr;
    }
    return &ggml_backend_buffer_type_amx;
}
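// Example (illustrative sketch, not part of this file): allocating a weight
// buffer of this type so that set_tensor repacks the data into the AMX layout.
//
//   ggml_backend_buffer_type_t buft = ggml_backend_amx_buffer_type();
//   if (buft != nullptr) {  // nullptr when the AMX permission request fails
//       ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, nbytes);
//       ...
//   }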