| # /// script | |
| # requires-python = ">=3.13" | |
| # dependencies = [ | |
| # "kernels", | |
| # "numpy", | |
| # "torch", | |
| # ] | |
| # /// | |
| import platform | |
| from pathlib import Path | |
| import kernels | |
| import torch | |
| # Load the locally built kernel | |
| kernel = kernels.get_local_kernel(Path("build"), "__KERNEL_NAME_NORMALIZED__") | |
| # Select device | |
| if platform.system() == "Darwin": | |
| device = torch.device("mps") | |
| elif hasattr(torch, "xpu") and torch.xpu.is_available(): | |
| device = torch.device("xpu") | |
| elif torch.version.cuda is not None and torch.cuda.is_available(): | |
| device = torch.device("cuda") | |
| else: | |
| device = torch.device("cpu") | |
| print(f"Using device: {device}") | |
| # Create input tensor | |
| x = torch.tensor([1.0, 2.0, 3.0], device=device) | |
| print(f"Input: {x}") | |
| # Run kernel (adds 1 to each element) | |
| result = kernel.__KERNEL_NAME_NORMALIZED__(x) | |
| print(f"Output: {result}") | |
| # Verify result | |
| expected = x + 1.0 | |
| assert torch.allclose(result, expected), "Kernel output doesn't match expected!" | |
| print("Success!") | |