img2gray / csrc /img2gray.cu
drbh
feat: small grayscale kernel
846d481
#include <cstdint>
#include <torch/torch.h>
// Define a kernel to convert RGB to Grayscale
__global__ void img2gray_kernel(const uint8_t* input, uint8_t* output, int width, int height) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < width && y < height) {
int idx = (y * width + x) * 3; // RGB has 3 channels
uint8_t r = input[idx];
uint8_t g = input[idx + 1];
uint8_t b = input[idx + 2];
// Convert to grayscale using luminosity method
uint8_t gray = static_cast<uint8_t>(0.21f * r + 0.72f * g + 0.07f * b);
output[y * width + x] = gray;
}
}
// Define a wrapper for this kernel to align with the PyTorch extension interface
void img2gray_cuda(torch::Tensor input, torch::Tensor output) {
const int width = input.size(1);
const int height = input.size(0);
const dim3 blockSize(16, 16);
const dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y);
img2gray_kernel<<<gridSize, blockSize>>>(
input.data_ptr<uint8_t>(),
output.data_ptr<uint8_t>(),
width,
height
);
cudaDeviceSynchronize();
}