/**
 * Kernel converting an interleaved 8-bit RGB image to 8-bit grayscale.
 *
 * Launch layout: a 2D grid of 2D blocks; each thread handles the single pixel
 * at (x, y) computed from its block/thread indices. Threads that fall outside
 * the width x height rectangle do nothing, so the grid may overshoot the image.
 *
 * @param input   Device pointer to width * height * 3 bytes, stored row by row
 *                with the R, G, B bytes of each pixel adjacent (rows tightly
 *                packed, no padding). Must not overlap `output`.
 * @param output  Device pointer to width * height bytes, one gray value per
 *                pixel, stored row by row.
 * @param width   Image width in pixels.
 * @param height  Image height in pixels.
 *
 * Gray value: 0.21 * R + 0.72 * G + 0.07 * B, truncated toward zero when
 * converted back to uint8_t.
 */
__global__ void img2gray_kernel(const uint8_t* __restrict__ input,
                                uint8_t* __restrict__ output,
                                int width,
                                int height) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Tail guard: the grid is rounded up to whole blocks by the launcher.
    if (x >= width || y >= height) {
        return;
    }

    // Index in size_t: (y * width + x) * 3 overflows a 32-bit int once the
    // image exceeds roughly 715M pixels.
    const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(width) +
                         static_cast<size_t>(x);
    const size_t base = pixel * 3;  // 3 interleaved channels per pixel

    const uint8_t r = input[base];
    const uint8_t g = input[base + 1];
    const uint8_t b = input[base + 2];

    // Luminosity method; the float result is truncated by the cast.
    output[pixel] = static_cast<uint8_t>(0.21f * r + 0.72f * g + 0.07f * b);
}
/**
 * Host wrapper that launches img2gray_kernel on PyTorch tensors.
 *
 * @param input   CUDA uint8 tensor of shape (height, width, 3), contiguous.
 * @param output  CUDA uint8 tensor on the same device holding exactly
 *                height * width elements, contiguous; receives the gray image.
 *
 * Raises a c10::Error (via TORCH_CHECK) when a precondition is violated or
 * when the CUDA runtime reports a launch/execution failure. Returns without
 * launching when the image has no pixels.
 *
 * The kernel is launched without a stream argument (default stream) and the
 * function blocks until the device has finished.
 * NOTE(review): the launch targets the current CUDA device; callers with
 * tensors on another device presumably need to select it first — confirm.
 */
void img2gray_cuda(torch::Tensor input, torch::Tensor output) {
    TORCH_CHECK(input.is_cuda() && output.is_cuda(),
                "img2gray_cuda: input and output must be CUDA tensors");
    TORCH_CHECK(input.device() == output.device(),
                "img2gray_cuda: input and output must be on the same device");
    TORCH_CHECK(input.scalar_type() == torch::kUInt8 &&
                    output.scalar_type() == torch::kUInt8,
                "img2gray_cuda: input and output must be uint8 tensors");
    TORCH_CHECK(input.dim() == 3 && input.size(2) == 3,
                "img2gray_cuda: input must have shape (height, width, 3)");
    // The kernel indexes both buffers as tightly packed row-major memory.
    TORCH_CHECK(input.is_contiguous() && output.is_contiguous(),
                "img2gray_cuda: input and output must be contiguous");

    const int64_t height64 = input.size(0);
    const int64_t width64 = input.size(1);
    TORCH_CHECK(output.numel() == height64 * width64,
                "img2gray_cuda: output must hold height * width elements, got ",
                output.numel(), " for a ", height64, " x ", width64, " image");

    // The kernel takes int dimensions; refuse sizes that would be truncated.
    constexpr int64_t kMaxDim = 2147483647;
    TORCH_CHECK(height64 <= kMaxDim && width64 <= kMaxDim,
                "img2gray_cuda: image dimensions exceed the supported range");

    // A zero-sized grid is an invalid launch configuration; nothing to do.
    if (height64 == 0 || width64 == 0) {
        return;
    }

    const int width = static_cast<int>(width64);
    const int height = static_cast<int>(height64);

    const dim3 blockSize(16, 16);
    // Ceil-divide so partially filled edge blocks still cover the image.
    const dim3 gridSize((width + blockSize.x - 1) / blockSize.x,
                        (height + blockSize.y - 1) / blockSize.y);

    img2gray_kernel<<<gridSize, blockSize>>>(
        input.data_ptr<uint8_t>(),
        output.data_ptr<uint8_t>(),
        width,
        height);

    // Launch-configuration errors are only visible through cudaGetLastError().
    const cudaError_t launchStatus = cudaGetLastError();
    TORCH_CHECK(launchStatus == cudaSuccess,
                "img2gray_cuda: kernel launch failed: ",
                cudaGetErrorString(launchStatus));

    // Faults raised while the kernel runs surface at the synchronizing call.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    TORCH_CHECK(syncStatus == cudaSuccess,
                "img2gray_cuda: kernel execution failed: ",
                cudaGetErrorString(syncStatus));
}