CUDA Networks
matrix_relu_derivative.cu
/**
 * @file matrix_relu_derivative.cu
 * @brief Implementation of the ReLU derivative function for matrices.
 */

#include "matrix.h"
#include <cuda_runtime.h>

/**
 * @brief CUDA kernel for applying the ReLU derivative function element-wise.
 * @param input Pointer to the input matrix data.
 * @param output Pointer to the output matrix data.
 * @param size Total number of elements in the matrix.
 */
__global__ void reluDerivativeKernel(const double* input, double* output, int size) {
    // Calculate the global thread index
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Check if the thread is within the matrix bounds
    if (idx < size) {
        // Apply ReLU derivative: 1 if x > 0, 0 otherwise
        output[idx] = (input[idx] > 0.0) ? 1.0 : 0.0;
    }
}

/**
 * @brief Applies the ReLU derivative function to the matrix.
 * @return A new Matrix object with the ReLU derivative applied element-wise.
 */
Matrix Matrix::relu_derivative() const {
    // Create a new matrix with the same dimensions
    Matrix result(rows, cols);

    // Calculate the total number of elements
    int size = rows * cols;

    // Define the number of threads per block
    int threadsPerBlock = 256;

    // Calculate the number of blocks needed (ceiling division so every element is covered)
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;

    // Launch the CUDA kernel on this matrix's device data, writing into the result
    reluDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);

    // Synchronize to ensure the kernel execution is complete
    cudaDeviceSynchronize();
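    // Note: cudaDeviceSynchronize() returns a cudaError_t; in production code one
    // would typically check that value (and cudaGetLastError() right after the
    // launch) so kernel failures surface instead of being silently ignored.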

    return result;
}
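
The kernel can be exercised in isolation with a small host-side check. The following standalone program is a minimal sketch, not part of this file or project: it restates reluDerivativeKernel so it compiles on its own, copies a handful of sample values to the device using only the CUDA runtime API, reuses the same launch configuration as Matrix::relu_derivative, and prints the resulting 0/1 mask (expected output: 0, 0, 0, 1, 1).

// relu_derivative_check.cu -- standalone sketch (illustrative, not part of the project)
#include <cstdio>
#include <cuda_runtime.h>

// Restated from matrix_relu_derivative.cu so this sketch compiles on its own
__global__ void reluDerivativeKernel(const double* input, double* output, int size) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < size) {
        output[idx] = (input[idx] > 0.0) ? 1.0 : 0.0;
    }
}

int main() {
    const int size = 5;
    const double h_input[size] = {-2.0, -0.5, 0.0, 0.5, 2.0};
    double h_output[size] = {0.0, 0.0, 0.0, 0.0, 0.0};

    // Allocate device buffers and copy the sample values over
    double *d_input = nullptr, *d_output = nullptr;
    cudaMalloc((void**)&d_input, size * sizeof(double));
    cudaMalloc((void**)&d_output, size * sizeof(double));
    cudaMemcpy(d_input, h_input, size * sizeof(double), cudaMemcpyHostToDevice);

    // Same launch configuration as Matrix::relu_derivative
    int threadsPerBlock = 256;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
    reluDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_input, d_output, size);
    cudaDeviceSynchronize();

    // Copy the 0/1 mask back and print it
    cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost);
    for (int i = 0; i < size; ++i) {
        printf("relu'(%+.1f) = %.1f\n", h_input[i], h_output[i]);
    }

    cudaFree(d_input);
    cudaFree(d_output);
    return 0;
}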