7 #include <cuda_runtime.h>
17 int idx = blockIdx.x * blockDim.x + threadIdx.x;
22 double sigmoid_x = 1.0 / (1.0 + exp(-input[idx]));
25 output[idx] = sigmoid_x * (1.0 - sigmoid_x);
38 int size = rows * cols;
41 int threadsPerBlock = 256;
44 int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
47 sigmoidDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
50 cudaDeviceSynchronize();
Represents a matrix with GPU-accelerated operations.
Matrix sigmoid_derivative() const
Applies the derivative of the sigmoid activation function element-wise to the matrix and returns the result as a Matrix.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void sigmoidDerivativeKernel(const double *input, double *output, int size)
CUDA kernel that applies the sigmoid derivative, sigmoid(x) * (1 - sigmoid(x)), element-wise, reading from input and writing to output.