7 #include <cuda_runtime.h>
16 __global__
void sigmoidKernel(
const double* input,
double* output,
int size) {
18 int idx = blockIdx.x * blockDim.x + threadIdx.x;
23 output[idx] = 1.0 / (1.0 + exp(-input[idx]));
36 int size = rows * cols;
39 int threadsPerBlock = 256;
42 int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
45 sigmoidKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
48 cudaDeviceSynchronize();
Represents a matrix with GPU-accelerated operations.
Matrix sigmoid() const
Applies the sigmoid activation function element-wise to the matrix, writing the output into a separate result matrix.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void sigmoidKernel(const double *input, double *output, int size)
CUDA kernel for applying the sigmoid activation function element-wise, computing 1 / (1 + exp(-x)) for each input element.