CUDA Networks
matrix_softmax.cu
Go to the documentation of this file.
1 /**
2  * @file matrix_softmax.cu
3  * @brief Implementation of the softmax function for matrices.
4  */
5 
#include "matrix.h"
#include <cuda_runtime.h>
#include <cfloat>
#include <stdexcept>
#include <string>
9 
/**
 * @brief CUDA kernel for applying the softmax function column-wise.
 *
 * The matrix is addressed in row-major order: element (row, col) lives at
 * index row * cols + col. Each thread handles whole columns and walks the
 * columns with a stride of the total number of launched threads, so any 1D
 * launch configuration produces a complete result (one thread per column is
 * the fastest case, not a requirement). No shared memory is used.
 *
 * For every column the maximum is subtracted before exponentiation so that
 * exp() cannot overflow for finite inputs.
 *
 * A column is only written after its maximum and its sum have been read, and
 * different threads never touch the same column, so @p input and @p output
 * may refer to the same buffer.
 *
 * @param input Pointer to the input matrix data (rows * cols doubles).
 * @param output Pointer to the output matrix data (rows * cols doubles).
 * @param rows Number of rows in the matrix.
 * @param cols Number of columns in the matrix.
 */
__global__ void softmaxKernel(const double* input, double* output, int rows, int cols) {
    // Nothing to do for an empty matrix; this also guarantees that the
    // conversions to size_t below never see a negative value.
    if (rows <= 0 || cols <= 0) {
        return;
    }

    // Index arithmetic is done in size_t: row * cols + col overflows a 32-bit
    // int as soon as the matrix holds more than INT_MAX elements.
    const size_t numRows = static_cast<size_t>(rows);
    const size_t numCols = static_cast<size_t>(cols);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    for (size_t col = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         col < numCols;
         col += stride) {
        // Start from the lowest finite double (not -infinity), so the running
        // maximum is always finite unless the column contains +infinity.
        double max_val = -DBL_MAX;

        // Pass 1: maximum of the column.
        for (size_t row = 0; row < numRows; ++row) {
            max_val = fmax(max_val, input[row * numCols + col]);
        }

        // Pass 2: sum of the shifted exponentials.
        double sum_exp = 0.0;
        for (size_t row = 0; row < numRows; ++row) {
            sum_exp += exp(input[row * numCols + col] - max_val);
        }

        // Keep the denominator non-zero. The sum can only be zero when every
        // entry of the column is -infinity (max_val then stays at -DBL_MAX and
        // every exponential evaluates to 0); such a column maps to all zeros.
        sum_exp += 1e-15;

        // Pass 3: normalise.
        for (size_t row = 0; row < numRows; ++row) {
            const size_t idx = row * numCols + col;
            output[idx] = exp(input[idx] - max_val) / sum_exp;
        }
    }
}
48 
/**
 * @brief Applies the softmax function to the matrix column-wise.
 *
 * Launches softmaxKernel on the default stream with one thread per column
 * and blocks until the kernel has finished, so the returned matrix is ready
 * for use as soon as this call returns. An empty matrix (no rows or no
 * columns) is returned as-is without launching anything.
 *
 * @return A new Matrix object with softmax applied.
 * @throws std::runtime_error if the kernel launch is rejected or the kernel
 *         execution reports a CUDA error.
 */
Matrix Matrix::softmax() const {
    // Create a new matrix with the same dimensions
    Matrix result(rows, cols);

    // Nothing to compute for an empty matrix. Launching anyway would request
    // a grid of zero blocks when cols == 0, which CUDA rejects as an invalid
    // configuration and records as an error.
    if (rows <= 0 || cols <= 0) {
        return result;
    }

    // Define the number of threads per block
    const int threadsPerBlock = 256;

    // One thread per column, rounded up to whole blocks. Written without
    // "cols + threadsPerBlock" so the sum cannot overflow for very large cols.
    const int blocksPerGrid = (cols - 1) / threadsPerBlock + 1;

    // Launch the CUDA kernel
    softmaxKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, rows, cols);

    // A kernel launch returns no status itself; launch failures are only
    // visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        throw std::runtime_error(std::string("Matrix::softmax: kernel launch failed: ") +
                                 cudaGetErrorString(status));
    }

    // Synchronize to ensure the kernel execution is complete; faults raised
    // while the kernel was running are reported here.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        throw std::runtime_error(std::string("Matrix::softmax: kernel execution failed: ") +
                                 cudaGetErrorString(status));
    }

    return result;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
Matrix softmax() const
Applies the softmax function to the matrix column-wise.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void softmaxKernel(const double *input, double *output, int rows, int cols)
CUDA kernel for applying the softmax function column-wise.