CUDA Networks
matrix_multiply_elementwise.cu
Go to the documentation of this file.
1 /**
2  * @file matrix_multiply_elementwise.cu
3  * @brief Implementation of the Matrix::multiply_elementwise method for GPU-accelerated element-wise matrix multiplication.
4  */
5 
6 #include "matrix.h"
7 #include <cuda_runtime.h>
8 #include <stdexcept>
9 #include <string>
10 
/**
 * @brief CUDA kernel for element-wise matrix multiplication.
 *
 * Each thread handles at most one element and writes c[i] = a[i] * b[i],
 * where i = row * cols + col. The kernel expects a 2D launch in which the
 * x dimension spans columns and the y dimension spans rows. Threads whose
 * (row, col) falls outside the rows x cols extent do nothing, so the grid
 * may overshoot the matrix.
 *
 * @param a Pointer to the first input matrix data.
 * @param b Pointer to the second input matrix data.
 * @param c Pointer to the output matrix data.
 * @param rows Number of rows in the matrices.
 * @param cols Number of columns in the matrices.
 */
__global__ void matrixMultiplyElementwiseKernel(const double* a, const double* b, double* c, int rows, int cols) {
    // Calculate global thread indices
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Check if thread is within matrix bounds
    if (row < rows && col < cols) {
        // Widen before multiplying: row * cols evaluated in int overflows
        // once the matrix holds more than INT_MAX elements.
        size_t index = static_cast<size_t>(row) * static_cast<size_t>(cols)
                     + static_cast<size_t>(col);

        // Perform element-wise multiplication
        c[index] = a[index] * b[index];
    }
}
33 
/**
 * @brief Performs element-wise multiplication with another matrix.
 *
 * Launches matrixMultiplyElementwiseKernel on 16x16 thread blocks and waits
 * for the device to finish before returning, so the result is complete when
 * the caller receives it.
 *
 * @param other The matrix to multiply element-wise with.
 * @return A new Matrix object containing the result of the element-wise multiplication.
 * @throws std::invalid_argument if matrix dimensions are not identical.
 * @throws std::runtime_error if the kernel launch fails or the device reports
 *         an error while executing it.
 */
Matrix Matrix::multiply_elementwise(const Matrix& other) const {
    // Check if matrices have identical dimensions
    if (rows != other.rows || cols != other.cols) {
        throw std::invalid_argument("Matrix dimensions must be identical for element-wise multiplication");
    }

    // Create result matrix
    Matrix result(rows, cols);

    // An empty matrix would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to compute, so return early.
    if (rows == 0 || cols == 0) {
        return result;
    }

    // Define block dimensions
    const dim3 threadsPerBlock(16, 16);

    // Calculate grid dimensions (ceil-div so partial tiles are covered)
    const dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Launch CUDA kernel
    matrixMultiplyElementwiseKernel<<<numBlocks, threadsPerBlock>>>(d_data, other.d_data, result.d_data, rows, cols);

    // Check for kernel launch errors (e.g. bad launch configuration)
    cudaError_t cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    // Synchronize device; faults raised while the kernel runs are only
    // reported here, so the status must not be discarded.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel execution failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    return result;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
Matrix multiply_elementwise(const Matrix &other) const
Performs element-wise multiplication with another matrix.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void matrixMultiplyElementwiseKernel(const double *a, const double *b, double *c, int rows, int cols)
CUDA kernel for element-wise matrix multiplication.