CUDA Networks
matrix_multiply.cu
Go to the documentation of this file.
1 /**
2  * @file matrix_multiply.cu
3  * @brief Implementation of the Matrix::multiply method for GPU-accelerated matrix multiplication.
4  */
5 
6 #include "matrix.h"
7 #include <cuda_runtime.h>
8 #include <stdexcept> // For std::invalid_argument and std::runtime_error
9 #include <string> // For std::string
10 
/**
 * @brief CUDA kernel for dense matrix multiplication, C = A * B.
 *
 * Each thread computes one element of C: the thread's y index selects the row
 * and its x index selects the column. All three matrices are indexed as
 * row-major contiguous arrays. Threads that fall outside the m x k output
 * (the ragged edge of the grid) do nothing.
 *
 * Element offsets are computed in size_t so that matrices holding more than
 * 2^31 elements are addressed correctly instead of overflowing int.
 *
 * @param a Pointer to the first input matrix data (m x n, read-only).
 * @param b Pointer to the second input matrix data (n x k, read-only).
 * @param c Pointer to the output matrix data (m x k); must not overlap a or b.
 * @param m Number of rows in matrix A.
 * @param n Number of columns in matrix A / rows in matrix B.
 * @param k Number of columns in matrix B.
 */
__global__ void matrixMultiplyKernel(const double* __restrict__ a,
                                     const double* __restrict__ b,
                                     double* __restrict__ c,
                                     int m,
                                     int n,
                                     int k) {
    // Global coordinates of the output element owned by this thread
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the matrix edge have nothing to compute
    if (row >= m || col >= k) {
        return;
    }

    // Widen before multiplying: row * n and i * k can exceed INT_MAX
    const size_t rowIdx = static_cast<size_t>(row);
    const size_t colIdx = static_cast<size_t>(col);
    const size_t bStride = static_cast<size_t>(k);
    const size_t aRowBase = rowIdx * static_cast<size_t>(n);

    // Dot product of row `row` of A with column `col` of B.
    // The counter stays int so a non-positive n yields zero iterations.
    double sum = 0.0;
    for (int i = 0; i < n; ++i) {
        sum += a[aRowBase + i] * b[static_cast<size_t>(i) * bStride + colIdx];
    }

    // Store the result in matrix C
    c[rowIdx * bStride + colIdx] = sum;
}
44 
/**
 * @brief Multiplies this matrix with another matrix on the GPU.
 *
 * Launches matrixMultiplyKernel with 16x16 thread blocks and a grid sized by
 * ceiling division so that every element of the result is covered, then waits
 * for the device to finish before returning. If the result has no elements
 * (rows == 0 or other.cols == 0) no kernel is launched.
 *
 * @param other The matrix to multiply with; its row count must equal this matrix's column count.
 * @return A new Matrix, constructed as Matrix(rows, other.cols), containing the result of the multiplication.
 * @throws std::invalid_argument if matrix dimensions are incompatible for multiplication.
 * @throws std::runtime_error if the kernel launch or the kernel execution fails.
 */
Matrix Matrix::multiply(const Matrix& other) const {
    // Check if matrices can be multiplied
    if (cols != other.rows) {
        throw std::invalid_argument("Matrix dimensions are incompatible for multiplication");
    }

    // Create result matrix
    Matrix result(rows, other.cols);

    // An empty result needs no work, and a grid with a zero-sized dimension
    // would be rejected as an invalid launch configuration.
    if (rows == 0 || other.cols == 0) {
        return result;
    }

    // Define block dimensions
    const dim3 threadsPerBlock(16, 16);

    // Calculate grid dimensions (ceiling division covers partial tiles)
    const dim3 numBlocks((other.cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Launch CUDA kernel
    matrixMultiplyKernel<<<numBlocks, threadsPerBlock>>>(d_data,
                                                         other.d_data,
                                                         result.d_data,
                                                         rows,
                                                         cols,
                                                         other.cols);

    // Check for kernel launch errors (e.g. invalid configuration)
    cudaError_t cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    // Wait for the kernel to finish; faults raised while it runs are only
    // reported by this call, so its status must not be discarded.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel execution failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    return result;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
Matrix multiply(const Matrix &other) const
Multiplies this matrix with another matrix.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void matrixMultiplyKernel(const double *a, const double *b, double *c, int m, int n, int k)
CUDA kernel for matrix multiplication.