CUDA Networks
matrix_add_vector.cu
Go to the documentation of this file.
1 /**
2  * @file matrix_add_vector.cu
3  * @brief Implementation of the Matrix::add_vector method for GPU-accelerated addition of a vector to each column of a matrix.
4  */
5 
6 #include "matrix.h"
7 #include "vector.h"
8 #include <cuda_runtime.h>
9 #include <stdexcept>
10 #include <string>
11 
/**
 * @brief CUDA kernel for adding a vector to each column of a matrix.
 *
 * The matrix is indexed as row-major (element (row, col) lives at
 * row * cols + col), and v[row] is added to every element of that row,
 * i.e. the vector is added to each column.
 *
 * Launch layout: any 2D grid/block configuration is accepted. The x dimension
 * walks columns and the y dimension walks rows; grid-stride loops cover the
 * elements that a smaller grid would not reach directly. No shared memory is
 * used.
 *
 * Preconditions: m must reference at least rows * cols doubles and v at least
 * rows doubles, both dereferenceable from device code.
 *
 * @param m Pointer to the matrix data (updated in place).
 * @param v Pointer to the vector data.
 * @param rows Number of rows in the matrix.
 * @param cols Number of columns in the matrix.
 */
__global__ void addVectorToMatrixKernel(double* m, const double* v, int rows, int cols) {
    // Non-positive dimensions mean there is nothing to do; this also keeps the
    // signed-to-unsigned conversions below from wrapping around.
    if (rows <= 0 || cols <= 0) {
        return;
    }

    // Do all index arithmetic in size_t: row * cols + col overflows a 32-bit
    // int once the matrix holds more than 2^31 - 1 elements.
    const size_t numRows = static_cast<size_t>(rows);
    const size_t numCols = static_cast<size_t>(cols);

    // Total number of threads along each grid dimension.
    const size_t strideX = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t strideY = static_cast<size_t>(gridDim.y) * blockDim.y;

    // First row / column handled by this thread.
    const size_t firstRow = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;
    const size_t firstCol = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t row = firstRow; row < numRows; row += strideY) {
        for (size_t col = firstCol; col < numCols; col += strideX) {
            // Add vector element to matrix element
            m[row * numCols + col] += v[row];
        }
    }
}
33 
/**
 * @brief Adds a vector to each column of the matrix.
 *
 * Launches addVectorToMatrixKernel over the matrix and blocks until the
 * kernel has finished, so the result is complete when this method returns.
 * An empty matrix (zero rows or zero columns) is a no-op.
 *
 * @param v The vector to add.
 * @throws std::invalid_argument if vector dimension doesn't match matrix rows.
 * @throws std::runtime_error if the kernel launch or its execution fails.
 */
void Matrix::add_vector(const Vector& v) {
    // Check if vector dimension matches matrix rows
    if (rows != v.get_rows()) {
        throw std::invalid_argument("Vector dimension must match matrix rows for addition");
    }

    // Nothing to add for an empty matrix. Launching with a zero-sized grid
    // would be rejected as an invalid configuration, so return early instead.
    if (rows == 0 || cols == 0) {
        return;
    }

    // Define block dimensions
    dim3 threadsPerBlock(16, 16);

    // Calculate grid dimensions (ceiling division so partial tiles are covered)
    dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                   (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Launch CUDA kernel
    addVectorToMatrixKernel<<<numBlocks, threadsPerBlock>>>(d_data, v.get_data(), rows, cols);

    // Check for kernel launch errors (e.g. an invalid launch configuration)
    cudaError_t cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    // Synchronize device. Faults raised while the kernel runs are only
    // reported by a synchronizing call, so its status must be checked too.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel execution failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }
}
void add_vector(const Vector &v)
Adds a vector to each column of the matrix.
Represents a vector with GPU-accelerated operations.
Definition: vector.h:13
int get_rows() const
Get the number of elements in the vector.
double * get_data() const
Get the raw data pointer of the vector.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void addVectorToMatrixKernel(double *m, const double *v, int rows, int cols)
CUDA kernel for adding a vector to each column of a matrix.
Defines the Vector class for GPU-accelerated vector operations.