CUDA Networks
Public Member Functions | List of all members
Matrix Class Reference

Represents a matrix with GPU-accelerated operations. More...

#include <matrix.h>

Public Member Functions

 Matrix (int rows, int cols)
 Construct a new Matrix object. More...
 
 Matrix (const Matrix &other)
 Copy constructor. More...
 
 Matrix (Matrix &&other) noexcept
 Move constructor. More...
 
Matrix & operator= (const Matrix &other)
 Copy assignment operator. More...
 
Matrix & operator= (Matrix &&other) noexcept
 Move assignment operator. More...
 
 ~Matrix ()
 Destroy the Matrix object. More...
 
void initialize ()
 Initialize the matrix by setting all elements to zero. More...
 
void randomize ()
 Randomize the matrix elements with values between -0.5 and 0.5. More...
 
void print (int decimals)
 Print the matrix contents. More...
 
int get_rows () const
 Get the number of rows in the matrix. More...
 
int get_cols () const
 Get the number of columns in the matrix. More...
 
double * get_data () const
 Get the raw data pointer of the matrix. More...
 
void read_csv (const char *filename)
 Read data from a CSV file into the matrix. More...
 
void read_csv_limited (const char *filename, int startRow, int endRow, int fileRows, int fileCols)
 Read a subset of data from a CSV file into the matrix. More...
 
void preview_image (int row_index, int image_size_x, int image_size_y) const
 Preview a single image from the matrix. More...
 
Matrix relu () const
 Applies the ReLU activation function to the matrix. More...
 
Matrix relu_derivative () const
 Applies the derivative of the ReLU activation function to the matrix. More...
 
Matrix sigmoid () const
 Applies the sigmoid activation function to the matrix. More...
 
Matrix sigmoid_derivative () const
 Applies the derivative of the sigmoid activation function to the matrix. More...
 
Matrix softmax () const
 Applies the softmax function to the matrix column-wise. More...
 
Matrix copy () const
 Creates a deep copy of the matrix. More...
 
Matrix multiply (const Matrix &other) const
 Multiplies this matrix with another matrix. More...
 
Matrix multiply_elementwise (const Matrix &other) const
 Performs element-wise multiplication with another matrix. More...
 
void add_vector (const Vector &v)
 Adds a vector to each column of the matrix. More...
 
Matrix subtract (const Matrix &other) const
 Subtracts another matrix from this matrix. More...
 
double sum () const
 Sums all elements in the matrix. More...
 
void divide_scalar (double scalar)
 Divides all elements in the matrix by a scalar. More...
 
void multiply_scalar (double scalar)
 Multiplies all elements in the matrix by a scalar. More...
 
Vector argmax () const
 Computes the argmax of each column in the matrix. More...
 
Matrix transpose () const
 Transposes the matrix and returns a new Matrix object. More...
 
Matrix select_batch (int start_row, int end_row, int start_col, int end_col) const
 Selects a subset of the matrix based on specified row and column ranges. More...
 

Detailed Description

Represents a matrix with GPU-accelerated operations.

Definition at line 18 of file matrix.h.

Constructor & Destructor Documentation

◆ Matrix() [1/3]

Matrix::Matrix ( int  rows,
int  cols 
)

Construct a new Matrix object.

Parameters
rows: Number of rows in the matrix
cols: Number of columns in the matrix

Definition at line 8 of file matrix_constructor.cu.

8  : rows(rows), cols(cols) {
9  // Allocate memory on the GPU for the matrix data
10  // The size is calculated as rows * cols * sizeof(double)
11  cudaMalloc(&d_data, rows * cols * sizeof(double));
12 }

◆ Matrix() [2/3]

Matrix::Matrix ( const Matrix &  other)

Copy constructor.

Parameters
other: The matrix to copy from

Definition at line 9 of file matrix_copy.cu.

9  : rows(other.rows), cols(other.cols) {
10  // Allocate new memory on the device
11  cudaMalloc(&d_data, rows * cols * sizeof(double));
12  // Copy data from the other matrix to this one
13  cudaMemcpy(d_data, other.d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToDevice);
14 }

◆ Matrix() [3/3]

Matrix::Matrix ( Matrix &&  other)
noexcept

Move constructor.

Parameters
other: The matrix to move from

Definition at line 33 of file matrix_copy.cu.

34  : rows(other.rows), cols(other.cols), d_data(other.d_data) {
35  // Transfer ownership and reset the source object
36  other.d_data = nullptr;
37  other.rows = 0;
38  other.cols = 0;
39 }

◆ ~Matrix()

Matrix::~Matrix ( )

Destroy the Matrix object.

Definition at line 8 of file matrix_destructor.cu.

8  {
9  // Free the GPU memory allocated for this matrix
10  cudaFree(d_data);
11 }

Member Function Documentation

◆ add_vector()

void Matrix::add_vector ( const Vector &  v)

Adds a vector to each column of the matrix.

Parameters
v: The vector to add.
Exceptions
std::invalid_argument: if vector dimension doesn't match matrix rows.

Definition at line 39 of file matrix_add_vector.cu.

39  {
40  // Check if vector dimension matches matrix rows
41  if (rows != v.get_rows()) {
42  throw std::invalid_argument("Vector dimension must match matrix rows for addition");
43  }
44 
45  // Define block dimensions
46  dim3 threadsPerBlock(16, 16);
47 
48  // Calculate grid dimensions
49  dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
50  (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
51 
52  // Launch CUDA kernel
53  addVectorToMatrixKernel<<<numBlocks, threadsPerBlock>>>(d_data, v.get_data(), rows, cols);
54 
55  // Check for kernel launch errors
56  cudaError_t cudaStatus = cudaGetLastError();
57  if (cudaStatus != cudaSuccess) {
58  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
59  }
60 
61  // Synchronize device
62  cudaDeviceSynchronize();
63 }
int get_rows() const
Get the number of elements in the vector.
double * get_data() const
Get the raw data pointer of the vector.

◆ argmax()

Vector Matrix::argmax ( ) const

Computes the argmax of each column in the matrix.

Launches the argmax_GPU kernel to perform column-wise argmax on the matrix.

Returns
A Vector containing the row indices of the maximum values for each column.

Definition at line 44 of file matrix_argmax.cu.

44  {
45  // Create a result vector on the device
46  Vector result(cols);
47 
48  // Define grid and block sizes
49  int threadsPerBlock = 256;
50  int blocksPerGrid = (cols + threadsPerBlock - 1) / threadsPerBlock;
51 
52  // Launch the argmax kernel on the device
53  argmax_GPU<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.get_data(), rows, cols);
54 
55  // Ensure the kernel execution is complete
56  cudaDeviceSynchronize();
57 
58  return result;
59 }
Represents a vector with GPU-accelerated operations.
Definition: vector.h:13

◆ copy()

Matrix Matrix::copy ( ) const

Creates a deep copy of the matrix.

Returns
A new Matrix object with the same content as the original.

Definition at line 59 of file matrix_copy.cu.

59  {
60  // Use the copy constructor to create a deep copy
61  return *this;
62 }

◆ divide_scalar()

void Matrix::divide_scalar ( double  scalar)

Divides all elements in the matrix by a scalar.

Parameters
scalar: The scalar to divide by.
Exceptions
std::invalid_argument: if scalar is exactly zero.

Definition at line 47 of file matrix_divide_scalar.cu.

47  {
48  // Check for division by exactly zero
49  if (scalar == 0.0) {
50  throw std::invalid_argument("Cannot divide by exactly zero");
51  }
52 
53  // Calculate total number of elements
54  int size = rows * cols;
55 
56  // Define block and grid dimensions
57  int threadsPerBlock = 256;
58  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
59 
60  // Launch CUDA kernel
61  divideScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
62 
63  // Check for kernel launch errors
64  cudaError_t cudaStatus = cudaGetLastError();
65  if (cudaStatus != cudaSuccess) {
66  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
67  }
68 
69  // Synchronize device
70  cudaDeviceSynchronize();
71 }

◆ get_cols()

int Matrix::get_cols ( ) const

Get the number of columns in the matrix.

Returns
int Number of columns

Definition at line 7 of file matrix_get_cols.cu.

7  {
8  // Return the number of columns in the matrix
9  return cols;
10 }

◆ get_data()

double * Matrix::get_data ( ) const

Get the raw data pointer of the matrix.

Returns
double* Pointer to the matrix data on the device

Definition at line 7 of file matrix_get_data.cu.

7  {
8  // Return the pointer to the GPU memory
9  return d_data;
10 }

◆ get_rows()

int Matrix::get_rows ( ) const

Get the number of rows in the matrix.

Returns
int Number of rows

Definition at line 7 of file matrix_get_rows.cu.

7  {
8  // Return the number of rows in the matrix
9  return rows;
10 }

◆ initialize()

void Matrix::initialize ( )

Initialize the matrix by setting all elements to zero.

Definition at line 8 of file matrix_initialize.cu.

8  {
9  // Use cudaMemset to set all elements of d_data to 0
10  cudaMemset(d_data, 0, rows * cols * sizeof(double));
11 }

◆ multiply()

Matrix Matrix::multiply ( const Matrix &  other) const

Multiplies this matrix with another matrix.

Parameters
other: The matrix to multiply with.
Returns
A new Matrix object containing the result of the multiplication.
Exceptions
std::invalid_argument: if matrix dimensions are incompatible for multiplication.

Definition at line 51 of file matrix_multiply.cu.

51  {
52  // Check if matrices can be multiplied
53  if (cols != other.rows) {
54  throw std::invalid_argument("Matrix dimensions are incompatible for multiplication");
55  }
56 
57  // Create result matrix
58  Matrix result(rows, other.cols);
59 
60  // Define block dimensions
61  dim3 threadsPerBlock(16, 16);
62 
63  // Calculate grid dimensions
64  dim3 numBlocks((other.cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
65  (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
66 
67  // Launch CUDA kernel
68  matrixMultiplyKernel<<<numBlocks, threadsPerBlock>>>(d_data,
69  other.d_data,
70  result.d_data, rows,
71  cols,
72  other.cols);
73 
74  // Check for kernel launch errors
75  cudaError_t cudaStatus = cudaGetLastError();
76  if (cudaStatus != cudaSuccess) {
77  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
78  }
79 
80  // Synchronize device
81  cudaDeviceSynchronize();
82 
83  return result;
84 }
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18

◆ multiply_elementwise()

Matrix Matrix::multiply_elementwise ( const Matrix &  other) const

Performs element-wise multiplication with another matrix.

Parameters
other: The matrix to multiply element-wise with.
Returns
A new Matrix object containing the result of the element-wise multiplication.
Exceptions
std::invalid_argument: if matrix dimensions are not identical.

Definition at line 40 of file matrix_multiply_elementwise.cu.

40  {
41  // Check if matrices have identical dimensions
42  if (rows != other.rows || cols != other.cols) {
43  throw std::invalid_argument("Matrix dimensions must be identical for element-wise multiplication");
44  }
45 
46  // Create result matrix
47  Matrix result(rows, cols);
48 
49  // Define block dimensions
50  dim3 threadsPerBlock(16, 16);
51 
52  // Calculate grid dimensions
53  dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
54  (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
55 
56  // Launch CUDA kernel
57  matrixMultiplyElementwiseKernel<<<numBlocks, threadsPerBlock>>>(d_data, other.d_data, result.d_data, rows, cols);
58 
59  // Check for kernel launch errors
60  cudaError_t cudaStatus = cudaGetLastError();
61  if (cudaStatus != cudaSuccess) {
62  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
63  }
64 
65  // Synchronize device
66  cudaDeviceSynchronize();
67 
68  return result;
69 }

◆ multiply_scalar()

void Matrix::multiply_scalar ( double  scalar)

Multiplies all elements in the matrix by a scalar.

Parameters
scalar: The scalar to multiply by.

Definition at line 41 of file matrix_multiply_scalar.cu.

41  {
42  // Calculate total number of elements
43  int size = rows * cols;
44 
45  // Define block and grid dimensions
46  int threadsPerBlock = 256;
47  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
48 
49  // Launch CUDA kernel
50  multiplyScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
51 
52  // Check for kernel launch errors
53  cudaError_t cudaStatus = cudaGetLastError();
54  if (cudaStatus != cudaSuccess) {
55  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
56  }
57 
58  // Synchronize device
59  cudaDeviceSynchronize();
60 }

◆ operator=() [1/2]

Matrix & Matrix::operator= ( const Matrix &  other)

Copy assignment operator.

Parameters
other: The matrix to copy from
Returns
Reference to this matrix

Definition at line 16 of file matrix_copy.cu.

16  {
17  if (this != &other) { // Protect against self-assignment
18  // Free existing memory
19  cudaFree(d_data);
20 
21  // Copy dimensions
22  rows = other.rows;
23  cols = other.cols;
24 
25  // Allocate new memory
26  cudaMalloc(&d_data, rows * cols * sizeof(double));
27  // Copy data from the other matrix
28  cudaMemcpy(d_data, other.d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToDevice);
29  }
30  return *this;
31 }

◆ operator=() [2/2]

Matrix & Matrix::operator= ( Matrix &&  other)
noexcept

Move assignment operator.

Parameters
other: The matrix to move from
Returns
Reference to this matrix

Definition at line 41 of file matrix_copy.cu.

41  {
42  if (this != &other) { // Protect against self-assignment
43  // Free existing memory
44  cudaFree(d_data);
45 
46  // Transfer ownership
47  rows = other.rows;
48  cols = other.cols;
49  d_data = other.d_data;
50 
51  // Reset the source object
52  other.d_data = nullptr;
53  other.rows = 0;
54  other.cols = 0;
55  }
56  return *this;
57 }

◆ preview_image()

void Matrix::preview_image ( int  row_index,
int  image_size_x,
int  image_size_y 
) const

Preview a single image from the matrix.

Parameters
row_index: Index of the row containing the image data
image_size_x: Number of rows in the image
image_size_y: Number of columns in the image

Definition at line 11 of file matrix_preview_image.cu.

11  {
12  // Check if the row_index is within the valid range
13  if (row_index < 0 || row_index >= rows) {
14  throw std::out_of_range("Invalid row index");
15  }
16 
17  // Check if the image dimensions fit within the matrix columns
18  if (image_size_x * image_size_y > cols) {
19  throw std::invalid_argument("Image dimensions exceed matrix column count");
20  }
21 
22  // Allocate host memory to store a single row of the matrix
23  double* h_data = new double[cols];
24 
25  // Copy the specified row from device (GPU) memory to host memory
26  cudaMemcpy(h_data, d_data + row_index * cols, cols * sizeof(double), cudaMemcpyDeviceToHost);
27 
28  // Iterate over each row of the image
29  for (int i = 0; i < image_size_x; ++i) {
30  // Iterate over each column of the image
31  for (int j = 0; j < image_size_y; ++j) {
32  // Calculate the index in the flattened array
33  int index = i * image_size_y + j;
34 
35  // Round the pixel value to the nearest integer
36  int value = static_cast<int>(std::round(h_data[index]));
37 
38  // Print spaces for zero values (background)
39  if (value == 0) {
40  std::cout << " ";
41  } else {
42  // Print non-zero values with 3-digit width
43  std::cout << std::setw(3) << value << " ";
44  }
45  }
46  // Move to the next line after each row of the image
47  std::cout << std::endl;
48  }
49  // Print an extra newline for separation
50  std::cout << std::endl;
51 
52  // Free the allocated host memory
53  delete[] h_data;
54 }

◆ print()

void Matrix::print ( int  decimals)

Print the matrix contents.

Parameters
decimals: Number of decimal places to display

Definition at line 11 of file matrix_print.cu.

11  {
12  // Create format string for desired number of decimals
13  char format[20];
14  sprintf(format, "%%.%df", decimals);
15 
16  // Allocate host memory to copy the data from GPU
17  double* h_data = new double[rows * cols];
18  cudaMemcpy(h_data, d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToHost);
19 
20  // Print matrix dimensions
21  std::cout << "Matrix with " << rows << " rows and " << cols << " columns:\n";
22 
23  // Print column labels
24  std::cout << "\t";
25  for (int j = 0; j < cols; ++j) {
26  if (j == 4 && cols > 8) {
27  std::cout << "...\t";
28  j = cols - 4; // Skip to the last 4 columns
29  }
30  std::cout << j << ":\t";
31  }
32  std::cout << "\n";
33 
34  // Iterate over rows
35  for (int i = 0; i < rows; ++i) {
36  if (i == 5 && rows > 10) {
37  std::cout << "...\n\t";
38  for (int k = 0; k < cols; ++k) {
39  if (k == 4 && cols > 8) {
40  std::cout << "...\t";
41  k = cols - 4;
42  }
43  std::cout << "...\t";
44  }
45  std::cout << "\n";
46  i = rows - 5; // Jump to the last 5 rows
47  }
48 
49  // Print row index
50  std::cout << i << ":\t";
51 
52  // Print each element in the row
53  for (int j = 0; j < cols; ++j) {
54  if (j == 4 && cols > 8) {
55  std::cout << "...\t";
56  j = cols - 4; // Skip to the last 4 columns
57  }
58  printf(format, h_data[i * cols + j]);
59  std::cout << "\t";
60  }
61  std::cout << "\n";
62  }
63 
64  // Free the allocated host memory
65  delete[] h_data;
66  std::cout << std::endl;
67 }

◆ randomize()

void Matrix::randomize ( )

Randomize the matrix elements with values between -0.5 and 0.5.

Fills the matrix with random values between -0.5 and 0.5.

Definition at line 43 of file matrix_randomize.cu.

43  {
44  // Calculate the total number of elements in the matrix
45  int totalElements = rows * cols;
46 
47  // Define the number of threads per block (a common choice for good occupancy)
48  int threadsPerBlock = 256;
49 
50  // Calculate the number of blocks needed to cover all elements
51  // We use ceiling division to ensure we have enough blocks
52  int blocksPerGrid = (totalElements + threadsPerBlock - 1) / threadsPerBlock;
53 
54  // Generate a seed for the random number generator
55  // We use the current time to ensure different seeds across runs
56  unsigned long seed = time(NULL);
57 
58  // Launch the CUDA kernel
59  randomizeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, rows, cols, seed);
60 
61  // Wait for the kernel to complete before returning
62  // This ensures all random values are generated before any subsequent operations
63  cudaDeviceSynchronize();
64 }

◆ read_csv()

void Matrix::read_csv ( const char *  filename)

Read data from a CSV file into the matrix.

Parameters
filename: Path to the CSV file

Definition at line 12 of file matrix_read_csv.cu.

12  {
13  // Open the CSV file
14  std::ifstream file(filename);
15  if (!file.is_open()) {
16  throw std::runtime_error("Error opening file");
17  }
18 
19  // Vector to temporarily store the data read from the CSV
20  std::vector<double> temp_data;
21  std::string line, value;
22 
23  // Read the CSV file line by line
24  while (std::getline(file, line)) {
25  // Create a string stream from the current line
26  std::istringstream s(line);
27 
28  // Parse each value in the line, separated by commas
29  while (std::getline(s, value, ',')) {
30  // Convert the string value to double and add it to the temporary vector
31  temp_data.push_back(std::stod(value));
32  }
33  }
34 
35  // Check if the number of values read matches the matrix dimensions
36  if (temp_data.size() != rows * cols) {
37  throw std::runtime_error("CSV data size does not match matrix dimensions");
38  }
39 
40  // Copy data from the temporary vector to the device (GPU) memory
41  cudaMemcpy(d_data, temp_data.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);
42 }

◆ read_csv_limited()

void Matrix::read_csv_limited ( const char *  filename,
int  startRow,
int  endRow,
int  fileRows,
int  fileCols 
)

Read a subset of data from a CSV file into the matrix.

Parameters
filename: Path to the CSV file
startRow: Starting row to read from the CSV file (0-based index)
endRow: Ending row to read from the CSV file (exclusive)
fileRows: Total number of rows in the CSV file
fileCols: Total number of columns in the CSV file

Definition at line 12 of file matrix_read_csv_limited.cu.

16  {
17  // Open the CSV file
18  std::ifstream file(filename);
19  if (!file.is_open()) {
20  throw std::runtime_error("Error opening file");
21  }
22 
23  // Check if the specified range is valid
24  if (startRow < 0 || endRow > fileRows || startRow >= endRow) {
25  throw std::runtime_error("Invalid row range specified");
26  }
27 
28  // Check if the matrix dimensions match the specified range and file columns
29  if (rows != endRow - startRow || cols != fileCols) {
30  throw std::runtime_error("Matrix dimensions do not match the specified range and file columns");
31  }
32 
33  // Vector to temporarily store the data read from the CSV
34  std::vector<double> temp_data(rows * cols);
35  std::string line, value;
36  int currentRow = 0;
37 
38  // Read the CSV file line by line
39  while (std::getline(file, line) && currentRow < fileRows) {
40  // Process only the rows within the specified range
41  if (currentRow >= startRow && currentRow < endRow) {
42  std::istringstream s(line);
43  for (int col = 0; col < fileCols; ++col) {
44  // Parse each value in the line, separated by commas
45  if (!std::getline(s, value, ',')) {
46  throw std::runtime_error("Insufficient columns in CSV file");
47  }
48  // Convert the string value to double and store it in the temporary vector
49  temp_data[(currentRow - startRow) * cols + col] = std::stod(value);
50  }
51  }
52  currentRow++;
53  }
54 
55  // Check if we read enough rows
56  if (currentRow < endRow) {
57  throw std::runtime_error("Insufficient rows in CSV file");
58  }
59 
60  // Copy data from the temporary vector to the device (GPU) memory
61  cudaMemcpy(d_data, temp_data.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);
62 }

◆ relu()

Matrix Matrix::relu ( ) const

Applies the ReLU activation function to the matrix.

Returns
A new Matrix object with ReLU applied.

Definition at line 30 of file matrix_relu.cu.

30  {
31  // Create a new matrix with the same dimensions
32  Matrix result(rows, cols);
33 
34  // Calculate the total number of elements
35  int size = rows * cols;
36 
37  // Define the number of threads per block
38  int threadsPerBlock = 256;
39 
40  // Calculate the number of blocks needed
41  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
42 
43  // Launch the CUDA kernel
44  reluKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
45 
46  // Synchronize to ensure the kernel execution is complete
47  cudaDeviceSynchronize();
48 
49  return result;
50 }

◆ relu_derivative()

Matrix Matrix::relu_derivative ( ) const

Applies the derivative of the ReLU activation function to the matrix.

Applies the ReLU derivative function to the matrix.

Returns
A new Matrix object with ReLU derivative applied.

Definition at line 30 of file matrix_relu_derivative.cu.

30  {
31  // Create a new matrix with the same dimensions
32  Matrix result(rows, cols);
33 
34  // Calculate the total number of elements
35  int size = rows * cols;
36 
37  // Define the number of threads per block
38  int threadsPerBlock = 256;
39 
40  // Calculate the number of blocks needed
41  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
42 
43  // Launch the CUDA kernel
44  reluDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
45 
46  // Synchronize to ensure the kernel execution is complete
47  cudaDeviceSynchronize();
48 
49  return result;
50 }

◆ select_batch()

Matrix Matrix::select_batch ( int  start_row,
int  end_row,
int  start_col,
int  end_col 
) const

Selects a subset of the matrix based on specified row and column ranges.

Parameters
start_row: Starting row index (inclusive).
end_row: Ending row index (exclusive).
start_col: Starting column index (inclusive).
end_col: Ending column index (exclusive).
Returns
A new Matrix object containing the selected subset.
Exceptions
std::out_of_range: if the specified ranges are invalid.

Definition at line 38 of file matrix_select_batch.cu.

38  {
39  // Validate input ranges
40  if (start_row < 0 || end_row > rows || start_col < 0 || end_col > cols ||
41  start_row >= end_row || start_col >= end_col) {
42  throw std::out_of_range("Invalid row or column range specified");
43  }
44 
45  // Calculate dimensions of the selected subset
46  int num_rows = end_row - start_row;
47  int num_cols = end_col - start_col;
48 
49  // Create a new matrix to store the selected subset
50  Matrix result(num_rows, num_cols);
51 
52  // Define block and grid dimensions
53  dim3 threadsPerBlock(16, 16);
54  dim3 numBlocks((num_cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
55  (num_rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
56 
57  // Launch CUDA kernel
58  selectBatchKernel<<<numBlocks, threadsPerBlock>>>(
59  d_data, result.d_data, cols, num_cols, start_row, start_col, num_rows, num_cols
60  );
61 
62  // Check for kernel launch errors
63  cudaError_t cudaStatus = cudaGetLastError();
64  if (cudaStatus != cudaSuccess) {
65  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
66  }
67 
68  // Synchronize device
69  cudaDeviceSynchronize();
70 
71  return result;
72 }

◆ sigmoid()

Matrix Matrix::sigmoid ( ) const

Applies the sigmoid activation function to the matrix.

Returns
A new Matrix object with sigmoid applied.

Definition at line 31 of file matrix_sigmoid.cu.

31  {
32  // Create a new matrix with the same dimensions
33  Matrix result(rows, cols);
34 
35  // Calculate the total number of elements
36  int size = rows * cols;
37 
38  // Define the number of threads per block
39  int threadsPerBlock = 256;
40 
41  // Calculate the number of blocks needed
42  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
43 
44  // Launch the CUDA kernel
45  sigmoidKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
46 
47  // Synchronize to ensure the kernel execution is complete
48  cudaDeviceSynchronize();
49 
50  return result;
51 }

◆ sigmoid_derivative()

Matrix Matrix::sigmoid_derivative ( ) const

Applies the derivative of the sigmoid activation function to the matrix.

Applies the sigmoid derivative function to the matrix.

Returns
A new Matrix object with sigmoid derivative applied.

Definition at line 33 of file matrix_sigmoid_derivative.cu.

33  {
34  // Create a new matrix with the same dimensions
35  Matrix result(rows, cols);
36 
37  // Calculate the total number of elements
38  int size = rows * cols;
39 
40  // Define the number of threads per block
41  int threadsPerBlock = 256;
42 
43  // Calculate the number of blocks needed
44  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
45 
46  // Launch the CUDA kernel
47  sigmoidDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
48 
49  // Synchronize to ensure the kernel execution is complete
50  cudaDeviceSynchronize();
51 
52  return result;
53 }

◆ softmax()

Matrix Matrix::softmax ( ) const

Applies the softmax function to the matrix column-wise.

Returns
A new Matrix object with softmax applied.

Definition at line 53 of file matrix_softmax.cu.

53  {
54  // Create a new matrix with the same dimensions
55  Matrix result(rows, cols);
56 
57  // Define the number of threads per block
58  int threadsPerBlock = 256;
59 
60  // Calculate the number of blocks needed
61  int blocksPerGrid = (cols + threadsPerBlock - 1) / threadsPerBlock;
62 
63  // Launch the CUDA kernel
64  softmaxKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, rows, cols);
65 
66  // Synchronize to ensure the kernel execution is complete
67  cudaDeviceSynchronize();
68 
69  return result;
70 }

◆ subtract()

Matrix Matrix::subtract ( const Matrix &  other) const

Subtracts another matrix from this matrix.

Parameters
other: The matrix to subtract.
Returns
A new Matrix object containing the result of the subtraction.
Exceptions
std::invalid_argument: if matrix dimensions are not identical.

Definition at line 40 of file matrix_subtract.cu.

40  {
41  // Check if matrices have identical dimensions
42  if (rows != other.rows || cols != other.cols) {
43  throw std::invalid_argument("Matrix dimensions must be identical for subtraction");
44  }
45 
46  // Create result matrix
47  Matrix result(rows, cols);
48 
49  // Define block dimensions
50  dim3 threadsPerBlock(16, 16);
51 
52  // Calculate grid dimensions
53  dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
54  (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
55 
56  // Launch CUDA kernel
57  matrixSubtractKernel<<<numBlocks, threadsPerBlock>>>(d_data, other.d_data, result.d_data, rows, cols);
58 
59  // Check for kernel launch errors
60  cudaError_t cudaStatus = cudaGetLastError();
61  if (cudaStatus != cudaSuccess) {
62  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
63  }
64 
65  // Synchronize device
66  cudaDeviceSynchronize();
67 
68  return result;
69 }

◆ sum()

double Matrix::sum ( ) const

Sums all elements in the matrix.

Returns
The sum of all elements in the matrix.

Definition at line 15 of file matrix_sum.cu.

15  {
16  // Create a thrust device pointer from the raw CUDA pointer
17  thrust::device_ptr<double> d_ptr(d_data);
18 
19  // Use thrust::reduce to sum all elements
20  double result = thrust::reduce(d_ptr, d_ptr + rows * cols);
21 
22  return result;
23 }

◆ transpose()

Matrix Matrix::transpose ( ) const

Transposes the matrix and returns a new Matrix object.

Transposes the matrix and returns a new Matrix object containing the transposed data.

Returns
A new Matrix object containing the transposed data, with transposed dimensions.

Definition at line 36 of file matrix_transpose.cu.

36  {
37  // Create a new matrix to hold the transposed data
38  Matrix result(cols, rows);
39 
40  // Define block dimensions (16x16 is common for matrix operations)
41  dim3 threadsPerBlock(16, 16);
42 
43  // Calculate grid dimensions to cover the entire matrix
44  dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
45  (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
46 
47  // Launch the CUDA kernel to perform transposition
48  matrixTransposeKernel<<<numBlocks, threadsPerBlock>>>(d_data, result.d_data, rows, cols);
49 
50  // Check for kernel launch errors
51  cudaError_t cudaStatus = cudaGetLastError();
52  if (cudaStatus != cudaSuccess) {
53  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
54  }
55 
56  // Synchronize device to ensure completion
57  cudaDeviceSynchronize();
58 
59  return result;
60 }

The documentation for this class was generated from the following files: