Represents a matrix with GPU-accelerated operations. More...

#include <matrix.h>

Public Member Functions
	Matrix (int rows, int cols)
	Construct a new Matrix object. More...

	Matrix (const Matrix &other)
	Copy constructor. More...

	Matrix (Matrix &&other) noexcept
	Move constructor. More...

Matrix &	operator= (const Matrix &other)
	Copy assignment operator. More...

Matrix &	operator= (Matrix &&other) noexcept
	Move assignment operator. More...

	~Matrix ()
	Destroy the Matrix object. More...

void	initialize ()
	Initialize the matrix by setting all elements to zero. More...

void	randomize ()
	Randomize the matrix elements with values between -0.5 and 0.5. More...

void	print (int decimals)
	Print the matrix contents. More...

int	get_rows () const
	Get the number of rows in the matrix. More...

int	get_cols () const
	Get the number of columns in the matrix. More...

double *	get_data () const
	Get the raw data pointer of the matrix. More...

void	read_csv (const char *filename)
	Read data from a CSV file into the matrix. More...

void	read_csv_limited (const char *filename, int startRow, int endRow, int fileRows, int fileCols)
	Read a subset of data from a CSV file into the matrix. More...

void	preview_image (int row_index, int image_size_x, int image_size_y) const
	Preview a single image from the matrix. More...

Matrix	relu () const
	Applies the ReLU activation function to the matrix. More...

Matrix	relu_derivative () const
	Applies the derivative of the ReLU activation function to the matrix. More...

Matrix	sigmoid () const
	Applies the sigmoid activation function to the matrix. More...

Matrix	sigmoid_derivative () const
	Applies the derivative of the sigmoid activation function to the matrix. More...

Matrix	softmax () const
	Applies the softmax function to the matrix column-wise. More...

Matrix	copy () const
	Creates a deep copy of the matrix. More...

Matrix	multiply (const Matrix &other) const
	Multiplies this matrix with another matrix. More...

Matrix	multiply_elementwise (const Matrix &other) const
	Performs element-wise multiplication with another matrix. More...

void	add_vector (const Vector &v)
	Adds a vector to each column of the matrix. More...

Matrix	subtract (const Matrix &other) const
	Subtracts another matrix from this matrix. More...

double	sum () const
	Sums all elements in the matrix. More...

void	divide_scalar (double scalar)
	Divides all elements in the matrix by a scalar. More...

void	multiply_scalar (double scalar)
	Multiplies all elements in the matrix by a scalar. More...

Vector	argmax () const
	Computes the argmax of each column in the matrix. More...

Matrix	transpose () const
	Transposes the matrix and returns a new Matrix object. More...

Matrix	select_batch (int start_row, int end_row, int start_col, int end_col) const
	Selects a subset of the matrix based on specified row and column ranges. More...

Detailed Description

Represents a matrix with GPU-accelerated operations.

Definition at line 18 of file matrix.h.

Constructor & Destructor Documentation

◆ Matrix() [1/3]

Matrix::Matrix	(	int	rows,
		int	cols
	)

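Construct a new Matrix object and allocate device memory for rows × cols doubles. The allocated memory is not initialized by the constructor.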

Parameters

rows	Number of rows in the matrix
cols	Number of columns in the matrix

Definition at line 8 of file matrix_constructor.cu.

                                  : rows(rows), cols(cols) {
     // Allocate memory on the GPU for the matrix data
     // The size is calculated as rows * cols * sizeof(double)
     cudaMalloc(&d_data, rows * cols * sizeof(double));
 }

◆ Matrix() [2/3]

Matrix::Matrix ( const Matrix & other )

Copy constructor.

Parameters

other The matrix to copy from

Definition at line 9 of file matrix_copy.cu.

                                   : rows(other.rows), cols(other.cols) {
     // Allocate new memory on the device
     cudaMalloc(&d_data, rows * cols * sizeof(double));
     // Copy data from the other matrix to this one
     cudaMemcpy(d_data, other.d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToDevice);
 }

◆ Matrix() [3/3]

Matrix::Matrix ( Matrix && other )

noexcept

Move constructor.

Parameters

other The matrix to move from

Definition at line 33 of file matrix_copy.cu.

     : rows(other.rows), cols(other.cols), d_data(other.d_data) {
     // Transfer ownership and reset the source object
     other.d_data = nullptr;
     other.rows = 0;
     other.cols = 0;
 }

◆ ~Matrix()

Matrix::~Matrix ( )

Destroy the Matrix object.

Definition at line 8 of file matrix_destructor.cu.

                 {
     // Free the GPU memory allocated for this matrix
     cudaFree(d_data);
 }

Member Function Documentation

◆ add_vector()

void Matrix::add_vector ( const Vector & v )

Adds a vector to each column of the matrix.

Parameters

v	The vector to add.

Exceptions

std::invalid_argument if vector dimension doesn't match matrix rows.

Definition at line 39 of file matrix_add_vector.cu.

                                        {
     // Check if vector dimension matches matrix rows
     if (rows != v.get_rows()) {
         throw std::invalid_argument("Vector dimension must match matrix rows for addition");
     }
  
     // Define block dimensions
     dim3 threadsPerBlock(16, 16);
  
     // Calculate grid dimensions
     dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch CUDA kernel
     addVectorToMatrixKernel<<<numBlocks, threadsPerBlock>>>(d_data, v.get_data(), rows, cols);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
 }

◆ argmax()

Vector Matrix::argmax ( ) const

Computes the argmax of each column in the matrix.

Launches the argmax_GPU kernel to perform column-wise argmax on the matrix.

Returns: A Vector containing the row indices of the maximum values for each column.

Definition at line 44 of file matrix_argmax.cu.

                             {
     // Create a result vector on the device
     Vector result(cols);
  
     // Define grid and block sizes
     int threadsPerBlock = 256;
     int blocksPerGrid = (cols + threadsPerBlock - 1) / threadsPerBlock;
  
     // Launch the argmax kernel on the device
     argmax_GPU<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.get_data(), rows, cols);
  
     // Ensure the kernel execution is complete
     cudaDeviceSynchronize();
  
     return result;
 }

◆ copy()

Matrix Matrix::copy ( ) const

Creates a deep copy of the matrix.

Returns: A new Matrix object with the same content as the original.

Definition at line 59 of file matrix_copy.cu.

                           {
     // Use the copy constructor to create a deep copy
     return *this;
 }

◆ divide_scalar()

void Matrix::divide_scalar ( double scalar )

Divides all elements in the matrix by a scalar.

Parameters

scalar The scalar to divide by.

Exceptions

std::invalid_argument if scalar is exactly zero.

Definition at line 47 of file matrix_divide_scalar.cu.

                                         {
     // Check for division by exactly zero
     if (scalar == 0.0) {
         throw std::invalid_argument("Cannot divide by exactly zero");
     }
  
     // Calculate total number of elements
     int size = rows * cols;
  
     // Define block and grid dimensions
     int threadsPerBlock = 256;
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
  
     // Launch CUDA kernel
     divideScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
 }

◆ get_cols()

int Matrix::get_cols ( ) const

Get the number of columns in the matrix.

Returns: int Number of columns

Definition at line 7 of file matrix_get_cols.cu.

                            {
     // Return the number of columns in the matrix
     return cols;
 }

◆ get_data()

double * Matrix::get_data ( ) const

Get the raw data pointer of the matrix.

Returns: double* Pointer to the matrix data on the device

Definition at line 7 of file matrix_get_data.cu.

                                {
     // Return the pointer to the GPU memory
     return d_data;
 }

◆ get_rows()

int Matrix::get_rows ( ) const

Get the number of rows in the matrix.

Returns: int Number of rows

Definition at line 7 of file matrix_get_rows.cu.

                            {
     // Return the number of rows in the matrix
     return rows;
 }

◆ initialize()

void Matrix::initialize ( )

Initialize the matrix by setting all elements to zero.

Definition at line 8 of file matrix_initialize.cu.

                         {
     // Use cudaMemset to set all elements of d_data to 0
     cudaMemset(d_data, 0, rows * cols * sizeof(double));
 }

◆ multiply()

Matrix Matrix::multiply ( const Matrix & other ) const

Multiplies this matrix with another matrix.

Parameters

other The matrix to multiply with.

Returns: A new Matrix object containing the result of the multiplication.

Exceptions

std::invalid_argument if matrix dimensions are incompatible for multiplication.

Definition at line 51 of file matrix_multiply.cu.

                                                  {
     // Check if matrices can be multiplied
     if (cols != other.rows) {
         throw std::invalid_argument("Matrix dimensions are incompatible for multiplication");
     }
  
     // Create result matrix
     Matrix result(rows, other.cols);
  
     // Define block dimensions
     dim3 threadsPerBlock(16, 16);
  
     // Calculate grid dimensions
     dim3 numBlocks((other.cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch CUDA kernel
     matrixMultiplyKernel<<<numBlocks, threadsPerBlock>>>(d_data,
                                                          other.d_data,
                                                          result.d_data, rows,
                                                          cols,
                                                          other.cols);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
  
     return result;
 }

◆ multiply_elementwise()

Matrix Matrix::multiply_elementwise ( const Matrix & other ) const

Performs element-wise multiplication with another matrix.

Parameters

other The matrix to multiply element-wise with.

Returns: A new Matrix object containing the result of the element-wise multiplication.

Exceptions

std::invalid_argument if matrix dimensions are not identical.

Definition at line 40 of file matrix_multiply_elementwise.cu.

                                                              {
     // Check if matrices have identical dimensions
     if (rows != other.rows || cols != other.cols) {
         throw std::invalid_argument("Matrix dimensions must be identical for element-wise multiplication");
     }
  
     // Create result matrix
     Matrix result(rows, cols);
  
     // Define block dimensions
     dim3 threadsPerBlock(16, 16);
  
     // Calculate grid dimensions
     dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch CUDA kernel
     matrixMultiplyElementwiseKernel<<<numBlocks, threadsPerBlock>>>(d_data, other.d_data, result.d_data, rows, cols);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
  
     return result;
 }

◆ multiply_scalar()

void Matrix::multiply_scalar ( double scalar )

Multiplies all elements in the matrix by a scalar.

Parameters

scalar The scalar to multiply by.

Definition at line 41 of file matrix_multiply_scalar.cu.

                                           {
     // Calculate total number of elements
     int size = rows * cols;
  
     // Define block and grid dimensions
     int threadsPerBlock = 256;
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
  
     // Launch CUDA kernel
     multiplyScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
 }

◆ operator=() [1/2]

Matrix & Matrix::operator= ( const Matrix & other )

Copy assignment operator.

Parameters

other The matrix to copy from

Returns: Reference to this matrix

Definition at line 16 of file matrix_copy.cu.

                                              {
     if (this != &other) {  // Protect against self-assignment
         // Free existing memory
         cudaFree(d_data);
         
         // Copy dimensions
         rows = other.rows;
         cols = other.cols;
         
         // Allocate new memory
         cudaMalloc(&d_data, rows * cols * sizeof(double));
         // Copy data from the other matrix
         cudaMemcpy(d_data, other.d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToDevice);
     }
     return *this;
 }

◆ operator=() [2/2]

Matrix & Matrix::operator= ( Matrix && other )

noexcept

Move assignment operator.

Parameters

other The matrix to move from

Returns: Reference to this matrix

Definition at line 41 of file matrix_copy.cu.

                                                  {
     if (this != &other) {  // Protect against self-assignment
         // Free existing memory
         cudaFree(d_data);
         
         // Transfer ownership
         rows = other.rows;
         cols = other.cols;
         d_data = other.d_data;
         
         // Reset the source object
         other.d_data = nullptr;
         other.rows = 0;
         other.cols = 0;
     }
     return *this;
 }

◆ preview_image()

void Matrix::preview_image	(	int	row_index,
		int	image_size_x,
		int	image_size_y
	)		const

Preview a single image from the matrix.

Parameters

row_index	Index of the row containing the image data
image_size_x	Number of rows in the image
image_size_y	Number of columns in the image

Definition at line 11 of file matrix_preview_image.cu.

                                                                                   {
     // Check if the row_index is within the valid range
     if (row_index < 0 || row_index >= rows) {
         throw std::out_of_range("Invalid row index");
     }
  
     // Check if the image dimensions fit within the matrix columns
     if (image_size_x * image_size_y > cols) {
         throw std::invalid_argument("Image dimensions exceed matrix column count");
     }
  
     // Allocate host memory to store a single row of the matrix
     double* h_data = new double[cols];
  
     // Copy the specified row from device (GPU) memory to host memory
     cudaMemcpy(h_data, d_data + row_index * cols, cols * sizeof(double), cudaMemcpyDeviceToHost);
  
     // Iterate over each row of the image
     for (int i = 0; i < image_size_x; ++i) {
         // Iterate over each column of the image
         for (int j = 0; j < image_size_y; ++j) {
             // Calculate the index in the flattened array
             int index = i * image_size_y + j;
  
             // Round the pixel value to the nearest integer
             int value = static_cast<int>(std::round(h_data[index]));
  
             // Print spaces for zero values (background)
             if (value == 0) {
                 std::cout << "    ";
             } else {
                 // Print non-zero values with 3-digit width
                 std::cout << std::setw(3) << value << " ";
             }
         }
         // Move to the next line after each row of the image
         std::cout << std::endl;
     }
     // Print an extra newline for separation
     std::cout << std::endl;
  
     // Free the allocated host memory
     delete[] h_data;
 }

◆ print()

void Matrix::print ( int decimals )

Print the matrix contents.

Parameters

decimals Number of decimal places to display

Definition at line 11 of file matrix_print.cu.

                                {
     // Create format string for desired number of decimals
     char format[20];
     sprintf(format, "%%.%df", decimals);
  
     // Allocate host memory to copy the data from GPU
     double* h_data = new double[rows * cols];
     cudaMemcpy(h_data, d_data, rows * cols * sizeof(double), cudaMemcpyDeviceToHost);
  
     // Print matrix dimensions
     std::cout << "Matrix with " << rows << " rows and " << cols << " columns:\n";
  
     // Print column labels
     std::cout << "\t";
     for (int j = 0; j < cols; ++j) {
         if (j == 4 && cols > 8) {
             std::cout << "...\t";
             j = cols - 4;  // Skip to the last 4 columns
         }
         std::cout << j << ":\t";
     }
     std::cout << "\n";
  
     // Iterate over rows
     for (int i = 0; i < rows; ++i) {
         if (i == 5 && rows > 10) {
             std::cout << "...\n\t";
             for (int k = 0; k < cols; ++k) {
                 if (k == 4 && cols > 8) {
                     std::cout << "...\t";
                     k = cols - 4;
                 }
                 std::cout << "...\t";
             }
             std::cout << "\n";
             i = rows - 5;  // Jump to the last 5 rows
         }
  
         // Print row index
         std::cout << i << ":\t";
  
         // Print each element in the row
         for (int j = 0; j < cols; ++j) {
             if (j == 4 && cols > 8) {
                 std::cout << "...\t";
                 j = cols - 4;  // Skip to the last 4 columns
             }
             printf(format, h_data[i * cols + j]);
             std::cout << "\t";
         }
         std::cout << "\n";
     }
  
     // Free the allocated host memory
     delete[] h_data;
     std::cout << std::endl;
 }

◆ randomize()

void Matrix::randomize ( )

Randomize the matrix elements with values between -0.5 and 0.5.

The random generator is seeded from the current time in whole seconds, so calls made within the same second use the same seed.

Definition at line 43 of file matrix_randomize.cu.

                        {
     // Calculate the total number of elements in the matrix
     int totalElements = rows * cols;
  
     // Define the number of threads per block (a common choice for good occupancy)
     int threadsPerBlock = 256;
  
     // Calculate the number of blocks needed to cover all elements
     // We use ceiling division to ensure we have enough blocks
     int blocksPerGrid = (totalElements + threadsPerBlock - 1) / threadsPerBlock;
  
     // Generate a seed for the random number generator
     // We use the current time to ensure different seeds across runs
     unsigned long seed = time(NULL);
  
     // Launch the CUDA kernel
     randomizeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, rows, cols, seed);
  
     // Wait for the kernel to complete before returning
     // This ensures all random values are generated before any subsequent operations
     cudaDeviceSynchronize();
 }

◆ read_csv()

void Matrix::read_csv ( const char * filename )

Read data from a CSV file into the matrix.

Parameters

filename Path to the CSV file

Definition at line 12 of file matrix_read_csv.cu.

                                           {
     // Open the CSV file
     std::ifstream file(filename);
     if (!file.is_open()) {
         throw std::runtime_error("Error opening file");
     }
  
     // Vector to temporarily store the data read from the CSV
     std::vector<double> temp_data;
     std::string line, value;
  
     // Read the CSV file line by line
     while (std::getline(file, line)) {
         // Create a string stream from the current line
         std::istringstream s(line);
         
         // Parse each value in the line, separated by commas
         while (std::getline(s, value, ',')) {
             // Convert the string value to double and add it to the temporary vector
             temp_data.push_back(std::stod(value));
         }
     }
  
     // Check if the number of values read matches the matrix dimensions
     if (temp_data.size() != rows * cols) {
         throw std::runtime_error("CSV data size does not match matrix dimensions");
     }
  
     // Copy data from the temporary vector to the device (GPU) memory
     cudaMemcpy(d_data, temp_data.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);
 }

◆ read_csv_limited()

void Matrix::read_csv_limited	(	const char *	filename,
		int	startRow,
		int	endRow,
		int	fileRows,
		int	fileCols
	)

Read a subset of data from a CSV file into the matrix.

Parameters

filename	Path to the CSV file
startRow	Starting row to read from the CSV file (0-based index)
endRow	Ending row to read from the CSV file (exclusive)
fileRows	Total number of rows in the CSV file
fileCols	Total number of columns in the CSV file

Definition at line 12 of file matrix_read_csv_limited.cu.

                                             {
     // Open the CSV file
     std::ifstream file(filename);
     if (!file.is_open()) {
         throw std::runtime_error("Error opening file");
     }
  
     // Check if the specified range is valid
     if (startRow < 0 || endRow > fileRows || startRow >= endRow) {
         throw std::runtime_error("Invalid row range specified");
     }
  
     // Check if the matrix dimensions match the specified range and file columns
     if (rows != endRow - startRow || cols != fileCols) {
         throw std::runtime_error("Matrix dimensions do not match the specified range and file columns");
     }
  
     // Vector to temporarily store the data read from the CSV
     std::vector<double> temp_data(rows * cols);
     std::string line, value;
     int currentRow = 0;
  
     // Read the CSV file line by line
     while (std::getline(file, line) && currentRow < fileRows) {
         // Process only the rows within the specified range
         if (currentRow >= startRow && currentRow < endRow) {
             std::istringstream s(line);
             for (int col = 0; col < fileCols; ++col) {
                 // Parse each value in the line, separated by commas
                 if (!std::getline(s, value, ',')) {
                     throw std::runtime_error("Insufficient columns in CSV file");
                 }
                 // Convert the string value to double and store it in the temporary vector
                 temp_data[(currentRow - startRow) * cols + col] = std::stod(value);
             }
         }
         currentRow++;
     }
  
     // Check if we read enough rows
     if (currentRow < endRow) {
         throw std::runtime_error("Insufficient rows in CSV file");
     }
  
     // Copy data from the temporary vector to the device (GPU) memory
     cudaMemcpy(d_data, temp_data.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);
 }

◆ relu()

Matrix Matrix::relu ( ) const

Applies the ReLU activation function to the matrix.

Returns: A new Matrix object with ReLU applied.

Definition at line 30 of file matrix_relu.cu.

                           {
     // Create a new matrix with the same dimensions
     Matrix result(rows, cols);
     
     // Calculate the total number of elements
     int size = rows * cols;
     
     // Define the number of threads per block
     int threadsPerBlock = 256;
     
     // Calculate the number of blocks needed
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
     
     // Launch the CUDA kernel
     reluKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
     
     // Synchronize to ensure the kernel execution is complete
     cudaDeviceSynchronize();
     
     return result;
 }

◆ relu_derivative()

Matrix Matrix::relu_derivative ( ) const

Applies the derivative of the ReLU activation function to the matrix.

This matrix is left unchanged; the result is written to a newly allocated matrix with the same dimensions.

Returns: A new Matrix object with ReLU derivative applied.

Definition at line 30 of file matrix_relu_derivative.cu.

                                      {
     // Create a new matrix with the same dimensions
     Matrix result(rows, cols);
     
     // Calculate the total number of elements
     int size = rows * cols;
     
     // Define the number of threads per block
     int threadsPerBlock = 256;
     
     // Calculate the number of blocks needed
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
     
     // Launch the CUDA kernel
     reluDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
     
     // Synchronize to ensure the kernel execution is complete
     cudaDeviceSynchronize();
     
     return result;
 }

◆ select_batch()

Matrix Matrix::select_batch	(	int	start_row,
		int	end_row,
		int	start_col,
		int	end_col
	)		const

Selects a subset of the matrix based on specified row and column ranges.

Parameters

start_row	Starting row index (inclusive).
end_row	Ending row index (exclusive).
start_col	Starting column index (inclusive).
end_col	Ending column index (exclusive).

Returns: A new Matrix object containing the selected subset.

Exceptions

std::out_of_range if the specified ranges are invalid.

Definition at line 38 of file matrix_select_batch.cu.

                                                                                         {
     // Validate input ranges
     if (start_row < 0 || end_row > rows || start_col < 0 || end_col > cols ||
         start_row >= end_row || start_col >= end_col) {
         throw std::out_of_range("Invalid row or column range specified");
     }
  
     // Calculate dimensions of the selected subset
     int num_rows = end_row - start_row;
     int num_cols = end_col - start_col;
  
     // Create a new matrix to store the selected subset
     Matrix result(num_rows, num_cols);
  
     // Define block and grid dimensions
     dim3 threadsPerBlock(16, 16);
     dim3 numBlocks((num_cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (num_rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch CUDA kernel
     selectBatchKernel<<<numBlocks, threadsPerBlock>>>(
         d_data, result.d_data, cols, num_cols, start_row, start_col, num_rows, num_cols
     );
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
  
     return result;
 }

◆ sigmoid()

Matrix Matrix::sigmoid ( ) const

Applies the sigmoid activation function to the matrix.

Returns: A new Matrix object with sigmoid applied.

Definition at line 31 of file matrix_sigmoid.cu.

                              {
     // Create a new matrix with the same dimensions
     Matrix result(rows, cols);
     
     // Calculate the total number of elements
     int size = rows * cols;
     
     // Define the number of threads per block
     int threadsPerBlock = 256;
     
     // Calculate the number of blocks needed
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
     
     // Launch the CUDA kernel
     sigmoidKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
     
     // Synchronize to ensure the kernel execution is complete
     cudaDeviceSynchronize();
     
     return result;
 }

◆ sigmoid_derivative()

Matrix Matrix::sigmoid_derivative ( ) const

Applies the derivative of the sigmoid activation function to the matrix.

This matrix is left unchanged; the result is written to a newly allocated matrix with the same dimensions.

Returns: A new Matrix object with sigmoid derivative applied.

Definition at line 33 of file matrix_sigmoid_derivative.cu.

                                         {
     // Create a new matrix with the same dimensions
     Matrix result(rows, cols);
     
     // Calculate the total number of elements
     int size = rows * cols;
     
     // Define the number of threads per block
     int threadsPerBlock = 256;
     
     // Calculate the number of blocks needed
     int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
     
     // Launch the CUDA kernel
     sigmoidDerivativeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, size);
     
     // Synchronize to ensure the kernel execution is complete
     cudaDeviceSynchronize();
     
     return result;
 }

◆ softmax()

Matrix Matrix::softmax ( ) const

Applies the softmax function to the matrix column-wise.

Returns: A new Matrix object with softmax applied.

Definition at line 53 of file matrix_softmax.cu.

                              {
     // Create a new matrix with the same dimensions
     Matrix result(rows, cols);
     
     // Define the number of threads per block
     int threadsPerBlock = 256;
     
     // Calculate the number of blocks needed
     int blocksPerGrid = (cols + threadsPerBlock - 1) / threadsPerBlock;
     
     // Launch the CUDA kernel
     softmaxKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, result.d_data, rows, cols);
     
     // Synchronize to ensure the kernel execution is complete
     cudaDeviceSynchronize();
     
     return result;
 }

◆ subtract()

Matrix Matrix::subtract ( const Matrix & other ) const

Subtracts another matrix from this matrix.

Parameters

other The matrix to subtract.

Returns: A new Matrix object containing the result of the subtraction.

Exceptions

std::invalid_argument if matrix dimensions are not identical.

Definition at line 40 of file matrix_subtract.cu.

                                                  {
     // Check if matrices have identical dimensions
     if (rows != other.rows || cols != other.cols) {
         throw std::invalid_argument("Matrix dimensions must be identical for subtraction");
     }
  
     // Create result matrix
     Matrix result(rows, cols);
  
     // Define block dimensions
     dim3 threadsPerBlock(16, 16);
  
     // Calculate grid dimensions
     dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch CUDA kernel
     matrixSubtractKernel<<<numBlocks, threadsPerBlock>>>(d_data, other.d_data, result.d_data, rows, cols);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device
     cudaDeviceSynchronize();
  
     return result;
 }

◆ sum()

double Matrix::sum ( ) const

Sums all elements in the matrix.

Returns: The sum of all elements in the matrix.

Definition at line 15 of file matrix_sum.cu.

                          {
     // Create a thrust device pointer from the raw CUDA pointer
     thrust::device_ptr<double> d_ptr(d_data);
     
     // Use thrust::reduce to sum all elements
     double result = thrust::reduce(d_ptr, d_ptr + rows * cols);
     
     return result;
 }

◆ transpose()

Matrix Matrix::transpose ( ) const

Transposes the matrix and returns a new Matrix object.

Transposes the matrix and returns a new Matrix object containing the transposed data.

Returns: A new Matrix object with transposed dimensions (cols × rows) containing the transposed data.

Definition at line 36 of file matrix_transpose.cu.

                                {
     // Create a new matrix to hold the transposed data
     Matrix result(cols, rows);
  
     // Define block dimensions (16x16 is common for matrix operations)
     dim3 threadsPerBlock(16, 16);
  
     // Calculate grid dimensions to cover the entire matrix
     dim3 numBlocks((cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                    (rows + threadsPerBlock.y - 1) / threadsPerBlock.y);
  
     // Launch the CUDA kernel to perform transposition
     matrixTransposeKernel<<<numBlocks, threadsPerBlock>>>(d_data, result.d_data, rows, cols);
  
     // Check for kernel launch errors
     cudaError_t cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
     }
  
     // Synchronize device to ensure completion
     cudaDeviceSynchronize();
  
     return result;
 }

The documentation for this class was generated from the following files:

src/linear_algebra/matrix.h
src/linear_algebra/matrix_add_vector.cu
src/linear_algebra/matrix_argmax.cu
src/linear_algebra/matrix_constructor.cu
src/linear_algebra/matrix_copy.cu
src/linear_algebra/matrix_destructor.cu
src/linear_algebra/matrix_divide_scalar.cu
src/linear_algebra/matrix_get_cols.cu
src/linear_algebra/matrix_get_data.cu
src/linear_algebra/matrix_get_rows.cu
src/linear_algebra/matrix_initialize.cu
src/linear_algebra/matrix_multiply.cu
src/linear_algebra/matrix_multiply_elementwise.cu
src/linear_algebra/matrix_multiply_scalar.cu
src/linear_algebra/matrix_preview_image.cu
src/linear_algebra/matrix_print.cu
src/linear_algebra/matrix_randomize.cu
src/linear_algebra/matrix_read_csv.cu
src/linear_algebra/matrix_read_csv_limited.cu
src/linear_algebra/matrix_relu.cu
src/linear_algebra/matrix_relu_derivative.cu
src/linear_algebra/matrix_select_batch.cu
src/linear_algebra/matrix_sigmoid.cu
src/linear_algebra/matrix_sigmoid_derivative.cu
src/linear_algebra/matrix_softmax.cu
src/linear_algebra/matrix_subtract.cu
src/linear_algebra/matrix_sum.cu
src/linear_algebra/matrix_transpose.cu

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ Matrix() [1/3]

◆ Matrix() [2/3]

◆ Matrix() [3/3]

◆ ~Matrix()

Member Function Documentation

◆ add_vector()

◆ argmax()

◆ copy()

◆ divide_scalar()

◆ get_cols()

◆ get_data()

◆ get_rows()

◆ initialize()

◆ multiply()

◆ multiply_elementwise()

◆ multiply_scalar()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ preview_image()

◆ print()

◆ randomize()

◆ read_csv()

◆ read_csv_limited()

◆ relu()

◆ relu_derivative()

◆ select_batch()

◆ sigmoid()

◆ sigmoid_derivative()

◆ softmax()

◆ subtract()

◆ sum()

◆ transpose()