CUDA Networks
matrix_select_batch.cu
Go to the documentation of this file.
1 /**
2  * @file matrix_select_batch.cu
3  * @brief Implementation of the Matrix::select_batch method for selecting a subset of the matrix.
4  */
5 
6 #include "matrix.h"
7 #include <cuda_runtime.h>
8 #include <stdexcept>
9 
/**
 * @brief CUDA kernel that copies a rectangular sub-block of a row-major matrix.
 *
 * Each thread copies at most one element. The thread's global x index selects
 * the column and its global y index selects the row inside the selected
 * sub-block, so the launch grid must cover at least num_cols threads in x and
 * num_rows threads in y. Threads that fall outside the sub-block do nothing.
 *
 * The kernel performs no validation of the source range: the caller must
 * guarantee that rows [start_row, start_row + num_rows) and columns
 * [start_col, start_col + num_cols) exist in the source, and that dst can hold
 * num_rows rows of dst_cols elements each.
 *
 * @param src Pointer to the source matrix data (read only).
 * @param dst Pointer to the destination matrix data.
 * @param src_cols Number of columns in the source matrix (source row stride).
 * @param dst_cols Number of columns in the destination matrix (destination row stride).
 * @param start_row Starting row index in the source.
 * @param start_col Starting column index in the source.
 * @param num_rows Number of rows to select.
 * @param num_cols Number of columns to select.
 */
__global__ void selectBatchKernel(const double* src, double* dst, int src_cols, int dst_cols,
                                  int start_row, int start_col, int num_rows, int num_cols) {
    // Position of this thread inside the selected sub-block.
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so edge threads may lie outside the selection.
    if (row < num_rows && col < num_cols) {
        // Flat offsets are computed in size_t: a 32-bit product such as
        // (start_row + row) * src_cols overflows for matrices with more than
        // 2^31 elements and would produce out-of-bounds accesses.
        const size_t src_row = static_cast<size_t>(start_row) + static_cast<size_t>(row);
        const size_t src_col = static_cast<size_t>(start_col) + static_cast<size_t>(col);
        const size_t src_idx = src_row * static_cast<size_t>(src_cols) + src_col;
        const size_t dst_idx = static_cast<size_t>(row) * static_cast<size_t>(dst_cols)
                             + static_cast<size_t>(col);

        // Copy the element from source to destination.
        dst[dst_idx] = src[src_idx];
    }
}
37 
/**
 * @brief Selects a rectangular subset of the matrix and returns it as a new matrix.
 *
 * Both ranges are half-open: rows [start_row, end_row) and columns
 * [start_col, end_col) are copied on the device by selectBatchKernel. The call
 * blocks until the copy has finished.
 *
 * @param start_row First row to include (must be >= 0).
 * @param end_row One past the last row to include (must be <= rows and > start_row).
 * @param start_col First column to include (must be >= 0).
 * @param end_col One past the last column to include (must be <= cols and > start_col).
 * @return A new Matrix of size (end_row - start_row) x (end_col - start_col).
 * @throws std::out_of_range If a range is empty, reversed, or outside the matrix.
 * @throws std::runtime_error If the kernel launch or its execution reports a CUDA error.
 */
Matrix Matrix::select_batch(int start_row, int end_row, int start_col, int end_col) const {
    // Validate input ranges
    if (start_row < 0 || end_row > rows || start_col < 0 || end_col > cols ||
        start_row >= end_row || start_col >= end_col) {
        throw std::out_of_range("Invalid row or column range specified");
    }

    // Calculate dimensions of the selected subset
    int num_rows = end_row - start_row;
    int num_cols = end_col - start_col;

    // Create a new matrix to store the selected subset
    // (presumably the constructor allocates result.d_data on the device — confirm in matrix.h)
    Matrix result(num_rows, num_cols);

    // One thread per selected element; the grid is rounded up to whole 16x16 blocks
    dim3 threadsPerBlock(16, 16);
    dim3 numBlocks((num_cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                   (num_rows + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Launch CUDA kernel; the destination row stride equals the selected column count
    selectBatchKernel<<<numBlocks, threadsPerBlock>>>(
        d_data, result.d_data, cols, num_cols, start_row, start_col, num_rows, num_cols
    );

    // Check for kernel launch errors (e.g. an invalid launch configuration)
    cudaError_t cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    // Wait for the kernel to finish. Faults raised while the kernel runs are only
    // reported by a synchronizing call, so the status must not be ignored.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel execution failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    return result;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
Matrix select_batch(int start_row, int end_row, int start_col, int end_col) const
Selects a subset of the matrix based on specified row and column ranges.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void selectBatchKernel(const double *src, double *dst, int src_cols, int dst_cols, int start_row, int start_col, int num_rows, int num_cols)
CUDA kernel for selecting a subset of the matrix.