CUDA Networks
vector_divide_scalar.cu
Go to the documentation of this file.
1 /**
2  * @file vector_divide_scalar.cu
3  * @brief Implementation of the Vector::divide_scalar method for GPU-accelerated division of a vector by a scalar.
4  */
5 
6 #include "vector.h"
7 #include <cuda_runtime.h>
8 #include <stdexcept>
9 #include <cfloat>
10 #include <cmath>
11 
/**
 * @brief CUDA kernel that divides every element of a vector by a scalar, in place.
 *
 * Each element is replaced by the IEEE-754 quotient data[i] / scalar, with two
 * adjustments:
 *  - a quotient that comes out infinite (overflow, division of a non-zero
 *    element by zero, or an infinite element) is saturated to +/-DBL_MAX using
 *    the sign of the quotient, so the signs of both the element and the scalar
 *    are respected;
 *  - a zero element divided by a zero scalar stays 0.0 instead of becoming NaN.
 * NaN elements and a NaN scalar propagate as NaN.
 *
 * Small scalars are NOT special-cased: DBL_EPSILON is the spacing of doubles
 * near 1.0, not a lower bound on usable divisors, so e.g. 1.0 / 1e-20 yields
 * the representable value 1e20.
 *
 * The loop advances by the total number of launched threads, so any 1D launch
 * configuration with at least one thread processes all @p size elements.
 *
 * @param data   Pointer to the vector data; must be dereferenceable from device
 *               code and hold at least @p size elements. Overwritten in place.
 * @param scalar The scalar to divide by.
 * @param size   Total number of elements in the vector; values <= 0 do nothing.
 */
__global__ void vectorDivideScalarKernel(double* data, double scalar, int size) {
    if (size <= 0) {
        return;
    }

    // Index arithmetic is done in size_t so that blockIdx.x * blockDim.x cannot
    // wrap around for sizes close to INT_MAX.
    const size_t count = static_cast<size_t>(size);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        const double value = data[i];
        double quotient = value / scalar;

        if (value == 0.0 && scalar == 0.0) {
            // 0 / 0 would be NaN; keep the element at zero.
            quotient = 0.0;
        } else if (isinf(quotient)) {
            // Saturate instead of storing infinity; the sign comes from the
            // quotient so a negative scalar flips it correctly.
            quotient = copysign(DBL_MAX, quotient);
        }

        data[i] = quotient;
    }
}
41 
/**
 * @brief Divides all elements in the vector by a scalar, in place on the device.
 *
 * Launches vectorDivideScalarKernel over the `rows` elements stored at
 * `d_data` and blocks until the kernel has finished. The per-element
 * arithmetic is defined by vectorDivideScalarKernel. An empty vector is a
 * no-op.
 *
 * @param scalar The scalar to divide by.
 * @throws std::invalid_argument if scalar is exactly zero.
 * @throws std::runtime_error if the kernel launch is rejected or the kernel
 *         fails during execution.
 */
void Vector::divide_scalar(double scalar) {
    // Reject division by exactly zero before touching the device.
    if (scalar == 0.0) {
        throw std::invalid_argument("Cannot divide by exactly zero");
    }

    // Total number of elements (members `rows` / `d_data` are declared outside
    // this file, presumably in vector.h).
    int size = rows;

    // Nothing to do for an empty vector; a zero-block launch would be rejected
    // as an invalid configuration.
    if (size <= 0) {
        return;
    }

    // Ceiling division written so it cannot overflow int for sizes near INT_MAX.
    const int threadsPerBlock = 256;
    const int blocksPerGrid =
        size / threadsPerBlock + ((size % threadsPerBlock != 0) ? 1 : 0);

    vectorDivideScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);

    // Launch-configuration errors are reported here, not by the launch itself.
    cudaError_t cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }

    // Faults raised while the kernel runs only surface at a synchronizing
    // call, so the result of the synchronization must be checked too.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        throw std::runtime_error("Kernel execution failed: " + std::string(cudaGetErrorString(cudaStatus)));
    }
}
void divide_scalar(double scalar)
Divides all elements in the vector by a scalar.
Defines the Vector class for GPU-accelerated vector operations.
__global__ void vectorDivideScalarKernel(double *data, double scalar, int size)
CUDA kernel for dividing vector elements by a scalar.