7 #include <cuda_runtime.h>
20 int idx = blockIdx.x * blockDim.x + threadIdx.x;
25 double result = data[idx] * scalar;
28 if (!isfinite(result)) {
29 result = (result > 0.0) ? DBL_MAX : -DBL_MAX;
43 int size = rows * cols;
46 int threadsPerBlock = 256;
47 int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
50 multiplyScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
53 cudaError_t cudaStatus = cudaGetLastError();
54 if (cudaStatus != cudaSuccess) {
55 throw std::runtime_error(
"Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
59 cudaDeviceSynchronize();
void multiply_scalar(double scalar)
Multiplies all elements in the matrix by a scalar.
Defines the Matrix class for GPU-accelerated matrix operations.
__global__ void multiplyScalarKernel(double *data, double scalar, int size)
CUDA kernel for multiplying matrix elements by a scalar.