CUDA Networks
Public Member Functions | List of all members
Vector Class Reference

Represents a vector with GPU-accelerated operations. More...

#include <vector.h>

Public Member Functions

 Vector (int rows)
 Construct a new Vector object. More...
 
 ~Vector ()
 Destroy the Vector object. More...
 
void initialize ()
 Initialize the vector by setting all elements to zero. More...
 
void randomize ()
 Randomize the vector elements with values between -0.5 and 0.5. More...
 
void print (int decimals)
 Print the vector contents. More...
 
int get_rows () const
 Get the number of elements in the vector. More...
 
double * get_data () const
 Get the raw data pointer of the vector. More...
 
Vector copy () const
 Creates a deep copy of the vector. More...
 
void multiply_scalar (double scalar)
 Multiplies all elements in the vector by a scalar. More...
 
void divide_scalar (double scalar)
 Divides all elements in the vector by a scalar. More...
 
void subtract_scalar (double scalar)
 Subtracts a scalar value from all elements in the vector. More...
 

Detailed Description

Represents a vector with GPU-accelerated operations.

Definition at line 13 of file vector.h.

Constructor & Destructor Documentation

◆ Vector()

Vector::Vector ( int  rows)

Construct a new Vector object.

Parameters
rows: Number of elements in the vector

Definition at line 8 of file vector_constructor.cu.

8  : rows(rows) {
9  // Allocate memory on the GPU for the vector data
10  cudaMalloc(&d_data, rows * sizeof(double));
11 }

◆ ~Vector()

Vector::~Vector ( )

Destroy the Vector object.

Definition at line 8 of file vector_destructor.cu.

8  {
9  // Free the GPU memory allocated for this vector
10  cudaFree(d_data);
11 }

Member Function Documentation

◆ copy()

Vector Vector::copy ( ) const

Creates a deep copy of the vector.

Returns
A new Vector object with the same content as the original.

Definition at line 13 of file vector_copy.cu.

13  {
14  // Create a new vector with the same number of rows
15  Vector result(rows);
16 
17  // Copy the data from the current vector to the new vector
18  cudaMemcpy(result.d_data, d_data, rows * sizeof(double), cudaMemcpyDeviceToDevice);
19 
20  // Return the new vector
21  return result;
22 }
Vector: Represents a vector with GPU-accelerated operations.
Definition: vector.h:13

◆ divide_scalar()

void Vector::divide_scalar ( double  scalar)

Divides all elements in the vector by a scalar.

Parameters
scalar: The scalar to divide by.
Exceptions
std::invalid_argument: if scalar is exactly zero.

Definition at line 47 of file vector_divide_scalar.cu.

47  {
48  // Check for division by exactly zero
49  if (scalar == 0.0) {
50  throw std::invalid_argument("Cannot divide by exactly zero");
51  }
52 
53  // Calculate total number of elements
54  int size = rows;
55 
56  // Define block and grid dimensions
57  int threadsPerBlock = 256;
58  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
59 
60  // Launch CUDA kernel
61  vectorDivideScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
62 
63  // Check for kernel launch errors
64  cudaError_t cudaStatus = cudaGetLastError();
65  if (cudaStatus != cudaSuccess) {
66  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
67  }
68 
69  // Synchronize device
70  cudaDeviceSynchronize();
71 }

◆ get_data()

double * Vector::get_data ( ) const

Get the raw data pointer of the vector.

Returns
double* Pointer to the vector data on the device

Definition at line 7 of file vector_get_data.cu.

7  {
8  // Return the pointer to the GPU memory
9  return d_data;
10 }

◆ get_rows()

int Vector::get_rows ( ) const

Get the number of elements in the vector.

Returns
int Number of elements

Definition at line 7 of file vector_get_rows.cu.

7  {
8  // Return the number of elements in the vector
9  return rows;
10 }

◆ initialize()

void Vector::initialize ( )

Initialize the vector by setting all elements to zero.

Definition at line 8 of file vector_initialize.cu.

8  {
9  // Use cudaMemset to set all elements of d_data to 0
10  cudaMemset(d_data, 0, rows * sizeof(double));
11 }

◆ multiply_scalar()

void Vector::multiply_scalar ( double  scalar)

Multiplies all elements in the vector by a scalar.

Parameters
scalar: The scalar to multiply by.

Definition at line 41 of file vector_multiply_scalar.cu.

41  {
42  // Calculate total number of elements
43  int size = rows;
44 
45  // Define block and grid dimensions
46  int threadsPerBlock = 256;
47  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
48 
49  // Launch CUDA kernel
50  vectorMultiplyScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
51 
52  // Check for kernel launch errors
53  cudaError_t cudaStatus = cudaGetLastError();
54  if (cudaStatus != cudaSuccess) {
55  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
56  }
57 
58  // Synchronize device
59  cudaDeviceSynchronize();
60 }

◆ print()

void Vector::print ( int  decimals)

Print the vector contents.

Parameters
decimals: Number of decimal places to display

Definition at line 11 of file vector_print.cu.

11  {
12  // Create format string for desired number of decimals
13  char format[20];
14  sprintf(format, "%%d:\t%%.%df\n", decimals);
15 
16  // Allocate host memory to copy the data from GPU
17  double* h_data = new double[rows];
18  cudaMemcpy(h_data, d_data, rows * sizeof(double), cudaMemcpyDeviceToHost);
19 
20  // Print vector dimensions
21  std::cout << "Vector with " << rows << " rows:\n";
22 
23  // Print column header (since vector is treated as a single column)
24  std::cout << "\t0:\t\n";
25 
26  // Iterate over rows
27  for (int i = 0; i < rows; ++i) {
28  // If more than 10 rows, only print first and last 5
29  if (i == 5 && rows > 10) {
30  std::cout << "...\t...\n";
31  i = rows - 5; // Skip to the last 5 rows
32  }
33  // Print row index and value
34  printf(format, i, h_data[i]);
35  }
36 
37  // Free the allocated host memory
38  delete[] h_data;
39  std::cout << std::endl;
40 }

◆ randomize()

void Vector::randomize ( )

Randomize the vector elements with values between -0.5 and 0.5.

Fills the vector with random values between -0.5 and 0.5.

Definition at line 37 of file vector_randomize.cu.

37  {
38  // Define the number of threads per block (a common choice for good occupancy)
39  int threadsPerBlock = 256;
40 
41  // Calculate the number of blocks needed to cover all elements
42  // We use ceiling division to ensure we have enough blocks
43  int blocksPerGrid = (rows + threadsPerBlock - 1) / threadsPerBlock;
44 
45  // Generate a seed for the random number generator
46  // We use the current time to ensure different seeds across runs
47  unsigned long seed = time(NULL);
48 
49  // Launch the CUDA kernel
50  randomizeKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, rows, seed);
51 
52  // Wait for the kernel to complete before returning
53  // This ensures all random values are generated before any subsequent operations
54  cudaDeviceSynchronize();
55 }

◆ subtract_scalar()

void Vector::subtract_scalar ( double  scalar)

Subtracts a scalar value from all elements in the vector.

Parameters
scalar: The scalar value to subtract.

Definition at line 41 of file vector_subtract_scalar.cu.

41  {
42  // Calculate total number of elements
43  int size = rows;
44 
45  // Define block and grid dimensions
46  int threadsPerBlock = 256;
47  int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
48 
49  // Launch CUDA kernel
50  vectorSubtractScalarKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, scalar, size);
51 
52  // Check for kernel launch errors
53  cudaError_t cudaStatus = cudaGetLastError();
54  if (cudaStatus != cudaSuccess) {
55  throw std::runtime_error("Kernel launch failed: " + std::string(cudaGetErrorString(cudaStatus)));
56  }
57 
58  // Synchronize device
59  cudaDeviceSynchronize();
60 }

The documentation for this class was generated from the following files: