6 #include <cuda_runtime.h>
11 int idx = blockIdx.x * blockDim.x + threadIdx.x;
16 if (predictions[idx] == Y[idx]) {
17 atomicAdd(correct_count, 1);
35 cudaMalloc(&d_correct_count, sizeof(int));
36 cudaMemset(d_correct_count, 0, sizeof(int));
39 int threadsPerBlock = 256;
40 int blocksPerGrid = (Y.get_cols() + threadsPerBlock - 1) / threadsPerBlock;
43 calculate_accuracy_kernel<<<blocksPerGrid, threadsPerBlock>>>(
49 cudaMemcpy(&h_correct_count, d_correct_count, sizeof(int), cudaMemcpyDeviceToHost);
52 double accuracy = static_cast<double>(h_correct_count) / Y.get_cols();
55 cudaFree(d_correct_count);
Represents a matrix with GPU-accelerated operations.
int get_cols() const
Get the number of columns in the matrix.
Vector argmax() const
Computes the argmax of each column in the matrix.
double get_accuracy(const Matrix &Y) const
Calculate the accuracy of predictions compared to true labels.
Vector get_predictions() const
Get predictions from the output layer (A2).
Represents a vector with GPU-accelerated operations.
double * get_data() const
Get the raw data pointer of the vector.
Defines the NeuralNetwork class for a simple feedforward neural network.
__global__ void calculate_accuracy_kernel(const double *predictions, const double *Y, int size, int *correct_count)