cuda-networks-docs/neural__network__get__accuracy_8cu_source.html

 /**

  * @file neural_network_get_accuracy.cu

  * @brief Implementation of the NeuralNetwork::get_accuracy method.

  */

 #include "neural_network.h"

 #include <cuda_runtime.h>

 #include <iostream>
 #include <stdexcept>
 #include <string>


 /**
  * @brief Counts the positions at which predictions[i] == Y[i].
  *
  * Uses only the x dimension of the launch configuration. Each thread starts
  * at its global x index and advances by the total number of launched threads,
  * so every element in [0, size) is visited regardless of how many blocks are
  * launched. Matches are tallied in a thread-local counter and published with
  * at most one atomicAdd per thread.
  *
  * @param predictions   Pointer to `size` predicted values, read on the device.
  * @param Y             Pointer to `size` reference values, read on the device.
  * @param size          Number of elements to compare.
  * @param correct_count Counter the matches are added to. The kernel only adds
  *                      to it, so the caller must zero it before the launch.
  */
 __global__ void calculate_accuracy_kernel(const double* predictions, const double* Y, int size, int* correct_count) {
     // 64-bit indexing: blockIdx.x * blockDim.x can exceed INT_MAX when size is
     // close to it, and a wrapped (negative) int index would pass an `idx < size` test.
     const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
     const long long first = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

     int local_matches = 0;
     for (long long i = first; i < size; i += stride) {
         // Exact comparison, same as a plain `==` on the two stored doubles.
         if (predictions[i] == Y[i]) {
             ++local_matches;
         }
     }

     // Threads that found nothing skip the atomic entirely.
     if (local_matches > 0) {
         atomicAdd(correct_count, local_matches);
     }
 }


 /**
  * @brief Computes the fraction of samples whose prediction equals the
  *        per-column argmax of Y.
  *
  * The values returned by get_predictions() are compared element-wise with
  * Y.argmax() on the device; the number of equal entries is divided by
  * Y.get_cols().
  *
  * @param Y Label matrix; its column count is used as the number of samples.
  * @return Matches divided by Y.get_cols(), or 0.0 when Y has no columns.
  * @throws std::runtime_error if a CUDA runtime call or the kernel launch fails.
  */
 double NeuralNetwork::get_accuracy(const Matrix& Y) const {
     const int num_samples = Y.get_cols();

     // Nothing to score: avoid a zero-block launch and a 0/0 division.
     if (num_samples <= 0) {
         return 0.0;
     }

     Vector predictions = get_predictions();
     Vector Y_argmax = Y.argmax();
     // NOTE(review): assumes both vectors hold at least num_samples entries;
     // their lengths are not checked here — confirm against Vector's API.

     int* d_correct_count = nullptr;

     // Turns a failed CUDA call into an exception, releasing the counter first.
     // cudaFree(nullptr) is a no-op, so this is safe before the allocation succeeds.
     const auto check = [&d_correct_count](cudaError_t status, const char* what) {
         if (status != cudaSuccess) {
             cudaFree(d_correct_count);
             d_correct_count = nullptr;
             throw std::runtime_error(std::string("NeuralNetwork::get_accuracy: ") + what +
                                      " failed: " + cudaGetErrorString(status));
         }
     };

     check(cudaMalloc(&d_correct_count, sizeof(int)), "cudaMalloc");
     check(cudaMemset(d_correct_count, 0, sizeof(int)), "cudaMemset");

     const int threadsPerBlock = 256;
     // Ceil-division written so that it cannot overflow int for large num_samples.
     const int blocksPerGrid =
         num_samples / threadsPerBlock + (num_samples % threadsPerBlock != 0 ? 1 : 0);

     calculate_accuracy_kernel<<<blocksPerGrid, threadsPerBlock>>>(
         predictions.get_data(), Y_argmax.get_data(), num_samples, d_correct_count
     );
     // Launch-configuration errors are only visible through cudaGetLastError().
     check(cudaGetLastError(), "calculate_accuracy_kernel launch");

     // The blocking copy also waits for the kernel, so execution errors surface here.
     int h_correct_count = 0;
     check(cudaMemcpy(&h_correct_count, d_correct_count, sizeof(int), cudaMemcpyDeviceToHost),
           "cudaMemcpy");

     // Clear the pointer before freeing so check() cannot free it a second time.
     int* const to_release = d_correct_count;
     d_correct_count = nullptr;
     check(cudaFree(to_release), "cudaFree");

     return static_cast<double>(h_correct_count) / num_samples;
 }

Matrix
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18

Matrix::get_cols
int get_cols() const
Get the number of columns in the matrix.
Definition: matrix_get_cols.cu:7

Matrix::argmax
Vector argmax() const
Computes the argmax of each column in the matrix.
Definition: matrix_argmax.cu:44

NeuralNetwork::get_accuracy
double get_accuracy(const Matrix &Y) const
Calculate the accuracy of predictions compared to true labels.
Definition: neural_network_get_accuracy.cu:22

NeuralNetwork::get_predictions
Vector get_predictions() const
Get predictions from the output layer (A2).
Definition: neural_network_get_predictions.cu:7

Vector
Represents a vector with GPU-accelerated operations.
Definition: vector.h:13

Vector::get_data
double * get_data() const
Get the raw data pointer of the vector.
Definition: vector_get_data.cu:7

neural_network.h
Defines the NeuralNetwork class for a simple feedforward neural network.

calculate_accuracy_kernel
__global__ void calculate_accuracy_kernel(const double *predictions, const double *Y, int size, int *correct_count)
Definition: neural_network_get_accuracy.cu:9