CUDA Networks
neural_network_get_accuracy.cu
Go to the documentation of this file.
1 /**
2  * @file neural_network_get_accuracy.cu
3  * @brief Implementation of the NeuralNetwork::get_accuracy method.
4  */
#include "neural_network.h"
#include <cuda_runtime.h>
#include <iostream>
#include <stdexcept>
#include <string>
8 
/**
 * @brief Counts the positions at which two arrays hold exactly equal values.
 *
 * Every thread visits the indices idx, idx + T, idx + 2T, ... (T being the
 * total number of launched threads), tallies its matches in a local variable
 * and then adds that tally to *correct_count with a single atomicAdd. The
 * result therefore does not depend on the launch configuration: any 1D grid
 * with at least one thread covers all `size` elements.
 *
 * The index arithmetic is done in 64 bits so that the per-thread start index
 * cannot wrap around when `size` is close to INT_MAX.
 *
 * @param predictions   Device-accessible array of at least `size` doubles.
 * @param Y             Device-accessible array of at least `size` doubles
 *                      to compare against.
 * @param size          Number of elements to compare; values <= 0 make the
 *                      kernel a no-op.
 * @param correct_count Device-accessible counter. The kernel only adds to it
 *                      and never resets it, so the caller must zero it before
 *                      the launch.
 */
__global__ void calculate_accuracy_kernel(const double* predictions, const double* Y, int size, int* correct_count) {
    // Total number of threads in the (1D) grid; used as the loop stride.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;

    // Matches found by this thread only; flushed once at the end so that the
    // shared counter sees at most one atomic per thread instead of one per match.
    int local_matches = 0;

    for (long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         idx < size;
         idx += stride) {
        // Exact comparison, as in the original code.
        // NOTE(review): presumably both arrays hold integral class indices
        // stored as doubles — confirm against Matrix::argmax / get_predictions.
        if (predictions[idx] == Y[idx]) {
            ++local_matches;
        }
    }

    if (local_matches > 0) {
        atomicAdd(correct_count, local_matches);
    }
}
21 
/**
 * @brief Throws std::runtime_error when a CUDA runtime call did not succeed.
 *
 * @param status        Return code of the CUDA call being checked.
 * @param operation     Short name of the call, used in the error message.
 * @param device_buffer Device allocation to release before throwing, or
 *                      nullptr if there is nothing to release.
 */
static void throw_on_cuda_failure(cudaError_t status, const char* operation, int* device_buffer) {
    if (status == cudaSuccess) {
        return;
    }
    // Release the scratch allocation before unwinding so the error path does
    // not leak device memory; cudaFree(nullptr) performs no operation.
    cudaFree(device_buffer);
    throw std::runtime_error(std::string("NeuralNetwork::get_accuracy: ") + operation +
                             " failed: " + cudaGetErrorString(status));
}

/**
 * @brief Calculate the accuracy of predictions compared to true labels.
 *
 * Compares the vector returned by get_predictions() element-wise with the
 * per-column argmax of Y on the device and returns the fraction of equal
 * entries.
 *
 * @param Y Matrix of true labels; one column per sample (its argmax is taken
 *          per column and its column count is used as the sample count).
 * @return  Number of matching entries divided by Y.get_cols(), or 0.0 when Y
 *          has no columns.
 * @throws std::runtime_error if a CUDA allocation, memset, kernel launch or
 *         copy fails.
 */
double NeuralNetwork::get_accuracy(const Matrix& Y) const {
    const int num_samples = Y.get_cols();

    // An empty batch would otherwise launch a zero-sized grid (an invalid
    // configuration) and then evaluate 0 / 0.
    if (num_samples <= 0) {
        return 0.0;
    }

    // Get predictions
    // NOTE(review): assumes get_predictions() yields at least Y.get_cols()
    // entries — confirm against the forward pass that produced them.
    Vector predictions = get_predictions();

    // Convert Y matrix to argmax form for comparison
    Vector Y_argmax = Y.argmax();

    // Allocate and zero the device-side counter of correct predictions
    int* d_correct_count = nullptr;
    throw_on_cuda_failure(cudaMalloc(&d_correct_count, sizeof(int)), "cudaMalloc", nullptr);
    throw_on_cuda_failure(cudaMemset(d_correct_count, 0, sizeof(int)), "cudaMemset", d_correct_count);

    // Define block and grid dimensions; this ceil-division form cannot
    // overflow int, unlike (n + threadsPerBlock - 1) / threadsPerBlock.
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (num_samples - 1) / threadsPerBlock + 1;

    // Launch kernel to calculate accuracy
    calculate_accuracy_kernel<<<blocksPerGrid, threadsPerBlock>>>(
        predictions.get_data(), Y_argmax.get_data(), num_samples, d_correct_count
    );
    // Kernel launches do not return a status; launch-configuration errors
    // are only visible through cudaGetLastError().
    throw_on_cuda_failure(cudaGetLastError(), "calculate_accuracy_kernel launch", d_correct_count);

    // Copy correct count from device to host. Initialised so the value is
    // never indeterminate; a failed copy throws before it is used.
    int h_correct_count = 0;
    throw_on_cuda_failure(
        cudaMemcpy(&h_correct_count, d_correct_count, sizeof(int), cudaMemcpyDeviceToHost),
        "cudaMemcpy", d_correct_count);

    // Free device memory
    throw_on_cuda_failure(cudaFree(d_correct_count), "cudaFree", nullptr);

    // Calculate accuracy
    return static_cast<double>(h_correct_count) / num_samples;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
int get_cols() const
Get the number of columns in the matrix.
Vector argmax() const
Computes the argmax of each column in the matrix.
double get_accuracy(const Matrix &Y) const
Calculate the accuracy of predictions compared to true labels.
Vector get_predictions() const
Get predictions from the output layer (A2).
Represents a vector with GPU-accelerated operations.
Definition: vector.h:13
double * get_data() const
Get the raw data pointer of the vector.
Defines the NeuralNetwork class for a simple feedforward neural network.
__global__ void calculate_accuracy_kernel(const double *predictions, const double *Y, int size, int *correct_count)