CUDA Networks
neural_network_backward.cu
Go to the documentation of this file.
1 /**
2  * @file neural_network_backward.cu
3  * @brief Implementation of the NeuralNetwork::backward method.
4  */
5 #include "neural_network.h"
6 #include <iostream>
7 
/**
 * @brief Backward pass: fills the gradient members DZ2, DW2, db2, DZ1, DW1, db1.
 *
 * Uses the members A1, A2, Z1 and W2 together with the arguments to evaluate
 *
 *   DZ2 = A2 - Y
 *   DW2 = (DZ2 * A1^T) / m
 *   db2 = sum(DZ2) / m
 *   DZ1 = (W2^T * DZ2) .* ReLU'(Z1)
 *   DW1 = (DZ1 * X^T) / m
 *   db1 = sum(DZ1) / m
 *
 * where m = X.get_cols(), i.e. each column of X is treated as one training
 * example. Both bias gradients are scalars: Matrix::sum() adds up every
 * element of the matrix.
 *
 * @param X Network input; its column count is used as the batch size m.
 * @param Y Matrix subtracted from A2 (presumably the training targets, with
 *          the same shape as A2 -- TODO confirm against the caller).
 *
 * NOTE(review): A1, A2 and Z1 are presumably cached by a preceding forward
 * pass -- confirm. There is also no guard against m == 0; every division
 * below would then be a division by zero.
 */
void NeuralNetwork::backward(const Matrix& X, const Matrix& Y) {
    // One column of X per training example.
    const int batch_size = X.get_cols();

    // ---- Output layer ----------------------------------------------------
    // Error at the output: DZ2 = A2 - Y
    DZ2 = A2.subtract(Y);

    // Weight gradient: DW2 = (DZ2 * A1^T) / m
    const Matrix A1_transposed = A1.transpose();
    DW2 = DZ2.multiply(A1_transposed);
    DW2.divide_scalar(batch_size);

    // Bias gradient: db2 = sum(DZ2) / m
    const double output_error_total = DZ2.sum();
    db2 = output_error_total / batch_size;

    // ---- Hidden layer ----------------------------------------------------
    // Push the output error back through W2, then mask it with ReLU'(Z1):
    // DZ1 = (W2^T * DZ2) .* ReLU'(Z1)
    const Matrix propagated_error = W2.transpose().multiply(DZ2);
    const Matrix relu_mask = Z1.relu_derivative();
    DZ1 = propagated_error.multiply_elementwise(relu_mask);

    // Weight gradient: DW1 = (DZ1 * X^T) / m
    const Matrix X_transposed = X.transpose();
    DW1 = DZ1.multiply(X_transposed);
    DW1.divide_scalar(batch_size);

    // Bias gradient: db1 = sum(DZ1) / m
    const double hidden_error_total = DZ1.sum();
    db1 = hidden_error_total / batch_size;
}
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
void divide_scalar(double scalar)
Divides all elements in the matrix by a scalar.
int get_cols() const
Get the number of columns in the matrix.
Matrix multiply_elementwise(const Matrix &other) const
Performs element-wise multiplication with another matrix.
Matrix transpose() const
Transposes the matrix and returns a new Matrix object.
Matrix subtract(const Matrix &other) const
Subtracts another matrix from this matrix.
Matrix multiply(const Matrix &other) const
Multiplies this matrix with another matrix.
Matrix relu_derivative() const
Applies the derivative of the ReLU activation function to the matrix.
double sum() const
Sums all elements in the matrix.
Definition: matrix_sum.cu:15
void backward(const Matrix &X, const Matrix &Y)
Perform backward propagation through the network.
Defines the NeuralNetwork class for a simple feedforward neural network.