CUDA Networks
neural_network_initialize.cu
Go to the documentation of this file.
1 /**
2  * @file neural_network_initialize.cu
3  * @brief Implementation of the NeuralNetwork::initialize method.
4  */
5 
6 #include "neural_network.h"
7 #include <cmath>
8 
10  // Initialize W1 with random values
11  W1.randomize();
12  // Scale W1 by sqrt(2.0 / input_size) so the initial weight magnitudes shrink as input_size grows
13  W1.multiply_scalar(std::sqrt(2.0 / input_size));
14 
15  // Initialize b1 with random values
16  b1.randomize();
17  // Scale b1 by 0.01 to keep initial values small (randomize() yields [-0.5, 0.5], so b1 ends up within [-0.005, 0.005])
18  b1.multiply_scalar(0.01);
19 
20  // Initialize W2 with random values
21  W2.randomize();
22  // Scale W2 by sqrt(2.0 / hidden_size) so the initial weight magnitudes shrink as hidden_size grows
23  W2.multiply_scalar(std::sqrt(2.0 / hidden_size));
24 
25  // Initialize b2 with random values
26  b2.randomize();
27  // Scale b2 by 0.01 to keep initial values small (randomize() yields [-0.5, 0.5], so b2 ends up within [-0.005, 0.005])
28  b2.multiply_scalar(0.01);
29 
30  // Initialize other matrices with zeros
31  A.initialize();
32  Z1.initialize();
33  A1.initialize();
34  Z2.initialize();
35  A2.initialize();
36  DZ2.initialize();
37  DW2.initialize();
38  DZ1.initialize();
39  DW1.initialize();
40 
41  // Initialize scalar gradients to zero
42  db1 = 0.0;
43  db2 = 0.0;
44 }
void initialize()
Initialize the matrix (typically sets all elements to zero)
void randomize()
Randomize the matrix elements with values between -0.5 and 0.5.
void multiply_scalar(double scalar)
Multiplies all elements in the matrix by a scalar.
void initialize()
Initialize the neural network parameters.
void randomize()
Randomize the vector elements with values between -0.5 and 0.5.
void multiply_scalar(double scalar)
Multiplies all elements in the vector by a scalar.
Defines the NeuralNetwork class for a simple feedforward neural network.