CUDA Networks
neural_network.h
Go to the documentation of this file.
1 /**
2  * @file neural_network.h
3  * @brief Defines the NeuralNetwork class for a simple feedforward neural network.
4  */
5 
6 #ifndef NEURAL_NETWORK_H
7 #define NEURAL_NETWORK_H
8 
9 #include "../linear_algebra/matrix.h"
10 #include "../linear_algebra/vector.h"
11 
12 /**
13  * @class NeuralNetwork
14  * @brief Represents a simple feedforward neural network with one hidden layer.
15  */
16 class NeuralNetwork {
17 public:
18  /**
19  * @brief Construct a new NeuralNetwork object
20  * @param input_size Number of input features
21  * @param hidden_size Number of neurons in the hidden layer
22  * @param output_size Number of output classes
23  */
24  NeuralNetwork(int input_size, int hidden_size, int output_size);
25 
26  /**
27  * @brief Destroy the NeuralNetwork object
28  */
29  ~NeuralNetwork();
30 
31  /**
32  * @brief Initialize the neural network parameters
33  */
34  void initialize();
35 
36  /**
37  * @brief Perform forward propagation through the network
38  * @param X Input data matrix
39  */
40  void forward(const Matrix& X);
41 
42  /**
43  * @brief Perform backward propagation through the network
44  * @param X Input data matrix
45  * @param Y True labels matrix
46  */
47  void backward(const Matrix& X, const Matrix& Y);
48 
49  /**
50  * @brief Updates the network parameters based on computed gradients.
51  * @param learning_rate The learning rate for the parameter update.
52  */
53  void update_params(double learning_rate);
54 
55  /**
56  * @brief Get predictions from the output layer (A2)
57  * @return Vector containing the predicted class indices
58  */
59  Vector get_predictions() const;
60 
61  /**
62  * @brief Calculate the accuracy of predictions compared to true labels
63  * @param Y True labels matrix
64  * @return Accuracy as a fraction of correct predictions
65  */
66  double get_accuracy(const Matrix& Y) const;
67 
68  /**
69  * @brief Perform gradient descent to train the neural network
70  * @param X Input data matrix
71  * @param Y True labels matrix
72  * @param learning_rate Learning rate for parameter updates
73  * @param epochs Number of training epochs
74  */
75  void gradient_descent(const Matrix& X, const Matrix& Y, double learning_rate, int epochs);
76 
77  /**
78  * @brief Get the pointer to the W1 matrix data
79  * @return Pointer to the W1 matrix data on the device
80  */
81  double* get_W1_data() const { return W1.get_data(); }
82 
83  /**
84  * @brief Get the pointer to the W2 matrix data
85  * @return Pointer to the W2 matrix data on the device
86  */
87  double* get_W2_data() const { return W2.get_data(); }
88 
89  /**
90  * @brief Get the pointer to the b1 vector data
91  * @return Pointer to the b1 vector data on the device
92  */
93  double* get_b1_data() const { return b1.get_data(); }
94 
95  /**
96  * @brief Get the pointer to the b2 vector data
97  * @return Pointer to the b2 vector data on the device
98  */
99  double* get_b2_data() const { return b2.get_data(); }
100 
101  /**
102  * @brief Get the dimensions of the W1 matrix
103  * @return std::pair<int, int> containing rows and columns of W1
104  */
105  std::pair<int, int> get_W1_dimensions() const { return {W1.get_rows(), W1.get_cols()}; }
106 
107  /**
108  * @brief Get the dimensions of the W2 matrix
109  * @return std::pair<int, int> containing rows and columns of W2
110  */
111  std::pair<int, int> get_W2_dimensions() const { return {W2.get_rows(), W2.get_cols()}; }
112 
113  /**
114  * @brief Get the size of the b1 vector
115  * @return Size of the b1 vector
116  */
117  int get_b1_size() const { return b1.get_rows(); }
118 
119  /**
120  * @brief Get the size of the b2 vector
121  * @return Size of the b2 vector
122  */
123  int get_b2_size() const { return b2.get_rows(); }
124 
125  /**
126  * @brief Get the DW1 matrix
127  * @return The DW1 matrix
128  */
129  Matrix get_DW1() const { return DW1; }
130 
131  /**
132  * @brief Get the db1 scalar
133  * @return The db1 scalar
134  */
135  double get_db1() const { return db1; }
136 
137  /**
138  * @brief Get the DW2 matrix
139  * @return The DW2 matrix
140  */
141  Matrix get_DW2() const { return DW2; }
142 
143  /**
144  * @brief Get the db2 scalar
145  * @return The db2 scalar
146  */
147  double get_db2() const { return db2; }
148 
149  /**
150  * @brief Get the pointer to the A matrix data (input matrix)
151  * @return Pointer to the A matrix data on the device
152  */
153  double* get_A_data() const { return A.get_data(); }
154 
155  /**
156  * @brief Get the dimensions of the A matrix
157  * @return std::pair<int, int> containing rows and columns of A
158  */
159  std::pair<int, int> get_A_dimensions() const { return {A.get_rows(), A.get_cols()}; }
160 
161  /**
162  * @brief Get the pointer to the Z1 matrix data (pre-activation of hidden layer)
163  * @return Pointer to the Z1 matrix data on the device
164  */
165  double* get_Z1_data() const { return Z1.get_data(); }
166 
167  /**
168  * @brief Get the dimensions of the Z1 matrix
169  * @return std::pair<int, int> containing rows and columns of Z1
170  */
171  std::pair<int, int> get_Z1_dimensions() const { return {Z1.get_rows(), Z1.get_cols()}; }
172 
173  /**
174  * @brief Get the pointer to the A1 matrix data (activation of hidden layer)
175  * @return Pointer to the A1 matrix data on the device
176  */
177  double* get_A1_data() const { return A1.get_data(); }
178 
179  /**
180  * @brief Get the dimensions of the A1 matrix
181  * @return std::pair<int, int> containing rows and columns of A1
182  */
183  std::pair<int, int> get_A1_dimensions() const { return {A1.get_rows(), A1.get_cols()}; }
184 
185  /**
186  * @brief Get the pointer to the Z2 matrix data (pre-activation of output layer)
187  * @return Pointer to the Z2 matrix data on the device
188  */
189  double* get_Z2_data() const { return Z2.get_data(); }
190 
191  /**
192  * @brief Get the dimensions of the Z2 matrix
193  * @return std::pair<int, int> containing rows and columns of Z2
194  */
195  std::pair<int, int> get_Z2_dimensions() const { return {Z2.get_rows(), Z2.get_cols()}; }
196 
197  /**
198  * @brief Get the pointer to the A2 matrix data (activation of output layer)
199  * @return Pointer to the A2 matrix data on the device
200  */
201  double* get_A2_data() const { return A2.get_data(); }
202 
203  /**
204  * @brief Get the dimensions of the A2 matrix
205  * @return std::pair<int, int> containing rows and columns of A2
206  */
207  std::pair<int, int> get_A2_dimensions() const { return {A2.get_rows(), A2.get_cols()}; }
208 
209 private:
210  int input_size; ///< Number of input features
211  int hidden_size; ///< Number of neurons in the hidden layer
212  int output_size; ///< Number of output classes
213 
214  Matrix W1; ///< Weights for the hidden layer
215  Vector b1; ///< Biases for the hidden layer
216  Matrix W2; ///< Weights for the output layer
217  Vector b2; ///< Biases for the output layer
218 
219  Matrix A; ///< Input matrix
220  Matrix Z1; ///< Pre-activation of hidden layer
221  Matrix A1; ///< Activation of hidden layer
222  Matrix Z2; ///< Pre-activation of output layer
223  Matrix A2; ///< Activation of output layer (final output)
224 
225  Matrix DZ2; ///< Gradient of Z2
226  Matrix DW2; ///< Gradient of W2
227  double db2; ///< Gradient of b2 (scalar)
228  Matrix DZ1; ///< Gradient of Z1
229  Matrix DW1; ///< Gradient of W1
230  double db1; ///< Gradient of b1 (scalar)
231 };
232 
233 #endif // NEURAL_NETWORK_H
Represents a matrix with GPU-accelerated operations.
Definition: matrix.h:18
int get_rows() const
Get the number of rows in the matrix.
double * get_data() const
Get the raw data pointer of the matrix.
int get_cols() const
Get the number of columns in the matrix.
Represents a simple feedforward neural network with one hidden layer.
double * get_b1_data() const
Get the pointer to the b1 vector data.
void backward(const Matrix &X, const Matrix &Y)
Perform backward propagation through the network.
Matrix get_DW1() const
Get the DW1 matrix.
void update_params(double learning_rate)
Updates the network parameters based on computed gradients.
double get_accuracy(const Matrix &Y) const
Calculate the accuracy of predictions compared to true labels.
void forward(const Matrix &X)
Perform forward propagation through the network.
double get_db2() const
Get the db2 scalar.
int get_b1_size() const
Get the size of the b1 vector.
double * get_W2_data() const
Get the pointer to the W2 matrix data.
std::pair< int, int > get_Z1_dimensions() const
Get the dimensions of the Z1 matrix.
int get_b2_size() const
Get the size of the b2 vector.
double * get_b2_data() const
Get the pointer to the b2 vector data.
double * get_W1_data() const
Get the pointer to the W1 matrix data.
void initialize()
Initialize the neural network parameters.
double * get_A1_data() const
Get the pointer to the A1 matrix data (activation of hidden layer)
~NeuralNetwork()
Destroy the NeuralNetwork object.
double get_db1() const
Get the db1 scalar.
std::pair< int, int > get_W2_dimensions() const
Get the dimensions of the W2 matrix.
double * get_A_data() const
Get the pointer to the A matrix data (input matrix)
std::pair< int, int > get_A1_dimensions() const
Get the dimensions of the A1 matrix.
double * get_Z2_data() const
Get the pointer to the Z2 matrix data (pre-activation of output layer)
Vector get_predictions() const
Get predictions from the output layer (A2)
std::pair< int, int > get_W1_dimensions() const
Get the dimensions of the W1 matrix.
std::pair< int, int > get_Z2_dimensions() const
Get the dimensions of the Z2 matrix.
Matrix get_DW2() const
Get the DW2 matrix.
std::pair< int, int > get_A2_dimensions() const
Get the dimensions of the A2 matrix.
double * get_A2_data() const
Get the pointer to the A2 matrix data (activation of output layer)
double * get_Z1_data() const
Get the pointer to the Z1 matrix data (pre-activation of hidden layer)
void gradient_descent(const Matrix &X, const Matrix &Y, double learning_rate, int epochs)
Perform gradient descent to train the neural network.
std::pair< int, int > get_A_dimensions() const
Get the dimensions of the A matrix.
NeuralNetwork(int input_size, int hidden_size, int output_size)
Construct a new NeuralNetwork object.
Represents a vector with GPU-accelerated operations.
Definition: vector.h:13
int get_rows() const
Get the number of elements in the vector.
double * get_data() const
Get the raw data pointer of the vector.