A neural network is a structure of connections and nodes that takes input and generates an output. It can be "taught" (by adjusting the weights and biases of its connections) from a training data set of inputs with acceptable outputs. See https://en.wikipedia.org/wiki/Neural_network for more details.
nn.c:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#define INPUT_COUNT 3
#define HIDDEN_COUNT 3
#define OUTPUT_COUNT 1
#define LEARNING_RATE 0.15
typedef struct
{
float in_Weights[INPUT_COUNT];
float inBias;
float value;
float out_Weights[OUTPUT_COUNT];
}Neuron;
typedef struct
{
float value;
}IO_Neuron;
typedef struct
{
int success;
IO_Neuron** training_in;
IO_Neuron** training_out;
int examples;
}TData;
//loads training data from a file in the format shown below
/*
#inputs,outputs,count
input1,input2,input3 output1,output2
*/
TData tData(const char* filename)
{
FILE* fp = fopen(filename,"r");
int ins,outs,count;
fscanf(fp,"#%i,%i,%i",&ins,&outs,&count);
TData ret;
ret.success = 1;
if (ins != INPUT_COUNT || outs != OUTPUT_COUNT)
{
printf("%s\n","File will not fit into network!" );
ret.success = 0;
return ret;
}
int i,j;
ret.training_in = malloc(sizeof(IO_Neuron*)*count);
ret.training_out = malloc(sizeof(IO_Neuron*)*count);
ret.examples = count;
for (i =0; i< count;i++)
{
ret.training_in[i] = malloc(sizeof(IO_Neuron)*INPUT_COUNT);
}
for (i =0; i< count;i++)
{
ret.training_out[i] = malloc(sizeof(IO_Neuron)*OUTPUT_COUNT);
}
for (i =0 ; i < count;i++)
{
int inIndex = 0;
int outIndex = 0;
for (j =0; j < (INPUT_COUNT*2 - 1);j++)
{
if (j % 2 == 1)
{
fscanf(fp,",");
}
else
{
fscanf(fp,"%f",&ret.training_in[i][inIndex]);
inIndex += 1;
}
}
fscanf(fp," ");
for (j =0; j < (OUTPUT_COUNT*2 - 1);j++)
{
if (j % 2 == 1)
{
fscanf(fp,",");
}
else
{
fscanf(fp,"%f",&ret.training_out[i][outIndex]);
outIndex += 1;
}
}
}
printf("%s\n","File Read Sucessfully!" );
return ret;
}
float genRandRange(float min,float max)
{
if (min == max)
return min;
float scale = rand() / (float) RAND_MAX; /* [0, 1.0] */
return min + scale * ( max - min ); /* [min, max] */
}
//activation function
float sigmoid(float x)
{
return 1 / (1 + exp(-x));
}
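//derivative of the activation function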
float sigmoid_derivative(float x)
{
return sigmoid(x) * (1 - sigmoid(x));
}
//computes weighted sum
float dot_summation(float* in,float* weights,int count)
{
int i;
float result = 0;
for (i =0;i < count;i++)
{
result += in[i]*weights[i];
}
return result;
}
//these functions extract data into an easier to handle format
float* ioValues(IO_Neuron* hidden_layer)
{
float* ret = malloc(sizeof(float)*INPUT_COUNT);
int i;
for (i =0; i < INPUT_COUNT;i++)
{
ret[i] = hidden_layer[i].value;
}
return ret;
}
float* values(Neuron* hidden_layer)
{
float* ret = malloc(sizeof(float)*HIDDEN_COUNT);
int i;
for (i =0; i < HIDDEN_COUNT;i++)
{
ret[i] = hidden_layer[i].value;
}
return ret;
}
float* outWeights(Neuron* hidden_layer,int index)
{
float* ret = malloc(sizeof(float)*HIDDEN_COUNT);
int i;
for (i =0; i < HIDDEN_COUNT;i++)
{
ret[i] = hidden_layer[i].out_Weights[index];
}
return ret;
}
//pass values through the neural network
void think(IO_Neuron* input_layer,Neuron* hidden_layer,IO_Neuron* output_layer)
{
int i;
float* io_values = ioValues(input_layer);
for (i =0; i < HIDDEN_COUNT;i++)
{
hidden_layer[i].value = sigmoid(dot_summation(io_values,hidden_layer[i].in_Weights,INPUT_COUNT) + hidden_layer[i].inBias);
}
free(io_values);
float* hidden_values = values(hidden_layer);
for (i =0; i < OUTPUT_COUNT;i++)
{
float* out_weights = outWeights(hidden_layer,i);
output_layer[i].value = sigmoid(dot_summation(hidden_values,out_weights,HIDDEN_COUNT));
free(out_weights);
}
free(hidden_values);
}
//adjust the neural network's connection weights and biases based upon training data
void train(IO_Neuron* input_layer,Neuron* hidden_layer,IO_Neuron* output_layer,IO_Neuron** input_training,IO_Neuron** output_training,int training_samples,int iterations)
{
int i,j,k,l;
IO_Neuron recorded_outputs[training_samples][OUTPUT_COUNT];
Neuron recorded_hidden[training_samples][HIDDEN_COUNT];
float error_output[training_samples][OUTPUT_COUNT];//contains output node's delta
float error_hidden[training_samples][HIDDEN_COUNT];
for (i =0; i < iterations;i++)
{
for (j =0; j < training_samples;j++)
{
think(input_training[j],hidden_layer,output_layer);
memcpy(recorded_outputs[j],output_layer,sizeof(IO_Neuron)*OUTPUT_COUNT);
memcpy(recorded_hidden[j],hidden_layer,sizeof(Neuron)*HIDDEN_COUNT);
}
for (j =0; j < training_samples;j++)
{
for (k =0; k < OUTPUT_COUNT;k++)
{
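//output delta: sigmoid derivative, value*(1 - value), times the error (target - actual)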
error_output[j][k] = recorded_outputs[j][k].value*(1 - recorded_outputs[j][k].value) * (output_training[j][k].value - recorded_outputs[j][k].value);
}
}
for (j =0; j < training_samples;j++)
{
for (k =0; k < HIDDEN_COUNT;k++)
{
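//hidden delta: sigmoid derivative times the output deltas weighted by the outgoing connections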
float errorFactor = 0;
for (l =0;l < OUTPUT_COUNT;l++)
{
errorFactor += (error_output[j][l]*hidden_layer[k].out_Weights[l]);
}
error_hidden[j][k] = recorded_hidden[j][k].value*(1 - recorded_hidden[j][k].value) * errorFactor;
}
}
for (j =0; j < training_samples;j++)
{
for (k =0; k < HIDDEN_COUNT;k++)
{//TODO update biases
hidden_layer[k].inBias = hidden_layer[k].inBias + LEARNING_RATE *error_hidden[j][k];
for (l = 0;l < INPUT_COUNT;l++)
{
hidden_layer[k].in_Weights[l] = hidden_layer[k].in_Weights[l] + (LEARNING_RATE*error_hidden[j][k]*input_training[j][l].value)/training_samples;
}
}
}
for (j =0; j < training_samples;j++)
{
for (k =0; k < HIDDEN_COUNT;k++)
{
for (l = 0;l < OUTPUT_COUNT;l++)
{
hidden_layer[k].out_Weights[l] = hidden_layer[k].out_Weights[l] + (LEARNING_RATE*error_output[j][l]*recorded_hidden[j][k].value)/training_samples;
}
}
}
}
}
//assign random weights to the neural network's connections
void randweights(Neuron* neurons)
{
int i;
for (i =0;i< HIDDEN_COUNT;i++)
{
neurons[i].in_Weights[0] = 2*genRandRange(0,1) - 1;
neurons[i].in_Weights[1] = 2*genRandRange(0,1) - 1;
neurons[i].in_Weights[2] = 2*genRandRange(0,1) - 1;
neurons[i].out_Weights[0] = 2*genRandRange(0,1) - 1;
neurons[i].inBias = 2*genRandRange(0,1) - 1;
}
}
int main()
{
srand(1);
int i,j;
//acquire training data
TData t_data = tData("training.txt");
if (!t_data.success)
{
return 0;
}
IO_Neuron** training_in = t_data.training_in;
IO_Neuron** training_out = t_data.training_out;
//allocate neural network
IO_Neuron* input_layer = malloc(sizeof(IO_Neuron)*INPUT_COUNT);
Neuron* hidden_layer = malloc(sizeof(Neuron)*HIDDEN_COUNT);
IO_Neuron* output_layer = malloc(sizeof(IO_Neuron)*OUTPUT_COUNT);
randweights(hidden_layer);
//train with training data
train(input_layer,hidden_layer,output_layer,training_in,training_out,t_data.examples,10000);
//test out the learned pattern
input_layer[0].value = 0;
input_layer[1].value = 0;
input_layer[2].value = 0;
//generates the output
think(input_layer,hidden_layer,output_layer);
for (i =0; i < OUTPUT_COUNT;i++)
{
printf("%f\n",output_layer[i].value );
}
return 0;
}
Here is a sample file of training data that can be read by the program.
training.txt:
#3,1,7
1,0,1 0
1,0,0 0
1,1,0 1
1,1,1 1
0,1,0 1
0,1,1 1
0,0,1 0
0,0,0 0
In case you have not noticed, the pattern in the data is simply that the output equals the first number of the input.
The neural network has 3 inputs, 3 hidden nodes, and 1 output; these can be changed easily by modifying the training data and the constants at the beginning of the code.
I know pure C is not often used for neural networks due to its lack of true object orientation and strict typing, but I prefer its simplicity and readability.
1 Answer
General Observations
The program could be more useful if it accepted arguments for the input file name; currently the input file name is hardwired into main().
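One possible shape, as a rough sketch (falling back to the current hardwired name when no argument is given is just one choice):
int main(int argc, char* argv[])
{
    //use the first command line argument as the training file, if provided
    const char* filename = (argc > 1) ? argv[1] : "training.txt";
    TData t_data = tData(filename);
    if (!t_data.success)
    {
        return EXIT_FAILURE;
    }
    /* ... the rest of main() as before ... */
    return EXIT_SUCCESS;
}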
When doing C program development it is always a good practice to compile with the -Wall and -Wextra flags (for example, gcc -Wall -Wextra nn.c -lm). These flags will help you find possible errors in the code. When I was learning how to program in C on Unix we had a program called lint which helped identify possible problems in the code; I always ran lint on my code. To some extent the -Wall and -Wextra flags have replaced lint.
If the program had been compiled with the -Wall and -Wextra flags it would have found the following issues:
nn.c: In function ‘tData’:
nn.c:78:21: warning: format ‘%f’ expects argument of type ‘float *’, but argument 3 has type ‘IO_Neuron *’ [-Wformat=]
78 | fscanf(fp,"%f",&ret.training_in[i][inIndex]);
| ~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| | |
| | IO_Neuron *
| float *
nn.c:91:21: warning: format ‘%f’ expects argument of type ‘float *’, but argument 3 has type ‘IO_Neuron *’ [-Wformat=]
91 | fscanf(fp,"%f",&ret.training_out[i][outIndex]);
| ~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| | |
| | IO_Neuron *
| float *
nn.c: In function ‘train’:
nn.c:189:23: warning: unused parameter ‘input_layer’ [-Wunused-parameter]
189 | void train(IO_Neuron* input_layer,Neuron* hidden_layer,IO_Neuron* output_layer,IO_Neuron** input_training,IO_Neuron** output_training,int training_samples,int iterations)
| ~~~~~~~~~~~^~~~~~~~~~~
nn.c: In function ‘main’:
nn.c:272:9: warning: unused variable ‘j’ [-Wunused-variable]
272 | int i,j;
| ^
Test for Possible Memory Allocation Errors
In modern high-level languages such as C++, memory allocation errors throw an exception that the programmer can catch. This is not the case in the C programming language. While it is rare in modern computers because there is so much memory, memory allocation can fail, especially if the code is working in a limited memory application such as embedded control systems. In the C programming language when memory allocation fails, the functions malloc(), calloc() and realloc() return NULL. Referencing any memory address through a NULL pointer results in undefined behavior (UB).
Possible undefined behavior in this case can be a memory page error (in Unix this would be called a segmentation violation), corrupted data in the program, and in very old computers it could even cause the computer to reboot (corruption of the stack pointer).
To prevent this undefined behavior a best practice is to always follow the memory allocation statement with a test that the pointer that was returned is not null.
Example of Current Code:
//allocate neural network
IO_Neuron* input_layer = malloc(sizeof(IO_Neuron)*INPUT_COUNT);
Neuron* hidden_layer = malloc(sizeof(Neuron)*HIDDEN_COUNT);
IO_Neuron* output_layer = malloc(sizeof(IO_Neuron)*OUTPUT_COUNT);
Example of Current Code with Test:
//allocate neural network
IO_Neuron* input_layer = malloc(sizeof(IO_Neuron)*INPUT_COUNT);
if (input_layer == NULL)
{
fprintf(stderr, "malloc of input_layer FAILED\n");
return EXIT_FAILURE;
}
Neuron* hidden_layer = malloc(sizeof(Neuron)*HIDDEN_COUNT);
if (hidden_layer == NULL)
{
fprintf(stderr, "malloc of hidden_layer FAILED\n");
return EXIT_FAILURE;
}
IO_Neuron* output_layer = malloc(sizeof(IO_Neuron)*OUTPUT_COUNT);
if (output_layer == NULL)
{
fprintf(stderr, "malloc of output_layer FAILED\n");
return EXIT_FAILURE;
}
In the above memory allocations, since the code seems to be allocating arrays of IO_Neuron or Neuron, it might be better to use calloc(size_t num, size_t size) rather than malloc(). One of the benefits of using calloc() is that all the memory allocated is zeroed out.
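For example (a sketch using the question's Neuron type):
Neuron* hidden_layer = calloc(HIDDEN_COUNT, sizeof(Neuron)); //zeroed, so weights, bias and value all start at 0
if (hidden_layer == NULL)
{
    fprintf(stderr, "calloc of hidden_layer FAILED\n");
    return EXIT_FAILURE;
}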
Convention When Using Memory Allocation in C
When using malloc(), calloc() or realloc() in C a common convention is to write sizeof *PTR rather than sizeof (PTR_TYPE). This makes the code easier to maintain and less error prone, since less editing is required if the type of the pointer changes.
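Applied to the allocation above, and combined with the calloc() suggestion, that becomes:
Neuron* hidden_layer = calloc(HIDDEN_COUNT, sizeof *hidden_layer); //no edit needed here if hidden_layer's type ever changes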
Input Optimization
It would be better to input an entire line at one time using getline(char **lineptr, size_t *n, FILE *stream) rather than using fscanf() multiple times per line. The getline() function will input the entire line at once and then it can be processed using string functions or character manipulation. The optimization is that fewer system calls are used to get the input. It might also be easier to create a function to get a line of input and process it this way.
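A minimal sketch of that approach, assuming a POSIX system where getline() is available (the function name read_line_of_floats and the strtof() parsing are illustrative, not the only way to do it):
#define _POSIX_C_SOURCE 200809L //getline() is POSIX 2008, not standard C
#include <stdio.h>
#include <stdlib.h>
//read one line and parse up to count comma separated floats from it
//returns 1 on success, 0 on end of file
static int read_line_of_floats(FILE* fp, float* out, int count)
{
    char* line = NULL;
    size_t len = 0;
    if (getline(&line, &len, fp) == -1)
    {
        free(line);
        return 0;
    }
    char* cursor = line;
    int i;
    for (i = 0; i < count; i++)
    {
        out[i] = strtof(cursor, &cursor); //strtof skips leading whitespace and advances cursor
        if (*cursor == ',')
        {
            cursor++; //step over the separator
        }
    }
    free(line);
    return 1;
}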
DRY Code
There is a programming principle called the Don't Repeat Yourself Principle, sometimes referred to as DRY code. If you find yourself repeating the same code multiple times it is better to encapsulate it in a function. If it is possible to loop through the code, that can reduce repetition as well.
In main() these 3 lines should be a loop:
input_layer[0].value = 0;
input_layer[1].value = 0;
input_layer[2].value = 0;
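For example:
for (i = 0; i < INPUT_COUNT; i++)
{
    input_layer[i].value = 0;
}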
In the TData tData(const char* filename) function the code below should be modified so that it can be made into a function that can be called twice with different input.
for (j =0; j < (INPUT_COUNT*2 - 1);j++)
{
if (j % 2 == 1)
{
fscanf(fp,",");
}
else
{
fscanf(fp,"%f",&ret.training_in[i][inIndex].value);
inIndex += 1;
}
}
fscanf(fp," ");
for (j =0; j < (OUTPUT_COUNT*2 - 1);j++)
{
if (j % 2 == 1)
{
fscanf(fp,",");
}
else
{
fscanf(fp,"%f",&ret.training_out[i][outIndex].value);
outIndex += 1;
}
}
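One possible shape for such a function (the name read_values is illustrative, not prescribed):
//read count comma separated values into one row of IO_Neurons
static void read_values(FILE* fp, IO_Neuron* row, int count)
{
    int i;
    for (i = 0; i < count; i++)
    {
        if (i > 0)
        {
            fscanf(fp, ","); //match the separator between values
        }
        fscanf(fp, "%f", &row[i].value);
    }
}
The two loops in tData() then collapse to:
read_values(fp, ret.training_in[i], INPUT_COUNT);
fscanf(fp, " ");
read_values(fp, ret.training_out[i], OUTPUT_COUNT);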
Complexity
Three of the functions, main(), TData tData(const char* filename) and void train(...), are too complex (do too much). All three of these should be broken into smaller functions.
As programs grow in size the use of main() should be limited to calling functions that parse the command line, calling functions that set up for processing, calling functions that execute the desired function of the program, and calling functions to clean up after the main portion of the program.
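For this program, main() could shrink to something like the skeleton below, where each comment stands in for one of the smaller functions still to be written:
int main(int argc, char* argv[])
{
    (void)argc; (void)argv; //used once command line parsing is implemented
    //1. parse the command line (training file name, perhaps an iteration count)
    //2. set up: load the training data, allocate the network, randomize the weights
    //3. run: train the network, then evaluate it on test inputs
    //4. clean up: free the network and the training data
    return 0;
}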
There is also a programming principle called the Single Responsibility Principle that applies here. The Single Responsibility Principle states:
that every module, class, or function should have responsibility over a single part of the functionality provided by the software, and that responsibility should be entirely encapsulated by that module, class or function.
- The other advantage of getline() over scanf() is that we have a simpler error-recovery strategy - any line we can't parse can simply be discarded. That's more work if we parse directly from the stream. – Toby Speight, Nov 29, 2024 at 9:22
- Thank you for finally reviewing my code, I have been checking this page every day since I asked it. – J. H, Nov 29, 2024 at 22:27
\$\begingroup\$ Thank you for finally reviewing my code, I have been checking this page every day since I asked it. \$\endgroup\$J. H– J. H2024年11月29日 22:27:50 +00:00Commented Nov 29, 2024 at 22:27