I just read a book on creating a neural network. The book uses Python. I wanted to port this to C++. In order to do that I had to create template functions to implement some of the numpy
matrix functions. The template functions that I created attempt to be portable (work on any C++), size independent and type independent (though I am using double
s). Instead of arrays it uses nested std::vector
s.
I was wondering if using size_t
is okay. The reason for it is to avoid a comparison of an unsigned and signed number.
Note: The template function CreateNormalized
does a lame attempt to emulate numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
. This version makes no attempt at normal distribution but I think this ok since it is only creating random number within a given range.
#include <vector>
#include <iostream>
#include <iomanip>
#include <algorithm>
// template class to create a 2D vector
// given rows, cols and an a initial value
template <class T, class T2>
std::vector <std::vector<T>> CreateVector(T2 rows, T2 cols, T init)
{
std::vector <std::vector<T>> c(rows, std::vector<T>(cols, init));
return c;
}
// template class to create a 2D square vector
// given size and an a initial value
template <class T, class T2>
std::vector <std::vector<T>> CreateVector(T2 size, T init)
{
std::vector <std::vector<T>> c(size, std::vector<T>(size, init));
return c;
}
// template class to assign a 1D vector to a 2D vector
template <class T>
std::vector<std::vector<T>>To2D(const std::vector<T>& a)
{
auto out = CreateVector((a.size() + 1) - a.size(), a.size(), T(0));
auto cols = a.size();
for (size_t col = 0; col < cols; ++col)
{
out[0][col] = a[col];
}
return out;
}
// template class to Transpose a 2D vector
//
// 1 2 3 1 4
// 4 5 6 2 5
// 3 6
//
// https://en.wikipedia.org/wiki/Transpose
template <class T>
std::vector <std::vector<T>> Transpose(const std::vector<std::vector<T>>& a)
{
auto rows = a.size();
auto cols = a[0].size();
auto out = CreateVector(cols, rows, T(0));
for (size_t row = 0; row < rows; ++row)
{
for (size_t col = 0; col < cols; ++col)
{
out[col][row] = a[row][col];
}
}
return out;
}
// template class to create a random 2D vector with a scale average
template <class T, class T2>
std::vector <std::vector<T>> CreateNormalized(T loc, T scale, T2 rows, T2 cols)
{
auto a1 = scale / 2.0;
auto a2 = loc - a1;
auto out = CreateVector(rows, cols, loc);
for (T2 row = 0; row < rows; ++row)
{
for (T2 col = 0; col < cols; ++col)
{
auto x = ((double(rand()) / RAND_MAX) * scale) + a2;
out[row][col] = x;
}
}
return out;
}
// template class to subtract a 2D vector from a constant
template <class T>
std::vector <std::vector<T>> Subtract(const T n, const std::vector<std::vector<T>> &a)
{
auto rows = a.size();
auto cols = a[0].size();
auto out = CreateVector(rows, cols, T(0));
for (size_t row = 0; row < rows; ++row)
{
for (size_t col = 0; col < cols; ++col)
{
out[row][col] = n - a[row][col];
}
}
return out;
}
// template class to subtract a 2D vector from another 2D vector
template <class T>
std::vector <std::vector<T>> Subtract(const std::vector<std::vector<T>> &a, const std::vector<std::vector<T>> &b)
{
auto rows = a.size();
auto cols = a[0].size();
auto out = CreateVector(rows, cols, T(0));
for (size_t row = 0; row < rows; ++row)
{
for (size_t col = 0; col < cols; ++col)
{
out[row][col] = a[row][col] - b[row][col];
}
}
return out;
}
// template class to add a 2D vector to another 2D vector
template <class T>
std::vector <std::vector<T>> Add(const std::vector<std::vector<T>> &a, const std::vector<std::vector<T>> &b)
{
auto arows = a.size();
auto acols = a[0].size();
auto brows = b.size();
auto bcols = b[0].size();
auto rows = arows + brows;
auto cols = std::max(acols, bcols);
auto out = CreateVector(rows, cols, T(0));
for (size_t row = arows - arows; row < arows; ++row)
{
for (size_t col = acols - acols; col < acols; ++col)
{
out[row][col] = a[row][col];
}
}
size_t r = 0;
for (auto row = arows; row < rows; ++row, ++r)
{
for (auto col = bcols - bcols; col < bcols; ++col)
{
out[row][col] = b[r][col];
}
}
return out;
}
// Multiply 2D vector A by single value B returning the result vector
template <class T>
std::vector<std::vector<T>> Multiply(const T &n, const std::vector<std::vector<T>> &a)
{
auto rows = a.size();
auto cols = a[0].size();
auto out = CreateVector(rows, cols, T(0));
for (size_t row = 0; row < rows; ++row)
{
for (size_t col = 0; col < cols; ++col)
{
out[row][col] = n * a[row][col];
}
}
return out;
}
// Multiply 2D vector A by 2D vector B returning the result vector AB
// based on https://en.wikipedia.org/wiki/Matrix_multiplication
//
// a b c j k l
// A = d e f B = m n o
// g h i p q r
//
// (a*j + b*m + c*p) (a*k + b*n + c*q) (a*l + b*o + c*r)
// AB = (d*j + e*m + f*p) (d*k + e*n + f*q) (d*l + e*o + f*r)
// (g*j + q*m + r*p) (g*k + h*n + i*q) (g*l + h*o + i*r)
//
template <class T>
std::vector<std::vector<T>> Multiply(const std::vector<std::vector<T>> &a, const std::vector<std::vector<T>> &b)
{
const auto n = a.size(); // a rows
const auto m = a[0].size(); // a cols
const auto p = b[0].size(); // b cols
// create the result vector initilized with 0
std::vector <std::vector<T>> c(n, std::vector<T>(p, T(0)));
for (size_t row = 0; row < p; ++row)
{
for (size_t column = 0; column < m; ++column)
{
for (size_t i = 0; i < n; ++i)
{
c[i][row] += a[i][column] * b[column][row];
}
}
}
return c;
}
// template class to subtract a 2D vector from a constant
template <class T>
std::vector<std::vector<T>> operator-(const T n, const std::vector<std::vector<T>>& b)
{
auto c = Subtract(n, b);
return c;
}
// template class for adding a 2D matrix to another 2D matrix
template <class T>
std::vector<std::vector<T>> operator+=(std::vector<std::vector<T>>& a, const std::vector<std::vector<T>>& b)
{
auto arows = a.size();
auto acols = a[0].size();
auto brows = b.size();
auto bcols = b[0].size();
auto rows = arows + brows;
auto cols = std::max(acols, bcols);
auto r = 0;
for (auto row = arows; row < rows; ++row, ++r)
{
std::vector<T> rr(bcols);
for (auto col = bcols - bcols; col < bcols; ++col)
{
rr[col] = b[r][col];
}
a.push_back(rr);
}
return a;
}
// template class opererator to add a 2D vector to another 2D vector
template <class T>
std::vector<std::vector<T>> operator+(const std::vector<std::vector<T>>& a, const std::vector<std::vector<T>>& b)
{
auto c = Add(a, b);
return c;
}
// template class opererator to subtract a 2D vector from another 2D vector
template <class T>
std::vector<std::vector<T>> operator-(const std::vector<std::vector<T>>& a, const std::vector<std::vector<T>>& b)
{
auto c = Subtract(a, b);
return c;
}
// template class opererator to multiply a 2D vector with another 2D vector
template <class T>
std::vector<std::vector<T>> operator*(const std::vector<std::vector<T>>& a, const std::vector<std::vector<T>>& b)
{
auto c = Multiply(a, b);
return c;
}
// template class opererator to multiply a 2D vector with const
template <class T>
std::vector<std::vector<T>> operator*(const T & n, const std::vector<std::vector<T>>& a)
{
auto c = Multiply(n, a);
return c;
}
// Function to get cofactor of a[p][q] in b[][]
template <class T, class T2>
void GetCofactor(std::vector<std::vector<T>> &a, std::vector<std::vector<T>> &b, T2 p, T2 q, T2 n)
{
T2 i = 0;
T2 j = 0;
// Looping for each element of the matrix
for (T2 row = 0; row < n; ++row)
{
for (T2 col = 0; col < n; ++col)
{
// Copying into temporary matrix only those element
// which are not in given row and column
if (row != p && col != q)
{
b[i][j++] = a[row][col];
// Row is filled, so increase row index and
// reset col index
if (j == n - 1)
{
j = 0;
++i;
}
}
}
}
}
// Recursive function for finding determinant of matrix.
template <class T, class T2>
T Determinant(std::vector<std::vector<T>> &a, T2 n)
{
T d = 0; // Initialize result
// Base case : if matrix contains single element
if (n == 1)
return a[0][0];
auto temp = CreateVector(n, d);
auto sign = 1; // To store sign multiplier
// Iterate for each element of first row
for (T2 f = 0; f < n; ++f)
{
// Getting Cofactor of A[0][f]
GetCofactor(a, temp, T2(0), f, n);
d += sign * a[0][f] * Determinant(temp, n - 1);
// terms are to be added with alternate sign
sign = -sign;
}
return d;
}
// Function to get adjoint of A[N][N] in adj[N][N].
template <class T, class T2>
void Adjoint(std::vector<std::vector<T>> &a, std::vector<std::vector<T>> &adj, T2 n)
{
if (n == 1)
{
adj[0][0] = T(1);
return;
}
// b is used to store cofactors of A[][]
auto sign = 1;
auto temp = CreateVector(n, T(0));
for (T2 row = 0; row<n; ++row)
{
for (T2 column = 0; column < n; ++column)
{
// Get cofactor of A[i][j]
GetCofactor(a, temp, row, column, n);
// sign of adj[j][i] positive if sum of row
// and column indexes is even.
sign = ((row + column) % 2 == 0) ? 1 : -1;
// Interchanging rows and columns to get the
// transpose of the cofactor matrix
adj[column][row] = (sign)*(Determinant(temp, n - 1));
}
}
}
// template class to calculate and store inverse, returns false if
// matrix is singular
template <class T>
bool Inverse(std::vector<std::vector<T>> &a, std::vector<std::vector<T>> &inverse)
{
const auto n = a.size();
// Find determinant of A[][]
auto det = Determinant(a, n);
if (det == 0)
{
std::cout << "Singular matrix, can't find its inverse";
return false;
}
// Find adjoint
auto adj = CreateVector(n, det);
Adjoint(a, adj, n);
// Find Inverse using formula "inverse(A) = adj(A)/det(A)"
for (size_t i =0; i < n; ++i)
{
for (size_t j = 0; j < n; ++j)
{
inverse[i][j] = adj[i][j] / det;
}
}
return true;
}
// template class opererator to get the inverse of a 2D vector
template <class T>
std::vector<std::vector<T>> operator~(std::vector<std::vector<T>>& a)
{
auto inv = CreateVector(a.size(), T(0)); // To store inverse of A[][]
auto ok = Inverse(a, inv);
return inv;
}
// template class to print a 2D vector
template<class T>
std::ostream& operator<<(std::ostream& os, const std::vector<std::vector<T>> &a)
{
const auto precision = 6;
const auto mantisa = 4;
const auto spacing = 2;
const auto n = a.size(); // a rows
const auto m = a[0].size(); // a cols
auto isInt = std::is_integral<T>::value;
os.setf(std::ios::fixed, std::ios::floatfield);
os.precision(precision);
for (size_t i = 0; i < n; ++i)
{
for (size_t j = 0; j < m; ++j)
{
if (isInt)
os << std::setw(8) << a[i][j];
else
os << std::setw(precision + mantisa + spacing) << a[i][j];
}
std::cout << std::endl;
}
return os;
}
for completeness I am adding Gauss elimination method for matrix
template <class T>
bool MatrixInversion(std::vector<std::vector<T>> &a, std::vector<std::vector<T>>&aInverse)
{
auto n = a.size();
// A = input matrix (n x n) copied to ac
// n = dimension of A
// AInverse = inverted matrix (n x n)
// This function inverts a matrix based on the Gauss Jordan method.
// The function returns 1 on success, 0 on failure.
size_t icol, irow;
T det, factor;
auto ac = CreateVector(n, T(0));
det = 1;
for (size_t i = 0; i < n; ++i)
{
for (size_t j = 0; j < n; ++j)
{
aInverse[i][j] = 0;
ac[i][j] = a[i][j];
}
aInverse[i][i] = 1;
}
// The current pivot row is iPass.
// For each pass, first find the maximum element in the pivot column.
for (size_t iPass = 0; iPass < n; iPass++)
{
auto imx = iPass;
for (irow = iPass; irow < n; irow++)
{
if (fabs(ac[irow][iPass]) > fabs(ac[imx][iPass])) imx = irow;
}
// Interchange the elements of row iPass and row imx in both A and AInverse.
if (imx != iPass)
{
for (icol = 0; icol < n; icol++)
{
T temp = aInverse[iPass][icol];
aInverse[iPass][icol] = aInverse[imx][icol];
aInverse[imx][icol] = temp;
if (icol >= iPass)
{
temp = ac[iPass][icol];
ac[iPass][icol] = ac[imx][icol];
ac[imx][icol] = temp;
}
}
}
// The current pivot is now A[iPass][iPass].
// The determinant is the product of the pivot elements.
T pivot = ac[iPass][iPass];
det = det * pivot;
if (det == 0)
{
return false;
}
for (icol = 0; icol < n; icol++)
{
// Normalize the pivot row by dividing by the pivot element.
aInverse[iPass][icol] = aInverse[iPass][icol] / pivot;
if (icol >= iPass) ac[iPass][icol] = ac[iPass][icol] / pivot;
}
for (irow = 0; irow < n; irow++)
// Add a multiple of the pivot row to each row. The multiple factor
// is chosen so that the element of A on the pivot column is 0.
{
if (irow != iPass) factor = ac[irow][iPass];
for (icol = 0; icol < n; icol++)
{
if (irow != iPass)
{
aInverse[irow][icol] -= factor * aInverse[iPass][icol];
ac[irow][icol] -= factor * ac[iPass][icol];
}
}
}
}
return true;
}
-
\$\begingroup\$ Please do not update the code in your question to incorporate feedback from answers, doing so goes against the Question + Answer style of Code Review. This is not a forum where you should keep the most updated version in your question. Please see what you may and may not do after receiving answers . \$\endgroup\$Mast– Mast ♦2017年12月18日 00:11:44 +00:00Commented Dec 18, 2017 at 0:11
1 Answer 1
If you want to use someone else's library instead of rolling your own, this question and this wikipedia article discuss possible libraries.
- Many older C++ compilers will not allow consecutive
>>
to close out the template, as it will be mistaken for the right-shift operator by the parser. You'll need to put a space between them. - If you need your code to work on "original" C++ (pre-C++0x) you should use a
typedef
inside your class for the matrix, rather than repeatingstd::vector<std::vector<T> >
all over the place. If you can guarantee that it will only be used by C++0x users, you should useusing Matrix = std::vector<std::vector<T>>;
as this preserves the templating. - Your
Subtract()
routine (and thus youroperator-
) will fail if the two matrices are not the same dimensions. - It looks like your
Add()
routine, despite what the comment would suggest, is performing the same task as numpy'svstack()
. Why not call it that? - I don't understand this idiom
for (size_t row = arows - arows; row < arows; ++row)
at all. Why subtractarows
from itself? - Your
Multiply()
(and thus youroperator*
) will fail if the two matrices are not compatible. - Your
+=
operator will fail of the two matrices are not the same dimensions. - I strongly question your overloading of the
+
operator to performvstack()
, especially when you use the-
operator for ordinary subtraction, and the+=
operator for ordinary addition. - It is counterproductive to create a temporary variable in all your operator arithmetic routines. This will cause multiple unnecessary copies.
- Your
Determinant()
function uses the naive grade-school algorithm to calculate. This is unnecessarily slow (runtime is proportional to the cube of the element count) and very susceptible to round-off error. Please look into other methods of calculating the determinant. - Your
Inverse()
function (and thus youroperator~
) uses the naive grade-school algorithm. It inherits the slowness and numerical instability fromDeterminant()
. But it's worse since you'll be post-multiplying. Please don't do this -- use a better algorithm like LU decomposition. - I don't see the random function anywhere.
-
\$\begingroup\$ Actually, 'using' is preferred to 'typedef' in this context, in C++0x. \$\endgroup\$JNS– JNS2017年12月13日 08:17:42 +00:00Commented Dec 13, 2017 at 8:17
-
\$\begingroup\$ Well OP says it's supposed to "work on any C++", which in my mind suggests that only ISO/IEC 14882:1998 is available... \$\endgroup\$Snowbody– Snowbody2017年12月13日 08:55:12 +00:00Commented Dec 13, 2017 at 8:55
-
1\$\begingroup\$ @Incomputable C++0x refers to the latest standard, being C++17. \$\endgroup\$JNS– JNS2017年12月13日 14:36:11 +00:00Commented Dec 13, 2017 at 14:36
-
1\$\begingroup\$ @JNS, the only alias I heard for C++17 is C++1z. Clang and gcc use C++1z. Where did you heard that from? \$\endgroup\$Incomputable– Incomputable2017年12月13日 14:39:32 +00:00Commented Dec 13, 2017 at 14:39
-
1\$\begingroup\$ @Snowbody I implemented the Gaussian elimination method. In 5 seconds adjunct method executed 2,726 times, Gaussian executed 31,000 times. gaussian has a huge advantage. \$\endgroup\$Paul Baxter– Paul Baxter2017年12月17日 23:28:24 +00:00Commented Dec 17, 2017 at 23:28