// L============================================================================= // L This software is distributed under the MIT license. // L Copyright 2021 Péter Kardos // L============================================================================= #pragma once #include "MatrixImpl.hpp" #include namespace mathter { //------------------------------------------------------------------------------ // Matrix-matrix multiplication //------------------------------------------------------------------------------ namespace impl { template inline auto SmallProductRowRR(const Matrix& lhs, const Matrix& rhs, int row, std::integer_sequence) { return (... + (rhs.stripes[MatchIndices] * lhs(row, MatchIndices))); } template inline auto SmallProductRR(const Matrix& lhs, const Matrix& rhs, std::integer_sequence) { using V = traits::MatMulElemT; using ResultT = Matrix; return ResultT{ ResultT::FromStripes, SmallProductRowRR(lhs, rhs, RowIndices, std::make_integer_sequence{})... }; } template inline auto SmallProductRowCC(const Matrix& lhs, const Matrix& rhs, int col, std::integer_sequence) { return (... + (lhs.stripes[MatchIndices] * rhs(MatchIndices, col))); } template inline auto SmallProductCC(const Matrix& lhs, const Matrix& rhs, std::integer_sequence) { using V = traits::MatMulElemT; using ResultT = Matrix; return ResultT{ ResultT::FromStripes, SmallProductRowCC(lhs, rhs, ColIndices, std::make_integer_sequence{})... }; } } // namespace impl template inline auto operator*(const Matrix& lhs, const Matrix& rhs) { if constexpr (Rows1 <= 4 && Match <= 4 && Columns2 <= 4) { return impl::SmallProductRR(lhs, rhs, std::make_integer_sequence{}); } else { using V = traits::MatMulElemT; Matrix result; for (int i = 0; i < Rows1; ++i) { result.stripes[i] = rhs.stripes[0] * lhs(i, 0); } for (int i = 0; i < Rows1; ++i) { for (int j = 1; j < Match; ++j) { result.stripes[i] += rhs.stripes[j] * lhs(i, j); } } return result; } } template inline auto operator*(const Matrix& lhs, const Matrix& rhs) { using V = traits::MatMulElemT; Matrix result; for (int j = 0; j < Columns2; ++j) { for (int i = 0; i < Rows1; ++i) { result(i, j) = Dot(lhs.stripes[i], rhs.stripes[j]); } } return result; } template inline auto operator*(const Matrix& lhs, const Matrix& rhs) { if constexpr (Rows1 <= 4 && Match <= 4 && Columns2 <= 4) { return impl::SmallProductCC(lhs, rhs, std::make_integer_sequence{}); } else { using V = traits::MatMulElemT; Matrix result; for (int j = 0; j < Columns2; ++j) { result.stripes[j] = lhs.stripes[0] * rhs(0, j); } for (int i = 1; i < Match; ++i) { for (int j = 0; j < Columns2; ++j) { result.stripes[j] += lhs.stripes[i] * rhs(i, j); } } return result; } } template inline auto operator*(const Matrix& lhs, const Matrix& rhs) { // CC algorithm is completely fine for COL_MAJOR x ROW_MAJOR. // See that rhs is only indexed per-element, so its layout does not matter. if constexpr (Rows1 <= 4 && Match <= 4 && Columns2 <= 4) { return impl::SmallProductCC(lhs, rhs, std::make_integer_sequence{}); } else { using V = traits::MatMulElemT; Matrix result; for (int j = 0; j < Columns2; ++j) { result.stripes[j] = lhs.stripes[0] * rhs(0, j); } for (int i = 1; i < Match; ++i) { for (int j = 0; j < Columns2; ++j) { result.stripes[j] += lhs.stripes[i] * rhs(i, j); } } return result; } } // Assign-multiply template inline Matrix& operator*=(Matrix& lhs, const Matrix& rhs) { lhs = lhs * rhs; return lhs; } //------------------------------------------------------------------------------ // Matrix-matrix addition & subtraction //------------------------------------------------------------------------------ namespace impl { template inline auto SmallAdd(const Matrix& lhs, const Matrix& rhs, std::integer_sequence) { using V = traits::MatMulElemT; using ResultT = Matrix; return ResultT{ ResultT::FromStripes, (lhs.stripes[StripeIndices] + rhs.stripes[StripeIndices])... }; } template inline auto SmallSub(const Matrix& lhs, const Matrix& rhs, std::integer_sequence) { using V = traits::MatMulElemT; using ResultT = Matrix; return ResultT{ ResultT::FromStripes, (lhs.stripes[StripeIndices] - rhs.stripes[StripeIndices])... }; } } // namespace impl // Same layout template inline auto operator+(const Matrix& lhs, const Matrix& rhs) { using V = traits::MatMulElemT; if constexpr (Rows * Columns == 4) { Matrix result; for (int i = 0; i < result.RowCount(); ++i) { for (int j = 0; j < result.ColumnCount(); ++j) { result(i, j) = lhs(i, j) + rhs(i, j); } } return result; } else if constexpr (Rows <= 4 && Columns <= 4) { return impl::SmallAdd(lhs, rhs, std::make_integer_sequence::StripeCount>{}); } else { Matrix result; for (int i = 0; i < result.StripeCount; ++i) { result.stripes[i] = lhs.stripes[i] + rhs.stripes[i]; } return result; } } template inline auto operator-(const Matrix& lhs, const Matrix& rhs) { using V = traits::MatMulElemT; if constexpr (Rows * Columns == 4) { Matrix result; for (int i = 0; i < result.RowCount(); ++i) { for (int j = 0; j < result.ColumnCount(); ++j) { result(i, j) = lhs(i, j) - rhs(i, j); } } return result; } else if constexpr (Rows <= 4 && Columns <= 4) { return impl::SmallSub(lhs, rhs, std::make_integer_sequence::StripeCount>{}); } else { Matrix result; for (int i = 0; i < result.StripeCount; ++i) { result.stripes[i] = lhs.stripes[i] - rhs.stripes[i]; } return result; } } // Add & sub opposite layout template ::type> inline auto operator+(const Matrix& lhs, const Matrix& rhs) { using V = traits::MatMulElemT; Matrix result; for (int i = 0; i < result.RowCount(); ++i) { for (int j = 0; j < result.ColumnCount(); ++j) { result(i, j) = lhs(i, j) + rhs(i, j); } } return result; } template ::type> inline auto operator-(const Matrix& lhs, const Matrix& rhs) { using V = traits::MatMulElemT; Matrix result; for (int i = 0; i < result.RowCount(); ++i) { for (int j = 0; j < result.ColumnCount(); ++j) { result(i, j) = lhs(i, j) - rhs(i, j); } } return result; } /// Performs matrix addition and stores result in this. template inline Matrix& operator+=( Matrix& lhs, const Matrix& rhs) { lhs = lhs + rhs; return lhs; } /// Performs matrix subtraction and stores result in this. template inline Matrix& operator-=( Matrix& lhs, const Matrix& rhs) { lhs = lhs - rhs; return lhs; } //------------------------------------------------------------------------------ // Matrix-Scalar arithmetic //------------------------------------------------------------------------------ // Scalar multiplication /// Multiplies all elements of the matrix by scalar. template , int> = 0> inline Matrix& operator*=(Matrix& mat, U s) { for (auto& stripe : mat.stripes) { stripe *= s; } return mat; } /// Divides all elements of the matrix by scalar. template , int> = 0> inline Matrix& operator/=(Matrix& mat, U s) { mat *= U(1) / s; return mat; } template , int> = 0> Matrix operator*(const Matrix& mat, U s) { Matrix copy(mat); copy *= s; return copy; } template , int> = 0> Matrix operator/(const Matrix& mat, U s) { Matrix copy(mat); copy /= s; return copy; } template , int> = 0> Matrix operator*(U s, const Matrix& mat) { return mat * s; } template , int> = 0> Matrix operator/(U s, const Matrix& mat) { Matrix result; for (int i = 0; i < Matrix::StripeCount; ++i) { result.stripes[i] = T(s) / mat.stripes[i]; } return result; } //------------------------------------------------------------------------------ // Elementwise multiply and divide //------------------------------------------------------------------------------ template auto MulElementwise(const Matrix& lhs, const Matrix& rhs) { Matrix result; for (int i = 0; i < result.StripeCount; ++i) { result.stripes[i] = lhs.stripes[i] * rhs.stripes[i]; } return result; } template auto MulElementwise(const Matrix& lhs, const Matrix::value, Packed>& rhs) { Matrix result; for (int i = 0; i < Rows; ++i) { for (int j = 0; j < Columns; ++j) { result(i, j) = lhs(i, j) * rhs(i, j); } } return result; } template auto DivElementwise(const Matrix& lhs, const Matrix& rhs) { Matrix result; for (int i = 0; i < result.StripeCount; ++i) { result.stripes[i] = lhs.stripes[i] / rhs.stripes[i]; } return result; } template auto DivElementwise(const Matrix& lhs, const Matrix::value, Packed>& rhs) { Matrix result; for (int i = 0; i < Rows; ++i) { for (int j = 0; j < Columns; ++j) { result(i, j) = lhs(i, j) / rhs(i, j); } } return result; } //------------------------------------------------------------------------------ // Unary signs //------------------------------------------------------------------------------ template auto operator+(const Matrix& mat) { return Matrix(mat); } template auto operator-(const Matrix& mat) { return Matrix(mat) * T(-1); } } // namespace mathter