diff --git a/build_linux64/libamsculib3.linux64.a b/build_linux64/libamsculib3.linux64.a index 94cbc7b..3eb062a 100644 Binary files a/build_linux64/libamsculib3.linux64.a and b/build_linux64/libamsculib3.linux64.a differ diff --git a/build_linux64/objstore/cuvec2f.o b/build_linux64/objstore/cuvec2f.o index 5347f73..0e204f8 100644 Binary files a/build_linux64/objstore/cuvec2f.o and b/build_linux64/objstore/cuvec2f.o differ diff --git a/build_linux64/objstore/cuvec3f.o b/build_linux64/objstore/cuvec3f.o index ed9ed62..fbcf3d5 100644 Binary files a/build_linux64/objstore/cuvec3f.o and b/build_linux64/objstore/cuvec3f.o differ diff --git a/build_linux64/objstore/cuvec4f.o b/build_linux64/objstore/cuvec4f.o index d20de1d..48b0461 100644 Binary files a/build_linux64/objstore/cuvec4f.o and b/build_linux64/objstore/cuvec4f.o differ diff --git a/build_linux64/test b/build_linux64/test index 331a30b..2475f49 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib3/math/cuvec2f.hpp b/include/amsculib3/math/cuvec2f.hpp index 8bd9d37..8493ca5 100644 --- a/include/amsculib3/math/cuvec2f.hpp +++ b/include/amsculib3/math/cuvec2f.hpp @@ -45,8 +45,8 @@ namespace amscuda __host__ __device__ cumat2f(); __host__ __device__ ~cumat2f(); __host__ __device__ cumat2f( - const float& _m00, const float& _m10, - const float& _m01, const float& _m11 + const float& _m00, const float& _m01, + const float& _m10, const float& _m11 ); __host__ __device__ cumat2f(const float* data4); diff --git a/include/amsculib3/math/cuvec3f.hpp b/include/amsculib3/math/cuvec3f.hpp index 34721be..c572fdd 100644 --- a/include/amsculib3/math/cuvec3f.hpp +++ b/include/amsculib3/math/cuvec3f.hpp @@ -11,79 +11,76 @@ namespace amscuda float y; float z; - __host__ __device__ cuvec3f(); - __host__ __device__ ~cuvec3f(); - __host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z); - - + __host__ __device__ cuvec3f(); + __host__ __device__ ~cuvec3f(); + __host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z); + __host__ __device__ float& operator[](const int &I); __host__ __device__ const float& operator[](const int &I) const; - __host__ __device__ cuvec3f operator+(const cuvec3f &rhs); - __host__ __device__ cuvec3f operator-(const cuvec3f &rhs); - - __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float &rhs); - __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float &rhs); - __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f &rhs); - __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f &rhs); - - __host__ __device__ friend cuvec3f operator-(const cuvec3f &rhs); - - __host__ __device__ cuvec3f& operator+=(const cuvec3f &rhs); - __host__ __device__ cuvec3f& operator-=(const cuvec3f &rhs); - __host__ __device__ cuvec3f& operator/=(const float &rhs); - __host__ __device__ cuvec3f& operator*=(const float &rhs); - + __host__ __device__ cuvec3f operator+(const cuvec3f& rhs) const; + __host__ __device__ cuvec3f operator-(const cuvec3f& rhs) const; + __host__ __device__ cuvec3f operator*(const cuvec3f& rhs) const; //elementwise product + __host__ __device__ cuvec3f operator/(const cuvec3f& rhs) const; //elementwise division + + __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float& rhs); + __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f& rhs); + __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float& rhs); + __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f& rhs); + __host__ __device__ friend cuvec3f operator-(const cuvec3f& other); + + __host__ __device__ cuvec3f& operator+=(const cuvec3f& rhs); + __host__ __device__ cuvec3f& operator-=(const cuvec3f& rhs); + __host__ __device__ cuvec3f& operator*=(const float& rhs); + __host__ __device__ cuvec3f& operator/=(const float& rhs); }; class cumat3f { public: - float m00,m10,m20; //named references to force register use? - float m01,m11,m21; //switched to column-major-order to match GLSL/lapack + float m00,m10,m20; + float m01,m11,m21; float m02,m12,m22; - - __host__ __device__ cumat3f(); - __host__ __device__ ~cumat3f(); - __host__ __device__ cumat3f( - const float & _m00, const float & _m01, const float & _m02, - const float & _m10, const float & _m11, const float & _m12, - const float & _m20, const float & _m21, const float & _m22 - ); - - __host__ __device__ explicit cumat3f(const float *data9); + __host__ __device__ cumat3f(); + __host__ __device__ ~cumat3f(); + __host__ __device__ cumat3f( + const float& _m00, const float& _m01, const float& _m02, + const float& _m10, const float& _m11, const float& _m12, + const float& _m20, const float& _m21, const float& _m22 + ); + __host__ __device__ cumat3f(const float* data9); __host__ __device__ float& operator[](const int &I); - __host__ __device__ float& operator()(const int &I, const int &J); - __host__ __device__ float& at(const int &I, const int &J); - __host__ __device__ const float& operator[](const int &I) const; + __host__ __device__ float& operator()(const int &I, const int &J); __host__ __device__ const float& operator()(const int &I, const int &J) const; + __host__ __device__ float& at(const int &I, const int &J); __host__ __device__ const float& at(const int &I, const int &J) const; - - __host__ __device__ cumat3f operator+(const cumat3f &rhs); - __host__ __device__ cumat3f operator-(const cumat3f &rhs); - __host__ __device__ cumat3f operator*(const float &rhs); - __host__ __device__ cumat3f operator/(const float &rhs); - __host__ __device__ cuvec3f operator*(const cuvec3f &rhs); - __host__ __device__ cumat3f operator*(const cumat3f &rhs); - __host__ __device__ friend cumat3f operator-(const cumat3f &rhs); + __host__ __device__ float* data(); //pointer to float9 representation of matrix + __host__ __device__ const float* data() const; //pointer to float9 representation of matrix + + __host__ __device__ cumat3f operator+(const cumat3f& rhs) const; + __host__ __device__ cumat3f operator-(const cumat3f& rhs) const; + __host__ __device__ cumat3f operator*(const cumat3f& rhs) const; + + __host__ __device__ friend cumat3f operator*(const cumat3f& lhs, const float& rhs); + __host__ __device__ friend cumat3f operator/(const cumat3f& lhs, const float& rhs); + __host__ __device__ friend cumat3f operator*(const float& lhs, const cumat3f& rhs); + __host__ __device__ friend cuvec3f operator*(const cumat3f& lhs, const cuvec3f& rhs); + __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const cumat3f& rhs); + __host__ __device__ friend cumat3f operator-(const cumat3f& rhs); + + __host__ __device__ cumat3f& operator+=(const cumat3f& rhs); + __host__ __device__ cumat3f& operator-=(const cumat3f& rhs); + __host__ __device__ cumat3f& operator*=(const float& rhs); + __host__ __device__ cumat3f& operator/=(const float& rhs); + __host__ __device__ cumat3f& operator*=(const cumat3f& rhs); + + __host__ __device__ cumat3f transpose() const; __host__ __device__ float det(); - __host__ __device__ cumat3f transpose(); __host__ __device__ cumat3f inverse(); - - __host__ __device__ float* data(); //pointer to float[9] representation of matrix - __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix - - //In place operations (to save GPU register use) - __host__ __device__ cumat3f& operator+=(const cumat3f &rhs); - __host__ __device__ cumat3f& operator-=(const cumat3f &rhs); - __host__ __device__ cumat3f& operator/=(const float &rhs); - __host__ __device__ cumat3f& operator*=(const float &rhs); - __host__ __device__ cumat3f& operator*=(const cumat3f &rhs); - }; __host__ __device__ float cuvec3f_dot(const cuvec3f &a,const cuvec3f &b); diff --git a/include/amsculib3/math/cuvec4f.hpp b/include/amsculib3/math/cuvec4f.hpp index c5f383c..21b164c 100644 --- a/include/amsculib3/math/cuvec4f.hpp +++ b/include/amsculib3/math/cuvec4f.hpp @@ -12,81 +12,83 @@ namespace amscuda float z; float w; - __host__ __device__ cuvec4f(); - __host__ __device__ ~cuvec4f(); - __host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w); - + __host__ __device__ cuvec4f(); + __host__ __device__ ~cuvec4f(); + __host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w); __host__ __device__ float& operator[](const int &I); __host__ __device__ const float& operator[](const int &I) const; - - __host__ __device__ cuvec4f operator+(const cuvec4f &rhs); - __host__ __device__ cuvec4f operator-(const cuvec4f &rhs); - - __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float &rhs); - __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float &rhs); - __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f &rhs); - __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f &rhs); - - __host__ __device__ friend cuvec4f operator-(const cuvec4f &rhs); - - __host__ __device__ cuvec4f& operator+=(const cuvec4f &rhs); - __host__ __device__ cuvec4f& operator-=(const cuvec4f &rhs); - __host__ __device__ cuvec4f& operator/=(const float &rhs); - __host__ __device__ cuvec4f& operator*=(const float &rhs); + + __host__ __device__ cuvec4f operator+(const cuvec4f& rhs) const; + __host__ __device__ cuvec4f operator-(const cuvec4f& rhs) const; + __host__ __device__ cuvec4f operator*(const cuvec4f& rhs) const; //elementwise product + __host__ __device__ cuvec4f operator/(const cuvec4f& rhs) const; //elementwise division + + __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float& rhs); + __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f& rhs); + __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float& rhs); + __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f& rhs); + __host__ __device__ friend cuvec4f operator-(const cuvec4f& other); + + __host__ __device__ cuvec4f& operator+=(const cuvec4f& rhs); + __host__ __device__ cuvec4f& operator-=(const cuvec4f& rhs); + __host__ __device__ cuvec4f& operator*=(const float& rhs); + __host__ __device__ cuvec4f& operator/=(const float& rhs); }; class cumat4f { public: //float dat[16]; + //__forceinline__ - float m00,m10,m20,m30; //named references to force register use? - float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack + + float m00,m10,m20,m30; + float m01,m11,m21,m31; float m02,m12,m22,m32; float m03,m13,m23,m33; - __host__ __device__ cumat4f(); - __host__ __device__ ~cumat4f(); + __host__ __device__ cumat4f(); + __host__ __device__ ~cumat4f(); __host__ __device__ cumat4f( - const float & _m00, const float & _m01, const float & _m02, const float & _m03, - const float & _m10, const float & _m11, const float & _m12, const float & _m13, - const float & _m20, const float & _m21, const float & _m22, const float & _m23, - const float & _m30, const float & _m31, const float & _m32, const float & _m33 + const float& _m00, const float& _m01, const float& _m02, const float& _m03, + const float& _m10, const float& _m11, const float& _m12, const float& _m13, + const float& _m20, const float& _m21, const float& _m22, const float& _m23, + const float& _m30, const float& _m31, const float& _m32, const float& _m33 ); + __host__ __device__ cumat4f(const float* data16); - __host__ __device__ explicit cumat4f(const float *data16); - - //__forceinline__ __host__ __device__ float& operator[](const int &I); - __host__ __device__ float& operator()(const int &I, const int &J); - __host__ __device__ float& at(const int &I, const int &J); - __host__ __device__ const float& operator[](const int &I) const; + __host__ __device__ float& operator()(const int &I, const int &J); __host__ __device__ const float& operator()(const int &I, const int &J) const; + __host__ __device__ float& at(const int &I, const int &J); __host__ __device__ const float& at(const int &I, const int &J) const; - __host__ __device__ cumat4f operator+(const cumat4f &rhs); - __host__ __device__ cumat4f operator-(const cumat4f &rhs); - __host__ __device__ cumat4f operator*(const float &rhs); - __host__ __device__ cumat4f operator/(const float &rhs); - __host__ __device__ cuvec4f operator*(const cuvec4f &rhs); - __host__ __device__ cumat4f operator*(const cumat4f &rhs); - __host__ __device__ friend cumat4f operator-(const cumat4f &rhs); + __host__ __device__ float* data(); //pointer to float16 representation of matrix + __host__ __device__ const float* data() const; //pointer to float16 representation of matrix + __host__ __device__ cumat4f operator+(const cumat4f& rhs) const; + __host__ __device__ cumat4f operator-(const cumat4f& rhs) const; + __host__ __device__ cumat4f operator*(const cumat4f& rhs) const; + + __host__ __device__ friend cumat4f operator*(const cumat4f& lhs, const float& rhs); + __host__ __device__ friend cumat4f operator/(const cumat4f& lhs, const float& rhs); + __host__ __device__ friend cumat4f operator*(const float& lhs, const cumat4f& rhs); + __host__ __device__ friend cuvec4f operator*(const cumat4f& lhs, const cuvec4f& rhs); + __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const cumat4f& rhs); + __host__ __device__ friend cumat4f operator-(const cumat4f& rhs); + + __host__ __device__ cumat4f& operator+=(const cumat4f& rhs); + __host__ __device__ cumat4f& operator-=(const cumat4f& rhs); + __host__ __device__ cumat4f& operator*=(const float& rhs); + __host__ __device__ cumat4f& operator/=(const float& rhs); + __host__ __device__ cumat4f& operator*=(const cumat4f& rhs); + + __host__ __device__ cumat4f transpose() const; + __host__ __device__ float det(); - __host__ __device__ cumat4f transpose(); __host__ __device__ cumat4f inverse(); - - __host__ __device__ float* data(); //pointer to float[9] representation of matrix - __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix - - //In place operations (to save GPU register use) - __host__ __device__ cumat4f& operator+=(const cumat4f &rhs); - __host__ __device__ cumat4f& operator-=(const cumat4f &rhs); - __host__ __device__ cumat4f& operator/=(const float &rhs); - __host__ __device__ cumat4f& operator*=(const float &rhs); - __host__ __device__ cumat4f& operator*=(const cumat4f &rhs); }; __host__ __device__ float cuvec4f_dot(cuvec4f &a, cuvec4f &b); diff --git a/src/amsculib3/math/cuvec2f.cu b/src/amsculib3/math/cuvec2f.cu index 8c12dfc..dc93a5a 100644 --- a/src/amsculib3/math/cuvec2f.cu +++ b/src/amsculib3/math/cuvec2f.cu @@ -176,8 +176,8 @@ namespace amscuda } __host__ __device__ cumat2f::cumat2f( - const float& _m00, const float& _m10, - const float& _m01, const float& _m11 + const float& _m00, const float& _m01, + const float& _m10, const float& _m11 ) { m00 = _m00; @@ -256,165 +256,163 @@ namespace amscuda return (*this)[I+2*J]; } -__host__ __device__ float* cumat2f::data() -{ - return (float*)this; -} + __host__ __device__ float* cumat2f::data() + { + return (float*)this; + } -__host__ __device__ const float* cumat2f::data() const -{ - return (float*)this; -} + __host__ __device__ const float* cumat2f::data() const + { + return (float*)this; + } -__host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const -{ - cumat2f ret; - ret.m00 = m00 + rhs.m00; - ret.m10 = m10 + rhs.m10; + __host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const + { + cumat2f ret; + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; - ret.m01 = m01 + rhs.m01; - ret.m11 = m11 + rhs.m11; + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; - return ret; -} + return ret; + } -__host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const -{ - cumat2f ret; - ret.m00 = m00 - rhs.m00; - ret.m10 = m10 - rhs.m10; + __host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const + { + cumat2f ret; + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; - ret.m01 = m01 - rhs.m01; - ret.m11 = m11 - rhs.m11; + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; - return ret; -} + return ret; + } -__host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const -{ - cumat2f ret; //should be zeroed in constructor - ret.m00 = m00*rhs.m00 + m01*rhs.m10; - ret.m01 = m00*rhs.m01 + m01*rhs.m11; + __host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const + { + cumat2f ret; //should be zeroed in constructor + ret.m00 = m00*rhs.m00 + m01*rhs.m10; + ret.m01 = m00*rhs.m01 + m01*rhs.m11; - ret.m10 = m10*rhs.m00 + m11*rhs.m10; - ret.m11 = m10*rhs.m01 + m11*rhs.m11; + ret.m10 = m10*rhs.m00 + m11*rhs.m10; + ret.m11 = m10*rhs.m01 + m11*rhs.m11; - return ret; -} + return ret; + } -__host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs) -{ - cumat2f ret; - ret.m00=lhs.m00*rhs; - ret.m10=lhs.m10*rhs; - ret.m01=lhs.m01*rhs; - ret.m11=lhs.m11*rhs; - return ret; -} + __host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs) + { + cumat2f ret; + ret.m00=lhs.m00*rhs; + ret.m10=lhs.m10*rhs; + ret.m01=lhs.m01*rhs; + ret.m11=lhs.m11*rhs; + return ret; + } -__host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs) -{ - cumat2f ret; - ret.m00=lhs.m00/rhs; - ret.m10=lhs.m10/rhs; - ret.m01=lhs.m01/rhs; - ret.m11=lhs.m11/rhs; - return ret; -} + __host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs) + { + cumat2f ret; + ret.m00=lhs.m00/rhs; + ret.m10=lhs.m10/rhs; + ret.m01=lhs.m01/rhs; + ret.m11=lhs.m11/rhs; + return ret; + } -__host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs) -{ - cumat2f ret; - ret.m00=lhs*rhs.m00; - ret.m10=lhs*rhs.m10; - ret.m01=lhs*rhs.m01; - ret.m11=lhs*rhs.m11; - return ret; -} + __host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs) + { + cumat2f ret; + ret.m00=lhs*rhs.m00; + ret.m10=lhs*rhs.m10; + ret.m01=lhs*rhs.m01; + ret.m11=lhs*rhs.m11; + return ret; + } -__host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs) -{ - cuvec2f ret; - ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y; - ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y; - return ret; -} + __host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs) + { + cuvec2f ret; + ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y; + ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y; + return ret; + } -__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs) -{ - cuvec2f ret; - ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10; - ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11; - return ret; -} + __host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs) + { + cuvec2f ret; + ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10; + ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11; + return ret; + } -__host__ __device__ cumat2f operator-(const cumat2f& rhs) -{ - cumat2f ret; - ret.m00 = -rhs.m00; - ret.m10 = -rhs.m10; - ret.m01 = -rhs.m01; - ret.m11 = -rhs.m11; - return ret; -} + __host__ __device__ cumat2f operator-(const cumat2f& rhs) + { + cumat2f ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + return ret; + } -__host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs) -{ - m00 += rhs.m00; - m10 += rhs.m10; - m01 += rhs.m01; - m11 += rhs.m11; - return *this; -} + __host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs) + { + m00 += rhs.m00; + m10 += rhs.m10; + m01 += rhs.m01; + m11 += rhs.m11; + return *this; + } -__host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs) -{ - m00 -= rhs.m00; - m10 -= rhs.m10; - m01 -= rhs.m01; - m11 -= rhs.m11; - return *this; -} + __host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs) + { + m00 -= rhs.m00; + m10 -= rhs.m10; + m01 -= rhs.m01; + m11 -= rhs.m11; + return *this; + } -__host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs) -{ - m00 *= rhs; - m10 *= rhs; - m01 *= rhs; - m11 *= rhs; - return *this; -} + __host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs) + { + m00 *= rhs; + m10 *= rhs; + m01 *= rhs; + m11 *= rhs; + return *this; + } -__host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs) -{ - m00 /= rhs; - m10 /= rhs; - m01 /= rhs; - m11 /= rhs; - return *this; -} + __host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs) + { + m00 /= rhs; + m10 /= rhs; + m01 /= rhs; + m11 /= rhs; + return *this; + } -__host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs) -{ - cumat2f tmp = *this; - m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10; - m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11; - m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10; - m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11; - return *this; -} + __host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs) + { + cumat2f tmp = *this; + m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10; + m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11; + m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10; + m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11; + return *this; + } -__host__ __device__ cumat2f cumat2f::transpose() const -{ - cumat2f ret; - ret.m00 = m00; - ret.m10 = m01; - ret.m01 = m10; - ret.m11 = m11; - return ret; -} - - + __host__ __device__ cumat2f cumat2f::transpose() const + { + cumat2f ret; + ret.m00 = m00; + ret.m10 = m01; + ret.m01 = m10; + ret.m11 = m11; + return ret; + } /////////////////// //Det and Inverse// diff --git a/src/amsculib3/math/cuvec3f.cu b/src/amsculib3/math/cuvec3f.cu index 1426dbb..05e0ba8 100644 --- a/src/amsculib3/math/cuvec3f.cu +++ b/src/amsculib3/math/cuvec3f.cu @@ -3,407 +3,559 @@ namespace amscuda { - __host__ __device__ cuvec3f::cuvec3f() - { - x = 0.0f; y = 0.0f; z = 0.0f; - return; - } +////////////////// +// Vector Class // +////////////////// - __host__ __device__ cuvec3f::~cuvec3f() - { - x = 0.0f; y = 0.0f; z = 0.0f; - return; - } - - __host__ __device__ float& cuvec3f::operator[](const int &I) - { - if(I==0) return x; - if(I==1) return y; - if(I==2) return z; - return x; - } - - __host__ __device__ const float& cuvec3f::operator[](const int &I) const - { - if(I==0) return x; - if(I==1) return y; - if(I==2) return z; - return x; - } - - __host__ __device__ cuvec3f cuvec3f::operator+(const cuvec3f &rhs) - { - cuvec3f ret; - ret.x = x+rhs.x; - ret.y = y+rhs.y; - ret.z = z+rhs.z; - - return ret; - } - - __host__ __device__ cuvec3f cuvec3f::operator-(const cuvec3f &rhs) - { - cuvec3f ret; - ret.x = x-rhs.x; - ret.y = y-rhs.y; - ret.z = z-rhs.z; - - return ret; - } - - __host__ __device__ cuvec3f operator*(const cuvec3f& lhs, const float &rhs) - { - cuvec3f ret; - ret.x = lhs.x*rhs; - ret.y = lhs.y*rhs; - ret.z = lhs.z*rhs; - return ret; - } - __host__ __device__ cuvec3f operator/(const cuvec3f& lhs, const float &rhs) - { - cuvec3f ret; - ret.x = lhs.x/rhs; - ret.y = lhs.y/rhs; - ret.z = lhs.z/rhs; - return ret; - } - __host__ __device__ cuvec3f operator*(const float& lhs, const cuvec3f &rhs) - { - cuvec3f ret; - ret.x = rhs.x*lhs; - ret.y = rhs.y*lhs; - ret.z = rhs.z*lhs; - return ret; - } - __host__ __device__ cuvec3f operator/(const float& lhs, const cuvec3f &rhs) - { - cuvec3f ret; - ret.x = rhs.x/lhs; - ret.y = rhs.y/lhs; - ret.z = rhs.z/lhs; - return ret; - } - - __host__ __device__ cuvec3f& cuvec3f::operator+=(const cuvec3f &rhs) - { - x = x + rhs.x; - y = y + rhs.y; - z = z + rhs.z; - return *this; - } - - __host__ __device__ cuvec3f& cuvec3f::operator-=(const cuvec3f &rhs) - { - x = x - rhs.x; - y = y - rhs.y; - z = z - rhs.z; - return *this; - } - - __host__ __device__ cuvec3f& cuvec3f::operator*=(const float &rhs) - { - x = x * rhs; - y = y * rhs; - z = z * rhs; - return *this; - } - - __host__ __device__ cuvec3f& cuvec3f::operator/=(const float &rhs) - { - x = x / rhs; - y = y / rhs; - z = z / rhs; - return *this; - } - - - __host__ __device__ cuvec3f::cuvec3f(const float &_x, const float &_y, const float &_z) - { - x = _x; y = _y; z = _z; - return; - } - - - __host__ __device__ float cuvec3f_dot(const cuvec3f &a, const cuvec3f &b) - { - float ret = a.x*b.x+a.y*b.y+a.z*b.z; - - return ret; - } - - __host__ __device__ cuvec3f cuvec3f_cross(const cuvec3f &a, const cuvec3f &b) - { - cuvec3f ret; - ret[0] = a[1]*b[2]-a[2]*b[1]; - ret[1] = a[2]*b[0]-a[0]*b[2]; - ret[2] = a[0]*b[1]-a[1]*b[0]; - - return ret; - } - - __host__ __device__ float cuvec3f_norm(const cuvec3f &a) - { - float ret; - ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); - return ret; - } - - __host__ __device__ cuvec3f cuvec3f_normalize(const cuvec3f &a) - { - cuvec3f ret; - float m; - m = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); - if(m>0.0) - { - ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; - } - else - { - ret.x = 0.0f; ret.y = 0.0f; ret.z = 0.0f; - } - - return ret; - } - - __host__ __device__ cuvec3f cuvec3f_proj(const cuvec3f &a, const cuvec3f &b) - { - cuvec3f ret; - cuvec3f bn = cuvec3f_normalize(b); - float m = cuvec3f_dot(a,bn); - ret = bn*m; - return ret; - } - - - - -__host__ __device__ cumat3f::cumat3f() +__host__ __device__ cuvec3f::cuvec3f() { - m00 = 0.0f; - m01 = 0.0f; - m02 = 0.0f; - m10 = 0.0f; - m11 = 0.0f; - m12 = 0.0f; - m20 = 0.0f; - m21 = 0.0f; - m22 = 0.0f; + x = 0; y = 0; z = 0; + return; +} + +__host__ __device__ cuvec3f::~cuvec3f() +{ + x = 0; y = 0; z = 0; + return; +} + +__host__ __device__ cuvec3f::cuvec3f(const float &_x, const float &_y, const float &_z) +{ + x = _x; y = _y; z = _z; + return; +} + +__host__ __device__ float& cuvec3f::operator[](const int &I) +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + } + + return x; +} + +__host__ __device__ const float& cuvec3f::operator[](const int &I) const +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + } + + return x; +} + +__host__ __device__ cuvec3f cuvec3f::operator+(const cuvec3f& rhs) const +{ + cuvec3f ret; + ret.x = x + rhs.x; + ret.y = y + rhs.y; + ret.z = z + rhs.z; + return ret; +} + +__host__ __device__ cuvec3f cuvec3f::operator-(const cuvec3f& rhs) const +{ + cuvec3f ret; + ret.x = x - rhs.x; + ret.y = y - rhs.y; + ret.z = z - rhs.z; + return ret; +} + +__host__ __device__ cuvec3f cuvec3f::operator*(const cuvec3f& rhs) const +{ + //Elementwise product + cuvec3f ret; + ret.x = x * rhs.x; + ret.y = y * rhs.y; + ret.z = z * rhs.z; + return ret; +} + +__host__ __device__ cuvec3f cuvec3f::operator/(const cuvec3f& rhs) const +{ + //Elementwise division + cuvec3f ret; + ret.x = x / rhs.x; + ret.y = y / rhs.y; + ret.z = z / rhs.z; + return ret; +} + +__host__ __device__ cuvec3f operator*(const cuvec3f& lhs, const float& rhs) +{ + cuvec3f ret; + ret.x = lhs.x*rhs; + ret.y = lhs.y*rhs; + ret.z = lhs.z*rhs; + return ret; +} + +__host__ __device__ cuvec3f operator*(const float& lhs, const cuvec3f& rhs) +{ + cuvec3f ret; + ret.x = lhs*rhs.x; + ret.y = lhs*rhs.y; + ret.z = lhs*rhs.z; + return ret; +} + +__host__ __device__ cuvec3f operator/(const cuvec3f& lhs, const float& rhs) +{ + cuvec3f ret; + ret.x = lhs.x/rhs; + ret.y = lhs.y/rhs; + ret.z = lhs.z/rhs; + return ret; +} + +__host__ __device__ cuvec3f operator/(const float& lhs, const cuvec3f& rhs) +{ + cuvec3f ret; + ret.x = lhs/rhs.x; + ret.y = lhs/rhs.y; + ret.z = lhs/rhs.z; + return ret; +} + +__host__ __device__ cuvec3f operator-(const cuvec3f& other) +{ + cuvec3f ret; + ret.x = -other.x; + ret.y = -other.y; + ret.z = -other.z; + return ret; +} + +__host__ __device__ cuvec3f& cuvec3f::operator+=(const cuvec3f& rhs) +{ + x += rhs.x; + y += rhs.y; + z += rhs.z; + return *this; +} + +__host__ __device__ cuvec3f& cuvec3f::operator-=(const cuvec3f& rhs) +{ + x -= rhs.x; + y -= rhs.y; + z -= rhs.z; + return *this; +} + +__host__ __device__ cuvec3f& cuvec3f::operator*=(const float& rhs) +{ + x *= rhs; + y *= rhs; + z *= rhs; + return *this; +} + +__host__ __device__ cuvec3f& cuvec3f::operator/=(const float& rhs) +{ + x /= rhs; + y /= rhs; + z /= rhs; + return *this; +} + + +////////////////// +// Matrix Class // +////////////////// + +__host__ __device__ cumat3f::cumat3f() +{ + m00 = 0; + m01 = 0; + m02 = 0; + + m10 = 0; + m11 = 0; + m12 = 0; + + m20 = 0; + m21 = 0; + m22 = 0; return; } -__host__ __device__ cumat3f::~cumat3f() +__host__ __device__ cumat3f::~cumat3f() { - m00 = 0.0f; - m01 = 0.0f; - m02 = 0.0f; - m10 = 0.0f; - m11 = 0.0f; - m12 = 0.0f; - m20 = 0.0f; - m21 = 0.0f; - m22 = 0.0f; + //m00 = 0; + //m01 = 0; + //m02 = 0; + + //m10 = 0; + //m11 = 0; + //m12 = 0; + + //m20 = 0; + //m21 = 0; + //m22 = 0; + return; } -__host__ __device__ cumat3f::cumat3f(const float *data9) +__host__ __device__ cumat3f::cumat3f( + const float& _m00, const float& _m01, const float& _m02, + const float& _m10, const float& _m11, const float& _m12, + const float& _m20, const float& _m21, const float& _m22 +) { - m00 = data9[0]; - m10 = data9[1]; - m20 = data9[2]; - m01 = data9[3]; - m11 = data9[4]; - m21 = data9[5]; - m02 = data9[6]; - m12 = data9[7]; - m22 = data9[8]; + m00 = _m00; + m10 = _m10; + m20 = _m20; + + m01 = _m01; + m11 = _m11; + m21 = _m21; + + m02 = _m02; + m12 = _m12; + m22 = _m22; + + + return; +} + +__host__ __device__ cumat3f::cumat3f(const float* data9) +{ + m00 = data9[0]; + m10 = data9[1]; + m20 = data9[2]; + + m01 = data9[3]; + m11 = data9[4]; + m21 = data9[5]; + + m02 = data9[6]; + m12 = data9[7]; + m22 = data9[8]; + + return; } __host__ __device__ float& cumat3f::operator[](const int &I) { - if(I==0) return m00; - if(I==1) return m10; - if(I==2) return m20; - if(I==3) return m01; - if(I==4) return m11; - if(I==5) return m21; - if(I==6) return m02; - if(I==7) return m12; - if(I==8) return m22; - + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m01; + case 4: + return m11; + case 5: + return m21; + case 6: + return m02; + case 7: + return m12; + case 8: + return m22; + } + + return m00; +} + +__host__ __device__ const float& cumat3f::operator[](const int &I) const +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m01; + case 4: + return m11; + case 5: + return m21; + case 6: + return m02; + case 7: + return m12; + case 8: + return m22; + } + return m00; } __host__ __device__ float& cumat3f::operator()(const int &I, const int &J) { - if(I==0 && J==0) return m00; - if(I==1 && J==0) return m10; - if(I==2 && J==0) return m20; - if(I==0 && J==1) return m01; - if(I==1 && J==1) return m11; - if(I==2 && J==1) return m21; - if(I==0 && J==2) return m02; - if(I==1 && J==2) return m12; - if(I==2 && J==2) return m22; - - return m00; + return (*this)[I+3*J]; } +__host__ __device__ const float& cumat3f::operator()(const int &I, const int &J) const +{ + return (*this)[I+3*J]; +} __host__ __device__ float& cumat3f::at(const int &I, const int &J) { - if(I==0 && J==0) return m00; - if(I==1 && J==0) return m10; - if(I==2 && J==0) return m20; - if(I==0 && J==1) return m01; - if(I==1 && J==1) return m11; - if(I==2 && J==1) return m21; - if(I==0 && J==2) return m02; - if(I==1 && J==2) return m12; - if(I==2 && J==2) return m22; - - return m00; + return (*this)[I+3*J]; } -__host__ __device__ const float& cumat3f::operator[](const int &I) const +__host__ __device__ const float& cumat3f::at(const int &I, const int &J) const { - if(I==0) return m00; - if(I==1) return m10; - if(I==2) return m20; - if(I==3) return m01; - if(I==4) return m11; - if(I==5) return m21; - if(I==6) return m02; - if(I==7) return m12; - if(I==8) return m22; - - return m00; + return (*this)[I+3*J]; } -__host__ __device__ const float& cumat3f::operator()(const int &I, const int &J) const +__host__ __device__ float* cumat3f::data() { - if(I==0 && J==0) return m00; - if(I==1 && J==0) return m10; - if(I==2 && J==0) return m20; - if(I==0 && J==1) return m01; - if(I==1 && J==1) return m11; - if(I==2 && J==1) return m21; - if(I==0 && J==2) return m02; - if(I==1 && J==2) return m12; - if(I==2 && J==2) return m22; - - return m00; +return (float*)this; } - -__host__ __device__ const float& cumat3f::at(const int &I, const int &J) const +__host__ __device__ const float* cumat3f::data() const { - if(I==0 && J==0) return m00; - if(I==1 && J==0) return m10; - if(I==2 && J==0) return m20; - if(I==0 && J==1) return m01; - if(I==1 && J==1) return m11; - if(I==2 && J==1) return m21; - if(I==0 && J==2) return m02; - if(I==1 && J==2) return m12; - if(I==2 && J==2) return m22; - - return m00; +return (float*)this; } - -__host__ __device__ cumat3f cumat3f::operator+(const cumat3f &rhs) +__host__ __device__ cumat3f cumat3f::operator+(const cumat3f& rhs) const { - cumat3f ret; - ret.m00 = m00 + rhs.m00; - ret.m10 = m10 + rhs.m10; - ret.m20 = m20 + rhs.m20; - ret.m01 = m01 + rhs.m01; - ret.m11 = m11 + rhs.m11; - ret.m21 = m21 + rhs.m21; - ret.m02 = m02 + rhs.m02; - ret.m12 = m12 + rhs.m12; - ret.m22 = m22 + rhs.m22; - +cumat3f ret; +ret.m00 = m00 + rhs.m00; +ret.m10 = m10 + rhs.m10; +ret.m20 = m20 + rhs.m20; - return ret; +ret.m01 = m01 + rhs.m01; +ret.m11 = m11 + rhs.m11; +ret.m21 = m21 + rhs.m21; + +ret.m02 = m02 + rhs.m02; +ret.m12 = m12 + rhs.m12; +ret.m22 = m22 + rhs.m22; + +return ret; } -__host__ __device__ cumat3f cumat3f::operator-(const cumat3f &rhs) +__host__ __device__ cumat3f cumat3f::operator-(const cumat3f& rhs) const { - cumat3f ret; - ret.m00 = m00 - rhs.m00; - ret.m10 = m10 - rhs.m10; - ret.m20 = m20 - rhs.m20; - ret.m01 = m01 - rhs.m01; - ret.m11 = m11 - rhs.m11; - ret.m21 = m21 - rhs.m21; - ret.m02 = m02 - rhs.m02; - ret.m12 = m12 - rhs.m12; - ret.m22 = m22 - rhs.m22; - return ret; +cumat3f ret; +ret.m00 = m00 - rhs.m00; +ret.m10 = m10 - rhs.m10; +ret.m20 = m20 - rhs.m20; + +ret.m01 = m01 - rhs.m01; +ret.m11 = m11 - rhs.m11; +ret.m21 = m21 - rhs.m21; + +ret.m02 = m02 - rhs.m02; +ret.m12 = m12 - rhs.m12; +ret.m22 = m22 - rhs.m22; + +return ret; } -__host__ __device__ cumat3f cumat3f::operator*(const float &rhs) +__host__ __device__ cumat3f cumat3f::operator*(const cumat3f& rhs) const { - cumat3f ret; - ret.m00 = m00 * rhs; - ret.m10 = m10 * rhs; - ret.m20 = m20 * rhs; - ret.m01 = m01 * rhs; - ret.m11 = m11 * rhs; - ret.m21 = m21 * rhs; - ret.m02 = m02 * rhs; - ret.m12 = m12 * rhs; - ret.m22 = m22 * rhs; - return ret; +cumat3f ret; //should be zeroed in constructor +ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; +ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; +ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + +ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; +ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; +ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + +ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; +ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; +ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + +return ret; } -__host__ __device__ cumat3f cumat3f::operator/(const float &rhs) +__host__ __device__ cumat3f operator*(const cumat3f& lhs, const float& rhs) { - cumat3f ret; - float irhs = 1.0f/rhs; - ret.m00 = m00 * irhs; - ret.m10 = m10 * irhs; - ret.m20 = m20 * irhs; - ret.m01 = m01 * irhs; - ret.m11 = m11 * irhs; - ret.m21 = m21 * irhs; - ret.m02 = m02 * irhs; - ret.m12 = m12 * irhs; - ret.m22 = m22 * irhs; - return ret; +cumat3f ret; +ret.m00=lhs.m00*rhs; +ret.m10=lhs.m10*rhs; +ret.m20=lhs.m20*rhs; +ret.m01=lhs.m01*rhs; +ret.m11=lhs.m11*rhs; +ret.m21=lhs.m21*rhs; +ret.m02=lhs.m02*rhs; +ret.m12=lhs.m12*rhs; +ret.m22=lhs.m22*rhs; +return ret; } - -__host__ __device__ cuvec3f cumat3f::operator*(const cuvec3f &rhs) + +__host__ __device__ cumat3f operator/(const cumat3f& lhs, const float& rhs) { - cuvec3f ret; - - ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z; - ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z; - ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z; - - return ret; +cumat3f ret; +ret.m00=lhs.m00/rhs; +ret.m10=lhs.m10/rhs; +ret.m20=lhs.m20/rhs; +ret.m01=lhs.m01/rhs; +ret.m11=lhs.m11/rhs; +ret.m21=lhs.m21/rhs; +ret.m02=lhs.m02/rhs; +ret.m12=lhs.m12/rhs; +ret.m22=lhs.m22/rhs; +return ret; } -__host__ __device__ cumat3f cumat3f::operator*(const cumat3f &rhs) +__host__ __device__ cumat3f operator*(const float& lhs, const cumat3f& rhs) { - cumat3f ret; - - ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; - ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; - ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; - ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; - ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; - ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; - ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; - ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; - ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; - - return ret; +cumat3f ret; +ret.m00=lhs*rhs.m00; +ret.m10=lhs*rhs.m10; +ret.m20=lhs*rhs.m20; +ret.m01=lhs*rhs.m01; +ret.m11=lhs*rhs.m11; +ret.m21=lhs*rhs.m21; +ret.m02=lhs*rhs.m02; +ret.m12=lhs*rhs.m12; +ret.m22=lhs*rhs.m22; +return ret; } +__host__ __device__ cuvec3f operator*(const cumat3f& lhs, const cuvec3f& rhs) +{ +cuvec3f ret; +ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y + lhs.m02*rhs.z; +ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y + lhs.m12*rhs.z; +ret.z = lhs.m20*rhs.x + lhs.m21*rhs.y + lhs.m22*rhs.z; +return ret; +} + +__host__ __device__ cuvec3f operator*(const cuvec3f& lhs, const cumat3f& rhs) +{ +cuvec3f ret; +ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10 + lhs.z*rhs.m20; +ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11 + lhs.z*rhs.m21; +ret.z = lhs.x*rhs.m02 + lhs.y*rhs.m12 + lhs.z*rhs.m22; +return ret; +} + +__host__ __device__ cumat3f operator-(const cumat3f& rhs) +{ +cumat3f ret; +ret.m00 = -rhs.m00; +ret.m10 = -rhs.m10; +ret.m20 = -rhs.m20; +ret.m01 = -rhs.m01; +ret.m11 = -rhs.m11; +ret.m21 = -rhs.m21; +ret.m02 = -rhs.m02; +ret.m12 = -rhs.m12; +ret.m22 = -rhs.m22; +return ret; +} + +__host__ __device__ cumat3f& cumat3f::operator+=(const cumat3f& rhs) +{ +m00 += rhs.m00; +m10 += rhs.m10; +m20 += rhs.m20; +m01 += rhs.m01; +m11 += rhs.m11; +m21 += rhs.m21; +m02 += rhs.m02; +m12 += rhs.m12; +m22 += rhs.m22; +return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator-=(const cumat3f& rhs) +{ +m00 -= rhs.m00; +m10 -= rhs.m10; +m20 -= rhs.m20; +m01 -= rhs.m01; +m11 -= rhs.m11; +m21 -= rhs.m21; +m02 -= rhs.m02; +m12 -= rhs.m12; +m22 -= rhs.m22; +return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator*=(const float& rhs) +{ +m00 *= rhs; +m10 *= rhs; +m20 *= rhs; +m01 *= rhs; +m11 *= rhs; +m21 *= rhs; +m02 *= rhs; +m12 *= rhs; +m22 *= rhs; +return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator/=(const float& rhs) +{ +m00 /= rhs; +m10 /= rhs; +m20 /= rhs; +m01 /= rhs; +m11 /= rhs; +m21 /= rhs; +m02 /= rhs; +m12 /= rhs; +m22 /= rhs; +return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator*=(const cumat3f& rhs) +{ +cumat3f tmp = *this; +m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10 + tmp.m02*rhs.m20; +m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11 + tmp.m02*rhs.m21; +m02 = tmp.m00*rhs.m02 + tmp.m01*rhs.m12 + tmp.m02*rhs.m22; +m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10 + tmp.m12*rhs.m20; +m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11 + tmp.m12*rhs.m21; +m12 = tmp.m10*rhs.m02 + tmp.m11*rhs.m12 + tmp.m12*rhs.m22; +m20 = tmp.m20*rhs.m00 + tmp.m21*rhs.m10 + tmp.m22*rhs.m20; +m21 = tmp.m20*rhs.m01 + tmp.m21*rhs.m11 + tmp.m22*rhs.m21; +m22 = tmp.m20*rhs.m02 + tmp.m21*rhs.m12 + tmp.m22*rhs.m22; +return *this; +} + +__host__ __device__ cumat3f cumat3f::transpose() const +{ +cumat3f ret; +ret.m00 = m00; +ret.m10 = m01; +ret.m20 = m02; +ret.m01 = m10; +ret.m11 = m11; +ret.m21 = m12; +ret.m02 = m20; +ret.m12 = m21; +ret.m22 = m22; +return ret; +} + + +///////////////////// +// Det and Inverse // +///////////////////// + __host__ __device__ float cumat3f::det() { float ret = 0.0f; @@ -418,23 +570,6 @@ __host__ __device__ float cumat3f::det() return ret; } -__host__ __device__ cumat3f cumat3f::transpose() -{ - cumat3f ret; - - ret.m00 = m00; - ret.m01 = m10; - ret.m02 = m20; - ret.m10 = m01; - ret.m11 = m11; - ret.m12 = m21; - ret.m20 = m02; - ret.m21 = m12; - ret.m22 = m22; - - return ret; -} - __host__ __device__ cumat3f cumat3f::inverse() { cumat3f q; @@ -479,225 +614,58 @@ __host__ __device__ cumat3f cumat3f::inverse() return q; } -__host__ __device__ cumat3f operator-(const cumat3f &rhs) +////////////////////////// +// Standalone functions // +////////////////////////// + +__host__ __device__ float cuvec3f_dot(const cuvec3f &a, const cuvec3f &b) { - cumat3f ret; - ret.m00 = -rhs.m00; - ret.m10 = -rhs.m10; - ret.m20 = -rhs.m20; - ret.m01 = -rhs.m01; - ret.m11 = -rhs.m11; - ret.m21 = -rhs.m21; - ret.m02 = -rhs.m02; - ret.m12 = -rhs.m12; - ret.m22 = -rhs.m22; + float ret = a.x*b.x+a.y*b.y+a.z*b.z; return ret; } -__host__ __device__ cumat3f& cumat3f::operator+=(const cumat3f &rhs) +__host__ __device__ cuvec3f cuvec3f_cross(const cuvec3f &a, const cuvec3f &b) { - m00 += rhs.m00; - m10 += rhs.m10; - m20 += rhs.m20; - m01 += rhs.m01; - m11 += rhs.m11; - m21 += rhs.m21; - m02 += rhs.m02; - m12 += rhs.m12; - m22 += rhs.m22; + cuvec3f ret; + ret[0] = a[1]*b[2]-a[2]*b[1]; + ret[1] = a[2]*b[0]-a[0]*b[2]; + ret[2] = a[0]*b[1]-a[1]*b[0]; - return *this; + return ret; } -__host__ __device__ cumat3f& cumat3f::operator-=(const cumat3f &rhs) +__host__ __device__ float cuvec3f_norm(const cuvec3f &a) { - m00 -= rhs.m00; - m10 -= rhs.m10; - m20 -= rhs.m20; - m01 -= rhs.m01; - m11 -= rhs.m11; - m21 -= rhs.m21; - m02 -= rhs.m02; - m12 -= rhs.m12; - m22 -= rhs.m22; - - return *this; + float ret; + ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + return ret; } - - -__host__ __device__ cumat3f& cumat3f::operator/=(const float &rhs) +__host__ __device__ cuvec3f cuvec3f_normalize(const cuvec3f &a) { - float irhs = 1.0f/rhs; - m00 *= irhs; - m10 *= irhs; - m20 *= irhs; - m01 *= irhs; - m11 *= irhs; - m21 *= irhs; - m02 *= irhs; - m12 *= irhs; - m22 *= irhs; + cuvec3f ret; + float m; + m = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; + } + else + { + ret.x = 0.0f; ret.y = 0.0f; ret.z = 0.0f; + } - return *this; + return ret; } -__host__ __device__ cumat3f& cumat3f::operator*=(const float &rhs) +__host__ __device__ cuvec3f cuvec3f_proj(const cuvec3f &a, const cuvec3f &b) { - m00 *= rhs; - m10 *= rhs; - m20 *= rhs; - m01 *= rhs; - m11 *= rhs; - m21 *= rhs; - m02 *= rhs; - m12 *= rhs; - m22 *= rhs; - - return *this; -} - -__host__ __device__ cumat3f& cumat3f::operator*=(const cumat3f &rhs) -{ - cumat3f tmp; - - tmp.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; - tmp.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; - tmp.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; - tmp.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; - tmp.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; - tmp.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; - tmp.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; - tmp.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; - tmp.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; - - (*this) = tmp; - - return *this; -} - -__host__ __device__ cumat3f::cumat3f( - const float & _m00, const float & _m01, const float & _m02, - const float & _m10, const float & _m11, const float & _m12, - const float & _m20, const float & _m21, const float & _m22 -) -{ - m00 = _m00; - m01 = _m01; - m02 = _m02; - m10 = _m10; - m11 = _m11; - m12 = _m12; - m20 = _m20; - m21 = _m21; - m22 = _m22; -} - -__host__ __device__ float* cumat3f::data() -{ - //pointer to float[9] representation of matrix - return (float*) this; -} - -__host__ __device__ const float* cumat3f::data() const -{ - //pointer to float[9] representation of matrix - return (const float*) this; -} - -__host__ void test_cudavectf_logic1() -{ - //3 dim vector and matrix functional tests on host side - - // printf("3 dim vector and matrix functional tests on host side\n"); - - // cuvec3f a,b,c; - // float ma[9],mb[9],mc[9]; - - // int I,J; - - // for(I=0;I<3;I++) - // { - // for(J=0;J<3;J++) - // { - // ma[I+3*J] = ((float) rand())/((float) RAND_MAX); - // mb[I+3*J] = ma[I+3*J]; - // } - // } - - // mat3f_inverse(mb); - // mat3f_mult(ma,mb,mc); - - // for(I=0;I<3;I++) - // { - // for(J=0;J<3;J++) - // { - // printf("ma[%d,%d] = %1.3f\n",I,J,ma[I+3*J]); - // } - // } - // for(I=0;I<3;I++) - // { - // for(J=0;J<3;J++) - // { - // printf("mb[%d,%d] = %1.3f\n",I,J,mb[I+3*J]); - // } - // } - // for(I=0;I<3;I++) - // { - // for(J=0;J<3;J++) - // { - // printf("mc[%d,%d] = %1.3f\n",I,J,mc[I+3*J]); - // } - // } - - // a = cuvec3f(1,1,1); - // b = mat3f_mult(ma,a); - // b = mat3f_mult(mb,b); - - // for(I=0;I<3;I++) - // { - // printf("a[%d] = %1.3f, b[%d] = %1.3f\n",I,a[I],I,b[I]); - // } - - // a = cuvec3f(1,0,1); - // b = cuvec3f(0,1,-1); - // c = a+b; - - // for(I=0;I<3;I++) - // { - // printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); - // } - - // c = c/2.0; - - // for(I=0;I<3;I++) - // { - // printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); - // } - - // c = cuvec3f_cross(a,b); - - // for(I=0;I<3;I++) - // { - // printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); - // } - - // printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvec3f_dot(c,a),cuvec3f_dot(c,b)); - - // printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvec3f_norm(a),cuvec3f_norm(b),cuvec3f_norm(c)); - // a = cuvec3f_normalize(a); - // b = cuvec3f_normalize(b); - // c = cuvec3f_normalize(c); - - // for(I=0;I<3;I++) - // { - // printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); - // } - // printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvec3f_dot(c,a),cuvec3f_dot(c,b)); - // printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvec3f_norm(a),cuvec3f_norm(b),cuvec3f_norm(c)); - - // return; + cuvec3f ret; + cuvec3f bn = cuvec3f_normalize(b); + float m = cuvec3f_dot(a,bn); + ret = bn*m; + return ret; } __host__ __device__ cumat3f hodge_dual(const cuvec3f &vin) @@ -764,5 +732,15 @@ __host__ __device__ cumat3f rotmat_from_axisangle(const cuvec3f &axis, const flo } +/////////// +// Tests // +/////////// + +__host__ void test_cudavectf_logic1() +{ + +} + + }; \ No newline at end of file diff --git a/src/amsculib3/math/cuvec4f.cu b/src/amsculib3/math/cuvec4f.cu index 70bbf23..727bba7 100644 --- a/src/amsculib3/math/cuvec4f.cu +++ b/src/amsculib3/math/cuvec4f.cu @@ -3,63 +3,105 @@ namespace amscuda { -//////////// -//cuvec4ff// -//////////// +////////////////// +// Vector Class // +////////////////// -__host__ __device__ cuvec4f::cuvec4f() +__host__ __device__ cuvec4f::cuvec4f() { - x = 0.0f; y = 0.0f; z = 0.0f; w = 0.0f; + x = 0; y = 0; z = 0; w = 0; return; } -__host__ __device__ cuvec4f::~cuvec4f() +__host__ __device__ cuvec4f::~cuvec4f() { - x = 0.0f; y = 0.0f; z = 0.0f; w = 0.0f; + x = 0; y = 0; z = 0; w = 0; + return; +} + +__host__ __device__ cuvec4f::cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w) +{ + x = _x; y = _y; z = _z; w = _w; return; } __host__ __device__ float& cuvec4f::operator[](const int &I) { - if(I==0) return x; - if(I==1) return y; - if(I==2) return z; - if(I==3) return w; + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + } + return x; } __host__ __device__ const float& cuvec4f::operator[](const int &I) const { - if(I==0) return x; - if(I==1) return y; - if(I==2) return z; - if(I==3) return w; + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + } + return x; } -__host__ __device__ cuvec4f cuvec4f::operator+(const cuvec4f &rhs) +__host__ __device__ cuvec4f cuvec4f::operator+(const cuvec4f& rhs) const { cuvec4f ret; - ret.x = x+rhs.x; - ret.y = y+rhs.y; - ret.z = z+rhs.z; - ret.w = w+rhs.w; - + ret.x = x + rhs.x; + ret.y = y + rhs.y; + ret.z = z + rhs.z; + ret.w = w + rhs.w; return ret; } -__host__ __device__ cuvec4f cuvec4f::operator-(const cuvec4f &rhs) +__host__ __device__ cuvec4f cuvec4f::operator-(const cuvec4f& rhs) const { cuvec4f ret; - ret.x = x-rhs.x; - ret.y = y-rhs.y; - ret.z = z-rhs.z; - ret.w = w-rhs.w; - + ret.x = x - rhs.x; + ret.y = y - rhs.y; + ret.z = z - rhs.z; + ret.w = w - rhs.w; return ret; } -__host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const float &rhs) +__host__ __device__ cuvec4f cuvec4f::operator*(const cuvec4f& rhs) const +{ + //Elementwise product + cuvec4f ret; + ret.x = x * rhs.x; + ret.y = y * rhs.y; + ret.z = z * rhs.z; + ret.w = w * rhs.w; + return ret; +} + +__host__ __device__ cuvec4f cuvec4f::operator/(const cuvec4f& rhs) const +{ + //Elementwise division + cuvec4f ret; + ret.x = x / rhs.x; + ret.y = y / rhs.y; + ret.z = z / rhs.z; + ret.w = w / rhs.w; + return ret; +} + +__host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const float& rhs) { cuvec4f ret; ret.x = lhs.x*rhs; @@ -68,7 +110,18 @@ __host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const float &rhs) ret.w = lhs.w*rhs; return ret; } -__host__ __device__ cuvec4f operator/(const cuvec4f& lhs, const float &rhs) + +__host__ __device__ cuvec4f operator*(const float& lhs, const cuvec4f& rhs) +{ + cuvec4f ret; + ret.x = lhs*rhs.x; + ret.y = lhs*rhs.y; + ret.z = lhs*rhs.z; + ret.w = lhs*rhs.w; + return ret; +} + +__host__ __device__ cuvec4f operator/(const cuvec4f& lhs, const float& rhs) { cuvec4f ret; ret.x = lhs.x/rhs; @@ -77,180 +130,171 @@ __host__ __device__ cuvec4f operator/(const cuvec4f& lhs, const float &rhs) ret.w = lhs.w/rhs; return ret; } -__host__ __device__ cuvec4f operator*(const float& lhs, const cuvec4f &rhs) + +__host__ __device__ cuvec4f operator/(const float& lhs, const cuvec4f& rhs) { cuvec4f ret; - ret.x = rhs.x*lhs; - ret.y = rhs.y*lhs; - ret.z = rhs.z*lhs; - ret.w = rhs.w*lhs; - return ret; -} -__host__ __device__ cuvec4f operator/(const float& lhs, const cuvec4f &rhs) -{ - cuvec4f ret; - ret.x = rhs.x/lhs; - ret.y = rhs.y/lhs; - ret.z = rhs.z/lhs; - ret.w = rhs.w/lhs; + ret.x = lhs/rhs.x; + ret.y = lhs/rhs.y; + ret.z = lhs/rhs.z; + ret.w = lhs/rhs.w; return ret; } -__host__ __device__ cuvec4f& cuvec4f::operator+=(const cuvec4f &rhs) -{ - x = x + rhs.x; - y = y + rhs.y; - z = z + rhs.z; - w = w + rhs.w; - return *this; -} - -__host__ __device__ cuvec4f& cuvec4f::operator-=(const cuvec4f &rhs) -{ - x = x - rhs.x; - y = y - rhs.y; - z = z - rhs.z; - w = w - rhs.w; - return *this; -} - -__host__ __device__ cuvec4f& cuvec4f::operator*=(const float &rhs) -{ - x = x * rhs; - y = y * rhs; - z = z * rhs; - w = w * rhs; - return *this; -} - -__host__ __device__ cuvec4f& cuvec4f::operator/=(const float &rhs) -{ - x = x / rhs; - y = y / rhs; - z = z / rhs; - w = w / rhs; - return *this; -} - -__host__ __device__ cuvec4f operator-(const cuvec4f &rhs) +__host__ __device__ cuvec4f operator-(const cuvec4f& other) { cuvec4f ret; - ret[0] = -rhs[0]; - ret[1] = -rhs[1]; - ret[2] = -rhs[2]; - ret[3] = -rhs[3]; + ret.x = -other.x; + ret.y = -other.y; + ret.z = -other.z; + ret.w = -other.w; return ret; } - -__host__ __device__ cuvec4f::cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w) +__host__ __device__ cuvec4f& cuvec4f::operator+=(const cuvec4f& rhs) { - x = _x; y = _y; z = _z; w = _w; + x += rhs.x; + y += rhs.y; + z += rhs.z; + w += rhs.w; + return *this; +} + +__host__ __device__ cuvec4f& cuvec4f::operator-=(const cuvec4f& rhs) +{ + x -= rhs.x; + y -= rhs.y; + z -= rhs.z; + w -= rhs.w; + return *this; +} + +__host__ __device__ cuvec4f& cuvec4f::operator*=(const float& rhs) +{ + x *= rhs; + y *= rhs; + z *= rhs; + w *= rhs; + return *this; +} + +__host__ __device__ cuvec4f& cuvec4f::operator/=(const float& rhs) +{ + x /= rhs; + y /= rhs; + z /= rhs; + w /= rhs; + return *this; +} + + +////////////////// +// Matrix Class // +////////////////// + +__host__ __device__ cumat4f::cumat4f() +{ + m00 = 0; + m01 = 0; + m02 = 0; + m03 = 0; + + m10 = 0; + m11 = 0; + m12 = 0; + m13 = 0; + + m20 = 0; + m21 = 0; + m22 = 0; + m23 = 0; + + m30 = 0; + m31 = 0; + m32 = 0; + m33 = 0; + return; } - -__host__ __device__ cumat4f::cumat4f() +__host__ __device__ cumat4f::~cumat4f() { - m00 = 0.0f; - m10 = 0.0f; - m20 = 0.0f; - m30 = 0.0f; + //m00 = 0; + //m01 = 0; + //m02 = 0; + //m03 = 0; - m01 = 0.0f; - m11 = 0.0f; - m21 = 0.0f; - m31 = 0.0f; + //m10 = 0; + //m11 = 0; + //m12 = 0; + //m13 = 0; - m02 = 0.0f; - m12 = 0.0f; - m22 = 0.0f; - m32 = 0.0f; + //m20 = 0; + //m21 = 0; + //m22 = 0; + //m23 = 0; - m03 = 0.0f; - m13 = 0.0f; - m23 = 0.0f; - m33 = 0.0f; - - return; -} - -__host__ __device__ cumat4f::~cumat4f() -{ - m00 = 0.0f; - m10 = 0.0f; - m20 = 0.0f; - m30 = 0.0f; - - m01 = 0.0f; - m11 = 0.0f; - m21 = 0.0f; - m31 = 0.0f; - - m02 = 0.0f; - m12 = 0.0f; - m22 = 0.0f; - m32 = 0.0f; - - m03 = 0.0f; - m13 = 0.0f; - m23 = 0.0f; - m33 = 0.0f; + //m30 = 0; + //m31 = 0; + //m32 = 0; + //m33 = 0; return; } __host__ __device__ cumat4f::cumat4f( - const float & _m00, const float & _m01, const float & _m02, const float & _m03, - const float & _m10, const float & _m11, const float & _m12, const float & _m13, - const float & _m20, const float & _m21, const float & _m22, const float & _m23, - const float & _m30, const float & _m31, const float & _m32, const float & _m33 + const float& _m00, const float& _m01, const float& _m02, const float& _m03, + const float& _m10, const float& _m11, const float& _m12, const float& _m13, + const float& _m20, const float& _m21, const float& _m22, const float& _m23, + const float& _m30, const float& _m31, const float& _m32, const float& _m33 ) { - m00 = _m00; - m10 = _m10; - m20 = _m20; - m30 = _m30; + m00 = _m00; + m10 = _m10; + m20 = _m20; + m30 = _m30; - m01 = _m01; - m11 = _m11; - m21 = _m21; - m31 = _m31; + m01 = _m01; + m11 = _m11; + m21 = _m21; + m31 = _m31; + + m02 = _m02; + m12 = _m12; + m22 = _m22; + m32 = _m32; + + m03 = _m03; + m13 = _m13; + m23 = _m23; + m33 = _m33; - m02 = _m02; - m12 = _m12; - m22 = _m22; - m32 = _m32; - m03 = _m03; - m13 = _m13; - m23 = _m23; - m33 = _m33; - return; } -__host__ __device__ cumat4f::cumat4f(const float *data16) +__host__ __device__ cumat4f::cumat4f(const float* data16) { - m00 = data16[0]; - m10 = data16[1]; - m20 = data16[2]; - m30 = data16[3]; + m00 = data16[0]; + m10 = data16[1]; + m20 = data16[2]; + m30 = data16[3]; - m01 = data16[4]; - m11 = data16[5]; - m21 = data16[6]; - m31 = data16[7]; + m01 = data16[4]; + m11 = data16[5]; + m21 = data16[6]; + m31 = data16[7]; - m02 = data16[8]; - m12 = data16[9]; - m22 = data16[10]; - m32 = data16[11]; + m02 = data16[8]; + m12 = data16[9]; + m22 = data16[10]; + m32 = data16[11]; + + m03 = data16[12]; + m13 = data16[13]; + m23 = data16[14]; + m33 = data16[15]; - m03 = data16[12]; - m13 = data16[13]; - m23 = data16[14]; - m33 = data16[15]; return; } @@ -259,27 +303,81 @@ __host__ __device__ float& cumat4f::operator[](const int &I) { switch(I) { - case 0: return m00; - case 1: return m10; - case 2: return m20; - case 3: return m30; - - case 4: return m01; - case 5: return m11; - case 6: return m21; - case 7: return m31; - - case 8: return m02; - case 9: return m12; - case 10: return m22; - case 11: return m32; - - case 12: return m03; - case 13: return m13; - case 14: return m23; - case 15: return m33; + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m30; + case 4: + return m01; + case 5: + return m11; + case 6: + return m21; + case 7: + return m31; + case 8: + return m02; + case 9: + return m12; + case 10: + return m22; + case 11: + return m32; + case 12: + return m03; + case 13: + return m13; + case 14: + return m23; + case 15: + return m33; } - + + return m00; +} + +__host__ __device__ const float& cumat4f::operator[](const int &I) const +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m30; + case 4: + return m01; + case 5: + return m11; + case 6: + return m21; + case 7: + return m31; + case 8: + return m02; + case 9: + return m12; + case 10: + return m22; + case 11: + return m32; + case 12: + return m03; + case 13: + return m13; + case 14: + return m23; + case 15: + return m33; + } + return m00; } @@ -288,197 +386,349 @@ __host__ __device__ float& cumat4f::operator()(const int &I, const int &J) return (*this)[I+4*J]; } +__host__ __device__ const float& cumat4f::operator()(const int &I, const int &J) const +{ + return (*this)[I+4*J]; +} __host__ __device__ float& cumat4f::at(const int &I, const int &J) { return (*this)[I+4*J]; } -__host__ __device__ const float& cumat4f::operator[](const int &I) const -{ - switch(I) - { - case 0: return m00; - case 1: return m10; - case 2: return m20; - case 3: return m30; - - case 4: return m01; - case 5: return m11; - case 6: return m21; - case 7: return m31; - - case 8: return m02; - case 9: return m12; - case 10: return m22; - case 11: return m32; - - case 12: return m03; - case 13: return m13; - case 14: return m23; - case 15: return m33; - } - - return m00; -} - -__host__ __device__ const float& cumat4f::operator()(const int &I, const int &J) const +__host__ __device__ const float& cumat4f::at(const int &I, const int &J) const { return (*this)[I+4*J]; } - -__host__ __device__ const float& cumat4f::at(const int &I, const int &J) const +__host__ __device__ float* cumat4f::data() { - return (*this)[I+4*J]; +return (float*)this; } -__host__ __device__ cumat4f cumat4f::operator+(const cumat4f &rhs) +__host__ __device__ const float* cumat4f::data() const { - cumat4f ret; - ret.m00 = m00 + rhs.m00; - ret.m10 = m10 + rhs.m10; - ret.m20 = m20 + rhs.m20; - ret.m30 = m30 + rhs.m30; - ret.m01 = m01 + rhs.m01; - ret.m11 = m11 + rhs.m11; - ret.m21 = m21 + rhs.m21; - ret.m31 = m31 + rhs.m31; - ret.m02 = m02 + rhs.m02; - ret.m12 = m12 + rhs.m12; - ret.m22 = m22 + rhs.m22; - ret.m32 = m32 + rhs.m32; - ret.m03 = m03 + rhs.m03; - ret.m13 = m13 + rhs.m13; - ret.m23 = m23 + rhs.m23; - ret.m33 = m33 + rhs.m33; - return ret; +return (float*)this; } -__host__ __device__ cumat4f cumat4f::operator-(const cumat4f &rhs) +__host__ __device__ cumat4f cumat4f::operator+(const cumat4f& rhs) const { - cumat4f ret; - ret.m00 = m00 - rhs.m00; - ret.m10 = m10 - rhs.m10; - ret.m20 = m20 - rhs.m20; - ret.m30 = m30 - rhs.m30; - ret.m01 = m01 - rhs.m01; - ret.m11 = m11 - rhs.m11; - ret.m21 = m21 - rhs.m21; - ret.m31 = m31 - rhs.m31; - ret.m02 = m02 - rhs.m02; - ret.m12 = m12 - rhs.m12; - ret.m22 = m22 - rhs.m22; - ret.m32 = m32 - rhs.m32; - ret.m03 = m03 - rhs.m03; - ret.m13 = m13 - rhs.m13; - ret.m23 = m23 - rhs.m23; - ret.m33 = m33 - rhs.m33; - return ret; +cumat4f ret; +ret.m00 = m00 + rhs.m00; +ret.m10 = m10 + rhs.m10; +ret.m20 = m20 + rhs.m20; +ret.m30 = m30 + rhs.m30; + +ret.m01 = m01 + rhs.m01; +ret.m11 = m11 + rhs.m11; +ret.m21 = m21 + rhs.m21; +ret.m31 = m31 + rhs.m31; + +ret.m02 = m02 + rhs.m02; +ret.m12 = m12 + rhs.m12; +ret.m22 = m22 + rhs.m22; +ret.m32 = m32 + rhs.m32; + +ret.m03 = m03 + rhs.m03; +ret.m13 = m13 + rhs.m13; +ret.m23 = m23 + rhs.m23; +ret.m33 = m33 + rhs.m33; + +return ret; } -__host__ __device__ cumat4f cumat4f::operator*(const float &rhs) +__host__ __device__ cumat4f cumat4f::operator-(const cumat4f& rhs) const { - cumat4f ret; - ret.m00 = m00 * rhs; - ret.m10 = m10 * rhs; - ret.m20 = m20 * rhs; - ret.m30 = m30 * rhs; - ret.m01 = m01 * rhs; - ret.m11 = m11 * rhs; - ret.m21 = m21 * rhs; - ret.m31 = m31 * rhs; - ret.m02 = m02 * rhs; - ret.m12 = m12 * rhs; - ret.m22 = m22 * rhs; - ret.m32 = m32 * rhs; - ret.m03 = m03 * rhs; - ret.m13 = m13 * rhs; - ret.m23 = m23 * rhs; - ret.m33 = m33 * rhs; - return ret; +cumat4f ret; +ret.m00 = m00 - rhs.m00; +ret.m10 = m10 - rhs.m10; +ret.m20 = m20 - rhs.m20; +ret.m30 = m30 - rhs.m30; + +ret.m01 = m01 - rhs.m01; +ret.m11 = m11 - rhs.m11; +ret.m21 = m21 - rhs.m21; +ret.m31 = m31 - rhs.m31; + +ret.m02 = m02 - rhs.m02; +ret.m12 = m12 - rhs.m12; +ret.m22 = m22 - rhs.m22; +ret.m32 = m32 - rhs.m32; + +ret.m03 = m03 - rhs.m03; +ret.m13 = m13 - rhs.m13; +ret.m23 = m23 - rhs.m23; +ret.m33 = m33 - rhs.m33; + +return ret; } -__host__ __device__ cumat4f cumat4f::operator/(const float &rhs) +__host__ __device__ cumat4f cumat4f::operator*(const cumat4f& rhs) const { - cumat4f ret; - ret.m00 = m00 / rhs; - ret.m10 = m10 / rhs; - ret.m20 = m20 / rhs; - ret.m30 = m30 / rhs; - ret.m01 = m01 / rhs; - ret.m11 = m11 / rhs; - ret.m21 = m21 / rhs; - ret.m31 = m31 / rhs; - ret.m02 = m02 / rhs; - ret.m12 = m12 / rhs; - ret.m22 = m22 / rhs; - ret.m32 = m32 / rhs; - ret.m03 = m03 / rhs; - ret.m13 = m13 / rhs; - ret.m23 = m23 / rhs; - ret.m33 = m33 / rhs; - return ret; -} - -__host__ __device__ cuvec4f cumat4f::operator*(const cuvec4f &rhs) -{ - cuvec4f ret; - ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z + m03*rhs.w; - ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z + m13*rhs.w; - ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z + m23*rhs.w; - ret.w = m30*rhs.x + m31*rhs.y + m32*rhs.z + m33*rhs.w; - return ret; +cumat4f ret; //should be zeroed in constructor +ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20 + m03*rhs.m30; +ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21 + m03*rhs.m31; +ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22 + m03*rhs.m32; +ret.m03 = m00*rhs.m03 + m01*rhs.m13 + m02*rhs.m23 + m03*rhs.m33; + +ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20 + m13*rhs.m30; +ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21 + m13*rhs.m31; +ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22 + m13*rhs.m32; +ret.m13 = m10*rhs.m03 + m11*rhs.m13 + m12*rhs.m23 + m13*rhs.m33; + +ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20 + m23*rhs.m30; +ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21 + m23*rhs.m31; +ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22 + m23*rhs.m32; +ret.m23 = m20*rhs.m03 + m21*rhs.m13 + m22*rhs.m23 + m23*rhs.m33; + +ret.m30 = m30*rhs.m00 + m31*rhs.m10 + m32*rhs.m20 + m33*rhs.m30; +ret.m31 = m30*rhs.m01 + m31*rhs.m11 + m32*rhs.m21 + m33*rhs.m31; +ret.m32 = m30*rhs.m02 + m31*rhs.m12 + m32*rhs.m22 + m33*rhs.m32; +ret.m33 = m30*rhs.m03 + m31*rhs.m13 + m32*rhs.m23 + m33*rhs.m33; + +return ret; } +__host__ __device__ cumat4f operator*(const cumat4f& lhs, const float& rhs) +{ +cumat4f ret; +ret.m00=lhs.m00*rhs; +ret.m10=lhs.m10*rhs; +ret.m20=lhs.m20*rhs; +ret.m30=lhs.m30*rhs; +ret.m01=lhs.m01*rhs; +ret.m11=lhs.m11*rhs; +ret.m21=lhs.m21*rhs; +ret.m31=lhs.m31*rhs; +ret.m02=lhs.m02*rhs; +ret.m12=lhs.m12*rhs; +ret.m22=lhs.m22*rhs; +ret.m32=lhs.m32*rhs; +ret.m03=lhs.m03*rhs; +ret.m13=lhs.m13*rhs; +ret.m23=lhs.m23*rhs; +ret.m33=lhs.m33*rhs; +return ret; +} -__host__ __device__ cumat4f cumat4f::operator*(const cumat4f &rhs) +__host__ __device__ cumat4f operator/(const cumat4f& lhs, const float& rhs) { - cumat4f ret; - ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20 + m03*rhs.m30; - ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21 + m03*rhs.m31; - ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22 + m03*rhs.m32; - ret.m03 = m00*rhs.m03 + m01*rhs.m13 + m02*rhs.m23 + m03*rhs.m33; - ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20 + m13*rhs.m30; - ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21 + m13*rhs.m31; - ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22 + m13*rhs.m32; - ret.m13 = m10*rhs.m03 + m11*rhs.m13 + m12*rhs.m23 + m13*rhs.m33; - ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20 + m23*rhs.m30; - ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21 + m23*rhs.m31; - ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22 + m23*rhs.m32; - ret.m23 = m20*rhs.m03 + m21*rhs.m13 + m22*rhs.m23 + m23*rhs.m33; - ret.m30 = m30*rhs.m00 + m31*rhs.m10 + m32*rhs.m20 + m33*rhs.m30; - ret.m31 = m30*rhs.m01 + m31*rhs.m11 + m32*rhs.m21 + m33*rhs.m31; - ret.m32 = m30*rhs.m02 + m31*rhs.m12 + m32*rhs.m22 + m33*rhs.m32; - ret.m33 = m30*rhs.m03 + m31*rhs.m13 + m32*rhs.m23 + m33*rhs.m33; - return ret; +cumat4f ret; +ret.m00=lhs.m00/rhs; +ret.m10=lhs.m10/rhs; +ret.m20=lhs.m20/rhs; +ret.m30=lhs.m30/rhs; +ret.m01=lhs.m01/rhs; +ret.m11=lhs.m11/rhs; +ret.m21=lhs.m21/rhs; +ret.m31=lhs.m31/rhs; +ret.m02=lhs.m02/rhs; +ret.m12=lhs.m12/rhs; +ret.m22=lhs.m22/rhs; +ret.m32=lhs.m32/rhs; +ret.m03=lhs.m03/rhs; +ret.m13=lhs.m13/rhs; +ret.m23=lhs.m23/rhs; +ret.m33=lhs.m33/rhs; +return ret; } -__host__ __device__ cumat4f cumat4f::transpose() + +__host__ __device__ cumat4f operator*(const float& lhs, const cumat4f& rhs) { - cumat4f q; - q.m00 = m00; - q.m10 = m01; - q.m20 = m02; - q.m30 = m03; - - q.m01 = m10; - q.m11 = m11; - q.m21 = m12; - q.m31 = m13; - - q.m02 = m20; - q.m12 = m21; - q.m22 = m22; - q.m32 = m23; - - q.m03 = m30; - q.m13 = m31; - q.m23 = m32; - q.m33 = m33; - - return q; +cumat4f ret; +ret.m00=lhs*rhs.m00; +ret.m10=lhs*rhs.m10; +ret.m20=lhs*rhs.m20; +ret.m30=lhs*rhs.m30; +ret.m01=lhs*rhs.m01; +ret.m11=lhs*rhs.m11; +ret.m21=lhs*rhs.m21; +ret.m31=lhs*rhs.m31; +ret.m02=lhs*rhs.m02; +ret.m12=lhs*rhs.m12; +ret.m22=lhs*rhs.m22; +ret.m32=lhs*rhs.m32; +ret.m03=lhs*rhs.m03; +ret.m13=lhs*rhs.m13; +ret.m23=lhs*rhs.m23; +ret.m33=lhs*rhs.m33; +return ret; } +__host__ __device__ cuvec4f operator*(const cumat4f& lhs, const cuvec4f& rhs) +{ +cuvec4f ret; +ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y + lhs.m02*rhs.z + lhs.m03*rhs.w; +ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y + lhs.m12*rhs.z + lhs.m13*rhs.w; +ret.z = lhs.m20*rhs.x + lhs.m21*rhs.y + lhs.m22*rhs.z + lhs.m23*rhs.w; +ret.w = lhs.m30*rhs.x + lhs.m31*rhs.y + lhs.m32*rhs.z + lhs.m33*rhs.w; +return ret; +} + +__host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const cumat4f& rhs) +{ +cuvec4f ret; +ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10 + lhs.z*rhs.m20 + lhs.w*rhs.m30; +ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11 + lhs.z*rhs.m21 + lhs.w*rhs.m31; +ret.z = lhs.x*rhs.m02 + lhs.y*rhs.m12 + lhs.z*rhs.m22 + lhs.w*rhs.m32; +ret.w = lhs.x*rhs.m03 + lhs.y*rhs.m13 + lhs.z*rhs.m23 + lhs.w*rhs.m33; +return ret; +} + +__host__ __device__ cumat4f operator-(const cumat4f& rhs) +{ +cumat4f ret; +ret.m00 = -rhs.m00; +ret.m10 = -rhs.m10; +ret.m20 = -rhs.m20; +ret.m30 = -rhs.m30; +ret.m01 = -rhs.m01; +ret.m11 = -rhs.m11; +ret.m21 = -rhs.m21; +ret.m31 = -rhs.m31; +ret.m02 = -rhs.m02; +ret.m12 = -rhs.m12; +ret.m22 = -rhs.m22; +ret.m32 = -rhs.m32; +ret.m03 = -rhs.m03; +ret.m13 = -rhs.m13; +ret.m23 = -rhs.m23; +ret.m33 = -rhs.m33; +return ret; +} + +__host__ __device__ cumat4f& cumat4f::operator+=(const cumat4f& rhs) +{ +m00 += rhs.m00; +m10 += rhs.m10; +m20 += rhs.m20; +m30 += rhs.m30; +m01 += rhs.m01; +m11 += rhs.m11; +m21 += rhs.m21; +m31 += rhs.m31; +m02 += rhs.m02; +m12 += rhs.m12; +m22 += rhs.m22; +m32 += rhs.m32; +m03 += rhs.m03; +m13 += rhs.m13; +m23 += rhs.m23; +m33 += rhs.m33; +return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator-=(const cumat4f& rhs) +{ +m00 -= rhs.m00; +m10 -= rhs.m10; +m20 -= rhs.m20; +m30 -= rhs.m30; +m01 -= rhs.m01; +m11 -= rhs.m11; +m21 -= rhs.m21; +m31 -= rhs.m31; +m02 -= rhs.m02; +m12 -= rhs.m12; +m22 -= rhs.m22; +m32 -= rhs.m32; +m03 -= rhs.m03; +m13 -= rhs.m13; +m23 -= rhs.m23; +m33 -= rhs.m33; +return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator*=(const float& rhs) +{ +m00 *= rhs; +m10 *= rhs; +m20 *= rhs; +m30 *= rhs; +m01 *= rhs; +m11 *= rhs; +m21 *= rhs; +m31 *= rhs; +m02 *= rhs; +m12 *= rhs; +m22 *= rhs; +m32 *= rhs; +m03 *= rhs; +m13 *= rhs; +m23 *= rhs; +m33 *= rhs; +return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator/=(const float& rhs) +{ +m00 /= rhs; +m10 /= rhs; +m20 /= rhs; +m30 /= rhs; +m01 /= rhs; +m11 /= rhs; +m21 /= rhs; +m31 /= rhs; +m02 /= rhs; +m12 /= rhs; +m22 /= rhs; +m32 /= rhs; +m03 /= rhs; +m13 /= rhs; +m23 /= rhs; +m33 /= rhs; +return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator*=(const cumat4f& rhs) +{ +cumat4f tmp = *this; +m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10 + tmp.m02*rhs.m20 + tmp.m03*rhs.m30; +m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11 + tmp.m02*rhs.m21 + tmp.m03*rhs.m31; +m02 = tmp.m00*rhs.m02 + tmp.m01*rhs.m12 + tmp.m02*rhs.m22 + tmp.m03*rhs.m32; +m03 = tmp.m00*rhs.m03 + tmp.m01*rhs.m13 + tmp.m02*rhs.m23 + tmp.m03*rhs.m33; +m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10 + tmp.m12*rhs.m20 + tmp.m13*rhs.m30; +m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11 + tmp.m12*rhs.m21 + tmp.m13*rhs.m31; +m12 = tmp.m10*rhs.m02 + tmp.m11*rhs.m12 + tmp.m12*rhs.m22 + tmp.m13*rhs.m32; +m13 = tmp.m10*rhs.m03 + tmp.m11*rhs.m13 + tmp.m12*rhs.m23 + tmp.m13*rhs.m33; +m20 = tmp.m20*rhs.m00 + tmp.m21*rhs.m10 + tmp.m22*rhs.m20 + tmp.m23*rhs.m30; +m21 = tmp.m20*rhs.m01 + tmp.m21*rhs.m11 + tmp.m22*rhs.m21 + tmp.m23*rhs.m31; +m22 = tmp.m20*rhs.m02 + tmp.m21*rhs.m12 + tmp.m22*rhs.m22 + tmp.m23*rhs.m32; +m23 = tmp.m20*rhs.m03 + tmp.m21*rhs.m13 + tmp.m22*rhs.m23 + tmp.m23*rhs.m33; +m30 = tmp.m30*rhs.m00 + tmp.m31*rhs.m10 + tmp.m32*rhs.m20 + tmp.m33*rhs.m30; +m31 = tmp.m30*rhs.m01 + tmp.m31*rhs.m11 + tmp.m32*rhs.m21 + tmp.m33*rhs.m31; +m32 = tmp.m30*rhs.m02 + tmp.m31*rhs.m12 + tmp.m32*rhs.m22 + tmp.m33*rhs.m32; +m33 = tmp.m30*rhs.m03 + tmp.m31*rhs.m13 + tmp.m32*rhs.m23 + tmp.m33*rhs.m33; +return *this; +} + +__host__ __device__ cumat4f cumat4f::transpose() const +{ +cumat4f ret; +ret.m00 = m00; +ret.m10 = m01; +ret.m20 = m02; +ret.m30 = m03; +ret.m01 = m10; +ret.m11 = m11; +ret.m21 = m12; +ret.m31 = m13; +ret.m02 = m20; +ret.m12 = m21; +ret.m22 = m22; +ret.m32 = m23; +ret.m03 = m30; +ret.m13 = m31; +ret.m23 = m32; +ret.m33 = m33; +return ret; +} + +///////////////////// +// Det and Inverse // +///////////////////// + __host__ __device__ float cumat4f::det() { float dt; @@ -576,120 +826,9 @@ __host__ __device__ cumat4f cumat4f::inverse() return mret; } -__host__ __device__ cumat4f operator-(const cumat4f &rhs) -{ - cumat4f ret; - ret.m00 = -rhs.m00; - ret.m10 = -rhs.m10; - ret.m20 = -rhs.m20; - ret.m30 = -rhs.m30; - ret.m01 = -rhs.m01; - ret.m11 = -rhs.m11; - ret.m21 = -rhs.m21; - ret.m31 = -rhs.m31; - ret.m02 = -rhs.m02; - ret.m12 = -rhs.m12; - ret.m22 = -rhs.m22; - ret.m32 = -rhs.m32; - ret.m03 = -rhs.m03; - ret.m13 = -rhs.m13; - ret.m23 = -rhs.m23; - ret.m33 = -rhs.m33; - return ret; -} - -__host__ __device__ cumat4f& cumat4f::operator+=(const cumat4f &rhs) -{ - m00 += rhs.m00; - m01 += rhs.m01; - m02 += rhs.m02; - m03 += rhs.m03; - m10 += rhs.m10; - m11 += rhs.m11; - m12 += rhs.m12; - m13 += rhs.m13; - m20 += rhs.m20; - m21 += rhs.m21; - m22 += rhs.m22; - m23 += rhs.m23; - m30 += rhs.m30; - m31 += rhs.m31; - m32 += rhs.m32; - m33 += rhs.m33; - return *this; -} - -__host__ __device__ cumat4f& cumat4f::operator-=(const cumat4f &rhs) -{ - m00 -= rhs.m00; - m01 -= rhs.m01; - m02 -= rhs.m02; - m03 -= rhs.m03; - m10 -= rhs.m10; - m11 -= rhs.m11; - m12 -= rhs.m12; - m13 -= rhs.m13; - m20 -= rhs.m20; - m21 -= rhs.m21; - m22 -= rhs.m22; - m23 -= rhs.m23; - m30 -= rhs.m30; - m31 -= rhs.m31; - m32 -= rhs.m32; - m33 -= rhs.m33; - return *this; -} - -__host__ __device__ cumat4f& cumat4f::operator*=(const float &rhs) -{ - m00 *= rhs; - m01 *= rhs; - m02 *= rhs; - m03 *= rhs; - m10 *= rhs; - m11 *= rhs; - m12 *= rhs; - m13 *= rhs; - m20 *= rhs; - m21 *= rhs; - m22 *= rhs; - m23 *= rhs; - m30 *= rhs; - m31 *= rhs; - m32 *= rhs; - m33 *= rhs; - return *this; -} - -__host__ __device__ cumat4f& cumat4f::operator/=(const float &rhs) -{ - float irhs = 1.0f / rhs; - m00 *= irhs; - m01 *= irhs; - m02 *= irhs; - m03 *= irhs; - m10 *= irhs; - m11 *= irhs; - m12 *= irhs; - m13 *= irhs; - m20 *= irhs; - m21 *= irhs; - m22 *= irhs; - m23 *= irhs; - m30 *= irhs; - m31 *= irhs; - m32 *= irhs; - m33 *= irhs; - return *this; -} - -__host__ __device__ cumat4f& cumat4f::operator*=(const cumat4f &rhs) -{ - cumat4f tmp = (*this); - (*this) = tmp*rhs; - return *this;; -} - +////////////////////////// +// Standalone Functions // +////////////////////////// __host__ __device__ float cuvec4f_dot(cuvec4f &a, cuvec4f &b) { @@ -723,7 +862,8 @@ __host__ __device__ cuvec4f cuvec4f_proj(cuvec4f &a, cuvec4f &b) return ret; } - - +/////////// +// Tests // +/////////// }; //namespace amscuda diff --git a/test_scripts/cuvec_codegen.py b/test_scripts/cuvec_codegen.py index a3b2ed7..134d892 100644 --- a/test_scripts/cuvec_codegen.py +++ b/test_scripts/cuvec_codegen.py @@ -523,7 +523,7 @@ def mconstructor2(dim,dtype): for I in range(0,dim): lnsh+="\t\t" for J in range(0,dim): - lnsh += f"const {dtype}& _m{J}{I}" + lnsh += f"const {dtype}& _m{I}{J}" if(J+I*dim