diff --git a/amsculib2.code-workspace b/amsculib2.code-workspace index 362d7c2..d35f5f5 100644 --- a/amsculib2.code-workspace +++ b/amsculib2.code-workspace @@ -3,5 +3,19 @@ { "path": "." } - ] + ], + //"editor.quickSuggestions": false + //https://code.visualstudio.com/docs/editing/intellisense#_customizing-intellisense + // Controls if quick suggestions should show up while typing + "settings": { + "editor.quickSuggestions": { + "other": false, + "comments": false, + "strings": false + }, + "editor.suggestOnTriggerCharacters": false, + "editor.tabCompletion": "off", + "editor.parameterHints.enabled": false, + "editor.acceptSuggestionOnCommitCharacter": false + } } \ No newline at end of file diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index 5a19883..06cd762 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index 7ac3343..4af119f 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index cb96761..e2dffd0 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index 44d5377..827be9b 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o index 18ff6f8..d616470 100644 Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index 56cb5bb..96d9685 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index 0c0f549..1db6b56 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index a016b5c..4956135 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib2.o b/build_linux64/objstore/amsculib2.o index 02c9cc7..46c1bd6 100644 Binary files a/build_linux64/objstore/amsculib2.o and b/build_linux64/objstore/amsculib2.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index b7f744b..3acd851 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index eb3211e..f616177 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/cuvect2.o b/build_linux64/objstore/cuvect2.o index f88c2e0..2bfe452 100644 Binary files a/build_linux64/objstore/cuvect2.o and b/build_linux64/objstore/cuvect2.o differ diff --git a/build_linux64/objstore/cuvect2f.o b/build_linux64/objstore/cuvect2f.o index f70fa95..19dfc05 100644 Binary files a/build_linux64/objstore/cuvect2f.o and b/build_linux64/objstore/cuvect2f.o differ diff --git a/build_linux64/objstore/cuvect3.o b/build_linux64/objstore/cuvect3.o index 672a662..91da2d0 100644 Binary files a/build_linux64/objstore/cuvect3.o and b/build_linux64/objstore/cuvect3.o differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index ec36225..9725095 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/objstore/cuvect4.o b/build_linux64/objstore/cuvect4.o index 0b33597..18b550b 100644 Binary files a/build_linux64/objstore/cuvect4.o and b/build_linux64/objstore/cuvect4.o differ diff --git a/build_linux64/objstore/cuvect4f.o b/build_linux64/objstore/cuvect4f.o index 4e24365..1effae1 100644 Binary files a/build_linux64/objstore/cuvect4f.o and b/build_linux64/objstore/cuvect4f.o differ diff --git a/build_linux64/test b/build_linux64/test index 7b811ba..5f25607 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib2/cuvect4f.hpp b/include/amsculib2/cuvect4f.hpp index 1a5b999..f2ac7d9 100644 --- a/include/amsculib2/cuvect4f.hpp +++ b/include/amsculib2/cuvect4f.hpp @@ -35,30 +35,54 @@ namespace amscuda class cumat4f { public: - float dat[16]; + //float dat[16]; - // float m00,m10,m20,m30; //named references to force register use? - // float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack - // float m02,m12,m22,m32; - // float m03,m13,m23,m33; + float m00,m10,m20,m30; //named references to force register use? + float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack + float m02,m12,m22,m32; + float m03,m13,m23,m33; __host__ __device__ cumat4f(); __host__ __device__ ~cumat4f(); - __host__ __device__ float& operator[](const int I); - __host__ __device__ float& operator()(const int I, const int J); - __host__ __device__ float& at(const int I, const int J); + __host__ __device__ cumat4f( + const float & _m00, const float & _m01, const float & _m02, const float & _m03, + const float & _m10, const float & _m11, const float & _m12, const float & _m13, + const float & _m20, const float & _m21, const float & _m22, const float & _m23, + const float & _m30, const float & _m31, const float & _m32, const float & _m33 + ); - __host__ __device__ cumat4f operator+(cumat4f lhs); - __host__ __device__ cumat4f operator-(cumat4f lhs); - __host__ __device__ cumat4f operator*(float lhs); - __host__ __device__ cumat4f operator/(float lhs); - __host__ __device__ cuvect4f operator*(cuvect4f lhs); - __host__ __device__ cumat4f operator*(cumat4f lhs); - __host__ __device__ friend cumat4f operator-(cumat4f rhs); + __host__ __device__ explicit cumat4f(const float *data16); + + //__forceinline__ + __host__ __device__ float& operator[](const int &I); + __host__ __device__ float& operator()(const int &I, const int &J); + __host__ __device__ float& at(const int &I, const int &J); + __host__ __device__ const float& operator[](const int &I) const; + __host__ __device__ const float& operator()(const int &I, const int &J) const; + __host__ __device__ const float& at(const int &I, const int &J) const; + + __host__ __device__ cumat4f operator+(const cumat4f &rhs); + __host__ __device__ cumat4f operator-(const cumat4f &rhs); + __host__ __device__ cumat4f operator*(const float &rhs); + __host__ __device__ cumat4f operator/(const float &rhs); + __host__ __device__ cuvect4f operator*(const cuvect4f &rhs); + __host__ __device__ cumat4f operator*(const cumat4f &rhs); + __host__ __device__ friend cumat4f operator-(const cumat4f &rhs); + __host__ __device__ float det(); __host__ __device__ cumat4f transpose(); __host__ __device__ cumat4f inverse(); + + __host__ __device__ float* data(); //pointer to float[9] representation of matrix + __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix + + //In place operations (to save GPU register use) + __host__ __device__ cumat4f& operator+=(const cumat4f &rhs); + __host__ __device__ cumat4f& operator-=(const cumat4f &rhs); + __host__ __device__ cumat4f& operator/=(const float &rhs); + __host__ __device__ cumat4f& operator*=(const float &rhs); + __host__ __device__ cumat4f& operator*=(const cumat4f &rhs); }; __host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b); diff --git a/src/amsculib2/cuvect4f.cu b/src/amsculib2/cuvect4f.cu index 6a4fc7f..46c60e7 100644 --- a/src/amsculib2/cuvect4f.cu +++ b/src/amsculib2/cuvect4f.cu @@ -115,6 +115,16 @@ __host__ __device__ cuvect4f& cuvect4f::operator/=(const float &rhs) return *this; } +__host__ __device__ cuvect4f operator-(const cuvect4f &rhs) +{ + cuvect4f ret; + ret[0] = -rhs[0]; + ret[1] = -rhs[1]; + ret[2] = -rhs[2]; + ret[3] = -rhs[3]; + return ret; +} + __host__ __device__ cuvect4f::cuvect4f(const float &_x, const float &_y, const float &_z, const float &_w) { @@ -122,302 +132,545 @@ __host__ __device__ cuvect4f::cuvect4f(const float &_x, const float &_y, const f return; } + __host__ __device__ cumat4f::cumat4f() { - int I; - for(I=0;I<16;I++) - { - dat[I] = 0.0; - } + m00 = 0.0f; + m10 = 0.0f; + m20 = 0.0f; + m30 = 0.0f; + + m01 = 0.0f; + m11 = 0.0f; + m21 = 0.0f; + m31 = 0.0f; + + m02 = 0.0f; + m12 = 0.0f; + m22 = 0.0f; + m32 = 0.0f; + + m03 = 0.0f; + m13 = 0.0f; + m23 = 0.0f; + m33 = 0.0f; + return; } __host__ __device__ cumat4f::~cumat4f() { - int I; - for(I=0;I<16;I++) - { - dat[I] = 0.0; - } + m00 = 0.0f; + m10 = 0.0f; + m20 = 0.0f; + m30 = 0.0f; + + m01 = 0.0f; + m11 = 0.0f; + m21 = 0.0f; + m31 = 0.0f; + + m02 = 0.0f; + m12 = 0.0f; + m22 = 0.0f; + m32 = 0.0f; + + m03 = 0.0f; + m13 = 0.0f; + m23 = 0.0f; + m33 = 0.0f; + return; } -__host__ __device__ float& cumat4f::operator[](const int I) +__host__ __device__ cumat4f::cumat4f( + const float & _m00, const float & _m01, const float & _m02, const float & _m03, + const float & _m10, const float & _m11, const float & _m12, const float & _m13, + const float & _m20, const float & _m21, const float & _m22, const float & _m23, + const float & _m30, const float & _m31, const float & _m32, const float & _m33 +) { - return dat[I]; + m00 = _m00; + m10 = _m10; + m20 = _m20; + m30 = _m30; + + m01 = _m01; + m11 = _m11; + m21 = _m21; + m31 = _m31; + + m02 = _m02; + m12 = _m12; + m22 = _m22; + m32 = _m32; + + m03 = _m03; + m13 = _m13; + m23 = _m23; + m33 = _m33; + + return; } -__host__ __device__ float& cumat4f::operator()(const int I, const int J) +__host__ __device__ cumat4f::cumat4f(const float *data16) { - return dat[I+4*J]; + m00 = data16[0]; + m10 = data16[1]; + m20 = data16[2]; + m30 = data16[3]; + + m01 = data16[4]; + m11 = data16[5]; + m21 = data16[6]; + m31 = data16[7]; + + m02 = data16[8]; + m12 = data16[9]; + m22 = data16[10]; + m32 = data16[11]; + + m03 = data16[12]; + m13 = data16[13]; + m23 = data16[14]; + m33 = data16[15]; + + return; } -__host__ __device__ float& cumat4f::at(const int I, const int J) +__host__ __device__ float& cumat4f::operator[](const int &I) { - return dat[I+4*J]; + switch(I) + { + case 0: return m00; + case 1: return m10; + case 2: return m20; + case 3: return m30; + + case 4: return m01; + case 5: return m11; + case 6: return m21; + case 7: return m31; + + case 8: return m02; + case 9: return m12; + case 10: return m22; + case 11: return m32; + + case 12: return m03; + case 13: return m13; + case 14: return m23; + case 15: return m33; + } + + return m00; } -__host__ __device__ cumat4f cumat4f::operator+(cumat4f lhs) +__host__ __device__ float& cumat4f::operator()(const int &I, const int &J) +{ + return (*this)[I+4*J]; +} + + +__host__ __device__ float& cumat4f::at(const int &I, const int &J) +{ + return (*this)[I+4*J]; +} + +__host__ __device__ const float& cumat4f::operator[](const int &I) const +{ + switch(I) + { + case 0: return m00; + case 1: return m10; + case 2: return m20; + case 3: return m30; + + case 4: return m01; + case 5: return m11; + case 6: return m21; + case 7: return m31; + + case 8: return m02; + case 9: return m12; + case 10: return m22; + case 11: return m32; + + case 12: return m03; + case 13: return m13; + case 14: return m23; + case 15: return m33; + } + + return m00; +} + +__host__ __device__ const float& cumat4f::operator()(const int &I, const int &J) const +{ + return (*this)[I+4*J]; +} + + +__host__ __device__ const float& cumat4f::at(const int &I, const int &J) const +{ + return (*this)[I+4*J]; +} + +__host__ __device__ cumat4f cumat4f::operator+(const cumat4f &rhs) { cumat4f ret; - int I; - for(I=0;I<16;I++) - { - ret.dat[I] = this->dat[I] + lhs.dat[I]; - } + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; + ret.m20 = m20 + rhs.m20; + ret.m30 = m30 + rhs.m30; + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; + ret.m21 = m21 + rhs.m21; + ret.m31 = m31 + rhs.m31; + ret.m02 = m02 + rhs.m02; + ret.m12 = m12 + rhs.m12; + ret.m22 = m22 + rhs.m22; + ret.m32 = m32 + rhs.m32; + ret.m03 = m03 + rhs.m03; + ret.m13 = m13 + rhs.m13; + ret.m23 = m23 + rhs.m23; + ret.m33 = m33 + rhs.m33; return ret; } -__host__ __device__ cumat4f cumat4f::operator-(cumat4f lhs) +__host__ __device__ cumat4f cumat4f::operator-(const cumat4f &rhs) { cumat4f ret; - int I; - for(I=0;I<16;I++) - { - ret.dat[I] = this->dat[I] - lhs.dat[I]; - } + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; + ret.m20 = m20 - rhs.m20; + ret.m30 = m30 - rhs.m30; + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; + ret.m21 = m21 - rhs.m21; + ret.m31 = m31 - rhs.m31; + ret.m02 = m02 - rhs.m02; + ret.m12 = m12 - rhs.m12; + ret.m22 = m22 - rhs.m22; + ret.m32 = m32 - rhs.m32; + ret.m03 = m03 - rhs.m03; + ret.m13 = m13 - rhs.m13; + ret.m23 = m23 - rhs.m23; + ret.m33 = m33 - rhs.m33; return ret; } -__host__ __device__ cumat4f cumat4f::operator*(float lhs) +__host__ __device__ cumat4f cumat4f::operator*(const float &rhs) { cumat4f ret; - int I; - for(I=0;I<16;I++) - { - ret.dat[I] = this->dat[I]*lhs; - } + ret.m00 = m00 * rhs; + ret.m10 = m10 * rhs; + ret.m20 = m20 * rhs; + ret.m30 = m30 * rhs; + ret.m01 = m01 * rhs; + ret.m11 = m11 * rhs; + ret.m21 = m21 * rhs; + ret.m31 = m31 * rhs; + ret.m02 = m02 * rhs; + ret.m12 = m12 * rhs; + ret.m22 = m22 * rhs; + ret.m32 = m32 * rhs; + ret.m03 = m03 * rhs; + ret.m13 = m13 * rhs; + ret.m23 = m23 * rhs; + ret.m33 = m33 * rhs; return ret; } -__host__ __device__ cumat4f cumat4f::operator/(float lhs) +__host__ __device__ cumat4f cumat4f::operator/(const float &rhs) { cumat4f ret; - int I; - for(I=0;I<16;I++) - { - ret.dat[I] = this->dat[I]/lhs; - } + ret.m00 = m00 / rhs; + ret.m10 = m10 / rhs; + ret.m20 = m20 / rhs; + ret.m30 = m30 / rhs; + ret.m01 = m01 / rhs; + ret.m11 = m11 / rhs; + ret.m21 = m21 / rhs; + ret.m31 = m31 / rhs; + ret.m02 = m02 / rhs; + ret.m12 = m12 / rhs; + ret.m22 = m22 / rhs; + ret.m32 = m32 / rhs; + ret.m03 = m03 / rhs; + ret.m13 = m13 / rhs; + ret.m23 = m23 / rhs; + ret.m33 = m33 / rhs; return ret; } - -__host__ __device__ cuvect4f cumat4f::operator*(cuvect4f lhs) + +__host__ __device__ cuvect4f cumat4f::operator*(const cuvect4f &rhs) { - cuvect4f ret = cuvect4f(0.0,0.0,0.0,0.0); - int I,J; - - for(I=0;I<4;I++) - { - for(J=0;J<4;J++) - { - ret[I] = ret[I] + this->at(I,J)*lhs[J]; - } - } - + cuvect4f ret; + ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z + m03*rhs.w; + ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z + m13*rhs.w; + ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z + m23*rhs.w; + ret.w = m30*rhs.x + m31*rhs.y + m32*rhs.z + m33*rhs.w; return ret; } -__host__ __device__ cumat4f cumat4f::operator*(cumat4f lhs) + +__host__ __device__ cumat4f cumat4f::operator*(const cumat4f &rhs) { cumat4f ret; - int I,J,K; - for(I=0;I<4;I++) - { - for(J=0;J<4;J++) - { - ret(I,J) = 0; - for(K=0;K<4;K++) - { - ret(I,J) = ret(I,J) + this->at(I,K) * lhs(K,J); - } - } - } + ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20 + m03*rhs.m30; + ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21 + m03*rhs.m31; + ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22 + m03*rhs.m32; + ret.m03 = m00*rhs.m03 + m01*rhs.m13 + m02*rhs.m23 + m03*rhs.m33; + ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20 + m13*rhs.m30; + ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21 + m13*rhs.m31; + ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22 + m13*rhs.m32; + ret.m13 = m10*rhs.m03 + m11*rhs.m13 + m12*rhs.m23 + m13*rhs.m33; + ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20 + m23*rhs.m30; + ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21 + m23*rhs.m31; + ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22 + m23*rhs.m32; + ret.m23 = m20*rhs.m03 + m21*rhs.m13 + m22*rhs.m23 + m23*rhs.m33; + ret.m30 = m30*rhs.m00 + m31*rhs.m10 + m32*rhs.m20 + m33*rhs.m30; + ret.m31 = m30*rhs.m01 + m31*rhs.m11 + m32*rhs.m21 + m33*rhs.m31; + ret.m32 = m30*rhs.m02 + m31*rhs.m12 + m32*rhs.m22 + m33*rhs.m32; + ret.m33 = m30*rhs.m03 + m31*rhs.m13 + m32*rhs.m23 + m33*rhs.m33; return ret; } - __host__ __device__ cumat4f cumat4f::transpose() { cumat4f q; - int I,J; - for(I=0;I<4;I++) - { - for(J=0;J<4;J++) - { - q(I,J) = this->at(J,I); - } - } + q.m00 = m00; + q.m10 = m01; + q.m20 = m02; + q.m30 = m03; + + q.m01 = m10; + q.m11 = m11; + q.m21 = m12; + q.m31 = m13; + + q.m02 = m20; + q.m12 = m21; + q.m22 = m22; + q.m32 = m23; + + q.m03 = m30; + q.m13 = m31; + q.m23 = m32; + q.m33 = m33; + return q; } __host__ __device__ float cumat4f::det() { - float a00,a01,a02,a03; - float a10,a11,a12,a13; - float a20,a21,a22,a23; - float a30,a31,a32,a33; - float det; + float dt; + dt = m03*m12*m21*m30 - + m02*m13*m21*m30 - + m03*m11*m22*m30 + + m01*m13*m22*m30 + + m02*m11*m23*m30 - + m01*m12*m23*m30 - + m03*m12*m20*m31 + + m02*m13*m20*m31 + + m03*m10*m22*m31 - + m00*m13*m22*m31 - + m02*m10*m23*m31 + + m00*m12*m23*m31 + + m03*m11*m20*m32 - + m01*m13*m20*m32 - + m03*m10*m21*m32 + + m00*m13*m21*m32 + + m01*m10*m23*m32 - + m00*m11*m23*m32 - + m02*m11*m20*m33 + + m01*m12*m20*m33 + + m02*m10*m21*m33 - + m00*m12*m21*m33 - + m01*m10*m22*m33 + + m00*m11*m22*m33; - a00 = this->at(0,0); - a01 = this->at(0,1); - a02 = this->at(0,2); - a03 = this->at(0,3); - a10 = this->at(1,0); - a11 = this->at(1,1); - a12 = this->at(1,2); - a13 = this->at(1,3); - a20 = this->at(2,0); - a21 = this->at(2,1); - a22 = this->at(2,2); - a23 = this->at(2,3); - a30 = this->at(3,0); - a31 = this->at(3,1); - a32 = this->at(3,2); - a33 = this->at(3,3); - - det = a03*a12*a21*a30 - - a02*a13*a21*a30 - - a03*a11*a22*a30 + - a01*a13*a22*a30 + - a02*a11*a23*a30 - - a01*a12*a23*a30 - - a03*a12*a20*a31 + - a02*a13*a20*a31 + - a03*a10*a22*a31 - - a00*a13*a22*a31 - - a02*a10*a23*a31 + - a00*a12*a23*a31 + - a03*a11*a20*a32 - - a01*a13*a20*a32 - - a03*a10*a21*a32 + - a00*a13*a21*a32 + - a01*a10*a23*a32 - - a00*a11*a23*a32 - - a02*a11*a20*a33 + - a01*a12*a20*a33 + - a02*a10*a21*a33 - - a00*a12*a21*a33 - - a01*a10*a22*a33 + - a00*a11*a22*a33; - - return det; -} - -__host__ __device__ cumat4f minverse(cumat4f ma) -{ - cumat4f mb; - - float a00,a01,a02,a03; - float a10,a11,a12,a13; - float a20,a21,a22,a23; - float a30,a31,a32,a33; - - float b00,b01,b02,b03; - float b10,b11,b12,b13; - float b20,b21,b22,b23; - float b30,b31,b32,b33; - - float det = 0.0; - - a00 = ma.at(0,0); - a01 = ma.at(0,1); - a02 = ma.at(0,2); - a03 = ma.at(0,3); - a10 = ma.at(1,0); - a11 = ma.at(1,1); - a12 = ma.at(1,2); - a13 = ma.at(1,3); - a20 = ma.at(2,0); - a21 = ma.at(2,1); - a22 = ma.at(2,2); - a23 = ma.at(2,3); - a30 = ma.at(3,0); - a31 = ma.at(3,1); - a32 = ma.at(3,2); - a33 = ma.at(3,3); - - det = a03*a12*a21*a30 - - a02*a13*a21*a30 - - a03*a11*a22*a30 + - a01*a13*a22*a30 + - a02*a11*a23*a30 - - a01*a12*a23*a30 - - a03*a12*a20*a31 + - a02*a13*a20*a31 + - a03*a10*a22*a31 - - a00*a13*a22*a31 - - a02*a10*a23*a31 + - a00*a12*a23*a31 + - a03*a11*a20*a32 - - a01*a13*a20*a32 - - a03*a10*a21*a32 + - a00*a13*a21*a32 + - a01*a10*a23*a32 - - a00*a11*a23*a32 - - a02*a11*a20*a33 + - a01*a12*a20*a33 + - a02*a10*a21*a33 - - a00*a12*a21*a33 - - a01*a10*a22*a33 + - a00*a11*a22*a33; - - if(det*det>1.0E-30) - { - b00 = -a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33; - b01 = a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33; - b02 = -a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33; - b03 = a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23; - b10 = a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33; - b11 = -a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33; - b12 = a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33; - b13 = -a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23; - b20 = -a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33; - b21 = a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33; - b22 = -a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33; - b23 = a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23; - b30 = a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32; - b31 = -a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32; - b32 = a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32; - b33 = -a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22; - b00 = b00/det; - b01 = b01/det; - b02 = b02/det; - b03 = b03/det; - b10 = b10/det; - b11 = b11/det; - b12 = b12/det; - b13 = b13/det; - b20 = b20/det; - b21 = b21/det; - b22 = b22/det; - b23 = b23/det; - b30 = b30/det; - b31 = b31/det; - b32 = b32/det; - b33 = b33/det; - mb.at(0,0) = b00; - mb.at(0,1) = b01; - mb.at(0,2) = b02; - mb.at(0,3) = b03; - mb.at(1,0) = b10; - mb.at(1,1) = b11; - mb.at(1,2) = b12; - mb.at(1,3) = b13; - mb.at(2,0) = b20; - mb.at(2,1) = b21; - mb.at(2,2) = b22; - mb.at(2,3) = b23; - mb.at(3,0) = b30; - mb.at(3,1) = b31; - mb.at(3,2) = b32; - mb.at(3,3) = b33; - } - //this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again! - return mb; + return dt; } __host__ __device__ cumat4f cumat4f::inverse() { - return minverse(*this); + cumat4f mret; + float dt = this->det(); + float idet; + + if(dt>1E-7 || dt<-1E-7) + { + idet = 1.0f/dt; + mret.m00 = -m13*m22*m31 + m12*m23*m31 + m13*m21*m32 - m11*m23*m32 - m12*m21*m33 + m11*m22*m33; + mret.m01 = m03*m22*m31 - m02*m23*m31 - m03*m21*m32 + m01*m23*m32 + m02*m21*m33 - m01*m22*m33; + mret.m02 = -m03*m12*m31 + m02*m13*m31 + m03*m11*m32 - m01*m13*m32 - m02*m11*m33 + m01*m12*m33; + mret.m03 = m03*m12*m21 - m02*m13*m21 - m03*m11*m22 + m01*m13*m22 + m02*m11*m23 - m01*m12*m23; + mret.m10 = m13*m22*m30 - m12*m23*m30 - m13*m20*m32 + m10*m23*m32 + m12*m20*m33 - m10*m22*m33; + mret.m11 = -m03*m22*m30 + m02*m23*m30 + m03*m20*m32 - m00*m23*m32 - m02*m20*m33 + m00*m22*m33; + mret.m12 = m03*m12*m30 - m02*m13*m30 - m03*m10*m32 + m00*m13*m32 + m02*m10*m33 - m00*m12*m33; + mret.m13 = -m03*m12*m20 + m02*m13*m20 + m03*m10*m22 - m00*m13*m22 - m02*m10*m23 + m00*m12*m23; + mret.m20 = -m13*m21*m30 + m11*m23*m30 + m13*m20*m31 - m10*m23*m31 - m11*m20*m33 + m10*m21*m33; + mret.m21 = m03*m21*m30 - m01*m23*m30 - m03*m20*m31 + m00*m23*m31 + m01*m20*m33 - m00*m21*m33; + mret.m22 = -m03*m11*m30 + m01*m13*m30 + m03*m10*m31 - m00*m13*m31 - m01*m10*m33 + m00*m11*m33; + mret.m23 = m03*m11*m20 - m01*m13*m20 - m03*m10*m21 + m00*m13*m21 + m01*m10*m23 - m00*m11*m23; + mret.m30 = m12*m21*m30 - m11*m22*m30 - m12*m20*m31 + m10*m22*m31 + m11*m20*m32 - m10*m21*m32; + mret.m31 = -m02*m21*m30 + m01*m22*m30 + m02*m20*m31 - m00*m22*m31 - m01*m20*m32 + m00*m21*m32; + mret.m32 = m02*m11*m30 - m01*m12*m30 - m02*m10*m31 + m00*m12*m31 + m01*m10*m32 - m00*m11*m32; + mret.m33 = -m02*m11*m20 + m01*m12*m20 + m02*m10*m21 - m00*m12*m21 - m01*m10*m22 + m00*m11*m22; + mret.m00 *= idet; + mret.m01 *= idet; + mret.m02 *= idet; + mret.m03 *= idet; + mret.m10 *= idet; + mret.m11 *= idet; + mret.m12 *= idet; + mret.m13 *= idet; + mret.m20 *= idet; + mret.m21 *= idet; + mret.m22 *= idet; + mret.m23 *= idet; + mret.m30 *= idet; + mret.m31 *= idet; + mret.m32 *= idet; + mret.m33 *= idet; + } + else + { + mret.m00 = inf; + mret.m10 = inf; + mret.m20 = inf; + mret.m30 = inf; + mret.m01 = inf; + mret.m11 = inf; + mret.m21 = inf; + mret.m31 = inf; + mret.m02 = inf; + mret.m12 = inf; + mret.m22 = inf; + mret.m32 = inf; + mret.m03 = inf; + mret.m13 = inf; + mret.m23 = inf; + mret.m33 = inf; + } + //this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again! + + return mret; +} + +__host__ __device__ cumat4f operator-(const cumat4f &rhs) +{ + cumat4f ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m20 = -rhs.m20; + ret.m30 = -rhs.m30; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + ret.m21 = -rhs.m21; + ret.m31 = -rhs.m31; + ret.m02 = -rhs.m02; + ret.m12 = -rhs.m12; + ret.m22 = -rhs.m22; + ret.m32 = -rhs.m32; + ret.m03 = -rhs.m03; + ret.m13 = -rhs.m13; + ret.m23 = -rhs.m23; + ret.m33 = -rhs.m33; + return ret; +} + +__host__ __device__ cumat4f& cumat4f::operator+=(const cumat4f &rhs) +{ + m00 += rhs.m00; + m01 += rhs.m01; + m02 += rhs.m02; + m03 += rhs.m03; + m10 += rhs.m10; + m11 += rhs.m11; + m12 += rhs.m12; + m13 += rhs.m13; + m20 += rhs.m20; + m21 += rhs.m21; + m22 += rhs.m22; + m23 += rhs.m23; + m30 += rhs.m30; + m31 += rhs.m31; + m32 += rhs.m32; + m33 += rhs.m33; + return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator-=(const cumat4f &rhs) +{ + m00 -= rhs.m00; + m01 -= rhs.m01; + m02 -= rhs.m02; + m03 -= rhs.m03; + m10 -= rhs.m10; + m11 -= rhs.m11; + m12 -= rhs.m12; + m13 -= rhs.m13; + m20 -= rhs.m20; + m21 -= rhs.m21; + m22 -= rhs.m22; + m23 -= rhs.m23; + m30 -= rhs.m30; + m31 -= rhs.m31; + m32 -= rhs.m32; + m33 -= rhs.m33; + return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator*=(const float &rhs) +{ + m00 *= rhs; + m01 *= rhs; + m02 *= rhs; + m03 *= rhs; + m10 *= rhs; + m11 *= rhs; + m12 *= rhs; + m13 *= rhs; + m20 *= rhs; + m21 *= rhs; + m22 *= rhs; + m23 *= rhs; + m30 *= rhs; + m31 *= rhs; + m32 *= rhs; + m33 *= rhs; + return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator/=(const float &rhs) +{ + float irhs = 1.0f / rhs; + m00 *= irhs; + m01 *= irhs; + m02 *= irhs; + m03 *= irhs; + m10 *= irhs; + m11 *= irhs; + m12 *= irhs; + m13 *= irhs; + m20 *= irhs; + m21 *= irhs; + m22 *= irhs; + m23 *= irhs; + m30 *= irhs; + m31 *= irhs; + m32 *= irhs; + m33 *= irhs; + return *this; +} + +__host__ __device__ cumat4f& cumat4f::operator*=(const cumat4f &rhs) +{ + cumat4f tmp = (*this); + (*this) = tmp*rhs; + return *this;; } @@ -453,22 +706,7 @@ __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b) return ret; } -__host__ __device__ cuvect4f operator-(cuvect4f rhs) -{ - cuvect4f ret; - ret[0] = -rhs[0]; - ret[1] = -rhs[1]; - ret[2] = -rhs[2]; - ret[3] = -rhs[3]; - return ret; -} -__host__ __device__ cumat4f operator-(cumat4f rhs) -{ - cumat4f ret; - int I; - for(I=0;I<16;I++) ret[I] = -rhs[I]; - return ret; -} + }; //namespace amscuda