diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index 391c99e..80d41b0 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index 9c023c4..8bb01b3 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index 8c2125c..a0d54ea 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index 288b167..e854844 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o index f2b2df8..12de1de 100644 Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index 1227dad..909a501 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index a6897c5..ad0965f 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index 725de93..fbb073b 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib2.o b/build_linux64/objstore/amsculib2.o index ba098f7..4e0b89d 100644 Binary files a/build_linux64/objstore/amsculib2.o and b/build_linux64/objstore/amsculib2.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index 469a8e5..7aec13b 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index 3d9794b..91c72dc 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/cuvect2.o b/build_linux64/objstore/cuvect2.o index 5e562dc..84465ed 100644 Binary files a/build_linux64/objstore/cuvect2.o and b/build_linux64/objstore/cuvect2.o differ diff --git a/build_linux64/objstore/cuvect2f.o b/build_linux64/objstore/cuvect2f.o index e921c14..65df8f0 100644 Binary files a/build_linux64/objstore/cuvect2f.o and b/build_linux64/objstore/cuvect2f.o differ diff --git a/build_linux64/objstore/cuvect3.o b/build_linux64/objstore/cuvect3.o index 94e8651..259ee43 100644 Binary files a/build_linux64/objstore/cuvect3.o and b/build_linux64/objstore/cuvect3.o differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index 2ba5aa9..ef6f84a 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/objstore/cuvect4.o b/build_linux64/objstore/cuvect4.o index b82c006..c4e51e2 100644 Binary files a/build_linux64/objstore/cuvect4.o and b/build_linux64/objstore/cuvect4.o differ diff --git a/build_linux64/objstore/cuvect4f.o b/build_linux64/objstore/cuvect4f.o index fe0e55e..ec293a8 100644 Binary files a/build_linux64/objstore/cuvect4f.o and b/build_linux64/objstore/cuvect4f.o differ diff --git a/build_linux64/test b/build_linux64/test index 80ca5d3..c815a3d 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib2/cuvect2f.hpp b/include/amsculib2/cuvect2f.hpp index a0ba6ce..dcab448 100644 --- a/include/amsculib2/cuvect2f.hpp +++ b/include/amsculib2/cuvect2f.hpp @@ -10,43 +10,70 @@ namespace amscuda float x; float y; - __host__ __device__ cuvect2f(); __host__ __device__ ~cuvect2f(); __host__ __device__ cuvect2f(const float &_x, const float &_y); - + + __host__ __device__ float& operator[](const int &I); __host__ __device__ const float& operator[](const int &I) const; - __host__ __device__ cuvect2f operator+(const cuvect2f &lhs); - __host__ __device__ cuvect2f operator-(const cuvect2f &lhs); - __host__ __device__ cuvect2f operator*(const float &lhs); - __host__ __device__ cuvect2f operator/(const float &lhs); + __host__ __device__ cuvect2f operator+(const cuvect2f &rhs); + __host__ __device__ cuvect2f operator-(const cuvect2f &rhs); + __host__ __device__ cuvect2f operator*(const float &rhs); + __host__ __device__ cuvect2f operator/(const float &rhs); __host__ __device__ friend cuvect2f operator-(const cuvect2f &rhs); + + __host__ __device__ cuvect2f& operator+=(const cuvect2f &rhs); + __host__ __device__ cuvect2f& operator-=(const cuvect2f &rhs); + __host__ __device__ cuvect2f& operator/=(const float &rhs); + __host__ __device__ cuvect2f& operator*=(const float &rhs); + }; class cumat2f { public: - float dat[4]; + float m00,m10; //named references to force register use? + float m01,m11; //switched to column-major-order to match GLSL/lapack __host__ __device__ cumat2f(); __host__ __device__ ~cumat2f(); - __host__ __device__ float& operator[](const int I); - __host__ __device__ float& operator()(const int I, const int J); - __host__ __device__ float& at(const int I, const int J); - __host__ __device__ cumat2f operator+(cumat2f lhs); - __host__ __device__ cumat2f operator-(cumat2f lhs); - __host__ __device__ cumat2f operator*(float lhs); - __host__ __device__ cumat2f operator/(float lhs); - __host__ __device__ cuvect2f operator*(cuvect2f lhs); - __host__ __device__ cumat2f operator*(cumat2f lhs); - __host__ __device__ friend cumat2f operator-(cumat2f rhs); + __host__ __device__ cumat2f( + const float & _m00, const float & _m01, + const float & _m10, const float & _m11 + ); + __host__ __device__ float& operator[](const int &I); + __host__ __device__ float& operator()(const int &I, const int &J); + __host__ __device__ float& at(const int &I, const int &J); + + __host__ __device__ const float& operator[](const int &I) const; + __host__ __device__ const float& operator()(const int &I, const int &J) const; + __host__ __device__ const float& at(const int &I, const int &J) const; + + __host__ __device__ cumat2f operator+(const cumat2f &rhs); + __host__ __device__ cumat2f operator-(const cumat2f &rhs); + __host__ __device__ cumat2f operator*(const float &rhs); + __host__ __device__ cumat2f operator/(const float &rhs); + __host__ __device__ cuvect2f operator*(const cuvect2f &rhs); + __host__ __device__ cumat2f operator*(const cumat2f &rhs); + __host__ __device__ friend cumat2f operator-(const cumat2f &rhs); + __host__ __device__ float det(); __host__ __device__ cumat2f transpose(); __host__ __device__ cumat2f inverse(); + + __host__ __device__ float* data(); //pointer to float[9] representation of matrix + __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix + + //In place operations (to save GPU register use) + __host__ __device__ cumat2f& operator+=(const cumat2f &rhs); + __host__ __device__ cumat2f& operator-=(const cumat2f &rhs); + __host__ __device__ cumat2f& operator/=(const float &rhs); + __host__ __device__ cumat2f& operator*=(const float &rhs); + __host__ __device__ cumat2f& operator*=(const cumat2f &rhs); }; __host__ __device__ float cuvect2f_dot(const cuvect2f &a, const cuvect2f &b); @@ -55,6 +82,8 @@ namespace amscuda __host__ __device__ cuvect2f cuvect2f_normalize(const cuvect2f &a); __host__ __device__ cuvect2f cuvect2f_proj(const cuvect2f &a, const cuvect2f &b); + __host__ __device__ cumat2f mat2f_rot_from_angle(const cuvect3f &axis, const float &angle); + //2x2 matrix operations //matrix order is assumed to be mat[I,J] = mat[I+3*J] diff --git a/include/amsculib2/cuvect3.hpp b/include/amsculib2/cuvect3.hpp index f0c96bc..00f3834 100644 --- a/include/amsculib2/cuvect3.hpp +++ b/include/amsculib2/cuvect3.hpp @@ -13,72 +13,106 @@ namespace amscuda __host__ __device__ cuvect3(); __host__ __device__ ~cuvect3(); - __host__ __device__ cuvect3(double _x, double _y, double _z); + __host__ __device__ cuvect3(const double &_x, const double &_y, const double &_z); - __host__ __device__ double& operator[](const int I); - __host__ __device__ const double& operator[](const int I) const; + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + + __host__ __device__ cuvect3 operator+(const cuvect3 &rhs); + __host__ __device__ cuvect3 operator-(const cuvect3 &rhs); + __host__ __device__ cuvect3 operator*(const double &rhs); + __host__ __device__ cuvect3 operator/(const double &rhs); + __host__ __device__ friend cuvect3 operator-(const cuvect3 &rhs); + + __host__ __device__ cuvect3& operator+=(const cuvect3 &rhs); + __host__ __device__ cuvect3& operator-=(const cuvect3 &rhs); + __host__ __device__ cuvect3& operator/=(const double &rhs); + __host__ __device__ cuvect3& operator*=(const double &rhs); - __host__ __device__ cuvect3 operator+(cuvect3 lhs); - __host__ __device__ cuvect3 operator-(cuvect3 lhs); - __host__ __device__ cuvect3 operator*(double lhs); - __host__ __device__ cuvect3 operator/(double lhs); - __host__ __device__ friend cuvect3 operator-(cuvect3 rhs); }; class cumat3 { public: - double dat[9]; + double m00,m10,m20; //named references to force register use? + double m01,m11,m21; //switched to column-major-order to match GLSL/lapack + double m02,m12,m22; __host__ __device__ cumat3(); __host__ __device__ ~cumat3(); - __host__ __device__ double& operator[](const int I); - __host__ __device__ double& operator()(const int I, const int J); - __host__ __device__ double& at(const int I, const int J); + __host__ __device__ cumat3( + const double & _m00, const double & _m01, const double & _m02, + const double & _m10, const double & _m11, const double & _m12, + const double & _m20, const double & _m21, const double & _m22 + ); - __host__ __device__ cumat3 operator+(cumat3 lhs); - __host__ __device__ cumat3 operator-(cumat3 lhs); - __host__ __device__ cumat3 operator*(double lhs); - __host__ __device__ cumat3 operator/(double lhs); - __host__ __device__ cuvect3 operator*(cuvect3 lhs); - __host__ __device__ cumat3 operator*(cumat3 lhs); - __host__ __device__ friend cumat3 operator-(cumat3 rhs); + __host__ __device__ double& operator[](const int &I); + __host__ __device__ double& operator()(const int &I, const int &J); + __host__ __device__ double& at(const int &I, const int &J); + __host__ __device__ const double& operator[](const int &I) const; + __host__ __device__ const double& operator()(const int &I, const int &J) const; + __host__ __device__ const double& at(const int &I, const int &J) const; + + __host__ __device__ cumat3 operator+(const cumat3 &rhs); + __host__ __device__ cumat3 operator-(const cumat3 &rhs); + __host__ __device__ cumat3 operator*(const double &rhs); + __host__ __device__ cumat3 operator/(const double &rhs); + __host__ __device__ cuvect3 operator*(const cuvect3 &rhs); + __host__ __device__ cumat3 operator*(const cumat3 &rhs); + __host__ __device__ friend cumat3 operator-(const cumat3 &rhs); + __host__ __device__ double det(); __host__ __device__ cumat3 transpose(); __host__ __device__ cumat3 inverse(); + + __host__ __device__ double* data(); //pointer to double[9] representation of matrix + __host__ __device__ const double* data() const; //pointer to double[9] representation of matrix + + //In place operations (to save GPU register use) + __host__ __device__ cumat3& operator+=(const cumat3 &rhs); + __host__ __device__ cumat3& operator-=(const cumat3 &rhs); + __host__ __device__ cumat3& operator/=(const double &rhs); + __host__ __device__ cumat3& operator*=(const double &rhs); + __host__ __device__ cumat3& operator*=(const cumat3 &rhs); + }; - __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b); - __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b); - __host__ __device__ double cuvect3_norm(cuvect3 a); - __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a); - __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b); + __host__ __device__ double cuvect3_dot(const cuvect3 &a,const cuvect3 &b); + __host__ __device__ cuvect3 cuvect3_cross(const cuvect3 &a, const cuvect3 &b); + __host__ __device__ double cuvect3_norm(const cuvect3 &a); + __host__ __device__ cuvect3 cuvect3_normalize(const cuvect3 &a); + __host__ __device__ cuvect3 cuvect3_proj(const cuvect3 &a, const cuvect3 &b); + + __host__ __device__ cumat3 hodge_dual(const cuvect3 &vin); + __host__ __device__ cuvect3 hodge_dual(const cumat3 &min); + __host__ __device__ cumat3 rotmat_from_axisangle(const cuvect3 &axis, const double &angle); + //3x3 matrix operations //matrix order is assumed to be mat[I,J] = mat[I+3*J] //transposes a 3x3 (9 element) matrix - __host__ __device__ void mat3_transpose(double *mat3inout); + __host__ __device__ void mat3f_transpose(double *mat3inout); //copies src to dest - __host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src); + __host__ __device__ void mat3f_copy(double *mat3f_dest, const double *mat3f_src); //returns determinant of 3x3 matrix - __host__ __device__ double mat3_det(double *mat3in); + __host__ __device__ double mat3f_det(double *mat3in); //inverts a 3x3 (9 element) matrix - __host__ __device__ void mat3_inverse(double *mat3inout); + __host__ __device__ void mat3f_inverse(double *mat3inout); - __host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin); - __host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout); + __host__ __device__ cuvect3 mat3f_mult(double *mat3in, const cuvect3 &cvin); + __host__ __device__ void mat3f_mult(double *matina, double *matinb, double *matout); - __host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout); - __host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout); + __host__ __device__ void mat3f_hodgedual(const cuvect3 &vecin, double *matout); + __host__ __device__ void mat3f_hodgedual(double *matin, cuvect3 &vecout); //returns direction cosine rotation matrix from axis and angle - __host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout); + __host__ __device__ void mat3f_rot_from_axisangle(cuvect3 axis, double angle, double *matout); __host__ void test_cudavect_logic1(); diff --git a/src/amsculib2/cuvect2f.cu b/src/amsculib2/cuvect2f.cu index 9b53d79..8b76b49 100644 --- a/src/amsculib2/cuvect2f.cu +++ b/src/amsculib2/cuvect2f.cu @@ -5,19 +5,13 @@ namespace amscuda __host__ __device__ cuvect2f::cuvect2f() { - x = 0.0; y = 0.0; + x = 0.0f; y = 0.0f; return; } __host__ __device__ cuvect2f::~cuvect2f() { - x = 0.0; y = 0.0; - return; - } - - __host__ __device__ cuvect2f::cuvect2f(const float &_x, const float &_y) - { - x = _x; y = _y; + x = 0.0f; y = 0.0f; return; } @@ -35,37 +29,75 @@ namespace amscuda return x; } - __host__ __device__ cuvect2f cuvect2f::operator+(const cuvect2f &lhs) + __host__ __device__ cuvect2f cuvect2f::operator+(const cuvect2f &rhs) { cuvect2f ret; - ret.x = x + lhs.x; - ret.y = y + lhs.y; + ret.x = x+rhs.x; + ret.y = y+rhs.y; + return ret; } - __host__ __device__ cuvect2f cuvect2f::operator-(const cuvect2f &lhs) + __host__ __device__ cuvect2f cuvect2f::operator-(const cuvect2f &rhs) { cuvect2f ret; - ret.x = x - lhs.x; - ret.y = y - lhs.y; - return ret; - } - __host__ __device__ cuvect2f cuvect2f::operator*(const float &lhs) - { - cuvect2f ret; - ret.x = x*lhs; - ret.y = y*lhs; + ret.x = x-rhs.x; + ret.y = y-rhs.y; + return ret; } - __host__ __device__ cuvect2f cuvect2f::operator/(const float &lhs) + __host__ __device__ cuvect2f cuvect2f::operator*(const float &rhs) { cuvect2f ret; - ret.x = x/lhs; - ret.y = y/lhs; + ret.x = x*rhs; + ret.y = y*rhs; return ret; } + __host__ __device__ cuvect2f cuvect2f::operator/(const float &rhs) + { + cuvect2f ret; + ret.x = x/rhs; + ret.y = y/rhs; + return ret; + } + + __host__ __device__ cuvect2f& cuvect2f::operator+=(const cuvect2f &rhs) + { + x = x + rhs.x; + y = y + rhs.y; + return *this; + } + + __host__ __device__ cuvect2f& cuvect2f::operator-=(const cuvect2f &rhs) + { + x = x - rhs.x; + y = y - rhs.y; + return *this; + } + + __host__ __device__ cuvect2f& cuvect2f::operator*=(const float &rhs) + { + x = x * rhs; + y = y * rhs; + return *this; + } + + __host__ __device__ cuvect2f& cuvect2f::operator/=(const float &rhs) + { + x = x / rhs; + y = y / rhs; + return *this; + } + + + __host__ __device__ cuvect2f::cuvect2f(const float &_x, const float &_y) + { + x = _x; y = _y; + return; + } + __host__ __device__ float cuvect2f_dot(const cuvect2f &a, const cuvect2f &b) { float ret = a.x*b.x+a.y*b.y; @@ -94,7 +126,7 @@ namespace amscuda } else { - ret.x = 0.0; ret.y = 0.0; + ret.x = 0.0f; ret.y = 0.0f; } return ret; } @@ -108,181 +140,292 @@ namespace amscuda return ret; } - -__host__ __device__ cumat2f::cumat2f() -{ - int I; - for(I=0;I<4;I++) + __host__ __device__ cumat2f::cumat2f() { - dat[I] = 0.0; + m00 = 0.0f; + m01 = 0.0f; + m10 = 0.0f; + m11 = 0.0f; + + return; } - return; -} - -__host__ __device__ cumat2f::~cumat2f() -{ - int I; - for(I=0;I<4;I++) - { - dat[I] = 0.0; - } - return; -} - -__host__ __device__ float& cumat2f::operator[](const int I) -{ - return dat[I]; -} - -__host__ __device__ float& cumat2f::operator()(const int I, const int J) -{ - return dat[I+2*J]; -} - -__host__ __device__ cumat2f cumat2f::operator+(cumat2f lhs) -{ - int I; - cumat2f ret; - for(I=0;I<4;I++) - { - ret.dat[I] = this->dat[I] + lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat2f cumat2f::operator-(cumat2f lhs) -{ - int I; - cumat2f ret; - for(I=0;I<4;I++) - { - ret.dat[I] = this->dat[I] - lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat2f cumat2f::operator*(float lhs) -{ - cumat2f ret; - int I; - for(I=0;I<4;I++) - { - ret.dat[I] = this->dat[I]*lhs; - } - return ret; -} - -__host__ __device__ cumat2f cumat2f::operator/(float lhs) -{ - cumat2f ret; - int I; - for(I=0;I<4;I++) - { - ret.dat[I] = this->dat[I]/lhs; - } - return ret; -} - -__host__ __device__ float& cumat2f::at(const int I, const int J) -{ - return dat[I+2*J]; -} - - -__host__ __device__ cuvect2f cumat2f::operator*(cuvect2f lhs) -{ - cuvect2f ret = cuvect2f(0.0,0.0); - int I,J; - for(I=0;I<2;I++) - { - for(J=0;J<2;J++) - { - ret[I] = ret[I] + this->at(I,J)*lhs[J]; - } - } - return ret; -} - -__host__ __device__ cumat2f cumat2f::operator*(cumat2f lhs) -{ - cumat2f ret; - int I,J,K; - - for(I=0;I<2;I++) - { - for(J=0;J<2;J++) - { - ret(I,J) = 0.0; - for(K=0;K<2;K++) - { - ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); - } - } - } - - return ret; -} - -__host__ __device__ float cumat2f::det() -{ - float ret = 0.0; - ret = ret + this->at(0,0)*this->at(1,1); - ret = ret - this->at(1,0)*this->at(0,1); - return ret; -} - -__host__ __device__ cumat2f cumat2f::transpose() -{ - cumat2f q; - int I,J; - for(I=0;I<2;I++) - { - for(J=0;J<2;J++) - { - q.at(I,J) = this->at(J,I); - } - } - return q; -} - -__host__ __device__ cumat2f cumat2f::inverse() -{ - cumat2f q; - float dt = q.det(); - if(dt!=0.0) - { - q(0,0) = this->at(1,1)/dt; - q(0,1) = -this->at(0,1)/dt; - q(1,0) = -this->at(1,0)/dt; - q(1,1) = this->at(0,0)/dt; - } - else - { - q(0,0) = inf; - q(0,1) = inf; - q(1,0) = inf; - q(1,1) = inf; - } - - return q; -} - -//2x2 matrix operations -//matrix order is assumed to be mat[I,J] = mat[I+3*J] - -//transpose a 2x2 matrix in place -__host__ __device__ void mat2f_transpose(float *mat2inout) -{ - float mat2f_in[4]; - - mat2f_copy(mat2f_in,mat2inout); - mat2inout[0+0*2] = mat2f_in[0+0*2]; - mat2inout[1+0*2] = mat2f_in[0+1*2]; - mat2inout[0+1*2] = mat2f_in[1+0*2]; - mat2inout[1+1*2] = mat2f_in[1+1*2]; - return; -} + __host__ __device__ cumat2f::~cumat2f() + { + m00 = 0.0f; + m01 = 0.0f; + m10 = 0.0f; + m11 = 0.0f; + return; + } + + __host__ __device__ float& cumat2f::operator[](const int &I) + { + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m01; + if(I==3) return m11; + + return m00; + } + + __host__ __device__ const float& cumat2f::operator[](const int &I) const + { + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m01; + if(I==3) return m11; + + return m00; + } + + __host__ __device__ float& cumat2f::operator()(const int &I, const int &J) + { + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + + return m00; + } + + + __host__ __device__ float& cumat2f::at(const int &I, const int &J) + { + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + + return m00; + } + + __host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const + { + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + + return m00; + } + + __host__ __device__ const float& cumat2f::at(const int &I, const int &J) const + { + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + + return m00; + } + + + __host__ __device__ cumat2f cumat2f::operator+(const cumat2f &rhs) + { + cumat2f ret; + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; + + + return ret; + } + + __host__ __device__ cumat2f cumat2f::operator-(const cumat2f &rhs) + { + cumat2f ret; + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; + + return ret; + } + + __host__ __device__ cumat2f cumat2f::operator*(const float &rhs) + { + cumat2f ret; + ret.m00 = m00 * rhs; + ret.m10 = m10 * rhs; + ret.m01 = m01 * rhs; + ret.m11 = m11 * rhs; + return ret; + } + + __host__ __device__ cumat2f cumat2f::operator/(const float &rhs) + { + cumat2f ret; + ret.m00 = m00 / rhs; + ret.m10 = m10 / rhs; + ret.m01 = m01 / rhs; + ret.m11 = m11 / rhs; + return ret; + } + + __host__ __device__ cuvect2f cumat2f::operator*(const cuvect2f &rhs) + { + cuvect2f ret; + + ret.x = m00*rhs.x + m01*rhs.y; + ret.y = m10*rhs.x + m11*rhs.y; + + return ret; + } + + __host__ __device__ cumat2f cumat2f::operator*(const cumat2f &rhs) + { + cumat2f ret; + + ret.m00 = m00*rhs.m00 + m01*rhs.m10; + ret.m01 = m00*rhs.m01 + m01*rhs.m11; + ret.m10 = m10*rhs.m00 + m11*rhs.m10; + ret.m11 = m10*rhs.m01 + m11*rhs.m11; + + return ret; + } + + __host__ __device__ float cumat2f::det() + { + float ret = 0.0; + + ret += m00*m11; + ret -= m01*m10; + + return ret; + } + + __host__ __device__ cumat2f cumat2f::transpose() + { + cumat2f ret; + + ret.m00 = m00; + ret.m01 = m10; + ret.m10 = m01; + ret.m11 = m11; + + return ret; + } + + __host__ __device__ cumat2f cumat2f::inverse() + { + cumat2f q; + float dt = det(); + if(dt!=0) + { + q(0,0) = q(1,1)/dt; + q(0,1) = -q(0,1)/dt; + q(1,0) = -q(1,0)/dt; + q(1,1) = q(0,0)/dt; + + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(1,0) = inf; + q(1,1) = inf; + } + + return q; + } + + __host__ __device__ cumat2f operator-(const cumat2f &rhs) + { + cumat2f ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + + return ret; + } + + __host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f &rhs) + { + m00 += rhs.m00; + m10 += rhs.m10; + m01 += rhs.m01; + m11 += rhs.m11; + + return *this; + } + + __host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f &rhs) + { + m00 -= rhs.m00; + m10 -= rhs.m10; + m01 -= rhs.m01; + m11 -= rhs.m11; + + return *this; + } + + + + __host__ __device__ cumat2f& cumat2f::operator/=(const float &rhs) + { + m00 /= rhs; + m10 /= rhs; + m01 /= rhs; + m11 /= rhs; + + return *this; + } + + __host__ __device__ cumat2f& cumat2f::operator*=(const float &rhs) + { + m00 *= rhs; + m10 *= rhs; + m01 *= rhs; + m11 *= rhs; + + return *this; + } + + __host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f &rhs) + { + cumat2f tmp; + + tmp.m00 = m00*rhs.m00 + m01*rhs.m10; + tmp.m01 = m00*rhs.m01 + m01*rhs.m11; + tmp.m10 = m10*rhs.m00 + m11*rhs.m10; + tmp.m11 = m10*rhs.m01 + m11*rhs.m11; + + (*this) = tmp; + + return *this; + } + + __host__ __device__ cumat2f::cumat2f( + const float & _m00, const float & _m01, + const float & _m10, const float & _m11 + ) + { + m00 = _m00; + m01 = _m01; + m10 = _m10; + m11 = _m11; + } + + __host__ __device__ float* cumat2f::data() + { + //pointer to float[9] representation of matrix + return (float*) this; + } + + __host__ __device__ const float* cumat2f::data() const + { + //pointer to float[9] representation of matrix + return (const float*) this; + } + + + +/////////////////////////// +//legacy array operations// +/////////////////////////// //copies src to dest __host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src) diff --git a/src/amsculib2/cuvect3.cu b/src/amsculib2/cuvect3.cu index 6a9357c..27a9af1 100644 --- a/src/amsculib2/cuvect3.cu +++ b/src/amsculib2/cuvect3.cu @@ -15,7 +15,7 @@ namespace amscuda return; } - __host__ __device__ double& cuvect3::operator[](const int I) + __host__ __device__ double& cuvect3::operator[](const int &I) { if(I==0) return x; if(I==1) return y; @@ -23,7 +23,7 @@ namespace amscuda return x; } - __host__ __device__ const double& cuvect3::operator[](const int I) const + __host__ __device__ const double& cuvect3::operator[](const int &I) const { if(I==0) return x; if(I==1) return y; @@ -31,59 +31,92 @@ namespace amscuda return x; } - __host__ __device__ cuvect3 cuvect3::operator+(cuvect3 lhs) + __host__ __device__ cuvect3 cuvect3::operator+(const cuvect3 &rhs) { cuvect3 ret; - ret.x = x+lhs.x; - ret.y = y+lhs.y; - ret.z = z+lhs.z; + ret.x = x+rhs.x; + ret.y = y+rhs.y; + ret.z = z+rhs.z; return ret; } - __host__ __device__ cuvect3 cuvect3::operator-(cuvect3 lhs) + __host__ __device__ cuvect3 cuvect3::operator-(const cuvect3 &rhs) { cuvect3 ret; - ret.x = x-lhs.x; - ret.y = y-lhs.y; - ret.z = z-lhs.z; + ret.x = x-rhs.x; + ret.y = y-rhs.y; + ret.z = z-rhs.z; return ret; } - __host__ __device__ cuvect3 cuvect3::operator*(double lhs) + __host__ __device__ cuvect3 cuvect3::operator*(const double &rhs) { cuvect3 ret; - ret.x = x*lhs; - ret.y = y*lhs; - ret.z = z*lhs; + ret.x = x*rhs; + ret.y = y*rhs; + ret.z = z*rhs; return ret; } - __host__ __device__ cuvect3 cuvect3::operator/(double lhs) + __host__ __device__ cuvect3 cuvect3::operator/(const double &rhs) { cuvect3 ret; - ret.x = x/lhs; - ret.y = y/lhs; - ret.z = z/lhs; + ret.x = x/rhs; + ret.y = y/rhs; + ret.z = z/rhs; return ret; } - __host__ __device__ cuvect3::cuvect3(double _x, double _y, double _z) + __host__ __device__ cuvect3& cuvect3::operator+=(const cuvect3 &rhs) + { + x = x + rhs.x; + y = y + rhs.y; + z = z + rhs.z; + return *this; + } + + __host__ __device__ cuvect3& cuvect3::operator-=(const cuvect3 &rhs) + { + x = x - rhs.x; + y = y - rhs.y; + z = z - rhs.z; + return *this; + } + + __host__ __device__ cuvect3& cuvect3::operator*=(const double &rhs) + { + x = x * rhs; + y = y * rhs; + z = z * rhs; + return *this; + } + + __host__ __device__ cuvect3& cuvect3::operator/=(const double &rhs) + { + x = x / rhs; + y = y / rhs; + z = z / rhs; + return *this; + } + + + __host__ __device__ cuvect3::cuvect3(const double &_x, const double &_y, const double &_z) { x = _x; y = _y; z = _z; return; } - __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b) + __host__ __device__ double cuvect3_dot(const cuvect3 &a, const cuvect3 &b) { double ret = a.x*b.x+a.y*b.y+a.z*b.z; return ret; } - __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b) + __host__ __device__ cuvect3 cuvect3_cross(const cuvect3 &a, const cuvect3 &b) { cuvect3 ret; ret[0] = a[1]*b[2]-a[2]*b[1]; @@ -93,18 +126,18 @@ namespace amscuda return ret; } - __host__ __device__ double cuvect3_norm(cuvect3 a) + __host__ __device__ double cuvect3_norm(const cuvect3 &a) { double ret; - ret = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z); + ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); return ret; } - __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a) + __host__ __device__ cuvect3 cuvect3_normalize(const cuvect3 &a) { cuvect3 ret; double m; - m = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z); + m = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); if(m>0.0) { ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; @@ -117,7 +150,7 @@ namespace amscuda return ret; } - __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b) + __host__ __device__ cuvect3 cuvect3_proj(const cuvect3 &a, const cuvect3 &b) { cuvect3 ret; cuvect3 bn = cuvect3_normalize(b); @@ -126,8 +159,422 @@ namespace amscuda return ret; } + + + +__host__ __device__ cumat3::cumat3() +{ + m00 = 0.0; + m01 = 0.0; + m02 = 0.0; + m10 = 0.0; + m11 = 0.0; + m12 = 0.0; + m20 = 0.0; + m21 = 0.0; + m22 = 0.0; + + return; +} + +__host__ __device__ cumat3::~cumat3() +{ + m00 = 0.0; + m01 = 0.0; + m02 = 0.0; + m10 = 0.0; + m11 = 0.0; + m12 = 0.0; + m20 = 0.0; + m21 = 0.0; + m22 = 0.0; + return; +} + +__host__ __device__ double& cumat3::operator[](const int &I) +{ + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m20; + if(I==3) return m01; + if(I==4) return m11; + if(I==5) return m21; + if(I==6) return m02; + if(I==7) return m12; + if(I==8) return m22; + + return m00; +} + +__host__ __device__ double& cumat3::operator()(const int &I, const int &J) +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ double& cumat3::at(const int &I, const int &J) +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + +__host__ __device__ const double& cumat3::operator[](const int &I) const +{ + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m20; + if(I==3) return m01; + if(I==4) return m11; + if(I==5) return m21; + if(I==6) return m02; + if(I==7) return m12; + if(I==8) return m22; + + return m00; +} + +__host__ __device__ const double& cumat3::operator()(const int &I, const int &J) const +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ const double& cumat3::at(const int &I, const int &J) const +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ cumat3 cumat3::operator+(const cumat3 &rhs) +{ + cumat3 ret; + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; + ret.m20 = m20 + rhs.m20; + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; + ret.m21 = m21 + rhs.m21; + ret.m02 = m02 + rhs.m02; + ret.m12 = m12 + rhs.m12; + ret.m22 = m22 + rhs.m22; + + + return ret; +} + +__host__ __device__ cumat3 cumat3::operator-(const cumat3 &rhs) +{ + cumat3 ret; + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; + ret.m20 = m20 - rhs.m20; + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; + ret.m21 = m21 - rhs.m21; + ret.m02 = m02 - rhs.m02; + ret.m12 = m12 - rhs.m12; + ret.m22 = m22 - rhs.m22; + return ret; +} + +__host__ __device__ cumat3 cumat3::operator*(const double &rhs) +{ + cumat3 ret; + ret.m00 = m00 * rhs; + ret.m10 = m10 * rhs; + ret.m20 = m20 * rhs; + ret.m01 = m01 * rhs; + ret.m11 = m11 * rhs; + ret.m21 = m21 * rhs; + ret.m02 = m02 * rhs; + ret.m12 = m12 * rhs; + ret.m22 = m22 * rhs; + return ret; +} + +__host__ __device__ cumat3 cumat3::operator/(const double &rhs) +{ + cumat3 ret; + ret.m00 = m00 / rhs; + ret.m10 = m10 / rhs; + ret.m20 = m20 / rhs; + ret.m01 = m01 / rhs; + ret.m11 = m11 / rhs; + ret.m21 = m21 / rhs; + ret.m02 = m02 / rhs; + ret.m12 = m12 / rhs; + ret.m22 = m22 / rhs; + return ret; +} + +__host__ __device__ cuvect3 cumat3::operator*(const cuvect3 &rhs) +{ + cuvect3 ret; + + ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z; + ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z; + ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z; + + return ret; +} + +__host__ __device__ cumat3 cumat3::operator*(const cumat3 &rhs) +{ + cumat3 ret; + + ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; + ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; + ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; + ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; + ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; + ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; + ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + + return ret; +} + +__host__ __device__ double cumat3::det() +{ + double ret = 0.0; + + ret += m00*m11*m22; + ret += m01*m12*m20; + ret += m02*m10*m21; + ret -= m20*m11*m02; + ret -= m21*m12*m00; + ret -= m22*m10*m01; + + return ret; +} + +__host__ __device__ cumat3 cumat3::transpose() +{ + cumat3 ret; + + ret.m00 = m00; + ret.m01 = m10; + ret.m02 = m20; + ret.m10 = m01; + ret.m11 = m11; + ret.m12 = m21; + ret.m20 = m02; + ret.m21 = m12; + ret.m22 = m22; + + return ret; +} + +__host__ __device__ cumat3 cumat3::inverse() +{ + cumat3 q; + double dt = det(); + if(dt!=0.0) + { + q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; + q(1,0) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; + q(2,0) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; + q(0,1) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; + q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; + q(2,1) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; + q(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; + q(1,2) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; + q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + // q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; + // q(0,1) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; + // q(0,2) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; + // q(1,0) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; + // q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; + // q(1,2) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; + // q(2,0) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; + // q(2,1) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; + // q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + // q = q.transpose(); + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(0,2) = inf; + q(1,0) = inf; + q(1,1) = inf; + q(1,2) = inf; + q(2,0) = inf; + q(2,1) = inf; + q(2,2) = inf; + } + + return q; +} + +__host__ __device__ cumat3 operator-(const cumat3 &rhs) +{ + cumat3 ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m20 = -rhs.m20; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + ret.m21 = -rhs.m21; + ret.m02 = -rhs.m02; + ret.m12 = -rhs.m12; + ret.m22 = -rhs.m22; + + return ret; +} + +__host__ __device__ cumat3& cumat3::operator+=(const cumat3 &rhs) +{ + m00 += rhs.m00; + m10 += rhs.m10; + m20 += rhs.m20; + m01 += rhs.m01; + m11 += rhs.m11; + m21 += rhs.m21; + m02 += rhs.m02; + m12 += rhs.m12; + m22 += rhs.m22; + + return *this; +} + +__host__ __device__ cumat3& cumat3::operator-=(const cumat3 &rhs) +{ + m00 -= rhs.m00; + m10 -= rhs.m10; + m20 -= rhs.m20; + m01 -= rhs.m01; + m11 -= rhs.m11; + m21 -= rhs.m21; + m02 -= rhs.m02; + m12 -= rhs.m12; + m22 -= rhs.m22; + + return *this; +} + + + +__host__ __device__ cumat3& cumat3::operator/=(const double &rhs) +{ + m00 /= rhs; + m10 /= rhs; + m20 /= rhs; + m01 /= rhs; + m11 /= rhs; + m21 /= rhs; + m02 /= rhs; + m12 /= rhs; + m22 /= rhs; + + return *this; +} + +__host__ __device__ cumat3& cumat3::operator*=(const double &rhs) +{ + m00 *= rhs; + m10 *= rhs; + m20 *= rhs; + m01 *= rhs; + m11 *= rhs; + m21 *= rhs; + m02 *= rhs; + m12 *= rhs; + m22 *= rhs; + + return *this; +} + +__host__ __device__ cumat3& cumat3::operator*=(const cumat3 &rhs) +{ + cumat3 tmp; + + tmp.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; + tmp.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; + tmp.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + tmp.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; + tmp.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; + tmp.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + tmp.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; + tmp.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; + tmp.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + + (*this) = tmp; + + return *this; +} + +__host__ __device__ cumat3::cumat3( + const double & _m00, const double & _m01, const double & _m02, + const double & _m10, const double & _m11, const double & _m12, + const double & _m20, const double & _m21, const double & _m22 +) +{ + m00 = _m00; + m01 = _m01; + m02 = _m02; + m10 = _m10; + m11 = _m11; + m12 = _m12; + m20 = _m20; + m21 = _m21; + m22 = _m22; +} + +__host__ __device__ double* cumat3::data() +{ + //pointer to double[9] representation of matrix + return (double*) this; +} + +__host__ __device__ const double* cumat3::data() const +{ + //pointer to double[9] representation of matrix + return (const double*) this; +} + //transposes a 3x3 (9 element) matrix -__host__ __device__ void mat3_transpose(double *mat3inout) +__host__ __device__ void mat3f_transpose(double *mat3inout) { int I,J; double matint[9]; @@ -148,20 +595,20 @@ __host__ __device__ void mat3_transpose(double *mat3inout) } //copies src to dest -__host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src) +__host__ __device__ void mat3f_copy(double *mat3f_dest, const double *mat3f_src) { int I; - if(mat3_dest==NULL || mat3_src==NULL) + if(mat3f_dest==NULL || mat3f_src==NULL) return; for(I=0;I<9;I++) - mat3_dest[I] = mat3_src[I]; + mat3f_dest[I] = mat3f_src[I]; return; } -__host__ __device__ double mat3_det(double *mat3in) +__host__ __device__ double mat3f_det(double *mat3in) { double ret = 0.0; @@ -176,11 +623,11 @@ __host__ __device__ double mat3_det(double *mat3in) } //inverts a 3x3 (9 element) matrix -__host__ __device__ void mat3_inverse(double *mat3inout) +__host__ __device__ void mat3f_inverse(double *mat3inout) { int I; double matint[9]; - double det = mat3_det(mat3inout); + double det = mat3f_det(mat3inout); for(I=0;I<9;I++) { @@ -197,12 +644,12 @@ __host__ __device__ void mat3_inverse(double *mat3inout) mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det; mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det; - mat3_transpose(mat3inout); + mat3f_transpose(mat3inout); return; } -__host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin) +__host__ __device__ cuvect3 mat3f_mult(double *mat3in, const cuvect3 &cvin) { int I,J; cuvect3 ret; @@ -218,7 +665,7 @@ __host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin) return ret; } -__host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout) +__host__ __device__ void mat3f_mult(double *matina, double *matinb, double *matout) { double wrk[9]; int I,J,K; @@ -253,23 +700,23 @@ __host__ __device__ void mat3_mult(double *matina, double *matinb, double *matou return; } -__host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout) +__host__ __device__ void mat3f_hodgedual(const cuvect3 &vecin, double *matout) { - matout[0 + 0*3] = 0.0f; + matout[0 + 0*3] = 0.0; matout[1 + 0*3] = -vecin[2]; matout[2 + 0*3] = vecin[1]; matout[0 + 1*3] = vecin[2]; - matout[1 + 1*3] = 0.0f; + matout[1 + 1*3] = 0.0; matout[2 + 1*3] = -vecin[0]; matout[0 + 2*3] = -vecin[1]; matout[1 + 2*3] = vecin[0]; - matout[2 + 2*3] = 0.0f; + matout[2 + 2*3] = 0.0; return; } -__host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout) +__host__ __device__ void mat3f_hodgedual(double *matin, cuvect3 &vecout) { vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); @@ -279,7 +726,7 @@ __host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout) } //returns direction cosine rotation matrix from axis and angle -__host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout) +__host__ __device__ void mat3f_rot_from_axisangle(cuvect3 axis, double angle, double *matout) { int I; double H[9]; @@ -293,12 +740,12 @@ __host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, dou axis = cuvect3_normalize(axis); - mat3_hodgedual(axis,H); - mat3_mult(H,H,Hsq); + mat3f_hodgedual(axis,H); + mat3f_mult(H,H,Hsq); for(I=0;I<9;I++) { - matout[I] = (II[I] + Hsq[I]) + H[I]*sin(angle) - Hsq[I]*cos(angle); + matout[I] = (II[I] + Hsq[I]) + H[I]*sinf(angle) - Hsq[I]*cosf(angle); } return; @@ -326,8 +773,8 @@ __host__ void test_cudavect_logic1() } } - mat3_inverse(mb); - mat3_mult(ma,mb,mc); + mat3f_inverse(mb); + mat3f_mult(ma,mb,mc); for(I=0;I<3;I++) { @@ -352,8 +799,8 @@ __host__ void test_cudavect_logic1() } a = cuvect3(1,1,1); - b = mat3_mult(ma,a); - b = mat3_mult(mb,b); + b = mat3f_mult(ma,a); + b = mat3f_mult(mb,b); for(I=0;I<3;I++) { @@ -400,198 +847,66 @@ __host__ void test_cudavect_logic1() return; } - - -__host__ __device__ cumat3::cumat3() -{ - int I; - for(I=0;I<9;I++) - { - dat[I] = 0.0; - } - return; -} - -__host__ __device__ cumat3::~cumat3() -{ - int I; - for(I=0;I<9;I++) - { - dat[I] = 0.0; - } - return; -} - -__host__ __device__ double& cumat3::operator[](const int I) -{ - return dat[I]; -} - -__host__ __device__ double& cumat3::operator()(const int I, const int J) -{ - return dat[I+3*J]; -} - -__host__ __device__ cumat3 cumat3::operator+(cumat3 lhs) -{ - int I; - cumat3 ret; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I] + lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat3 cumat3::operator-(cumat3 lhs) -{ - int I; - cumat3 ret; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I] - lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat3 cumat3::operator*(double lhs) +__host__ __device__ cumat3 hodge_dual(const cuvect3 &vin) { cumat3 ret; - int I; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I]*lhs; - } - return ret; -} -__host__ __device__ cumat3 cumat3::operator/(double lhs) -{ - cumat3 ret; - int I; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I]/lhs; - } - return ret; -} + ret.m00 = 0.0; + ret.m11 = 0.0; + ret.m22 = 0.0; -__host__ __device__ double& cumat3::at(const int I, const int J) -{ - return dat[I+3*J]; -} - + ret.m01 = vin.z; + ret.m12 = vin.x; + ret.m20 = vin.y; -__host__ __device__ cuvect3 cumat3::operator*(cuvect3 lhs) -{ - cuvect3 ret = cuvect3(0.0,0.0,0.0); - int I,J; - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - ret[I] = ret[I] + this->at(I,J)*lhs[J]; - } - } - return ret; -} - -__host__ __device__ cumat3 cumat3::operator*(cumat3 lhs) -{ - cumat3 ret; - int I,J,K; - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - ret(I,J) = 0.0; - for(K=0;K<3;K++) - { - ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); - } - } - } + ret.m10 = -vin.z; + ret.m21 = -vin.x; + ret.m02 = -vin.y; return ret; } -__host__ __device__ double cumat3::det() -{ - double ret = 0.0; - - ret = ret + this->at(0,0)*this->at(1,1)*this->at(2,2); - ret = ret + this->at(0,1)*this->at(1,2)*this->at(2,0); - ret = ret + this->at(0,2)*this->at(1,0)*this->at(2,1); - ret = ret - this->at(0,0)*this->at(1,2)*this->at(2,1); - ret = ret - this->at(0,1)*this->at(1,0)*this->at(2,2); - ret = ret - this->at(0,2)*this->at(1,1)*this->at(2,0); - - return ret; -} - -__host__ __device__ cumat3 cumat3::transpose() -{ - cumat3 q; - int I,J; - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - q.at(I,J) = this->at(J,I); - } - } - return q; -} - -__host__ __device__ cumat3 cumat3::inverse() -{ - cumat3 q; - double dt = q.det(); - if(dt!=0.0) - { - q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt; - q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt; - q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt; - q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt; - q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt; - q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt; - q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt; - q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt; - q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt; - - q = q.transpose(); - } - else - { - q(0,0) = inf; - q(0,1) = inf; - q(0,2) = inf; - q(1,0) = inf; - q(1,1) = inf; - q(1,2) = inf; - q(2,0) = inf; - q(2,1) = inf; - q(2,2) = inf; - } - - return q; -} - -__host__ __device__ cuvect3 operator-(cuvect3 rhs) +__host__ __device__ cuvect3 hodge_dual(const cumat3 &min) { cuvect3 ret; - ret[0] = -rhs[0]; - ret[1] = -rhs[1]; - ret[2] = -rhs[2]; + + ret.x = 0.5f*(min.m12 - min.m21); + ret.y = 0.5f*(min.m20 - min.m02); + ret.z = 0.5f*(min.m01 - min.m10); + return ret; } -__host__ __device__ cumat3 operator-(cumat3 rhs) +__host__ __device__ const cumat3 cumat3_eye() { cumat3 ret; - int I; - for(I=0;I<9;I++) ret[I] = -rhs[I]; + ret.m00 = 1.0f; + ret.m11 = 1.0f; + ret.m22 = 1.0f; + + return ret; +} + +__host__ __device__ const cumat3 cumat3_zeros() +{ + cumat3 ret; + return ret; +} + +__host__ __device__ cumat3 rotmat_from_axisangle(const cuvect3 &axis, const double &angle) +{ + cumat3 ret = cumat3_zeros(); + cumat3 H; + cuvect3 _naxis; + + _naxis = cuvect3_normalize(axis); + H = hodge_dual(_naxis); + + ret += H*sinf(angle); + H = H*H; + ret -= H*cosf(angle); + ret += (H + cumat3_eye()); + return ret; }