diff --git a/build_linux64/libamsculib3.linux64.a b/build_linux64/libamsculib3.linux64.a index 3eb062a..1b5956f 100644 Binary files a/build_linux64/libamsculib3.linux64.a and b/build_linux64/libamsculib3.linux64.a differ diff --git a/build_linux64/objstore/cuvec2.o b/build_linux64/objstore/cuvec2.o new file mode 100644 index 0000000..7c9f8ab Binary files /dev/null and b/build_linux64/objstore/cuvec2.o differ diff --git a/build_linux64/objstore/cuvec3.o b/build_linux64/objstore/cuvec3.o new file mode 100644 index 0000000..5a8a722 Binary files /dev/null and b/build_linux64/objstore/cuvec3.o differ diff --git a/build_linux64/objstore/cuvec4.o b/build_linux64/objstore/cuvec4.o new file mode 100644 index 0000000..fc29a37 Binary files /dev/null and b/build_linux64/objstore/cuvec4.o differ diff --git a/build_linux64/test b/build_linux64/test deleted file mode 100644 index 2475f49..0000000 Binary files a/build_linux64/test and /dev/null differ diff --git a/include/amsculib3/math/amscumath.hpp b/include/amsculib3/math/amscumath.hpp index df934b8..13c8389 100644 --- a/include/amsculib3/math/amscumath.hpp +++ b/include/amsculib3/math/amscumath.hpp @@ -57,14 +57,19 @@ namespace amscuda #include //component headers +#include + #include #include -//#include -//#include -//#include +#include +#include +#include #include #include #include +#include +#include +#include #endif diff --git a/include/amsculib3/math/amscumath_predecl.hpp b/include/amsculib3/math/amscumath_predecl.hpp new file mode 100644 index 0000000..5be700c --- /dev/null +++ b/include/amsculib3/math/amscumath_predecl.hpp @@ -0,0 +1,11 @@ +#ifndef __AMSCUMATH_PREDECL_HPP__ +#define __AMSCUMATH_PREDECL_HPP__ + +namespace amscuda +{ + + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib3/math/cuvec2.hpp b/include/amsculib3/math/cuvec2.hpp new file mode 100644 index 0000000..fb80be6 --- /dev/null +++ b/include/amsculib3/math/cuvec2.hpp @@ -0,0 +1,104 @@ +#ifndef __CUVEC2_HPP__ +#define __CUVEC2_HPP__ + +namespace amscuda +{ + + class cuvec2 + { + public: + double x; + double y; + + __host__ __device__ cuvec2(); + __host__ __device__ ~cuvec2(); + __host__ __device__ cuvec2(const double &_x, const double &_y); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + + __host__ __device__ cuvec2 operator+(const cuvec2& rhs) const; + __host__ __device__ cuvec2 operator-(const cuvec2& rhs) const; + __host__ __device__ cuvec2 operator*(const cuvec2& rhs) const; //elementwise product + __host__ __device__ cuvec2 operator/(const cuvec2& rhs) const; //elementwise division + + __host__ __device__ friend cuvec2 operator*(const cuvec2& lhs, const double& rhs); + __host__ __device__ friend cuvec2 operator*(const double& lhs, const cuvec2& rhs); + __host__ __device__ friend cuvec2 operator/(const cuvec2& lhs, const double& rhs); + __host__ __device__ friend cuvec2 operator/(const double& lhs, const cuvec2& rhs); + __host__ __device__ friend cuvec2 operator-(const cuvec2& other); + + __host__ __device__ cuvec2& operator+=(const cuvec2& rhs); + __host__ __device__ cuvec2& operator-=(const cuvec2& rhs); + __host__ __device__ cuvec2& operator*=(const double& rhs); + __host__ __device__ cuvec2& operator/=(const double& rhs); + + + }; + + class cumat2 + { + public: + double m00,m10; + double m01,m11; + + __host__ __device__ cumat2(); + __host__ __device__ ~cumat2(); + __host__ __device__ cumat2( + const double& _m00, const double& _m01, + const double& _m10, const double& _m11 + ); + __host__ __device__ cumat2(const double* data4); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + __host__ __device__ double& operator()(const int &I, const int &J); + __host__ __device__ const double& operator()(const int &I, const int &J) const; + __host__ __device__ double& at(const int &I, const int &J); + __host__ __device__ const double& at(const int &I, const int &J) const; + + __host__ __device__ double* data(); //pointer to double4 representation of matrix + __host__ __device__ const double* data() const; //pointer to double4 representation of matrix + + //operators + __host__ __device__ cumat2 operator+(const cumat2& rhs) const; + __host__ __device__ cumat2 operator-(const cumat2& rhs) const; + __host__ __device__ cumat2 operator*(const cumat2& rhs) const; + __host__ __device__ friend cumat2 operator*(const cumat2& lhs, const double& rhs); + __host__ __device__ friend cumat2 operator/(const cumat2& lhs, const double& rhs); + __host__ __device__ friend cumat2 operator*(const double& lhs, const cumat2& rhs); + __host__ __device__ friend cuvec2 operator*(const cumat2& lhs, const cuvec2& rhs); + __host__ __device__ friend cuvec2 operator*(const cuvec2& lhs, const cumat2& rhs); + __host__ __device__ friend cumat2 operator-(const cumat2& rhs); + + //in place operators to save register use + __host__ __device__ cumat2& operator+=(const cumat2& rhs); + __host__ __device__ cumat2& operator-=(const cumat2& rhs); + __host__ __device__ cumat2& operator*=(const double& rhs); + __host__ __device__ cumat2& operator/=(const double& rhs); + __host__ __device__ cumat2& operator*=(const cumat2& rhs); + + __host__ __device__ cumat2 transpose() const; + + __host__ __device__ double det(); + __host__ __device__ cumat2 inverse(); + }; + + __host__ __device__ double cuvec2_dot(const cuvec2 &a, const cuvec2 &b); + __host__ __device__ double cuvec2_cross(const cuvec2 &a, const cuvec2 &b); + __host__ __device__ double cuvec2_norm(const cuvec2 &a); + __host__ __device__ cuvec2 cuvec2_normalize(const cuvec2 &a); + __host__ __device__ cuvec2 cuvec2_proj(const cuvec2 &a, const cuvec2 &b); + __host__ __device__ cumat2 cumat2_rot_from_angle(const double &angle); + + + /////////// + // Tests // + /////////// + void test_cuvec2_1(); + + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib3/math/cuvec2i.hpp b/include/amsculib3/math/cuvec2i.hpp new file mode 100644 index 0000000..172d7e5 --- /dev/null +++ b/include/amsculib3/math/cuvec2i.hpp @@ -0,0 +1,12 @@ +#ifndef __CUVEC2I_HPP__ +#define __CUVEC2I_HPP__ + +namespace amscuda +{ + + +}; //end namespace amscuda + + +#endif + diff --git a/include/amsculib3/math/cuvec3.hpp b/include/amsculib3/math/cuvec3.hpp new file mode 100644 index 0000000..65542af --- /dev/null +++ b/include/amsculib3/math/cuvec3.hpp @@ -0,0 +1,103 @@ +#ifndef __CUVEC3_HPP__ +#define __CUVEC3_HPP__ + +namespace amscuda +{ + + class cuvec3 + { + public: + double x; + double y; + double z; + + __host__ __device__ cuvec3(); + __host__ __device__ ~cuvec3(); + __host__ __device__ cuvec3(const double &_x, const double &_y, const double &_z); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + + __host__ __device__ cuvec3 operator+(const cuvec3& rhs) const; + __host__ __device__ cuvec3 operator-(const cuvec3& rhs) const; + __host__ __device__ cuvec3 operator*(const cuvec3& rhs) const; //elementwise product + __host__ __device__ cuvec3 operator/(const cuvec3& rhs) const; //elementwise division + + __host__ __device__ friend cuvec3 operator*(const cuvec3& lhs, const double& rhs); + __host__ __device__ friend cuvec3 operator*(const double& lhs, const cuvec3& rhs); + __host__ __device__ friend cuvec3 operator/(const cuvec3& lhs, const double& rhs); + __host__ __device__ friend cuvec3 operator/(const double& lhs, const cuvec3& rhs); + __host__ __device__ friend cuvec3 operator-(const cuvec3& other); + + __host__ __device__ cuvec3& operator+=(const cuvec3& rhs); + __host__ __device__ cuvec3& operator-=(const cuvec3& rhs); + __host__ __device__ cuvec3& operator*=(const double& rhs); + __host__ __device__ cuvec3& operator/=(const double& rhs); + }; + + class cumat3 + { + public: + double m00,m10,m20; + double m01,m11,m21; + double m02,m12,m22; + + __host__ __device__ cumat3(); + __host__ __device__ ~cumat3(); + __host__ __device__ cumat3( + const double& _m00, const double& _m01, const double& _m02, + const double& _m10, const double& _m11, const double& _m12, + const double& _m20, const double& _m21, const double& _m22 + ); + __host__ __device__ cumat3(const double* data9); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + __host__ __device__ double& operator()(const int &I, const int &J); + __host__ __device__ const double& operator()(const int &I, const int &J) const; + __host__ __device__ double& at(const int &I, const int &J); + __host__ __device__ const double& at(const int &I, const int &J) const; + __host__ __device__ double* data(); //pointer to double9 representation of matrix + __host__ __device__ const double* data() const; //pointer to double9 representation of matrix + + __host__ __device__ cumat3 operator+(const cumat3& rhs) const; + __host__ __device__ cumat3 operator-(const cumat3& rhs) const; + __host__ __device__ cumat3 operator*(const cumat3& rhs) const; + + __host__ __device__ friend cumat3 operator*(const cumat3& lhs, const double& rhs); + __host__ __device__ friend cumat3 operator/(const cumat3& lhs, const double& rhs); + __host__ __device__ friend cumat3 operator*(const double& lhs, const cumat3& rhs); + __host__ __device__ friend cuvec3 operator*(const cumat3& lhs, const cuvec3& rhs); + __host__ __device__ friend cuvec3 operator*(const cuvec3& lhs, const cumat3& rhs); + __host__ __device__ friend cumat3 operator-(const cumat3& rhs); + + __host__ __device__ cumat3& operator+=(const cumat3& rhs); + __host__ __device__ cumat3& operator-=(const cumat3& rhs); + __host__ __device__ cumat3& operator*=(const double& rhs); + __host__ __device__ cumat3& operator/=(const double& rhs); + __host__ __device__ cumat3& operator*=(const cumat3& rhs); + + __host__ __device__ cumat3 transpose() const; + + __host__ __device__ double det(); + __host__ __device__ cumat3 inverse(); + }; + + __host__ __device__ double cuvec3_dot(const cuvec3 &a,const cuvec3 &b); + __host__ __device__ cuvec3 cuvec3_cross(const cuvec3 &a, const cuvec3 &b); + __host__ __device__ double cuvec3_norm(const cuvec3 &a); + __host__ __device__ cuvec3 cuvec3_normalize(const cuvec3 &a); + __host__ __device__ cuvec3 cuvec3_proj(const cuvec3 &a, const cuvec3 &b); + + __host__ __device__ cumat3 hodge_dual(const cuvec3 &vin); + __host__ __device__ cuvec3 hodge_dual(const cumat3 &min); + __host__ __device__ cumat3 rotmat_from_axisangle(const cuvec3 &axis, const double &angle); + + __host__ void test_cudavectf_logic1(); + + +}; //end namespace amscuda + + +#endif + diff --git a/include/amsculib3/math/cuvec3i.hpp b/include/amsculib3/math/cuvec3i.hpp new file mode 100644 index 0000000..90f6c1c --- /dev/null +++ b/include/amsculib3/math/cuvec3i.hpp @@ -0,0 +1,12 @@ +#ifndef __CUVEC3I_HPP__ +#define __CUVEC3I_HPP__ + +namespace amscuda +{ + + +}; //end namespace amscuda + + +#endif + diff --git a/include/amsculib3/math/cuvec4.hpp b/include/amsculib3/math/cuvec4.hpp new file mode 100644 index 0000000..e17065f --- /dev/null +++ b/include/amsculib3/math/cuvec4.hpp @@ -0,0 +1,103 @@ +#ifndef __CUVEC4_HPP__ +#define __CUVEC4_HPP__ + +namespace amscuda +{ + + class cuvec4 + { + public: + double x; + double y; + double z; + double w; + + __host__ __device__ cuvec4(); + __host__ __device__ ~cuvec4(); + __host__ __device__ cuvec4(const double &_x, const double &_y, const double &_z, const double &_w); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + + __host__ __device__ cuvec4 operator+(const cuvec4& rhs) const; + __host__ __device__ cuvec4 operator-(const cuvec4& rhs) const; + __host__ __device__ cuvec4 operator*(const cuvec4& rhs) const; //elementwise product + __host__ __device__ cuvec4 operator/(const cuvec4& rhs) const; //elementwise division + + __host__ __device__ friend cuvec4 operator*(const cuvec4& lhs, const double& rhs); + __host__ __device__ friend cuvec4 operator*(const double& lhs, const cuvec4& rhs); + __host__ __device__ friend cuvec4 operator/(const cuvec4& lhs, const double& rhs); + __host__ __device__ friend cuvec4 operator/(const double& lhs, const cuvec4& rhs); + __host__ __device__ friend cuvec4 operator-(const cuvec4& other); + + __host__ __device__ cuvec4& operator+=(const cuvec4& rhs); + __host__ __device__ cuvec4& operator-=(const cuvec4& rhs); + __host__ __device__ cuvec4& operator*=(const double& rhs); + __host__ __device__ cuvec4& operator/=(const double& rhs); + }; + + class cumat4 + { + public: + //double dat[16]; + //__forceinline__ + + + double m00,m10,m20,m30; + double m01,m11,m21,m31; + double m02,m12,m22,m32; + double m03,m13,m23,m33; + + __host__ __device__ cumat4(); + __host__ __device__ ~cumat4(); + __host__ __device__ cumat4( + const double& _m00, const double& _m01, const double& _m02, const double& _m03, + const double& _m10, const double& _m11, const double& _m12, const double& _m13, + const double& _m20, const double& _m21, const double& _m22, const double& _m23, + const double& _m30, const double& _m31, const double& _m32, const double& _m33 + ); + __host__ __device__ cumat4(const double* data16); + + __host__ __device__ double& operator[](const int &I); + __host__ __device__ const double& operator[](const int &I) const; + __host__ __device__ double& operator()(const int &I, const int &J); + __host__ __device__ const double& operator()(const int &I, const int &J) const; + __host__ __device__ double& at(const int &I, const int &J); + __host__ __device__ const double& at(const int &I, const int &J) const; + + __host__ __device__ double* data(); //pointer to double16 representation of matrix + __host__ __device__ const double* data() const; //pointer to double16 representation of matrix + + __host__ __device__ cumat4 operator+(const cumat4& rhs) const; + __host__ __device__ cumat4 operator-(const cumat4& rhs) const; + __host__ __device__ cumat4 operator*(const cumat4& rhs) const; + + __host__ __device__ friend cumat4 operator*(const cumat4& lhs, const double& rhs); + __host__ __device__ friend cumat4 operator/(const cumat4& lhs, const double& rhs); + __host__ __device__ friend cumat4 operator*(const double& lhs, const cumat4& rhs); + __host__ __device__ friend cuvec4 operator*(const cumat4& lhs, const cuvec4& rhs); + __host__ __device__ friend cuvec4 operator*(const cuvec4& lhs, const cumat4& rhs); + __host__ __device__ friend cumat4 operator-(const cumat4& rhs); + + __host__ __device__ cumat4& operator+=(const cumat4& rhs); + __host__ __device__ cumat4& operator-=(const cumat4& rhs); + __host__ __device__ cumat4& operator*=(const double& rhs); + __host__ __device__ cumat4& operator/=(const double& rhs); + __host__ __device__ cumat4& operator*=(const cumat4& rhs); + + __host__ __device__ cumat4 transpose() const; + + __host__ __device__ double det(); + __host__ __device__ cumat4 inverse(); + }; + + __host__ __device__ double cuvec4_dot(cuvec4 &a, cuvec4 &b); + __host__ __device__ double cuvec4_norm(cuvec4 &a); + __host__ __device__ cuvec4 cuvec4_normalize(cuvec4 &a); + __host__ __device__ cuvec4 cuvec4_proj(cuvec4 &a, cuvec4 &b); + +}; //end namespace amscuda + + +#endif + diff --git a/include/amsculib3/math/cuvec4i.hpp b/include/amsculib3/math/cuvec4i.hpp new file mode 100644 index 0000000..ae44b21 --- /dev/null +++ b/include/amsculib3/math/cuvec4i.hpp @@ -0,0 +1,11 @@ +#ifndef __CUVEC4I_HPP__ +#define __CUVEC4I_HPP__ + +namespace amscuda +{ + + +}; //end namespace amscuda + +#endif + diff --git a/src/amsculib3/math/cuvec2.cu b/src/amsculib3/math/cuvec2.cu new file mode 100644 index 0000000..f87ca9e --- /dev/null +++ b/src/amsculib3/math/cuvec2.cu @@ -0,0 +1,518 @@ +#include + +namespace amscuda +{ + + __host__ __device__ cuvec2::cuvec2() + { + x = 0; y = 0; + return; + } + + __host__ __device__ cuvec2::~cuvec2() + { + x = 0; y = 0; + return; + } + + __host__ __device__ cuvec2::cuvec2(const double &_x, const double &_y) + { + x = _x; y = _y; + return; + } + + __host__ __device__ double& cuvec2::operator[](const int &I) + { + switch(I) + { + case 0: + return x; + case 1: + return y; + } + + return x; + } + + __host__ __device__ const double& cuvec2::operator[](const int &I) const + { + switch(I) + { + case 0: + return x; + case 1: + return y; + } + + return x; + } + + __host__ __device__ cuvec2 cuvec2::operator+(const cuvec2& rhs) const + { + cuvec2 ret; + ret.x = x + rhs.x; + ret.y = y + rhs.y; + return ret; + } + + __host__ __device__ cuvec2 cuvec2::operator-(const cuvec2& rhs) const + { + cuvec2 ret; + ret.x = x - rhs.x; + ret.y = y - rhs.y; + return ret; + } + + __host__ __device__ cuvec2 cuvec2::operator*(const cuvec2& rhs) const + { + //Elementwise product + cuvec2 ret; + ret.x = x * rhs.x; + ret.y = y * rhs.y; + return ret; + } + + __host__ __device__ cuvec2 cuvec2::operator/(const cuvec2& rhs) const + { + //Elementwise division + cuvec2 ret; + ret.x = x / rhs.x; + ret.y = y / rhs.y; + return ret; + } + + __host__ __device__ cuvec2 operator*(const cuvec2& lhs, const double& rhs) + { + cuvec2 ret; + ret.x = lhs.x*rhs; + ret.y = lhs.y*rhs; + return ret; + } + + __host__ __device__ cuvec2 operator*(const double& lhs, const cuvec2& rhs) + { + cuvec2 ret; + ret.x = lhs*rhs.x; + ret.y = lhs*rhs.y; + return ret; + } + + __host__ __device__ cuvec2 operator/(const cuvec2& lhs, const double& rhs) + { + cuvec2 ret; + ret.x = lhs.x/rhs; + ret.y = lhs.y/rhs; + return ret; + } + + __host__ __device__ cuvec2 operator/(const double& lhs, const cuvec2& rhs) + { + cuvec2 ret; + ret.x = lhs/rhs.x; + ret.y = lhs/rhs.y; + return ret; + } + + __host__ __device__ cuvec2 operator-(const cuvec2& other) + { + cuvec2 ret; + ret.x = -other.x; + ret.y = -other.y; + return ret; + } + + __host__ __device__ cuvec2& cuvec2::operator+=(const cuvec2& rhs) + { + x += rhs.x; + y += rhs.y; + return *this; + } + + __host__ __device__ cuvec2& cuvec2::operator-=(const cuvec2& rhs) + { + x -= rhs.x; + y -= rhs.y; + return *this; + } + + __host__ __device__ cuvec2& cuvec2::operator*=(const double& rhs) + { + x *= rhs; + y *= rhs; + return *this; + } + + __host__ __device__ cuvec2& cuvec2::operator/=(const double& rhs) + { + x /= rhs; + y /= rhs; + return *this; + } + + //////////////// + //Matrix Class// + //////////////// + + __host__ __device__ cumat2::cumat2() + { + m00 = 0; + m01 = 0; + + m10 = 0; + m11 = 0; + + return; + } + + __host__ __device__ cumat2::~cumat2() + { + //m00 = 0; + //m01 = 0; + + //m10 = 0; + //m11 = 0; + + return; + } + + __host__ __device__ cumat2::cumat2( + const double& _m00, const double& _m01, + const double& _m10, const double& _m11 + ) + { + m00 = _m00; + m10 = _m10; + + m01 = _m01; + m11 = _m11; + + + return; + } + + __host__ __device__ cumat2::cumat2(const double* data4) + { + m00 = data4[0]; + m10 = data4[1]; + + m01 = data4[2]; + m11 = data4[3]; + + + return; + } + + __host__ __device__ double& cumat2::operator[](const int &I) + { + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m01; + case 3: + return m11; + } + + return m00; + } + + __host__ __device__ const double& cumat2::operator[](const int &I) const + { + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m01; + case 3: + return m11; + } + + return m00; + } + + __host__ __device__ double& cumat2::operator()(const int &I, const int &J) + { + return (*this)[I+2*J]; + } + + __host__ __device__ const double& cumat2::operator()(const int &I, const int &J) const + { + return (*this)[I+2*J]; + } + + __host__ __device__ double& cumat2::at(const int &I, const int &J) + { + return (*this)[I+2*J]; + } + + __host__ __device__ const double& cumat2::at(const int &I, const int &J) const + { + return (*this)[I+2*J]; + } + + __host__ __device__ double* cumat2::data() + { + return (double*)this; + } + + __host__ __device__ const double* cumat2::data() const + { + return (double*)this; + } + + __host__ __device__ cumat2 cumat2::operator+(const cumat2& rhs) const + { + cumat2 ret; + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; + + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; + + return ret; + } + + __host__ __device__ cumat2 cumat2::operator-(const cumat2& rhs) const + { + cumat2 ret; + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; + + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; + + return ret; + } + + __host__ __device__ cumat2 cumat2::operator*(const cumat2& rhs) const + { + cumat2 ret; //should be zeroed in constructor + ret.m00 = m00*rhs.m00 + m01*rhs.m10; + ret.m01 = m00*rhs.m01 + m01*rhs.m11; + + ret.m10 = m10*rhs.m00 + m11*rhs.m10; + ret.m11 = m10*rhs.m01 + m11*rhs.m11; + + return ret; + } + + __host__ __device__ cumat2 operator*(const cumat2& lhs, const double& rhs) + { + cumat2 ret; + ret.m00=lhs.m00*rhs; + ret.m10=lhs.m10*rhs; + ret.m01=lhs.m01*rhs; + ret.m11=lhs.m11*rhs; + return ret; + } + + __host__ __device__ cumat2 operator/(const cumat2& lhs, const double& rhs) + { + cumat2 ret; + ret.m00=lhs.m00/rhs; + ret.m10=lhs.m10/rhs; + ret.m01=lhs.m01/rhs; + ret.m11=lhs.m11/rhs; + return ret; + } + + __host__ __device__ cumat2 operator*(const double& lhs, const cumat2& rhs) + { + cumat2 ret; + ret.m00=lhs*rhs.m00; + ret.m10=lhs*rhs.m10; + ret.m01=lhs*rhs.m01; + ret.m11=lhs*rhs.m11; + return ret; + } + + __host__ __device__ cuvec2 operator*(const cumat2& lhs, const cuvec2& rhs) + { + cuvec2 ret; + ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y; + ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y; + return ret; + } + + __host__ __device__ cuvec2 operator*(const cuvec2& lhs, const cumat2& rhs) + { + cuvec2 ret; + ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10; + ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11; + return ret; + } + + __host__ __device__ cumat2 operator-(const cumat2& rhs) + { + cumat2 ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + return ret; + } + + __host__ __device__ cumat2& cumat2::operator+=(const cumat2& rhs) + { + m00 += rhs.m00; + m10 += rhs.m10; + m01 += rhs.m01; + m11 += rhs.m11; + return *this; + } + + __host__ __device__ cumat2& cumat2::operator-=(const cumat2& rhs) + { + m00 -= rhs.m00; + m10 -= rhs.m10; + m01 -= rhs.m01; + m11 -= rhs.m11; + return *this; + } + + __host__ __device__ cumat2& cumat2::operator*=(const double& rhs) + { + m00 *= rhs; + m10 *= rhs; + m01 *= rhs; + m11 *= rhs; + return *this; + } + + __host__ __device__ cumat2& cumat2::operator/=(const double& rhs) + { + m00 /= rhs; + m10 /= rhs; + m01 /= rhs; + m11 /= rhs; + return *this; + } + + __host__ __device__ cumat2& cumat2::operator*=(const cumat2& rhs) + { + cumat2 tmp = *this; + m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10; + m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11; + m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10; + m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11; + return *this; + } + + __host__ __device__ cumat2 cumat2::transpose() const + { + cumat2 ret; + ret.m00 = m00; + ret.m10 = m01; + ret.m01 = m10; + ret.m11 = m11; + return ret; + } + + /////////////////// + //Det and Inverse// + /////////////////// + + __host__ __device__ double cumat2::det() + { + double ret = 0; + + ret += m00*m11; + ret -= m01*m10; + + return ret; + } + + __host__ __device__ cumat2 cumat2::inverse() + { + cumat2 q; + double dt = det(); + if(dt!=0) + { + q(0,0) = m11/dt; + q(0,1) = -m01/dt; + q(1,0) = -m10/dt; + q(1,1) = m00/dt; + + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(1,0) = inf; + q(1,1) = inf; + } + + return q; + } + + //////////////////////// + //Non member functions// + //////////////////////// + + __host__ __device__ cumat2 cumat2_rot_from_angle(const double &angle) + { + cumat2 R; + R(0,0) = ::cosf(angle); + R(1,0) = ::sinf(angle); + R(0,1) = -::sinf(angle); + R(1,1) = ::cosf(angle); + return R; + } + + __host__ __device__ double cuvec2_dot(const cuvec2 &a, const cuvec2 &b) + { + double ret = a.x*b.x+a.y*b.y; + return ret; + } + + __host__ __device__ double cuvec2_cross(const cuvec2 &a, const cuvec2 &b) + { + double ret = a.x*b.y-a.y*b.x; + return ret; + } + + __host__ __device__ double cuvec2_norm(const cuvec2 &a) + { + double ret = ::sqrtf(a.x*a.x+a.y*a.y); + return ret; + } + + __host__ __device__ cuvec2 cuvec2_normalize(const cuvec2 &a) + { + cuvec2 ret; + double m = cuvec2_norm(a); + if(m>0) + { + ret.x = a.x/m; ret.y = a.y/m; + } + else + { + ret.x = 0; ret.y = 0; + } + return ret; + } + + __host__ __device__ cuvec2 cuvec2_proj(const cuvec2 &a, const cuvec2 &b) + { + cuvec2 ret; + cuvec2 bn = cuvec2_normalize(b); + double m = cuvec2_dot(a,bn); + ret = bn*m; + return ret; + } + + +void test_cuvec2_1() +{ + + + return; +} + +}; \ No newline at end of file diff --git a/src/amsculib3/math/cuvec3.cu b/src/amsculib3/math/cuvec3.cu new file mode 100644 index 0000000..1443c81 --- /dev/null +++ b/src/amsculib3/math/cuvec3.cu @@ -0,0 +1,746 @@ +#include + +namespace amscuda +{ + +////////////////// +// Vector Class // +////////////////// + +__host__ __device__ cuvec3::cuvec3() +{ + x = 0; y = 0; z = 0; + return; +} + +__host__ __device__ cuvec3::~cuvec3() +{ + x = 0; y = 0; z = 0; + return; +} + +__host__ __device__ cuvec3::cuvec3(const double &_x, const double &_y, const double &_z) +{ + x = _x; y = _y; z = _z; + return; +} + +__host__ __device__ double& cuvec3::operator[](const int &I) +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + } + + return x; +} + +__host__ __device__ const double& cuvec3::operator[](const int &I) const +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + } + + return x; +} + +__host__ __device__ cuvec3 cuvec3::operator+(const cuvec3& rhs) const +{ + cuvec3 ret; + ret.x = x + rhs.x; + ret.y = y + rhs.y; + ret.z = z + rhs.z; + return ret; +} + +__host__ __device__ cuvec3 cuvec3::operator-(const cuvec3& rhs) const +{ + cuvec3 ret; + ret.x = x - rhs.x; + ret.y = y - rhs.y; + ret.z = z - rhs.z; + return ret; +} + +__host__ __device__ cuvec3 cuvec3::operator*(const cuvec3& rhs) const +{ + //Elementwise product + cuvec3 ret; + ret.x = x * rhs.x; + ret.y = y * rhs.y; + ret.z = z * rhs.z; + return ret; +} + +__host__ __device__ cuvec3 cuvec3::operator/(const cuvec3& rhs) const +{ + //Elementwise division + cuvec3 ret; + ret.x = x / rhs.x; + ret.y = y / rhs.y; + ret.z = z / rhs.z; + return ret; +} + +__host__ __device__ cuvec3 operator*(const cuvec3& lhs, const double& rhs) +{ + cuvec3 ret; + ret.x = lhs.x*rhs; + ret.y = lhs.y*rhs; + ret.z = lhs.z*rhs; + return ret; +} + +__host__ __device__ cuvec3 operator*(const double& lhs, const cuvec3& rhs) +{ + cuvec3 ret; + ret.x = lhs*rhs.x; + ret.y = lhs*rhs.y; + ret.z = lhs*rhs.z; + return ret; +} + +__host__ __device__ cuvec3 operator/(const cuvec3& lhs, const double& rhs) +{ + cuvec3 ret; + ret.x = lhs.x/rhs; + ret.y = lhs.y/rhs; + ret.z = lhs.z/rhs; + return ret; +} + +__host__ __device__ cuvec3 operator/(const double& lhs, const cuvec3& rhs) +{ + cuvec3 ret; + ret.x = lhs/rhs.x; + ret.y = lhs/rhs.y; + ret.z = lhs/rhs.z; + return ret; +} + +__host__ __device__ cuvec3 operator-(const cuvec3& other) +{ + cuvec3 ret; + ret.x = -other.x; + ret.y = -other.y; + ret.z = -other.z; + return ret; +} + +__host__ __device__ cuvec3& cuvec3::operator+=(const cuvec3& rhs) +{ + x += rhs.x; + y += rhs.y; + z += rhs.z; + return *this; +} + +__host__ __device__ cuvec3& cuvec3::operator-=(const cuvec3& rhs) +{ + x -= rhs.x; + y -= rhs.y; + z -= rhs.z; + return *this; +} + +__host__ __device__ cuvec3& cuvec3::operator*=(const double& rhs) +{ + x *= rhs; + y *= rhs; + z *= rhs; + return *this; +} + +__host__ __device__ cuvec3& cuvec3::operator/=(const double& rhs) +{ + x /= rhs; + y /= rhs; + z /= rhs; + return *this; +} + + +////////////////// +// Matrix Class // +////////////////// + +__host__ __device__ cumat3::cumat3() +{ + m00 = 0; + m01 = 0; + m02 = 0; + + m10 = 0; + m11 = 0; + m12 = 0; + + m20 = 0; + m21 = 0; + m22 = 0; + + return; +} + +__host__ __device__ cumat3::~cumat3() +{ + //m00 = 0; + //m01 = 0; + //m02 = 0; + + //m10 = 0; + //m11 = 0; + //m12 = 0; + + //m20 = 0; + //m21 = 0; + //m22 = 0; + + return; +} + +__host__ __device__ cumat3::cumat3( + const double& _m00, const double& _m01, const double& _m02, + const double& _m10, const double& _m11, const double& _m12, + const double& _m20, const double& _m21, const double& _m22 +) +{ + m00 = _m00; + m10 = _m10; + m20 = _m20; + + m01 = _m01; + m11 = _m11; + m21 = _m21; + + m02 = _m02; + m12 = _m12; + m22 = _m22; + + + return; +} + +__host__ __device__ cumat3::cumat3(const double* data9) +{ + m00 = data9[0]; + m10 = data9[1]; + m20 = data9[2]; + + m01 = data9[3]; + m11 = data9[4]; + m21 = data9[5]; + + m02 = data9[6]; + m12 = data9[7]; + m22 = data9[8]; + + + return; +} + +__host__ __device__ double& cumat3::operator[](const int &I) +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m01; + case 4: + return m11; + case 5: + return m21; + case 6: + return m02; + case 7: + return m12; + case 8: + return m22; + } + + return m00; +} + +__host__ __device__ const double& cumat3::operator[](const int &I) const +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m01; + case 4: + return m11; + case 5: + return m21; + case 6: + return m02; + case 7: + return m12; + case 8: + return m22; + } + + return m00; +} + +__host__ __device__ double& cumat3::operator()(const int &I, const int &J) +{ + return (*this)[I+3*J]; +} + +__host__ __device__ const double& cumat3::operator()(const int &I, const int &J) const +{ + return (*this)[I+3*J]; +} + +__host__ __device__ double& cumat3::at(const int &I, const int &J) +{ + return (*this)[I+3*J]; +} + +__host__ __device__ const double& cumat3::at(const int &I, const int &J) const +{ + return (*this)[I+3*J]; +} + +__host__ __device__ double* cumat3::data() +{ +return (double*)this; +} + +__host__ __device__ const double* cumat3::data() const +{ +return (double*)this; +} + +__host__ __device__ cumat3 cumat3::operator+(const cumat3& rhs) const +{ +cumat3 ret; +ret.m00 = m00 + rhs.m00; +ret.m10 = m10 + rhs.m10; +ret.m20 = m20 + rhs.m20; + +ret.m01 = m01 + rhs.m01; +ret.m11 = m11 + rhs.m11; +ret.m21 = m21 + rhs.m21; + +ret.m02 = m02 + rhs.m02; +ret.m12 = m12 + rhs.m12; +ret.m22 = m22 + rhs.m22; + +return ret; +} + +__host__ __device__ cumat3 cumat3::operator-(const cumat3& rhs) const +{ +cumat3 ret; +ret.m00 = m00 - rhs.m00; +ret.m10 = m10 - rhs.m10; +ret.m20 = m20 - rhs.m20; + +ret.m01 = m01 - rhs.m01; +ret.m11 = m11 - rhs.m11; +ret.m21 = m21 - rhs.m21; + +ret.m02 = m02 - rhs.m02; +ret.m12 = m12 - rhs.m12; +ret.m22 = m22 - rhs.m22; + +return ret; +} + +__host__ __device__ cumat3 cumat3::operator*(const cumat3& rhs) const +{ +cumat3 ret; //should be zeroed in constructor +ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; +ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; +ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + +ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; +ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; +ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + +ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; +ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; +ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + +return ret; +} + +__host__ __device__ cumat3 operator*(const cumat3& lhs, const double& rhs) +{ +cumat3 ret; +ret.m00=lhs.m00*rhs; +ret.m10=lhs.m10*rhs; +ret.m20=lhs.m20*rhs; +ret.m01=lhs.m01*rhs; +ret.m11=lhs.m11*rhs; +ret.m21=lhs.m21*rhs; +ret.m02=lhs.m02*rhs; +ret.m12=lhs.m12*rhs; +ret.m22=lhs.m22*rhs; +return ret; +} + +__host__ __device__ cumat3 operator/(const cumat3& lhs, const double& rhs) +{ +cumat3 ret; +ret.m00=lhs.m00/rhs; +ret.m10=lhs.m10/rhs; +ret.m20=lhs.m20/rhs; +ret.m01=lhs.m01/rhs; +ret.m11=lhs.m11/rhs; +ret.m21=lhs.m21/rhs; +ret.m02=lhs.m02/rhs; +ret.m12=lhs.m12/rhs; +ret.m22=lhs.m22/rhs; +return ret; +} + +__host__ __device__ cumat3 operator*(const double& lhs, const cumat3& rhs) +{ +cumat3 ret; +ret.m00=lhs*rhs.m00; +ret.m10=lhs*rhs.m10; +ret.m20=lhs*rhs.m20; +ret.m01=lhs*rhs.m01; +ret.m11=lhs*rhs.m11; +ret.m21=lhs*rhs.m21; +ret.m02=lhs*rhs.m02; +ret.m12=lhs*rhs.m12; +ret.m22=lhs*rhs.m22; +return ret; +} + +__host__ __device__ cuvec3 operator*(const cumat3& lhs, const cuvec3& rhs) +{ +cuvec3 ret; +ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y + lhs.m02*rhs.z; +ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y + lhs.m12*rhs.z; +ret.z = lhs.m20*rhs.x + lhs.m21*rhs.y + lhs.m22*rhs.z; +return ret; +} + +__host__ __device__ cuvec3 operator*(const cuvec3& lhs, const cumat3& rhs) +{ +cuvec3 ret; +ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10 + lhs.z*rhs.m20; +ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11 + lhs.z*rhs.m21; +ret.z = lhs.x*rhs.m02 + lhs.y*rhs.m12 + lhs.z*rhs.m22; +return ret; +} + +__host__ __device__ cumat3 operator-(const cumat3& rhs) +{ +cumat3 ret; +ret.m00 = -rhs.m00; +ret.m10 = -rhs.m10; +ret.m20 = -rhs.m20; +ret.m01 = -rhs.m01; +ret.m11 = -rhs.m11; +ret.m21 = -rhs.m21; +ret.m02 = -rhs.m02; +ret.m12 = -rhs.m12; +ret.m22 = -rhs.m22; +return ret; +} + +__host__ __device__ cumat3& cumat3::operator+=(const cumat3& rhs) +{ +m00 += rhs.m00; +m10 += rhs.m10; +m20 += rhs.m20; +m01 += rhs.m01; +m11 += rhs.m11; +m21 += rhs.m21; +m02 += rhs.m02; +m12 += rhs.m12; +m22 += rhs.m22; +return *this; +} + +__host__ __device__ cumat3& cumat3::operator-=(const cumat3& rhs) +{ +m00 -= rhs.m00; +m10 -= rhs.m10; +m20 -= rhs.m20; +m01 -= rhs.m01; +m11 -= rhs.m11; +m21 -= rhs.m21; +m02 -= rhs.m02; +m12 -= rhs.m12; +m22 -= rhs.m22; +return *this; +} + +__host__ __device__ cumat3& cumat3::operator*=(const double& rhs) +{ +m00 *= rhs; +m10 *= rhs; +m20 *= rhs; +m01 *= rhs; +m11 *= rhs; +m21 *= rhs; +m02 *= rhs; +m12 *= rhs; +m22 *= rhs; +return *this; +} + +__host__ __device__ cumat3& cumat3::operator/=(const double& rhs) +{ +m00 /= rhs; +m10 /= rhs; +m20 /= rhs; +m01 /= rhs; +m11 /= rhs; +m21 /= rhs; +m02 /= rhs; +m12 /= rhs; +m22 /= rhs; +return *this; +} + +__host__ __device__ cumat3& cumat3::operator*=(const cumat3& rhs) +{ +cumat3 tmp = *this; +m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10 + tmp.m02*rhs.m20; +m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11 + tmp.m02*rhs.m21; +m02 = tmp.m00*rhs.m02 + tmp.m01*rhs.m12 + tmp.m02*rhs.m22; +m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10 + tmp.m12*rhs.m20; +m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11 + tmp.m12*rhs.m21; +m12 = tmp.m10*rhs.m02 + tmp.m11*rhs.m12 + tmp.m12*rhs.m22; +m20 = tmp.m20*rhs.m00 + tmp.m21*rhs.m10 + tmp.m22*rhs.m20; +m21 = tmp.m20*rhs.m01 + tmp.m21*rhs.m11 + tmp.m22*rhs.m21; +m22 = tmp.m20*rhs.m02 + tmp.m21*rhs.m12 + tmp.m22*rhs.m22; +return *this; +} + +__host__ __device__ cumat3 cumat3::transpose() const +{ +cumat3 ret; +ret.m00 = m00; +ret.m10 = m01; +ret.m20 = m02; +ret.m01 = m10; +ret.m11 = m11; +ret.m21 = m12; +ret.m02 = m20; +ret.m12 = m21; +ret.m22 = m22; +return ret; +} + + +///////////////////// +// Det and Inverse // +///////////////////// + +__host__ __device__ double cumat3::det() +{ + double ret = 0.0f; + + ret += m00*m11*m22; + ret += m01*m12*m20; + ret += m02*m10*m21; + ret -= m20*m11*m02; + ret -= m21*m12*m00; + ret -= m22*m10*m01; + + return ret; +} + +__host__ __device__ cumat3 cumat3::inverse() +{ + cumat3 q; + double dt = det(); + double idt; + if(dt!=0.0) + { + idt = 1.0f/dt; + q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))*idt; + q(1,0) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))*idt; + q(2,0) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))*idt; + q(0,1) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))*idt; + q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))*idt; + q(2,1) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))*idt; + q(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))*idt; + q(1,2) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))*idt; + q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))*idt; + // q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; + // q(0,1) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; + // q(0,2) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; + // q(1,0) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; + // q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; + // q(1,2) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; + // q(2,0) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; + // q(2,1) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; + // q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + // q = q.transpose(); + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(0,2) = inf; + q(1,0) = inf; + q(1,1) = inf; + q(1,2) = inf; + q(2,0) = inf; + q(2,1) = inf; + q(2,2) = inf; + } + + return q; +} + +////////////////////////// +// Standalone functions // +////////////////////////// + +__host__ __device__ double cuvec3_dot(const cuvec3 &a, const cuvec3 &b) +{ + double ret = a.x*b.x+a.y*b.y+a.z*b.z; + + return ret; +} + +__host__ __device__ cuvec3 cuvec3_cross(const cuvec3 &a, const cuvec3 &b) +{ + cuvec3 ret; + ret[0] = a[1]*b[2]-a[2]*b[1]; + ret[1] = a[2]*b[0]-a[0]*b[2]; + ret[2] = a[0]*b[1]-a[1]*b[0]; + + return ret; +} + +__host__ __device__ double cuvec3_norm(const cuvec3 &a) +{ + double ret; + ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + return ret; +} + +__host__ __device__ cuvec3 cuvec3_normalize(const cuvec3 &a) +{ + cuvec3 ret; + double m; + m = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; + } + else + { + ret.x = 0.0f; ret.y = 0.0f; ret.z = 0.0f; + } + + return ret; +} + +__host__ __device__ cuvec3 cuvec3_proj(const cuvec3 &a, const cuvec3 &b) +{ + cuvec3 ret; + cuvec3 bn = cuvec3_normalize(b); + double m = cuvec3_dot(a,bn); + ret = bn*m; + return ret; +} + +__host__ __device__ cumat3 hodge_dual(const cuvec3 &vin) +{ + cumat3 ret; + + ret.m00 = 0.0f; + ret.m11 = 0.0f; + ret.m22 = 0.0f; + + ret.m01 = vin.z; + ret.m12 = vin.x; + ret.m20 = vin.y; + + ret.m10 = -vin.z; + ret.m21 = -vin.x; + ret.m02 = -vin.y; + + return ret; +} + +__host__ __device__ cuvec3 hodge_dual(const cumat3 &min) +{ + cuvec3 ret; + + ret.x = 0.5f*(min.m12 - min.m21); + ret.y = 0.5f*(min.m20 - min.m02); + ret.z = 0.5f*(min.m01 - min.m10); + + return ret; +} + +__host__ __device__ const cumat3 cumat3_eye() +{ + cumat3 ret; + ret.m00 = 1.0f; + ret.m11 = 1.0f; + ret.m22 = 1.0f; + + return ret; +} + +__host__ __device__ const cumat3 cumat3_zeros() +{ + cumat3 ret; + return ret; +} + +__host__ __device__ cumat3 rotmat_from_axisangle(const cuvec3 &axis, const double &angle) +{ + cumat3 ret = cumat3_zeros(); + cumat3 H; + cuvec3 _naxis; + + _naxis = cuvec3_normalize(axis); + H = hodge_dual(_naxis); + + ret += H*sinf(angle); + H = H*H; + ret -= H*cosf(angle); + ret += (H + cumat3_eye()); + + return ret; +} + + +/////////// +// Tests // +/////////// + +__host__ void test_cudavectf_logic1() +{ + +} + + + +}; \ No newline at end of file diff --git a/src/amsculib3/math/cuvec4.cu b/src/amsculib3/math/cuvec4.cu new file mode 100644 index 0000000..536ffb6 --- /dev/null +++ b/src/amsculib3/math/cuvec4.cu @@ -0,0 +1,869 @@ +#include + +namespace amscuda +{ + +////////////////// +// Vector Class // +////////////////// + +__host__ __device__ cuvec4::cuvec4() +{ + x = 0; y = 0; z = 0; w = 0; + return; +} + +__host__ __device__ cuvec4::~cuvec4() +{ + x = 0; y = 0; z = 0; w = 0; + return; +} + +__host__ __device__ cuvec4::cuvec4(const double &_x, const double &_y, const double &_z, const double &_w) +{ + x = _x; y = _y; z = _z; w = _w; + return; +} + +__host__ __device__ double& cuvec4::operator[](const int &I) +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + } + + return x; +} + +__host__ __device__ const double& cuvec4::operator[](const int &I) const +{ + switch(I) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + } + + return x; +} + +__host__ __device__ cuvec4 cuvec4::operator+(const cuvec4& rhs) const +{ + cuvec4 ret; + ret.x = x + rhs.x; + ret.y = y + rhs.y; + ret.z = z + rhs.z; + ret.w = w + rhs.w; + return ret; +} + +__host__ __device__ cuvec4 cuvec4::operator-(const cuvec4& rhs) const +{ + cuvec4 ret; + ret.x = x - rhs.x; + ret.y = y - rhs.y; + ret.z = z - rhs.z; + ret.w = w - rhs.w; + return ret; +} + +__host__ __device__ cuvec4 cuvec4::operator*(const cuvec4& rhs) const +{ + //Elementwise product + cuvec4 ret; + ret.x = x * rhs.x; + ret.y = y * rhs.y; + ret.z = z * rhs.z; + ret.w = w * rhs.w; + return ret; +} + +__host__ __device__ cuvec4 cuvec4::operator/(const cuvec4& rhs) const +{ + //Elementwise division + cuvec4 ret; + ret.x = x / rhs.x; + ret.y = y / rhs.y; + ret.z = z / rhs.z; + ret.w = w / rhs.w; + return ret; +} + +__host__ __device__ cuvec4 operator*(const cuvec4& lhs, const double& rhs) +{ + cuvec4 ret; + ret.x = lhs.x*rhs; + ret.y = lhs.y*rhs; + ret.z = lhs.z*rhs; + ret.w = lhs.w*rhs; + return ret; +} + +__host__ __device__ cuvec4 operator*(const double& lhs, const cuvec4& rhs) +{ + cuvec4 ret; + ret.x = lhs*rhs.x; + ret.y = lhs*rhs.y; + ret.z = lhs*rhs.z; + ret.w = lhs*rhs.w; + return ret; +} + +__host__ __device__ cuvec4 operator/(const cuvec4& lhs, const double& rhs) +{ + cuvec4 ret; + ret.x = lhs.x/rhs; + ret.y = lhs.y/rhs; + ret.z = lhs.z/rhs; + ret.w = lhs.w/rhs; + return ret; +} + +__host__ __device__ cuvec4 operator/(const double& lhs, const cuvec4& rhs) +{ + cuvec4 ret; + ret.x = lhs/rhs.x; + ret.y = lhs/rhs.y; + ret.z = lhs/rhs.z; + ret.w = lhs/rhs.w; + return ret; +} + +__host__ __device__ cuvec4 operator-(const cuvec4& other) +{ + cuvec4 ret; + ret.x = -other.x; + ret.y = -other.y; + ret.z = -other.z; + ret.w = -other.w; + return ret; +} + +__host__ __device__ cuvec4& cuvec4::operator+=(const cuvec4& rhs) +{ + x += rhs.x; + y += rhs.y; + z += rhs.z; + w += rhs.w; + return *this; +} + +__host__ __device__ cuvec4& cuvec4::operator-=(const cuvec4& rhs) +{ + x -= rhs.x; + y -= rhs.y; + z -= rhs.z; + w -= rhs.w; + return *this; +} + +__host__ __device__ cuvec4& cuvec4::operator*=(const double& rhs) +{ + x *= rhs; + y *= rhs; + z *= rhs; + w *= rhs; + return *this; +} + +__host__ __device__ cuvec4& cuvec4::operator/=(const double& rhs) +{ + x /= rhs; + y /= rhs; + z /= rhs; + w /= rhs; + return *this; +} + + +////////////////// +// Matrix Class // +////////////////// + +__host__ __device__ cumat4::cumat4() +{ + m00 = 0; + m01 = 0; + m02 = 0; + m03 = 0; + + m10 = 0; + m11 = 0; + m12 = 0; + m13 = 0; + + m20 = 0; + m21 = 0; + m22 = 0; + m23 = 0; + + m30 = 0; + m31 = 0; + m32 = 0; + m33 = 0; + + return; +} + +__host__ __device__ cumat4::~cumat4() +{ + //m00 = 0; + //m01 = 0; + //m02 = 0; + //m03 = 0; + + //m10 = 0; + //m11 = 0; + //m12 = 0; + //m13 = 0; + + //m20 = 0; + //m21 = 0; + //m22 = 0; + //m23 = 0; + + //m30 = 0; + //m31 = 0; + //m32 = 0; + //m33 = 0; + + return; +} + +__host__ __device__ cumat4::cumat4( + const double& _m00, const double& _m01, const double& _m02, const double& _m03, + const double& _m10, const double& _m11, const double& _m12, const double& _m13, + const double& _m20, const double& _m21, const double& _m22, const double& _m23, + const double& _m30, const double& _m31, const double& _m32, const double& _m33 +) +{ + m00 = _m00; + m10 = _m10; + m20 = _m20; + m30 = _m30; + + m01 = _m01; + m11 = _m11; + m21 = _m21; + m31 = _m31; + + m02 = _m02; + m12 = _m12; + m22 = _m22; + m32 = _m32; + + m03 = _m03; + m13 = _m13; + m23 = _m23; + m33 = _m33; + + + return; +} + +__host__ __device__ cumat4::cumat4(const double* data16) +{ + m00 = data16[0]; + m10 = data16[1]; + m20 = data16[2]; + m30 = data16[3]; + + m01 = data16[4]; + m11 = data16[5]; + m21 = data16[6]; + m31 = data16[7]; + + m02 = data16[8]; + m12 = data16[9]; + m22 = data16[10]; + m32 = data16[11]; + + m03 = data16[12]; + m13 = data16[13]; + m23 = data16[14]; + m33 = data16[15]; + + + return; +} + +__host__ __device__ double& cumat4::operator[](const int &I) +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m30; + case 4: + return m01; + case 5: + return m11; + case 6: + return m21; + case 7: + return m31; + case 8: + return m02; + case 9: + return m12; + case 10: + return m22; + case 11: + return m32; + case 12: + return m03; + case 13: + return m13; + case 14: + return m23; + case 15: + return m33; + } + + return m00; +} + +__host__ __device__ const double& cumat4::operator[](const int &I) const +{ + switch(I) + { + case 0: + return m00; + case 1: + return m10; + case 2: + return m20; + case 3: + return m30; + case 4: + return m01; + case 5: + return m11; + case 6: + return m21; + case 7: + return m31; + case 8: + return m02; + case 9: + return m12; + case 10: + return m22; + case 11: + return m32; + case 12: + return m03; + case 13: + return m13; + case 14: + return m23; + case 15: + return m33; + } + + return m00; +} + +__host__ __device__ double& cumat4::operator()(const int &I, const int &J) +{ + return (*this)[I+4*J]; +} + +__host__ __device__ const double& cumat4::operator()(const int &I, const int &J) const +{ + return (*this)[I+4*J]; +} + +__host__ __device__ double& cumat4::at(const int &I, const int &J) +{ + return (*this)[I+4*J]; +} + +__host__ __device__ const double& cumat4::at(const int &I, const int &J) const +{ + return (*this)[I+4*J]; +} + +__host__ __device__ double* cumat4::data() +{ +return (double*)this; +} + +__host__ __device__ const double* cumat4::data() const +{ +return (double*)this; +} + +__host__ __device__ cumat4 cumat4::operator+(const cumat4& rhs) const +{ +cumat4 ret; +ret.m00 = m00 + rhs.m00; +ret.m10 = m10 + rhs.m10; +ret.m20 = m20 + rhs.m20; +ret.m30 = m30 + rhs.m30; + +ret.m01 = m01 + rhs.m01; +ret.m11 = m11 + rhs.m11; +ret.m21 = m21 + rhs.m21; +ret.m31 = m31 + rhs.m31; + +ret.m02 = m02 + rhs.m02; +ret.m12 = m12 + rhs.m12; +ret.m22 = m22 + rhs.m22; +ret.m32 = m32 + rhs.m32; + +ret.m03 = m03 + rhs.m03; +ret.m13 = m13 + rhs.m13; +ret.m23 = m23 + rhs.m23; +ret.m33 = m33 + rhs.m33; + +return ret; +} + +__host__ __device__ cumat4 cumat4::operator-(const cumat4& rhs) const +{ +cumat4 ret; +ret.m00 = m00 - rhs.m00; +ret.m10 = m10 - rhs.m10; +ret.m20 = m20 - rhs.m20; +ret.m30 = m30 - rhs.m30; + +ret.m01 = m01 - rhs.m01; +ret.m11 = m11 - rhs.m11; +ret.m21 = m21 - rhs.m21; +ret.m31 = m31 - rhs.m31; + +ret.m02 = m02 - rhs.m02; +ret.m12 = m12 - rhs.m12; +ret.m22 = m22 - rhs.m22; +ret.m32 = m32 - rhs.m32; + +ret.m03 = m03 - rhs.m03; +ret.m13 = m13 - rhs.m13; +ret.m23 = m23 - rhs.m23; +ret.m33 = m33 - rhs.m33; + +return ret; +} + +__host__ __device__ cumat4 cumat4::operator*(const cumat4& rhs) const +{ +cumat4 ret; //should be zeroed in constructor +ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20 + m03*rhs.m30; +ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21 + m03*rhs.m31; +ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22 + m03*rhs.m32; +ret.m03 = m00*rhs.m03 + m01*rhs.m13 + m02*rhs.m23 + m03*rhs.m33; + +ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20 + m13*rhs.m30; +ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21 + m13*rhs.m31; +ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22 + m13*rhs.m32; +ret.m13 = m10*rhs.m03 + m11*rhs.m13 + m12*rhs.m23 + m13*rhs.m33; + +ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20 + m23*rhs.m30; +ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21 + m23*rhs.m31; +ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22 + m23*rhs.m32; +ret.m23 = m20*rhs.m03 + m21*rhs.m13 + m22*rhs.m23 + m23*rhs.m33; + +ret.m30 = m30*rhs.m00 + m31*rhs.m10 + m32*rhs.m20 + m33*rhs.m30; +ret.m31 = m30*rhs.m01 + m31*rhs.m11 + m32*rhs.m21 + m33*rhs.m31; +ret.m32 = m30*rhs.m02 + m31*rhs.m12 + m32*rhs.m22 + m33*rhs.m32; +ret.m33 = m30*rhs.m03 + m31*rhs.m13 + m32*rhs.m23 + m33*rhs.m33; + +return ret; +} + +__host__ __device__ cumat4 operator*(const cumat4& lhs, const double& rhs) +{ +cumat4 ret; +ret.m00=lhs.m00*rhs; +ret.m10=lhs.m10*rhs; +ret.m20=lhs.m20*rhs; +ret.m30=lhs.m30*rhs; +ret.m01=lhs.m01*rhs; +ret.m11=lhs.m11*rhs; +ret.m21=lhs.m21*rhs; +ret.m31=lhs.m31*rhs; +ret.m02=lhs.m02*rhs; +ret.m12=lhs.m12*rhs; +ret.m22=lhs.m22*rhs; +ret.m32=lhs.m32*rhs; +ret.m03=lhs.m03*rhs; +ret.m13=lhs.m13*rhs; +ret.m23=lhs.m23*rhs; +ret.m33=lhs.m33*rhs; +return ret; +} + +__host__ __device__ cumat4 operator/(const cumat4& lhs, const double& rhs) +{ +cumat4 ret; +ret.m00=lhs.m00/rhs; +ret.m10=lhs.m10/rhs; +ret.m20=lhs.m20/rhs; +ret.m30=lhs.m30/rhs; +ret.m01=lhs.m01/rhs; +ret.m11=lhs.m11/rhs; +ret.m21=lhs.m21/rhs; +ret.m31=lhs.m31/rhs; +ret.m02=lhs.m02/rhs; +ret.m12=lhs.m12/rhs; +ret.m22=lhs.m22/rhs; +ret.m32=lhs.m32/rhs; +ret.m03=lhs.m03/rhs; +ret.m13=lhs.m13/rhs; +ret.m23=lhs.m23/rhs; +ret.m33=lhs.m33/rhs; +return ret; +} + +__host__ __device__ cumat4 operator*(const double& lhs, const cumat4& rhs) +{ +cumat4 ret; +ret.m00=lhs*rhs.m00; +ret.m10=lhs*rhs.m10; +ret.m20=lhs*rhs.m20; +ret.m30=lhs*rhs.m30; +ret.m01=lhs*rhs.m01; +ret.m11=lhs*rhs.m11; +ret.m21=lhs*rhs.m21; +ret.m31=lhs*rhs.m31; +ret.m02=lhs*rhs.m02; +ret.m12=lhs*rhs.m12; +ret.m22=lhs*rhs.m22; +ret.m32=lhs*rhs.m32; +ret.m03=lhs*rhs.m03; +ret.m13=lhs*rhs.m13; +ret.m23=lhs*rhs.m23; +ret.m33=lhs*rhs.m33; +return ret; +} + +__host__ __device__ cuvec4 operator*(const cumat4& lhs, const cuvec4& rhs) +{ +cuvec4 ret; +ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y + lhs.m02*rhs.z + lhs.m03*rhs.w; +ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y + lhs.m12*rhs.z + lhs.m13*rhs.w; +ret.z = lhs.m20*rhs.x + lhs.m21*rhs.y + lhs.m22*rhs.z + lhs.m23*rhs.w; +ret.w = lhs.m30*rhs.x + lhs.m31*rhs.y + lhs.m32*rhs.z + lhs.m33*rhs.w; +return ret; +} + +__host__ __device__ cuvec4 operator*(const cuvec4& lhs, const cumat4& rhs) +{ +cuvec4 ret; +ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10 + lhs.z*rhs.m20 + lhs.w*rhs.m30; +ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11 + lhs.z*rhs.m21 + lhs.w*rhs.m31; +ret.z = lhs.x*rhs.m02 + lhs.y*rhs.m12 + lhs.z*rhs.m22 + lhs.w*rhs.m32; +ret.w = lhs.x*rhs.m03 + lhs.y*rhs.m13 + lhs.z*rhs.m23 + lhs.w*rhs.m33; +return ret; +} + +__host__ __device__ cumat4 operator-(const cumat4& rhs) +{ +cumat4 ret; +ret.m00 = -rhs.m00; +ret.m10 = -rhs.m10; +ret.m20 = -rhs.m20; +ret.m30 = -rhs.m30; +ret.m01 = -rhs.m01; +ret.m11 = -rhs.m11; +ret.m21 = -rhs.m21; +ret.m31 = -rhs.m31; +ret.m02 = -rhs.m02; +ret.m12 = -rhs.m12; +ret.m22 = -rhs.m22; +ret.m32 = -rhs.m32; +ret.m03 = -rhs.m03; +ret.m13 = -rhs.m13; +ret.m23 = -rhs.m23; +ret.m33 = -rhs.m33; +return ret; +} + +__host__ __device__ cumat4& cumat4::operator+=(const cumat4& rhs) +{ +m00 += rhs.m00; +m10 += rhs.m10; +m20 += rhs.m20; +m30 += rhs.m30; +m01 += rhs.m01; +m11 += rhs.m11; +m21 += rhs.m21; +m31 += rhs.m31; +m02 += rhs.m02; +m12 += rhs.m12; +m22 += rhs.m22; +m32 += rhs.m32; +m03 += rhs.m03; +m13 += rhs.m13; +m23 += rhs.m23; +m33 += rhs.m33; +return *this; +} + +__host__ __device__ cumat4& cumat4::operator-=(const cumat4& rhs) +{ +m00 -= rhs.m00; +m10 -= rhs.m10; +m20 -= rhs.m20; +m30 -= rhs.m30; +m01 -= rhs.m01; +m11 -= rhs.m11; +m21 -= rhs.m21; +m31 -= rhs.m31; +m02 -= rhs.m02; +m12 -= rhs.m12; +m22 -= rhs.m22; +m32 -= rhs.m32; +m03 -= rhs.m03; +m13 -= rhs.m13; +m23 -= rhs.m23; +m33 -= rhs.m33; +return *this; +} + +__host__ __device__ cumat4& cumat4::operator*=(const double& rhs) +{ +m00 *= rhs; +m10 *= rhs; +m20 *= rhs; +m30 *= rhs; +m01 *= rhs; +m11 *= rhs; +m21 *= rhs; +m31 *= rhs; +m02 *= rhs; +m12 *= rhs; +m22 *= rhs; +m32 *= rhs; +m03 *= rhs; +m13 *= rhs; +m23 *= rhs; +m33 *= rhs; +return *this; +} + +__host__ __device__ cumat4& cumat4::operator/=(const double& rhs) +{ +m00 /= rhs; +m10 /= rhs; +m20 /= rhs; +m30 /= rhs; +m01 /= rhs; +m11 /= rhs; +m21 /= rhs; +m31 /= rhs; +m02 /= rhs; +m12 /= rhs; +m22 /= rhs; +m32 /= rhs; +m03 /= rhs; +m13 /= rhs; +m23 /= rhs; +m33 /= rhs; +return *this; +} + +__host__ __device__ cumat4& cumat4::operator*=(const cumat4& rhs) +{ +cumat4 tmp = *this; +m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10 + tmp.m02*rhs.m20 + tmp.m03*rhs.m30; +m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11 + tmp.m02*rhs.m21 + tmp.m03*rhs.m31; +m02 = tmp.m00*rhs.m02 + tmp.m01*rhs.m12 + tmp.m02*rhs.m22 + tmp.m03*rhs.m32; +m03 = tmp.m00*rhs.m03 + tmp.m01*rhs.m13 + tmp.m02*rhs.m23 + tmp.m03*rhs.m33; +m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10 + tmp.m12*rhs.m20 + tmp.m13*rhs.m30; +m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11 + tmp.m12*rhs.m21 + tmp.m13*rhs.m31; +m12 = tmp.m10*rhs.m02 + tmp.m11*rhs.m12 + tmp.m12*rhs.m22 + tmp.m13*rhs.m32; +m13 = tmp.m10*rhs.m03 + tmp.m11*rhs.m13 + tmp.m12*rhs.m23 + tmp.m13*rhs.m33; +m20 = tmp.m20*rhs.m00 + tmp.m21*rhs.m10 + tmp.m22*rhs.m20 + tmp.m23*rhs.m30; +m21 = tmp.m20*rhs.m01 + tmp.m21*rhs.m11 + tmp.m22*rhs.m21 + tmp.m23*rhs.m31; +m22 = tmp.m20*rhs.m02 + tmp.m21*rhs.m12 + tmp.m22*rhs.m22 + tmp.m23*rhs.m32; +m23 = tmp.m20*rhs.m03 + tmp.m21*rhs.m13 + tmp.m22*rhs.m23 + tmp.m23*rhs.m33; +m30 = tmp.m30*rhs.m00 + tmp.m31*rhs.m10 + tmp.m32*rhs.m20 + tmp.m33*rhs.m30; +m31 = tmp.m30*rhs.m01 + tmp.m31*rhs.m11 + tmp.m32*rhs.m21 + tmp.m33*rhs.m31; +m32 = tmp.m30*rhs.m02 + tmp.m31*rhs.m12 + tmp.m32*rhs.m22 + tmp.m33*rhs.m32; +m33 = tmp.m30*rhs.m03 + tmp.m31*rhs.m13 + tmp.m32*rhs.m23 + tmp.m33*rhs.m33; +return *this; +} + +__host__ __device__ cumat4 cumat4::transpose() const +{ +cumat4 ret; +ret.m00 = m00; +ret.m10 = m01; +ret.m20 = m02; +ret.m30 = m03; +ret.m01 = m10; +ret.m11 = m11; +ret.m21 = m12; +ret.m31 = m13; +ret.m02 = m20; +ret.m12 = m21; +ret.m22 = m22; +ret.m32 = m23; +ret.m03 = m30; +ret.m13 = m31; +ret.m23 = m32; +ret.m33 = m33; +return ret; +} + +///////////////////// +// Det and Inverse // +///////////////////// + +__host__ __device__ double cumat4::det() +{ + double dt; + dt = m03*m12*m21*m30 - + m02*m13*m21*m30 - + m03*m11*m22*m30 + + m01*m13*m22*m30 + + m02*m11*m23*m30 - + m01*m12*m23*m30 - + m03*m12*m20*m31 + + m02*m13*m20*m31 + + m03*m10*m22*m31 - + m00*m13*m22*m31 - + m02*m10*m23*m31 + + m00*m12*m23*m31 + + m03*m11*m20*m32 - + m01*m13*m20*m32 - + m03*m10*m21*m32 + + m00*m13*m21*m32 + + m01*m10*m23*m32 - + m00*m11*m23*m32 - + m02*m11*m20*m33 + + m01*m12*m20*m33 + + m02*m10*m21*m33 - + m00*m12*m21*m33 - + m01*m10*m22*m33 + + m00*m11*m22*m33; + + return dt; +} + +__host__ __device__ cumat4 cumat4::inverse() +{ + cumat4 mret; + double dt = this->det(); + double idet; + + if(dt>1E-7 || dt<-1E-7) + { + idet = 1.0f/dt; + mret.m00 = -m13*m22*m31 + m12*m23*m31 + m13*m21*m32 - m11*m23*m32 - m12*m21*m33 + m11*m22*m33; + mret.m01 = m03*m22*m31 - m02*m23*m31 - m03*m21*m32 + m01*m23*m32 + m02*m21*m33 - m01*m22*m33; + mret.m02 = -m03*m12*m31 + m02*m13*m31 + m03*m11*m32 - m01*m13*m32 - m02*m11*m33 + m01*m12*m33; + mret.m03 = m03*m12*m21 - m02*m13*m21 - m03*m11*m22 + m01*m13*m22 + m02*m11*m23 - m01*m12*m23; + mret.m10 = m13*m22*m30 - m12*m23*m30 - m13*m20*m32 + m10*m23*m32 + m12*m20*m33 - m10*m22*m33; + mret.m11 = -m03*m22*m30 + m02*m23*m30 + m03*m20*m32 - m00*m23*m32 - m02*m20*m33 + m00*m22*m33; + mret.m12 = m03*m12*m30 - m02*m13*m30 - m03*m10*m32 + m00*m13*m32 + m02*m10*m33 - m00*m12*m33; + mret.m13 = -m03*m12*m20 + m02*m13*m20 + m03*m10*m22 - m00*m13*m22 - m02*m10*m23 + m00*m12*m23; + mret.m20 = -m13*m21*m30 + m11*m23*m30 + m13*m20*m31 - m10*m23*m31 - m11*m20*m33 + m10*m21*m33; + mret.m21 = m03*m21*m30 - m01*m23*m30 - m03*m20*m31 + m00*m23*m31 + m01*m20*m33 - m00*m21*m33; + mret.m22 = -m03*m11*m30 + m01*m13*m30 + m03*m10*m31 - m00*m13*m31 - m01*m10*m33 + m00*m11*m33; + mret.m23 = m03*m11*m20 - m01*m13*m20 - m03*m10*m21 + m00*m13*m21 + m01*m10*m23 - m00*m11*m23; + mret.m30 = m12*m21*m30 - m11*m22*m30 - m12*m20*m31 + m10*m22*m31 + m11*m20*m32 - m10*m21*m32; + mret.m31 = -m02*m21*m30 + m01*m22*m30 + m02*m20*m31 - m00*m22*m31 - m01*m20*m32 + m00*m21*m32; + mret.m32 = m02*m11*m30 - m01*m12*m30 - m02*m10*m31 + m00*m12*m31 + m01*m10*m32 - m00*m11*m32; + mret.m33 = -m02*m11*m20 + m01*m12*m20 + m02*m10*m21 - m00*m12*m21 - m01*m10*m22 + m00*m11*m22; + mret.m00 *= idet; + mret.m01 *= idet; + mret.m02 *= idet; + mret.m03 *= idet; + mret.m10 *= idet; + mret.m11 *= idet; + mret.m12 *= idet; + mret.m13 *= idet; + mret.m20 *= idet; + mret.m21 *= idet; + mret.m22 *= idet; + mret.m23 *= idet; + mret.m30 *= idet; + mret.m31 *= idet; + mret.m32 *= idet; + mret.m33 *= idet; + } + else + { + mret.m00 = inf; + mret.m10 = inf; + mret.m20 = inf; + mret.m30 = inf; + mret.m01 = inf; + mret.m11 = inf; + mret.m21 = inf; + mret.m31 = inf; + mret.m02 = inf; + mret.m12 = inf; + mret.m22 = inf; + mret.m32 = inf; + mret.m03 = inf; + mret.m13 = inf; + mret.m23 = inf; + mret.m33 = inf; + } + //this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again! + + return mret; +} + +////////////////////////// +// Standalone Functions // +////////////////////////// + +__host__ __device__ double cuvec4_dot(cuvec4 &a, cuvec4 &b) +{ + double ret = 0.0f; + ret = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; + return ret; +} + +__host__ __device__ double cuvec4_norm(cuvec4 &a) +{ + double ret = 0.0f; + ret = ::sqrtf(cuvec4_dot(a,a)); + return ret; +} + +__host__ __device__ cuvec4 cuvec4_normalize(cuvec4 &a) +{ + cuvec4 ret = cuvec4(0.0f,0.0f,0.0f,0.0f); + double nrm = cuvec4_norm(a); + if(nrm>0.0f) + ret = a/nrm; + return ret; +} + +__host__ __device__ cuvec4 cuvec4_proj(cuvec4 &a, cuvec4 &b) +{ + cuvec4 ret; + cuvec4 bn = cuvec4_normalize(b); + double d = cuvec4_dot(a,bn); + ret = bn*d; + return ret; +} + +/////////// +// Tests // +/////////// + +}; //namespace amscuda