diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index abba8e2..5a19883 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index 8bb01b3..7ac3343 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index a0d54ea..cb96761 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index e854844..44d5377 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o index 12de1de..18ff6f8 100644 Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index 909a501..56cb5bb 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index ad0965f..0c0f549 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index fbb073b..a016b5c 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib2.o b/build_linux64/objstore/amsculib2.o index 4e0b89d..02c9cc7 100644 Binary files a/build_linux64/objstore/amsculib2.o and b/build_linux64/objstore/amsculib2.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index 7aec13b..b7f744b 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index 91c72dc..eb3211e 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/cuvect2.o b/build_linux64/objstore/cuvect2.o index 84465ed..f88c2e0 100644 Binary files a/build_linux64/objstore/cuvect2.o and b/build_linux64/objstore/cuvect2.o differ diff --git a/build_linux64/objstore/cuvect2f.o b/build_linux64/objstore/cuvect2f.o index 2e88046..f70fa95 100644 Binary files a/build_linux64/objstore/cuvect2f.o and b/build_linux64/objstore/cuvect2f.o differ diff --git a/build_linux64/objstore/cuvect3.o b/build_linux64/objstore/cuvect3.o index 259ee43..672a662 100644 Binary files a/build_linux64/objstore/cuvect3.o and b/build_linux64/objstore/cuvect3.o differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index b619a80..ec36225 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/objstore/cuvect4.o b/build_linux64/objstore/cuvect4.o index c4e51e2..0b33597 100644 Binary files a/build_linux64/objstore/cuvect4.o and b/build_linux64/objstore/cuvect4.o differ diff --git a/build_linux64/objstore/cuvect4f.o b/build_linux64/objstore/cuvect4f.o index ec293a8..4e24365 100644 Binary files a/build_linux64/objstore/cuvect4f.o and b/build_linux64/objstore/cuvect4f.o differ diff --git a/build_linux64/test b/build_linux64/test index 4cc4378..7b811ba 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib2/cuvect4f.hpp b/include/amsculib2/cuvect4f.hpp index e70b2f9..1a5b999 100644 --- a/include/amsculib2/cuvect4f.hpp +++ b/include/amsculib2/cuvect4f.hpp @@ -14,16 +14,22 @@ namespace amscuda __host__ __device__ cuvect4f(); __host__ __device__ ~cuvect4f(); - __host__ __device__ cuvect4f(float _x, float _y, float _z, float _w); - - __host__ __device__ float& operator[](const int I); - __host__ __device__ const float& operator[](const int I) const; + __host__ __device__ cuvect4f(const float &_x, const float &_y, const float &_z, const float &_w); + + + __host__ __device__ float& operator[](const int &I); + __host__ __device__ const float& operator[](const int &I) const; - __host__ __device__ cuvect4f operator+(cuvect4f lhs); - __host__ __device__ cuvect4f operator-(cuvect4f lhs); - __host__ __device__ cuvect4f operator*(float lhs); - __host__ __device__ cuvect4f operator/(float lhs); - __host__ __device__ friend cuvect4f operator-(cuvect4f rhs); + __host__ __device__ cuvect4f operator+(const cuvect4f &rhs); + __host__ __device__ cuvect4f operator-(const cuvect4f &rhs); + __host__ __device__ cuvect4f operator*(const float &rhs); + __host__ __device__ cuvect4f operator/(const float &rhs); + __host__ __device__ friend cuvect4f operator-(const cuvect4f &rhs); + + __host__ __device__ cuvect4f& operator+=(const cuvect4f &rhs); + __host__ __device__ cuvect4f& operator-=(const cuvect4f &rhs); + __host__ __device__ cuvect4f& operator/=(const float &rhs); + __host__ __device__ cuvect4f& operator*=(const float &rhs); }; class cumat4f @@ -31,6 +37,11 @@ namespace amscuda public: float dat[16]; + // float m00,m10,m20,m30; //named references to force register use? + // float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack + // float m02,m12,m22,m32; + // float m03,m13,m23,m33; + __host__ __device__ cumat4f(); __host__ __device__ ~cumat4f(); __host__ __device__ float& operator[](const int I); diff --git a/src/amsculib2/cuvect4f.cu b/src/amsculib2/cuvect4f.cu index 3718341..6a4fc7f 100644 --- a/src/amsculib2/cuvect4f.cu +++ b/src/amsculib2/cuvect4f.cu @@ -9,79 +9,118 @@ namespace amscuda __host__ __device__ cuvect4f::cuvect4f() { - x = 0.0; y = 0.0; z = 0.0; w = 0.0; + x = 0.0f; y = 0.0f; z = 0.0f; w = 0.0f; return; } __host__ __device__ cuvect4f::~cuvect4f() { - x = 0.0; y = 0.0; z = 0.0; w = 0.0; + x = 0.0f; y = 0.0f; z = 0.0f; w = 0.0f; return; } -__host__ __device__ cuvect4f::cuvect4f(float _x, float _y, float _z, float _w) +__host__ __device__ float& cuvect4f::operator[](const int &I) +{ + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + if(I==3) return w; + return x; +} + +__host__ __device__ const float& cuvect4f::operator[](const int &I) const +{ + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + if(I==3) return w; + return x; +} + +__host__ __device__ cuvect4f cuvect4f::operator+(const cuvect4f &rhs) +{ + cuvect4f ret; + ret.x = x+rhs.x; + ret.y = y+rhs.y; + ret.z = z+rhs.z; + ret.w = w+rhs.w; + + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator-(const cuvect4f &rhs) +{ + cuvect4f ret; + ret.x = x-rhs.x; + ret.y = y-rhs.y; + ret.z = z-rhs.z; + ret.w = w-rhs.w; + + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator*(const float &rhs) +{ + cuvect4f ret; + ret.x = x*rhs; + ret.y = y*rhs; + ret.z = z*rhs; + ret.w = w*rhs; + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator/(const float &rhs) +{ + cuvect4f ret; + ret.x = x/rhs; + ret.y = y/rhs; + ret.z = z/rhs; + ret.w = w/rhs; + return ret; +} + +__host__ __device__ cuvect4f& cuvect4f::operator+=(const cuvect4f &rhs) +{ + x = x + rhs.x; + y = y + rhs.y; + z = z + rhs.z; + w = w + rhs.w; + return *this; +} + +__host__ __device__ cuvect4f& cuvect4f::operator-=(const cuvect4f &rhs) +{ + x = x - rhs.x; + y = y - rhs.y; + z = z - rhs.z; + w = w - rhs.w; + return *this; +} + +__host__ __device__ cuvect4f& cuvect4f::operator*=(const float &rhs) +{ + x = x * rhs; + y = y * rhs; + z = z * rhs; + w = w * rhs; + return *this; +} + +__host__ __device__ cuvect4f& cuvect4f::operator/=(const float &rhs) +{ + x = x / rhs; + y = y / rhs; + z = z / rhs; + w = w / rhs; + return *this; +} + + +__host__ __device__ cuvect4f::cuvect4f(const float &_x, const float &_y, const float &_z, const float &_w) { x = _x; y = _y; z = _z; w = _w; return; } - -__host__ __device__ float& cuvect4f::operator[](const int I) -{ - if(I==0) return x; - else if(I==1) return y; - else if(I==2) return z; - else if(I==3) return w; - return x; -} - -__host__ __device__ const float& cuvect4f::operator[](const int I) const -{ - if(I==0) return x; - else if(I==1) return y; - else if(I==2) return z; - else if(I==3) return w; - return x; -} - -__host__ __device__ cuvect4f cuvect4f::operator+(cuvect4f lhs) -{ - cuvect4f ret; - ret.x = this->x + lhs.x; - ret.y = this->y + lhs.y; - ret.z = this->z + lhs.z; - ret.w = this->w + lhs.w; - return ret; -} - -__host__ __device__ cuvect4f cuvect4f::operator-(cuvect4f lhs) -{ - cuvect4f ret; - ret.x = this->x - lhs.x; - ret.y = this->y - lhs.y; - ret.z = this->z - lhs.z; - ret.w = this->w - lhs.w; - return ret; -} - -__host__ __device__ cuvect4f cuvect4f::operator*(float lhs) -{ - cuvect4f ret; - ret.x = this->x*lhs; - ret.y = this->y*lhs; - ret.z = this->z*lhs; - ret.w = this->w*lhs; - return ret; -} - -__host__ __device__ cuvect4f cuvect4f::operator/(float lhs) -{ - cuvect4f ret; - ret.x = this->x/lhs; - ret.y = this->y/lhs; - ret.z = this->z/lhs; - ret.w = this->w/lhs; - return ret; -} __host__ __device__ cumat4f::cumat4f() {