diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index 06cd762..00cf916 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index 4af119f..2781244 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index e2dffd0..e9abdee 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index 827be9b..1d0224a 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o index d616470..ac371fe 100644 Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index 96d9685..65dddee 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index 1db6b56..d00f15e 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index 4956135..bb0b865 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib2.o b/build_linux64/objstore/amsculib2.o index 46c1bd6..df23906 100644 Binary files a/build_linux64/objstore/amsculib2.o and b/build_linux64/objstore/amsculib2.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index 3acd851..7b706bc 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index f616177..fa80f16 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/cuvect2.o b/build_linux64/objstore/cuvect2.o index 2bfe452..4f8aba9 100644 Binary files a/build_linux64/objstore/cuvect2.o and b/build_linux64/objstore/cuvect2.o differ diff --git a/build_linux64/objstore/cuvect2f.o b/build_linux64/objstore/cuvect2f.o index 19dfc05..33251ff 100644 Binary files a/build_linux64/objstore/cuvect2f.o and b/build_linux64/objstore/cuvect2f.o differ diff --git a/build_linux64/objstore/cuvect3.o b/build_linux64/objstore/cuvect3.o index 91da2d0..3c1fb1e 100644 Binary files a/build_linux64/objstore/cuvect3.o and b/build_linux64/objstore/cuvect3.o differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index 9725095..04282a1 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/objstore/cuvect3f_legacy.o b/build_linux64/objstore/cuvect3f_legacy.o new file mode 100644 index 0000000..ce2bdc9 Binary files /dev/null and b/build_linux64/objstore/cuvect3f_legacy.o differ diff --git a/build_linux64/objstore/cuvect4.o b/build_linux64/objstore/cuvect4.o index 18b550b..8544da8 100644 Binary files a/build_linux64/objstore/cuvect4.o and b/build_linux64/objstore/cuvect4.o differ diff --git a/build_linux64/objstore/cuvect4f.o b/build_linux64/objstore/cuvect4f.o index 1effae1..02b6ce9 100644 Binary files a/build_linux64/objstore/cuvect4f.o and b/build_linux64/objstore/cuvect4f.o differ diff --git a/build_linux64/test b/build_linux64/test index 5f25607..5cab227 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib2/cuvect3f.hpp b/include/amsculib2/cuvect3f.hpp index 7d6b22b..8a3008e 100644 --- a/include/amsculib2/cuvect3f.hpp +++ b/include/amsculib2/cuvect3f.hpp @@ -92,34 +92,11 @@ namespace amscuda __host__ __device__ cuvect3f hodge_dual(const cumat3f &min); __host__ __device__ cumat3f rotmat_from_axisangle(const cuvect3f &axis, const float &angle); - - //3x3 matrix operations - //matrix order is assumed to be mat[I,J] = mat[I+3*J] - - //transposes a 3x3 (9 element) matrix - __host__ __device__ void mat3f_transpose(float *mat3inout); - - //copies src to dest - __host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src); - - //returns determinant of 3x3 matrix - __host__ __device__ float mat3f_det(float *mat3in); - - //inverts a 3x3 (9 element) matrix - __host__ __device__ void mat3f_inverse(float *mat3inout); - - __host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin); - __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout); - - __host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout); - __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout); - - //returns direction cosine rotation matrix from axis and angle - __host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout); - __host__ void test_cudavectf_logic1(); }; +#include + #endif diff --git a/include/amsculib2/cuvect3f_legacy.hpp b/include/amsculib2/cuvect3f_legacy.hpp new file mode 100644 index 0000000..2f40b2a --- /dev/null +++ b/include/amsculib2/cuvect3f_legacy.hpp @@ -0,0 +1,34 @@ +#ifndef __CUVECT3_LEGACY_HPP__ +#define __CUVECT3_LEGACY_HPP__ + +namespace amscuda +{ + + //3x3 matrix operations + //matrix order is assumed to be mat[I,J] = mat[I+3*J] + + //transposes a 3x3 (9 element) matrix + __host__ __device__ void mat3f_transpose(float *mat3inout); + + //copies src to dest + __host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src); + + //returns determinant of 3x3 matrix + __host__ __device__ float mat3f_det(float *mat3in); + + //inverts a 3x3 (9 element) matrix + __host__ __device__ void mat3f_inverse(float *mat3inout); + + __host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin); + __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout); + + __host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout); + __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout); + + //returns direction cosine rotation matrix from axis and angle + __host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout); + + +}; + +#endif \ No newline at end of file diff --git a/include/amsculib2/cuvect4f.hpp b/include/amsculib2/cuvect4f.hpp index f2ac7d9..4139caf 100644 --- a/include/amsculib2/cuvect4f.hpp +++ b/include/amsculib2/cuvect4f.hpp @@ -85,10 +85,10 @@ namespace amscuda __host__ __device__ cumat4f& operator*=(const cumat4f &rhs); }; - __host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b); - __host__ __device__ float cuvect4f_norm(cuvect4f a); - __host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a); - __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b); + __host__ __device__ float cuvect4f_dot(cuvect4f &a, cuvect4f &b); + __host__ __device__ float cuvect4f_norm(cuvect4f &a); + __host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f &a); + __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f &a, cuvect4f &b); }; diff --git a/src/amsculib2/cuvect3f.cu b/src/amsculib2/cuvect3f.cu index 6f8a397..26996bb 100644 --- a/src/amsculib2/cuvect3f.cu +++ b/src/amsculib2/cuvect3f.cu @@ -5,13 +5,13 @@ namespace amscuda __host__ __device__ cuvect3f::cuvect3f() { - x = 0.0; y = 0.0; z = 0.0; + x = 0.0f; y = 0.0f; z = 0.0f; return; } __host__ __device__ cuvect3f::~cuvect3f() { - x = 0.0; y = 0.0; z = 0.0; + x = 0.0f; y = 0.0f; z = 0.0f; return; } @@ -144,7 +144,7 @@ namespace amscuda } else { - ret.x = 0.0; ret.y = 0.0; ret.z = 0.0; + ret.x = 0.0f; ret.y = 0.0f; ret.z = 0.0f; } return ret; @@ -390,7 +390,7 @@ __host__ __device__ cumat3f cumat3f::operator*(const cumat3f &rhs) __host__ __device__ float cumat3f::det() { - float ret = 0.0; + float ret = 0.0f; ret += m00*m11*m22; ret += m01*m12*m20; @@ -587,186 +587,6 @@ __host__ __device__ const float* cumat3f::data() const return (const float*) this; } -//transposes a 3x3 (9 element) matrix -__host__ __device__ void mat3f_transpose(float *mat3inout) -{ - int I,J; - float matint[9]; - for(I=0;I<9;I++) - { - matint[I] = mat3inout[I]; - } - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - mat3inout[I+J*3] = matint[J+I*3]; - } - } - - return; -} - -//copies src to dest -__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src) -{ - int I; - if(mat3f_dest==NULL || mat3f_src==NULL) - return; - - for(I=0;I<9;I++) - mat3f_dest[I] = mat3f_src[I]; - - return; -} - - -__host__ __device__ float mat3f_det(float *mat3in) -{ - float ret = 0.0; - - ret = ret + mat3in[0+0*3]*mat3in[1+1*3]*mat3in[2+2*3]; - ret = ret + mat3in[0+1*3]*mat3in[1+2*3]*mat3in[2+0*3]; - ret = ret + mat3in[0+2*3]*mat3in[1+0*3]*mat3in[2+1*3]; - ret = ret - mat3in[0+0*3]*mat3in[1+2*3]*mat3in[2+1*3]; - ret = ret - mat3in[0+1*3]*mat3in[1+0*3]*mat3in[2+2*3]; - ret = ret - mat3in[0+2*3]*mat3in[1+1*3]*mat3in[2+0*3]; - - return ret; -} - -//inverts a 3x3 (9 element) matrix -__host__ __device__ void mat3f_inverse(float *mat3inout) -{ - int I; - float matint[9]; - float det = mat3f_det(mat3inout); - - for(I=0;I<9;I++) - { - matint[I] = mat3inout[I]; - } - - mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det; - mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det; - mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det; - mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det; - mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det; - mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det; - mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det; - mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det; - mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det; - - mat3f_transpose(mat3inout); - - return; -} - -__host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin) -{ - int I,J; - cuvect3f ret; - for(I=0;I<3;I++) - { - ret[I] = 0.0; - for(J=0;J<3;J++) - { - ret[I] = ret[I] + mat3in[I+3*J]*cvin[J]; - } - } - - return ret; -} - -__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout) -{ - float wrk[9]; - int I,J,K; - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - wrk[I+3*J] = 0.0; - } - } - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - for(K=0;K<3;K++) - { - wrk[I+3*K] = wrk[I+3*K] + matina[I+3*J]*matinb[J+3*K]; - } - } - } - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - matout[I+3*J] = wrk[I+3*J]; - } - } - - return; -} - -__host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout) -{ - matout[0 + 0*3] = 0.0f; - matout[1 + 0*3] = -vecin[2]; - matout[2 + 0*3] = vecin[1]; - - matout[0 + 1*3] = vecin[2]; - matout[1 + 1*3] = 0.0f; - matout[2 + 1*3] = -vecin[0]; - - matout[0 + 2*3] = -vecin[1]; - matout[1 + 2*3] = vecin[0]; - matout[2 + 2*3] = 0.0f; - return; -} - -__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout) -{ - vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); - vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); - vecout[2] = 0.5*(matin[0 + 1*3] - matin[1 + 0*3]); - - return; -} - -//returns direction cosine rotation matrix from axis and angle -__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout) -{ - int I; - float H[9]; - float Hsq[9]; - float II[9]; - - for(I=0;I<9;I++) II[I] = 0.0; - II[0+0*3] = 1.0; - II[1+1*3] = 1.0; - II[2+2*3] = 1.0; - - axis = cuvect3f_normalize(axis); - - mat3f_hodgedual(axis,H); - mat3f_mult(H,H,Hsq); - - for(I=0;I<9;I++) - { - matout[I] = (II[I] + Hsq[I]) + H[I]*sinf(angle) - Hsq[I]*cosf(angle); - } - - return; -} - - - __host__ void test_cudavectf_logic1() { //3 dim vector and matrix functional tests on host side diff --git a/src/amsculib2/cuvect3f_legacy.cu b/src/amsculib2/cuvect3f_legacy.cu new file mode 100644 index 0000000..f3e7d99 --- /dev/null +++ b/src/amsculib2/cuvect3f_legacy.cu @@ -0,0 +1,185 @@ +#include + +namespace amscuda +{ + + +//transposes a 3x3 (9 element) matrix +__host__ __device__ void mat3f_transpose(float *mat3inout) +{ + int I,J; + float matint[9]; + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + mat3inout[I+J*3] = matint[J+I*3]; + } + } + + return; +} + +//copies src to dest +__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src) +{ + int I; + if(mat3f_dest==NULL || mat3f_src==NULL) + return; + + for(I=0;I<9;I++) + mat3f_dest[I] = mat3f_src[I]; + + return; +} + + +__host__ __device__ float mat3f_det(float *mat3in) +{ + float ret = 0.0f; + + ret = ret + mat3in[0+0*3]*mat3in[1+1*3]*mat3in[2+2*3]; + ret = ret + mat3in[0+1*3]*mat3in[1+2*3]*mat3in[2+0*3]; + ret = ret + mat3in[0+2*3]*mat3in[1+0*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+0*3]*mat3in[1+2*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+1*3]*mat3in[1+0*3]*mat3in[2+2*3]; + ret = ret - mat3in[0+2*3]*mat3in[1+1*3]*mat3in[2+0*3]; + + return ret; +} + +//inverts a 3x3 (9 element) matrix +__host__ __device__ void mat3f_inverse(float *mat3inout) +{ + int I; + float matint[9]; + float det = mat3f_det(mat3inout); + + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det; + mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det; + mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det; + mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det; + mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det; + mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det; + mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det; + mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det; + mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det; + + mat3f_transpose(mat3inout); + + return; +} + +__host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin) +{ + int I,J; + cuvect3f ret; + for(I=0;I<3;I++) + { + ret[I] = 0.0f; + for(J=0;J<3;J++) + { + ret[I] = ret[I] + mat3in[I+3*J]*cvin[J]; + } + } + + return ret; +} + +__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout) +{ + float wrk[9]; + int I,J,K; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + wrk[I+3*J] = 0.0f; + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + for(K=0;K<3;K++) + { + wrk[I+3*K] = wrk[I+3*K] + matina[I+3*J]*matinb[J+3*K]; + } + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + matout[I+3*J] = wrk[I+3*J]; + } + } + + return; +} + +__host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout) +{ + matout[0 + 0*3] = 0.0f; + matout[1 + 0*3] = -vecin[2]; + matout[2 + 0*3] = vecin[1]; + + matout[0 + 1*3] = vecin[2]; + matout[1 + 1*3] = 0.0f; + matout[2 + 1*3] = -vecin[0]; + + matout[0 + 2*3] = -vecin[1]; + matout[1 + 2*3] = vecin[0]; + matout[2 + 2*3] = 0.0f; + return; +} + +__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout) +{ + vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); + vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); + vecout[2] = 0.5*(matin[0 + 1*3] - matin[1 + 0*3]); + + return; +} + +//returns direction cosine rotation matrix from axis and angle +__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout) +{ + int I; + float H[9]; + float Hsq[9]; + float II[9]; + + for(I=0;I<9;I++) II[I] = 0.0f; + II[0+0*3] = 1.0; + II[1+1*3] = 1.0; + II[2+2*3] = 1.0; + + axis = cuvect3f_normalize(axis); + + mat3f_hodgedual(axis,H); + mat3f_mult(H,H,Hsq); + + for(I=0;I<9;I++) + { + matout[I] = (II[I] + Hsq[I]) + H[I]*sinf(angle) - Hsq[I]*cosf(angle); + } + + return; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect4f.cu b/src/amsculib2/cuvect4f.cu index 46c60e7..0fa6f82 100644 --- a/src/amsculib2/cuvect4f.cu +++ b/src/amsculib2/cuvect4f.cu @@ -674,30 +674,30 @@ __host__ __device__ cumat4f& cumat4f::operator*=(const cumat4f &rhs) } -__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b) +__host__ __device__ float cuvect4f_dot(cuvect4f &a, cuvect4f &b) { - float ret = 0.0; + float ret = 0.0f; ret = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; return ret; } -__host__ __device__ float cuvect4f_norm(cuvect4f a) +__host__ __device__ float cuvect4f_norm(cuvect4f &a) { - float ret = 0.0; + float ret = 0.0f; ret = ::sqrtf(cuvect4f_dot(a,a)); return ret; } -__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a) +__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f &a) { cuvect4f ret = cuvect4f(0.0f,0.0f,0.0f,0.0f); float nrm = cuvect4f_norm(a); - if(nrm>0.0) + if(nrm>0.0f) ret = a/nrm; return ret; } -__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b) +__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f &a, cuvect4f &b) { cuvect4f ret; cuvect4f bn = cuvect4f_normalize(b);