// Patch summary (git diff; the line structure of the hunks is collapsed in this copy).
//
// Files touched:
//   - build_linux64/libamsculib2.linux64.a, build_linux64/objstore/cuvect3f.o and
//     build_linux64/test: binary files, reported only as "differ".
//   - src/amsculib2/cuvect3f.cu: three hunks, all in cumat3f member functions.
//
// Source changes in cuvect3f.cu:
//   - cumat3f::operator/(const float &rhs): the nine per-element divisions by rhs are
//     replaced by one reciprocal, irhs = 1.0f/rhs, followed by nine multiplications.
//   - cumat3f::inverse(): a local `float idt` is added and set to 1.0f/dt inside the
//     existing `if(dt!=0.0)` branch; the nine q(i,j) expressions are multiplied by idt
//     instead of being divided by dt. Only part of this function is visible in the hunk.
//   - cumat3f::operator/=(const float &rhs): the same reciprocal-and-multiply rewrite,
//     applied in place to m00..m22.
//
// NOTE(review): x * (1.0f/r) rounds twice, so results may differ in the last bits from
//   x / r; a subnormal r can also make 1.0f/r overflow to inf where x / r would stay
//   finite. Confirm callers tolerate both.
// NOTE(review): idt is declared without an initializer and is assigned only inside the
//   dt != 0.0 branch; the visible lines read it only in that branch. Confirm nothing
//   later in inverse() uses it.
// NOTE(review): dt is a float but is compared against the double literal 0.0; 0.0f
//   would avoid the promotion.
// NOTE(review): rebuilt binaries are part of this change; confirm build outputs are
//   meant to be tracked.
diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index 00cf916..62524fa 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index 04282a1..4f2708b 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/test b/build_linux64/test index 5cab227..6f13865 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/src/amsculib2/cuvect3f.cu b/src/amsculib2/cuvect3f.cu index 26996bb..3706b0b 100644 --- a/src/amsculib2/cuvect3f.cu +++ b/src/amsculib2/cuvect3f.cu @@ -348,15 +348,16 @@ __host__ __device__ cumat3f cumat3f::operator*(const float &rhs) __host__ __device__ cumat3f cumat3f::operator/(const float &rhs) { cumat3f ret; - ret.m00 = m00 / rhs; - ret.m10 = m10 / rhs; - ret.m20 = m20 / rhs; - ret.m01 = m01 / rhs; - ret.m11 = m11 / rhs; - ret.m21 = m21 / rhs; - ret.m02 = m02 / rhs; - ret.m12 = m12 / rhs; - ret.m22 = m22 / rhs; + float irhs = 1.0f/rhs; + ret.m00 = m00 * irhs; + ret.m10 = m10 * irhs; + ret.m20 = m20 * irhs; + ret.m01 = m01 * irhs; + ret.m11 = m11 * irhs; + ret.m21 = m21 * irhs; + ret.m02 = m02 * irhs; + ret.m12 = m12 * irhs; + ret.m22 = m22 * irhs; return ret; } @@ -423,17 +424,19 @@ __host__ __device__ cumat3f cumat3f::inverse() { cumat3f q; float dt = det(); + float idt; if(dt!=0.0) { - q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; - q(1,0) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; - q(2,0) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; - q(0,1) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; - q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; - q(2,1) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; - q(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; - q(1,2) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; - q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + idt = 1.0f/dt; + q(0,0) = 
(at(1,1)*at(2,2)-at(1,2)*at(2,1))*idt; + q(1,0) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))*idt; + q(2,0) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))*idt; + q(0,1) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))*idt; + q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))*idt; + q(2,1) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))*idt; + q(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))*idt; + q(1,2) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))*idt; + q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))*idt; // q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; // q(0,1) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; // q(0,2) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; @@ -511,15 +514,16 @@ __host__ __device__ cumat3f& cumat3f::operator-=(const cumat3f &rhs) __host__ __device__ cumat3f& cumat3f::operator/=(const float &rhs) { - m00 /= rhs; - m10 /= rhs; - m20 /= rhs; - m01 /= rhs; - m11 /= rhs; - m21 /= rhs; - m02 /= rhs; - m12 /= rhs; - m22 /= rhs; + float irhs = 1.0f/rhs; + m00 *= irhs; + m10 *= irhs; + m20 *= irhs; + m01 *= irhs; + m11 *= irhs; + m21 *= irhs; + m02 *= irhs; + m12 *= irhs; + m22 *= irhs; return *this; }