matrix math
This commit is contained in:
@ -3,5 +3,19 @@
|
||||
{
|
||||
"path": "."
|
||||
}
|
||||
]
|
||||
],
|
||||
//"editor.quickSuggestions": false
|
||||
//https://code.visualstudio.com/docs/editing/intellisense#_customizing-intellisense
|
||||
// Controls if quick suggestions should show up while typing
|
||||
"settings": {
|
||||
"editor.quickSuggestions": {
|
||||
"other": false,
|
||||
"comments": false,
|
||||
"strings": false
|
||||
},
|
||||
"editor.suggestOnTriggerCharacters": false,
|
||||
"editor.tabCompletion": "off",
|
||||
"editor.parameterHints.enabled": false,
|
||||
"editor.acceptSuggestionOnCommitCharacter": false
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -35,30 +35,54 @@ namespace amscuda
|
||||
class cumat4f
|
||||
{
|
||||
public:
|
||||
float dat[16];
|
||||
//float dat[16];
|
||||
|
||||
// float m00,m10,m20,m30; //named references to force register use?
|
||||
// float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack
|
||||
// float m02,m12,m22,m32;
|
||||
// float m03,m13,m23,m33;
|
||||
float m00,m10,m20,m30; //named references to force register use?
|
||||
float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack
|
||||
float m02,m12,m22,m32;
|
||||
float m03,m13,m23,m33;
|
||||
|
||||
__host__ __device__ cumat4f();
|
||||
__host__ __device__ ~cumat4f();
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ float& operator()(const int I, const int J);
|
||||
__host__ __device__ float& at(const int I, const int J);
|
||||
__host__ __device__ cumat4f(
|
||||
const float & _m00, const float & _m01, const float & _m02, const float & _m03,
|
||||
const float & _m10, const float & _m11, const float & _m12, const float & _m13,
|
||||
const float & _m20, const float & _m21, const float & _m22, const float & _m23,
|
||||
const float & _m30, const float & _m31, const float & _m32, const float & _m33
|
||||
);
|
||||
|
||||
__host__ __device__ cumat4f operator+(cumat4f lhs);
|
||||
__host__ __device__ cumat4f operator-(cumat4f lhs);
|
||||
__host__ __device__ cumat4f operator*(float lhs);
|
||||
__host__ __device__ cumat4f operator/(float lhs);
|
||||
__host__ __device__ cuvect4f operator*(cuvect4f lhs);
|
||||
__host__ __device__ cumat4f operator*(cumat4f lhs);
|
||||
__host__ __device__ friend cumat4f operator-(cumat4f rhs);
|
||||
__host__ __device__ explicit cumat4f(const float *data16);
|
||||
|
||||
//__forceinline__
|
||||
__host__ __device__ float& operator[](const int &I);
|
||||
__host__ __device__ float& operator()(const int &I, const int &J);
|
||||
__host__ __device__ float& at(const int &I, const int &J);
|
||||
|
||||
__host__ __device__ const float& operator[](const int &I) const;
|
||||
__host__ __device__ const float& operator()(const int &I, const int &J) const;
|
||||
__host__ __device__ const float& at(const int &I, const int &J) const;
|
||||
|
||||
__host__ __device__ cumat4f operator+(const cumat4f &rhs);
|
||||
__host__ __device__ cumat4f operator-(const cumat4f &rhs);
|
||||
__host__ __device__ cumat4f operator*(const float &rhs);
|
||||
__host__ __device__ cumat4f operator/(const float &rhs);
|
||||
__host__ __device__ cuvect4f operator*(const cuvect4f &rhs);
|
||||
__host__ __device__ cumat4f operator*(const cumat4f &rhs);
|
||||
__host__ __device__ friend cumat4f operator-(const cumat4f &rhs);
|
||||
|
||||
__host__ __device__ float det();
|
||||
__host__ __device__ cumat4f transpose();
|
||||
__host__ __device__ cumat4f inverse();
|
||||
|
||||
__host__ __device__ float* data(); //pointer to float[9] representation of matrix
|
||||
__host__ __device__ const float* data() const; //pointer to float[9] representation of matrix
|
||||
|
||||
//In place operations (to save GPU register use)
|
||||
__host__ __device__ cumat4f& operator+=(const cumat4f &rhs);
|
||||
__host__ __device__ cumat4f& operator-=(const cumat4f &rhs);
|
||||
__host__ __device__ cumat4f& operator/=(const float &rhs);
|
||||
__host__ __device__ cumat4f& operator*=(const float &rhs);
|
||||
__host__ __device__ cumat4f& operator*=(const cumat4f &rhs);
|
||||
};
|
||||
|
||||
__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
|
||||
|
||||
@ -115,6 +115,16 @@ __host__ __device__ cuvect4f& cuvect4f::operator/=(const float &rhs)
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect4f operator-(const cuvect4f &rhs)
|
||||
{
|
||||
cuvect4f ret;
|
||||
ret[0] = -rhs[0];
|
||||
ret[1] = -rhs[1];
|
||||
ret[2] = -rhs[2];
|
||||
ret[3] = -rhs[3];
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ cuvect4f::cuvect4f(const float &_x, const float &_y, const float &_z, const float &_w)
|
||||
{
|
||||
@ -122,302 +132,545 @@ __host__ __device__ cuvect4f::cuvect4f(const float &_x, const float &_y, const f
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ cumat4f::cumat4f()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
m00 = 0.0f;
|
||||
m10 = 0.0f;
|
||||
m20 = 0.0f;
|
||||
m30 = 0.0f;
|
||||
|
||||
m01 = 0.0f;
|
||||
m11 = 0.0f;
|
||||
m21 = 0.0f;
|
||||
m31 = 0.0f;
|
||||
|
||||
m02 = 0.0f;
|
||||
m12 = 0.0f;
|
||||
m22 = 0.0f;
|
||||
m32 = 0.0f;
|
||||
|
||||
m03 = 0.0f;
|
||||
m13 = 0.0f;
|
||||
m23 = 0.0f;
|
||||
m33 = 0.0f;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f::~cumat4f()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
m00 = 0.0f;
|
||||
m10 = 0.0f;
|
||||
m20 = 0.0f;
|
||||
m30 = 0.0f;
|
||||
|
||||
m01 = 0.0f;
|
||||
m11 = 0.0f;
|
||||
m21 = 0.0f;
|
||||
m31 = 0.0f;
|
||||
|
||||
m02 = 0.0f;
|
||||
m12 = 0.0f;
|
||||
m22 = 0.0f;
|
||||
m32 = 0.0f;
|
||||
|
||||
m03 = 0.0f;
|
||||
m13 = 0.0f;
|
||||
m23 = 0.0f;
|
||||
m33 = 0.0f;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat4f::operator[](const int I)
|
||||
__host__ __device__ cumat4f::cumat4f(
|
||||
const float & _m00, const float & _m01, const float & _m02, const float & _m03,
|
||||
const float & _m10, const float & _m11, const float & _m12, const float & _m13,
|
||||
const float & _m20, const float & _m21, const float & _m22, const float & _m23,
|
||||
const float & _m30, const float & _m31, const float & _m32, const float & _m33
|
||||
)
|
||||
{
|
||||
return dat[I];
|
||||
m00 = _m00;
|
||||
m10 = _m10;
|
||||
m20 = _m20;
|
||||
m30 = _m30;
|
||||
|
||||
m01 = _m01;
|
||||
m11 = _m11;
|
||||
m21 = _m21;
|
||||
m31 = _m31;
|
||||
|
||||
m02 = _m02;
|
||||
m12 = _m12;
|
||||
m22 = _m22;
|
||||
m32 = _m32;
|
||||
|
||||
m03 = _m03;
|
||||
m13 = _m13;
|
||||
m23 = _m23;
|
||||
m33 = _m33;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat4f::operator()(const int I, const int J)
|
||||
__host__ __device__ cumat4f::cumat4f(const float *data16)
|
||||
{
|
||||
return dat[I+4*J];
|
||||
m00 = data16[0];
|
||||
m10 = data16[1];
|
||||
m20 = data16[2];
|
||||
m30 = data16[3];
|
||||
|
||||
m01 = data16[4];
|
||||
m11 = data16[5];
|
||||
m21 = data16[6];
|
||||
m31 = data16[7];
|
||||
|
||||
m02 = data16[8];
|
||||
m12 = data16[9];
|
||||
m22 = data16[10];
|
||||
m32 = data16[11];
|
||||
|
||||
m03 = data16[12];
|
||||
m13 = data16[13];
|
||||
m23 = data16[14];
|
||||
m33 = data16[15];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat4f::at(const int I, const int J)
|
||||
__host__ __device__ float& cumat4f::operator[](const int &I)
|
||||
{
|
||||
return dat[I+4*J];
|
||||
switch(I)
|
||||
{
|
||||
case 0: return m00;
|
||||
case 1: return m10;
|
||||
case 2: return m20;
|
||||
case 3: return m30;
|
||||
|
||||
case 4: return m01;
|
||||
case 5: return m11;
|
||||
case 6: return m21;
|
||||
case 7: return m31;
|
||||
|
||||
case 8: return m02;
|
||||
case 9: return m12;
|
||||
case 10: return m22;
|
||||
case 11: return m32;
|
||||
|
||||
case 12: return m03;
|
||||
case 13: return m13;
|
||||
case 14: return m23;
|
||||
case 15: return m33;
|
||||
}
|
||||
|
||||
return m00;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator+(cumat4f lhs)
|
||||
__host__ __device__ float& cumat4f::operator()(const int &I, const int &J)
|
||||
{
|
||||
return (*this)[I+4*J];
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ float& cumat4f::at(const int &I, const int &J)
|
||||
{
|
||||
return (*this)[I+4*J];
|
||||
}
|
||||
|
||||
__host__ __device__ const float& cumat4f::operator[](const int &I) const
|
||||
{
|
||||
switch(I)
|
||||
{
|
||||
case 0: return m00;
|
||||
case 1: return m10;
|
||||
case 2: return m20;
|
||||
case 3: return m30;
|
||||
|
||||
case 4: return m01;
|
||||
case 5: return m11;
|
||||
case 6: return m21;
|
||||
case 7: return m31;
|
||||
|
||||
case 8: return m02;
|
||||
case 9: return m12;
|
||||
case 10: return m22;
|
||||
case 11: return m32;
|
||||
|
||||
case 12: return m03;
|
||||
case 13: return m13;
|
||||
case 14: return m23;
|
||||
case 15: return m33;
|
||||
}
|
||||
|
||||
return m00;
|
||||
}
|
||||
|
||||
__host__ __device__ const float& cumat4f::operator()(const int &I, const int &J) const
|
||||
{
|
||||
return (*this)[I+4*J];
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ const float& cumat4f::at(const int &I, const int &J) const
|
||||
{
|
||||
return (*this)[I+4*J];
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator+(const cumat4f &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] + lhs.dat[I];
|
||||
}
|
||||
ret.m00 = m00 + rhs.m00;
|
||||
ret.m10 = m10 + rhs.m10;
|
||||
ret.m20 = m20 + rhs.m20;
|
||||
ret.m30 = m30 + rhs.m30;
|
||||
ret.m01 = m01 + rhs.m01;
|
||||
ret.m11 = m11 + rhs.m11;
|
||||
ret.m21 = m21 + rhs.m21;
|
||||
ret.m31 = m31 + rhs.m31;
|
||||
ret.m02 = m02 + rhs.m02;
|
||||
ret.m12 = m12 + rhs.m12;
|
||||
ret.m22 = m22 + rhs.m22;
|
||||
ret.m32 = m32 + rhs.m32;
|
||||
ret.m03 = m03 + rhs.m03;
|
||||
ret.m13 = m13 + rhs.m13;
|
||||
ret.m23 = m23 + rhs.m23;
|
||||
ret.m33 = m33 + rhs.m33;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator-(cumat4f lhs)
|
||||
__host__ __device__ cumat4f cumat4f::operator-(const cumat4f &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] - lhs.dat[I];
|
||||
}
|
||||
ret.m00 = m00 - rhs.m00;
|
||||
ret.m10 = m10 - rhs.m10;
|
||||
ret.m20 = m20 - rhs.m20;
|
||||
ret.m30 = m30 - rhs.m30;
|
||||
ret.m01 = m01 - rhs.m01;
|
||||
ret.m11 = m11 - rhs.m11;
|
||||
ret.m21 = m21 - rhs.m21;
|
||||
ret.m31 = m31 - rhs.m31;
|
||||
ret.m02 = m02 - rhs.m02;
|
||||
ret.m12 = m12 - rhs.m12;
|
||||
ret.m22 = m22 - rhs.m22;
|
||||
ret.m32 = m32 - rhs.m32;
|
||||
ret.m03 = m03 - rhs.m03;
|
||||
ret.m13 = m13 - rhs.m13;
|
||||
ret.m23 = m23 - rhs.m23;
|
||||
ret.m33 = m33 - rhs.m33;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator*(float lhs)
|
||||
__host__ __device__ cumat4f cumat4f::operator*(const float &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]*lhs;
|
||||
}
|
||||
ret.m00 = m00 * rhs;
|
||||
ret.m10 = m10 * rhs;
|
||||
ret.m20 = m20 * rhs;
|
||||
ret.m30 = m30 * rhs;
|
||||
ret.m01 = m01 * rhs;
|
||||
ret.m11 = m11 * rhs;
|
||||
ret.m21 = m21 * rhs;
|
||||
ret.m31 = m31 * rhs;
|
||||
ret.m02 = m02 * rhs;
|
||||
ret.m12 = m12 * rhs;
|
||||
ret.m22 = m22 * rhs;
|
||||
ret.m32 = m32 * rhs;
|
||||
ret.m03 = m03 * rhs;
|
||||
ret.m13 = m13 * rhs;
|
||||
ret.m23 = m23 * rhs;
|
||||
ret.m33 = m33 * rhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator/(float lhs)
|
||||
__host__ __device__ cumat4f cumat4f::operator/(const float &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I;
|
||||
for(I=0;I<16;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]/lhs;
|
||||
}
|
||||
ret.m00 = m00 / rhs;
|
||||
ret.m10 = m10 / rhs;
|
||||
ret.m20 = m20 / rhs;
|
||||
ret.m30 = m30 / rhs;
|
||||
ret.m01 = m01 / rhs;
|
||||
ret.m11 = m11 / rhs;
|
||||
ret.m21 = m21 / rhs;
|
||||
ret.m31 = m31 / rhs;
|
||||
ret.m02 = m02 / rhs;
|
||||
ret.m12 = m12 / rhs;
|
||||
ret.m22 = m22 / rhs;
|
||||
ret.m32 = m32 / rhs;
|
||||
ret.m03 = m03 / rhs;
|
||||
ret.m13 = m13 / rhs;
|
||||
ret.m23 = m23 / rhs;
|
||||
ret.m33 = m33 / rhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect4f cumat4f::operator*(cuvect4f lhs)
|
||||
__host__ __device__ cuvect4f cumat4f::operator*(const cuvect4f &rhs)
|
||||
{
|
||||
cuvect4f ret = cuvect4f(0.0,0.0,0.0,0.0);
|
||||
int I,J;
|
||||
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
for(J=0;J<4;J++)
|
||||
{
|
||||
ret[I] = ret[I] + this->at(I,J)*lhs[J];
|
||||
}
|
||||
}
|
||||
|
||||
cuvect4f ret;
|
||||
ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z + m03*rhs.w;
|
||||
ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z + m13*rhs.w;
|
||||
ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z + m23*rhs.w;
|
||||
ret.w = m30*rhs.x + m31*rhs.y + m32*rhs.z + m33*rhs.w;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator*(cumat4f lhs)
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::operator*(const cumat4f &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I,J,K;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
for(J=0;J<4;J++)
|
||||
{
|
||||
ret(I,J) = 0;
|
||||
for(K=0;K<4;K++)
|
||||
{
|
||||
ret(I,J) = ret(I,J) + this->at(I,K) * lhs(K,J);
|
||||
}
|
||||
}
|
||||
}
|
||||
ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20 + m03*rhs.m30;
|
||||
ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21 + m03*rhs.m31;
|
||||
ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22 + m03*rhs.m32;
|
||||
ret.m03 = m00*rhs.m03 + m01*rhs.m13 + m02*rhs.m23 + m03*rhs.m33;
|
||||
ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20 + m13*rhs.m30;
|
||||
ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21 + m13*rhs.m31;
|
||||
ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22 + m13*rhs.m32;
|
||||
ret.m13 = m10*rhs.m03 + m11*rhs.m13 + m12*rhs.m23 + m13*rhs.m33;
|
||||
ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20 + m23*rhs.m30;
|
||||
ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21 + m23*rhs.m31;
|
||||
ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22 + m23*rhs.m32;
|
||||
ret.m23 = m20*rhs.m03 + m21*rhs.m13 + m22*rhs.m23 + m23*rhs.m33;
|
||||
ret.m30 = m30*rhs.m00 + m31*rhs.m10 + m32*rhs.m20 + m33*rhs.m30;
|
||||
ret.m31 = m30*rhs.m01 + m31*rhs.m11 + m32*rhs.m21 + m33*rhs.m31;
|
||||
ret.m32 = m30*rhs.m02 + m31*rhs.m12 + m32*rhs.m22 + m33*rhs.m32;
|
||||
ret.m33 = m30*rhs.m03 + m31*rhs.m13 + m32*rhs.m23 + m33*rhs.m33;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::transpose()
|
||||
{
|
||||
cumat4f q;
|
||||
int I,J;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
for(J=0;J<4;J++)
|
||||
{
|
||||
q(I,J) = this->at(J,I);
|
||||
}
|
||||
}
|
||||
q.m00 = m00;
|
||||
q.m10 = m01;
|
||||
q.m20 = m02;
|
||||
q.m30 = m03;
|
||||
|
||||
q.m01 = m10;
|
||||
q.m11 = m11;
|
||||
q.m21 = m12;
|
||||
q.m31 = m13;
|
||||
|
||||
q.m02 = m20;
|
||||
q.m12 = m21;
|
||||
q.m22 = m22;
|
||||
q.m32 = m23;
|
||||
|
||||
q.m03 = m30;
|
||||
q.m13 = m31;
|
||||
q.m23 = m32;
|
||||
q.m33 = m33;
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
__host__ __device__ float cumat4f::det()
|
||||
{
|
||||
float a00,a01,a02,a03;
|
||||
float a10,a11,a12,a13;
|
||||
float a20,a21,a22,a23;
|
||||
float a30,a31,a32,a33;
|
||||
float det;
|
||||
float dt;
|
||||
dt = m03*m12*m21*m30 -
|
||||
m02*m13*m21*m30 -
|
||||
m03*m11*m22*m30 +
|
||||
m01*m13*m22*m30 +
|
||||
m02*m11*m23*m30 -
|
||||
m01*m12*m23*m30 -
|
||||
m03*m12*m20*m31 +
|
||||
m02*m13*m20*m31 +
|
||||
m03*m10*m22*m31 -
|
||||
m00*m13*m22*m31 -
|
||||
m02*m10*m23*m31 +
|
||||
m00*m12*m23*m31 +
|
||||
m03*m11*m20*m32 -
|
||||
m01*m13*m20*m32 -
|
||||
m03*m10*m21*m32 +
|
||||
m00*m13*m21*m32 +
|
||||
m01*m10*m23*m32 -
|
||||
m00*m11*m23*m32 -
|
||||
m02*m11*m20*m33 +
|
||||
m01*m12*m20*m33 +
|
||||
m02*m10*m21*m33 -
|
||||
m00*m12*m21*m33 -
|
||||
m01*m10*m22*m33 +
|
||||
m00*m11*m22*m33;
|
||||
|
||||
a00 = this->at(0,0);
|
||||
a01 = this->at(0,1);
|
||||
a02 = this->at(0,2);
|
||||
a03 = this->at(0,3);
|
||||
a10 = this->at(1,0);
|
||||
a11 = this->at(1,1);
|
||||
a12 = this->at(1,2);
|
||||
a13 = this->at(1,3);
|
||||
a20 = this->at(2,0);
|
||||
a21 = this->at(2,1);
|
||||
a22 = this->at(2,2);
|
||||
a23 = this->at(2,3);
|
||||
a30 = this->at(3,0);
|
||||
a31 = this->at(3,1);
|
||||
a32 = this->at(3,2);
|
||||
a33 = this->at(3,3);
|
||||
|
||||
det = a03*a12*a21*a30 -
|
||||
a02*a13*a21*a30 -
|
||||
a03*a11*a22*a30 +
|
||||
a01*a13*a22*a30 +
|
||||
a02*a11*a23*a30 -
|
||||
a01*a12*a23*a30 -
|
||||
a03*a12*a20*a31 +
|
||||
a02*a13*a20*a31 +
|
||||
a03*a10*a22*a31 -
|
||||
a00*a13*a22*a31 -
|
||||
a02*a10*a23*a31 +
|
||||
a00*a12*a23*a31 +
|
||||
a03*a11*a20*a32 -
|
||||
a01*a13*a20*a32 -
|
||||
a03*a10*a21*a32 +
|
||||
a00*a13*a21*a32 +
|
||||
a01*a10*a23*a32 -
|
||||
a00*a11*a23*a32 -
|
||||
a02*a11*a20*a33 +
|
||||
a01*a12*a20*a33 +
|
||||
a02*a10*a21*a33 -
|
||||
a00*a12*a21*a33 -
|
||||
a01*a10*a22*a33 +
|
||||
a00*a11*a22*a33;
|
||||
|
||||
return det;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f minverse(cumat4f ma)
|
||||
{
|
||||
cumat4f mb;
|
||||
|
||||
float a00,a01,a02,a03;
|
||||
float a10,a11,a12,a13;
|
||||
float a20,a21,a22,a23;
|
||||
float a30,a31,a32,a33;
|
||||
|
||||
float b00,b01,b02,b03;
|
||||
float b10,b11,b12,b13;
|
||||
float b20,b21,b22,b23;
|
||||
float b30,b31,b32,b33;
|
||||
|
||||
float det = 0.0;
|
||||
|
||||
a00 = ma.at(0,0);
|
||||
a01 = ma.at(0,1);
|
||||
a02 = ma.at(0,2);
|
||||
a03 = ma.at(0,3);
|
||||
a10 = ma.at(1,0);
|
||||
a11 = ma.at(1,1);
|
||||
a12 = ma.at(1,2);
|
||||
a13 = ma.at(1,3);
|
||||
a20 = ma.at(2,0);
|
||||
a21 = ma.at(2,1);
|
||||
a22 = ma.at(2,2);
|
||||
a23 = ma.at(2,3);
|
||||
a30 = ma.at(3,0);
|
||||
a31 = ma.at(3,1);
|
||||
a32 = ma.at(3,2);
|
||||
a33 = ma.at(3,3);
|
||||
|
||||
det = a03*a12*a21*a30 -
|
||||
a02*a13*a21*a30 -
|
||||
a03*a11*a22*a30 +
|
||||
a01*a13*a22*a30 +
|
||||
a02*a11*a23*a30 -
|
||||
a01*a12*a23*a30 -
|
||||
a03*a12*a20*a31 +
|
||||
a02*a13*a20*a31 +
|
||||
a03*a10*a22*a31 -
|
||||
a00*a13*a22*a31 -
|
||||
a02*a10*a23*a31 +
|
||||
a00*a12*a23*a31 +
|
||||
a03*a11*a20*a32 -
|
||||
a01*a13*a20*a32 -
|
||||
a03*a10*a21*a32 +
|
||||
a00*a13*a21*a32 +
|
||||
a01*a10*a23*a32 -
|
||||
a00*a11*a23*a32 -
|
||||
a02*a11*a20*a33 +
|
||||
a01*a12*a20*a33 +
|
||||
a02*a10*a21*a33 -
|
||||
a00*a12*a21*a33 -
|
||||
a01*a10*a22*a33 +
|
||||
a00*a11*a22*a33;
|
||||
|
||||
if(det*det>1.0E-30)
|
||||
{
|
||||
b00 = -a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33;
|
||||
b01 = a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33;
|
||||
b02 = -a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33;
|
||||
b03 = a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23;
|
||||
b10 = a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33;
|
||||
b11 = -a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33;
|
||||
b12 = a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33;
|
||||
b13 = -a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23;
|
||||
b20 = -a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33;
|
||||
b21 = a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33;
|
||||
b22 = -a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33;
|
||||
b23 = a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23;
|
||||
b30 = a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32;
|
||||
b31 = -a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32;
|
||||
b32 = a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32;
|
||||
b33 = -a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22;
|
||||
b00 = b00/det;
|
||||
b01 = b01/det;
|
||||
b02 = b02/det;
|
||||
b03 = b03/det;
|
||||
b10 = b10/det;
|
||||
b11 = b11/det;
|
||||
b12 = b12/det;
|
||||
b13 = b13/det;
|
||||
b20 = b20/det;
|
||||
b21 = b21/det;
|
||||
b22 = b22/det;
|
||||
b23 = b23/det;
|
||||
b30 = b30/det;
|
||||
b31 = b31/det;
|
||||
b32 = b32/det;
|
||||
b33 = b33/det;
|
||||
mb.at(0,0) = b00;
|
||||
mb.at(0,1) = b01;
|
||||
mb.at(0,2) = b02;
|
||||
mb.at(0,3) = b03;
|
||||
mb.at(1,0) = b10;
|
||||
mb.at(1,1) = b11;
|
||||
mb.at(1,2) = b12;
|
||||
mb.at(1,3) = b13;
|
||||
mb.at(2,0) = b20;
|
||||
mb.at(2,1) = b21;
|
||||
mb.at(2,2) = b22;
|
||||
mb.at(2,3) = b23;
|
||||
mb.at(3,0) = b30;
|
||||
mb.at(3,1) = b31;
|
||||
mb.at(3,2) = b32;
|
||||
mb.at(3,3) = b33;
|
||||
}
|
||||
//this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again!
|
||||
return mb;
|
||||
return dt;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f cumat4f::inverse()
|
||||
{
|
||||
return minverse(*this);
|
||||
cumat4f mret;
|
||||
float dt = this->det();
|
||||
float idet;
|
||||
|
||||
if(dt>1E-7 || dt<-1E-7)
|
||||
{
|
||||
idet = 1.0f/dt;
|
||||
mret.m00 = -m13*m22*m31 + m12*m23*m31 + m13*m21*m32 - m11*m23*m32 - m12*m21*m33 + m11*m22*m33;
|
||||
mret.m01 = m03*m22*m31 - m02*m23*m31 - m03*m21*m32 + m01*m23*m32 + m02*m21*m33 - m01*m22*m33;
|
||||
mret.m02 = -m03*m12*m31 + m02*m13*m31 + m03*m11*m32 - m01*m13*m32 - m02*m11*m33 + m01*m12*m33;
|
||||
mret.m03 = m03*m12*m21 - m02*m13*m21 - m03*m11*m22 + m01*m13*m22 + m02*m11*m23 - m01*m12*m23;
|
||||
mret.m10 = m13*m22*m30 - m12*m23*m30 - m13*m20*m32 + m10*m23*m32 + m12*m20*m33 - m10*m22*m33;
|
||||
mret.m11 = -m03*m22*m30 + m02*m23*m30 + m03*m20*m32 - m00*m23*m32 - m02*m20*m33 + m00*m22*m33;
|
||||
mret.m12 = m03*m12*m30 - m02*m13*m30 - m03*m10*m32 + m00*m13*m32 + m02*m10*m33 - m00*m12*m33;
|
||||
mret.m13 = -m03*m12*m20 + m02*m13*m20 + m03*m10*m22 - m00*m13*m22 - m02*m10*m23 + m00*m12*m23;
|
||||
mret.m20 = -m13*m21*m30 + m11*m23*m30 + m13*m20*m31 - m10*m23*m31 - m11*m20*m33 + m10*m21*m33;
|
||||
mret.m21 = m03*m21*m30 - m01*m23*m30 - m03*m20*m31 + m00*m23*m31 + m01*m20*m33 - m00*m21*m33;
|
||||
mret.m22 = -m03*m11*m30 + m01*m13*m30 + m03*m10*m31 - m00*m13*m31 - m01*m10*m33 + m00*m11*m33;
|
||||
mret.m23 = m03*m11*m20 - m01*m13*m20 - m03*m10*m21 + m00*m13*m21 + m01*m10*m23 - m00*m11*m23;
|
||||
mret.m30 = m12*m21*m30 - m11*m22*m30 - m12*m20*m31 + m10*m22*m31 + m11*m20*m32 - m10*m21*m32;
|
||||
mret.m31 = -m02*m21*m30 + m01*m22*m30 + m02*m20*m31 - m00*m22*m31 - m01*m20*m32 + m00*m21*m32;
|
||||
mret.m32 = m02*m11*m30 - m01*m12*m30 - m02*m10*m31 + m00*m12*m31 + m01*m10*m32 - m00*m11*m32;
|
||||
mret.m33 = -m02*m11*m20 + m01*m12*m20 + m02*m10*m21 - m00*m12*m21 - m01*m10*m22 + m00*m11*m22;
|
||||
mret.m00 *= idet;
|
||||
mret.m01 *= idet;
|
||||
mret.m02 *= idet;
|
||||
mret.m03 *= idet;
|
||||
mret.m10 *= idet;
|
||||
mret.m11 *= idet;
|
||||
mret.m12 *= idet;
|
||||
mret.m13 *= idet;
|
||||
mret.m20 *= idet;
|
||||
mret.m21 *= idet;
|
||||
mret.m22 *= idet;
|
||||
mret.m23 *= idet;
|
||||
mret.m30 *= idet;
|
||||
mret.m31 *= idet;
|
||||
mret.m32 *= idet;
|
||||
mret.m33 *= idet;
|
||||
}
|
||||
else
|
||||
{
|
||||
mret.m00 = inf;
|
||||
mret.m10 = inf;
|
||||
mret.m20 = inf;
|
||||
mret.m30 = inf;
|
||||
mret.m01 = inf;
|
||||
mret.m11 = inf;
|
||||
mret.m21 = inf;
|
||||
mret.m31 = inf;
|
||||
mret.m02 = inf;
|
||||
mret.m12 = inf;
|
||||
mret.m22 = inf;
|
||||
mret.m32 = inf;
|
||||
mret.m03 = inf;
|
||||
mret.m13 = inf;
|
||||
mret.m23 = inf;
|
||||
mret.m33 = inf;
|
||||
}
|
||||
//this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again!
|
||||
|
||||
return mret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f operator-(const cumat4f &rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
ret.m00 = -rhs.m00;
|
||||
ret.m10 = -rhs.m10;
|
||||
ret.m20 = -rhs.m20;
|
||||
ret.m30 = -rhs.m30;
|
||||
ret.m01 = -rhs.m01;
|
||||
ret.m11 = -rhs.m11;
|
||||
ret.m21 = -rhs.m21;
|
||||
ret.m31 = -rhs.m31;
|
||||
ret.m02 = -rhs.m02;
|
||||
ret.m12 = -rhs.m12;
|
||||
ret.m22 = -rhs.m22;
|
||||
ret.m32 = -rhs.m32;
|
||||
ret.m03 = -rhs.m03;
|
||||
ret.m13 = -rhs.m13;
|
||||
ret.m23 = -rhs.m23;
|
||||
ret.m33 = -rhs.m33;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f& cumat4f::operator+=(const cumat4f &rhs)
|
||||
{
|
||||
m00 += rhs.m00;
|
||||
m01 += rhs.m01;
|
||||
m02 += rhs.m02;
|
||||
m03 += rhs.m03;
|
||||
m10 += rhs.m10;
|
||||
m11 += rhs.m11;
|
||||
m12 += rhs.m12;
|
||||
m13 += rhs.m13;
|
||||
m20 += rhs.m20;
|
||||
m21 += rhs.m21;
|
||||
m22 += rhs.m22;
|
||||
m23 += rhs.m23;
|
||||
m30 += rhs.m30;
|
||||
m31 += rhs.m31;
|
||||
m32 += rhs.m32;
|
||||
m33 += rhs.m33;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f& cumat4f::operator-=(const cumat4f &rhs)
|
||||
{
|
||||
m00 -= rhs.m00;
|
||||
m01 -= rhs.m01;
|
||||
m02 -= rhs.m02;
|
||||
m03 -= rhs.m03;
|
||||
m10 -= rhs.m10;
|
||||
m11 -= rhs.m11;
|
||||
m12 -= rhs.m12;
|
||||
m13 -= rhs.m13;
|
||||
m20 -= rhs.m20;
|
||||
m21 -= rhs.m21;
|
||||
m22 -= rhs.m22;
|
||||
m23 -= rhs.m23;
|
||||
m30 -= rhs.m30;
|
||||
m31 -= rhs.m31;
|
||||
m32 -= rhs.m32;
|
||||
m33 -= rhs.m33;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f& cumat4f::operator*=(const float &rhs)
|
||||
{
|
||||
m00 *= rhs;
|
||||
m01 *= rhs;
|
||||
m02 *= rhs;
|
||||
m03 *= rhs;
|
||||
m10 *= rhs;
|
||||
m11 *= rhs;
|
||||
m12 *= rhs;
|
||||
m13 *= rhs;
|
||||
m20 *= rhs;
|
||||
m21 *= rhs;
|
||||
m22 *= rhs;
|
||||
m23 *= rhs;
|
||||
m30 *= rhs;
|
||||
m31 *= rhs;
|
||||
m32 *= rhs;
|
||||
m33 *= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f& cumat4f::operator/=(const float &rhs)
|
||||
{
|
||||
float irhs = 1.0f / rhs;
|
||||
m00 *= irhs;
|
||||
m01 *= irhs;
|
||||
m02 *= irhs;
|
||||
m03 *= irhs;
|
||||
m10 *= irhs;
|
||||
m11 *= irhs;
|
||||
m12 *= irhs;
|
||||
m13 *= irhs;
|
||||
m20 *= irhs;
|
||||
m21 *= irhs;
|
||||
m22 *= irhs;
|
||||
m23 *= irhs;
|
||||
m30 *= irhs;
|
||||
m31 *= irhs;
|
||||
m32 *= irhs;
|
||||
m33 *= irhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f& cumat4f::operator*=(const cumat4f &rhs)
|
||||
{
|
||||
cumat4f tmp = (*this);
|
||||
(*this) = tmp*rhs;
|
||||
return *this;;
|
||||
}
|
||||
|
||||
|
||||
@ -453,22 +706,7 @@ __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b)
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect4f operator-(cuvect4f rhs)
|
||||
{
|
||||
cuvect4f ret;
|
||||
ret[0] = -rhs[0];
|
||||
ret[1] = -rhs[1];
|
||||
ret[2] = -rhs[2];
|
||||
ret[3] = -rhs[3];
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat4f operator-(cumat4f rhs)
|
||||
{
|
||||
cumat4f ret;
|
||||
int I;
|
||||
for(I=0;I<16;I++) ret[I] = -rhs[I];
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}; //namespace amscuda
|
||||
|
||||
Reference in New Issue
Block a user