Code-generated matrix logic

This commit is contained in:
2026-04-13 11:42:40 -04:00
parent 978dad618f
commit 17a1c3f84a
30 changed files with 6468 additions and 1291 deletions

View File

@ -45,8 +45,8 @@ namespace amscuda
// No-argument constructor and destructor; the __host__ __device__ qualifiers
// make them callable from both host and device code.
__host__ __device__ cumat2f();
__host__ __device__ ~cumat2f();
// Element-wise constructor taking the four matrix entries by const reference.
// NOTE(review): this span is a diff hunk (-45,8 +45,8) whose +/- markers were
// lost, so two argument orderings are listed: first _m00,_m10 / _m01,_m11 and
// then _m00,_m01 / _m10,_m11. Presumably the first pair is the pre-change form
// and the second pair the post-change form -- confirm against the checked-out
// header, since positional call sites depend on the ordering.
__host__ __device__ cumat2f(
const float& _m00, const float& _m10,
const float& _m01, const float& _m11
const float& _m00, const float& _m01,
const float& _m10, const float& _m11
);
// Constructs from a pointer to float data. The parameter name suggests four
// elements are read -- TODO confirm count and element order in the definition.
__host__ __device__ cumat2f(const float* data4);

View File

@ -11,79 +11,76 @@ namespace amscuda
float y;
float z;
// NOTE(review): this span comes from a diff hunk (-11,79 +11,76) whose +/-
// markers were lost, so several declarations appear twice -- presumably the
// pre-change and post-change forms. Confirm against the checked-out header.

// No-argument constructor, destructor and component-wise constructor; all are
// callable from both host and device code (listed twice, see note above).
__host__ __device__ cuvec3f();
__host__ __device__ ~cuvec3f();
__host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z);
__host__ __device__ cuvec3f();
__host__ __device__ ~cuvec3f();
__host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z);
// Indexed component access: mutable and const overloads.
// The index-to-component mapping and any range handling live in the
// definition, which is not visible here -- TODO confirm.
__host__ __device__ float& operator[](const int &I);
__host__ __device__ const float& operator[](const int &I) const;
// First listing of the arithmetic interface. Here the binary vector + and -
// are non-const members, so they cannot be invoked on a const left operand.
__host__ __device__ cuvec3f operator+(const cuvec3f &rhs);
__host__ __device__ cuvec3f operator-(const cuvec3f &rhs);
__host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float &rhs);
__host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float &rhs);
__host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f &rhs);
__host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f &rhs);
__host__ __device__ friend cuvec3f operator-(const cuvec3f &rhs);
__host__ __device__ cuvec3f& operator+=(const cuvec3f &rhs);
__host__ __device__ cuvec3f& operator-=(const cuvec3f &rhs);
__host__ __device__ cuvec3f& operator/=(const float &rhs);
__host__ __device__ cuvec3f& operator*=(const float &rhs);
// Second listing of the arithmetic interface. The binary vector operators are
// const-qualified here, and vector*vector / vector/vector overloads are added.
__host__ __device__ cuvec3f operator+(const cuvec3f& rhs) const;
__host__ __device__ cuvec3f operator-(const cuvec3f& rhs) const;
__host__ __device__ cuvec3f operator*(const cuvec3f& rhs) const; //elementwise product
__host__ __device__ cuvec3f operator/(const cuvec3f& rhs) const; //elementwise division
// Vector/scalar operators declared as friends so the scalar may appear on
// either side. The meaning of scalar / vector is set by the definition --
// TODO confirm.
__host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float& rhs);
__host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f& rhs);
__host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float& rhs);
__host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f& rhs);
// Unary minus (single-operand friend).
__host__ __device__ friend cuvec3f operator-(const cuvec3f& other);
// Compound assignment operators; each returns a reference to a cuvec3f.
__host__ __device__ cuvec3f& operator+=(const cuvec3f& rhs);
__host__ __device__ cuvec3f& operator-=(const cuvec3f& rhs);
__host__ __device__ cuvec3f& operator*=(const float& rhs);
__host__ __device__ cuvec3f& operator/=(const float& rhs);
};
// 3x3 single-precision matrix stored as nine named float members, with every
// member function callable from both host and device code.
// NOTE(review): this class text comes from a diff hunk whose +/- markers were
// lost, so the fields and many member declarations appear twice -- presumably
// the pre-change and post-change forms. Confirm against the checked-out header
// before relying on any single line.
class cumat3f
{
public:
// Element storage. Declaration order is m00,m10,m20, then m01,m11,m21, then
// m02,m12,m22; the original comments below describe this as column-major.
float m00,m10,m20; //named references to force register use?
float m01,m11,m21; //switched to column-major-order to match GLSL/lapack
float m00,m10,m20;
float m01,m11,m21;
float m02,m12,m22;
// No-argument constructor and destructor.
__host__ __device__ cumat3f();
__host__ __device__ ~cumat3f();
// Element-wise constructor. Arguments are listed as _m00,_m01,_m02, then
// _m10,_m11,_m12, then _m20,_m21,_m22, which differs from the member
// declaration order above.
__host__ __device__ cumat3f(
const float & _m00, const float & _m01, const float & _m02,
const float & _m10, const float & _m11, const float & _m12,
const float & _m20, const float & _m21, const float & _m22
);
// Constructs from a pointer to float data; the parameter name suggests nine
// elements -- TODO confirm count and element order in the definition.
// NOTE(review): this constructor is listed once with `explicit` (here) and once
// without (further down). Without `explicit`, a `const float*` converts
// implicitly to cumat3f -- confirm which form is current and intended.
__host__ __device__ explicit cumat3f(const float *data9);
__host__ __device__ cumat3f();
__host__ __device__ ~cumat3f();
__host__ __device__ cumat3f(
const float& _m00, const float& _m01, const float& _m02,
const float& _m10, const float& _m11, const float& _m12,
const float& _m20, const float& _m21, const float& _m22
);
__host__ __device__ cumat3f(const float* data9);
// Element access by single index (operator[]) and by index pair
// (operator() and at()). The mutable overloads are listed twice; const
// overloads returning const float& are listed once. How I and J map to
// row/column, and any range handling, is set by the definitions -- TODO confirm.
__host__ __device__ float& operator[](const int &I);
__host__ __device__ float& operator()(const int &I, const int &J);
__host__ __device__ float& at(const int &I, const int &J);
__host__ __device__ const float& operator[](const int &I) const;
__host__ __device__ float& operator()(const int &I, const int &J);
__host__ __device__ const float& operator()(const int &I, const int &J) const;
__host__ __device__ float& at(const int &I, const int &J);
__host__ __device__ const float& at(const int &I, const int &J) const;
// First listing of the arithmetic interface: non-const member operators for
// matrix +/-, scalar * and /, matrix*vector and matrix*matrix, plus a friend
// unary minus.
__host__ __device__ cumat3f operator+(const cumat3f &rhs);
__host__ __device__ cumat3f operator-(const cumat3f &rhs);
__host__ __device__ cumat3f operator*(const float &rhs);
__host__ __device__ cumat3f operator/(const float &rhs);
__host__ __device__ cuvec3f operator*(const cuvec3f &rhs);
__host__ __device__ cumat3f operator*(const cumat3f &rhs);
__host__ __device__ friend cumat3f operator-(const cumat3f &rhs);
__host__ __device__ float* data(); //pointer to float9 representation of matrix
__host__ __device__ const float* data() const; //pointer to float9 representation of matrix
// Second listing of the arithmetic interface: the matrix/matrix operators are
// const-qualified members, and the scalar and vector products are friends.
__host__ __device__ cumat3f operator+(const cumat3f& rhs) const;
__host__ __device__ cumat3f operator-(const cumat3f& rhs) const;
__host__ __device__ cumat3f operator*(const cumat3f& rhs) const;
__host__ __device__ friend cumat3f operator*(const cumat3f& lhs, const float& rhs);
__host__ __device__ friend cumat3f operator/(const cumat3f& lhs, const float& rhs);
__host__ __device__ friend cumat3f operator*(const float& lhs, const cumat3f& rhs);
// Matrix*vector and vector*matrix products, both returning cuvec3f.
// Presumably column-vector and row-vector products respectively -- TODO confirm.
__host__ __device__ friend cuvec3f operator*(const cumat3f& lhs, const cuvec3f& rhs);
__host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const cumat3f& rhs);
// Unary minus (single-operand friend).
__host__ __device__ friend cumat3f operator-(const cumat3f& rhs);
// Compound assignment operators; each returns a reference to a cumat3f.
__host__ __device__ cumat3f& operator+=(const cumat3f& rhs);
__host__ __device__ cumat3f& operator-=(const cumat3f& rhs);
__host__ __device__ cumat3f& operator*=(const float& rhs);
__host__ __device__ cumat3f& operator/=(const float& rhs);
__host__ __device__ cumat3f& operator*=(const cumat3f& rhs);
// Presumably transpose, determinant and inverse, judging by the names.
// NOTE(review): transpose() is listed both const and non-const, while det()
// and inverse() are non-const only, so they cannot be called on a const
// cumat3f. Behaviour of inverse() for a singular matrix is not visible here.
__host__ __device__ cumat3f transpose() const;
__host__ __device__ float det();
__host__ __device__ cumat3f transpose();
__host__ __device__ cumat3f inverse();
__host__ __device__ float* data(); //pointer to float[9] representation of matrix
__host__ __device__ const float* data() const; //pointer to float[9] representation of matrix
//In place operations (to save GPU register use)
__host__ __device__ cumat3f& operator+=(const cumat3f &rhs);
__host__ __device__ cumat3f& operator-=(const cumat3f &rhs);
__host__ __device__ cumat3f& operator/=(const float &rhs);
__host__ __device__ cumat3f& operator*=(const float &rhs);
__host__ __device__ cumat3f& operator*=(const cumat3f &rhs);
};
// Free function taking two cuvec3f by const reference and returning a float;
// presumably their dot product, judging by the name -- definition not shown here.
__host__ __device__ float cuvec3f_dot(const cuvec3f &a,const cuvec3f &b);

View File

@ -12,81 +12,83 @@ namespace amscuda
float z;
float w;
// NOTE(review): this span comes from a diff hunk (-12,81 +12,83) whose +/-
// markers were lost, so several declarations appear twice -- presumably the
// pre-change and post-change forms. Confirm against the checked-out header.

// No-argument constructor, destructor and component-wise constructor; all are
// callable from both host and device code (listed twice, see note above).
__host__ __device__ cuvec4f();
__host__ __device__ ~cuvec4f();
__host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w);
__host__ __device__ cuvec4f();
__host__ __device__ ~cuvec4f();
__host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w);
// Indexed component access: mutable and const overloads.
// The index-to-component mapping and any range handling live in the
// definition, which is not visible here -- TODO confirm.
__host__ __device__ float& operator[](const int &I);
__host__ __device__ const float& operator[](const int &I) const;
// First listing of the arithmetic interface. Here the binary vector + and -
// are non-const members, so they cannot be invoked on a const left operand.
__host__ __device__ cuvec4f operator+(const cuvec4f &rhs);
__host__ __device__ cuvec4f operator-(const cuvec4f &rhs);
__host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float &rhs);
__host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float &rhs);
__host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f &rhs);
__host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f &rhs);
__host__ __device__ friend cuvec4f operator-(const cuvec4f &rhs);
__host__ __device__ cuvec4f& operator+=(const cuvec4f &rhs);
__host__ __device__ cuvec4f& operator-=(const cuvec4f &rhs);
__host__ __device__ cuvec4f& operator/=(const float &rhs);
__host__ __device__ cuvec4f& operator*=(const float &rhs);
// Second listing of the arithmetic interface. The binary vector operators are
// const-qualified here, and vector*vector / vector/vector overloads are added.
__host__ __device__ cuvec4f operator+(const cuvec4f& rhs) const;
__host__ __device__ cuvec4f operator-(const cuvec4f& rhs) const;
__host__ __device__ cuvec4f operator*(const cuvec4f& rhs) const; //elementwise product
__host__ __device__ cuvec4f operator/(const cuvec4f& rhs) const; //elementwise division
// Vector/scalar operators declared as friends so the scalar may appear on
// either side. The meaning of scalar / vector is set by the definition --
// TODO confirm.
__host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float& rhs);
__host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f& rhs);
__host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float& rhs);
__host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f& rhs);
// Unary minus (single-operand friend).
__host__ __device__ friend cuvec4f operator-(const cuvec4f& other);
// Compound assignment operators; each returns a reference to a cuvec4f.
__host__ __device__ cuvec4f& operator+=(const cuvec4f& rhs);
__host__ __device__ cuvec4f& operator-=(const cuvec4f& rhs);
__host__ __device__ cuvec4f& operator*=(const float& rhs);
__host__ __device__ cuvec4f& operator/=(const float& rhs);
};
// 4x4 single-precision matrix stored as sixteen named float members, with
// every member function callable from both host and device code.
// NOTE(review): this class text comes from a diff hunk whose +/- markers were
// lost, so the fields and many member declarations appear twice -- presumably
// the pre-change and post-change forms. Confirm against the checked-out header
// before relying on any single line.
class cumat4f
{
public:
//float dat[16];
//__forceinline__
// Element storage. Declaration order is m00,m10,m20,m30, then m01..m31, then
// m02..m32, then m03..m33; the original comments below describe this as
// column-major.
float m00,m10,m20,m30; //named references to force register use?
float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack
float m00,m10,m20,m30;
float m01,m11,m21,m31;
float m02,m12,m22,m32;
float m03,m13,m23,m33;
// No-argument constructor and destructor (listed twice, see note above).
__host__ __device__ cumat4f();
__host__ __device__ ~cumat4f();
__host__ __device__ cumat4f();
__host__ __device__ ~cumat4f();
// Element-wise constructor. Arguments are listed as _m00.._m03, then
// _m10.._m13, then _m20.._m23, then _m30.._m33, which differs from the member
// declaration order above. Both spellings of the parameter list appear below.
__host__ __device__ cumat4f(
const float & _m00, const float & _m01, const float & _m02, const float & _m03,
const float & _m10, const float & _m11, const float & _m12, const float & _m13,
const float & _m20, const float & _m21, const float & _m22, const float & _m23,
const float & _m30, const float & _m31, const float & _m32, const float & _m33
const float& _m00, const float& _m01, const float& _m02, const float& _m03,
const float& _m10, const float& _m11, const float& _m12, const float& _m13,
const float& _m20, const float& _m21, const float& _m22, const float& _m23,
const float& _m30, const float& _m31, const float& _m32, const float& _m33
);
// Constructs from a pointer to float data; the parameter name suggests sixteen
// elements -- TODO confirm count and element order in the definition.
// NOTE(review): this constructor is listed once without `explicit` and once
// with it. Without `explicit`, a `const float*` converts implicitly to
// cumat4f -- confirm which form is current and intended.
__host__ __device__ cumat4f(const float* data16);
__host__ __device__ explicit cumat4f(const float *data16);
//__forceinline__
// Element access by single index (operator[]) and by index pair
// (operator() and at()). The mutable overloads are listed twice; const
// overloads returning const float& are listed once. How I and J map to
// row/column, and any range handling, is set by the definitions -- TODO confirm.
__host__ __device__ float& operator[](const int &I);
__host__ __device__ float& operator()(const int &I, const int &J);
__host__ __device__ float& at(const int &I, const int &J);
__host__ __device__ const float& operator[](const int &I) const;
__host__ __device__ float& operator()(const int &I, const int &J);
__host__ __device__ const float& operator()(const int &I, const int &J) const;
__host__ __device__ float& at(const int &I, const int &J);
__host__ __device__ const float& at(const int &I, const int &J) const;
// First listing of the arithmetic interface: non-const member operators for
// matrix +/-, scalar * and /, matrix*vector and matrix*matrix, plus a friend
// unary minus.
__host__ __device__ cumat4f operator+(const cumat4f &rhs);
__host__ __device__ cumat4f operator-(const cumat4f &rhs);
__host__ __device__ cumat4f operator*(const float &rhs);
__host__ __device__ cumat4f operator/(const float &rhs);
__host__ __device__ cuvec4f operator*(const cuvec4f &rhs);
__host__ __device__ cumat4f operator*(const cumat4f &rhs);
__host__ __device__ friend cumat4f operator-(const cumat4f &rhs);
__host__ __device__ float* data(); //pointer to float16 representation of matrix
__host__ __device__ const float* data() const; //pointer to float16 representation of matrix
// Second listing of the arithmetic interface: the matrix/matrix operators are
// const-qualified members, and the scalar and vector products are friends.
__host__ __device__ cumat4f operator+(const cumat4f& rhs) const;
__host__ __device__ cumat4f operator-(const cumat4f& rhs) const;
__host__ __device__ cumat4f operator*(const cumat4f& rhs) const;
__host__ __device__ friend cumat4f operator*(const cumat4f& lhs, const float& rhs);
__host__ __device__ friend cumat4f operator/(const cumat4f& lhs, const float& rhs);
__host__ __device__ friend cumat4f operator*(const float& lhs, const cumat4f& rhs);
// Matrix*vector and vector*matrix products, both returning cuvec4f.
// Presumably column-vector and row-vector products respectively -- TODO confirm.
__host__ __device__ friend cuvec4f operator*(const cumat4f& lhs, const cuvec4f& rhs);
__host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const cumat4f& rhs);
// Unary minus (single-operand friend).
__host__ __device__ friend cumat4f operator-(const cumat4f& rhs);
// Compound assignment operators; each returns a reference to a cumat4f.
__host__ __device__ cumat4f& operator+=(const cumat4f& rhs);
__host__ __device__ cumat4f& operator-=(const cumat4f& rhs);
__host__ __device__ cumat4f& operator*=(const float& rhs);
__host__ __device__ cumat4f& operator/=(const float& rhs);
__host__ __device__ cumat4f& operator*=(const cumat4f& rhs);
// Presumably transpose, determinant and inverse, judging by the names.
// NOTE(review): transpose() is listed both const and non-const, while det()
// and inverse() are non-const only, so they cannot be called on a const
// cumat4f. Behaviour of inverse() for a singular matrix is not visible here.
__host__ __device__ cumat4f transpose() const;
__host__ __device__ float det();
__host__ __device__ cumat4f transpose();
__host__ __device__ cumat4f inverse();
__host__ __device__ float* data(); //pointer to float[16] representation of matrix (comment previously said float[9]; this class declares 16 floats)
__host__ __device__ const float* data() const; //pointer to float[16] representation of matrix (comment previously said float[9]; this class declares 16 floats)
//In place operations (to save GPU register use)
__host__ __device__ cumat4f& operator+=(const cumat4f &rhs);
__host__ __device__ cumat4f& operator-=(const cumat4f &rhs);
__host__ __device__ cumat4f& operator/=(const float &rhs);
__host__ __device__ cumat4f& operator*=(const float &rhs);
__host__ __device__ cumat4f& operator*=(const cumat4f &rhs);
};
// Free function taking two cuvec4f and returning a float; presumably their dot
// product, judging by the name -- definition not shown here.
// NOTE(review): both parameters are non-const lvalue references, so const
// vectors and temporaries cannot be passed. The cuvec3f_dot declaration shown
// earlier in this changeset takes const references; consider matching it here
// together with the definition.
__host__ __device__ float cuvec4f_dot(cuvec4f &a, cuvec4f &b);