codegenerated matrix logic

2026-04-13 11:42:40 -04:00
parent 978dad618f
commit 17a1c3f84a
30 changed files with 6468 additions and 1291 deletions
--- a/include/amsculib3/math/cuvec2f.hpp
+++ b/include/amsculib3/math/cuvec2f.hpp
@ -45,8 +45,8 @@ namespace amscuda
        __host__ __device__ cumat2f(); 
        __host__ __device__ ~cumat2f(); 
        __host__ __device__ cumat2f(
-            const float& _m00, const float& _m10, 
-            const float& _m01, const float& _m11
+            const float& _m00, const float& _m01, 
+            const float& _m10, const float& _m11
        );
        __host__ __device__ cumat2f(const float* data4);
        
--- a/include/amsculib3/math/cuvec3f.hpp
+++ b/include/amsculib3/math/cuvec3f.hpp
@ -11,79 +11,76 @@ namespace amscuda
        float y;
        float z;

-        __host__ __device__ cuvec3f();
-        __host__ __device__ ~cuvec3f();
-        __host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z);
-        
-        
+        __host__ __device__ cuvec3f(); 
+        __host__ __device__ ~cuvec3f(); 
+        __host__ __device__ cuvec3f(const float &_x, const float &_y, const float &_z); 
+
        __host__ __device__ float& operator[](const int &I);
        __host__ __device__ const float& operator[](const int &I) const;

-        __host__ __device__ cuvec3f operator+(const cuvec3f &rhs);
-        __host__ __device__ cuvec3f operator-(const cuvec3f &rhs);
-
-        __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float &rhs);
-        __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float &rhs);
-        __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f &rhs);
-        __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f &rhs);
-
-        __host__ __device__ friend cuvec3f operator-(const cuvec3f &rhs);
-
-        __host__ __device__ cuvec3f& operator+=(const cuvec3f &rhs);
-        __host__ __device__ cuvec3f& operator-=(const cuvec3f &rhs);
-        __host__ __device__ cuvec3f& operator/=(const float &rhs);
-        __host__ __device__ cuvec3f& operator*=(const float &rhs);
-
+        __host__ __device__ cuvec3f operator+(const cuvec3f& rhs) const;
+        __host__ __device__ cuvec3f operator-(const cuvec3f& rhs) const;
+        __host__ __device__ cuvec3f operator*(const cuvec3f& rhs) const; 	 //elementwise product
+        __host__ __device__ cuvec3f operator/(const cuvec3f& rhs) const; 	 //elementwise division
+        
+        __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f& rhs);
+        __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f& rhs);
+        __host__ __device__ friend cuvec3f operator-(const cuvec3f& other);
+        
+        __host__ __device__ cuvec3f& operator+=(const cuvec3f& rhs);
+        __host__ __device__ cuvec3f& operator-=(const cuvec3f& rhs);
+        __host__ __device__ cuvec3f& operator*=(const float& rhs);
+        __host__ __device__ cuvec3f& operator/=(const float& rhs);
    };

    class cumat3f
    {
        public:
-        float m00,m10,m20; //named references to force register use?
-        float m01,m11,m21; //switched to column-major-order to match GLSL/lapack
+        float m00,m10,m20;
+        float m01,m11,m21;
        float m02,m12,m22;
-
-        __host__ __device__ cumat3f();
-        __host__ __device__ ~cumat3f();
-        __host__ __device__ cumat3f(
-            const float & _m00, const float & _m01, const float & _m02,
-            const float & _m10, const float & _m11, const float & _m12,
-            const float & _m20, const float & _m21, const float & _m22
-        );
-
-        __host__ __device__ explicit cumat3f(const float *data9);
        
+        __host__ __device__ cumat3f(); 
+        __host__ __device__ ~cumat3f(); 
+        __host__ __device__ cumat3f(
+            const float& _m00, const float& _m01, const float& _m02, 
+            const float& _m10, const float& _m11, const float& _m12, 
+            const float& _m20, const float& _m21, const float& _m22
+        );
+        __host__ __device__ cumat3f(const float* data9);

        __host__ __device__ float& operator[](const int &I);
-        __host__ __device__ float& operator()(const int &I, const int &J);
-        __host__ __device__ float& at(const int &I, const int &J);
-
        __host__ __device__ const float& operator[](const int &I) const;
+        __host__ __device__ float& operator()(const int &I, const int &J);
        __host__ __device__ const float& operator()(const int &I, const int &J) const;
+        __host__ __device__ float& at(const int &I, const int &J);
        __host__ __device__ const float& at(const int &I, const int &J) const;
-
-        __host__ __device__ cumat3f operator+(const cumat3f &rhs);
-        __host__ __device__ cumat3f operator-(const cumat3f &rhs);
-        __host__ __device__ cumat3f operator*(const float &rhs);
-        __host__ __device__ cumat3f operator/(const float &rhs);
-        __host__ __device__ cuvec3f operator*(const cuvec3f &rhs);
-        __host__ __device__ cumat3f operator*(const cumat3f &rhs);
-        __host__ __device__ friend cumat3f operator-(const cumat3f &rhs);
+        __host__ __device__ float* data(); //pointer to float[9] representation of matrix
+        __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix
+        
+        __host__ __device__ cumat3f operator+(const cumat3f& rhs) const;
+        __host__ __device__ cumat3f operator-(const cumat3f& rhs) const;
+        __host__ __device__ cumat3f operator*(const cumat3f& rhs) const;
+        
+        __host__ __device__ friend cumat3f operator*(const cumat3f& lhs, const float& rhs);
+        __host__ __device__ friend cumat3f operator/(const cumat3f& lhs, const float& rhs);
+        __host__ __device__ friend cumat3f operator*(const float& lhs, const cumat3f& rhs);
+        __host__ __device__ friend cuvec3f operator*(const cumat3f& lhs, const cuvec3f& rhs);
+        __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const cumat3f& rhs);
+        __host__ __device__ friend cumat3f operator-(const cumat3f& rhs);
+        
+        __host__ __device__ cumat3f& operator+=(const cumat3f& rhs);
+        __host__ __device__ cumat3f& operator-=(const cumat3f& rhs);
+        __host__ __device__ cumat3f& operator*=(const float& rhs);
+        __host__ __device__ cumat3f& operator/=(const float& rhs);
+        __host__ __device__ cumat3f& operator*=(const cumat3f& rhs);
+        
+        __host__ __device__ cumat3f transpose() const;
        
        __host__ __device__ float det();
-        __host__ __device__ cumat3f transpose();
        __host__ __device__ cumat3f inverse();
-
-        __host__ __device__ float* data(); //pointer to float[9] representation of matrix
-        __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix
-
-        //In place operations (to save GPU register use)
-        __host__ __device__ cumat3f& operator+=(const cumat3f &rhs);
-        __host__ __device__ cumat3f& operator-=(const cumat3f &rhs);
-        __host__ __device__ cumat3f& operator/=(const float &rhs);
-        __host__ __device__ cumat3f& operator*=(const float &rhs);
-        __host__ __device__ cumat3f& operator*=(const cumat3f &rhs);
-        
    };

    __host__ __device__ float cuvec3f_dot(const cuvec3f &a,const cuvec3f &b);
--- a/include/amsculib3/math/cuvec4f.hpp
+++ b/include/amsculib3/math/cuvec4f.hpp
@ -12,81 +12,83 @@ namespace amscuda
        float z;
        float w;

-        __host__ __device__ cuvec4f();
-        __host__ __device__ ~cuvec4f();
-        __host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w);
-        
+        __host__ __device__ cuvec4f(); 
+        __host__ __device__ ~cuvec4f(); 
+        __host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w); 
        
        __host__ __device__ float& operator[](const int &I);
        __host__ __device__ const float& operator[](const int &I) const;
-
-        __host__ __device__ cuvec4f operator+(const cuvec4f &rhs);
-        __host__ __device__ cuvec4f operator-(const cuvec4f &rhs);
-
-        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float &rhs);
-        __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float &rhs);
-        __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f &rhs);
-        __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f &rhs);
-
-        __host__ __device__ friend cuvec4f operator-(const cuvec4f &rhs);
-
-        __host__ __device__ cuvec4f& operator+=(const cuvec4f &rhs);
-        __host__ __device__ cuvec4f& operator-=(const cuvec4f &rhs);
-        __host__ __device__ cuvec4f& operator/=(const float &rhs);
-        __host__ __device__ cuvec4f& operator*=(const float &rhs);
+        
+        __host__ __device__ cuvec4f operator+(const cuvec4f& rhs) const;
+        __host__ __device__ cuvec4f operator-(const cuvec4f& rhs) const;
+        __host__ __device__ cuvec4f operator*(const cuvec4f& rhs) const; 	 //elementwise product
+        __host__ __device__ cuvec4f operator/(const cuvec4f& rhs) const; 	 //elementwise division
+        
+        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f& rhs);
+        __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f& rhs);
+        __host__ __device__ friend cuvec4f operator-(const cuvec4f& other);
+        
+        __host__ __device__ cuvec4f& operator+=(const cuvec4f& rhs);
+        __host__ __device__ cuvec4f& operator-=(const cuvec4f& rhs);
+        __host__ __device__ cuvec4f& operator*=(const float& rhs);
+        __host__ __device__ cuvec4f& operator/=(const float& rhs);
    };

    class cumat4f
    {
        public:
        //float dat[16];
+        //__forceinline__

-        float m00,m10,m20,m30; //named references to force register use?
-        float m01,m11,m21,m31; //switched to column-major-order to match GLSL/lapack
+
+        float m00,m10,m20,m30;
+        float m01,m11,m21,m31;
        float m02,m12,m22,m32;
        float m03,m13,m23,m33;

-        __host__ __device__ cumat4f();
-        __host__ __device__ ~cumat4f();
+        __host__ __device__ cumat4f(); 
+        __host__ __device__ ~cumat4f(); 
        __host__ __device__ cumat4f(
-            const float & _m00, const float & _m01, const float & _m02, const float & _m03,
-            const float & _m10, const float & _m11, const float & _m12, const float & _m13,
-            const float & _m20, const float & _m21, const float & _m22, const float & _m23,
-            const float & _m30, const float & _m31, const float & _m32, const float & _m33
+            const float& _m00, const float& _m01, const float& _m02, const float& _m03, 
+            const float& _m10, const float& _m11, const float& _m12, const float& _m13, 
+            const float& _m20, const float& _m21, const float& _m22, const float& _m23, 
+            const float& _m30, const float& _m31, const float& _m32, const float& _m33
        );
+        __host__ __device__ cumat4f(const float* data16);

-        __host__ __device__ explicit cumat4f(const float *data16);
-        
-        //__forceinline__
        __host__ __device__ float& operator[](const int &I);
-        __host__ __device__ float& operator()(const int &I, const int &J);
-        __host__ __device__ float& at(const int &I, const int &J);
-
        __host__ __device__ const float& operator[](const int &I) const;
+        __host__ __device__ float& operator()(const int &I, const int &J);
        __host__ __device__ const float& operator()(const int &I, const int &J) const;
+        __host__ __device__ float& at(const int &I, const int &J);
        __host__ __device__ const float& at(const int &I, const int &J) const;

-        __host__ __device__ cumat4f operator+(const cumat4f &rhs);
-        __host__ __device__ cumat4f operator-(const cumat4f &rhs);
-        __host__ __device__ cumat4f operator*(const float &rhs);
-        __host__ __device__ cumat4f operator/(const float &rhs);
-        __host__ __device__ cuvec4f operator*(const cuvec4f &rhs);
-        __host__ __device__ cumat4f operator*(const cumat4f &rhs);
-        __host__ __device__ friend cumat4f operator-(const cumat4f &rhs);
+        __host__ __device__ float* data(); //pointer to float[16] representation of matrix
+        __host__ __device__ const float* data() const; //pointer to float[16] representation of matrix
        
+        __host__ __device__ cumat4f operator+(const cumat4f& rhs) const;
+        __host__ __device__ cumat4f operator-(const cumat4f& rhs) const;
+        __host__ __device__ cumat4f operator*(const cumat4f& rhs) const;
+        
+        __host__ __device__ friend cumat4f operator*(const cumat4f& lhs, const float& rhs);
+        __host__ __device__ friend cumat4f operator/(const cumat4f& lhs, const float& rhs);
+        __host__ __device__ friend cumat4f operator*(const float& lhs, const cumat4f& rhs);
+        __host__ __device__ friend cuvec4f operator*(const cumat4f& lhs, const cuvec4f& rhs);
+        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const cumat4f& rhs);
+        __host__ __device__ friend cumat4f operator-(const cumat4f& rhs);
+        
+        __host__ __device__ cumat4f& operator+=(const cumat4f& rhs);
+        __host__ __device__ cumat4f& operator-=(const cumat4f& rhs);
+        __host__ __device__ cumat4f& operator*=(const float& rhs);
+        __host__ __device__ cumat4f& operator/=(const float& rhs);
+        __host__ __device__ cumat4f& operator*=(const cumat4f& rhs);
+        
+        __host__ __device__ cumat4f transpose() const;
+
        __host__ __device__ float det();
-        __host__ __device__ cumat4f transpose();
        __host__ __device__ cumat4f inverse();
-
-        __host__ __device__ float* data(); //pointer to float[9] representation of matrix
-        __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix
-
-        //In place operations (to save GPU register use)
-        __host__ __device__ cumat4f& operator+=(const cumat4f &rhs);
-        __host__ __device__ cumat4f& operator-=(const cumat4f &rhs);
-        __host__ __device__ cumat4f& operator/=(const float &rhs);
-        __host__ __device__ cumat4f& operator*=(const float &rhs);
-        __host__ __device__ cumat4f& operator*=(const cumat4f &rhs);
    };

    __host__ __device__ float cuvec4f_dot(cuvec4f &a, cuvec4f &b);