// amsculib3/include/amsculib3/math/cuvec4f.hpp

#ifndef __CUVEC4F_HPP__
#define __CUVEC4F_HPP__

namespace amscuda
{

    // Four-component single-precision vector (x, y, z, w) whose members are all
    // callable from host and device code. Only declarations appear in this
    // header; the definitions are provided elsewhere.
    class cuvec4f
    {
        public:
        // Components, declared in the order x, y, z, w.
        float x;
        float y;
        float z;
        float w;

        // Default constructor; the initial component values are chosen by the
        // out-of-line definition and are not visible from this header.
        __host__ __device__ cuvec4f();
        __host__ __device__ ~cuvec4f();
        // Construct from four scalars (presumably assigned to x, y, z, w in
        // that order -- confirm against the definition).
        __host__ __device__ cuvec4f(const float &_x, const float &_y, const float &_z, const float &_w);

        // Indexed component access, mutable and read-only.
        // NOTE(review): presumably I = 0..3 selects x, y, z, w; the handling
        // of an out-of-range index is decided by the definition -- confirm.
        __host__ __device__ float& operator[](const int &I);
        __host__ __device__ const float& operator[](const int &I) const;

        // Vector-vector arithmetic; each returns a new vector and leaves
        // both operands unmodified (const member, const reference argument).
        __host__ __device__ cuvec4f operator+(const cuvec4f& rhs) const;
        __host__ __device__ cuvec4f operator-(const cuvec4f& rhs) const;
        __host__ __device__ cuvec4f operator*(const cuvec4f& rhs) const; 	 //elementwise product
        __host__ __device__ cuvec4f operator/(const cuvec4f& rhs) const; 	 //elementwise division

        // Scalar/vector arithmetic declared as friends so the scalar may be
        // on either side of the operator.
        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float& rhs);
        __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f& rhs);
        __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float& rhs);
        // Scalar on the left of a vector (presumably lhs divided by each
        // component of rhs -- confirm against the definition).
        __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f& rhs);
        // Unary minus.
        __host__ __device__ friend cuvec4f operator-(const cuvec4f& other);

        // Compound assignment; each returns a reference to a cuvec4f
        // (presumably *this) so calls can be chained.
        __host__ __device__ cuvec4f& operator+=(const cuvec4f& rhs);
        __host__ __device__ cuvec4f& operator-=(const cuvec4f& rhs);
        __host__ __device__ cuvec4f& operator*=(const float& rhs);
        __host__ __device__ cuvec4f& operator/=(const float& rhs);
    };

    // 4x4 single-precision matrix stored as sixteen individual float members,
    // with all members callable from host and device code. Only declarations
    // appear in this header; the definitions are provided elsewhere.
    class cumat4f
    {
        public:
        // Disabled alternatives kept from an earlier revision: array storage
        // and a forced-inline qualifier.
        //float dat[16];
        //__forceinline__


        // Element storage. Each declaration line varies the FIRST index digit
        // while holding the second fixed (m00, m10, m20, m30, then m01, ...).
        // NOTE(review): reading mIJ as row I / column J, this is
        // column-by-column declaration order -- confirm against the accessors.
        float m00,m10,m20,m30;
        float m01,m11,m21,m31;
        float m02,m12,m22,m32;
        float m03,m13,m23,m33;

        // Default constructor; the initial element values are chosen by the
        // out-of-line definition and are not visible from this header.
        __host__ __device__ cumat4f();
        __host__ __device__ ~cumat4f();
        // Element-wise constructor. The arguments are listed with the SECOND
        // index digit varying fastest (_m00, _m01, _m02, _m03, then _m10, ...),
        // which is the opposite of the member declaration order above.
        __host__ __device__ cumat4f(
            const float& _m00, const float& _m01, const float& _m02, const float& _m03,
            const float& _m10, const float& _m11, const float& _m12, const float& _m13,
            const float& _m20, const float& _m21, const float& _m22, const float& _m23,
            const float& _m30, const float& _m31, const float& _m32, const float& _m33
        );
        // Construct from a caller-supplied float pointer.
        // NOTE(review): presumably reads 16 floats in the same order that
        // data() exposes them -- confirm against the definition.
        __host__ __device__ cumat4f(const float* data16);

        // Single-index element access (presumably a flat index 0..15 in
        // storage order -- confirm against the definition).
        __host__ __device__ float& operator[](const int &I);
        __host__ __device__ const float& operator[](const int &I) const;
        // Two-index element access; operator() and at() share the same
        // signature shape. NOTE(review): presumably (I, J) = (row, column);
        // whether at() adds range checking is not visible here -- confirm.
        __host__ __device__ float& operator()(const int &I, const int &J);
        __host__ __device__ const float& operator()(const int &I, const int &J) const;
        __host__ __device__ float& at(const int &I, const int &J);
        __host__ __device__ const float& at(const int &I, const int &J) const;

        // Raw float pointer to the matrix contents. "16-float" means sixteen
        // float values, not a half-precision type; the return type is float*.
        // NOTE(review): element order presumably follows the member
        // declaration order above -- confirm against the definition.
        __host__ __device__ float* data(); //pointer to 16-float representation of matrix
        __host__ __device__ const float* data() const; //pointer to 16-float representation of matrix

        // Matrix-matrix arithmetic returning a new matrix; operands are not
        // modified. operator* is presumably the matrix product rather than an
        // elementwise product -- confirm against the definition.
        __host__ __device__ cumat4f operator+(const cumat4f& rhs) const;
        __host__ __device__ cumat4f operator-(const cumat4f& rhs) const;
        __host__ __device__ cumat4f operator*(const cumat4f& rhs) const;

        // Scalar scaling/division, matrix-vector products with the vector on
        // either side, and unary minus, declared as friends.
        __host__ __device__ friend cumat4f operator*(const cumat4f& lhs, const float& rhs);
        __host__ __device__ friend cumat4f operator/(const cumat4f& lhs, const float& rhs);
        __host__ __device__ friend cumat4f operator*(const float& lhs, const cumat4f& rhs);
        // Matrix on the left, vector on the right (presumably M * column vector).
        __host__ __device__ friend cuvec4f operator*(const cumat4f& lhs, const cuvec4f& rhs);
        // Vector on the left, matrix on the right (presumably row vector * M).
        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const cumat4f& rhs);
        __host__ __device__ friend cumat4f operator-(const cumat4f& rhs);

        // Compound assignment; each returns a reference to a cumat4f
        // (presumably *this) so calls can be chained.
        __host__ __device__ cumat4f& operator+=(const cumat4f& rhs);
        __host__ __device__ cumat4f& operator-=(const cumat4f& rhs);
        __host__ __device__ cumat4f& operator*=(const float& rhs);
        __host__ __device__ cumat4f& operator/=(const float& rhs);
        __host__ __device__ cumat4f& operator*=(const cumat4f& rhs);

        // Returns the transposed matrix by value; *this is not modified.
        __host__ __device__ cumat4f transpose() const;

        // Determinant and inverse. Both are declared non-const, so they
        // cannot be called on a const cumat4f.
        // NOTE(review): how inverse() treats a singular matrix is decided by
        // the definition and is not visible here -- confirm.
        __host__ __device__ float det();
        __host__ __device__ cumat4f inverse();
    };

    // Free-function vector helpers, callable from host and device code.
    // All parameters are non-const lvalue references, so temporaries and
    // const cuvec4f objects cannot be passed directly.
    // NOTE(review): the semantics below are inferred from the names only;
    // confirm against the definitions.
    //   cuvec4f_dot       - presumably the dot product of a and b.
    //   cuvec4f_norm      - presumably the Euclidean length of a.
    //   cuvec4f_normalize - presumably a scaled to unit length, returned by
    //                       value; zero-length handling is not visible here.
    //   cuvec4f_proj      - presumably a projection involving a and b; which
    //                       vector is projected onto which is not visible here.
    __host__ __device__ float cuvec4f_dot(cuvec4f &a, cuvec4f &b);
    __host__ __device__ float cuvec4f_norm(cuvec4f &a);
    __host__ __device__ cuvec4f cuvec4f_normalize(cuvec4f &a);
    __host__ __device__ cuvec4f cuvec4f_proj(cuvec4f &a, cuvec4f &b);


};

#endif