autocodegen

2026-04-13 06:45:12 -04:00
parent 26d61540b3
commit 978dad618f
24 changed files with 2145 additions and 468 deletions
--- a/build_linux64/libamsculib3.linux64.a
+++ b/build_linux64/libamsculib3.linux64.a
--- a/build_linux64/objstore/amscu_comp128.o
+++ b/build_linux64/objstore/amscu_comp128.o
--- a/build_linux64/objstore/amscu_comp64.o
+++ b/build_linux64/objstore/amscu_comp64.o
--- a/build_linux64/objstore/amscu_cudafunctions.o
+++ b/build_linux64/objstore/amscu_cudafunctions.o
--- a/build_linux64/objstore/amscu_random.o
+++ b/build_linux64/objstore/amscu_random.o
--- a/build_linux64/objstore/amscuarray.o
+++ b/build_linux64/objstore/amscuarray.o
--- a/build_linux64/objstore/amscuarray_dops.o
+++ b/build_linux64/objstore/amscuarray_dops.o
--- a/build_linux64/objstore/amscugeom.o
+++ b/build_linux64/objstore/amscugeom.o
--- a/build_linux64/objstore/amsculib3.o
+++ b/build_linux64/objstore/amsculib3.o
--- a/build_linux64/objstore/amscumath.o
+++ b/build_linux64/objstore/amscumath.o
--- a/build_linux64/objstore/amscurarray.o
+++ b/build_linux64/objstore/amscurarray.o
--- a/build_linux64/objstore/cuvec2f.o
+++ b/build_linux64/objstore/cuvec2f.o
--- a/build_linux64/objstore/cuvec3f.o
+++ b/build_linux64/objstore/cuvec3f.o
--- a/build_linux64/objstore/cuvec4f.o
+++ b/build_linux64/objstore/cuvec4f.o
--- a/build_linux64/test
+++ b/build_linux64/test
--- a/include/amsculib3/math/cuvec2f.hpp
+++ b/include/amsculib3/math/cuvec2f.hpp
@ -10,72 +10,78 @@ namespace amscuda
        float x;
        float y;

-        __host__ __device__ cuvec2f();
-        __host__ __device__ ~cuvec2f();
-        __host__ __device__ cuvec2f(const float &_x, const float &_y);
-        
-        
+        __host__ __device__ cuvec2f(); 
+        __host__ __device__ ~cuvec2f(); 
+        __host__ __device__ cuvec2f(const float &_x, const float &_y); 
+
        __host__ __device__ float& operator[](const int &I);
        __host__ __device__ const float& operator[](const int &I) const;

-        __host__ __device__ cuvec2f operator+(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f operator-(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f operator*(const float &rhs);
-        __host__ __device__ cuvec2f operator/(const float &rhs);
-        __host__ __device__ friend cuvec2f operator-(const cuvec2f &rhs);
-
-        __host__ __device__ cuvec2f& operator+=(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f& operator-=(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f& operator/=(const float &rhs);
-        __host__ __device__ cuvec2f& operator*=(const float &rhs);
+        __host__ __device__ cuvec2f operator+(const cuvec2f& rhs) const;
+        __host__ __device__ cuvec2f operator-(const cuvec2f& rhs) const;
+        __host__ __device__ cuvec2f operator*(const cuvec2f& rhs) const; 	 //elementwise product
+        __host__ __device__ cuvec2f operator/(const cuvec2f& rhs) const; 	 //elementwise division
+        
+        __host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec2f operator*(const float& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator/(const cuvec2f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec2f operator/(const float& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator-(const cuvec2f& other);
+        
+        __host__ __device__ cuvec2f& operator+=(const cuvec2f& rhs);
+        __host__ __device__ cuvec2f& operator-=(const cuvec2f& rhs);
+        __host__ __device__ cuvec2f& operator*=(const float& rhs);
+        __host__ __device__ cuvec2f& operator/=(const float& rhs);
+    

    };

    class cumat2f
    {
        public:
-        float m00,m10; //named references to force register use?
-        float m01,m11; //switched to column-major-order to match GLSL/lapack
-
-        __host__ __device__ cumat2f();
-        __host__ __device__ ~cumat2f();
-
-        __host__ __device__ cumat2f(
-            const float & _m00, const float & _m01,
-            const float & _m10, const float & _m11
-        );
-
-        __host__ __device__ explicit cumat2f(const float* data2x2);
-
-        __host__ __device__ float& operator[](const int &I);
-        __host__ __device__ float& operator()(const int &I, const int &J);
-        __host__ __device__ float& at(const int &I, const int &J);
-
-        __host__ __device__ const float& operator[](const int &I) const;
-        __host__ __device__ const float& operator()(const int &I, const int &J) const;
-        __host__ __device__ const float& at(const int &I, const int &J) const;
-
-        __host__ __device__ cumat2f operator+(const cumat2f &rhs);
-        __host__ __device__ cumat2f operator-(const cumat2f &rhs);
-        __host__ __device__ cumat2f operator*(const float &rhs);
-        __host__ __device__ cumat2f operator/(const float &rhs);
-        __host__ __device__ cuvec2f operator*(const cuvec2f &rhs);
-        __host__ __device__ cumat2f operator*(const cumat2f &rhs);
-        __host__ __device__ friend cumat2f operator-(const cumat2f &rhs);
+        float m00,m10;
+        float m01,m11;
        
+        __host__ __device__ cumat2f(); 
+        __host__ __device__ ~cumat2f(); 
+        __host__ __device__ cumat2f(
+            const float& _m00, const float& _m10, 
+            const float& _m01, const float& _m11
+        );
+        __host__ __device__ cumat2f(const float* data4);
+        
+        __host__ __device__ float& operator[](const int &I);
+        __host__ __device__ const float& operator[](const int &I) const;
+        __host__ __device__ float& operator()(const int &I, const int &J);
+        __host__ __device__ const float& operator()(const int &I, const int &J) const;
+        __host__ __device__ float& at(const int &I, const int &J);
+        __host__ __device__ const float& at(const int &I, const int &J) const;
+        
+        __host__ __device__ float* data(); //pointer to float[4] representation of matrix (members declared in order m00,m10,m01,m11)
+        __host__ __device__ const float* data() const; //pointer to float[4] representation of matrix (members declared in order m00,m10,m01,m11)
+        
+        //operators
+        __host__ __device__ cumat2f operator+(const cumat2f& rhs) const;
+        __host__ __device__ cumat2f operator-(const cumat2f& rhs) const;
+        __host__ __device__ cumat2f operator*(const cumat2f& rhs) const;
+        __host__ __device__ friend cumat2f operator*(const cumat2f& lhs, const float& rhs);
+        __host__ __device__ friend cumat2f operator/(const cumat2f& lhs, const float& rhs);
+        __host__ __device__ friend cumat2f operator*(const float& lhs, const cumat2f& rhs);
+        __host__ __device__ friend cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs);
+        __host__ __device__ friend cumat2f operator-(const cumat2f& rhs);
+
+        //in place operators to save register use
+        __host__ __device__ cumat2f& operator+=(const cumat2f& rhs);
+        __host__ __device__ cumat2f& operator-=(const cumat2f& rhs);
+        __host__ __device__ cumat2f& operator*=(const float& rhs);
+        __host__ __device__ cumat2f& operator/=(const float& rhs);
+        __host__ __device__ cumat2f& operator*=(const cumat2f& rhs);
+        
+        __host__ __device__ cumat2f transpose() const;
+
        __host__ __device__ float det();
-        __host__ __device__ cumat2f transpose();
        __host__ __device__ cumat2f inverse();
-
-        __host__ __device__ float* data(); //pointer to float[4] representation of matrix
-        __host__ __device__ const float* data() const; //pointer to float[4] representation of matrix
-
-        //In place operations (to save GPU register use)
-        __host__ __device__ cumat2f& operator+=(const cumat2f &rhs);
-        __host__ __device__ cumat2f& operator-=(const cumat2f &rhs);
-        __host__ __device__ cumat2f& operator/=(const float &rhs);
-        __host__ __device__ cumat2f& operator*=(const float &rhs);
-        __host__ __device__ cumat2f& operator*=(const cumat2f &rhs);
    };

    __host__ __device__ float cuvec2f_dot(const cuvec2f &a, const cuvec2f &b);
--- a/include/amsculib3/math/cuvec3f.hpp
+++ b/include/amsculib3/math/cuvec3f.hpp
@ -21,8 +21,12 @@ namespace amscuda

        __host__ __device__ cuvec3f operator+(const cuvec3f &rhs);
        __host__ __device__ cuvec3f operator-(const cuvec3f &rhs);
-        __host__ __device__ cuvec3f operator*(const float &rhs);
-        __host__ __device__ cuvec3f operator/(const float &rhs);
+
+        __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f &rhs);
+        __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f &rhs);
+
        __host__ __device__ friend cuvec3f operator-(const cuvec3f &rhs);

        __host__ __device__ cuvec3f& operator+=(const cuvec3f &rhs);
--- a/include/amsculib3/math/cuvec4f.hpp
+++ b/include/amsculib3/math/cuvec4f.hpp
@ -22,8 +22,12 @@ namespace amscuda

        __host__ __device__ cuvec4f operator+(const cuvec4f &rhs);
        __host__ __device__ cuvec4f operator-(const cuvec4f &rhs);
-        __host__ __device__ cuvec4f operator*(const float &rhs);
-        __host__ __device__ cuvec4f operator/(const float &rhs);
+
+        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f &rhs);
+        __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f &rhs);
+
        __host__ __device__ friend cuvec4f operator-(const cuvec4f &rhs);

        __host__ __device__ cuvec4f& operator+=(const cuvec4f &rhs);
--- a/src/amsculib3/math/cuvec2f.cu
+++ b/src/amsculib3/math/cuvec2f.cu
@ -3,99 +3,468 @@
 namespace amscuda
 {

-    __host__ __device__ cuvec2f::cuvec2f()
-    {
-        x = 0.0f; y = 0.0f;
-        return;
-    }
+	__host__ __device__ cuvec2f::cuvec2f() 
+	{
+		x = 0; y = 0; 
+		return;
+	}

-    __host__ __device__ cuvec2f::~cuvec2f()
-    {
-        x = 0.0f; y = 0.0f;
-        return;
-    }
+	__host__ __device__ cuvec2f::~cuvec2f() 
+	{
+		x = 0; y = 0; 
+		return;
+	}

-    __host__ __device__ float& cuvec2f::operator[](const int &I)
-    {
-        if(I==0) return x;
-        if(I==1) return y;
-        return x;
-    }
+	__host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y)
+	{
+		x = _x; y = _y; 
+		return;
+	}

-    __host__ __device__ const float& cuvec2f::operator[](const int &I) const
-    {
-        if(I==0) return x;
-        if(I==1) return y;
-        return x;
-    }
+	__host__ __device__ float& cuvec2f::operator[](const int &I)
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}

-    __host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        ret.x = x+rhs.x;
-        ret.y = y+rhs.y;
+		return x;
+	}

+	__host__ __device__ const float& cuvec2f::operator[](const int &I) const
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}
+
+		return x;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f& rhs) const
+	{
+		cuvec2f ret;
+		ret.x = x + rhs.x;
+		ret.y = y + rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f& rhs) const
+	{
+		cuvec2f ret;
+		ret.x = x - rhs.x;
+		ret.y = y - rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator*(const cuvec2f& rhs) const
+	{
+		//Elementwise product
+		cuvec2f ret;
+		ret.x = x * rhs.x;
+		ret.y = y * rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator/(const cuvec2f& rhs) const
+	{
+		//Elementwise division
+		cuvec2f ret;
+		ret.x = x / rhs.x;
+		ret.y = y / rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const float& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs.x*rhs;
+		ret.y = lhs.y*rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const float& lhs, const cuvec2f& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs*rhs.x;
+		ret.y = lhs*rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const cuvec2f& lhs, const float& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs.x/rhs;
+		ret.y = lhs.y/rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const float& lhs, const cuvec2f& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs/rhs.x;
+		ret.y = lhs/rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator-(const cuvec2f& other)
+	{
+		cuvec2f ret;
+		ret.x = -other.x;
+		ret.y = -other.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f& rhs)
+	{
+		x += rhs.x;
+		y += rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f& rhs)
+	{
+		x -= rhs.x;
+		y -= rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator*=(const float& rhs)
+	{
+		x *= rhs;
+		y *= rhs;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator/=(const float& rhs)
+	{
+		x /= rhs;
+		y /= rhs;
+		return *this;
+	}
+
+    ////////////////
+    //Matrix Class//
+    ////////////////
+
+    __host__ __device__ cumat2f::cumat2f() 
+	{
+		m00 = 0;
+		m01 = 0;
+
+		m10 = 0;
+		m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::~cumat2f() 
+	{
+		//m00 = 0;
+		//m01 = 0;
+
+		//m10 = 0;
+		//m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f( //builds the matrix from four scalars given column by column
+		const float& _m00, const float& _m10, //column 0 (row 0, row 1)
+		const float& _m01, const float& _m11 //column 1 (row 0, row 1)
+	)
+	{ //NOTE(review): the removed overload took (_m00,_m01,_m10,_m11); row-ordered call sites would now build the transpose - confirm callers
+		m00 = _m00; 
+		m10 = _m10; 
+
+		m01 = _m01; 
+		m11 = _m11; 
+
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f(const float* data4)
+	{
+		m00 = data4[0]; 
+		m10 = data4[1]; 
+
+		m01 = data4[2]; 
+		m11 = data4[3]; 
+
+
+		return;
+	}
+
+	__host__ __device__ float& cumat2f::operator[](const int &I)
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+
+		return m00;
+	}
+
+	__host__ __device__ const float& cumat2f::operator[](const int &I) const
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+
+		return m00;
+	}
+
+	__host__ __device__ float& cumat2f::operator()(const int &I, const int &J)
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ float& cumat2f::at(const int &I, const int &J)
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::at(const int &I, const int &J) const
+	{
+		return (*this)[I+2*J];
+	}
+
+__host__ __device__ float* cumat2f::data()
+{
+	return (float*)this;
+}
+
+__host__ __device__ const float* cumat2f::data() const //read-only pointer to this object, viewed as floats starting at m00
+{
+	return (const float*)this; //cast keeps the const qualifier so callers cannot write through the pointer
+}
+
+__host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const
+{
+	cumat2f ret;
+	ret.m00 = m00 + rhs.m00;
+	ret.m10 = m10 + rhs.m10;
+
+	ret.m01 = m01 + rhs.m01;
+	ret.m11 = m11 + rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const
+{
+	cumat2f ret;
+	ret.m00 = m00 - rhs.m00;
+	ret.m10 = m10 - rhs.m10;
+
+	ret.m01 = m01 - rhs.m01;
+	ret.m11 = m11 - rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const
+{
+	cumat2f ret; //should be zeroed in constructor
+	ret.m00 = m00*rhs.m00 + m01*rhs.m10;
+	ret.m01 = m00*rhs.m01 + m01*rhs.m11;
+
+	ret.m10 = m10*rhs.m00 + m11*rhs.m10;
+	ret.m11 = m10*rhs.m01 + m11*rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs.m00*rhs;
+	ret.m10=lhs.m10*rhs;
+	ret.m01=lhs.m01*rhs;
+	ret.m11=lhs.m11*rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs.m00/rhs;
+	ret.m10=lhs.m10/rhs;
+	ret.m01=lhs.m01/rhs;
+	ret.m11=lhs.m11/rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs*rhs.m00;
+	ret.m10=lhs*rhs.m10;
+	ret.m01=lhs*rhs.m01;
+	ret.m11=lhs*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs)
+{
+	cuvec2f ret;
+	ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y;
+	ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs)
+{
+	cuvec2f ret;
+	ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10;
+	ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator-(const cumat2f& rhs)
+{
+	cumat2f ret;
+	ret.m00 = -rhs.m00;
+	ret.m10 = -rhs.m10;
+	ret.m01 = -rhs.m01;
+	ret.m11 = -rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs)
+{
+	m00 += rhs.m00;
+	m10 += rhs.m10;
+	m01 += rhs.m01;
+	m11 += rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs)
+{
+	m00 -= rhs.m00;
+	m10 -= rhs.m10;
+	m01 -= rhs.m01;
+	m11 -= rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs)
+{
+	m00 *= rhs;
+	m10 *= rhs;
+	m01 *= rhs;
+	m11 *= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs)
+{
+	m00 /= rhs;
+	m10 /= rhs;
+	m01 /= rhs;
+	m11 /= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs)
+{
+	cumat2f tmp = *this;
+	m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10;
+	m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11;
+	m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10;
+	m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f cumat2f::transpose() const
+{
+	cumat2f ret;
+	ret.m00 = m00;
+	ret.m10 = m01;
+	ret.m01 = m10;
+	ret.m11 = m11;
+	return ret;
+}
+
+    
+
+    ///////////////////
+    //Det and Inverse//
+    ///////////////////
+
+    __host__ __device__ float cumat2f::det()
+    {
+        float ret = 0;
+    
+        ret += m00*m11;
+        ret -= m01*m10;
+    
        return ret;
    }
-
-    __host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f &rhs)
+    
+    __host__ __device__ cumat2f cumat2f::inverse()
    {
-        cuvec2f ret;
-        ret.x = x-rhs.x;
-        ret.y = y-rhs.y;
+        cumat2f q;
+        float dt = det();
+        if(dt!=0)
+        {
+            q(0,0) = m11/dt;
+            q(0,1) = -m01/dt;
+            q(1,0) = -m10/dt;
+            q(1,1) = m00/dt;

-        return ret;
+        }
+        else
+        {
+            q(0,0) = inf;
+            q(0,1) = inf;
+            q(1,0) = inf;
+            q(1,1) = inf;
+        }
+    
+        return q;
    }

-    __host__ __device__ cuvec2f cuvec2f::operator*(const float &rhs)
-    {
-        cuvec2f ret;
-        ret.x = x*rhs;
-        ret.y = y*rhs;
-        return ret;
-    }
+    ////////////////////////
+    //Non member functions//
+    ////////////////////////

-    __host__ __device__ cuvec2f cuvec2f::operator/(const float &rhs)
+    __host__ __device__ cumat2f cumat2f_rot_from_angle(const float &angle)
    {
-        cuvec2f ret;
-        ret.x = x/rhs;
-        ret.y = y/rhs;
-        return ret;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f &rhs)
-    {
-        x = x + rhs.x;
-        y = y + rhs.y;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f &rhs)
-    {
-        x = x - rhs.x;
-        y = y - rhs.y;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator*=(const float &rhs)
-    {
-        x = x * rhs;
-        y = y * rhs;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator/=(const float &rhs)
-    {
-        x = x / rhs;
-        y = y / rhs;
-        return *this;
-    }
-
-
-    __host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y)
-    {
-        x = _x; y = _y;
-        return;
+        cumat2f R;
+        R(0,0) = ::cosf(angle);
+        R(1,0) = ::sinf(angle);
+        R(0,1) = -::sinf(angle);
+        R(1,1) = ::cosf(angle);
+        return R;
    }

    __host__ __device__ float cuvec2f_dot(const cuvec2f &a, const cuvec2f &b)
@ -120,13 +489,13 @@ namespace amscuda
    {
        cuvec2f ret;
        float m = cuvec2f_norm(a);
-        if(m>0.0)
+        if(m>0)
        {
            ret.x = a.x/m; ret.y = a.y/m;
        }
        else
        {
-            ret.x = 0.0f; ret.y = 0.0f;
+            ret.x = 0; ret.y = 0;
        }
        return ret;
    }
@ -140,314 +509,6 @@ namespace amscuda
        return ret;
    }

-    __host__ __device__ cumat2f::cumat2f()
-    {
-        m00 = 0.0f;
-        m01 = 0.0f;
-        m10 = 0.0f;
-        m11 = 0.0f;
-
-        return;
-    }
-    
-    __host__ __device__ cumat2f::~cumat2f()
-    {
-        m00 = 0.0f;
-        m01 = 0.0f;
-        m10 = 0.0f;
-        m11 = 0.0f;
-        return;
-    }
-    
-    __host__ __device__ float& cumat2f::operator[](const int &I)
-    {
-        if(I==0) return m00;
-        if(I==1) return m10;
-        if(I==2) return m01;
-        if(I==3) return m11;
-        
-        return m00;
-    }
-
-    __host__ __device__ const float& cumat2f::operator[](const int &I) const 
-    {
-        if(I==0) return m00;
-        if(I==1) return m10;
-        if(I==2) return m01;
-        if(I==3) return m11;
-        
-        return m00;
-    }
-    
-    __host__ __device__ float& cumat2f::operator()(const int &I, const int &J)
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    
-    __host__ __device__ float& cumat2f::at(const int &I, const int &J)
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-
-        return m00;
-    }
-    
-    __host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const 
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    __host__ __device__ const float& cumat2f::at(const int &I, const int &J) const 
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    
-    __host__ __device__ cumat2f cumat2f::operator+(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 + rhs.m00;
-        ret.m10 = m10 + rhs.m10;
-        ret.m01 = m01 + rhs.m01;
-        ret.m11 = m11 + rhs.m11;
-        
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator-(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 - rhs.m00;
-        ret.m10 = m10 - rhs.m10;
-        ret.m01 = m01 - rhs.m01;
-        ret.m11 = m11 - rhs.m11;
-
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator*(const float &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 * rhs;
-        ret.m10 = m10 * rhs;
-        ret.m01 = m01 * rhs;
-        ret.m11 = m11 * rhs;
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator/(const float &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 / rhs;
-        ret.m10 = m10 / rhs;
-        ret.m01 = m01 / rhs;
-        ret.m11 = m11 / rhs;
-        return ret;
-    }
-            
-    __host__ __device__ cuvec2f cumat2f::operator*(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        
-        ret.x = m00*rhs.x + m01*rhs.y;
-        ret.y = m10*rhs.x + m11*rhs.y;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator*(const cumat2f &rhs)
-    {
-        cumat2f ret;
-    
-        ret.m00 = m00*rhs.m00 + m01*rhs.m10;
-        ret.m01 = m00*rhs.m01 + m01*rhs.m11;
-        ret.m10 = m10*rhs.m00 + m11*rhs.m10;
-        ret.m11 = m10*rhs.m01 + m11*rhs.m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ float cumat2f::det()
-    {
-        float ret = 0.0;
-    
-        ret += m00*m11;
-        ret -= m01*m10;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::transpose()
-    {
-        cumat2f ret;
-    
-        ret.m00 = m00;
-        ret.m01 = m10;
-        ret.m10 = m01;
-        ret.m11 = m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::inverse()
-    {
-        cumat2f q;
-        float dt = det();
-        if(dt!=0)
-        {
-            q(0,0) = q(1,1)/dt;
-            q(0,1) = -q(0,1)/dt;
-            q(1,0) = -q(1,0)/dt;
-            q(1,1) = q(0,0)/dt;
-
-        }
-        else
-        {
-            q(0,0) = inf;
-            q(0,1) = inf;
-            q(1,0) = inf;
-            q(1,1) = inf;
-        }
-    
-        return q;
-    }
-    
-    __host__ __device__ cumat2f operator-(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = -rhs.m00;
-        ret.m10 = -rhs.m10;
-        ret.m01 = -rhs.m01;
-        ret.m11 = -rhs.m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f &rhs)
-    {
-        m00 += rhs.m00;
-        m10 += rhs.m10;
-        m01 += rhs.m01;
-        m11 += rhs.m11;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f &rhs)
-    {
-        m00 -= rhs.m00;
-        m10 -= rhs.m10;
-        m01 -= rhs.m01;
-        m11 -= rhs.m11;
-    
-        return *this;
-    }
-    
-    
-    
-    __host__ __device__ cumat2f& cumat2f::operator/=(const float &rhs)
-    {
-        m00 /= rhs;
-        m10 /= rhs;
-        m01 /= rhs;
-        m11 /= rhs;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator*=(const float &rhs)
-    {
-        m00 *= rhs;
-        m10 *= rhs;
-        m01 *= rhs;
-        m11 *= rhs;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f &rhs)
-    {
-        cumat2f tmp;
-    
-        tmp.m00 = m00*rhs.m00 + m01*rhs.m10; 
-        tmp.m01 = m00*rhs.m01 + m01*rhs.m11; 
-        tmp.m10 = m10*rhs.m00 + m11*rhs.m10; 
-        tmp.m11 = m10*rhs.m01 + m11*rhs.m11;
-    
-        (*this) = tmp;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f::cumat2f(
-        const float & _m00, const float & _m01,
-        const float & _m10, const float & _m11
-    )
-    {
-        m00 = _m00;
-        m01 = _m01;
-        m10 = _m10;
-        m11 = _m11;
-    }
-    
-    __host__ __device__ float* cumat2f::data()
-    {
-         //pointer to float[9] representation of matrix
-        return (float*) this;
-    }
-    
-    __host__ __device__ const float* cumat2f::data() const
-    {
-         //pointer to float[9] representation of matrix
-        return (const float*) this;
-    }
-    
-    __host__ __device__ cumat2f::cumat2f(const float* data2x2)
-    {
-        m00 = data2x2[0];
-        m10 = data2x2[1];
-        m01 = data2x2[2];
-        m11 = data2x2[3];
-        return;
-    }
-
-    __host__ __device__ cumat2f cumat2f_rot_from_angle(const float &angle)
-    {
-        cumat2f R;
-        R(0,0) = ::cosf(angle);
-        R(1,0) = ::sinf(angle);
-        R(0,1) = -::sinf(angle);
-        R(1,1) = ::cosf(angle);
-        return R;
-    }
-
-    __host__ __device__ cuvec2f operator-(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        ret[0] = -rhs[0];
-        ret[1] = -rhs[1];
-        return ret;
-    }
-

 void test_cuvec2f_1()
 {
--- a/src/amsculib3/math/cuvec3f.cu
+++ b/src/amsculib3/math/cuvec3f.cu
@ -51,21 +51,36 @@ namespace amscuda
        return ret;
    }

-    __host__ __device__ cuvec3f cuvec3f::operator*(const float &rhs)
+    __host__ __device__ cuvec3f operator*(const cuvec3f& lhs, const float &rhs)
    {
        cuvec3f ret;
-        ret.x = x*rhs;
-        ret.y = y*rhs;
-        ret.z = z*rhs;
+        ret.x = lhs.x*rhs;
+        ret.y = lhs.y*rhs;
+        ret.z = lhs.z*rhs;
        return ret;
    }
-
-    __host__ __device__ cuvec3f cuvec3f::operator/(const float &rhs)
+    __host__ __device__ cuvec3f operator/(const cuvec3f& lhs, const float &rhs)
    {
        cuvec3f ret;
-        ret.x = x/rhs;
-        ret.y = y/rhs;
-        ret.z = z/rhs;
+        ret.x = lhs.x/rhs;
+        ret.y = lhs.y/rhs;
+        ret.z = lhs.z/rhs;
+        return ret;
+    }
+    __host__ __device__ cuvec3f operator*(const float& lhs, const cuvec3f &rhs)
+    {
+        cuvec3f ret;
+        ret.x = rhs.x*lhs;
+        ret.y = rhs.y*lhs;
+        ret.z = rhs.z*lhs;
+        return ret;
+    }
+    __host__ __device__ cuvec3f operator/(const float& lhs, const cuvec3f &rhs) //scalar divided by each component: (lhs/rhs.x, lhs/rhs.y, lhs/rhs.z)
+    {
+        cuvec3f ret;
+        ret.x = lhs/rhs.x; //lhs is the numerator, matching the cuvec2f scalar/vector operator
+        ret.y = lhs/rhs.y;
+        ret.z = lhs/rhs.z;
        return ret;
    }

--- a/src/amsculib3/math/cuvec4f.cu
+++ b/src/amsculib3/math/cuvec4f.cu
@ -59,23 +59,40 @@ __host__ __device__ cuvec4f cuvec4f::operator-(const cuvec4f &rhs)
    return ret;
 }

-__host__ __device__ cuvec4f cuvec4f::operator*(const float &rhs)
+__host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const float &rhs)
 {
    cuvec4f ret;
-    ret.x = x*rhs;
-    ret.y = y*rhs;
-    ret.z = z*rhs;
-    ret.w = w*rhs;
+    ret.x = lhs.x*rhs;
+    ret.y = lhs.y*rhs;
+    ret.z = lhs.z*rhs;
+    ret.w = lhs.w*rhs;
    return ret;
 }
-
-__host__ __device__ cuvec4f cuvec4f::operator/(const float &rhs)
+__host__ __device__ cuvec4f operator/(const cuvec4f& lhs, const float &rhs)
 {
    cuvec4f ret;
-    ret.x = x/rhs;
-    ret.y = y/rhs;
-    ret.z = z/rhs;
-    ret.w = w/rhs;
+    ret.x = lhs.x/rhs;
+    ret.y = lhs.y/rhs;
+    ret.z = lhs.z/rhs;
+    ret.w = lhs.w/rhs;
+    return ret;
+}
+__host__ __device__ cuvec4f operator*(const float& lhs, const cuvec4f &rhs)
+{
+    cuvec4f ret;
+    ret.x = rhs.x*lhs;
+    ret.y = rhs.y*lhs;
+    ret.z = rhs.z*lhs;
+    ret.w = rhs.w*lhs;
+    return ret;
+}
+__host__ __device__ cuvec4f operator/(const float& lhs, const cuvec4f &rhs) //scalar divided by each component: (lhs/rhs.x, ..., lhs/rhs.w)
+{
+    cuvec4f ret;
+    ret.x = lhs/rhs.x; //lhs is the numerator, matching the cuvec2f scalar/vector operator
+    ret.y = lhs/rhs.y;
+    ret.z = lhs/rhs.z;
+    ret.w = lhs/rhs.w;
    return ret;
 }

--- a/test_scripts/cuvec2f_codegen1.cu
+++ b/test_scripts/cuvec2f_codegen1.cu
@ -0,0 +1,412 @@
+	__host__ __device__ cuvec2f::cuvec2f()
+		: x(0), y(0)
+	{
+		// Default construction yields the zero vector.
+	}
+
+	__host__ __device__ cuvec2f::~cuvec2f()
+	{
+		// Both components are reset to zero as the object is destroyed.
+		y = 0; x = 0;
+	}
+
+	__host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y)
+		: x(_x), y(_y)
+	{
+		// Components are copied straight from the two arguments.
+	}
+
+	__host__ __device__ float& cuvec2f::operator[](const int &I)
+	{
+		// Mutable component access: 0 -> x, 1 -> y.
+		// Any other index falls back to x (no bounds error is raised).
+		if(I == 1)
+		{
+			return y;
+		}
+
+		// I == 0 and every out-of-range index end up here.
+		return x;
+	}
+
+	__host__ __device__ const float& cuvec2f::operator[](const int &I) const
+	{
+		// Read-only component access: 0 -> x, 1 -> y.
+		// Any other index falls back to x (no bounds error is raised).
+		if(I == 1)
+		{
+			return y;
+		}
+
+		// I == 0 and every out-of-range index end up here.
+		return x;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f& rhs) const
+	{
+		// Component-wise sum; neither operand is modified.
+		const float sum_x = x + rhs.x;
+		const float sum_y = y + rhs.y;
+		return cuvec2f(sum_x, sum_y);
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f& rhs) const
+	{
+		// Component-wise difference (*this minus rhs); neither operand is modified.
+		const float diff_x = x - rhs.x;
+		const float diff_y = y - rhs.y;
+		return cuvec2f(diff_x, diff_y);
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator*(const cuvec2f& rhs) const
+	{
+		// Elementwise product (not a dot product): each component of
+		// *this is multiplied by the matching component of rhs.
+		const float prod_x = x * rhs.x;
+		const float prod_y = y * rhs.y;
+		return cuvec2f(prod_x, prod_y);
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator/(const cuvec2f& rhs) const
+	{
+		// Elementwise division: each component of *this is divided by
+		// the matching component of rhs. Divisors are not checked for zero.
+		const float quot_x = x / rhs.x;
+		const float quot_y = y / rhs.y;
+		return cuvec2f(quot_x, quot_y);
+	}
+
+	__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const float& rhs)
+	{
+		// vector * scalar: both components of lhs are multiplied by rhs.
+		const float scaled_x = lhs.x*rhs;
+		const float scaled_y = lhs.y*rhs;
+		return cuvec2f(scaled_x, scaled_y);
+	}
+
+	__host__ __device__ cuvec2f operator*(const float& lhs, const cuvec2f& rhs)
+	{
+		// scalar * vector: both components of rhs are multiplied by lhs.
+		const float scaled_x = lhs*rhs.x;
+		const float scaled_y = lhs*rhs.y;
+		return cuvec2f(scaled_x, scaled_y);
+	}
+
+	__host__ __device__ cuvec2f operator/(const cuvec2f& lhs, const float& rhs)
+	{
+		// vector / scalar: both components of lhs are divided by rhs.
+		const float quot_x = lhs.x/rhs;
+		const float quot_y = lhs.y/rhs;
+		return cuvec2f(quot_x, quot_y);
+	}
+
+	__host__ __device__ cuvec2f operator/(const float& lhs, const cuvec2f& rhs)
+	{
+		// scalar / vector: the scalar lhs is divided by each component of rhs.
+		const float quot_x = lhs/rhs.x;
+		const float quot_y = lhs/rhs.y;
+		return cuvec2f(quot_x, quot_y);
+	}
+
+	__host__ __device__ cuvec2f operator-(const cuvec2f& other)
+	{
+		// Unary minus: returns a copy of other with both signs flipped.
+		const float neg_x = -other.x;
+		const float neg_y = -other.y;
+		return cuvec2f(neg_x, neg_y);
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f& rhs)
+	{	// In-place component-wise add; returns *this so calls can be chained.
+		y = y + rhs.y;
+		x = x + rhs.x;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f& rhs)
+	{	// In-place component-wise subtract; returns *this so calls can be chained.
+		y = y - rhs.y;
+		x = x - rhs.x;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator*=(const float& rhs)
+	{	// In-place scale of both components by rhs; returns *this for chaining.
+		const float s = rhs; // copy first: rhs is a reference and may refer to x or y of this very vector
+		x *= s; y *= s;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator/=(const float& rhs)
+	{	// In-place division of both components by rhs (not checked for zero); returns *this.
+		const float s = rhs; // copy first: with v /= v.x the divisor would otherwise become 1 before y is divided
+		x /= s; y /= s;
+		return *this;
+	}
+
+
+//
+//Matrix (cumat2f) member and operator definitions
+//
+
+	__host__ __device__ cumat2f::cumat2f()
+	{
+		// Default construction produces the 2x2 zero matrix.
+		m00 = 0.0f;
+		m10 = 0.0f;
+
+		m01 = 0.0f;
+		m11 = 0.0f;
+
+	}
+
+	__host__ __device__ cumat2f::~cumat2f()
+	{
+		// Intentionally does nothing: no resource is released and the
+		// elements are left untouched. The zeroing below is commented out.
+		//m00 = 0;
+		//m10 = 0;
+		//m01 = 0;
+		//m11 = 0;
+
+	}
+
+	__host__ __device__ cumat2f::cumat2f(
+		const float& _m00, const float& _m10,
+		const float& _m01, const float& _m11
+	)
+	{
+		// Arguments arrive in the order m00, m10, m01, m11, i.e. the
+		// first column (m00, m10) followed by the second column (m01, m11).
+		// A call written as (a, b, c, d) therefore stores b in m10,
+		// not in m01.
+
+		m11 = _m11; m01 = _m01;
+		m10 = _m10; m00 = _m00;
+
+	}
+
+	__host__ __device__ cumat2f::cumat2f(const float* data4)
+	{
+		// Reads four consecutive floats: data4[0..3] -> m00, m10, m01, m11.
+		// data4 is dereferenced without a null check.
+		const float* src = data4;
+		m00 = src[0];
+		m10 = src[1];
+		m01 = src[2];
+		m11 = src[3];
+
+	}
+
+	__host__ __device__ float& cumat2f::operator[](const int &I)
+	{
+		// Mutable flat element access: 0 -> m00, 1 -> m10, 2 -> m01, 3 -> m11.
+		// Any other index falls back to m00 instead of signalling an error.
+		if(I == 1)
+		{
+			return m10;
+		}
+		if(I == 2)
+		{
+			return m01;
+		}
+		if(I == 3) return m11;
+
+		return m00;
+	}
+
+	__host__ __device__ const float& cumat2f::operator[](const int &I) const
+	{
+		// Read-only flat element access: 0 -> m00, 1 -> m10, 2 -> m01, 3 -> m11.
+		// Any other index falls back to m00 instead of signalling an error.
+		if(I == 1)
+		{
+			return m10;
+		}
+		if(I == 2)
+		{
+			return m01;
+		}
+		if(I == 3) return m11;
+
+		return m00;
+	}
+
+	__host__ __device__ float& cumat2f::operator()(const int &I, const int &J)
+	{
+		return operator[](I+2*J); // mutable element (I, J), looked up through flat index I + 2*J
+	}
+
+	__host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const
+	{
+		return operator[](I+2*J); // read-only element (I, J), looked up through flat index I + 2*J
+	}
+
+	__host__ __device__ float& cumat2f::at(const int &I, const int &J)
+	{
+		return operator[](I+2*J); // same lookup as operator(): flat index I + 2*J, no range check
+	}
+
+	__host__ __device__ const float& cumat2f::at(const int &I, const int &J) const
+	{
+		return operator[](I+2*J); // read-only variant: flat index I + 2*J, no range check
+	}
+
+__host__ __device__ float* cumat2f::data()
+{
+	return reinterpret_cast<float*>(this); // the object's own storage viewed as a float array
+}
+
+__host__ __device__ const float* cumat2f::data() const
+{
+	return reinterpret_cast<const float*>(this); // read-only view of the object's storage; const is preserved rather than cast away
+}
+
+__host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const
+{
+	// Element-wise sum of the two matrices; neither operand is modified.
+	// The constructor takes its arguments as m00, m10, m01, m11.
+	return cumat2f(
+		m00 + rhs.m00,
+		m10 + rhs.m10,
+		m01 + rhs.m01,
+		m11 + rhs.m11
+	);
+}
+
+__host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const
+{
+	// Element-wise difference (*this minus rhs); neither operand is modified.
+	// The constructor takes its arguments as m00, m10, m01, m11.
+	return cumat2f(
+		m00 - rhs.m00,
+		m10 - rhs.m10,
+		m01 - rhs.m01,
+		m11 - rhs.m11
+	);
+}
+
+__host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const
+{
+	// Matrix product (*this) * rhs: element (i,j) is row i of *this
+	// dotted with column j of rhs. Neither operand is modified.
+	const float p00 = m00*rhs.m00 + m01*rhs.m10;
+	const float p01 = m00*rhs.m01 + m01*rhs.m11;
+	const float p10 = m10*rhs.m00 + m11*rhs.m10;
+	const float p11 = m10*rhs.m01 + m11*rhs.m11;
+
+	return cumat2f(p00, p10, p01, p11);
+}
+
+__host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs)
+{
+	// matrix * scalar: every element of lhs is multiplied by rhs.
+	// The constructor takes its arguments as m00, m10, m01, m11.
+	return cumat2f(
+		lhs.m00*rhs, lhs.m10*rhs,
+		lhs.m01*rhs, lhs.m11*rhs
+	);
+}
+
+__host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs)
+{
+	// matrix / scalar: every element of lhs is divided by rhs.
+	// rhs is not checked against zero.
+	return cumat2f(
+		lhs.m00/rhs, lhs.m10/rhs,
+		lhs.m01/rhs, lhs.m11/rhs
+	);
+}
+
+__host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs)
+{
+	// scalar * matrix: every element of rhs is multiplied by lhs.
+	// The constructor takes its arguments as m00, m10, m01, m11.
+	return cumat2f(
+		lhs*rhs.m00, lhs*rhs.m10,
+		lhs*rhs.m01, lhs*rhs.m11
+	);
+}
+
+__host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs)
+{
+	// Matrix times column vector: component i is row i of lhs dotted with rhs.
+	const float out_x = lhs.m00*rhs.x + lhs.m01*rhs.y;
+	const float out_y = lhs.m10*rhs.x + lhs.m11*rhs.y;
+	return cuvec2f(out_x, out_y);
+}
+
+__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs)
+{
+	// Row vector times matrix: component j is lhs dotted with column j of rhs.
+	const float out_x = lhs.x*rhs.m00 + lhs.y*rhs.m10;
+	const float out_y = lhs.x*rhs.m01 + lhs.y*rhs.m11;
+	return cuvec2f(out_x, out_y);
+}
+
+__host__ __device__ cumat2f operator-(const cumat2f& rhs)
+{
+	// Unary minus: returns a copy of rhs with the sign of every
+	// element flipped; rhs itself is not modified.
+	return cumat2f(
+		-rhs.m00, -rhs.m10,
+		-rhs.m01, -rhs.m11
+	);
+}
+
+__host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs)
+{	// In-place element-wise add; returns *this so calls can be chained.
+	m11 = m11 + rhs.m11;
+	m01 = m01 + rhs.m01;
+	m10 = m10 + rhs.m10;
+	m00 = m00 + rhs.m00;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs)
+{	// In-place element-wise subtract; returns *this so calls can be chained.
+	m11 = m11 - rhs.m11;
+	m01 = m01 - rhs.m01;
+	m10 = m10 - rhs.m10;
+	m00 = m00 - rhs.m00;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs)
+{	// In-place scale of every element by rhs; returns *this for chaining.
+	// rhs is a reference and may refer to one of this matrix's own elements
+	// (e.g. A *= A.m00), so its value is copied before anything is written.
+	const float s = rhs;
+	m00 *= s; m10 *= s; m01 *= s; m11 *= s;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs)
+{	// In-place division of every element by rhs (not checked for zero); returns *this.
+	// rhs is a reference and may refer to one of this matrix's own elements
+	// (e.g. A /= A.m00), so its value is copied before anything is written.
+	const float s = rhs;
+	m00 /= s; m10 /= s; m01 /= s; m11 /= s;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs)
+{	// In-place matrix product: *this = (*this) * rhs; returns *this for chaining.
+	const cumat2f a = *this, b = rhs; // copy BOTH operands: rhs may be *this (A *= A) and must not change while elements are written
+	m00 = a.m00*b.m00 + a.m01*b.m10;
+	m01 = a.m00*b.m01 + a.m01*b.m11;
+	m10 = a.m10*b.m00 + a.m11*b.m10;
+	m11 = a.m10*b.m01 + a.m11*b.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f cumat2f::transpose() const
+{
+	// Returns a transposed copy: the diagonal is kept and the two
+	// off-diagonal elements swap places. *this is left unchanged.
+	cumat2f flipped;
+	flipped.m00 = m00; flipped.m11 = m11;
+	flipped.m10 = m01; flipped.m01 = m10;
+	return flipped;
+}
+
--- a/test_scripts/cuvec2f_codegen1.hpp
+++ b/test_scripts/cuvec2f_codegen1.hpp
@ -0,0 +1,54 @@
+	__host__ __device__ cuvec2f(); 	 //zero vector
+	__host__ __device__ ~cuvec2f(); 
+	__host__ __device__ cuvec2f(const float &_x, const float &_y); 	 //components set from _x, _y
+	__host__ __device__ float& operator[](const int &I); 	 //0 -> x, 1 -> y, any other index -> x
+	__host__ __device__ const float& operator[](const int &I) const; 	 //read-only variant of the above
+	__host__ __device__ cuvec2f operator+(const cuvec2f& rhs) const; 	 //component-wise sum
+	__host__ __device__ cuvec2f operator-(const cuvec2f& rhs) const; 	 //component-wise difference
+	__host__ __device__ cuvec2f operator*(const cuvec2f& rhs) const; 	 //elementwise product
+	__host__ __device__ cuvec2f operator/(const cuvec2f& rhs) const; 	 //elementwise division
+	__host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const float& rhs); 	 //vector * scalar
+	__host__ __device__ friend cuvec2f operator*(const float& lhs, const cuvec2f& rhs); 	 //scalar * vector
+	__host__ __device__ friend cuvec2f operator/(const cuvec2f& lhs, const float& rhs); 	 //vector / scalar
+	__host__ __device__ friend cuvec2f operator/(const float& lhs, const cuvec2f& rhs); 	 //scalar divided by each component
+	__host__ __device__ friend cuvec2f operator-(const cuvec2f& other); 	 //unary minus
+	__host__ __device__ cuvec2f& operator+=(const cuvec2f& rhs); 	 //in-place add, returns *this
+	__host__ __device__ cuvec2f& operator-=(const cuvec2f& rhs); 	 //in-place subtract, returns *this
+	__host__ __device__ cuvec2f& operator*=(const float& rhs); 	 //in-place scale, returns *this
+	__host__ __device__ cuvec2f& operator/=(const float& rhs); 	 //in-place divide by scalar, returns *this
+//
+//Matrix Header Stuff
+//
+float m00,m10; //first column; operator[] indices 0 and 1
+float m01,m11; //second column; operator[] indices 2 and 3
+
+	__host__ __device__ cumat2f(); 	 //zero matrix
+	__host__ __device__ ~cumat2f(); 
+	__host__ __device__ cumat2f(
+		const float& _m00, const float& _m10, 
+		const float& _m01, const float& _m11
+	); 	 //note the argument order: m00, m10, m01, m11
+	__host__ __device__ cumat2f(const float* data4); 	 //reads data4[0..3] as m00, m10, m01, m11
+	__host__ __device__ float& operator[](const int &I); 	 //flat access; an index outside 0..3 yields m00
+	__host__ __device__ const float& operator[](const int &I) const;
+	__host__ __device__ float& operator()(const int &I, const int &J); 	 //element at flat index I+2*J
+	__host__ __device__ const float& operator()(const int &I, const int &J) const;
+	__host__ __device__ float& at(const int &I, const int &J); 	 //same lookup as operator()
+	__host__ __device__ const float& at(const int &I, const int &J) const;
+__host__ __device__ float* data(); //pointer to float4 representation of matrix
+__host__ __device__ const float* data() const; //pointer to float4 representation of matrix
+__host__ __device__ cumat2f operator+(const cumat2f& rhs) const; //element-wise sum
+__host__ __device__ cumat2f operator-(const cumat2f& rhs) const; //element-wise difference
+__host__ __device__ cumat2f operator*(const cumat2f& rhs) const; //matrix product, not element-wise
+__host__ __device__ friend cumat2f operator*(const cumat2f& lhs, const float& rhs); //matrix * scalar
+__host__ __device__ friend cumat2f operator/(const cumat2f& lhs, const float& rhs); //matrix / scalar
+__host__ __device__ friend cumat2f operator*(const float& lhs, const cumat2f& rhs); //scalar * matrix
+__host__ __device__ friend cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs); //matrix * column vector
+__host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs); //row vector * matrix
+__host__ __device__ friend cumat2f operator-(const cumat2f& rhs); //unary minus
+__host__ __device__ cumat2f& operator+=(const cumat2f& rhs); //in-place add, returns *this
+__host__ __device__ cumat2f& operator-=(const cumat2f& rhs); //in-place subtract, returns *this
+__host__ __device__ cumat2f& operator*=(const float& rhs); //in-place scale, returns *this
+__host__ __device__ cumat2f& operator/=(const float& rhs); //in-place divide by scalar, returns *this
+__host__ __device__ cumat2f& operator*=(const cumat2f& rhs); //in-place matrix product, returns *this
+__host__ __device__ cumat2f transpose() const; //returns a transposed copy
--- a/test_scripts/cuvec_codegen.py
+++ b/test_scripts/cuvec_codegen.py