diff --git a/build_linux64/libamsculib3.linux64.a b/build_linux64/libamsculib3.linux64.a
index 3905e0b..94cbc7b 100644
Binary files a/build_linux64/libamsculib3.linux64.a and b/build_linux64/libamsculib3.linux64.a differ
diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o
index 0e0fd51..0449c54 100644
Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ
diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o
index 252e8da..dfc0ff4 100644
Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ
diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o
index cc8e081..62c4bba 100644
Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ
diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o
index 2ba0e79..ec6cab5 100644
Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ
diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o
index df0bbb4..eca24ca 100644
Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ
diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o
index 2ac9ce6..6d43fa4 100644
Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ
diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o
index 2aa1ff0..5af371f 100644
Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ
diff --git a/build_linux64/objstore/amsculib3.o b/build_linux64/objstore/amsculib3.o
index fc6935a..a88218e 100644
Binary files a/build_linux64/objstore/amsculib3.o and b/build_linux64/objstore/amsculib3.o differ
diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o
index 2034c0a..8fd2229 100644
Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ
diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o
index 2354199..a676e52 100644
Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ
diff --git a/build_linux64/objstore/cuvec2f.o b/build_linux64/objstore/cuvec2f.o
index 82d3901..5347f73 100644
Binary files a/build_linux64/objstore/cuvec2f.o and b/build_linux64/objstore/cuvec2f.o differ
diff --git a/build_linux64/objstore/cuvec3f.o b/build_linux64/objstore/cuvec3f.o
index 1174501..ed9ed62 100644
Binary files a/build_linux64/objstore/cuvec3f.o and b/build_linux64/objstore/cuvec3f.o differ
diff --git a/build_linux64/objstore/cuvec4f.o b/build_linux64/objstore/cuvec4f.o
index 6483ef8..d20de1d 100644
Binary files a/build_linux64/objstore/cuvec4f.o and b/build_linux64/objstore/cuvec4f.o differ
diff --git a/build_linux64/test b/build_linux64/test
index 131155b..331a30b 100644
Binary files a/build_linux64/test and b/build_linux64/test differ
diff --git a/include/amsculib3/math/cuvec2f.hpp b/include/amsculib3/math/cuvec2f.hpp
index 00a88e5..8bd9d37 100644
--- a/include/amsculib3/math/cuvec2f.hpp
+++ b/include/amsculib3/math/cuvec2f.hpp
@@ -10,72 +10,78 @@ namespace amscuda
         float x;
         float y;
 
-        __host__ __device__ cuvec2f();
-        __host__ __device__ ~cuvec2f();
-        __host__ __device__ cuvec2f(const float &_x, const float &_y);
-        
-        
+        __host__ __device__ cuvec2f(); 
+        __host__ __device__ ~cuvec2f(); 
+        __host__ __device__ cuvec2f(const float &_x, const float &_y); 
+
         __host__ __device__ float& operator[](const int &I);
         __host__ __device__ const float& operator[](const int &I) const;
 
-        __host__ __device__ cuvec2f operator+(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f operator-(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f operator*(const float &rhs);
-        __host__ __device__ cuvec2f operator/(const float &rhs);
-        __host__ __device__ friend cuvec2f operator-(const cuvec2f &rhs);
-
-        __host__ __device__ cuvec2f& operator+=(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f& operator-=(const cuvec2f &rhs);
-        __host__ __device__ cuvec2f& operator/=(const float &rhs);
-        __host__ __device__ cuvec2f& operator*=(const float &rhs);
+        __host__ __device__ cuvec2f operator+(const cuvec2f& rhs) const;
+        __host__ __device__ cuvec2f operator-(const cuvec2f& rhs) const;
+        __host__ __device__ cuvec2f operator*(const cuvec2f& rhs) const; 	 //elementwise product
+        __host__ __device__ cuvec2f operator/(const cuvec2f& rhs) const; 	 //elementwise division
+        
+        __host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec2f operator*(const float& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator/(const cuvec2f& lhs, const float& rhs);
+        __host__ __device__ friend cuvec2f operator/(const float& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator-(const cuvec2f& other);
+        
+        __host__ __device__ cuvec2f& operator+=(const cuvec2f& rhs);
+        __host__ __device__ cuvec2f& operator-=(const cuvec2f& rhs);
+        __host__ __device__ cuvec2f& operator*=(const float& rhs);
+        __host__ __device__ cuvec2f& operator/=(const float& rhs);
+    
 
     };
 
     class cumat2f
     {
         public:
-        float m00,m10; //named references to force register use?
-        float m01,m11; //switched to column-major-order to match GLSL/lapack
-
-        __host__ __device__ cumat2f();
-        __host__ __device__ ~cumat2f();
-
-        __host__ __device__ cumat2f(
-            const float & _m00, const float & _m01,
-            const float & _m10, const float & _m11
-        );
-
-        __host__ __device__ explicit cumat2f(const float* data2x2);
-
-        __host__ __device__ float& operator[](const int &I);
-        __host__ __device__ float& operator()(const int &I, const int &J);
-        __host__ __device__ float& at(const int &I, const int &J);
-
-        __host__ __device__ const float& operator[](const int &I) const;
-        __host__ __device__ const float& operator()(const int &I, const int &J) const;
-        __host__ __device__ const float& at(const int &I, const int &J) const;
-
-        __host__ __device__ cumat2f operator+(const cumat2f &rhs);
-        __host__ __device__ cumat2f operator-(const cumat2f &rhs);
-        __host__ __device__ cumat2f operator*(const float &rhs);
-        __host__ __device__ cumat2f operator/(const float &rhs);
-        __host__ __device__ cuvec2f operator*(const cuvec2f &rhs);
-        __host__ __device__ cumat2f operator*(const cumat2f &rhs);
-        __host__ __device__ friend cumat2f operator-(const cumat2f &rhs);
+        float m00,m10;
+        float m01,m11;
         
+        __host__ __device__ cumat2f(); 
+        __host__ __device__ ~cumat2f(); 
+        __host__ __device__ cumat2f( //arguments are given in column-major order (the previous signature took them row-major)
+            const float& _m00, const float& _m10, //first column
+            const float& _m01, const float& _m11 //second column
+        );
+        __host__ __device__ explicit cumat2f(const float* data4); //reads data4[0..3] as m00,m10,m01,m11; explicit so a literal 0 or a stray float* cannot silently convert to a matrix
+        
+        __host__ __device__ float& operator[](const int &I);
+        __host__ __device__ const float& operator[](const int &I) const;
+        __host__ __device__ float& operator()(const int &I, const int &J);
+        __host__ __device__ const float& operator()(const int &I, const int &J) const;
+        __host__ __device__ float& at(const int &I, const int &J);
+        __host__ __device__ const float& at(const int &I, const int &J) const;
+        
+        __host__ __device__ float* data(); //pointer to float[4] representation of matrix (members in declaration order m00,m10,m01,m11)
+        __host__ __device__ const float* data() const; //pointer to float[4] representation of matrix (members in declaration order m00,m10,m01,m11)
+        
+        //operators
+        __host__ __device__ cumat2f operator+(const cumat2f& rhs) const;
+        __host__ __device__ cumat2f operator-(const cumat2f& rhs) const;
+        __host__ __device__ cumat2f operator*(const cumat2f& rhs) const;
+        __host__ __device__ friend cumat2f operator*(const cumat2f& lhs, const float& rhs);
+        __host__ __device__ friend cumat2f operator/(const cumat2f& lhs, const float& rhs);
+        __host__ __device__ friend cumat2f operator*(const float& lhs, const cumat2f& rhs);
+        __host__ __device__ friend cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs);
+        __host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs);
+        __host__ __device__ friend cumat2f operator-(const cumat2f& rhs);
+
+        //in place operators to save register use
+        __host__ __device__ cumat2f& operator+=(const cumat2f& rhs);
+        __host__ __device__ cumat2f& operator-=(const cumat2f& rhs);
+        __host__ __device__ cumat2f& operator*=(const float& rhs);
+        __host__ __device__ cumat2f& operator/=(const float& rhs);
+        __host__ __device__ cumat2f& operator*=(const cumat2f& rhs);
+        
+        __host__ __device__ cumat2f transpose() const;
+
         __host__ __device__ float det();
-        __host__ __device__ cumat2f transpose();
         __host__ __device__ cumat2f inverse();
-
-        __host__ __device__ float* data(); //pointer to float[4] representation of matrix
-        __host__ __device__ const float* data() const; //pointer to float[4] representation of matrix
-
-        //In place operations (to save GPU register use)
-        __host__ __device__ cumat2f& operator+=(const cumat2f &rhs);
-        __host__ __device__ cumat2f& operator-=(const cumat2f &rhs);
-        __host__ __device__ cumat2f& operator/=(const float &rhs);
-        __host__ __device__ cumat2f& operator*=(const float &rhs);
-        __host__ __device__ cumat2f& operator*=(const cumat2f &rhs);
     };
 
     __host__ __device__ float cuvec2f_dot(const cuvec2f &a, const cuvec2f &b);
diff --git a/include/amsculib3/math/cuvec3f.hpp b/include/amsculib3/math/cuvec3f.hpp
index 08962b4..34721be 100644
--- a/include/amsculib3/math/cuvec3f.hpp
+++ b/include/amsculib3/math/cuvec3f.hpp
@@ -21,8 +21,12 @@ namespace amscuda
 
         __host__ __device__ cuvec3f operator+(const cuvec3f &rhs);
         __host__ __device__ cuvec3f operator-(const cuvec3f &rhs);
-        __host__ __device__ cuvec3f operator*(const float &rhs);
-        __host__ __device__ cuvec3f operator/(const float &rhs);
+
+        __host__ __device__ friend cuvec3f operator*(const cuvec3f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec3f operator/(const cuvec3f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec3f operator*(const float& lhs, const cuvec3f &rhs);
+        __host__ __device__ friend cuvec3f operator/(const float& lhs, const cuvec3f &rhs);
+
         __host__ __device__ friend cuvec3f operator-(const cuvec3f &rhs);
 
         __host__ __device__ cuvec3f& operator+=(const cuvec3f &rhs);
diff --git a/include/amsculib3/math/cuvec4f.hpp b/include/amsculib3/math/cuvec4f.hpp
index d79f298..c5f383c 100644
--- a/include/amsculib3/math/cuvec4f.hpp
+++ b/include/amsculib3/math/cuvec4f.hpp
@@ -22,8 +22,12 @@ namespace amscuda
 
         __host__ __device__ cuvec4f operator+(const cuvec4f &rhs);
         __host__ __device__ cuvec4f operator-(const cuvec4f &rhs);
-        __host__ __device__ cuvec4f operator*(const float &rhs);
-        __host__ __device__ cuvec4f operator/(const float &rhs);
+
+        __host__ __device__ friend cuvec4f operator*(const cuvec4f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec4f operator/(const cuvec4f& lhs, const float &rhs);
+        __host__ __device__ friend cuvec4f operator*(const float& lhs, const cuvec4f &rhs);
+        __host__ __device__ friend cuvec4f operator/(const float& lhs, const cuvec4f &rhs);
+
         __host__ __device__ friend cuvec4f operator-(const cuvec4f &rhs);
 
         __host__ __device__ cuvec4f& operator+=(const cuvec4f &rhs);
diff --git a/src/amsculib3/math/cuvec2f.cu b/src/amsculib3/math/cuvec2f.cu
index 2f1df96..8c12dfc 100644
--- a/src/amsculib3/math/cuvec2f.cu
+++ b/src/amsculib3/math/cuvec2f.cu
@@ -3,99 +3,468 @@
 namespace amscuda
 {
 
-    __host__ __device__ cuvec2f::cuvec2f()
-    {
-        x = 0.0f; y = 0.0f;
-        return;
-    }
+	__host__ __device__ cuvec2f::cuvec2f() 
+	{
+		x = 0; y = 0; 
+		return;
+	}
 
-    __host__ __device__ cuvec2f::~cuvec2f()
-    {
-        x = 0.0f; y = 0.0f;
-        return;
-    }
+	__host__ __device__ cuvec2f::~cuvec2f() 
+	{
+		x = 0; y = 0; 
+		return;
+	}
 
-    __host__ __device__ float& cuvec2f::operator[](const int &I)
-    {
-        if(I==0) return x;
-        if(I==1) return y;
-        return x;
-    }
+	__host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y)
+	{
+		x = _x; y = _y; 
+		return;
+	}
 
-    __host__ __device__ const float& cuvec2f::operator[](const int &I) const
-    {
-        if(I==0) return x;
-        if(I==1) return y;
-        return x;
-    }
+	__host__ __device__ float& cuvec2f::operator[](const int &I)
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}
 
-    __host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        ret.x = x+rhs.x;
-        ret.y = y+rhs.y;
+		return x;
+	}
 
+	__host__ __device__ const float& cuvec2f::operator[](const int &I) const
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}
+
+		return x;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f& rhs) const
+	{
+		cuvec2f ret;
+		ret.x = x + rhs.x;
+		ret.y = y + rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f& rhs) const
+	{
+		cuvec2f ret;
+		ret.x = x - rhs.x;
+		ret.y = y - rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator*(const cuvec2f& rhs) const
+	{
+		//Elementwise product
+		cuvec2f ret;
+		ret.x = x * rhs.x;
+		ret.y = y * rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator/(const cuvec2f& rhs) const
+	{
+		//Elementwise division
+		cuvec2f ret;
+		ret.x = x / rhs.x;
+		ret.y = y / rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const float& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs.x*rhs;
+		ret.y = lhs.y*rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const float& lhs, const cuvec2f& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs*rhs.x;
+		ret.y = lhs*rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const cuvec2f& lhs, const float& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs.x/rhs;
+		ret.y = lhs.y/rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const float& lhs, const cuvec2f& rhs)
+	{
+		cuvec2f ret;
+		ret.x = lhs/rhs.x;
+		ret.y = lhs/rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator-(const cuvec2f& other)
+	{
+		cuvec2f ret;
+		ret.x = -other.x;
+		ret.y = -other.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f& rhs)
+	{
+		x += rhs.x;
+		y += rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f& rhs)
+	{
+		x -= rhs.x;
+		y -= rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator*=(const float& rhs)
+	{
+		x *= rhs;
+		y *= rhs;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator/=(const float& rhs)
+	{
+		x /= rhs;
+		y /= rhs;
+		return *this;
+	}
+
+    ////////////////
+    //Matrix Class//
+    ////////////////
+
+    __host__ __device__ cumat2f::cumat2f() 
+	{
+		m00 = 0;
+		m01 = 0;
+
+		m10 = 0;
+		m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::~cumat2f() 
+	{
+		//m00 = 0;
+		//m01 = 0;
+
+		//m10 = 0;
+		//m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f(
+		const float& _m00, const float& _m10, 
+		const float& _m01, const float& _m11
+	)
+	{
+		m00 = _m00; 
+		m10 = _m10; 
+
+		m01 = _m01; 
+		m11 = _m11; 
+
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f(const float* data4)
+	{
+		m00 = data4[0]; 
+		m10 = data4[1]; 
+
+		m01 = data4[2]; 
+		m11 = data4[3]; 
+
+
+		return;
+	}
+
+	__host__ __device__ float& cumat2f::operator[](const int &I)
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+
+		return m00;
+	}
+
+	__host__ __device__ const float& cumat2f::operator[](const int &I) const
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+
+		return m00;
+	}
+
+	__host__ __device__ float& cumat2f::operator()(const int &I, const int &J)
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ float& cumat2f::at(const int &I, const int &J)
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::at(const int &I, const int &J) const
+	{
+		return (*this)[I+2*J];
+	}
+
+__host__ __device__ float* cumat2f::data()
+{
+	return (float*)this;
+}
+
+__host__ __device__ const float* cumat2f::data() const
+{
+	return (const float*)this; //read-only view of the members as a float array; cast keeps const instead of stripping it
+}
+
+__host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const
+{
+	cumat2f ret;
+	ret.m00 = m00 + rhs.m00;
+	ret.m10 = m10 + rhs.m10;
+
+	ret.m01 = m01 + rhs.m01;
+	ret.m11 = m11 + rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const
+{
+	cumat2f ret;
+	ret.m00 = m00 - rhs.m00;
+	ret.m10 = m10 - rhs.m10;
+
+	ret.m01 = m01 - rhs.m01;
+	ret.m11 = m11 - rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const
+{
+	cumat2f ret; //should be zeroed in constructor
+	ret.m00 = m00*rhs.m00 + m01*rhs.m10;
+	ret.m01 = m00*rhs.m01 + m01*rhs.m11;
+
+	ret.m10 = m10*rhs.m00 + m11*rhs.m10;
+	ret.m11 = m10*rhs.m01 + m11*rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs.m00*rhs;
+	ret.m10=lhs.m10*rhs;
+	ret.m01=lhs.m01*rhs;
+	ret.m11=lhs.m11*rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs.m00/rhs;
+	ret.m10=lhs.m10/rhs;
+	ret.m01=lhs.m01/rhs;
+	ret.m11=lhs.m11/rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs)
+{
+	cumat2f ret;
+	ret.m00=lhs*rhs.m00;
+	ret.m10=lhs*rhs.m10;
+	ret.m01=lhs*rhs.m01;
+	ret.m11=lhs*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs)
+{
+	cuvec2f ret;
+	ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y;
+	ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs)
+{
+	cuvec2f ret;
+	ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10;
+	ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator-(const cumat2f& rhs)
+{
+	cumat2f ret;
+	ret.m00 = -rhs.m00;
+	ret.m10 = -rhs.m10;
+	ret.m01 = -rhs.m01;
+	ret.m11 = -rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs)
+{
+	m00 += rhs.m00;
+	m10 += rhs.m10;
+	m01 += rhs.m01;
+	m11 += rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs)
+{
+	m00 -= rhs.m00;
+	m10 -= rhs.m10;
+	m01 -= rhs.m01;
+	m11 -= rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs)
+{
+	m00 *= rhs;
+	m10 *= rhs;
+	m01 *= rhs;
+	m11 *= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs)
+{
+	m00 /= rhs;
+	m10 /= rhs;
+	m01 /= rhs;
+	m11 /= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs)
+{
+	cumat2f tmp = *this;
+	m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10;
+	m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11;
+	m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10;
+	m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f cumat2f::transpose() const
+{
+	cumat2f ret;
+	ret.m00 = m00;
+	ret.m10 = m01;
+	ret.m01 = m10;
+	ret.m11 = m11;
+	return ret;
+}
+
+    
+
+    ///////////////////
+    //Det and Inverse//
+    ///////////////////
+
+    __host__ __device__ float cumat2f::det()
+    {
+        float ret = 0;
+    
+        ret += m00*m11;
+        ret -= m01*m10;
+    
         return ret;
     }
-
-    __host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f &rhs)
+    
+    __host__ __device__ cumat2f cumat2f::inverse()
     {
-        cuvec2f ret;
-        ret.x = x-rhs.x;
-        ret.y = y-rhs.y;
+        cumat2f q;
+        float dt = det();
+        if(dt!=0)
+        {
+            q(0,0) = m11/dt;
+            q(0,1) = -m01/dt;
+            q(1,0) = -m10/dt;
+            q(1,1) = m00/dt;
 
-        return ret;
+        }
+        else
+        {
+            q(0,0) = inf;
+            q(0,1) = inf;
+            q(1,0) = inf;
+            q(1,1) = inf;
+        }
+    
+        return q;
     }
 
-    __host__ __device__ cuvec2f cuvec2f::operator*(const float &rhs)
-    {
-        cuvec2f ret;
-        ret.x = x*rhs;
-        ret.y = y*rhs;
-        return ret;
-    }
+    ////////////////////////
+    //Non member functions//
+    ////////////////////////
 
-    __host__ __device__ cuvec2f cuvec2f::operator/(const float &rhs)
+    __host__ __device__ cumat2f cumat2f_rot_from_angle(const float &angle)
     {
-        cuvec2f ret;
-        ret.x = x/rhs;
-        ret.y = y/rhs;
-        return ret;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f &rhs)
-    {
-        x = x + rhs.x;
-        y = y + rhs.y;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f &rhs)
-    {
-        x = x - rhs.x;
-        y = y - rhs.y;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator*=(const float &rhs)
-    {
-        x = x * rhs;
-        y = y * rhs;
-        return *this;
-    }
-
-    __host__ __device__ cuvec2f& cuvec2f::operator/=(const float &rhs)
-    {
-        x = x / rhs;
-        y = y / rhs;
-        return *this;
-    }
-
-
-    __host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y)
-    {
-        x = _x; y = _y;
-        return;
+        cumat2f R;
+        R(0,0) = ::cosf(angle);
+        R(1,0) = ::sinf(angle);
+        R(0,1) = -::sinf(angle);
+        R(1,1) = ::cosf(angle);
+        return R;
     }
 
     __host__ __device__ float cuvec2f_dot(const cuvec2f &a, const cuvec2f &b)
@@ -120,13 +489,13 @@ namespace amscuda
     {
         cuvec2f ret;
         float m = cuvec2f_norm(a);
-        if(m>0.0)
+        if(m>0)
         {
             ret.x = a.x/m; ret.y = a.y/m;
         }
         else
         {
-            ret.x = 0.0f; ret.y = 0.0f;
+            ret.x = 0; ret.y = 0;
         }
         return ret;
     }
@@ -140,314 +509,6 @@ namespace amscuda
         return ret;
     }
 
-    __host__ __device__ cumat2f::cumat2f()
-    {
-        m00 = 0.0f;
-        m01 = 0.0f;
-        m10 = 0.0f;
-        m11 = 0.0f;
-
-        return;
-    }
-    
-    __host__ __device__ cumat2f::~cumat2f()
-    {
-        m00 = 0.0f;
-        m01 = 0.0f;
-        m10 = 0.0f;
-        m11 = 0.0f;
-        return;
-    }
-    
-    __host__ __device__ float& cumat2f::operator[](const int &I)
-    {
-        if(I==0) return m00;
-        if(I==1) return m10;
-        if(I==2) return m01;
-        if(I==3) return m11;
-        
-        return m00;
-    }
-
-    __host__ __device__ const float& cumat2f::operator[](const int &I) const 
-    {
-        if(I==0) return m00;
-        if(I==1) return m10;
-        if(I==2) return m01;
-        if(I==3) return m11;
-        
-        return m00;
-    }
-    
-    __host__ __device__ float& cumat2f::operator()(const int &I, const int &J)
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    
-    __host__ __device__ float& cumat2f::at(const int &I, const int &J)
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-
-        return m00;
-    }
-    
-    __host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const 
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    __host__ __device__ const float& cumat2f::at(const int &I, const int &J) const 
-    {
-        if(I==0 && J==0) return m00;
-        if(I==1 && J==0) return m10;
-        if(I==0 && J==1) return m01;
-        if(I==1 && J==1) return m11;
-    
-        return m00;
-    }
-    
-    
-    __host__ __device__ cumat2f cumat2f::operator+(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 + rhs.m00;
-        ret.m10 = m10 + rhs.m10;
-        ret.m01 = m01 + rhs.m01;
-        ret.m11 = m11 + rhs.m11;
-        
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator-(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 - rhs.m00;
-        ret.m10 = m10 - rhs.m10;
-        ret.m01 = m01 - rhs.m01;
-        ret.m11 = m11 - rhs.m11;
-
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator*(const float &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 * rhs;
-        ret.m10 = m10 * rhs;
-        ret.m01 = m01 * rhs;
-        ret.m11 = m11 * rhs;
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator/(const float &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = m00 / rhs;
-        ret.m10 = m10 / rhs;
-        ret.m01 = m01 / rhs;
-        ret.m11 = m11 / rhs;
-        return ret;
-    }
-            
-    __host__ __device__ cuvec2f cumat2f::operator*(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        
-        ret.x = m00*rhs.x + m01*rhs.y;
-        ret.y = m10*rhs.x + m11*rhs.y;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::operator*(const cumat2f &rhs)
-    {
-        cumat2f ret;
-    
-        ret.m00 = m00*rhs.m00 + m01*rhs.m10;
-        ret.m01 = m00*rhs.m01 + m01*rhs.m11;
-        ret.m10 = m10*rhs.m00 + m11*rhs.m10;
-        ret.m11 = m10*rhs.m01 + m11*rhs.m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ float cumat2f::det()
-    {
-        float ret = 0.0;
-    
-        ret += m00*m11;
-        ret -= m01*m10;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::transpose()
-    {
-        cumat2f ret;
-    
-        ret.m00 = m00;
-        ret.m01 = m10;
-        ret.m10 = m01;
-        ret.m11 = m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f cumat2f::inverse()
-    {
-        cumat2f q;
-        float dt = det();
-        if(dt!=0)
-        {
-            q(0,0) = q(1,1)/dt;
-            q(0,1) = -q(0,1)/dt;
-            q(1,0) = -q(1,0)/dt;
-            q(1,1) = q(0,0)/dt;
-
-        }
-        else
-        {
-            q(0,0) = inf;
-            q(0,1) = inf;
-            q(1,0) = inf;
-            q(1,1) = inf;
-        }
-    
-        return q;
-    }
-    
-    __host__ __device__ cumat2f operator-(const cumat2f &rhs)
-    {
-        cumat2f ret;
-        ret.m00 = -rhs.m00;
-        ret.m10 = -rhs.m10;
-        ret.m01 = -rhs.m01;
-        ret.m11 = -rhs.m11;
-    
-        return ret;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f &rhs)
-    {
-        m00 += rhs.m00;
-        m10 += rhs.m10;
-        m01 += rhs.m01;
-        m11 += rhs.m11;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f &rhs)
-    {
-        m00 -= rhs.m00;
-        m10 -= rhs.m10;
-        m01 -= rhs.m01;
-        m11 -= rhs.m11;
-    
-        return *this;
-    }
-    
-    
-    
-    __host__ __device__ cumat2f& cumat2f::operator/=(const float &rhs)
-    {
-        m00 /= rhs;
-        m10 /= rhs;
-        m01 /= rhs;
-        m11 /= rhs;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator*=(const float &rhs)
-    {
-        m00 *= rhs;
-        m10 *= rhs;
-        m01 *= rhs;
-        m11 *= rhs;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f &rhs)
-    {
-        cumat2f tmp;
-    
-        tmp.m00 = m00*rhs.m00 + m01*rhs.m10; 
-        tmp.m01 = m00*rhs.m01 + m01*rhs.m11; 
-        tmp.m10 = m10*rhs.m00 + m11*rhs.m10; 
-        tmp.m11 = m10*rhs.m01 + m11*rhs.m11;
-    
-        (*this) = tmp;
-    
-        return *this;
-    }
-    
-    __host__ __device__ cumat2f::cumat2f(
-        const float & _m00, const float & _m01,
-        const float & _m10, const float & _m11
-    )
-    {
-        m00 = _m00;
-        m01 = _m01;
-        m10 = _m10;
-        m11 = _m11;
-    }
-    
-    __host__ __device__ float* cumat2f::data()
-    {
-         //pointer to float[9] representation of matrix
-        return (float*) this;
-    }
-    
-    __host__ __device__ const float* cumat2f::data() const
-    {
-         //pointer to float[9] representation of matrix
-        return (const float*) this;
-    }
-    
-    __host__ __device__ cumat2f::cumat2f(const float* data2x2)
-    {
-        m00 = data2x2[0];
-        m10 = data2x2[1];
-        m01 = data2x2[2];
-        m11 = data2x2[3];
-        return;
-    }
-
-    __host__ __device__ cumat2f cumat2f_rot_from_angle(const float &angle)
-    {
-        cumat2f R;
-        R(0,0) = ::cosf(angle);
-        R(1,0) = ::sinf(angle);
-        R(0,1) = -::sinf(angle);
-        R(1,1) = ::cosf(angle);
-        return R;
-    }
-
-    __host__ __device__ cuvec2f operator-(const cuvec2f &rhs)
-    {
-        cuvec2f ret;
-        ret[0] = -rhs[0];
-        ret[1] = -rhs[1];
-        return ret;
-    }
-
 
 void test_cuvec2f_1()
 {
diff --git a/src/amsculib3/math/cuvec3f.cu b/src/amsculib3/math/cuvec3f.cu
index fb69594..1426dbb 100644
--- a/src/amsculib3/math/cuvec3f.cu
+++ b/src/amsculib3/math/cuvec3f.cu
@@ -51,21 +51,36 @@ namespace amscuda
         return ret;
     }
 
-    __host__ __device__ cuvec3f cuvec3f::operator*(const float &rhs)
+    __host__ __device__ cuvec3f operator*(const cuvec3f& lhs, const float &rhs)
     {
         cuvec3f ret;
-        ret.x = x*rhs;
-        ret.y = y*rhs;
-        ret.z = z*rhs;
+        ret.x = lhs.x*rhs;
+        ret.y = lhs.y*rhs;
+        ret.z = lhs.z*rhs;
         return ret;
     }
-
-    __host__ __device__ cuvec3f cuvec3f::operator/(const float &rhs)
+    __host__ __device__ cuvec3f operator/(const cuvec3f& lhs, const float &rhs)
     {
         cuvec3f ret;
-        ret.x = x/rhs;
-        ret.y = y/rhs;
-        ret.z = z/rhs;
+        ret.x = lhs.x/rhs;
+        ret.y = lhs.y/rhs;
+        ret.z = lhs.z/rhs;
+        return ret;
+    }
+    __host__ __device__ cuvec3f operator*(const float& lhs, const cuvec3f &rhs)
+    {
+        cuvec3f ret;
+        ret.x = rhs.x*lhs;
+        ret.y = rhs.y*lhs;
+        ret.z = rhs.z*lhs;
+        return ret;
+    }
+    __host__ __device__ cuvec3f operator/(const float& lhs, const cuvec3f &rhs)
+    {
+        cuvec3f ret; //scalar divided by vector, elementwise: ret[i] = lhs/rhs[i]
+        ret.x = lhs/rhs.x;
+        ret.y = lhs/rhs.y;
+        ret.z = lhs/rhs.z;
         return ret;
     }
 
diff --git a/src/amsculib3/math/cuvec4f.cu b/src/amsculib3/math/cuvec4f.cu
index 588a03e..70bbf23 100644
--- a/src/amsculib3/math/cuvec4f.cu
+++ b/src/amsculib3/math/cuvec4f.cu
@@ -59,23 +59,40 @@ __host__ __device__ cuvec4f cuvec4f::operator-(const cuvec4f &rhs)
     return ret;
 }
 
-__host__ __device__ cuvec4f cuvec4f::operator*(const float &rhs)
+__host__ __device__ cuvec4f operator*(const cuvec4f& lhs, const float &rhs) // vector * scalar: returns lhs with each of x,y,z,w multiplied by rhs
 {
     cuvec4f ret;
-    ret.x = x*rhs;
-    ret.y = y*rhs;
-    ret.z = z*rhs;
-    ret.w = w*rhs;
+    ret.x = lhs.x*rhs;
+    ret.y = lhs.y*rhs;
+    ret.z = lhs.z*rhs;
+    ret.w = lhs.w*rhs;
     return ret;
 }
-
-__host__ __device__ cuvec4f cuvec4f::operator/(const float &rhs)
+__host__ __device__ cuvec4f operator/(const cuvec4f& lhs, const float &rhs) // vector / scalar: returns lhs with each of x,y,z,w divided by rhs (no zero check)
 {
     cuvec4f ret;
-    ret.x = x/rhs;
-    ret.y = y/rhs;
-    ret.z = z/rhs;
-    ret.w = w/rhs;
+    ret.x = lhs.x/rhs;
+    ret.y = lhs.y/rhs;
+    ret.z = lhs.z/rhs;
+    ret.w = lhs.w/rhs;
+    return ret;
+}
+__host__ __device__ cuvec4f operator*(const float& lhs, const cuvec4f &rhs) // scalar * vector: returns rhs with each of x,y,z,w multiplied by lhs
+{
+    cuvec4f ret;
+    ret.x = rhs.x*lhs;
+    ret.y = rhs.y*lhs;
+    ret.z = rhs.z*lhs;
+    ret.w = rhs.w*lhs;
+    return ret;
+}
+__host__ __device__ cuvec4f operator/(const float& lhs, const cuvec4f &rhs) // scalar / vector: returns (lhs/rhs.x, lhs/rhs.y, lhs/rhs.z, lhs/rhs.w)
+{ // the scalar is the numerator here; dividing rhs by lhs would silently duplicate vector / scalar
+    cuvec4f ret;
+    ret.x = lhs/rhs.x;
+    ret.y = lhs/rhs.y;
+    ret.z = lhs/rhs.z;
+    ret.w = lhs/rhs.w;
     return ret;
 }
 
diff --git a/test_scripts/cuvec2f_codegen1.cu b/test_scripts/cuvec2f_codegen1.cu
new file mode 100644
index 0000000..445c6f7
--- /dev/null
+++ b/test_scripts/cuvec2f_codegen1.cu
@@ -0,0 +1,412 @@
+	__host__ __device__ cuvec2f::cuvec2f() // default constructor: sets both components to 0
+	{
+		x = 0; y = 0; 
+		return;
+	}
+
+	__host__ __device__ cuvec2f::~cuvec2f() // destructor: only overwrites x and y with 0
+	{
+		x = 0; y = 0; 
+		return;
+	}
+
+	__host__ __device__ cuvec2f::cuvec2f(const float &_x, const float &_y) // component constructor: x = _x, y = _y
+	{
+		x = _x; y = _y; 
+		return;
+	}
+
+	__host__ __device__ float& cuvec2f::operator[](const int &I) // mutable component access: 0 -> x, 1 -> y
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}
+		// any other index is not rejected; it falls through to x
+		return x;
+	}
+
+	__host__ __device__ const float& cuvec2f::operator[](const int &I) const // read-only component access: 0 -> x, 1 -> y
+	{
+		switch(I)
+		{
+			case 0:
+				return x;
+			case 1:
+				return y;
+		}
+		// any other index is not rejected; it falls through to x
+		return x;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator+(const cuvec2f& rhs) const // componentwise sum of *this and rhs
+	{
+		cuvec2f ret;
+		ret.x = x + rhs.x;
+		ret.y = y + rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator-(const cuvec2f& rhs) const // componentwise difference *this - rhs
+	{
+		cuvec2f ret;
+		ret.x = x - rhs.x;
+		ret.y = y - rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator*(const cuvec2f& rhs) const // returns (x*rhs.x, y*rhs.y); this is not a dot product
+	{
+		//Elementwise product
+		cuvec2f ret;
+		ret.x = x * rhs.x;
+		ret.y = y * rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f cuvec2f::operator/(const cuvec2f& rhs) const // returns (x/rhs.x, y/rhs.y); no zero check
+	{
+		//Elementwise division
+		cuvec2f ret;
+		ret.x = x / rhs.x;
+		ret.y = y / rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const float& rhs) // vector * scalar: scales both components of lhs by rhs
+	{
+		cuvec2f ret;
+		ret.x = lhs.x*rhs;
+		ret.y = lhs.y*rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator*(const float& lhs, const cuvec2f& rhs) // scalar * vector: scales both components of rhs by lhs
+	{
+		cuvec2f ret;
+		ret.x = lhs*rhs.x;
+		ret.y = lhs*rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const cuvec2f& lhs, const float& rhs) // vector / scalar: divides both components of lhs by rhs (no zero check)
+	{
+		cuvec2f ret;
+		ret.x = lhs.x/rhs;
+		ret.y = lhs.y/rhs;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator/(const float& lhs, const cuvec2f& rhs) // scalar / vector: returns (lhs/rhs.x, lhs/rhs.y); no zero check
+	{
+		cuvec2f ret;
+		ret.x = lhs/rhs.x;
+		ret.y = lhs/rhs.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f operator-(const cuvec2f& other) // unary minus: returns (-other.x, -other.y)
+	{
+		cuvec2f ret;
+		ret.x = -other.x;
+		ret.y = -other.y;
+		return ret;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator+=(const cuvec2f& rhs) // adds rhs componentwise in place; returns *this
+	{
+		x += rhs.x;
+		y += rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator-=(const cuvec2f& rhs) // subtracts rhs componentwise in place; returns *this
+	{
+		x -= rhs.x;
+		y -= rhs.y;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator*=(const float& rhs) // scales both components by rhs in place; returns *this
+	{
+		x *= rhs;
+		y *= rhs;
+		return *this;
+	}
+
+	__host__ __device__ cuvec2f& cuvec2f::operator/=(const float& rhs) // divides both components by rhs in place (no zero check); returns *this
+	{
+		x /= rhs;
+		y /= rhs;
+		return *this;
+	}
+
+
+//
+//Matrix Header Stuff
+//
+
+	__host__ __device__ cumat2f::cumat2f() // default constructor: all four entries set to 0
+	{
+		m00 = 0;
+		m01 = 0;
+
+		m10 = 0;
+		m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::~cumat2f() // destructor: does nothing (the zeroing below is commented out)
+	{
+		//m00 = 0;
+		//m01 = 0;
+
+		//m10 = 0;
+		//m11 = 0;
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f( // entry constructor; note the argument order m00, m10, m01, m11
+		const float& _m00, const float& _m10, 
+		const float& _m01, const float& _m11
+	)
+	{
+		m00 = _m00; 
+		m10 = _m10; 
+
+		m01 = _m01; 
+		m11 = _m11; 
+
+
+		return;
+	}
+
+	__host__ __device__ cumat2f::cumat2f(const float* data4) // reads data4[0..3] into m00, m10, m01, m11 (in that order); data4 is not null-checked
+	{
+		m00 = data4[0]; 
+		m10 = data4[1]; 
+
+		m01 = data4[2]; 
+		m11 = data4[3]; 
+
+
+		return;
+	}
+
+	__host__ __device__ float& cumat2f::operator[](const int &I) // mutable flat access: 0 -> m00, 1 -> m10, 2 -> m01, 3 -> m11
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+		// any other index is not rejected; it falls through to m00
+		return m00;
+	}
+
+	__host__ __device__ const float& cumat2f::operator[](const int &I) const // read-only flat access: 0 -> m00, 1 -> m10, 2 -> m01, 3 -> m11
+	{
+		switch(I)
+		{
+			case 0:
+				return m00;
+			case 1:
+				return m10;
+			case 2:
+				return m01;
+			case 3:
+				return m11;
+		}
+		// any other index is not rejected; it falls through to m00
+		return m00;
+	}
+
+	__host__ __device__ float& cumat2f::operator()(const int &I, const int &J) // mutable access to entry mIJ via flat index I+2*J
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::operator()(const int &I, const int &J) const // read-only access to entry mIJ via flat index I+2*J
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ float& cumat2f::at(const int &I, const int &J) // same lookup as operator()(I,J): flat index I+2*J, no range check
+	{
+		return (*this)[I+2*J];
+	}
+
+	__host__ __device__ const float& cumat2f::at(const int &I, const int &J) const // read-only variant: flat index I+2*J, no range check
+	{
+		return (*this)[I+2*J];
+	}
+
+__host__ __device__ float* cumat2f::data() // returns the object's own address reinterpreted as float*
+{
+	return (float*)this; // NOTE(review): assumes the four float members are stored contiguously in the order used by operator[] - confirm against the class declaration
+}
+
+__host__ __device__ const float* cumat2f::data() const // read-only variant: returns the object's own address reinterpreted as const float*
+{
+	return (const float*)this; // keep the const qualifier in the cast instead of stripping it and re-adding it on return
+}
+
+__host__ __device__ cumat2f cumat2f::operator+(const cumat2f& rhs) const // entrywise sum of *this and rhs
+{
+	cumat2f ret;
+	ret.m00 = m00 + rhs.m00;
+	ret.m10 = m10 + rhs.m10;
+
+	ret.m01 = m01 + rhs.m01;
+	ret.m11 = m11 + rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator-(const cumat2f& rhs) const // entrywise difference *this - rhs
+{
+	cumat2f ret;
+	ret.m00 = m00 - rhs.m00;
+	ret.m10 = m10 - rhs.m10;
+
+	ret.m01 = m01 - rhs.m01;
+	ret.m11 = m11 - rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f cumat2f::operator*(const cumat2f& rhs) const // matrix product: ret.mIK = sum over J of mIJ * rhs.mJK
+{
+	cumat2f ret; //should be zeroed in constructor
+	ret.m00 = m00*rhs.m00 + m01*rhs.m10;
+	ret.m01 = m00*rhs.m01 + m01*rhs.m11;
+
+	ret.m10 = m10*rhs.m00 + m11*rhs.m10;
+	ret.m11 = m10*rhs.m01 + m11*rhs.m11;
+
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const cumat2f& lhs, const float& rhs) // matrix * scalar: every entry of lhs multiplied by rhs
+{
+	cumat2f ret;
+	ret.m00=lhs.m00*rhs;
+	ret.m10=lhs.m10*rhs;
+	ret.m01=lhs.m01*rhs;
+	ret.m11=lhs.m11*rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator/(const cumat2f& lhs, const float& rhs) // matrix / scalar: every entry of lhs divided by rhs (no zero check)
+{
+	cumat2f ret;
+	ret.m00=lhs.m00/rhs;
+	ret.m10=lhs.m10/rhs;
+	ret.m01=lhs.m01/rhs;
+	ret.m11=lhs.m11/rhs;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator*(const float& lhs, const cumat2f& rhs) // scalar * matrix: every entry of rhs multiplied by lhs
+{
+	cumat2f ret;
+	ret.m00=lhs*rhs.m00;
+	ret.m10=lhs*rhs.m10;
+	ret.m01=lhs*rhs.m01;
+	ret.m11=lhs*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs) // matrix * vector: ret component I = lhs.mI0*rhs.x + lhs.mI1*rhs.y
+{
+	cuvec2f ret;
+	ret.x = lhs.m00*rhs.x + lhs.m01*rhs.y;
+	ret.y = lhs.m10*rhs.x + lhs.m11*rhs.y;
+	return ret;
+}
+
+__host__ __device__ cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs) // vector * matrix: ret component J = lhs.x*rhs.m0J + lhs.y*rhs.m1J
+{
+	cuvec2f ret;
+	ret.x = lhs.x*rhs.m00 + lhs.y*rhs.m10;
+	ret.y = lhs.x*rhs.m01 + lhs.y*rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f operator-(const cumat2f& rhs) // unary minus: every entry negated
+{
+	cumat2f ret;
+	ret.m00 = -rhs.m00;
+	ret.m10 = -rhs.m10;
+	ret.m01 = -rhs.m01;
+	ret.m11 = -rhs.m11;
+	return ret;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator+=(const cumat2f& rhs) // adds rhs entrywise in place; returns *this
+{
+	m00 += rhs.m00;
+	m10 += rhs.m10;
+	m01 += rhs.m01;
+	m11 += rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator-=(const cumat2f& rhs) // subtracts rhs entrywise in place; returns *this
+{
+	m00 -= rhs.m00;
+	m10 -= rhs.m10;
+	m01 -= rhs.m01;
+	m11 -= rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const float& rhs) // scales every entry by rhs in place; returns *this
+{
+	m00 *= rhs;
+	m10 *= rhs;
+	m01 *= rhs;
+	m11 *= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator/=(const float& rhs) // divides every entry by rhs in place (no zero check); returns *this
+{
+	m00 /= rhs;
+	m10 /= rhs;
+	m01 /= rhs;
+	m11 /= rhs;
+	return *this;
+}
+
+__host__ __device__ cumat2f& cumat2f::operator*=(const cumat2f& rhs) // in-place matrix product: *this = (*this) * rhs; returns *this
+{
+	cumat2f tmp = *this; // snapshot of the old entries, since each new entry needs several old ones
+	m00 = tmp.m00*rhs.m00 + tmp.m01*rhs.m10;
+	m01 = tmp.m00*rhs.m01 + tmp.m01*rhs.m11;
+	m10 = tmp.m10*rhs.m00 + tmp.m11*rhs.m10;
+	m11 = tmp.m10*rhs.m01 + tmp.m11*rhs.m11;
+	return *this;
+}
+
+__host__ __device__ cumat2f cumat2f::transpose() const // returns a copy with m10 and m01 swapped; *this is unchanged
+{
+	cumat2f ret;
+	ret.m00 = m00;
+	ret.m10 = m01;
+	ret.m01 = m10;
+	ret.m11 = m11;
+	return ret;
+}
+
diff --git a/test_scripts/cuvec2f_codegen1.hpp b/test_scripts/cuvec2f_codegen1.hpp
new file mode 100644
index 0000000..8f88e2c
--- /dev/null
+++ b/test_scripts/cuvec2f_codegen1.hpp
@@ -0,0 +1,54 @@
+	__host__ __device__ cuvec2f(); 
+	__host__ __device__ ~cuvec2f(); 
+	__host__ __device__ cuvec2f(const float &_x, const float &_y); 
+	__host__ __device__ float& operator[](const int &I);
+	__host__ __device__ const float& operator[](const int &I) const;
+	__host__ __device__ cuvec2f operator+(const cuvec2f& rhs) const;
+	__host__ __device__ cuvec2f operator-(const cuvec2f& rhs) const;
+	__host__ __device__ cuvec2f operator*(const cuvec2f& rhs) const; 	 //elementwise product
+	__host__ __device__ cuvec2f operator/(const cuvec2f& rhs) const; 	 //elementwise division
+	__host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const float& rhs);
+	__host__ __device__ friend cuvec2f operator*(const float& lhs, const cuvec2f& rhs);
+	__host__ __device__ friend cuvec2f operator/(const cuvec2f& lhs, const float& rhs);
+	__host__ __device__ friend cuvec2f operator/(const float& lhs, const cuvec2f& rhs);
+	__host__ __device__ friend cuvec2f operator-(const cuvec2f& other);
+	__host__ __device__ cuvec2f& operator+=(const cuvec2f& rhs);
+	__host__ __device__ cuvec2f& operator-=(const cuvec2f& rhs);
+	__host__ __device__ cuvec2f& operator*=(const float& rhs);
+	__host__ __device__ cuvec2f& operator/=(const float& rhs);
+//
+//Matrix Header Stuff
+//
+float m00,m10;
+float m01,m11;
+
+	__host__ __device__ cumat2f(); 
+	__host__ __device__ ~cumat2f(); 
+	__host__ __device__ cumat2f(
+		const float& _m00, const float& _m10, 
+		const float& _m01, const float& _m11
+	);
+	__host__ __device__ cumat2f(const float* data4);
+	__host__ __device__ float& operator[](const int &I);
+	__host__ __device__ const float& operator[](const int &I) const;
+	__host__ __device__ float& operator()(const int &I, const int &J);
+	__host__ __device__ const float& operator()(const int &I, const int &J) const;
+	__host__ __device__ float& at(const int &I, const int &J);
+	__host__ __device__ const float& at(const int &I, const int &J) const;
+__host__ __device__ float* data(); //pointer to float4 representation of matrix
+__host__ __device__ const float* data() const; //pointer to float4 representation of matrix
+__host__ __device__ cumat2f operator+(const cumat2f& rhs) const;
+__host__ __device__ cumat2f operator-(const cumat2f& rhs) const;
+__host__ __device__ cumat2f operator*(const cumat2f& rhs) const;
+__host__ __device__ friend cumat2f operator*(const cumat2f& lhs, const float& rhs);
+__host__ __device__ friend cumat2f operator/(const cumat2f& lhs, const float& rhs);
+__host__ __device__ friend cumat2f operator*(const float& lhs, const cumat2f& rhs);
+__host__ __device__ friend cuvec2f operator*(const cumat2f& lhs, const cuvec2f& rhs);
+__host__ __device__ friend cuvec2f operator*(const cuvec2f& lhs, const cumat2f& rhs);
+__host__ __device__ friend cumat2f operator-(const cumat2f& rhs);
+__host__ __device__ cumat2f& operator+=(const cumat2f& rhs);
+__host__ __device__ cumat2f& operator-=(const cumat2f& rhs);
+__host__ __device__ cumat2f& operator*=(const float& rhs);
+__host__ __device__ cumat2f& operator/=(const float& rhs);
+__host__ __device__ cumat2f& operator*=(const cumat2f& rhs);
+__host__ __device__ cumat2f transpose() const;
diff --git a/test_scripts/cuvec_codegen.py b/test_scripts/cuvec_codegen.py
new file mode 100644
index 0000000..a3b2ed7
--- /dev/null
+++ b/test_scripts/cuvec_codegen.py
@@ -0,0 +1,1104 @@
+#!/usr/bin/python3
+
+import os,sys,math
+import numpy as np
+
+cudadec = "__host__ __device__"
+
+def typechar(dtype):
+    # Map a C type name to the suffix used in generated class names.
+    sshort = ""
+    # "double" -> "", "float" -> "f", "int" -> "i"; any other string also yields ""
+    match(dtype):
+        case "double":
+            sshort = ""
+        case "float":
+            sshort = "f"
+        case "int":
+            sshort = "i"
+
+    return sshort
+
+def vtypename(dim,dtype): # vector class name, e.g. (2,"float") -> "cuvec2f"
+    name = "cuvec{}{}".format(dim,typechar(dtype))
+    return name
+
+def mtypename(dim,dtype): # matrix class name, e.g. (2,"float") -> "cumat2f"
+    name = "cumat{}{}".format(dim,typechar(dtype))
+    return name
+
+def vdc(dim): # component name for index dim: 0 -> "x", 1 -> "y", 2 -> "z", 3 -> "w"
+    dcomp = "x" # returned unchanged for any index outside 0..3
+    match(dim):
+        case 0:
+            dcomp = "x"
+        case 1:
+            dcomp = "y"
+        case 2:
+            dcomp = "z"
+        case 3:
+            dcomp = "w"
+
+    return dcomp
+
+
+def genvecheaderv1(dim,dtype):
+    # Build and return the text of a minimal class declaration for the dim-component vector of dtype.
+    lns = ""
+    name = vtypename(dim,dtype)
+    lns += "class {}\n".format(name)
+    lns += "{\n\tpublic:\n"
+    # one public data member per component
+    for I in range(0,dim):
+        lns += "\t{} {};\n".format(dtype,vdc(I))
+
+    lns += "\n"
+    lns += "\t{} {}();\n".format(cudadec,name)
+    lns += "\t{} ~{}();\n".format(cudadec,name)
+    lns += "\t{} {}(".format(cudadec,name)
+    for I in range(0,dim):
+        lns += "const {} &_{}".format(dtype,vdc(I))
+        if(I<dim-1):
+            lns +=", "
+    lns +=");\n\n"
+
+
+    
+
+    lns += "};"
+
+    return lns
+
+def fwrite(fname,lns):
+    fp = open(fname,"w+")
+    fp.writelines(lns)
+    fp.close()
+
+def vconstructor1(dim,dtype): # default constructor of the vector class; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    vname = vtypename(dim,dtype)
+
+    lnsh += "\t{} {}(); \n".format(cudadec,vname)
+
+    lnss += "\t{} {}::{}() \n".format(cudadec,vname,vname)
+    lnss += "\t{\n"
+    # emitted body sets every component to 0 on a single line
+    lnss += "\t\t"
+    for I in range(0,dim):
+        lnss += "{} = 0; ".format(vdc(I))
+    lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def vdestructor1(dim,dtype): # destructor of the vector class; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    vname = vtypename(dim,dtype)
+
+    lnsh += "\t{} ~{}(); \n".format(cudadec,vname)
+
+    lnss += "\t{} {}::~{}() \n".format(cudadec,vname,vname)
+    lnss += "\t{\n"
+    # emitted body overwrites every component with 0 on a single line
+    lnss += "\t\t"
+    for I in range(0,dim):
+        lnss += "{} = 0; ".format(vdc(I))
+    lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def vconstructor2(dim,dtype): # per-component constructor of the vector class; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    vname = vtypename(dim,dtype)
+    # declaration: one "const dtype &_c" parameter per component, comma separated
+    lnsh += "\t{} {}(".format(cudadec,vname)
+    for I in range(0,dim):
+        lnsh += "const {} &_{}".format(dtype,vdc(I))
+        if(I<dim-1):
+            lnsh += ", "
+    lnsh += "); \n"
+    # definition: same parameter list, qualified with the class name
+    lnss += "\t{} {}::{}(".format(cudadec,vname,vname)
+    for I in range(0,dim):
+        lnss += "const {} &_{}".format(dtype,vdc(I))
+        if(I<dim-1):
+            lnss += ", "
+    lnss += ") \n"
+    lnss += "\t{\n"
+    # emitted body copies each _c parameter into member c
+    lnss += "\t\t"
+    for I in range(0,dim):
+        lnss += "{} = _{}; ".format(vdc(I),vdc(I))
+    lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def v_operatorbk1(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += "\t{} float& operator[](const int &I);\n".format(cudadec)
+
+    lnss += "\t{} float& {}::operator[](const int &I)\n".format(cudadec,vname)
+    lnss += "\t{\n"
+    lnss += "\t\tswitch(I)\n\t\t{\n"
+    for I in range(0,dim):
+        lnss += "\t\t\tcase {}:\n".format(I)
+        lnss += "\t\t\t\treturn {};\n".format(vdc(I))
+    lnss += "\t\t}\n"
+    lnss += "\n\t\treturn {};\n".format(vdc(0))
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorbk2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += "\t{} const float& operator[](const int &I) const;\n".format(cudadec)
+
+    lnss += "\t{} const float& {}::operator[](const int &I) const\n".format(cudadec,vname)
+    lnss += "\t{\n"
+    lnss += "\t\tswitch(I)\n\t\t{\n"
+    for I in range(0,dim):
+        lnss += "\t\t\tcase {}:\n".format(I)
+        lnss += "\t\t\t\treturn {};\n".format(vdc(I))
+    lnss += "\t\t}\n"
+    lnss += "\n\t\treturn {};\n".format(vdc(0))
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorplus1(dim,dtype): # member operator+ (componentwise sum); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname} operator+(const {vname}& rhs) const;\n"
+
+    lnss += f"\t{cudadec} {vname} {vname}::operator+(const {vname}& rhs) const\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = {vdc(I)} + rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorminus1(dim,dtype): # member binary operator- (componentwise difference); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname} operator-(const {vname}& rhs) const;\n"
+
+    lnss += f"\t{cudadec} {vname} {vname}::operator-(const {vname}& rhs) const\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = {vdc(I)} - rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatortimes3(dim,dtype): # member operator*(vector): elementwise product; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname} operator*(const {vname}& rhs) const; \t //elementwise product\n"
+
+    lnss += f"\t{cudadec} {vname} {vname}::operator*(const {vname}& rhs) const\n"
+    lnss += "\t{\n"
+    lnss += "\t\t//Elementwise product\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = {vdc(I)} * rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatordiv3(dim,dtype): # member operator/(vector): elementwise division; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname} operator/(const {vname}& rhs) const; \t //elementwise division\n"
+
+    lnss += f"\t{cudadec} {vname} {vname}::operator/(const {vname}& rhs) const\n"
+    lnss += "\t{\n"
+    lnss += "\t\t//Elementwise division\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = {vdc(I)} / rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatortimes1(dim,dtype): # friend operator*(vector, scalar); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} friend {vname} operator*(const {vname}& lhs, const {dtype}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname} operator*(const {vname}& lhs, const {dtype}& rhs)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = lhs.{vdc(I)}*rhs;\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatortimes2(dim,dtype): # friend operator*(scalar, vector); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} friend {vname} operator*(const {dtype}& lhs, const {vname}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname} operator*(const {dtype}& lhs, const {vname}& rhs)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = lhs*rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatordiv1(dim,dtype): # friend operator/(vector, scalar); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} friend {vname} operator/(const {vname}& lhs, const {dtype}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname} operator/(const {vname}& lhs, const {dtype}& rhs)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = lhs.{vdc(I)}/rhs;\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatordiv2(dim,dtype): # friend operator/(scalar, vector), emitting lhs/rhs.c per component; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} friend {vname} operator/(const {dtype}& lhs, const {vname}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname} operator/(const {dtype}& lhs, const {vname}& rhs)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = lhs/rhs.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorneg(dim,dtype): # friend unary operator- (negates each component); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} friend {vname} operator-(const {vname}& other);\n"
+
+    lnss += f"\t{cudadec} {vname} operator-(const {vname}& other)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\t\tret.{vdc(I)} = -other.{vdc(I)};\n"
+    lnss += "\t\treturn ret;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorpluseq1(dim,dtype): # member operator+=(vector), returning *this; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname}& operator+=(const {vname}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname}& {vname}::operator+=(const {vname}& rhs)\n"
+    lnss += "\t{\n"
+    for I in range(0,dim):
+        lnss += f"\t\t{vdc(I)} += rhs.{vdc(I)};\n"
+    lnss += "\t\treturn *this;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatorminuseq1(dim,dtype): # member operator-=(vector), returning *this; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname}& operator-=(const {vname}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname}& {vname}::operator-=(const {vname}& rhs)\n"
+    lnss += "\t{\n"
+    for I in range(0,dim):
+        lnss += f"\t\t{vdc(I)} -= rhs.{vdc(I)};\n"
+    lnss += "\t\treturn *this;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def v_operatortimeseq1(dim,dtype): # member operator*=(scalar), returning *this; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname}& operator*=(const {dtype}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname}& {vname}::operator*=(const {dtype}& rhs)\n"
+    lnss += "\t{\n"
+    for I in range(0,dim):
+        lnss += f"\t\t{vdc(I)} *= rhs;\n"
+    lnss += "\t\treturn *this;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+
+def v_operatordiveq1(dim,dtype): # member operator/=(scalar), returning *this; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {vname}& operator/=(const {dtype}& rhs);\n"
+
+    lnss += f"\t{cudadec} {vname}& {vname}::operator/=(const {dtype}& rhs)\n"
+    lnss += "\t{\n"
+    for I in range(0,dim):
+        lnss += f"\t\t{vdc(I)} /= rhs;\n"
+    lnss += "\t\treturn *this;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def genvectorstuff(dim,dtype):
+    # Concatenate the output of every vector generator, in a fixed order, into [header_text, source_text].
+    lnsh = ""
+    lnss = ""
+    # constructors, destructor and element access
+    [lnsh2,lnss2] = vconstructor1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = vdestructor1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = vconstructor2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorbk1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorbk2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorplus1(dim,dtype) # binary arithmetic operators start here
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorminus1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatortimes3(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatordiv3(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatortimes1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatortimes2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatordiv1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatordiv2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorneg(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorpluseq1(dim,dtype) # compound assignment operators start here
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatorminuseq1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatortimeseq1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+    [lnsh2,lnss2] = v_operatordiveq1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2
+
+    return [lnsh,lnss]
+
+def matrix_data(dim,dtype): # data-member declarations of the matrix class; returns [header_text, ""] (no source part)
+    lnsh = ""
+    lnss = ""
+
+    # one declaration line per J listing m0J, m1J, ..., e.g. "float m00,m10;" then "float m01,m11;" for dim 2
+    for J in range(0,dim):
+        lnsh += f"{dtype} "
+        for I in range(0,dim):
+            lnsh += f"m{I}{J}"
+            if(I<dim-1):
+                lnsh += ","
+        lnsh += ";\n"
+
+    lnsh+="\n"
+
+    return [lnsh, lnss]
+
+
+def mconstructor1(dim,dtype): # default constructor of the matrix class (all entries = 0); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {mname}(); \n"
+
+    lnss += f"\t{cudadec} {mname}::{mname}() \n"
+    lnss += "\t{\n"
+    # one assignment per entry, with a blank line after each group sharing the first index
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\t\tm{I}{J} = 0;\n"
+        lnss += "\n"
+    
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def mdestructor1(dim,dtype): # destructor of the matrix class; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} ~{mname}(); \n"
+
+    lnss += f"\t{cudadec} {mname}::~{mname}() \n"
+    lnss += "\t{\n"
+    # the zeroing statements are emitted as C++ comments, so the generated destructor does nothing
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\t\t//m{I}{J} = 0;\n"
+        lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def mconstructor2(dim,dtype): # per-entry constructor of the matrix class; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+    # declaration: parameters ordered with the first index varying fastest (_m00, _m10, ..., then _m01, ...)
+    lnsh += f"\t{cudadec} {mname}(\n"
+    for I in range(0,dim):
+        lnsh+="\t\t"
+        for J in range(0,dim):
+            lnsh += f"const {dtype}& _m{J}{I}"
+            if(J+I*dim<dim*dim-1): # comma after every parameter except the very last
+                lnsh+=", "
+        lnsh+="\n"
+    lnsh += "\t);\n"
+    # definition: same parameter list, qualified with the class name
+    lnss += f"\t{cudadec} {mname}::{mname}(\n"
+    for I in range(0,dim):
+        lnss+="\t\t"
+        for J in range(0,dim):
+            lnss += f"const {dtype}& _m{J}{I}"
+            if(J+I*dim<dim*dim-1):
+                lnss+=", "
+        lnss+="\n"
+    lnss += "\t)\n"
+    lnss += "\t{\n"
+    # emitted body copies each _mJI parameter into member mJI
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\t\tm{J}{I} = _m{J}{I}; \n"
+        lnss+="\n"
+    lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def mconstructor3(dim,dtype): # constructor from a pointer to dim*dim values; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} {mname}(const {dtype}* data{dim*dim});\n"
+
+    lnss += f"\t{cudadec} {mname}::{mname}(const {dtype}* data{dim*dim})\n"
+    lnss += "\t{\n"
+    # emitted body reads entry mJI from array index J+dim*I
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\t\tm{J}{I} = data{dim*dim}[{J+dim*I}]; \n"
+        lnss+="\n"
+    lnss += "\n"
+    
+
+    lnss += "\t\treturn;\n"
+    lnss += "\t}\n\n"
+    
+    return [lnsh,lnss]
+
+def moperatorbk1(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} float& operator[](const int &I);\n"
+
+    lnss += f"\t{cudadec} float& {mname}::operator[](const int &I)\n"
+    lnss += "\t{\n"
+    lnss += "\t\tswitch(I)\n\t\t{\n"
+    for I in range(0,dim*dim):
+        Ix = I%dim
+        Iy = I//dim
+        lnss += "\t\t\tcase {}:\n".format(I)
+        lnss += f"\t\t\t\treturn m{Ix}{Iy};\n"
+    lnss += "\t\t}\n"
+    lnss += "\n\t\treturn m00;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorbk2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} const float& operator[](const int &I) const;\n"
+
+    lnss += f"\t{cudadec} const float& {mname}::operator[](const int &I) const\n"
+    lnss += "\t{\n"
+    lnss += "\t\tswitch(I)\n\t\t{\n"
+    for I in range(0,dim*dim):
+        Ix = I%dim
+        Iy = I//dim
+        lnss += "\t\t\tcase {}:\n".format(I)
+        lnss += f"\t\t\t\treturn m{Ix}{Iy};\n"
+    lnss += "\t\t}\n"
+    lnss += "\n\t\treturn m00;\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorbk3(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} float& operator()(const int &I, const int &J);\n"
+
+    lnss += f"\t{cudadec} float& {mname}::operator()(const int &I, const int &J)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\treturn (*this)[I+{dim}*J];\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorbk4(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} const float& operator()(const int &I, const int &J) const;\n"
+
+    lnss += f"\t{cudadec} const float& {mname}::operator()(const int &I, const int &J) const\n"
+    lnss += "\t{\n"
+    lnss += f"\t\treturn (*this)[I+{dim}*J];\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorbk5(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} float& at(const int &I, const int &J);\n"
+
+    lnss += f"\t{cudadec} float& {mname}::at(const int &I, const int &J)\n"
+    lnss += "\t{\n"
+    lnss += f"\t\treturn (*this)[I+{dim}*J];\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorbk6(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"\t{cudadec} const float& at(const int &I, const int &J) const;\n"
+
+    lnss += f"\t{cudadec} const float& {mname}::at(const int &I, const int &J) const\n"
+    lnss += "\t{\n"
+    lnss += f"\t\treturn (*this)[I+{dim}*J];\n"
+    lnss += "\t}\n\n"
+
+    return [lnsh,lnss]
+
+#matrix arithmetic operators
+
+def mdataoperator1(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {dtype}* data(); //pointer to float{dim*dim} representation of matrix\n"
+
+    lnss += f"{cudadec} {dtype}* {mname}::data()\n"
+    lnss += "{\n"
+    lnss += f"\treturn ({dtype}*)this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def mdataoperator2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} const {dtype}* data() const; //pointer to float{dim*dim} representation of matrix\n"
+
+    lnss += f"{cudadec} const {dtype}* {mname}::data() const\n"
+    lnss += "{\n"
+    lnss += f"\treturn ({dtype}*)this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+
+def moperatorplus1(dim,dtype): # member operator+ of the matrix class (entrywise sum); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname} operator+(const {mname}& rhs) const;\n"
+    lnss += f"{cudadec} {mname} {mname}::operator+(const {mname}& rhs) const\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I} = m{J}{I} + rhs.m{J}{I};\n"
+        lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorminus1(dim,dtype): # member binary operator- of the matrix class (entrywise difference); returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname} operator-(const {mname}& rhs) const;\n"
+    lnss += f"{cudadec} {mname} {mname}::operator-(const {mname}& rhs) const\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I} = m{J}{I} - rhs.m{J}{I};\n"
+        lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimes1(dim,dtype): # member operator*(matrix): matrix product; returns [header_text, source_text]
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname} operator*(const {mname}& rhs) const;\n"
+    lnss += f"{cudadec} {mname} {mname}::operator*(const {mname}& rhs) const\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret; //should be zeroed in constructor\n"
+    for I in range(0,dim):
+        for K in range(0,dim):
+            lnss += f"\tret.m{I}{K} = " # each result entry is written out as an explicit sum over J
+            for J in range(0,dim):
+                lnss += f"m{I}{J}*rhs.m{J}{K}"
+                if(J<dim-1):
+                    lnss += " + "
+                else:
+                    lnss += ";\n"
+        lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimes2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {mname} operator*(const {mname}& lhs, const {dtype}& rhs);\n"
+    lnss += f"{cudadec} {mname} operator*(const {mname}& lhs, const {dtype}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I}=lhs.m{J}{I}*rhs;\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatordiv2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {mname} operator/(const {mname}& lhs, const {dtype}& rhs);\n"
+    lnss += f"{cudadec} {mname} operator/(const {mname}& lhs, const {dtype}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I}=lhs.m{J}{I}/rhs;\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimes3(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {mname} operator*(const {dtype}& lhs, const {mname}& rhs);\n"
+    lnss += f"{cudadec} {mname} operator*(const {dtype}& lhs, const {mname}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I}=lhs*rhs.m{J}{I};\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimes4(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {vname} operator*(const {mname}& lhs, const {vname}& rhs);\n"
+    lnss += f"{cudadec} {vname} operator*(const {mname}& lhs, const {vname}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\tret.{vdc(I)} = "
+        for J in range(0,dim):
+            lnss += f"lhs.m{I}{J}*rhs.{vdc(J)}"
+            if(J<dim-1):
+                lnss += " + "
+            else:
+                lnss += ";\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimes5(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+    vname = vtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {vname} operator*(const {vname}& lhs, const {mname}& rhs);\n"
+    lnss += f"{cudadec} {vname} operator*(const {vname}& lhs, const {mname}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{vname} ret;\n"
+    for I in range(0,dim):
+        lnss += f"\tret.{vdc(I)} = "
+        for J in range(0,dim):
+            #lnss += f"lhs.m{I}{J}*rhs.{vdc(J)}"
+            lnss += f"lhs.{vdc(J)}*rhs.m{J}{I}"
+            if(J<dim-1):
+                lnss += " + "
+            else:
+                lnss += ";\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorneg(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} friend {mname} operator-(const {mname}& rhs);\n"
+    lnss += f"{cudadec} {mname} operator-(const {mname}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I} = -rhs.m{J}{I};\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorpluseq(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname}& operator+=(const {mname}& rhs);\n"
+    lnss += f"{cudadec} {mname}& {mname}::operator+=(const {mname}& rhs)\n"
+    lnss += "{\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tm{J}{I} += rhs.m{J}{I};\n"
+        #lnss += "\n"
+    lnss += "\treturn *this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatorminuseq(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname}& operator-=(const {mname}& rhs);\n"
+    lnss += f"{cudadec} {mname}& {mname}::operator-=(const {mname}& rhs)\n"
+    lnss += "{\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tm{J}{I} -= rhs.m{J}{I};\n"
+        #lnss += "\n"
+    lnss += "\treturn *this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimeseq1(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname}& operator*=(const {dtype}& rhs);\n"
+    lnss += f"{cudadec} {mname}& {mname}::operator*=(const {dtype}& rhs)\n"
+    lnss += "{\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tm{J}{I} *= rhs;\n"
+        #lnss += "\n"
+    lnss += "\treturn *this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatordiveq1(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname}& operator/=(const {dtype}& rhs);\n"
+    lnss += f"{cudadec} {mname}& {mname}::operator/=(const {dtype}& rhs)\n"
+    lnss += "{\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tm{J}{I} /= rhs;\n"
+        #lnss += "\n"
+    lnss += "\treturn *this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def moperatortimeseq2(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname}& operator*=(const {mname}& rhs);\n"
+    lnss += f"{cudadec} {mname}& {mname}::operator*=(const {mname}& rhs)\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} tmp = *this;\n"
+    for I in range(0,dim):
+        for K in range(0,dim):
+            lnss += f"\tm{I}{K} = "
+            for J in range(0,dim):
+                lnss += f"tmp.m{I}{J}*rhs.m{J}{K}"
+                if(J<dim-1):
+                    lnss += " + "
+                else:
+                    lnss += ";\n"
+        #lnss += "\n"
+    lnss += "\treturn *this;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+def mtranspose(dim,dtype):
+    lnsh = ""
+    lnss = ""
+
+    mname = mtypename(dim,dtype)
+
+    lnsh += f"{cudadec} {mname} transpose() const;\n"
+    lnss += f"{cudadec} {mname} {mname}::transpose() const\n"
+    lnss += "{\n"
+    lnss += f"\t{mname} ret;\n"
+    for I in range(0,dim):
+        for J in range(0,dim):
+            lnss += f"\tret.m{J}{I} = m{I}{J};\n"
+        #lnss += "\n"
+    lnss += "\treturn ret;\n"
+    lnss += "}\n\n"
+
+    return [lnsh,lnss]
+
+
+    
+
+def genmatrixstuff(dim,dtype):
+
+    lnsh = ""
+    lnss = ""
+
+    lnsh += "//\n//Matrix Header Stuff\n//\n"
+    lnss += "\n//\n//Matrix Header Stuff\n//\n\n"
+
+    [lnsh2,lnss2] = matrix_data(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mconstructor1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mdestructor1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mconstructor2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mconstructor3(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk3(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk4(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk5(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorbk6(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mdataoperator1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mdataoperator2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorplus1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorminus1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimes1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimes2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatordiv2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimes3(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimes4(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimes5(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorneg(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorpluseq(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatorminuseq(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimeseq1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatordiveq1(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = moperatortimeseq2(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    [lnsh2,lnss2] = mtranspose(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+
+    return [lnsh,lnss]
+
+if(__name__=="__main__"):
+    # Script entry point: generate vector and matrix code for one (dim, dtype) pair.
+    dim = 2
+    dtype = "float"
+    genheader = "./test_scripts/{}_codegen1.hpp".format(vtypename(dim,dtype))
+    gensource = "./test_scripts/{}_codegen1.cu".format(vtypename(dim,dtype))
+    # Accumulators for the header text (lnsh) and the source text (lnss).
+    lnsh = ""
+    lnss = ""
+    # The vector declarations/definitions are appended first ...
+    [lnsh2,lnss2] = genvectorstuff(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    # ... followed by the matrix ones.
+    [lnsh2,lnss2] = genmatrixstuff(dim,dtype)
+    lnsh+=lnsh2; lnss+=lnss2;
+    # Hand each text and its target path to fwrite() (defined elsewhere; presumably writes the file).
+    fwrite(genheader,lnsh)
+    fwrite(gensource,lnss)
+    
+
+    #lns = genvecheaderv1(dim,dtype)
+    #print(lns)