diff --git a/build_linux64/libamsculib3.linux64.a b/build_linux64/libamsculib3.linux64.a index 25d94d0..0472127 100644 Binary files a/build_linux64/libamsculib3.linux64.a and b/build_linux64/libamsculib3.linux64.a differ diff --git a/build_linux64/objstore/amscu_cputhreading.o b/build_linux64/objstore/amscu_cputhreading.o index 1127744..06c4827 100644 Binary files a/build_linux64/objstore/amscu_cputhreading.o and b/build_linux64/objstore/amscu_cputhreading.o differ diff --git a/build_linux64/objstore/amscu_util.o b/build_linux64/objstore/amscu_util.o new file mode 100644 index 0000000..b831198 Binary files /dev/null and b/build_linux64/objstore/amscu_util.o differ diff --git a/build_linux64/objstore/amscurandom1.o b/build_linux64/objstore/amscurandom1.o index 0af19a2..f4bdb59 100644 Binary files a/build_linux64/objstore/amscurandom1.o and b/build_linux64/objstore/amscurandom1.o differ diff --git a/build_linux64/objstore/amscurandom1_dbuff.o b/build_linux64/objstore/amscurandom1_dbuff.o index 92f2234..5f4a2ed 100644 Binary files a/build_linux64/objstore/amscurandom1_dbuff.o and b/build_linux64/objstore/amscurandom1_dbuff.o differ diff --git a/build_linux64/objstore/amscurandom1_hbuff.o b/build_linux64/objstore/amscurandom1_hbuff.o index cc2edf1..31e2c0a 100644 Binary files a/build_linux64/objstore/amscurandom1_hbuff.o and b/build_linux64/objstore/amscurandom1_hbuff.o differ diff --git a/build_linux64/objstore/amscurandom_tests1.o b/build_linux64/objstore/amscurandom_tests1.o index c099166..3fcdab9 100644 Binary files a/build_linux64/objstore/amscurandom_tests1.o and b/build_linux64/objstore/amscurandom_tests1.o differ diff --git a/build_linux64/test b/build_linux64/test index d83a87f..e3083a4 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib3/random/amscurandom.cuh b/include/amsculib3/random/amscurandom.cuh index 3365a39..461360c 100644 --- a/include/amsculib3/random/amscurandom.cuh +++ b/include/amsculib3/random/amscurandom.cuh @@ -50,6 +50,7 @@ __host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_ //Tests __host__ void amscurand_tests1(); //test basic random functions +__host__ void amscurand_tests2(); //test basic random functions }; diff --git a/include/amsculib3/util/amscu_util.hpp b/include/amsculib3/util/amscu_util.hpp index 7fde3da..93ebc95 100644 --- a/include/amsculib3/util/amscu_util.hpp +++ b/include/amsculib3/util/amscu_util.hpp @@ -5,7 +5,7 @@ namespace amscuda { namespace util { - + double time_msec(); }; }; diff --git a/src/amsculib3/random/amscurandom1.cu b/src/amsculib3/random/amscurandom1.cu index 7a972e0..e0bcf8d 100644 --- a/src/amsculib3/random/amscurandom1.cu +++ b/src/amsculib3/random/amscurandom1.cu @@ -8,7 +8,9 @@ namespace random { //Choosing xoroshiro64** as my default RNG due to 32 bit only operations - randstate_t global_randstate = xs64ss_state(); + randstate_t global_randstate = xs64ss_state(0); + //must intialize randstate to a value other than {0,0}. The single-param constructor calls splitmix32 to initialize + __host__ void rand_seed(const uint32_t seed) diff --git a/src/amsculib3/random/amscurandom1_dbuff.cu b/src/amsculib3/random/amscurandom1_dbuff.cu index b6f1499..eb59b55 100644 --- a/src/amsculib3/random/amscurandom1_dbuff.cu +++ b/src/amsculib3/random/amscurandom1_dbuff.cu @@ -24,23 +24,30 @@ namespace random int ret = amscu_success; int nthreads = 128; - int nblocks = (size+nthreads)/nthreads; + int nblocks = (size+nthreads*1024)/nthreads/1024; int I; cuarray states; randstate_t *dstates = NULL; cudaError_t err = cudaSuccess; + if(state==NULL) state=&amscuda::random::global_randstate; + states.resize(nblocks*nthreads); + + //printf("debug: %d %d %d\n",nblocks,nthreads,size); cudaMalloc(&dstates,sizeof(randstate_t)*states.length); - cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice); + for(I=0;I>>(dbuffer,size,dstates); cudaDeviceSynchronize(); @@ -76,16 +83,17 @@ namespace random int ret = amscu_success; int nthreads = 128; - int nblocks = (size+nthreads)/nthreads; + int nblocks = (size+nthreads*1024)/nthreads/1024; int I; cuarray states; randstate_t *dstates = NULL; cudaError_t err = cudaSuccess; + if(state==NULL) state=&amscuda::random::global_randstate; + states.resize(nblocks*nthreads); cudaMalloc(&dstates,sizeof(randstate_t)*states.length); - cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice); for(I=0;I>>(dbuffer,size,dstates); cudaDeviceSynchronize(); @@ -142,16 +152,17 @@ namespace random int ret = amscu_success; int nthreads = 128; - int nblocks = (size+nthreads)/nthreads; + int nblocks = (size+nthreads*1024)/nthreads/1024; int I; cuarray states; randstate_t *dstates = NULL; cudaError_t err = cudaSuccess; + if(state==NULL) state=&amscuda::random::global_randstate; + states.resize(nblocks*nthreads); cudaMalloc(&dstates,sizeof(randstate_t)*states.length); - cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice); for(I=0;I>>(dbuffer,size,dstates); cudaDeviceSynchronize(); @@ -180,16 +193,17 @@ namespace random int ret = amscu_success; int nthreads = 128; - int nblocks = (size+nthreads)/nthreads; + int nblocks = (size+nthreads*1024)/nthreads/1024; int I; cuarray states; randstate_t *dstates = NULL; cudaError_t err = cudaSuccess; + if(state==NULL) state=&amscuda::random::global_randstate; + states.resize(nblocks*nthreads); cudaMalloc(&dstates,sizeof(randstate_t)*states.length); - cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice); for(I=0;I>>(dbuffer,size,dstates); cudaDeviceSynchronize(); @@ -232,23 +248,26 @@ namespace random int ret = amscu_success; int nthreads = 128; - int nblocks = (size+nthreads)/nthreads; + int nblocks = (size+nthreads*1024)/nthreads/1024; int I; cuarray states; randstate_t *dstates = NULL; cudaError_t err = cudaSuccess; + if(state==NULL) state=&amscuda::random::global_randstate; + states.resize(nblocks*nthreads); cudaMalloc(&dstates,sizeof(randstate_t)*states.length); - cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice); - + for(I=0;I>>(dbuffer,size,low,high,dstates); cudaDeviceSynchronize(); diff --git a/src/amsculib3/random/amscurandom1_hbuff.cu b/src/amsculib3/random/amscurandom1_hbuff.cu index c4a710e..6015e22 100644 --- a/src/amsculib3/random/amscurandom1_hbuff.cu +++ b/src/amsculib3/random/amscurandom1_hbuff.cu @@ -58,6 +58,7 @@ namespace random { seeds[I] = *mainseed; rand_state_increment(I,&seeds[I]); + //printf("debug: %d %u %u\n",(int)I,seeds[I].high,seeds[I].low); } rand_state_increment(I,mainseed); diff --git a/src/amsculib3/random/amscurandom_tests1.cu b/src/amsculib3/random/amscurandom_tests1.cu index 1659be0..68782d1 100644 --- a/src/amsculib3/random/amscurandom_tests1.cu +++ b/src/amsculib3/random/amscurandom_tests1.cu @@ -127,6 +127,159 @@ namespace random + return; + } + + + void amscurand_tests2_1() + { + const char *fnout = "../test_scripts/randf_array.bin"; + FILE *fp = NULL; + int Nx = 4096; + int Ny = 4096; + cuarray data; + cuarray dims; + fp = fopen(fnout,"w+b"); + + data.resize(Nx*Ny); + dims.resize(2); + dims[0] = Nx; + dims[1] = Ny; + + double t0,t1; + t0 = util::time_msec(); + hbuff_randf(data.data,Nx*Ny); + t1 = util::time_msec(); + + printf("hbuff_randf execution time: %1.3f msec\n",t1-t0); + + fwrite_ndarray(fp,&dims,&data); + if(fp==NULL) + { + printf("amscurand_tests2_1 error: could not write %s",fnout); + } + + fclose(fp); + return; + } + + void amscurand_tests2_2() + { + const char *fnout = "../test_scripts/randf_array2.bin"; + FILE *fp = NULL; + int Nx = 4096; + int Ny = 4096; + cuarray data; + float *ddata = NULL; + cuarray dims; + fp = fopen(fnout,"w+b"); + + data.resize(Nx*Ny); + cudaMalloc(&ddata,sizeof(float)*Nx*Ny); + dims.resize(2); + dims[0] = Nx; + dims[1] = Ny; + + double t0,t1; + t0 = amscuda::util::time_msec(); + dbuff_randf(ddata,Nx*Ny); + t1 = amscuda::util::time_msec(); + + printf("dbuff_randf execution time: %1.3f msec\n",t1-t0); + + cudaMemcpy(data.data,ddata,sizeof(float)*Nx*Ny,cudaMemcpyDeviceToHost); + + + fwrite_ndarray(fp,&dims,&data); + if(fp==NULL) + { + printf("amscurand_tests2_1 error: could not write %s",fnout); + } + + cudaFree(ddata); ddata=NULL; + + fclose(fp); + return; + } + + void amscurand_tests2_3() + { + const char *fnout = "../test_scripts/randint_array.bin"; + FILE *fp = NULL; + int Nx = 4096; + int Ny = 4096; + cuarray data; + cuarray dims; + fp = fopen(fnout,"w+b"); + + data.resize(Nx*Ny); + dims.resize(2); + dims[0] = Nx; + dims[1] = Ny; + + double t0,t1; + t0 = util::time_msec(); + hbuff_randint(data.data,Nx*Ny,0,10); + t1 = util::time_msec(); + + printf("hbuff_randint execution time: %1.3f msec\n",t1-t0); + + fwrite_ndarray(fp,&dims,&data); + if(fp==NULL) + { + printf("amscurand_tests2_1 error: could not write %s",fnout); + } + + fclose(fp); + return; + } + + void amscurand_tests2_4() + { + const char *fnout = "../test_scripts/randint_array2.bin"; + FILE *fp = NULL; + int Nx = 4096; + int Ny = 4096; + cuarray data; + int *ddata = NULL; + cuarray dims; + fp = fopen(fnout,"w+b"); + + data.resize(Nx*Ny); + cudaMalloc(&ddata,sizeof(int)*Nx*Ny); + dims.resize(2); + dims[0] = Nx; + dims[1] = Ny; + + double t0,t1; + t0 = amscuda::util::time_msec(); + dbuff_randint(ddata,Nx*Ny,0,10); + t1 = amscuda::util::time_msec(); + + printf("dbuff_randint execution time: %1.3f msec\n",t1-t0); + + cudaMemcpy(data.data,ddata,sizeof(int)*Nx*Ny,cudaMemcpyDeviceToHost); + + + fwrite_ndarray(fp,&dims,&data); + if(fp==NULL) + { + printf("amscurand_tests2_1 error: could not write %s",fnout); + } + + cudaFree(ddata); ddata=NULL; + + fclose(fp); + return; + } + + __host__ void amscurand_tests2() + { + amscurand_tests2_1(); + amscurand_tests2_2(); + amscurand_tests2_3(); + amscurand_tests2_4(); + return; } diff --git a/src/amsculib3/util/amscu_util.cu b/src/amsculib3/util/amscu_util.cu new file mode 100644 index 0000000..f53c410 --- /dev/null +++ b/src/amsculib3/util/amscu_util.cu @@ -0,0 +1,15 @@ +#include + +namespace amscuda +{ +namespace util +{ + + double time_msec() + { + double ret = (double)clock()/CLOCKS_PER_SEC; + ret *= 1000.0; + return ret; + } +}; +}; \ No newline at end of file diff --git a/src/main.cu b/src/main.cu index c7be39f..12bc8ab 100644 --- a/src/main.cu +++ b/src/main.cu @@ -24,6 +24,7 @@ int main(int argc, char* argv[]) //test_amscurarray1(); //random::amscurand_tests1(); + random::amscurand_tests2(); return 0; } \ No newline at end of file diff --git a/test_scripts/test_dbuff_dpr32.py b/test_scripts/test_dbuff_dpr32.py index 2fb817e..fae8425 100644 --- a/test_scripts/test_dbuff_dpr32.py +++ b/test_scripts/test_dbuff_dpr32.py @@ -40,6 +40,9 @@ def binsave_float_ndarray(fp,arr): ## Main Script ## ################# + + + def test_1(): fname = "./test_scripts/test_dbuff_rand_dpr32.bin" diff --git a/test_scripts/test_randomplot1.py b/test_scripts/test_randomplot1.py new file mode 100644 index 0000000..3a6ca31 --- /dev/null +++ b/test_scripts/test_randomplot1.py @@ -0,0 +1,140 @@ +#!/usr/bin/python3 + +import os,sys,math +import numpy as np +import matplotlib.pyplot as plt + +################# +## Subroutines ## +################# + +def binload_float_ndarray(fp): + arr = np.zeros((0),dtype=np.float32,order='F') + + qb = fp.read(4) + Nd = np.frombuffer(qb,dtype=np.int32,count=1)[0] + shp = np.zeros((Nd),dtype=np.int32) + + piprod = 1 + for I in range(0,Nd): + qb = fp.read(4) + shp[I] = np.frombuffer(qb,dtype=np.int32,count=1)[0] + piprod = piprod*shp[I] + + qb = fp.read(4*piprod) + arr = np.frombuffer(qb,dtype=np.float32,count=piprod) + + arr = arr.reshape(shp) + + return arr; + +def binload_int_ndarray(fp): + arr = np.zeros((0),dtype=np.float32,order='F') + + qb = fp.read(4) + Nd = np.frombuffer(qb,dtype=np.int32,count=1)[0] + shp = np.zeros((Nd),dtype=np.int32) + + piprod = 1 + for I in range(0,Nd): + qb = fp.read(4) + shp[I] = np.frombuffer(qb,dtype=np.int32,count=1)[0] + piprod = piprod*shp[I] + + qb = fp.read(4*piprod) + arr = np.frombuffer(qb,dtype=np.int32,count=piprod) + + arr = arr.reshape(shp) + + return arr; + + +################# +## Main Script ## +################# + +def periodcheck(arr): + + arr = np.asarray(arr).copy().flatten() + N = arr.shape[0] + q1 = arr[0] + q2 = arr[1] + q3 = arr[2] + ind = -1 + for I in range(3,N-3): + if(arr[I]==q1): + if(arr[I+1]==q2): + if(arr[I+2]==q3): + ind = I + break + if(ind>=0): + print("array has detected a period of {} out of {}".format(ind,N)) + + return ind + +def test_1(): + + fname = "./test_scripts/randf_array.bin" + try: + fp = open(fname,"rb") + except: + print("Could not open {} for reading".format(fname)) + return + arr = binload_float_ndarray(fp) + periodcheck(arr) + fp.close() + + fname = "./test_scripts/randf_array2.bin" + try: + fp = open(fname,"rb") + except: + print("Could not open {} for reading".format(fname)) + return + arr2 = binload_float_ndarray(fp) + periodcheck(arr2) + fp.close() + + fname = "./test_scripts/randint_array.bin" + try: + fp = open(fname,"rb") + except: + print("Could not open {} for reading".format(fname)) + return + arr3 = binload_int_ndarray(fp) + periodcheck(arr3) + print("array 3 max {} min {}".format(np.max(arr3),np.min(arr3))) + fp.close() + + fname = "./test_scripts/randint_array2.bin" + try: + fp = open(fname,"rb") + except: + print("Could not open {} for reading".format(fname)) + return + arr4 = binload_int_ndarray(fp) + periodcheck(arr4) + print("array 4 max {} min {}".format(np.max(arr4),np.min(arr4))) + fp.close() + + plt.subplot(2,2,1) + plt.imshow(arr) + plt.subplot(2,2,2) + plt.imshow(arr2) + plt.show() + + plt.subplot(2,2,1) + plt.imshow(arr3) + plt.colorbar() + plt.subplot(2,2,2) + plt.imshow(arr4) + plt.colorbar() + plt.show() + + + return + +if(__name__=="__main__"): + test_1() + + exit(0) +