testing rands
This commit is contained in:
Binary file not shown.
Binary file not shown.
BIN
build_linux64/objstore/amscu_util.o
Normal file
BIN
build_linux64/objstore/amscu_util.o
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -50,6 +50,7 @@ __host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_
|
||||
|
||||
//Tests
|
||||
__host__ void amscurand_tests1(); //test basic random functions
|
||||
__host__ void amscurand_tests2(); //test basic random functions
|
||||
|
||||
|
||||
};
|
||||
|
||||
@ -5,7 +5,7 @@ namespace amscuda
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
double time_msec();
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
@ -8,7 +8,9 @@ namespace random
|
||||
{
|
||||
|
||||
//Choosing xoroshiro64** as my default RNG due to 32 bit only operations
|
||||
randstate_t global_randstate = xs64ss_state();
|
||||
randstate_t global_randstate = xs64ss_state(0);
|
||||
//must initialize randstate to a value other than {0,0}. The single-param constructor calls splitmix32 to initialize
|
||||
|
||||
|
||||
|
||||
__host__ void rand_seed(const uint32_t seed)
|
||||
|
||||
@ -24,24 +24,31 @@ namespace random
|
||||
int ret = amscu_success;
|
||||
|
||||
int nthreads = 128;
|
||||
int nblocks = (size+nthreads)/nthreads;
|
||||
int nblocks = (size+nthreads*1024)/nthreads/1024;
|
||||
int I;
|
||||
cuarray<randstate_t> states;
|
||||
randstate_t *dstates = NULL;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(state==NULL) state=&amscuda::random::global_randstate;
|
||||
|
||||
states.resize(nblocks*nthreads);
|
||||
|
||||
//printf("debug: %d %d %d\n",nblocks,nthreads,size);
|
||||
|
||||
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
states[I] = *state;
|
||||
rand_state_increment(I,&(states[I]));
|
||||
//printf("debug: %d, %u %u\n",I,states[I].low,states[I].high);
|
||||
}
|
||||
rand_state_increment(I,state);
|
||||
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
dbuff_randf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
@ -76,16 +83,17 @@ namespace random
|
||||
int ret = amscu_success;
|
||||
|
||||
int nthreads = 128;
|
||||
int nblocks = (size+nthreads)/nthreads;
|
||||
int nblocks = (size+nthreads*1024)/nthreads/1024;
|
||||
int I;
|
||||
cuarray<randstate_t> states;
|
||||
randstate_t *dstates = NULL;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(state==NULL) state=&amscuda::random::global_randstate;
|
||||
|
||||
states.resize(nblocks*nthreads);
|
||||
|
||||
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
@ -94,6 +102,8 @@ namespace random
|
||||
}
|
||||
rand_state_increment(I,state);
|
||||
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
dbuff_rand_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
@ -142,16 +152,17 @@ namespace random
|
||||
int ret = amscu_success;
|
||||
|
||||
int nthreads = 128;
|
||||
int nblocks = (size+nthreads)/nthreads;
|
||||
int nblocks = (size+nthreads*1024)/nthreads/1024;
|
||||
int I;
|
||||
cuarray<randstate_t> states;
|
||||
randstate_t *dstates = NULL;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(state==NULL) state=&amscuda::random::global_randstate;
|
||||
|
||||
states.resize(nblocks*nthreads);
|
||||
|
||||
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
@ -160,6 +171,8 @@ namespace random
|
||||
}
|
||||
rand_state_increment(I,state);
|
||||
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
dbuff_randnf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
@ -180,16 +193,17 @@ namespace random
|
||||
int ret = amscu_success;
|
||||
|
||||
int nthreads = 128;
|
||||
int nblocks = (size+nthreads)/nthreads;
|
||||
int nblocks = (size+nthreads*1024)/nthreads/1024;
|
||||
int I;
|
||||
cuarray<randstate_t> states;
|
||||
randstate_t *dstates = NULL;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(state==NULL) state=&amscuda::random::global_randstate;
|
||||
|
||||
states.resize(nblocks*nthreads);
|
||||
|
||||
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
@ -198,6 +212,8 @@ namespace random
|
||||
}
|
||||
rand_state_increment(I,state);
|
||||
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
dbuff_randn_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
@ -232,16 +248,17 @@ namespace random
|
||||
int ret = amscu_success;
|
||||
|
||||
int nthreads = 128;
|
||||
int nblocks = (size+nthreads)/nthreads;
|
||||
int nblocks = (size+nthreads*1024)/nthreads/1024;
|
||||
int I;
|
||||
cuarray<randstate_t> states;
|
||||
randstate_t *dstates = NULL;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(state==NULL) state=&amscuda::random::global_randstate;
|
||||
|
||||
states.resize(nblocks*nthreads);
|
||||
|
||||
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
@ -250,6 +267,8 @@ namespace random
|
||||
}
|
||||
rand_state_increment(I,state);
|
||||
|
||||
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
|
||||
|
||||
dbuff_randint_kf<<<nblocks,nthreads>>>(dbuffer,size,low,high,dstates);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
|
||||
@ -58,6 +58,7 @@ namespace random
|
||||
{
|
||||
seeds[I] = *mainseed;
|
||||
rand_state_increment(I,&seeds[I]);
|
||||
//printf("debug: %d %u %u\n",(int)I,seeds[I].high,seeds[I].low);
|
||||
}
|
||||
rand_state_increment(I,mainseed);
|
||||
|
||||
|
||||
@ -127,6 +127,159 @@ namespace random
|
||||
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Fills a 4096x4096 host float buffer with hbuff_randf, prints the elapsed
// time reported by util::time_msec, and writes the array (with its dims) to
// ../test_scripts/randf_array.bin via fwrite_ndarray.
//
// If the output file cannot be opened the timing is still reported, but the
// write is skipped and an error is printed; the stream is only written to
// and closed when it was opened successfully.
void amscurand_tests2_1()
{
    const char *fnout = "../test_scripts/randf_array.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    cuarray<float> data;
    cuarray<int> dims;
    double t0,t1;

    FILE *fp = fopen(fnout,"w+b");

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    t0 = util::time_msec();
    hbuff_randf(data.data,Nx*Ny);
    t1 = util::time_msec();

    printf("hbuff_randf execution time: %1.3f msec\n",t1-t0);

    if(fp==NULL)
    {
        // Check before use: passing a NULL stream to the writer or to fclose is invalid.
        printf("amscurand_tests2_1 error: could not write %s",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
|
||||
|
||||
// Fills a 4096x4096 device float buffer with dbuff_randf, prints the elapsed
// time reported by amscuda::util::time_msec, copies the result back to the
// host and writes it (with its dims) to ../test_scripts/randf_array2.bin via
// fwrite_ndarray.
//
// CUDA allocation and copy results are checked, the output stream is only
// written to and closed when it was opened successfully, and the device
// buffer is released on every path that allocated it.
void amscurand_tests2_2()
{
    const char *fnout = "../test_scripts/randf_array2.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    const size_t nbytes = sizeof(float)*Nx*Ny;
    cuarray<float> data;
    cuarray<int> dims;
    float *ddata = NULL;
    cudaError_t err = cudaSuccess;
    double t0,t1;

    FILE *fp = fopen(fnout,"w+b");

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    err = cudaMalloc(&ddata,nbytes);
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_2 error: cudaMalloc failed: %s\n",cudaGetErrorString(err));
        if(fp!=NULL) fclose(fp);
        return;
    }

    t0 = amscuda::util::time_msec();
    dbuff_randf(ddata,Nx*Ny);
    t1 = amscuda::util::time_msec();

    printf("dbuff_randf execution time: %1.3f msec\n",t1-t0);

    err = cudaMemcpy(data.data,ddata,nbytes,cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        // Do not write a host buffer that was never filled.
        printf("amscurand_tests2_2 error: cudaMemcpy failed: %s\n",cudaGetErrorString(err));
    }
    else if(fp==NULL)
    {
        printf("amscurand_tests2_2 error: could not write %s",fnout);
    }
    else
    {
        fwrite_ndarray(fp,&dims,&data);
    }

    cudaFree(ddata); ddata=NULL;

    if(fp!=NULL) fclose(fp);

    return;
}
|
||||
|
||||
// Fills a 4096x4096 host int buffer with hbuff_randint(...,0,10), prints the
// elapsed time reported by util::time_msec, and writes the array (with its
// dims) to ../test_scripts/randint_array.bin via fwrite_ndarray.
//
// If the output file cannot be opened the timing is still reported, but the
// write is skipped and an error is printed; the stream is only written to
// and closed when it was opened successfully.
void amscurand_tests2_3()
{
    const char *fnout = "../test_scripts/randint_array.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    cuarray<int> data;
    cuarray<int> dims;
    double t0,t1;

    FILE *fp = fopen(fnout,"w+b");

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    t0 = util::time_msec();
    hbuff_randint(data.data,Nx*Ny,0,10);
    t1 = util::time_msec();

    printf("hbuff_randint execution time: %1.3f msec\n",t1-t0);

    if(fp==NULL)
    {
        // Check before use: passing a NULL stream to the writer or to fclose is invalid.
        printf("amscurand_tests2_3 error: could not write %s",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
|
||||
|
||||
// Fills a 4096x4096 device int buffer with dbuff_randint(...,0,10), prints
// the elapsed time reported by amscuda::util::time_msec, copies the result
// back to the host and writes it (with its dims) to
// ../test_scripts/randint_array2.bin via fwrite_ndarray.
//
// CUDA allocation and copy results are checked, the output stream is only
// written to and closed when it was opened successfully, and the device
// buffer is released on every path that allocated it.
void amscurand_tests2_4()
{
    const char *fnout = "../test_scripts/randint_array2.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    const size_t nbytes = sizeof(int)*Nx*Ny;
    cuarray<int> data;
    cuarray<int> dims;
    int *ddata = NULL;
    cudaError_t err = cudaSuccess;
    double t0,t1;

    FILE *fp = fopen(fnout,"w+b");

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    err = cudaMalloc(&ddata,nbytes);
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_4 error: cudaMalloc failed: %s\n",cudaGetErrorString(err));
        if(fp!=NULL) fclose(fp);
        return;
    }

    t0 = amscuda::util::time_msec();
    dbuff_randint(ddata,Nx*Ny,0,10);
    t1 = amscuda::util::time_msec();

    printf("dbuff_randint execution time: %1.3f msec\n",t1-t0);

    err = cudaMemcpy(data.data,ddata,nbytes,cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        // Do not write a host buffer that was never filled.
        printf("amscurand_tests2_4 error: cudaMemcpy failed: %s\n",cudaGetErrorString(err));
    }
    else if(fp==NULL)
    {
        printf("amscurand_tests2_4 error: could not write %s",fnout);
    }
    else
    {
        fwrite_ndarray(fp,&dims,&data);
    }

    cudaFree(ddata); ddata=NULL;

    if(fp!=NULL) fclose(fp);

    return;
}
|
||||
|
||||
// Runs the four buffer-generation tests in order:
// host randf, device randf, host randint, device randint.
__host__ void amscurand_tests2()
{
    typedef void (*testfn_t)();
    const testfn_t stages[] =
    {
        amscurand_tests2_1,
        amscurand_tests2_2,
        amscurand_tests2_3,
        amscurand_tests2_4
    };
    const int nstages = (int)(sizeof(stages)/sizeof(stages[0]));

    for(int k=0;k<nstages;k++)
    {
        stages[k]();
    }
}
|
||||
|
||||
|
||||
15
src/amsculib3/util/amscu_util.cu
Normal file
15
src/amsculib3/util/amscu_util.cu
Normal file
@ -0,0 +1,15 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
// Returns the value of clock() converted to milliseconds.
// Note: clock() reports processor time used by the program, not wall-clock
// time, so intervals during which the host thread is idle may be undercounted.
double time_msec()
{
    const double seconds = (double)clock()/CLOCKS_PER_SEC;
    return seconds*1000.0;
}
|
||||
};
|
||||
};
|
||||
@ -24,6 +24,7 @@ int main(int argc, char* argv[])
|
||||
//test_amscurarray1();
|
||||
|
||||
//random::amscurand_tests1();
|
||||
random::amscurand_tests2();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -40,6 +40,9 @@ def binsave_float_ndarray(fp,arr):
|
||||
## Main Script ##
|
||||
#################
|
||||
|
||||
|
||||
|
||||
|
||||
def test_1():
|
||||
|
||||
fname = "./test_scripts/test_dbuff_rand_dpr32.bin"
|
||||
|
||||
140
test_scripts/test_randomplot1.py
Normal file
140
test_scripts/test_randomplot1.py
Normal file
@ -0,0 +1,140 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,math
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
#################
|
||||
## Subroutines ##
|
||||
#################
|
||||
|
||||
def binload_float_ndarray(fp):
    """Read an N-dimensional float32 array from an open binary file object.

    Layout read from ``fp`` (native byte order):
      * one int32: number of dimensions ``Nd``
      * ``Nd`` int32 values: the extent of each dimension
      * ``prod(shape)`` float32 values: the array contents

    Returns the data reshaped to the stored shape (default NumPy ordering).
    Raises ValueError if the stream ends early or the header is invalid.
    """
    def _read_exact(nbytes):
        # A short read means the file is truncated; fail with a clear message.
        qb = fp.read(nbytes)
        if(len(qb)!=nbytes):
            raise ValueError("binload_float_ndarray: expected {} bytes, got {}".format(nbytes,len(qb)))
        return qb

    Nd = int(np.frombuffer(_read_exact(4),dtype=np.int32,count=1)[0])
    if(Nd<0):
        raise ValueError("binload_float_ndarray: negative dimension count {}".format(Nd))

    dims = []
    piprod = 1
    for I in range(0,Nd):
        # Convert to Python int so the running product cannot wrap in int32.
        ext = int(np.frombuffer(_read_exact(4),dtype=np.int32,count=1)[0])
        dims.append(ext)
        piprod = piprod*ext

    qb = _read_exact(4*piprod)
    arr = np.frombuffer(qb,dtype=np.float32,count=piprod)

    arr = arr.reshape(tuple(dims))

    return arr
|
||||
|
||||
def binload_int_ndarray(fp):
    """Read an N-dimensional int32 array from an open binary file object.

    Layout read from ``fp`` (native byte order):
      * one int32: number of dimensions ``Nd``
      * ``Nd`` int32 values: the extent of each dimension
      * ``prod(shape)`` int32 values: the array contents

    Returns the data reshaped to the stored shape (default NumPy ordering).
    Raises ValueError if the stream ends early or the header is invalid.
    """
    def _read_exact(nbytes):
        # A short read means the file is truncated; fail with a clear message.
        qb = fp.read(nbytes)
        if(len(qb)!=nbytes):
            raise ValueError("binload_int_ndarray: expected {} bytes, got {}".format(nbytes,len(qb)))
        return qb

    Nd = int(np.frombuffer(_read_exact(4),dtype=np.int32,count=1)[0])
    if(Nd<0):
        raise ValueError("binload_int_ndarray: negative dimension count {}".format(Nd))

    dims = []
    piprod = 1
    for I in range(0,Nd):
        # Convert to Python int so the running product cannot wrap in int32.
        ext = int(np.frombuffer(_read_exact(4),dtype=np.int32,count=1)[0])
        dims.append(ext)
        piprod = piprod*ext

    qb = _read_exact(4*piprod)
    arr = np.frombuffer(qb,dtype=np.int32,count=piprod)

    arr = arr.reshape(tuple(dims))

    return arr
|
||||
|
||||
|
||||
#################
|
||||
## Main Script ##
|
||||
#################
|
||||
|
||||
def periodcheck(arr):
    """Look for the first repeat of the array's leading three values.

    The input is flattened, then searched from index 3 onward for the first
    position I where arr[I], arr[I+1], arr[I+2] equal arr[0], arr[1], arr[2].
    Prints a message and returns I when found; returns -1 otherwise
    (including when the array has fewer than three elements).
    """
    arr = np.asarray(arr).flatten()
    N = arr.shape[0]
    ind = -1

    if(N>=3):
        q1 = arr[0]
        q2 = arr[1]
        q3 = arr[2]
        # Candidate start positions are 3..N-3 inclusive, so the last full
        # three-element window (ending at N-1) is also examined.
        hits = np.flatnonzero((arr[3:N-2]==q1) & (arr[4:N-1]==q2) & (arr[5:N]==q3))
        if(hits.shape[0]>0):
            ind = int(hits[0])+3

    if(ind>=0):
        print("array has detected a period of {} out of {}".format(ind,N))

    return ind
|
||||
|
||||
def test_1():
    """Load the four generated random arrays, check them, and plot them.

    Reads randf_array.bin and randf_array2.bin with binload_float_ndarray and
    randint_array.bin and randint_array2.bin with binload_int_ndarray (all
    under ./test_scripts/), runs periodcheck on each, prints min/max for the
    integer arrays, then shows the float pair and the integer pair in two
    matplotlib figures. Returns early, after printing a message, as soon as
    one of the files cannot be opened.
    """
    def _load(fname,loader):
        # Only I/O failures mean "file not available"; anything else propagates.
        try:
            fp = open(fname,"rb")
        except OSError:
            print("Could not open {} for reading".format(fname))
            return None
        # The with-block closes the file even if the loader raises.
        with fp:
            return loader(fp)

    arr = _load("./test_scripts/randf_array.bin",binload_float_ndarray)
    if(arr is None):
        return
    periodcheck(arr)

    arr2 = _load("./test_scripts/randf_array2.bin",binload_float_ndarray)
    if(arr2 is None):
        return
    periodcheck(arr2)

    arr3 = _load("./test_scripts/randint_array.bin",binload_int_ndarray)
    if(arr3 is None):
        return
    periodcheck(arr3)
    print("array 3 max {} min {}".format(np.max(arr3),np.min(arr3)))

    arr4 = _load("./test_scripts/randint_array2.bin",binload_int_ndarray)
    if(arr4 is None):
        return
    periodcheck(arr4)
    print("array 4 max {} min {}".format(np.max(arr4),np.min(arr4)))

    # Figure 1: host-generated vs device-generated float arrays.
    plt.subplot(2,2,1)
    plt.imshow(arr)
    plt.subplot(2,2,2)
    plt.imshow(arr2)
    plt.show()

    # Figure 2: host-generated vs device-generated integer arrays.
    plt.subplot(2,2,1)
    plt.imshow(arr3)
    plt.colorbar()
    plt.subplot(2,2,2)
    plt.imshow(arr4)
    plt.colorbar()
    plt.show()

    return
|
||||
|
||||
# Script entry point: run the plotting test and exit with status 0.
if(__name__=="__main__"):
    test_1()
    # sys.exit is always available; the bare exit() helper is only injected by the site module.
    sys.exit(0)
|
||||
|
||||
Reference in New Issue
Block a user