testing rands

This commit is contained in:
2026-04-14 23:03:26 -04:00
parent 1691b2c415
commit 0600b08a63
18 changed files with 348 additions and 13 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -50,6 +50,7 @@ __host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_
//Tests
__host__ void amscurand_tests1(); //test basic random functions
__host__ void amscurand_tests2(); //test basic random functions
};

View File

@ -5,7 +5,7 @@ namespace amscuda
{
namespace util
{
double time_msec();
};
};

View File

@ -8,7 +8,9 @@ namespace random
{
//Choosing xoroshiro64** as my default RNG due to 32 bit only operations
randstate_t global_randstate = xs64ss_state();
randstate_t global_randstate = xs64ss_state(0);
//must initialize randstate to a value other than {0,0}. The single-param constructor calls splitmix32 to initialize
__host__ void rand_seed(const uint32_t seed)

View File

@ -24,24 +24,31 @@ namespace random
int ret = amscu_success;
int nthreads = 128;
int nblocks = (size+nthreads)/nthreads;
int nblocks = (size+nthreads*1024)/nthreads/1024;
int I;
cuarray<randstate_t> states;
randstate_t *dstates = NULL;
cudaError_t err = cudaSuccess;
if(state==NULL) state=&amscuda::random::global_randstate;
states.resize(nblocks*nthreads);
//printf("debug: %d %d %d\n",nblocks,nthreads,size);
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
for(I=0;I<nblocks*nthreads;I++)
{
states[I] = *state;
rand_state_increment(I,&(states[I]));
//printf("debug: %d, %u %u\n",I,states[I].low,states[I].high);
}
rand_state_increment(I,state);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
dbuff_randf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
cudaDeviceSynchronize();
@ -76,16 +83,17 @@ namespace random
int ret = amscu_success;
int nthreads = 128;
int nblocks = (size+nthreads)/nthreads;
int nblocks = (size+nthreads*1024)/nthreads/1024;
int I;
cuarray<randstate_t> states;
randstate_t *dstates = NULL;
cudaError_t err = cudaSuccess;
if(state==NULL) state=&amscuda::random::global_randstate;
states.resize(nblocks*nthreads);
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
for(I=0;I<nblocks*nthreads;I++)
{
@ -94,6 +102,8 @@ namespace random
}
rand_state_increment(I,state);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
dbuff_rand_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
cudaDeviceSynchronize();
@ -142,16 +152,17 @@ namespace random
int ret = amscu_success;
int nthreads = 128;
int nblocks = (size+nthreads)/nthreads;
int nblocks = (size+nthreads*1024)/nthreads/1024;
int I;
cuarray<randstate_t> states;
randstate_t *dstates = NULL;
cudaError_t err = cudaSuccess;
if(state==NULL) state=&amscuda::random::global_randstate;
states.resize(nblocks*nthreads);
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
for(I=0;I<nblocks*nthreads;I++)
{
@ -160,6 +171,8 @@ namespace random
}
rand_state_increment(I,state);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
dbuff_randnf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
cudaDeviceSynchronize();
@ -180,16 +193,17 @@ namespace random
int ret = amscu_success;
int nthreads = 128;
int nblocks = (size+nthreads)/nthreads;
int nblocks = (size+nthreads*1024)/nthreads/1024;
int I;
cuarray<randstate_t> states;
randstate_t *dstates = NULL;
cudaError_t err = cudaSuccess;
if(state==NULL) state=&amscuda::random::global_randstate;
states.resize(nblocks*nthreads);
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
for(I=0;I<nblocks*nthreads;I++)
{
@ -198,6 +212,8 @@ namespace random
}
rand_state_increment(I,state);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
dbuff_randn_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
cudaDeviceSynchronize();
@ -232,16 +248,17 @@ namespace random
int ret = amscu_success;
int nthreads = 128;
int nblocks = (size+nthreads)/nthreads;
int nblocks = (size+nthreads*1024)/nthreads/1024;
int I;
cuarray<randstate_t> states;
randstate_t *dstates = NULL;
cudaError_t err = cudaSuccess;
if(state==NULL) state=&amscuda::random::global_randstate;
states.resize(nblocks*nthreads);
cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
for(I=0;I<nblocks*nthreads;I++)
{
@ -250,6 +267,8 @@ namespace random
}
rand_state_increment(I,state);
cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
dbuff_randint_kf<<<nblocks,nthreads>>>(dbuffer,size,low,high,dstates);
cudaDeviceSynchronize();

View File

@ -58,6 +58,7 @@ namespace random
{
seeds[I] = *mainseed;
rand_state_increment(I,&seeds[I]);
//printf("debug: %d %u %u\n",(int)I,seeds[I].high,seeds[I].low);
}
rand_state_increment(I,mainseed);

View File

@ -127,6 +127,159 @@ namespace random
return;
}
// Timing/output test for hbuff_randf.
// Fills a 4096 x 4096 host float buffer with hbuff_randf, prints the elapsed
// time reported by util::time_msec(), and writes the dimensions and data to
// ../test_scripts/randf_array.bin through fwrite_ndarray.
// If the output file cannot be opened, an error is printed and nothing is written.
void amscurand_tests2_1()
{
    const char *fnout = "../test_scripts/randf_array.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    cuarray<float> data;
    cuarray<int> dims;
    FILE *fp = NULL;
    double t0,t1;

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    t0 = util::time_msec();
    hbuff_randf(data.data,Nx*Ny);
    t1 = util::time_msec();
    printf("hbuff_randf execution time: %1.3f msec\n",t1-t0);

    //The handle must be validated before it is handed to fwrite_ndarray or
    //fclose: both would otherwise receive a NULL stream.
    fp = fopen(fnout,"w+b");
    if(fp==NULL)
    {
        printf("amscurand_tests2_1 error: could not write %s\n",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
// Timing/output test for dbuff_randf.
// Allocates a 4096 x 4096 float buffer on the device, fills it with
// dbuff_randf, prints the elapsed time reported by amscuda::util::time_msec(),
// copies the result back to the host and writes the dimensions and data to
// ../test_scripts/randf_array2.bin through fwrite_ndarray.
// Any failing CUDA call or a failure to open the output file is reported and
// ends the test early; the device buffer is released on every path.
void amscurand_tests2_2()
{
    const char *fnout = "../test_scripts/randf_array2.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    const size_t nbytes = sizeof(float)*Nx*Ny;
    cuarray<float> data;
    cuarray<int> dims;
    float *ddata = NULL;
    FILE *fp = NULL;
    cudaError_t err = cudaSuccess;
    double t0,t1;

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    err = cudaMalloc(&ddata,nbytes);
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_2 error: cudaMalloc failed: %s\n",cudaGetErrorString(err));
        return;
    }

    t0 = amscuda::util::time_msec();
    dbuff_randf(ddata,Nx*Ny);
    t1 = amscuda::util::time_msec();
    printf("dbuff_randf execution time: %1.3f msec\n",t1-t0);

    //The device buffer is no longer needed once the copy has been attempted,
    //so free it before looking at the result to keep every exit path leak-free.
    err = cudaMemcpy(data.data,ddata,nbytes,cudaMemcpyDeviceToHost);
    cudaFree(ddata); ddata=NULL;
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_2 error: cudaMemcpy failed: %s\n",cudaGetErrorString(err));
        return;
    }

    //Validate the handle before it reaches fwrite_ndarray or fclose.
    fp = fopen(fnout,"w+b");
    if(fp==NULL)
    {
        printf("amscurand_tests2_2 error: could not write %s\n",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
// Timing/output test for hbuff_randint.
// Fills a 4096 x 4096 host int buffer with hbuff_randint(...,0,10), prints the
// elapsed time reported by util::time_msec(), and writes the dimensions and
// data to ../test_scripts/randint_array.bin through fwrite_ndarray.
// If the output file cannot be opened, an error is printed and nothing is written.
void amscurand_tests2_3()
{
    const char *fnout = "../test_scripts/randint_array.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    cuarray<int> data;
    cuarray<int> dims;
    FILE *fp = NULL;
    double t0,t1;

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    t0 = util::time_msec();
    hbuff_randint(data.data,Nx*Ny,0,10);
    t1 = util::time_msec();
    printf("hbuff_randint execution time: %1.3f msec\n",t1-t0);

    //The handle must be validated before it is handed to fwrite_ndarray or
    //fclose: both would otherwise receive a NULL stream.
    fp = fopen(fnout,"w+b");
    if(fp==NULL)
    {
        printf("amscurand_tests2_3 error: could not write %s\n",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
// Timing/output test for dbuff_randint.
// Allocates a 4096 x 4096 int buffer on the device, fills it with
// dbuff_randint(...,0,10), prints the elapsed time reported by
// amscuda::util::time_msec(), copies the result back to the host and writes
// the dimensions and data to ../test_scripts/randint_array2.bin through
// fwrite_ndarray.
// Any failing CUDA call or a failure to open the output file is reported and
// ends the test early; the device buffer is released on every path.
void amscurand_tests2_4()
{
    const char *fnout = "../test_scripts/randint_array2.bin";
    const int Nx = 4096;
    const int Ny = 4096;
    const size_t nbytes = sizeof(int)*Nx*Ny;
    cuarray<int> data;
    cuarray<int> dims;
    int *ddata = NULL;
    FILE *fp = NULL;
    cudaError_t err = cudaSuccess;
    double t0,t1;

    data.resize(Nx*Ny);
    dims.resize(2);
    dims[0] = Nx;
    dims[1] = Ny;

    err = cudaMalloc(&ddata,nbytes);
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_4 error: cudaMalloc failed: %s\n",cudaGetErrorString(err));
        return;
    }

    t0 = amscuda::util::time_msec();
    dbuff_randint(ddata,Nx*Ny,0,10);
    t1 = amscuda::util::time_msec();
    printf("dbuff_randint execution time: %1.3f msec\n",t1-t0);

    //The device buffer is no longer needed once the copy has been attempted,
    //so free it before looking at the result to keep every exit path leak-free.
    err = cudaMemcpy(data.data,ddata,nbytes,cudaMemcpyDeviceToHost);
    cudaFree(ddata); ddata=NULL;
    if(err!=cudaSuccess)
    {
        printf("amscurand_tests2_4 error: cudaMemcpy failed: %s\n",cudaGetErrorString(err));
        return;
    }

    //Validate the handle before it reaches fwrite_ndarray or fclose.
    fp = fopen(fnout,"w+b");
    if(fp==NULL)
    {
        printf("amscurand_tests2_4 error: could not write %s\n",fnout);
        return;
    }

    fwrite_ndarray(fp,&dims,&data);
    fclose(fp);

    return;
}
// Driver for the second batch of random-number tests: invokes
// amscurand_tests2_1 through amscurand_tests2_4, one after another, in
// numerical order.
__host__ void amscurand_tests2()
{
    typedef void (*subtest_fn)();
    const subtest_fn subtests[] =
    {
        amscurand_tests2_1,
        amscurand_tests2_2,
        amscurand_tests2_3,
        amscurand_tests2_4
    };
    const int nsubtests = (int)(sizeof(subtests)/sizeof(subtests[0]));

    for(int k=0;k<nsubtests;k++)
    {
        subtests[k]();
    }
}

View File

@ -0,0 +1,15 @@
#include <amsculib3/amsculib3.hpp>
namespace amscuda
{
namespace util
{

// Returns the current clock() reading converted to milliseconds.
// clock() reports processor time consumed by the program rather than
// wall-clock time, so the difference between two calls is CPU time in msec.
double time_msec()
{
    const double seconds = static_cast<double>(clock())/CLOCKS_PER_SEC;
    return seconds*1000.0;
}

} // namespace util
} // namespace amscuda

View File

@ -24,6 +24,7 @@ int main(int argc, char* argv[])
//test_amscurarray1();
//random::amscurand_tests1();
random::amscurand_tests2();
return 0;
}

View File

@ -40,6 +40,9 @@ def binsave_float_ndarray(fp,arr):
## Main Script ##
#################
def test_1():
fname = "./test_scripts/test_dbuff_rand_dpr32.bin"

View File

@ -0,0 +1,140 @@
#!/usr/bin/python3
import os,sys,math
import numpy as np
import matplotlib.pyplot as plt
#################
## Subroutines ##
#################
def binload_float_ndarray(fp):
    """Read one float32 N-dimensional array from an open binary file.

    The layout consumed here is: one int32 holding the number of dimensions,
    that many int32 extents, then prod(extents) float32 values. All values
    are read in the machine's native byte order.

    Parameters
    ----------
    fp : binary file object opened for reading, positioned at the record.

    Returns
    -------
    numpy.ndarray of dtype float32 reshaped (C order) to the stored extents.
    The array is backed by the bytes that were read and is read-only.

    Raises
    ------
    ValueError if the file ends early or the header holds negative sizes.
    """
    def read_exact(nbytes, what):
        # A short read means a truncated file; report it with context
        # instead of letting np.frombuffer fail with a generic message.
        buf = fp.read(nbytes)
        if len(buf) != nbytes:
            raise ValueError(
                "binload_float_ndarray: truncated file while reading {} "
                "(wanted {} bytes, got {})".format(what, nbytes, len(buf)))
        return buf

    Nd = int(np.frombuffer(read_exact(4, "dimension count"), dtype=np.int32, count=1)[0])
    if Nd < 0:
        raise ValueError("binload_float_ndarray: negative dimension count {}".format(Nd))

    shp = []
    piprod = 1  # Python int: cannot overflow the way an np.int32 product would
    for I in range(0, Nd):
        ext = int(np.frombuffer(read_exact(4, "extent {}".format(I)), dtype=np.int32, count=1)[0])
        if ext < 0:
            # A negative byte count would make fp.read() slurp the rest of the file.
            raise ValueError("binload_float_ndarray: negative extent {} for axis {}".format(ext, I))
        shp.append(ext)
        piprod = piprod * ext

    qb = read_exact(4 * piprod, "array data")
    arr = np.frombuffer(qb, dtype=np.float32, count=piprod)
    arr = arr.reshape(tuple(shp))
    return arr
def binload_int_ndarray(fp):
    """Read one int32 N-dimensional array from an open binary file.

    The layout consumed here is: one int32 holding the number of dimensions,
    that many int32 extents, then prod(extents) int32 values. All values are
    read in the machine's native byte order.

    Parameters
    ----------
    fp : binary file object opened for reading, positioned at the record.

    Returns
    -------
    numpy.ndarray of dtype int32 reshaped (C order) to the stored extents.
    The array is backed by the bytes that were read and is read-only.

    Raises
    ------
    ValueError if the file ends early or the header holds negative sizes.
    """
    def read_exact(nbytes, what):
        # A short read means a truncated file; report it with context
        # instead of letting np.frombuffer fail with a generic message.
        buf = fp.read(nbytes)
        if len(buf) != nbytes:
            raise ValueError(
                "binload_int_ndarray: truncated file while reading {} "
                "(wanted {} bytes, got {})".format(what, nbytes, len(buf)))
        return buf

    Nd = int(np.frombuffer(read_exact(4, "dimension count"), dtype=np.int32, count=1)[0])
    if Nd < 0:
        raise ValueError("binload_int_ndarray: negative dimension count {}".format(Nd))

    shp = []
    piprod = 1  # Python int: cannot overflow the way an np.int32 product would
    for I in range(0, Nd):
        ext = int(np.frombuffer(read_exact(4, "extent {}".format(I)), dtype=np.int32, count=1)[0])
        if ext < 0:
            # A negative byte count would make fp.read() slurp the rest of the file.
            raise ValueError("binload_int_ndarray: negative extent {} for axis {}".format(ext, I))
        shp.append(ext)
        piprod = piprod * ext

    qb = read_exact(4 * piprod, "array data")
    arr = np.frombuffer(qb, dtype=np.int32, count=piprod)
    arr = arr.reshape(tuple(shp))
    return arr
#################
## Main Script ##
#################
def periodcheck(arr):
    """Look for the first re-occurrence of the array's leading three values.

    The input is flattened; the triple (arr[0], arr[1], arr[2]) is then
    searched for at every start index from 3 up to and including N-3.
    When a match is found a message is printed.

    Parameters
    ----------
    arr : array-like of any shape.

    Returns
    -------
    int : index of the first matching window, or -1 if there is none
          (arrays with fewer than three elements always give -1).
    """
    arr = np.asarray(arr).copy().flatten()
    N = arr.shape[0]
    ind = -1

    # Fewer than three elements cannot even supply the reference triple.
    if(N<3):
        return ind

    q1 = arr[0]
    q2 = arr[1]
    q3 = arr[2]

    # Vectorised equivalent of testing arr[I], arr[I+1], arr[I+2] for each
    # start index I in [3, N-3]; the three slices are aligned so element k of
    # each one belongs to start index k+3.
    hits = (arr[3:N-2]==q1) & (arr[4:N-1]==q2) & (arr[5:N]==q3)
    found = np.flatnonzero(hits)
    if(found.size>0):
        ind = int(found[0]) + 3

    if(ind>=0):
        print("array has detected a period of {} out of {}".format(ind,N))

    return ind
def test_1():
    """Load the four random-array dumps, run periodcheck on each, and plot them.

    The two float files are read with binload_float_ndarray and the two int
    files with binload_int_ndarray. For the int arrays the max and min are
    also printed. If any file cannot be opened, a message is printed and the
    function returns without plotting.
    """
    # (path, loader, label used in the max/min report or None for no report)
    inputs = [
        ("./test_scripts/randf_array.bin", binload_float_ndarray, None),
        ("./test_scripts/randf_array2.bin", binload_float_ndarray, None),
        ("./test_scripts/randint_array.bin", binload_int_ndarray, "array 3"),
        ("./test_scripts/randint_array2.bin", binload_int_ndarray, "array 4"),
    ]

    loaded = []
    for fname, loader, label in inputs:
        try:
            fp = open(fname,"rb")
        except OSError:
            # Only I/O failures are expected here; anything else should surface.
            print("Could not open {} for reading".format(fname))
            return

        # The context manager closes the file even if the loader raises.
        with fp:
            data = loader(fp)

        periodcheck(data)
        if(label is not None):
            print("{} max {} min {}".format(label,np.max(data),np.min(data)))
        loaded.append(data)

    arr, arr2, arr3, arr4 = loaded

    plt.subplot(2,2,1)
    plt.imshow(arr)
    plt.subplot(2,2,2)
    plt.imshow(arr2)
    plt.show()

    plt.subplot(2,2,1)
    plt.imshow(arr3)
    plt.colorbar()
    plt.subplot(2,2,2)
    plt.imshow(arr4)
    plt.colorbar()
    plt.show()

    return
if(__name__=="__main__"):
    # Script entry point: run the checks, then terminate with status 0.
    # sys.exit is used rather than the bare exit() helper, which is injected
    # by the site module for interactive use and is not guaranteed to exist.
    test_1()
    sys.exit(0)