diff --git a/build/make.linux64.test.py b/build/make.linux64.test.py index 1ac9d00..3f5a686 100644 --- a/build/make.linux64.test.py +++ b/build/make.linux64.test.py @@ -17,7 +17,7 @@ builddir = "./build_linux64" doinstall = True #copies the build_output to the install dir when finished cc = "nvcc" #compiler cflags = "-dc --compiler-options '-fPIC -O3'" -libraries = "-l{}".format(libname) +libraries = "-l{} -lpthread".format(libname) libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir) linkerflags = " -Xlinker=-rpath,." srcexts = [".c",".cpp",".cu"] diff --git a/build_linux64/libamsculib3.linux64.a b/build_linux64/libamsculib3.linux64.a index 198af3b..85fcdfa 100644 Binary files a/build_linux64/libamsculib3.linux64.a and b/build_linux64/libamsculib3.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index d7d4407..ce93238 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index 4e0ef13..857defc 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cputhreading.o b/build_linux64/objstore/amscu_cputhreading.o new file mode 100644 index 0000000..1127744 Binary files /dev/null and b/build_linux64/objstore/amscu_cputhreading.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index f996fbb..88ba106 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index c0177d3..f850628 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index 55fc2b2..91cc157 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscufhash.o b/build_linux64/objstore/amscufhash.o index 1d4827a..ffec4eb 100644 Binary files a/build_linux64/objstore/amscufhash.o and b/build_linux64/objstore/amscufhash.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index 69e1bc2..dfd6eb2 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib3.o b/build_linux64/objstore/amsculib3.o index adc331e..858a295 100644 Binary files a/build_linux64/objstore/amsculib3.o and b/build_linux64/objstore/amsculib3.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index 96cb9dd..0c17960 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscupcg.o b/build_linux64/objstore/amscupcg.o index 1bfa65a..fe2f4e4 100644 Binary files a/build_linux64/objstore/amscupcg.o and b/build_linux64/objstore/amscupcg.o differ diff --git a/build_linux64/objstore/amscurandlcg.o b/build_linux64/objstore/amscurandlcg.o index db1dbaf..9f889e9 100644 Binary files a/build_linux64/objstore/amscurandlcg.o and b/build_linux64/objstore/amscurandlcg.o differ diff --git a/build_linux64/objstore/amscurandom1.o b/build_linux64/objstore/amscurandom1.o index ae4665f..0af19a2 100644 Binary files a/build_linux64/objstore/amscurandom1.o and b/build_linux64/objstore/amscurandom1.o differ diff --git a/build_linux64/objstore/amscurandom1_dbuff.o b/build_linux64/objstore/amscurandom1_dbuff.o index 5291cba..1816f0f 100644 Binary files a/build_linux64/objstore/amscurandom1_dbuff.o and b/build_linux64/objstore/amscurandom1_dbuff.o differ diff --git a/build_linux64/objstore/amscurandom1_hbuff.o b/build_linux64/objstore/amscurandom1_hbuff.o index 4f03534..cc2edf1 100644 Binary files a/build_linux64/objstore/amscurandom1_hbuff.o and b/build_linux64/objstore/amscurandom1_hbuff.o differ diff --git a/build_linux64/objstore/amscurandom_tests1.o b/build_linux64/objstore/amscurandom_tests1.o index ebb723a..c099166 100644 Binary files a/build_linux64/objstore/amscurandom_tests1.o and b/build_linux64/objstore/amscurandom_tests1.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index 6df5b4d..ca3dcab 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/amscusplitmix.o b/build_linux64/objstore/amscusplitmix.o index 9df9f59..4704f70 100644 Binary files a/build_linux64/objstore/amscusplitmix.o and b/build_linux64/objstore/amscusplitmix.o differ diff --git a/build_linux64/objstore/amsxoroshiro.o b/build_linux64/objstore/amsxoroshiro.o index 2c7743c..e4ce3ef 100644 Binary files a/build_linux64/objstore/amsxoroshiro.o and b/build_linux64/objstore/amsxoroshiro.o differ diff --git a/build_linux64/objstore/cuvec2.o b/build_linux64/objstore/cuvec2.o index ed52394..c09c35d 100644 Binary files a/build_linux64/objstore/cuvec2.o and b/build_linux64/objstore/cuvec2.o differ diff --git a/build_linux64/objstore/cuvec2f.o b/build_linux64/objstore/cuvec2f.o index 22ae19e..3075409 100644 Binary files a/build_linux64/objstore/cuvec2f.o and b/build_linux64/objstore/cuvec2f.o differ diff --git a/build_linux64/objstore/cuvec2i.o b/build_linux64/objstore/cuvec2i.o index 59929a8..6c4f47b 100644 Binary files a/build_linux64/objstore/cuvec2i.o and b/build_linux64/objstore/cuvec2i.o differ diff --git a/build_linux64/objstore/cuvec3.o b/build_linux64/objstore/cuvec3.o index d323ec4..06b05b2 100644 Binary files a/build_linux64/objstore/cuvec3.o and b/build_linux64/objstore/cuvec3.o differ diff --git a/build_linux64/objstore/cuvec3f.o b/build_linux64/objstore/cuvec3f.o index cd4bef3..a41e614 100644 Binary files a/build_linux64/objstore/cuvec3f.o and b/build_linux64/objstore/cuvec3f.o differ diff --git a/build_linux64/objstore/cuvec3i.o b/build_linux64/objstore/cuvec3i.o index d337f37..bfdf711 100644 Binary files a/build_linux64/objstore/cuvec3i.o and b/build_linux64/objstore/cuvec3i.o differ diff --git a/build_linux64/objstore/cuvec4.o b/build_linux64/objstore/cuvec4.o index 16abc9b..fdcedbe 100644 Binary files a/build_linux64/objstore/cuvec4.o and b/build_linux64/objstore/cuvec4.o differ diff --git a/build_linux64/objstore/cuvec4f.o b/build_linux64/objstore/cuvec4f.o index b810182..8dd5c75 100644 Binary files a/build_linux64/objstore/cuvec4f.o and b/build_linux64/objstore/cuvec4f.o differ diff --git a/build_linux64/objstore/cuvec4i.o b/build_linux64/objstore/cuvec4i.o index d78340a..366b5d0 100644 Binary files a/build_linux64/objstore/cuvec4i.o and b/build_linux64/objstore/cuvec4i.o differ diff --git a/build_linux64/test b/build_linux64/test index 5ecad41..1569edf 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib3/amsculib3.hpp b/include/amsculib3/amsculib3.hpp index 3d4d8c3..6231635 100644 --- a/include/amsculib3/amsculib3.hpp +++ b/include/amsculib3/amsculib3.hpp @@ -7,7 +7,12 @@ #include #include #include + +//C++ standard library headers #include +#include +#include +#include #include //where all the cuda functions live #include @@ -41,12 +46,18 @@ namespace amscuda //default numthreads to execute on cpu AMSCU_CONST static const int amscu_defcputhreads = 8; + AMSCU_CONST static const int amscu_success = 1; + AMSCU_CONST static const int amscu_meh = 0; + AMSCU_CONST static const int amscu_failure = -1; + + }; //end namespace amscuda //Components #include #include #include +#include #include #include diff --git a/include/amsculib3/random/amscurandom.cuh b/include/amsculib3/random/amscurandom.cuh index 68f52af..3365a39 100644 --- a/include/amsculib3/random/amscurandom.cuh +++ b/include/amsculib3/random/amscurandom.cuh @@ -34,18 +34,18 @@ __host__ __device__ float randnf(randstate_t *state = NULL); __host__ __device__ double randn(randstate_t *state = NULL); //Operations to fill a host buffer with random values -__host__ int hbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int hbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int hbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int hbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int hbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL); +__host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state = NULL); +__host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state = NULL); +__host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state = NULL); +__host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state = NULL); +__host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state = NULL); //Operations to fill a device buffer with random values -__host__ int dbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int dbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int dbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int dbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL); -__host__ int dbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL); +__host__ int dbuff_randf(float *dbuffer, int size, randstate_t *state = NULL); +__host__ int dbuff_rand(double *dbuffer, int size, randstate_t *state = NULL); +__host__ int dbuff_randnf(float *dbuffer, int size, randstate_t *state = NULL); +__host__ int dbuff_randn(double *dbuffer, int size, randstate_t *state = NULL); +__host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_t *state = NULL); //Tests diff --git a/include/amsculib3/util/amscu_cputhreading.hpp b/include/amsculib3/util/amscu_cputhreading.hpp new file mode 100644 index 0000000..0b6e4ef --- /dev/null +++ b/include/amsculib3/util/amscu_cputhreading.hpp @@ -0,0 +1,22 @@ +#ifndef __AMSCU_CPUTHREADING_HPP__ +#define __AMSCU_CPUTHREADING_HPP__ + +namespace amscuda +{ +namespace util +{ + +int amscpu_cputhreading_threadplan(int probsize); + +//A template function that takes as input a function pointer and a series of arguments +//The function is executed with fptr(threadnum, nthreads, otherargs...) with a dynamic number of threads +//psize must be supplied, which will call amscpu_cputhreading_threadplan to determine number of threads to use for execution +template int threaded_execute(callable &&fptr, int64_t psize, argst&&... args); + + +}; +}; + +#include + +#endif \ No newline at end of file diff --git a/include/amsculib3/util/amscu_cputhreading_impl.hpp b/include/amsculib3/util/amscu_cputhreading_impl.hpp new file mode 100644 index 0000000..72273bb --- /dev/null +++ b/include/amsculib3/util/amscu_cputhreading_impl.hpp @@ -0,0 +1,73 @@ +#ifndef __AMSCU_CPUTHREADING_IMPL_HPP__ +#define __AMSCU_CPUTHREADING_IMPL_HPP__ + +namespace amscuda +{ +namespace util +{ + +template int threaded_execute(callable &&fptr, int64_t psize, argst&&... args) +{ + int ret = amscu_success; + + int I; + std::vector threads; + int nthreads = amscpu_cputhreading_threadplan(psize); + + if(nthreads<=1) + { + nthreads = 1; + I = 0; + // std::invoke( + // std::forward(fptr), + // I, + // nthreads, + // std::forward(args)... + // ); + + //std::invoke is a C++17 feature, and mingw8 complains even so. + // Can I get away with just calling the functions? + fptr(I,nthreads,std::forward(args)...); + } + else + { + threads.resize(nthreads); + for(I=0;I(fptr), + I, + nthreads, + std::forward(args)... + ); + } + for(I=0;Ijoin(); + delete threads[I]; + threads[I] = NULL; + } + } + } + + return ret; +} + + + +}; +}; + +#endif \ No newline at end of file diff --git a/include/amsculib3/util/amscu_util.hpp b/include/amsculib3/util/amscu_util.hpp new file mode 100644 index 0000000..7fde3da --- /dev/null +++ b/include/amsculib3/util/amscu_util.hpp @@ -0,0 +1,15 @@ +#ifndef __AMSCU_UTIL_CUH__ +#define __AMSCU_UTIL_CUH__ + +namespace amscuda +{ +namespace util +{ + + +}; +}; + +#include + +#endif \ No newline at end of file diff --git a/src/amsculib3/random/amscurandom1.cu b/src/amsculib3/random/amscurandom1.cu index 27c6fbc..7a972e0 100644 --- a/src/amsculib3/random/amscurandom1.cu +++ b/src/amsculib3/random/amscurandom1.cu @@ -8,12 +8,12 @@ namespace random { //Choosing xoroshiro64** as my default RNG due to 32 bit only operations - randstate_t global_rand_cpustate = xs64ss_state(); + randstate_t global_randstate = xs64ss_state(); __host__ void rand_seed(const uint32_t seed) { - global_rand_cpustate = xs64ss_state(seed); + global_randstate = xs64ss_state(seed); } __host__ __device__ void rand_state_increment(const int32_t inc, randstate_t *state) @@ -22,7 +22,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif xoroshiro::xs64ss_state* s2 = (xoroshiro::xs64ss_state*)state; s2->low += inc; @@ -36,7 +36,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif xs64ss_next((xoroshiro::xs64ss_state*)state); return; @@ -49,7 +49,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif ret = xoroshiro::xs64ss_next((xoroshiro::xs64ss_state*)state); @@ -63,7 +63,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif int32_t q = (int)((randui32(state)>>1U)%(1U<<16U)); ret = (q%(high-low))+low; @@ -77,7 +77,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif ret= ((float)randui32(state))/(4294967296.0f); return ret; @@ -90,7 +90,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif ret= ((double)randui32(state))/(4294967296.0f); return ret; @@ -103,7 +103,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif q1 = randf(state); @@ -118,7 +118,7 @@ namespace random // GPU-specific code (device path) #else // CPU-specific code (host path) - if(state==NULL) state = &global_rand_cpustate; + if(state==NULL) state = &global_randstate; #endif q1 = rand(state); diff --git a/src/amsculib3/random/amscurandom1_hbuff.cu b/src/amsculib3/random/amscurandom1_hbuff.cu index 51eae29..c4a710e 100644 --- a/src/amsculib3/random/amscurandom1_hbuff.cu +++ b/src/amsculib3/random/amscurandom1_hbuff.cu @@ -4,6 +4,290 @@ namespace amscuda { namespace random { + template void rand_threadfunc1( + int threadnum, + int nthreads, + randfunc &&rf, + dtype *buffer, + int64_t N, + randstate_t *seed + ) + { + int64_t I,Is,I0,I1; + Is = N/nthreads; + if(Is<1) Is = 1; + I0 = Is*(threadnum); + I1 = Is*(threadnum+1); + if(I1>N) I1 = N; + + for(I=I0;I + int hbuff_rand_threadedexec( + callable &&fptr, + randfunc &&rf, + dtype *buffer, + int64_t N, + randstate_t *mainseed + ) + { + int ret = amscu_success; + + int I; + std::vector threads; + std::vector seeds; + int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N); + + if(nthreads<=1) + { + nthreads = 1; + I = 0; + fptr(I,nthreads,rf,buffer,N,mainseed); + } + else + { + threads.resize(nthreads); + seeds.resize(nthreads); + for(I=0;I(fptr), + I, + nthreads, + rf, + buffer, + N, + &seeds[I] + ); + } + for(I=0;Ijoin(); + delete threads[I]; + threads[I] = NULL; + } + } + } + + return ret; + } + + template void rand_threadfunc2( + int threadnum, + int nthreads, + randfunc &&rf, + dtype *buffer, + int64_t N, + rngbnd min, + rngbnd max, + randstate_t *seed + ) + { + int64_t I,Is,I0,I1; + + Is = N/nthreads; + if(Is<1) Is = 1; + I0 = Is*(threadnum); + I1 = Is*(threadnum+1); + if(I1>N) I1 = N; + + for(I=I0;I + int hbuff_rand_threadedexec2( + callable &&fptr, + randfunc &&rf, + dtype *buffer, + int64_t N, + rngbnd min, + rngbnd max, + randstate_t *mainseed + ) + { + int ret = amscu_success; + + int I; + std::vector threads; + std::vector seeds; + int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N); + + if(nthreads<=1) + { + nthreads = 1; + I = 0; + fptr(I,nthreads,rf,buffer,N,min,max,mainseed); + } + else + { + threads.resize(nthreads); + seeds.resize(nthreads); + for(I=0;I(fptr), + I, + nthreads, + rf, + buffer, + N, + min,max, + &seeds[I] + ); + } + for(I=0;Ijoin(); + delete threads[I]; + threads[I] = NULL; + } + } + } + + return ret; + } + + __host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state) + { + int ret = amscu_success; + int res; + if(state==NULL) state = &global_randstate; + + res = hbuff_rand_threadedexec( + rand_threadfunc1, + randf, + hbuffer, + (int64_t) size, + state + ); + + if(res!=amscu_success) ret = amscu_failure; + + return ret; + } + + __host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state) + { + int ret = amscu_success; + int res; + if(state==NULL) state = &global_randstate; + + res = hbuff_rand_threadedexec( + rand_threadfunc1, + rand, + hbuffer, + (int64_t) size, + state + ); + + if(res!=amscu_success) ret = amscu_failure; + + return ret; + } + + __host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state) + { + int ret = amscu_success; + int res; + if(state==NULL) state = &global_randstate; + + res = hbuff_rand_threadedexec( + rand_threadfunc1, + randnf, + hbuffer, + (int64_t) size, + state + ); + + if(res!=amscu_success) ret = amscu_failure; + + return ret; + } + + __host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state) + { + int ret = amscu_success; + int res; + if(state==NULL) state = &global_randstate; + + res = hbuff_rand_threadedexec( + rand_threadfunc1, + randn, + hbuffer, + (int64_t) size, + state + ); + + if(res!=amscu_success) ret = amscu_failure; + + return ret; + } + + __host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state) + { + int ret = amscu_success; + int res; + if(state==NULL) state = &global_randstate; + + res = hbuff_rand_threadedexec2( + rand_threadfunc2, + randint, + hbuffer, + (int64_t) size, + low,high, + state + ); + + if(res!=amscu_success) ret = amscu_failure; + + return ret; + } + }; //end namespaces }; \ No newline at end of file diff --git a/src/amsculib3/random/amscurandom_tests1.cu b/src/amsculib3/random/amscurandom_tests1.cu index b663b4c..1659be0 100644 --- a/src/amsculib3/random/amscurandom_tests1.cu +++ b/src/amsculib3/random/amscurandom_tests1.cu @@ -5,55 +5,27 @@ namespace amscuda namespace random { - __host__ void amscurand_tests1() + __host__ void amscurand_tests1_randstat1() { - using namespace random; - printf("Random number generator basic function test:\n"); int I; - // uint32_t q1; - // float q2; - // double q3; - // int q4; - - rand_seed(0); - printf("from seed 0...\n"); - for(I=0;I<10;I++) - { - printf("randui32[%d] = %u\n",I,randui32()); - } - - rand_seed(0); - printf("from seed 0...\n"); - for(I=0;I<10;I++) - { - printf("randf[%d] = %1.3f\n",I,randf()); - } - - rand_seed(0); - printf("from seed 0...\n"); - for(I=0;I<10;I++) - { - printf("randf[%d] = %1.3f\n",I,rand()); - } - float q0,qlast,qmindelta,qmaxdelta,qmx,qmn,qmean,qstd,qsum,qsumsq; int N = 100000; - uint32_t seed= 34533623; + uint32_t seed= 2; rand_seed(seed); printf("from seed %u...\n",seed); - q0 = randf(); + q0 = randnf(); qmx = q0; qmn = q0; qsum = 0.0f; qsumsq = 0.0f; qlast = q0; - q0 = randf(); + q0 = randnf(); qmindelta = ::fabsf(q0-qlast); qmaxdelta = ::fabsf(q0-qlast); for(I=0;Iqmx) qmx = q0; + if(q0qmaxdelta) qmaxdelta = ::fabs(q0-qlast); + } + + qmean = qsum/((double)N); + qstd = ::sqrt((qsumsq-qsum*qsum/((double)N))/((double)N)); + + printf("Statistics for randn() N=%d\n",N); + printf("\tmin: %1.6g\n",qmn); + printf("\tmax: %1.6g\n",qmx); + printf("\tmean: %1.6g\n",qmean); + printf("\tstdev: %1.6g\n",qstd); + printf("\tmindelta: %1.6g\n",qmindelta); + printf("\tmaxdelta: %1.6g\n",qmaxdelta); + } + + + __host__ void amscurand_tests1() + { + printf("Random number generator basic function test:\n"); + int I; + // uint32_t q1; + // float q2; + // double q3; + // int q4; + + rand_seed(1); + printf("from seed 1...\n"); + for(I=0;I<10;I++) + { + printf("randui32[%d] = %u\n",I,randui32()); + } + + rand_seed(1); + printf("from seed 1...\n"); + for(I=0;I<10;I++) + { + printf("randf[%d] = %1.3f\n",I,randf()); + } + + rand_seed(0); + printf("from seed 0...\n"); + for(I=0;I<10;I++) + { + printf("randf[%d] = %1.3f\n",I,rand()); + } + + + amscurand_tests1_randstat1(); + amscurand_tests1_randstat2(); + + return; diff --git a/src/amsculib3/random/amsxoroshiro.cu b/src/amsculib3/random/amsxoroshiro.cu index fbfaf13..42f7339 100644 --- a/src/amsculib3/random/amsxoroshiro.cu +++ b/src/amsculib3/random/amsxoroshiro.cu @@ -100,7 +100,7 @@ namespace xoroshiro __host__ __device__ void xs128pp_jump(xs128pp_state* state) { - static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc }; + AMSCU_CONST static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc }; uint64_t low = 0; uint64_t high = 0; int I; diff --git a/src/amsculib3/util/amscu_cputhreading.cu b/src/amsculib3/util/amscu_cputhreading.cu new file mode 100644 index 0000000..4b11003 --- /dev/null +++ b/src/amsculib3/util/amscu_cputhreading.cu @@ -0,0 +1,26 @@ +#include + +namespace amscuda +{ +namespace util +{ + + int amscpu_cputhreading_threadplan(int probsize) + { + int nthreads = 1; + int ta = std::thread::hardware_concurrency(); + + if(probsize>1024) + { + nthreads = probsize/1024; + + if(nthreads>ta-2) nthreads = ta-2; + if(nthreads>amscuda::amscu_defcputhreads) nthreads = amscuda::amscu_defcputhreads; + if(nthreads<1) nthreads = 1; + } + + return nthreads; + } + +}; +}; \ No newline at end of file diff --git a/src/main.cu b/src/main.cu index 1c6254e..c7be39f 100644 --- a/src/main.cu +++ b/src/main.cu @@ -23,7 +23,7 @@ int main(int argc, char* argv[]) //test_amscurarray1(); - random::amscurand_tests1(); + //random::amscurand_tests1(); return 0; } \ No newline at end of file