host random buffer fillers

2026-04-14 21:20:49 -04:00
parent b0121a2f83
commit 361ee4c6da
42 changed files with 540 additions and 57 deletions
--- a/build/make.linux64.test.py
+++ b/build/make.linux64.test.py
@ -17,7 +17,7 @@ builddir = "./build_linux64"
 doinstall = True   #copies the build_output to the install dir when finished
 cc = "nvcc" #compiler
 cflags = "-dc --compiler-options '-fPIC -O3'"
-libraries = "-l{}".format(libname)
+libraries = "-l{} -lpthread".format(libname)
 libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
 linkerflags = " -Xlinker=-rpath,."
 srcexts = [".c",".cpp",".cu"]
--- a/build_linux64/libamsculib3.linux64.a
+++ b/build_linux64/libamsculib3.linux64.a
--- a/build_linux64/objstore/amscu_comp128.o
+++ b/build_linux64/objstore/amscu_comp128.o
--- a/build_linux64/objstore/amscu_comp64.o
+++ b/build_linux64/objstore/amscu_comp64.o
--- a/build_linux64/objstore/amscu_cputhreading.o
+++ b/build_linux64/objstore/amscu_cputhreading.o
--- a/build_linux64/objstore/amscu_cudafunctions.o
+++ b/build_linux64/objstore/amscu_cudafunctions.o
--- a/build_linux64/objstore/amscuarray.o
+++ b/build_linux64/objstore/amscuarray.o
--- a/build_linux64/objstore/amscuarray_dops.o
+++ b/build_linux64/objstore/amscuarray_dops.o
--- a/build_linux64/objstore/amscufhash.o
+++ b/build_linux64/objstore/amscufhash.o
--- a/build_linux64/objstore/amscugeom.o
+++ b/build_linux64/objstore/amscugeom.o
--- a/build_linux64/objstore/amsculib3.o
+++ b/build_linux64/objstore/amsculib3.o
--- a/build_linux64/objstore/amscumath.o
+++ b/build_linux64/objstore/amscumath.o
--- a/build_linux64/objstore/amscupcg.o
+++ b/build_linux64/objstore/amscupcg.o
--- a/build_linux64/objstore/amscurandlcg.o
+++ b/build_linux64/objstore/amscurandlcg.o
--- a/build_linux64/objstore/amscurandom1.o
+++ b/build_linux64/objstore/amscurandom1.o
--- a/build_linux64/objstore/amscurandom1_dbuff.o
+++ b/build_linux64/objstore/amscurandom1_dbuff.o
--- a/build_linux64/objstore/amscurandom1_hbuff.o
+++ b/build_linux64/objstore/amscurandom1_hbuff.o
--- a/build_linux64/objstore/amscurandom_tests1.o
+++ b/build_linux64/objstore/amscurandom_tests1.o
--- a/build_linux64/objstore/amscurarray.o
+++ b/build_linux64/objstore/amscurarray.o
--- a/build_linux64/objstore/amscusplitmix.o
+++ b/build_linux64/objstore/amscusplitmix.o
--- a/build_linux64/objstore/amsxoroshiro.o
+++ b/build_linux64/objstore/amsxoroshiro.o
--- a/build_linux64/objstore/cuvec2.o
+++ b/build_linux64/objstore/cuvec2.o
--- a/build_linux64/objstore/cuvec2f.o
+++ b/build_linux64/objstore/cuvec2f.o
--- a/build_linux64/objstore/cuvec2i.o
+++ b/build_linux64/objstore/cuvec2i.o
--- a/build_linux64/objstore/cuvec3.o
+++ b/build_linux64/objstore/cuvec3.o
--- a/build_linux64/objstore/cuvec3f.o
+++ b/build_linux64/objstore/cuvec3f.o
--- a/build_linux64/objstore/cuvec3i.o
+++ b/build_linux64/objstore/cuvec3i.o
--- a/build_linux64/objstore/cuvec4.o
+++ b/build_linux64/objstore/cuvec4.o
--- a/build_linux64/objstore/cuvec4f.o
+++ b/build_linux64/objstore/cuvec4f.o
--- a/build_linux64/objstore/cuvec4i.o
+++ b/build_linux64/objstore/cuvec4i.o
--- a/build_linux64/test
+++ b/build_linux64/test
--- a/include/amsculib3/amsculib3.hpp
+++ b/include/amsculib3/amsculib3.hpp
@ -7,7 +7,12 @@
 #include <math.h>
 #include <stdint.h>
 #include <time.h>
 //C++ standard library headers
 #include <new>
 #include <thread>
 #include <functional>
 #include <mutex>
 #include <cuda_runtime_api.h> //where all the cuda functions live
 #include <cuda_runtime.h>
@ -41,12 +46,18 @@ namespace amscuda
    //default numthreads to execute on cpu
    AMSCU_CONST static const int amscu_defcputhreads = 8;
    AMSCU_CONST static const int amscu_success = 1;
    AMSCU_CONST static const int amscu_meh = 0;
    AMSCU_CONST static const int amscu_failure = -1;
 }; //end namespace amscuda
 //Components
 #include <amsculib3/amscu_cudafunctions.hpp>
 #include <amsculib3/math/amscumath.hpp>
 #include <amsculib3/geom/amscugeom.hpp>
 #include <amsculib3/util/amscu_util.hpp>
 #include <amsculib3/amscuarray.hpp>
 #include <amsculib3/amscuda_binarrrw.hpp>
--- a/include/amsculib3/random/amscurandom.cuh
+++ b/include/amsculib3/random/amscurandom.cuh
@ -34,18 +34,18 @@ __host__ __device__ float randnf(randstate_t *state = NULL);
 __host__ __device__ double randn(randstate_t *state = NULL);
 //Operations to fill a host buffer with random values
-__host__ int hbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state = NULL);
-__host__ int hbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state = NULL);
-__host__ int hbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state = NULL);
-__host__ int hbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state = NULL);
-__host__ int hbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL);
+__host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state = NULL);
 //Operations to fill a device buffer with random values
-__host__ int dbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int dbuff_randf(float *dbuffer, int size, randstate_t *state = NULL);
-__host__ int dbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int dbuff_rand(double *dbuffer, int size, randstate_t *state = NULL);
-__host__ int dbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int dbuff_randnf(float *dbuffer, int size, randstate_t *state = NULL);
-__host__ int dbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL);
+__host__ int dbuff_randn(double *dbuffer, int size, randstate_t *state = NULL);
-__host__ int dbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL);
+__host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_t *state = NULL);
 //Tests
--- a/include/amsculib3/util/amscu_cputhreading.hpp
+++ b/include/amsculib3/util/amscu_cputhreading.hpp
@ -0,0 +1,22 @@
 #ifndef __AMSCU_CPUTHREADING_HPP__
 #define __AMSCU_CPUTHREADING_HPP__
 namespace amscuda
 {
 namespace util
 {
 //Chooses how many CPU threads to use for a problem of probsize elements.
 //Returns 1 when probsize<=1024; otherwise probsize/1024, limited to
 //std::thread::hardware_concurrency()-2 and to amscu_defcputhreads, and never less than 1.
 int amscpu_cputhreading_threadplan(int probsize);
 //A template function that takes as input a callable (fptr) and a series of arguments.
 //The callable is executed as fptr(threadnum, nthreads, otherargs...) with a dynamic number of threads.
 //psize must be supplied; it is passed to amscpu_cputhreading_threadplan to determine the number of threads to use for execution.
 //Returns amscu_success, or amscu_failure if a worker thread could not be created.
 template<typename callable, typename ... argst> int threaded_execute(callable &&fptr, int64_t psize, argst&&... args);
 };
 };
 #include <amsculib3/util/amscu_cputhreading_impl.hpp>
 #endif 
--- a/include/amsculib3/util/amscu_cputhreading_impl.hpp
+++ b/include/amsculib3/util/amscu_cputhreading_impl.hpp
@ -0,0 +1,73 @@
 #ifndef __AMSCU_CPUTHREADING_IMPL_HPP__
 #define __AMSCU_CPUTHREADING_IMPL_HPP__
 namespace amscuda
 {
 namespace util
 {
 //Executes fptr(threadnum, nthreads, args...) once per planned worker and waits for all workers.
 //  fptr  : callable invoked as fptr(int threadnum, int nthreads, args...)
 //  psize : problem size handed to amscpu_cputhreading_threadplan to choose the thread count
 //  args  : extra arguments. In the threaded path std::thread stores its own copy of every
 //          argument for each worker; wrap an argument in std::ref to share one object.
 //Returns amscu_success, or amscu_failure if a worker thread could not be started.
 //Every worker that did start is joined before this function returns.
 template<typename callable, typename ... argst> int threaded_execute(callable &&fptr, int64_t psize, argst&&... args)
 {
    int ret = amscu_success;
    int I;
    std::vector<std::thread> threads;
    int nthreads = amscpu_cputhreading_threadplan(psize);
    if(nthreads<=1)
    {
        nthreads = 1;
        I = 0;
        //std::invoke is a C++17 feature (and mingw8 complains even so), so call directly.
        //Forwarding is safe here: the arguments are consumed exactly once.
        fptr(I,nthreads,std::forward<argst>(args)...);
    }
    else
    {
        threads.reserve(nthreads);
        try
        {
            for(I=0;I<nthreads;I++)
            {
                //fptr and args are deliberately passed as lvalues. Using std::forward inside
                //this loop could move from an rvalue on the first iteration and hand
                //moved-from objects to every later worker.
                threads.emplace_back(fptr,I,nthreads,args...);
            }
        }
        catch(...)
        {
            //std::thread signals a failed launch by throwing (it never yields NULL).
            //Record the failure and fall through so the workers already running are joined.
            ret = amscu_failure;
        }
        for(I=0;I<(int)threads.size();I++)
        {
            threads[I].join();
        }
    }
    return ret;
 }
 };
 };
 #endif 
--- a/include/amsculib3/util/amscu_util.hpp
+++ b/include/amsculib3/util/amscu_util.hpp
@ -0,0 +1,15 @@
 #ifndef __AMSCU_UTIL_CUH__
 #define __AMSCU_UTIL_CUH__
 namespace amscuda
 {
 namespace util
 {
 };
 };
 #include <amsculib3/util/amscu_cputhreading.hpp>
 #endif 
--- a/src/amsculib3/random/amscurandom1.cu
+++ b/src/amsculib3/random/amscurandom1.cu
@ -8,12 +8,12 @@ namespace random
 {
    //Choosing xoroshiro64** as my default RNG due to 32 bit only operations 
-    randstate_t global_rand_cpustate = xs64ss_state();
+    randstate_t global_randstate = xs64ss_state();
    __host__ void rand_seed(const uint32_t seed)
    {
-        global_rand_cpustate = xs64ss_state(seed);
+        global_randstate = xs64ss_state(seed);
    }
    __host__ __device__ void rand_state_increment(const int32_t inc, randstate_t *state)
@ -22,7 +22,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        xoroshiro::xs64ss_state* s2 = (xoroshiro::xs64ss_state*)state;
        s2->low += inc;
@ -36,7 +36,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        xs64ss_next((xoroshiro::xs64ss_state*)state);
        return;
@ -49,7 +49,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        ret = xoroshiro::xs64ss_next((xoroshiro::xs64ss_state*)state);
@ -63,7 +63,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        int32_t q = (int)((randui32(state)>>1U)%(1U<<16U));
        ret = (q%(high-low))+low;
@ -77,7 +77,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        ret= ((float)randui32(state))/(4294967296.0f);
        return ret;
@ -90,7 +90,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        ret= ((double)randui32(state))/(4294967296.0f);
        return ret;
@ -103,7 +103,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        q1 = randf(state);
@ -118,7 +118,7 @@ namespace random
            // GPU-specific code (device path)
        #else
            // CPU-specific code (host path)
-            if(state==NULL) state = &global_rand_cpustate;
+            if(state==NULL) state = &global_randstate;
        #endif
        q1 = rand(state);
--- a/src/amsculib3/random/amscurandom1_hbuff.cu
+++ b/src/amsculib3/random/amscurandom1_hbuff.cu
@ -4,6 +4,290 @@ namespace amscuda
 {
 namespace random
 {
    //Worker routine: fills this thread's contiguous slice of buffer[0,N) with rf(seed).
    //  threadnum : index of this worker, 0..nthreads-1
    //  nthreads  : total number of workers sharing the buffer
    //  rf        : generator called as rf(seed); its result is stored into buffer
    //  buffer    : destination array of at least N elements
    //  N         : total number of elements across all workers
    //  seed      : generator state handed to every rf call made by this worker
    //The slice width is N/nthreads (at least 1). The last worker extends its slice to N so
    //that the N%nthreads remainder elements are filled as well.
    template<typename dtype, typename randfunc> void rand_threadfunc1(
        int threadnum,
        int nthreads,
        randfunc &&rf,
        dtype *buffer,
        int64_t N,
        randstate_t *seed
    )
    {
        int64_t I,Is,I0,I1;
        if(nthreads<1) nthreads = 1; //guard the division below
        Is = N/nthreads;
        if(Is<1) Is = 1;
        I0 = Is*(threadnum);
        I1 = Is*(threadnum+1);
        //the last worker absorbs whatever the integer division left over
        if(threadnum>=nthreads-1) I1 = N;
        if(I1>N) I1 = N;
        for(I=I0;I<I1;I++)
        {
            buffer[I] = rf(seed);
        }
        return;
    }
    //Runs fptr(threadnum, nthreads, rf, buffer, N, seed) on a planned number of CPU threads,
    //giving each worker its own generator state.
    //  fptr     : worker routine (e.g. rand_threadfunc1)
    //  rf       : generator passed through to the worker
    //  buffer   : destination array passed through to the worker
    //  N        : element count; also used to plan the number of threads
    //  mainseed : caller's generator state. In the threaded path it is dereferenced, so it
    //             must not be NULL there; worker I receives a copy advanced by
    //             rand_state_increment(I), and mainseed itself is advanced by nthreads.
    //             In the single-thread path it is handed to the worker unchanged.
    //Returns amscu_success, or amscu_failure if a worker thread could not be started.
    //Every worker that did start is joined before this function returns.
    template<typename callable, typename randfunc, typename dtype> 
    int hbuff_rand_threadedexec(
        callable &&fptr, 
        randfunc &&rf,
        dtype *buffer, 
        int64_t N, 
        randstate_t *mainseed
    )
    {
        int ret = amscu_success;
        int I;
        std::vector<std::thread> threads;
        std::vector<randstate_t> seeds;
        int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N);
        if(nthreads<=1)
        {
            nthreads = 1;
            I = 0;
            fptr(I,nthreads,rf,buffer,N,mainseed);
        }
        else
        {
            threads.reserve(nthreads);
            //seeds is sized once here and never resized, so &seeds[I] stays valid for the workers
            seeds.resize(nthreads);
            for(I=0;I<nthreads;I++)
            {
                seeds[I] = *mainseed;
                rand_state_increment(I,&seeds[I]);
            }
            rand_state_increment(nthreads,mainseed);
            try
            {
                for(I=0;I<nthreads;I++)
                {
                    //fptr is passed as an lvalue: forwarding it inside the loop could move
                    //from an rvalue callable on the first iteration.
                    threads.emplace_back(fptr,I,nthreads,rf,buffer,N,&seeds[I]);
                }
            }
            catch(...)
            {
                //std::thread signals a failed launch by throwing (it never yields NULL).
                //Record the failure and fall through so running workers are still joined.
                ret = amscu_failure;
            }
            for(I=0;I<(int)threads.size();I++)
            {
                threads[I].join();
            }
        }
        return ret;
    }
    //Worker routine: fills this thread's contiguous slice of buffer[0,N) with rf(min,max,seed).
    //  threadnum : index of this worker, 0..nthreads-1
    //  nthreads  : total number of workers sharing the buffer
    //  rf        : generator called as rf(min,max,seed); its result is stored into buffer
    //  buffer    : destination array of at least N elements
    //  N         : total number of elements across all workers
    //  min,max   : bounds forwarded unchanged to rf
    //  seed      : generator state handed to every rf call made by this worker
    //The slice width is N/nthreads (at least 1). The last worker extends its slice to N so
    //that the N%nthreads remainder elements are filled as well.
    template<typename dtype, typename rngbnd, typename randfunc> void rand_threadfunc2(
        int threadnum,
        int nthreads,
        randfunc &&rf,
        dtype *buffer,
        int64_t N,
        rngbnd min,
        rngbnd max,
        randstate_t *seed
    )
    {
        int64_t I,Is,I0,I1;
        if(nthreads<1) nthreads = 1; //guard the division below
        Is = N/nthreads;
        if(Is<1) Is = 1;
        I0 = Is*(threadnum);
        I1 = Is*(threadnum+1);
        //the last worker absorbs whatever the integer division left over
        if(threadnum>=nthreads-1) I1 = N;
        if(I1>N) I1 = N;
        for(I=I0;I<I1;I++)
        {
            buffer[I] = rf(min,max,seed);
        }
        return;
    }
    //Runs fptr(threadnum, nthreads, rf, buffer, N, min, max, seed) on a planned number of CPU
    //threads, giving each worker its own generator state.
    //  fptr     : worker routine (e.g. rand_threadfunc2)
    //  rf       : generator passed through to the worker
    //  buffer   : destination array passed through to the worker
    //  N        : element count; also used to plan the number of threads
    //  min,max  : bounds passed through to the worker
    //  mainseed : caller's generator state. In the threaded path it is dereferenced, so it
    //             must not be NULL there; worker I receives a copy advanced by
    //             rand_state_increment(I), and mainseed itself is advanced by nthreads.
    //             In the single-thread path it is handed to the worker unchanged.
    //Returns amscu_success, or amscu_failure if a worker thread could not be started.
    //Every worker that did start is joined before this function returns.
    template<typename callable, typename randfunc, typename rngbnd, typename dtype> 
    int hbuff_rand_threadedexec2(
        callable &&fptr, 
        randfunc &&rf,
        dtype *buffer, 
        int64_t N, 
        rngbnd min,
        rngbnd max,
        randstate_t *mainseed
    )
    {
        int ret = amscu_success;
        int I;
        std::vector<std::thread> threads;
        std::vector<randstate_t> seeds;
        int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N);
        if(nthreads<=1)
        {
            nthreads = 1;
            I = 0;
            fptr(I,nthreads,rf,buffer,N,min,max,mainseed);
        }
        else
        {
            threads.reserve(nthreads);
            //seeds is sized once here and never resized, so &seeds[I] stays valid for the workers
            seeds.resize(nthreads);
            for(I=0;I<nthreads;I++)
            {
                seeds[I] = *mainseed;
                rand_state_increment(I,&seeds[I]);
            }
            rand_state_increment(nthreads,mainseed);
            try
            {
                for(I=0;I<nthreads;I++)
                {
                    //fptr is passed as an lvalue: forwarding it inside the loop could move
                    //from an rvalue callable on the first iteration.
                    threads.emplace_back(fptr,I,nthreads,rf,buffer,N,min,max,&seeds[I]);
                }
            }
            catch(...)
            {
                //std::thread signals a failed launch by throwing (it never yields NULL).
                //Record the failure and fall through so running workers are still joined.
                ret = amscu_failure;
            }
            for(I=0;I<(int)threads.size();I++)
            {
                threads[I].join();
            }
        }
        return ret;
    }
    //Fills a host float buffer with values drawn from randf(), dispatching the work
    //through hbuff_rand_threadedexec with rand_threadfunc1 as the worker.
    //  hbuffer : destination host array
    //  size    : number of elements requested
    //  state   : generator state; NULL selects global_randstate
    //Returns amscu_success, or amscu_failure if the threaded executor reports anything else.
    __host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state)
    {
        randstate_t *activestate = (state!=NULL) ? state : &global_randstate;
        const int64_t count = (int64_t) size;
        const int status = hbuff_rand_threadedexec(
            rand_threadfunc1<float, float (*)(amscuda::random::randstate_t*)>,
            randf,
            hbuffer,
            count,
            activestate
        );
        return (status==amscu_success) ? amscu_success : amscu_failure;
    }
    //Fills a host double buffer with values drawn from rand(), dispatching the work
    //through hbuff_rand_threadedexec with rand_threadfunc1 as the worker.
    //  hbuffer : destination host array
    //  size    : number of elements requested
    //  state   : generator state; NULL selects global_randstate
    //Returns amscu_success, or amscu_failure if the threaded executor reports anything else.
    __host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state)
    {
        randstate_t *activestate = (state!=NULL) ? state : &global_randstate;
        const int64_t count = (int64_t) size;
        const int status = hbuff_rand_threadedexec(
            rand_threadfunc1<double, double (*)(amscuda::random::randstate_t*)>,
            rand,
            hbuffer,
            count,
            activestate
        );
        return (status==amscu_success) ? amscu_success : amscu_failure;
    }
    //Fills a host float buffer with values drawn from randnf(), dispatching the work
    //through hbuff_rand_threadedexec with rand_threadfunc1 as the worker.
    //  hbuffer : destination host array
    //  size    : number of elements requested
    //  state   : generator state; NULL selects global_randstate
    //Returns amscu_success, or amscu_failure if the threaded executor reports anything else.
    __host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state)
    {
        randstate_t *activestate = (state!=NULL) ? state : &global_randstate;
        const int64_t count = (int64_t) size;
        const int status = hbuff_rand_threadedexec(
            rand_threadfunc1<float, float (*)(amscuda::random::randstate_t*)>,
            randnf,
            hbuffer,
            count,
            activestate
        );
        return (status==amscu_success) ? amscu_success : amscu_failure;
    }
    //Fills a host double buffer with values drawn from randn(), dispatching the work
    //through hbuff_rand_threadedexec with rand_threadfunc1 as the worker.
    //  hbuffer : destination host array
    //  size    : number of elements requested
    //  state   : generator state; NULL selects global_randstate
    //Returns amscu_success, or amscu_failure if the threaded executor reports anything else.
    __host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state)
    {
        randstate_t *activestate = (state!=NULL) ? state : &global_randstate;
        const int64_t count = (int64_t) size;
        const int status = hbuff_rand_threadedexec(
            rand_threadfunc1<double, double (*)(amscuda::random::randstate_t*)>,
            randn,
            hbuffer,
            count,
            activestate
        );
        return (status==amscu_success) ? amscu_success : amscu_failure;
    }
    //Fills a host int buffer with values drawn from randint(low,high,...), dispatching the
    //work through hbuff_rand_threadedexec2 with rand_threadfunc2 as the worker.
    //  hbuffer  : destination host array
    //  size     : number of elements requested
    //  low,high : bounds forwarded unchanged to randint
    //  state    : generator state; NULL selects global_randstate
    //Returns amscu_success, or amscu_failure if the threaded executor reports anything else.
    __host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state)
    {
        randstate_t *activestate = (state!=NULL) ? state : &global_randstate;
        const int64_t count = (int64_t) size;
        const int status = hbuff_rand_threadedexec2(
            rand_threadfunc2<int, int, int (*)(int, int, amscuda::random::randstate_t*)>,
            randint,
            hbuffer,
            count,
            low,high,
            activestate
        );
        return (status==amscu_success) ? amscu_success : amscu_failure;
    }
 }; //end namespaces
 };
--- a/src/amsculib3/random/amscurandom_tests1.cu
+++ b/src/amsculib3/random/amscurandom_tests1.cu
@ -5,55 +5,27 @@ namespace amscuda
 namespace random
 {
-    __host__ void amscurand_tests1()
+    __host__ void amscurand_tests1_randstat1()
    {
        using namespace random;
        printf("Random number generator basic function test:\n");
        int I;
        // uint32_t q1;
        // float q2;
        // double q3;
        // int q4;
        rand_seed(0);
        printf("from seed 0...\n");
        for(I=0;I<10;I++)
        {
            printf("randui32[%d] = %u\n",I,randui32());
        }
        rand_seed(0);
        printf("from seed 0...\n");
        for(I=0;I<10;I++)
        {
            printf("randf[%d] = %1.3f\n",I,randf());
        }
        rand_seed(0);
        printf("from seed 0...\n");
        for(I=0;I<10;I++)
        {
            printf("randf[%d] = %1.3f\n",I,rand());
        }
        float q0,qlast,qmindelta,qmaxdelta,qmx,qmn,qmean,qstd,qsum,qsumsq;
        int N = 100000;
-        uint32_t seed= 34533623;
+        uint32_t seed= 2;
        rand_seed(seed);
        printf("from seed %u...\n",seed);
-        q0 = randf();
+        q0 = randnf();
        qmx = q0;
        qmn = q0;
        qsum = 0.0f;
        qsumsq = 0.0f;
        qlast = q0;
-        q0 = randf();
+        q0 = randnf();
        qmindelta = ::fabsf(q0-qlast);
        qmaxdelta = ::fabsf(q0-qlast);
        for(I=0;I<N;I++)
        {
            qlast = q0;
-            q0 = randf();
+            q0 = randnf();
            qsum += q0;
            qsumsq += q0*q0;
@ -66,13 +38,93 @@ namespace random
        qmean = qsum/((float)N);
        qstd = ::sqrtf((qsumsq-qsum*qsum/((float)N))/((float)N));
-        printf("Statistics for randf() N=%d\n",N);
+        printf("Statistics for randnf() N=%d\n",N);
        printf("\tmin: %1.3g\n",qmn);
        printf("\tmax: %1.3g\n",qmx);
        printf("\tmean: %1.3g\n",qmean);
        printf("\tstdev: %1.3g\n",qstd);
        printf("\tmindelta: %1.3g\n",qmindelta);
        printf("\tmaxdelta: %1.3g\n",qmaxdelta);
    }
    //Prints summary statistics (min, max, mean, stdev, smallest and largest step between
    //consecutive samples) for randn(), seeded with rand_seed(2).
    //Two samples are drawn before the loop: the first initialises min/max, and the pair
    //initialises the step extremes. Only the N samples drawn inside the loop are accumulated
    //into the sums used for the mean and standard deviation.
    __host__ void amscurand_tests1_randstat2()
    {
        const int N = 100000;
        const uint32_t seed = 2;
        double prev,cur,delta;
        double vmin,vmax,dmin,dmax;
        double total = 0.0;
        double totalsq = 0.0;
        double mean,stdev;
        int I;

        rand_seed(seed);
        printf("from seed %u...\n",seed);

        //first sample seeds the running min/max
        cur = randn();
        vmax = cur;
        vmin = cur;
        prev = cur;

        //second sample seeds the running step extremes
        cur = randn();
        dmin = ::fabs(cur-prev);
        dmax = dmin;

        for(I=0;I<N;I++)
        {
            prev = cur;
            cur = randn();
            total += cur;
            totalsq += cur*cur;
            if(cur>vmax) vmax = cur;
            if(cur<vmin) vmin = cur;
            delta = ::fabs(cur-prev);
            if(delta<dmin) dmin = delta;
            if(delta>dmax) dmax = delta;
        }

        mean = total/((double)N);
        stdev = ::sqrt((totalsq-total*total/((double)N))/((double)N));

        printf("Statistics for randn() N=%d\n",N);
        printf("\tmin: %1.6g\n",vmin);
        printf("\tmax: %1.6g\n",vmax);
        printf("\tmean: %1.6g\n",mean);
        printf("\tstdev: %1.6g\n",stdev);
        printf("\tmindelta: %1.6g\n",dmin);
        printf("\tmaxdelta: %1.6g\n",dmax);
    }
    __host__ void amscurand_tests1()
    {
        printf("Random number generator basic function test:\n");
        int I;
        // uint32_t q1;
        // float q2;
        // double q3;
        // int q4;
        rand_seed(1);
        printf("from seed 1...\n");
        for(I=0;I<10;I++)
        {
            printf("randui32[%d] = %u\n",I,randui32());
        }
        rand_seed(1);
        printf("from seed 1...\n");
        for(I=0;I<10;I++)
        {
            printf("randf[%d] = %1.3f\n",I,randf());
        }
        rand_seed(0);
        printf("from seed 0...\n");
        for(I=0;I<10;I++)
        {
            printf("randf[%d] = %1.3f\n",I,rand());
        }
        amscurand_tests1_randstat1();
        amscurand_tests1_randstat2();
        return;
--- a/src/amsculib3/random/amsxoroshiro.cu
+++ b/src/amsculib3/random/amsxoroshiro.cu
@ -100,7 +100,7 @@ namespace xoroshiro
    __host__ __device__  void xs128pp_jump(xs128pp_state* state)
    {
-        static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
+        AMSCU_CONST static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
        uint64_t low = 0;
        uint64_t high = 0;
        int I;
--- a/src/amsculib3/util/amscu_cputhreading.cu
+++ b/src/amsculib3/util/amscu_cputhreading.cu
@ -0,0 +1,26 @@
 #include <amsculib3/amsculib3.hpp>
 namespace amscuda
 {
 namespace util
 {
    //Chooses how many CPU threads to use for a problem of probsize elements.
    //Problems of 1024 elements or fewer run on a single thread. Larger problems get one
    //thread per 1024 elements, limited to hardware_concurrency()-2 and to
    //amscu_defcputhreads, and never fewer than 1.
    int amscpu_cputhreading_threadplan(int probsize)
    {
        if(probsize<=1024) return 1;

        const int hwthreads = std::thread::hardware_concurrency();
        const int sparecap = hwthreads-2; //may be <=0; the floor below handles that
        int planned = probsize/1024;

        if(planned>sparecap) planned = sparecap;
        if(planned>amscuda::amscu_defcputhreads) planned = amscuda::amscu_defcputhreads;
        if(planned<1) planned = 1;

        return planned;
    }
 };
 };
--- a/src/main.cu
+++ b/src/main.cu
@ -23,7 +23,7 @@ int main(int argc, char* argv[])
    //test_amscurarray1();
-    random::amscurand_tests1();
+    //random::amscurand_tests1();
    return 0;
 }