host random buffer fillers
This commit is contained in:
@ -17,7 +17,7 @@ builddir = "./build_linux64"
|
|||||||
doinstall = True #copies the build_output to the install dir when finished
|
doinstall = True #copies the build_output to the install dir when finished
|
||||||
cc = "nvcc" #compiler
|
cc = "nvcc" #compiler
|
||||||
cflags = "-dc --compiler-options '-fPIC -O3'"
|
cflags = "-dc --compiler-options '-fPIC -O3'"
|
||||||
libraries = "-l{}".format(libname)
|
libraries = "-l{} -lpthread".format(libname)
|
||||||
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
|
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
|
||||||
linkerflags = " -Xlinker=-rpath,."
|
linkerflags = " -Xlinker=-rpath,."
|
||||||
srcexts = [".c",".cpp",".cu"]
|
srcexts = [".c",".cpp",".cu"]
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
build_linux64/objstore/amscu_cputhreading.o
Normal file
BIN
build_linux64/objstore/amscu_cputhreading.o
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -7,7 +7,12 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
//C++ standard library headers
|
||||||
#include <new>
|
#include <new>
|
||||||
|
#include <thread>
|
||||||
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include <cuda_runtime_api.h> //where all the cuda functions live
|
#include <cuda_runtime_api.h> //where all the cuda functions live
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
@ -41,12 +46,18 @@ namespace amscuda
|
|||||||
//default numthreads to execute on cpu
|
//default numthreads to execute on cpu
|
||||||
AMSCU_CONST static const int amscu_defcputhreads = 8;
|
AMSCU_CONST static const int amscu_defcputhreads = 8;
|
||||||
|
|
||||||
|
AMSCU_CONST static const int amscu_success = 1;
|
||||||
|
AMSCU_CONST static const int amscu_meh = 0;
|
||||||
|
AMSCU_CONST static const int amscu_failure = -1;
|
||||||
|
|
||||||
|
|
||||||
}; //end namespace amscuda
|
}; //end namespace amscuda
|
||||||
|
|
||||||
//Components
|
//Components
|
||||||
#include <amsculib3/amscu_cudafunctions.hpp>
|
#include <amsculib3/amscu_cudafunctions.hpp>
|
||||||
#include <amsculib3/math/amscumath.hpp>
|
#include <amsculib3/math/amscumath.hpp>
|
||||||
#include <amsculib3/geom/amscugeom.hpp>
|
#include <amsculib3/geom/amscugeom.hpp>
|
||||||
|
#include <amsculib3/util/amscu_util.hpp>
|
||||||
|
|
||||||
#include <amsculib3/amscuarray.hpp>
|
#include <amsculib3/amscuarray.hpp>
|
||||||
#include <amsculib3/amscuda_binarrrw.hpp>
|
#include <amsculib3/amscuda_binarrrw.hpp>
|
||||||
|
|||||||
@ -34,18 +34,18 @@ __host__ __device__ float randnf(randstate_t *state = NULL);
|
|||||||
__host__ __device__ double randn(randstate_t *state = NULL);
|
__host__ __device__ double randn(randstate_t *state = NULL);
|
||||||
|
|
||||||
//Operations to fill a host buffer with random values
|
//Operations to fill a host buffer with random values
|
||||||
__host__ int hbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int hbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int hbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int hbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int hbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL);
|
__host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state = NULL);
|
||||||
|
|
||||||
//Operations to fill a device buffer with random values
|
//Operations to fill a device buffer with random values
|
||||||
__host__ int dbuff_randf(float *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int dbuff_randf(float *dbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int dbuff_rand(double *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int dbuff_rand(double *dbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int dbuff_randnf(float *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int dbuff_randnf(float *dbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int dbuff_randn(double *hbuffer, int64_t size, randstate_t *state = NULL);
|
__host__ int dbuff_randn(double *dbuffer, int size, randstate_t *state = NULL);
|
||||||
__host__ int dbuff_randint(int *hbuffer, int64_t size, int low, int high, randstate_t *state = NULL);
|
__host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_t *state = NULL);
|
||||||
|
|
||||||
|
|
||||||
//Tests
|
//Tests
|
||||||
|
|||||||
22
include/amsculib3/util/amscu_cputhreading.hpp
Normal file
22
include/amsculib3/util/amscu_cputhreading.hpp
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
//Include guard: identifiers containing a double underscore are reserved for the
//implementation, so the guard macro uses a plain name.
#ifndef AMSCU_CPUTHREADING_HPP
#define AMSCU_CPUTHREADING_HPP

#include <stdint.h> //int64_t, used by threaded_execute below

namespace amscuda
{
namespace util
{

    //Chooses how many CPU worker threads to use for a problem of probsize elements.
    //The definition hands out one thread per 1024 elements, capped by
    //std::thread::hardware_concurrency()-2 and by amscu_defcputhreads, and never returns
    //less than 1. Problems of 1024 elements or fewer always get 1 thread.
    int amscpu_cputhreading_threadplan(int probsize);

    //A template function that takes as input a callable and a series of arguments.
    //The callable is executed as fptr(threadnum, nthreads, otherargs...) with a dynamic
    //number of threads, where threadnum runs from 0 to nthreads-1.
    //psize must be supplied; it is passed to amscpu_cputhreading_threadplan to determine
    //the number of threads to use for execution.
    //Returns amscu_success, or amscu_failure if a worker thread could not be created.
    template<typename callable, typename ... argst> int threaded_execute(callable &&fptr, int64_t psize, argst&&... args);

};
};

#include <amsculib3/util/amscu_cputhreading_impl.hpp>

#endif
|
||||||
73
include/amsculib3/util/amscu_cputhreading_impl.hpp
Normal file
73
include/amsculib3/util/amscu_cputhreading_impl.hpp
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#ifndef __AMSCU_CPUTHREADING_IMPL_HPP__
|
||||||
|
#define __AMSCU_CPUTHREADING_IMPL_HPP__
|
||||||
|
|
||||||
|
namespace amscuda
|
||||||
|
{
|
||||||
|
namespace util
|
||||||
|
{
|
||||||
|
|
||||||
|
template<typename callable, typename ... argst> int threaded_execute(callable &&fptr, int64_t psize, argst&&... args)
|
||||||
|
{
|
||||||
|
int ret = amscu_success;
|
||||||
|
|
||||||
|
int I;
|
||||||
|
std::vector<std::thread*> threads;
|
||||||
|
int nthreads = amscpu_cputhreading_threadplan(psize);
|
||||||
|
|
||||||
|
if(nthreads<=1)
|
||||||
|
{
|
||||||
|
nthreads = 1;
|
||||||
|
I = 0;
|
||||||
|
// std::invoke(
|
||||||
|
// std::forward<callable>(fptr),
|
||||||
|
// I,
|
||||||
|
// nthreads,
|
||||||
|
// std::forward<argst>(args)...
|
||||||
|
// );
|
||||||
|
|
||||||
|
//std::invoke is a C++17 feature, and mingw8 complains even so.
|
||||||
|
// Can I get away with just calling the functions?
|
||||||
|
fptr(I,nthreads,std::forward<argst>(args)...);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
threads.resize(nthreads);
|
||||||
|
for(I=0;I<nthreads;I++) threads[I] = NULL;
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
threads[I] = new(std::nothrow) std::thread
|
||||||
|
(
|
||||||
|
std::forward<callable>(fptr),
|
||||||
|
I,
|
||||||
|
nthreads,
|
||||||
|
std::forward<argst>(args)...
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]==NULL)
|
||||||
|
{ //null thread creation failure check
|
||||||
|
//printf("debug check!\n");
|
||||||
|
ret = amscu_failure;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]!=NULL)
|
||||||
|
{
|
||||||
|
threads[I]->join();
|
||||||
|
delete threads[I];
|
||||||
|
threads[I] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
15
include/amsculib3/util/amscu_util.hpp
Normal file
15
include/amsculib3/util/amscu_util.hpp
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#ifndef __AMSCU_UTIL_CUH__
|
||||||
|
#define __AMSCU_UTIL_CUH__
|
||||||
|
|
||||||
|
namespace amscuda
|
||||||
|
{
|
||||||
|
namespace util
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#include <amsculib3/util/amscu_cputhreading.hpp>
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -8,12 +8,12 @@ namespace random
|
|||||||
{
|
{
|
||||||
|
|
||||||
//Choosing xoroshiro64** as my default RNG due to 32 bit only operations
|
//Choosing xoroshiro64** as my default RNG due to 32 bit only operations
|
||||||
randstate_t global_rand_cpustate = xs64ss_state();
|
randstate_t global_randstate = xs64ss_state();
|
||||||
|
|
||||||
|
|
||||||
__host__ void rand_seed(const uint32_t seed)
|
__host__ void rand_seed(const uint32_t seed)
|
||||||
{
|
{
|
||||||
global_rand_cpustate = xs64ss_state(seed);
|
global_randstate = xs64ss_state(seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__ void rand_state_increment(const int32_t inc, randstate_t *state)
|
__host__ __device__ void rand_state_increment(const int32_t inc, randstate_t *state)
|
||||||
@ -22,7 +22,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
xoroshiro::xs64ss_state* s2 = (xoroshiro::xs64ss_state*)state;
|
xoroshiro::xs64ss_state* s2 = (xoroshiro::xs64ss_state*)state;
|
||||||
s2->low += inc;
|
s2->low += inc;
|
||||||
@ -36,7 +36,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
xs64ss_next((xoroshiro::xs64ss_state*)state);
|
xs64ss_next((xoroshiro::xs64ss_state*)state);
|
||||||
return;
|
return;
|
||||||
@ -49,7 +49,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ret = xoroshiro::xs64ss_next((xoroshiro::xs64ss_state*)state);
|
ret = xoroshiro::xs64ss_next((xoroshiro::xs64ss_state*)state);
|
||||||
@ -63,7 +63,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
int32_t q = (int)((randui32(state)>>1U)%(1U<<16U));
|
int32_t q = (int)((randui32(state)>>1U)%(1U<<16U));
|
||||||
ret = (q%(high-low))+low;
|
ret = (q%(high-low))+low;
|
||||||
@ -77,7 +77,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
ret= ((float)randui32(state))/(4294967296.0f);
|
ret= ((float)randui32(state))/(4294967296.0f);
|
||||||
return ret;
|
return ret;
|
||||||
@ -90,7 +90,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
ret= ((double)randui32(state))/(4294967296.0f);
|
ret= ((double)randui32(state))/(4294967296.0f);
|
||||||
return ret;
|
return ret;
|
||||||
@ -103,7 +103,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
q1 = randf(state);
|
q1 = randf(state);
|
||||||
@ -118,7 +118,7 @@ namespace random
|
|||||||
// GPU-specific code (device path)
|
// GPU-specific code (device path)
|
||||||
#else
|
#else
|
||||||
// CPU-specific code (host path)
|
// CPU-specific code (host path)
|
||||||
if(state==NULL) state = &global_rand_cpustate;
|
if(state==NULL) state = &global_randstate;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
q1 = rand(state);
|
q1 = rand(state);
|
||||||
|
|||||||
@ -4,6 +4,290 @@ namespace amscuda
|
|||||||
{
|
{
|
||||||
namespace random
|
namespace random
|
||||||
{
|
{
|
||||||
|
template<typename dtype, typename randfunc> void rand_threadfunc1(
|
||||||
|
int threadnum,
|
||||||
|
int nthreads,
|
||||||
|
randfunc &&rf,
|
||||||
|
dtype *buffer,
|
||||||
|
int64_t N,
|
||||||
|
randstate_t *seed
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int64_t I,Is,I0,I1;
|
||||||
|
|
||||||
|
Is = N/nthreads;
|
||||||
|
if(Is<1) Is = 1;
|
||||||
|
I0 = Is*(threadnum);
|
||||||
|
I1 = Is*(threadnum+1);
|
||||||
|
if(I1>N) I1 = N;
|
||||||
|
|
||||||
|
for(I=I0;I<I1;I++)
|
||||||
|
{
|
||||||
|
buffer[I] = rf(seed);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename callable, typename randfunc, typename dtype>
|
||||||
|
int hbuff_rand_threadedexec(
|
||||||
|
callable &&fptr,
|
||||||
|
randfunc &&rf,
|
||||||
|
dtype *buffer,
|
||||||
|
int64_t N,
|
||||||
|
randstate_t *mainseed
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int ret = amscu_success;
|
||||||
|
|
||||||
|
int I;
|
||||||
|
std::vector<std::thread*> threads;
|
||||||
|
std::vector<randstate_t> seeds;
|
||||||
|
int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N);
|
||||||
|
|
||||||
|
if(nthreads<=1)
|
||||||
|
{
|
||||||
|
nthreads = 1;
|
||||||
|
I = 0;
|
||||||
|
fptr(I,nthreads,rf,buffer,N,mainseed);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
threads.resize(nthreads);
|
||||||
|
seeds.resize(nthreads);
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
seeds[I] = *mainseed;
|
||||||
|
rand_state_increment(I,&seeds[I]);
|
||||||
|
}
|
||||||
|
rand_state_increment(I,mainseed);
|
||||||
|
|
||||||
|
for(I=0;I<nthreads;I++) threads[I] = NULL;
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
threads[I] = new(std::nothrow) std::thread
|
||||||
|
(
|
||||||
|
std::forward<callable>(fptr),
|
||||||
|
I,
|
||||||
|
nthreads,
|
||||||
|
rf,
|
||||||
|
buffer,
|
||||||
|
N,
|
||||||
|
&seeds[I]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]==NULL)
|
||||||
|
{ //null thread creation failure check
|
||||||
|
ret = amscu_failure;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]!=NULL)
|
||||||
|
{
|
||||||
|
threads[I]->join();
|
||||||
|
delete threads[I];
|
||||||
|
threads[I] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename dtype, typename rngbnd, typename randfunc> void rand_threadfunc2(
|
||||||
|
int threadnum,
|
||||||
|
int nthreads,
|
||||||
|
randfunc &&rf,
|
||||||
|
dtype *buffer,
|
||||||
|
int64_t N,
|
||||||
|
rngbnd min,
|
||||||
|
rngbnd max,
|
||||||
|
randstate_t *seed
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int64_t I,Is,I0,I1;
|
||||||
|
|
||||||
|
Is = N/nthreads;
|
||||||
|
if(Is<1) Is = 1;
|
||||||
|
I0 = Is*(threadnum);
|
||||||
|
I1 = Is*(threadnum+1);
|
||||||
|
if(I1>N) I1 = N;
|
||||||
|
|
||||||
|
for(I=I0;I<I1;I++)
|
||||||
|
{
|
||||||
|
buffer[I] = rf(min,max,seed);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename callable, typename randfunc, typename rngbnd, typename dtype>
|
||||||
|
int hbuff_rand_threadedexec2(
|
||||||
|
callable &&fptr,
|
||||||
|
randfunc &&rf,
|
||||||
|
dtype *buffer,
|
||||||
|
int64_t N,
|
||||||
|
rngbnd min,
|
||||||
|
rngbnd max,
|
||||||
|
randstate_t *mainseed
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int ret = amscu_success;
|
||||||
|
|
||||||
|
int I;
|
||||||
|
std::vector<std::thread*> threads;
|
||||||
|
std::vector<randstate_t> seeds;
|
||||||
|
int nthreads = amscuda::util::amscpu_cputhreading_threadplan(N);
|
||||||
|
|
||||||
|
if(nthreads<=1)
|
||||||
|
{
|
||||||
|
nthreads = 1;
|
||||||
|
I = 0;
|
||||||
|
fptr(I,nthreads,rf,buffer,N,min,max,mainseed);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
threads.resize(nthreads);
|
||||||
|
seeds.resize(nthreads);
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
seeds[I] = *mainseed;
|
||||||
|
rand_state_increment(I,&seeds[I]);
|
||||||
|
}
|
||||||
|
rand_state_increment(I,mainseed);
|
||||||
|
|
||||||
|
for(I=0;I<nthreads;I++) threads[I] = NULL;
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
threads[I] = new(std::nothrow) std::thread
|
||||||
|
(
|
||||||
|
std::forward<callable>(fptr),
|
||||||
|
I,
|
||||||
|
nthreads,
|
||||||
|
rf,
|
||||||
|
buffer,
|
||||||
|
N,
|
||||||
|
min,max,
|
||||||
|
&seeds[I]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]==NULL)
|
||||||
|
{ //null thread creation failure check
|
||||||
|
ret = amscu_failure;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(I=0;I<nthreads;I++)
|
||||||
|
{
|
||||||
|
if(threads[I]!=NULL)
|
||||||
|
{
|
||||||
|
threads[I]->join();
|
||||||
|
delete threads[I];
|
||||||
|
threads[I] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Fills the host buffer hbuffer with size values produced by randf().
//  hbuffer : host buffer receiving the values
//  size    : number of elements to fill
//  state   : generator state to draw from; NULL selects global_randstate
//Returns amscu_success, or amscu_failure when hbuffer is NULL for a positive size or when
//hbuff_rand_threadedexec does not report success.
__host__ int hbuff_randf(float *hbuffer, int size, randstate_t *state)
{
    int res;

    //a NULL buffer with elements to write would be dereferenced by the workers
    if(hbuffer==NULL && size>0) return amscu_failure;
    if(state==NULL) state = &global_randstate;

    res = hbuff_rand_threadedexec(
        rand_threadfunc1<float, float (*)(amscuda::random::randstate_t*)>,
        randf,
        hbuffer,
        (int64_t) size,
        state
    );

    return (res==amscu_success) ? amscu_success : amscu_failure;
}
|
||||||
|
|
||||||
|
//Fills the host buffer hbuffer with size values produced by rand().
//  hbuffer : host buffer receiving the values
//  size    : number of elements to fill
//  state   : generator state to draw from; NULL selects global_randstate
//Returns amscu_success, or amscu_failure when hbuffer is NULL for a positive size or when
//hbuff_rand_threadedexec does not report success.
__host__ int hbuff_rand(double *hbuffer, int size, randstate_t *state)
{
    int res;

    //a NULL buffer with elements to write would be dereferenced by the workers
    if(hbuffer==NULL && size>0) return amscu_failure;
    if(state==NULL) state = &global_randstate;

    res = hbuff_rand_threadedexec(
        rand_threadfunc1<double, double (*)(amscuda::random::randstate_t*)>,
        rand,
        hbuffer,
        (int64_t) size,
        state
    );

    return (res==amscu_success) ? amscu_success : amscu_failure;
}
|
||||||
|
|
||||||
|
//Fills the host buffer hbuffer with size values produced by randnf().
//  hbuffer : host buffer receiving the values
//  size    : number of elements to fill
//  state   : generator state to draw from; NULL selects global_randstate
//Returns amscu_success, or amscu_failure when hbuffer is NULL for a positive size or when
//hbuff_rand_threadedexec does not report success.
__host__ int hbuff_randnf(float *hbuffer, int size, randstate_t *state)
{
    int res;

    //a NULL buffer with elements to write would be dereferenced by the workers
    if(hbuffer==NULL && size>0) return amscu_failure;
    if(state==NULL) state = &global_randstate;

    res = hbuff_rand_threadedexec(
        rand_threadfunc1<float, float (*)(amscuda::random::randstate_t*)>,
        randnf,
        hbuffer,
        (int64_t) size,
        state
    );

    return (res==amscu_success) ? amscu_success : amscu_failure;
}
|
||||||
|
|
||||||
|
//Fills the host buffer hbuffer with size values produced by randn().
//  hbuffer : host buffer receiving the values
//  size    : number of elements to fill
//  state   : generator state to draw from; NULL selects global_randstate
//Returns amscu_success, or amscu_failure when hbuffer is NULL for a positive size or when
//hbuff_rand_threadedexec does not report success.
__host__ int hbuff_randn(double *hbuffer, int size, randstate_t *state)
{
    int res;

    //a NULL buffer with elements to write would be dereferenced by the workers
    if(hbuffer==NULL && size>0) return amscu_failure;
    if(state==NULL) state = &global_randstate;

    res = hbuff_rand_threadedexec(
        rand_threadfunc1<double, double (*)(amscuda::random::randstate_t*)>,
        randn,
        hbuffer,
        (int64_t) size,
        state
    );

    return (res==amscu_success) ? amscu_success : amscu_failure;
}
|
||||||
|
|
||||||
|
//Fills the host buffer hbuffer with size integers produced by randint(low,high,state).
//  hbuffer  : host buffer receiving the values
//  size     : number of elements to fill
//  low,high : bounds handed unchanged to randint; they must differ
//  state    : generator state to draw from; NULL selects global_randstate
//Returns amscu_success, or amscu_failure when hbuffer is NULL or low==high for a positive
//size, or when hbuff_rand_threadedexec2 does not report success.
__host__ int hbuff_randint(int *hbuffer, int size, int low, int high, randstate_t *state)
{
    int res;

    if(size>0)
    {
        //a NULL buffer with elements to write would be dereferenced by the workers
        if(hbuffer==NULL) return amscu_failure;
        //NOTE(review): randint appears to reduce its draw modulo (high-low), which is a
        //division by zero for an empty range - confirm against randint's definition
        if(high==low) return amscu_failure;
    }
    if(state==NULL) state = &global_randstate;

    res = hbuff_rand_threadedexec2(
        rand_threadfunc2<int, int, int (*)(int, int, amscuda::random::randstate_t*)>,
        randint,
        hbuffer,
        (int64_t) size,
        low,high,
        state
    );

    return (res==amscu_success) ? amscu_success : amscu_failure;
}
|
||||||
|
|
||||||
}; //end namespaces
|
}; //end namespaces
|
||||||
};
|
};
|
||||||
@ -5,55 +5,27 @@ namespace amscuda
|
|||||||
namespace random
|
namespace random
|
||||||
{
|
{
|
||||||
|
|
||||||
__host__ void amscurand_tests1()
|
__host__ void amscurand_tests1_randstat1()
|
||||||
{
|
{
|
||||||
using namespace random;
|
|
||||||
printf("Random number generator basic function test:\n");
|
|
||||||
int I;
|
int I;
|
||||||
// uint32_t q1;
|
|
||||||
// float q2;
|
|
||||||
// double q3;
|
|
||||||
// int q4;
|
|
||||||
|
|
||||||
rand_seed(0);
|
|
||||||
printf("from seed 0...\n");
|
|
||||||
for(I=0;I<10;I++)
|
|
||||||
{
|
|
||||||
printf("randui32[%d] = %u\n",I,randui32());
|
|
||||||
}
|
|
||||||
|
|
||||||
rand_seed(0);
|
|
||||||
printf("from seed 0...\n");
|
|
||||||
for(I=0;I<10;I++)
|
|
||||||
{
|
|
||||||
printf("randf[%d] = %1.3f\n",I,randf());
|
|
||||||
}
|
|
||||||
|
|
||||||
rand_seed(0);
|
|
||||||
printf("from seed 0...\n");
|
|
||||||
for(I=0;I<10;I++)
|
|
||||||
{
|
|
||||||
printf("randf[%d] = %1.3f\n",I,rand());
|
|
||||||
}
|
|
||||||
|
|
||||||
float q0,qlast,qmindelta,qmaxdelta,qmx,qmn,qmean,qstd,qsum,qsumsq;
|
float q0,qlast,qmindelta,qmaxdelta,qmx,qmn,qmean,qstd,qsum,qsumsq;
|
||||||
int N = 100000;
|
int N = 100000;
|
||||||
uint32_t seed= 34533623;
|
uint32_t seed= 2;
|
||||||
rand_seed(seed);
|
rand_seed(seed);
|
||||||
printf("from seed %u...\n",seed);
|
printf("from seed %u...\n",seed);
|
||||||
q0 = randf();
|
q0 = randnf();
|
||||||
qmx = q0;
|
qmx = q0;
|
||||||
qmn = q0;
|
qmn = q0;
|
||||||
qsum = 0.0f;
|
qsum = 0.0f;
|
||||||
qsumsq = 0.0f;
|
qsumsq = 0.0f;
|
||||||
qlast = q0;
|
qlast = q0;
|
||||||
q0 = randf();
|
q0 = randnf();
|
||||||
qmindelta = ::fabsf(q0-qlast);
|
qmindelta = ::fabsf(q0-qlast);
|
||||||
qmaxdelta = ::fabsf(q0-qlast);
|
qmaxdelta = ::fabsf(q0-qlast);
|
||||||
for(I=0;I<N;I++)
|
for(I=0;I<N;I++)
|
||||||
{
|
{
|
||||||
qlast = q0;
|
qlast = q0;
|
||||||
q0 = randf();
|
q0 = randnf();
|
||||||
|
|
||||||
qsum += q0;
|
qsum += q0;
|
||||||
qsumsq += q0*q0;
|
qsumsq += q0*q0;
|
||||||
@ -66,13 +38,93 @@ namespace random
|
|||||||
qmean = qsum/((float)N);
|
qmean = qsum/((float)N);
|
||||||
qstd = ::sqrtf((qsumsq-qsum*qsum/((float)N))/((float)N));
|
qstd = ::sqrtf((qsumsq-qsum*qsum/((float)N))/((float)N));
|
||||||
|
|
||||||
printf("Statistics for randf() N=%d\n",N);
|
printf("Statistics for randnf() N=%d\n",N);
|
||||||
printf("\tmin: %1.3g\n",qmn);
|
printf("\tmin: %1.3g\n",qmn);
|
||||||
printf("\tmax: %1.3g\n",qmx);
|
printf("\tmax: %1.3g\n",qmx);
|
||||||
printf("\tmean: %1.3g\n",qmean);
|
printf("\tmean: %1.3g\n",qmean);
|
||||||
printf("\tstdev: %1.3g\n",qstd);
|
printf("\tstdev: %1.3g\n",qstd);
|
||||||
printf("\tmindelta: %1.3g\n",qmindelta);
|
printf("\tmindelta: %1.3g\n",qmindelta);
|
||||||
printf("\tmaxdelta: %1.3g\n",qmaxdelta);
|
printf("\tmaxdelta: %1.3g\n",qmaxdelta);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Prints summary statistics for randn(): seeds the global generator with 2, draws two
//priming samples followed by N=100000 samples, and reports min, max, mean, standard
//deviation and the smallest/largest absolute step between consecutive samples.
//The sums behind mean/stdev cover only the N loop samples; min/max start from the first
//priming sample and the step extremes start from the step between the two priming samples.
__host__ void amscurand_tests1_randstat2()
{
    const int N = 100000;
    const uint32_t seed = 2;

    double prev, cur, step;
    double lo, hi, stepmin, stepmax;
    double total = 0.0;
    double totalsq = 0.0;
    double mean, stdev;
    int k;

    rand_seed(seed);
    printf("from seed %u...\n",seed);

    //priming samples: the first seeds min/max, the pair seeds the step extremes
    prev = randn();
    lo = prev;
    hi = prev;
    cur = randn();
    step = ::fabs(cur-prev);
    stepmin = step;
    stepmax = step;

    k = 0;
    while(k<N)
    {
        prev = cur;
        cur = randn();
        step = ::fabs(cur-prev);

        total += cur;
        totalsq += cur*cur;
        if(cur>hi) hi = cur;
        if(cur<lo) lo = cur;
        if(step<stepmin) stepmin = step;
        if(step>stepmax) stepmax = step;
        k++;
    }

    mean = total/((double)N);
    stdev = ::sqrt((totalsq-total*total/((double)N))/((double)N));

    printf("Statistics for randn() N=%d\n",N);
    printf("\tmin: %1.6g\n",lo);
    printf("\tmax: %1.6g\n",hi);
    printf("\tmean: %1.6g\n",mean);
    printf("\tstdev: %1.6g\n",stdev);
    printf("\tmindelta: %1.6g\n",stepmin);
    printf("\tmaxdelta: %1.6g\n",stepmax);
}
|
||||||
|
|
||||||
|
|
||||||
|
__host__ void amscurand_tests1()
|
||||||
|
{
|
||||||
|
printf("Random number generator basic function test:\n");
|
||||||
|
int I;
|
||||||
|
// uint32_t q1;
|
||||||
|
// float q2;
|
||||||
|
// double q3;
|
||||||
|
// int q4;
|
||||||
|
|
||||||
|
rand_seed(1);
|
||||||
|
printf("from seed 1...\n");
|
||||||
|
for(I=0;I<10;I++)
|
||||||
|
{
|
||||||
|
printf("randui32[%d] = %u\n",I,randui32());
|
||||||
|
}
|
||||||
|
|
||||||
|
rand_seed(1);
|
||||||
|
printf("from seed 1...\n");
|
||||||
|
for(I=0;I<10;I++)
|
||||||
|
{
|
||||||
|
printf("randf[%d] = %1.3f\n",I,randf());
|
||||||
|
}
|
||||||
|
|
||||||
|
rand_seed(0);
|
||||||
|
printf("from seed 0...\n");
|
||||||
|
for(I=0;I<10;I++)
|
||||||
|
{
|
||||||
|
printf("randf[%d] = %1.3f\n",I,rand());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
amscurand_tests1_randstat1();
|
||||||
|
amscurand_tests1_randstat2();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|||||||
@ -100,7 +100,7 @@ namespace xoroshiro
|
|||||||
|
|
||||||
__host__ __device__ void xs128pp_jump(xs128pp_state* state)
|
__host__ __device__ void xs128pp_jump(xs128pp_state* state)
|
||||||
{
|
{
|
||||||
static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
|
AMSCU_CONST static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
|
||||||
uint64_t low = 0;
|
uint64_t low = 0;
|
||||||
uint64_t high = 0;
|
uint64_t high = 0;
|
||||||
int I;
|
int I;
|
||||||
|
|||||||
26
src/amsculib3/util/amscu_cputhreading.cu
Normal file
26
src/amsculib3/util/amscu_cputhreading.cu
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#include <amsculib3/amsculib3.hpp>
|
||||||
|
|
||||||
|
namespace amscuda
|
||||||
|
{
|
||||||
|
namespace util
|
||||||
|
{
|
||||||
|
|
||||||
|
int amscpu_cputhreading_threadplan(int probsize)
|
||||||
|
{
|
||||||
|
int nthreads = 1;
|
||||||
|
int ta = std::thread::hardware_concurrency();
|
||||||
|
|
||||||
|
if(probsize>1024)
|
||||||
|
{
|
||||||
|
nthreads = probsize/1024;
|
||||||
|
|
||||||
|
if(nthreads>ta-2) nthreads = ta-2;
|
||||||
|
if(nthreads>amscuda::amscu_defcputhreads) nthreads = amscuda::amscu_defcputhreads;
|
||||||
|
if(nthreads<1) nthreads = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nthreads;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
};
|
||||||
@ -23,7 +23,7 @@ int main(int argc, char* argv[])
|
|||||||
|
|
||||||
//test_amscurarray1();
|
//test_amscurarray1();
|
||||||
|
|
||||||
random::amscurand_tests1();
|
//random::amscurand_tests1();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user