random routines
This commit is contained in:
@ -736,7 +736,7 @@ __host__ __device__ cumat3 rotmat_from_axisangle(const cuvec3 &axis, const doubl
|
||||
// Tests //
|
||||
///////////
|
||||
|
||||
__host__ void test_cudavectf_logic1()
|
||||
__host__ void test_cudavect_logic1()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
6
src/amsculib3/random/amscupcg.cu
Normal file
6
src/amsculib3/random/amscupcg.cu
Normal file
@ -0,0 +1,6 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
};
|
||||
130
src/amsculib3/random/amscurandom1.cu
Normal file
130
src/amsculib3/random/amscurandom1.cu
Normal file
@ -0,0 +1,130 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
using namespace amscuda::random::xoroshiro;
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace random
|
||||
{
|
||||
|
||||
//Choosing xoroshiro64** as my default RNG due to 32 bit only operations
|
||||
randstate_t global_rand_cpustate = xs64ss_state();
|
||||
|
||||
|
||||
__host__ void rand_seed(const uint32_t seed)
|
||||
{
|
||||
global_rand_cpustate = xs64ss_state(seed);
|
||||
}
|
||||
|
||||
__host__ __device__ void rand_state_increment(const int32_t inc, randstate_t *state)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
xoroshiro::xs64ss_state* s2 = (xoroshiro::xs64ss_state*)state;
|
||||
s2->low += inc;
|
||||
xs64ss_next(s2);
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ void rand_next(randstate_t *state)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
xs64ss_next((xoroshiro::xs64ss_state*)state);
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ uint32_t randui32(randstate_t *state)
|
||||
{
|
||||
uint32_t ret;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
|
||||
ret = xoroshiro::xs64ss_next((xoroshiro::xs64ss_state*)state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ int randint(int low, int high, randstate_t *state)
|
||||
{
|
||||
int ret;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
int32_t q = (int)((randui32(state)>>1U)%(1U<<16U));
|
||||
ret = (q%(high-low))+low;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float randf(randstate_t *state)
|
||||
{
|
||||
float ret;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
ret= ((float)randui32(state))/(2147483648.0f);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double rand(randstate_t *state)
|
||||
{
|
||||
double ret;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
ret= ((double)randui32(state))/(2147483648.0f);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float randnf(randstate_t *state)
|
||||
{
|
||||
float q1,q2;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
|
||||
q1 = randf(state);
|
||||
q2 = randf(state);
|
||||
return (::sqrtf(-2.0f*::logf(q1))*cos(2.0f*pif*q2));
|
||||
}
|
||||
|
||||
__host__ __device__ double randn(randstate_t *state)
|
||||
{
|
||||
double q1,q2;
|
||||
#ifdef __CUDA_ARCH__
|
||||
// GPU-specific code (device path)
|
||||
#else
|
||||
// CPU-specific code (host path)
|
||||
if(state==NULL) state = &global_rand_cpustate;
|
||||
#endif
|
||||
|
||||
q1 = rand(state);
|
||||
q2 = rand(state);
|
||||
return (::sqrt(-2.0*::log(q1))*cos(2.0*pi*q2));
|
||||
}
|
||||
|
||||
}; //end namespaces
|
||||
};
|
||||
9
src/amsculib3/random/amscurandom1_dbuff.cu
Normal file
9
src/amsculib3/random/amscurandom1_dbuff.cu
Normal file
@ -0,0 +1,9 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace random
|
||||
{
|
||||
|
||||
}; //end namespaces
|
||||
};
|
||||
9
src/amsculib3/random/amscurandom1_hbuff.cu
Normal file
9
src/amsculib3/random/amscurandom1_hbuff.cu
Normal file
@ -0,0 +1,9 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace random
|
||||
{
|
||||
|
||||
}; //end namespaces
|
||||
};
|
||||
125
src/amsculib3/random/amscusplitmix.cu
Normal file
125
src/amsculib3/random/amscusplitmix.cu
Normal file
@ -0,0 +1,125 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace random
|
||||
{
|
||||
namespace xoroshiro
|
||||
{
|
||||
|
||||
//ref: https://github.com/joelkp/ranoise/blob/main/splitmix32.c
|
||||
// __device__ __host__ uint32_t splitmix32_next(uint32_t *splitmix32_state)
|
||||
// {
|
||||
// uint32_t ret;
|
||||
// *splitmix32_state += 2654435769U;
|
||||
// ret = *splitmix32_state;
|
||||
// ret ^= ret >> 16;
|
||||
// ret *= 0x85ebca6bU;
|
||||
// ret ^= ret >> 13;
|
||||
// ret *= 0xc2b2ae35U;
|
||||
// ret ^= ret >> 16;
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
//Better constants: see discussion below.
|
||||
__device__ __host__ uint32_t splitmix32_next(uint32_t *splitmix32_state)
|
||||
{
|
||||
uint32_t ret;
|
||||
*splitmix32_state += 0x9e3779b9U;
|
||||
ret = *splitmix32_state;
|
||||
ret ^= ret >> 16;
|
||||
ret *= 0x21f0aaadU;
|
||||
ret ^= ret >> 15;
|
||||
ret *= 0x735a2d97U;
|
||||
ret ^= ret >> 15;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ void splitmix64_next(uint64_t *state)
|
||||
{
|
||||
//*state += 0x9E3779B97F4A7C15uL;
|
||||
*state += 0x9E3779B97F4A7C15ULL;
|
||||
}
|
||||
|
||||
__host__ __device__ uint64_t splitmix64_nextint(uint64_t *state)
|
||||
{
|
||||
//ref: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64
|
||||
|
||||
uint64_t ret;
|
||||
*state += 0x9E3779B97F4A7C15ULL;
|
||||
ret = *state;
|
||||
ret = (ret ^ (ret >> 30)) * 0xbf58476d1ce4e5b9ULL;
|
||||
ret = (ret ^ (ret >> 27)) * 0x94d049bb133111ebULL;
|
||||
ret = ret^(ret>>31);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////////////////
|
||||
// Unsorted Notes //
|
||||
////////////////////
|
||||
|
||||
//ref: https://github.com/joelkp/ranoise/blob/main/splitmix32.c
|
||||
//2654435769U
|
||||
//#define ROR32(x, r) \
|
||||
//((uint32_t)(x) >> ((r) & 31) | (uint32_t)(x) << ((32-(r)) & 31))
|
||||
|
||||
//#define MUVAROR32(x, r, ro) \
|
||||
//(((uint32_t)(x) | ((1<<((ro) & 31))|1)) * ROR32((x), (r)+(ro)))
|
||||
|
||||
// static inline uint32_t splitmix32(uint32_t *restrict pos) {
|
||||
// uint32_t s = *pos += 2654435769U;
|
||||
// s ^= s >> 16;
|
||||
// s *= 0x85ebca6b;
|
||||
// s ^= s >> 13;
|
||||
// s *= 0xc2b2ae35;
|
||||
// s ^= s >> 16;
|
||||
// return s;
|
||||
// }
|
||||
|
||||
// int main(int argc, char *argv[]) {
|
||||
// uint32_t pos = 0;
|
||||
// for (;;) {
|
||||
// /* SplitMix32 test */
|
||||
// add_output(splitmix32(&pos));
|
||||
// }
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
|
||||
/*
|
||||
//ref: https://stackoverflow.com/questions/17035441/looking-for-decent-quality-prng-with-only-32-bits-of-state
|
||||
|
||||
uint32_t splitmix32(void) {
|
||||
uint32_t z = state += 0x9e3779b9;
|
||||
z ^= z >> 15; // 16 for murmur3
|
||||
z *= 0x85ebca6b;
|
||||
z ^= z >> 13;
|
||||
z *= 0xc2b2ae35;
|
||||
return z ^= z >> 16;
|
||||
}
|
||||
|
||||
Update: Better constants have been found that potentially make this a very good PRNG:
|
||||
uint32_t splitmix32(void) {
|
||||
uint32_t z = (state += 0x9e3779b9);
|
||||
z ^= z >> 16; z *= 0x21f0aaad;
|
||||
z ^= z >> 15; z *= 0x735a2d97;
|
||||
z ^= z >> 15;
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
Mark Dickinson
|
||||
Over a year ago
|
||||
Any thoughts on the PCG family? (E.g., the generator called "PCG RXS M XS 32 (LCG)" in the PCG paper.)
|
||||
bryc
|
||||
Over a year ago
|
||||
@MarkDickinson From what I've seen PCG relies on 64-bit math, and there are no fully 32-bit variants that I can find. I can't find any code specific to the generator you mentioned either. I'm sure PCG is a good generator, but I feel like its a poor choice for JavaScript or embedded systems. xoshiro128** for example is ideal in that regard, since it uses 32-bit integers only.
|
||||
|
||||
*/
|
||||
|
||||
};//end namespace splitmix
|
||||
}; //end namespaces
|
||||
};
|
||||
146
src/amsculib3/random/amsxoroshiro.cu
Normal file
146
src/amsculib3/random/amsxoroshiro.cu
Normal file
@ -0,0 +1,146 @@
|
||||
#include <amsculib3/amsculib3.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace random
|
||||
{
|
||||
namespace xoroshiro
|
||||
{
|
||||
|
||||
//////////////////
|
||||
//Xoroshiro 64**//
|
||||
//////////////////
|
||||
|
||||
__device__ __host__ xs64ss_state::xs64ss_state()
|
||||
{
|
||||
low = 0; high = 0;
|
||||
}
|
||||
|
||||
__device__ __host__ xs64ss_state::xs64ss_state(const uint32_t seed)
|
||||
{
|
||||
uint32_t sm32 = seed;
|
||||
splitmix32_next(&sm32);
|
||||
low = splitmix32_next(&sm32);
|
||||
high = splitmix32_next(&sm32);
|
||||
return;
|
||||
}
|
||||
|
||||
__device__ __host__ xs64ss_state::xs64ss_state(const uint32_t _low, const uint32_t _high)
|
||||
{
|
||||
low = _low; high = _high;
|
||||
}
|
||||
|
||||
xs64ss_state xs64ss_globalstate = xs64ss_state();
|
||||
|
||||
__host__ void xs64ss_seed(const uint32_t seed)
|
||||
{
|
||||
xs64ss_globalstate = xs64ss_state(seed);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline uint32_t xs64ss_rotl(const uint32_t x, int k) {
|
||||
return (x << k) | (x >> (32 - k));
|
||||
}
|
||||
|
||||
//implements the xoroshiro64** PRNG
|
||||
__host__ __device__ uint32_t xs64ss_next(xs64ss_state *state)
|
||||
{
|
||||
const uint32_t low = state->low;
|
||||
uint32_t high = state->high;
|
||||
const uint32_t ret = xs64ss_rotl(low*0x9E3779BBU,5)*5;
|
||||
high ^= low;
|
||||
state->low = xs64ss_rotl(low, 26) ^ high ^ (high << 9);
|
||||
state->high = xs64ss_rotl(high, 13);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
//////////////////
|
||||
//Xoroshiro 128+//
|
||||
//////////////////
|
||||
__device__ __host__ xs128pp_state::xs128pp_state()
|
||||
{
|
||||
low = 0;
|
||||
high = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
__device__ __host__ xs128pp_state::xs128pp_state(const uint64_t seed)
|
||||
{
|
||||
uint64_t sm64s = seed;
|
||||
splitmix64_next(&sm64s);
|
||||
low = splitmix64_nextint(&sm64s);
|
||||
high = splitmix64_nextint(&sm64s);
|
||||
return;
|
||||
}
|
||||
|
||||
__device__ __host__ xs128pp_state::xs128pp_state(const uint64_t _low, const uint64_t _high)
|
||||
{
|
||||
low = _low;
|
||||
high = _high;
|
||||
return;
|
||||
}
|
||||
|
||||
xs128pp_state xs128pp_globalstate = xs128pp_state();
|
||||
|
||||
__host__ __device__ uint64_t xs128pp_rotl(const uint64_t x, int k)
|
||||
{
|
||||
return (x << k) | (x >> (64 - k));
|
||||
}
|
||||
|
||||
__host__ __device__ uint64_t xs128pp_next(xs128pp_state* state)
|
||||
{
|
||||
const uint64_t low = state->low;
|
||||
uint64_t high = state->high;
|
||||
const uint64_t result = low + high;
|
||||
high ^= low;
|
||||
state->low = xs128pp_rotl(low,24) ^ high ^ (high << 16);
|
||||
state->high = xs128pp_rotl(high,37);
|
||||
return result;
|
||||
}
|
||||
|
||||
__host__ __device__ void xs128pp_jump(xs128pp_state* state)
|
||||
{
|
||||
static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
|
||||
uint64_t low = 0;
|
||||
uint64_t high = 0;
|
||||
int I;
|
||||
int B;
|
||||
|
||||
for(I=0;I<2;I++)
|
||||
{
|
||||
for(B=0;B<64;B++)
|
||||
{
|
||||
if(JUMP[I] & (1ULL<<B))
|
||||
{
|
||||
low ^= state->low;
|
||||
high ^= state->high;
|
||||
}
|
||||
xs128pp_next(state);
|
||||
}
|
||||
}
|
||||
|
||||
state->low = low;
|
||||
state->high = high;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ void xs128pp_seed(uint64_t seed)
|
||||
{
|
||||
xs128pp_globalstate = xs128pp_state(seed);
|
||||
}
|
||||
|
||||
|
||||
//To have a different implementation for host and device functions
|
||||
// __host__ __device__ void my_function() {
|
||||
// #ifdef __CUDA_ARCH__
|
||||
// // GPU-specific code (device path)
|
||||
// #else
|
||||
// // CPU-specific code (host path)
|
||||
// #endif
|
||||
// }
|
||||
|
||||
|
||||
|
||||
}; //end namespaces
|
||||
};
|
||||
};
|
||||
Reference in New Issue
Block a user