device buffer random functions

This commit is contained in:
2026-04-14 21:55:31 -04:00
parent 361ee4c6da
commit 1691b2c415
4 changed files with 261 additions and 0 deletions

Binary file not shown.

Binary file not shown.

View File

@ -5,5 +5,266 @@ namespace amscuda
namespace random
{
// Kernel: stores one randf() result into each element of dbuffer[0..size-1].
//
//   dbuffer - output buffer, written at indices [0,size)
//   size    - number of elements to write
//   dstates - generator states; read at the thread's global index, so it must
//             hold at least gridDim.x*blockDim.x entries
//
// Each thread works on a private copy of its state and walks the buffer with a
// grid-stride loop. The private copy is never written back to dstates.
__global__ void dbuff_randf_kf(float *dbuffer, int size, randstate_t *dstates)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    // Kept unsigned so the index update uses the same arithmetic as
    // "pos += blockDim.x*gridDim.x".
    const unsigned int stride = blockDim.x*gridDim.x;
    randstate_t local = dstates[tid];
    int pos = tid;

    while(pos<size)
    {
        dbuffer[pos] = randf(&local);
        pos += stride;
    }
}
// Fills the device buffer dbuffer[0..size-1] using dbuff_randf_kf.
//
//   dbuffer - device pointer to the output buffer (handed straight to the kernel)
//   size    - number of elements to generate
//   state   - host generator state; read to derive the per-thread states and
//             then advanced via rand_state_increment(nblocks*nthreads, state)
//
// One generator state is built per launched thread: entry I is a copy of
// *state passed through rand_state_increment(I, ...).
//
// Returns amscu_success. CUDA failures are reported on stdout.
// NOTE(review): the return value is not changed on failure because no failure
// code is visible in this part of the file - consider returning one.
__host__ int dbuff_randf(float *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;
    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    // Build the per-thread states on the host BEFORE uploading them; copying
    // first would hand the kernel an unseeded array.
    states.resize(nblocks*nthreads);
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: advance the caller's state by the same count.
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        dstates = NULL;
    }
    else
    {
        err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    }

    // Only launch when the states actually reached the device.
    if(err==cudaSuccess)
    {
        dbuff_randf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
        cudaDeviceSynchronize();
        // Picks up both launch-configuration and execution errors.
        err = cudaGetLastError();
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_randf: cuda error %s\n",cudaGetErrorString(err));
    }

    if(dstates!=NULL)
    {
        cudaFree(dstates); dstates=NULL;
    }
    return ret;
}
// Kernel: stores one rand() result into each element of dbuffer[0..size-1].
//
//   dbuffer - output buffer, written at indices [0,size)
//   size    - number of elements to write
//   dstates - generator states; read at the thread's global index, so it must
//             hold at least gridDim.x*blockDim.x entries
//
// Each thread works on a private copy of its state and walks the buffer with a
// grid-stride loop. The private copy is never written back to dstates.
__global__ void dbuff_rand_kf(double *dbuffer, int size, randstate_t *dstates)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    // Kept unsigned so the index update uses the same arithmetic as
    // "pos += blockDim.x*gridDim.x".
    const unsigned int stride = blockDim.x*gridDim.x;
    randstate_t local = dstates[tid];
    int pos = tid;

    while(pos<size)
    {
        dbuffer[pos] = rand(&local);
        pos += stride;
    }
}
// Fills the device buffer dbuffer[0..size-1] using dbuff_rand_kf.
//
//   dbuffer - device pointer to the output buffer (handed straight to the kernel)
//   size    - number of elements to generate
//   state   - host generator state; read to derive the per-thread states and
//             then advanced via rand_state_increment(nblocks*nthreads, state)
//
// One generator state is built per launched thread: entry I is a copy of
// *state passed through rand_state_increment(I, ...).
//
// Returns amscu_success. CUDA failures are reported on stdout.
// NOTE(review): the return value is not changed on failure because no failure
// code is visible in this part of the file - consider returning one.
__host__ int dbuff_rand(double *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;
    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    // Build the per-thread states on the host BEFORE uploading them; copying
    // first would hand the kernel an unseeded array.
    states.resize(nblocks*nthreads);
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: advance the caller's state by the same count.
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        dstates = NULL;
    }
    else
    {
        err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    }

    // Only launch when the states actually reached the device.
    if(err==cudaSuccess)
    {
        dbuff_rand_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
        cudaDeviceSynchronize();
        // Picks up both launch-configuration and execution errors.
        err = cudaGetLastError();
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_rand: cuda error %s\n",cudaGetErrorString(err));
    }

    if(dstates!=NULL)
    {
        cudaFree(dstates); dstates=NULL;
    }
    return ret;
}
// Kernel: stores one randnf() result into each element of dbuffer[0..size-1].
//
//   dbuffer - output buffer, written at indices [0,size)
//   size    - number of elements to write
//   dstates - generator states; read at the thread's global index, so it must
//             hold at least gridDim.x*blockDim.x entries
//
// Each thread works on a private copy of its state and walks the buffer with a
// grid-stride loop. The private copy is never written back to dstates.
__global__ void dbuff_randnf_kf(float *dbuffer, int size, randstate_t *dstates)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    // Kept unsigned so the index update uses the same arithmetic as
    // "pos += blockDim.x*gridDim.x".
    const unsigned int stride = blockDim.x*gridDim.x;
    randstate_t local = dstates[tid];
    int pos = tid;

    while(pos<size)
    {
        dbuffer[pos] = randnf(&local);
        pos += stride;
    }
}
// Kernel: stores one randn() result into each element of dbuffer[0..size-1].
//
//   dbuffer - output buffer, written at indices [0,size)
//   size    - number of elements to write
//   dstates - generator states; read at the thread's global index, so it must
//             hold at least gridDim.x*blockDim.x entries
//
// Each thread works on a private copy of its state and walks the buffer with a
// grid-stride loop. The private copy is never written back to dstates.
__global__ void dbuff_randn_kf(double *dbuffer, int size, randstate_t *dstates)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    // Kept unsigned so the index update uses the same arithmetic as
    // "pos += blockDim.x*gridDim.x".
    const unsigned int stride = blockDim.x*gridDim.x;
    randstate_t local = dstates[tid];
    int pos = tid;

    while(pos<size)
    {
        dbuffer[pos] = randn(&local);
        pos += stride;
    }
}
// Fills the device buffer dbuffer[0..size-1] using dbuff_randnf_kf.
//
//   dbuffer - device pointer to the output buffer (handed straight to the kernel)
//   size    - number of elements to generate
//   state   - host generator state; read to derive the per-thread states and
//             then advanced via rand_state_increment(nblocks*nthreads, state)
//
// One generator state is built per launched thread: entry I is a copy of
// *state passed through rand_state_increment(I, ...).
//
// Returns amscu_success. CUDA failures are reported on stdout.
// NOTE(review): the return value is not changed on failure because no failure
// code is visible in this part of the file - consider returning one.
__host__ int dbuff_randnf(float *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;
    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    // Build the per-thread states on the host BEFORE uploading them; copying
    // first would hand the kernel an unseeded array.
    states.resize(nblocks*nthreads);
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: advance the caller's state by the same count.
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        dstates = NULL;
    }
    else
    {
        err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    }

    // Only launch when the states actually reached the device.
    if(err==cudaSuccess)
    {
        dbuff_randnf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
        cudaDeviceSynchronize();
        // Picks up both launch-configuration and execution errors.
        err = cudaGetLastError();
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_randnf: cuda error %s\n",cudaGetErrorString(err));
    }

    if(dstates!=NULL)
    {
        cudaFree(dstates); dstates=NULL;
    }
    return ret;
}
// Fills the device buffer dbuffer[0..size-1] using dbuff_randn_kf.
//
//   dbuffer - device pointer to the output buffer (handed straight to the kernel)
//   size    - number of elements to generate
//   state   - host generator state; read to derive the per-thread states and
//             then advanced via rand_state_increment(nblocks*nthreads, state)
//
// One generator state is built per launched thread: entry I is a copy of
// *state passed through rand_state_increment(I, ...).
//
// Returns amscu_success. CUDA failures are reported on stdout.
// NOTE(review): the return value is not changed on failure because no failure
// code is visible in this part of the file - consider returning one.
__host__ int dbuff_randn(double *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;
    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    // Build the per-thread states on the host BEFORE uploading them; copying
    // first would hand the kernel an unseeded array.
    states.resize(nblocks*nthreads);
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: advance the caller's state by the same count.
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        dstates = NULL;
    }
    else
    {
        err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    }

    // Only launch when the states actually reached the device.
    if(err==cudaSuccess)
    {
        dbuff_randn_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);
        cudaDeviceSynchronize();
        // Picks up both launch-configuration and execution errors.
        err = cudaGetLastError();
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_randn: cuda error %s\n",cudaGetErrorString(err));
    }

    if(dstates!=NULL)
    {
        cudaFree(dstates); dstates=NULL;
    }
    return ret;
}
// Kernel: stores one randint(low,high,...) result into each element of
// dbuffer[0..size-1].
//
//   dbuffer  - output buffer, written at indices [0,size)
//   size     - number of elements to write
//   low,high - forwarded unchanged to randint()
//   dstates  - generator states; read at the thread's global index, so it must
//              hold at least gridDim.x*blockDim.x entries
//
// Each thread works on a private copy of its state and walks the buffer with a
// grid-stride loop. The private copy is never written back to dstates.
__global__ void dbuff_randint_kf(int *dbuffer, int size, int low, int high, randstate_t *dstates)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    // Kept unsigned so the index update uses the same arithmetic as
    // "pos += blockDim.x*gridDim.x".
    const unsigned int stride = blockDim.x*gridDim.x;
    randstate_t local = dstates[tid];
    int pos = tid;

    while(pos<size)
    {
        dbuffer[pos] = randint(low,high,&local);
        pos += stride;
    }
}
// Fills the device buffer dbuffer[0..size-1] using dbuff_randint_kf.
//
//   dbuffer  - device pointer to the output buffer (handed straight to the kernel)
//   size     - number of elements to generate
//   low,high - forwarded unchanged to the kernel (and from there to randint())
//   state    - host generator state; read to derive the per-thread states and
//              then advanced via rand_state_increment(nblocks*nthreads, state)
//
// One generator state is built per launched thread: entry I is a copy of
// *state passed through rand_state_increment(I, ...).
//
// Returns amscu_success. CUDA failures are reported on stdout.
// NOTE(review): the return value is not changed on failure because no failure
// code is visible in this part of the file - consider returning one.
__host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_t *state)
{
    int ret = amscu_success;
    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    // Build the per-thread states on the host BEFORE uploading them; copying
    // first would hand the kernel an unseeded array.
    states.resize(nblocks*nthreads);
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: advance the caller's state by the same count.
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        dstates = NULL;
    }
    else
    {
        err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    }

    // Only launch when the states actually reached the device.
    if(err==cudaSuccess)
    {
        dbuff_randint_kf<<<nblocks,nthreads>>>(dbuffer,size,low,high,dstates);
        cudaDeviceSynchronize();
        // Picks up both launch-configuration and execution errors.
        err = cudaGetLastError();
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_randint: cuda error %s\n",cudaGetErrorString(err));
    }

    if(dstates!=NULL)
    {
        cudaFree(dstates); dstates=NULL;
    }
    return ret;
}
}; //end namespaces
};