device buffer random functions
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -5,5 +5,266 @@ namespace amscuda
|
||||
namespace random
|
||||
{
|
||||
|
||||
/*
 * Kernel: writes one randf() result into every element of dbuffer[0..size).
 *
 *  dbuffer : output buffer, indexed up to size-1
 *  size    : number of elements to fill
 *  dstates : generator states, read at the thread's global index without a
 *            bounds check, so it must hold one entry per launched thread
 *
 * Each thread works on a private copy of its state; the advanced copy is
 * not stored back into dstates.
 */
__global__ void dbuff_randf_kf(float *dbuffer, int size, randstate_t *dstates)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int stride = blockDim.x*gridDim.x;

    // private working copy of this thread's generator state
    randstate_t local_state = dstates[tid];

    // grid-stride walk over the buffer
    int pos = tid;
    while(pos<size)
    {
        dbuffer[pos] = randf(&local_state);
        pos += stride;
    }
}
|
||||
|
||||
/*
 * Fills a device buffer with values produced by randf(), one generator
 * state per launched thread.
 *
 *  dbuffer : device pointer passed to the kernel; the kernel writes
 *            elements 0..size-1
 *  size    : number of elements to generate
 *  state   : host generator state; every per-thread state is a copy of it
 *            advanced by the thread index, and afterwards *state itself is
 *            advanced by the total number of states handed out
 *            (presumably so the next call does not reuse them)
 *
 * Returns amscu_success. CUDA failures are reported through printf.
 * NOTE(review): no failure code is visible from this file, so the return
 * value does not reflect CUDA errors - confirm the intended error code.
 */
__host__ int dbuff_randf(float *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;

    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    if(nblocks<=0)
    {
        // non-positive block count (e.g. negative size): nothing to
        // generate, and a launch with zero blocks is an invalid configuration
        return ret;
    }

    states.resize(nblocks*nthreads);

    // Build the per-thread states on the host BEFORE uploading them.
    // (Previously the upload happened first, so the device received
    // whatever resize() left in the array.)
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: move the caller's state past all of them
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        printf("dbuff_randf: cuda error %s\n",cudaGetErrorString(err));
        return ret;
    }

    err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    if(err==cudaSuccess)
    {
        dbuff_randf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);

        // launch-configuration errors show up here ...
        err = cudaGetLastError();
        if(err==cudaSuccess)
        {
            // ... errors raised while the kernel runs show up at the sync
            err = cudaDeviceSynchronize();
        }
    }

    if(err!=cudaSuccess)
    {
        printf("dbuff_randf: cuda error %s\n",cudaGetErrorString(err));
    }

    cudaFree(dstates); dstates=NULL;

    return ret;
}
|
||||
|
||||
/*
 * Kernel: writes one rand() result into every element of dbuffer[0..size).
 *
 *  dbuffer : output buffer, indexed up to size-1
 *  size    : number of elements to fill
 *  dstates : generator states, read at the thread's global index without a
 *            bounds check, so it must hold one entry per launched thread
 *
 * Each thread works on a private copy of its state; the advanced copy is
 * not stored back into dstates.
 */
__global__ void dbuff_rand_kf(double *dbuffer, int size, randstate_t *dstates)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int stride = blockDim.x*gridDim.x;

    // private working copy of this thread's generator state
    randstate_t local_state = dstates[tid];

    // grid-stride walk over the buffer
    int pos = tid;
    while(pos<size)
    {
        dbuffer[pos] = rand(&local_state);
        pos += stride;
    }
}
|
||||
|
||||
/*
 * Fills a device buffer with values produced by rand(), one generator
 * state per launched thread.
 *
 *  dbuffer : device pointer passed to the kernel; the kernel writes
 *            elements 0..size-1
 *  size    : number of elements to generate
 *  state   : host generator state; every per-thread state is a copy of it
 *            advanced by the thread index, and afterwards *state itself is
 *            advanced by the total number of states handed out
 *            (presumably so the next call does not reuse them)
 *
 * Returns amscu_success. CUDA failures are reported through printf.
 * NOTE(review): no failure code is visible from this file, so the return
 * value does not reflect CUDA errors - confirm the intended error code.
 */
__host__ int dbuff_rand(double *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;

    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    if(nblocks<=0)
    {
        // non-positive block count (e.g. negative size): nothing to
        // generate, and a launch with zero blocks is an invalid configuration
        return ret;
    }

    states.resize(nblocks*nthreads);

    // Build the per-thread states on the host BEFORE uploading them.
    // (Previously the upload happened first, so the device received
    // whatever resize() left in the array.)
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: move the caller's state past all of them
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        printf("dbuff_rand: cuda error %s\n",cudaGetErrorString(err));
        return ret;
    }

    err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    if(err==cudaSuccess)
    {
        dbuff_rand_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);

        // launch-configuration errors show up here ...
        err = cudaGetLastError();
        if(err==cudaSuccess)
        {
            // ... errors raised while the kernel runs show up at the sync
            err = cudaDeviceSynchronize();
        }
    }

    if(err!=cudaSuccess)
    {
        // message previously named dbuff_randf (copy/paste slip)
        printf("dbuff_rand: cuda error %s\n",cudaGetErrorString(err));
    }

    cudaFree(dstates); dstates=NULL;

    return ret;
}
|
||||
|
||||
/*
 * Kernel: writes one randnf() result into every element of dbuffer[0..size).
 *
 *  dbuffer : output buffer, indexed up to size-1
 *  size    : number of elements to fill
 *  dstates : generator states, read at the thread's global index without a
 *            bounds check, so it must hold one entry per launched thread
 *
 * Each thread works on a private copy of its state; the advanced copy is
 * not stored back into dstates.
 */
__global__ void dbuff_randnf_kf(float *dbuffer, int size, randstate_t *dstates)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int stride = blockDim.x*gridDim.x;

    // private working copy of this thread's generator state
    randstate_t local_state = dstates[tid];

    // grid-stride walk over the buffer
    int pos = tid;
    while(pos<size)
    {
        dbuffer[pos] = randnf(&local_state);
        pos += stride;
    }
}
|
||||
|
||||
/*
 * Kernel: writes one randn() result into every element of dbuffer[0..size).
 *
 *  dbuffer : output buffer, indexed up to size-1
 *  size    : number of elements to fill
 *  dstates : generator states, read at the thread's global index without a
 *            bounds check, so it must hold one entry per launched thread
 *
 * Each thread works on a private copy of its state; the advanced copy is
 * not stored back into dstates.
 */
__global__ void dbuff_randn_kf(double *dbuffer, int size, randstate_t *dstates)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int stride = blockDim.x*gridDim.x;

    // private working copy of this thread's generator state
    randstate_t local_state = dstates[tid];

    // grid-stride walk over the buffer
    int pos = tid;
    while(pos<size)
    {
        dbuffer[pos] = randn(&local_state);
        pos += stride;
    }
}
|
||||
|
||||
/*
 * Fills a device buffer with values produced by randnf(), one generator
 * state per launched thread.
 *
 *  dbuffer : device pointer passed to the kernel; the kernel writes
 *            elements 0..size-1
 *  size    : number of elements to generate
 *  state   : host generator state; every per-thread state is a copy of it
 *            advanced by the thread index, and afterwards *state itself is
 *            advanced by the total number of states handed out
 *            (presumably so the next call does not reuse them)
 *
 * Returns amscu_success. CUDA failures are reported through printf.
 * NOTE(review): no failure code is visible from this file, so the return
 * value does not reflect CUDA errors - confirm the intended error code.
 */
__host__ int dbuff_randnf(float *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;

    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    if(nblocks<=0)
    {
        // non-positive block count (e.g. negative size): nothing to
        // generate, and a launch with zero blocks is an invalid configuration
        return ret;
    }

    states.resize(nblocks*nthreads);

    // Build the per-thread states on the host BEFORE uploading them.
    // (Previously the upload happened first, so the device received
    // whatever resize() left in the array.)
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: move the caller's state past all of them
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        printf("dbuff_randnf: cuda error %s\n",cudaGetErrorString(err));
        return ret;
    }

    err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    if(err==cudaSuccess)
    {
        dbuff_randnf_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);

        // launch-configuration errors show up here ...
        err = cudaGetLastError();
        if(err==cudaSuccess)
        {
            // ... errors raised while the kernel runs show up at the sync
            err = cudaDeviceSynchronize();
        }
    }

    if(err!=cudaSuccess)
    {
        // message previously named dbuff_randf (copy/paste slip)
        printf("dbuff_randnf: cuda error %s\n",cudaGetErrorString(err));
    }

    cudaFree(dstates); dstates=NULL;

    return ret;
}
|
||||
|
||||
/*
 * Fills a device buffer with values produced by randn(), one generator
 * state per launched thread.
 *
 *  dbuffer : device pointer passed to the kernel; the kernel writes
 *            elements 0..size-1
 *  size    : number of elements to generate
 *  state   : host generator state; every per-thread state is a copy of it
 *            advanced by the thread index, and afterwards *state itself is
 *            advanced by the total number of states handed out
 *            (presumably so the next call does not reuse them)
 *
 * Returns amscu_success. CUDA failures are reported through printf.
 * NOTE(review): no failure code is visible from this file, so the return
 * value does not reflect CUDA errors - confirm the intended error code.
 */
__host__ int dbuff_randn(double *dbuffer, int size, randstate_t *state)
{
    int ret = amscu_success;

    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    if(nblocks<=0)
    {
        // non-positive block count (e.g. negative size): nothing to
        // generate, and a launch with zero blocks is an invalid configuration
        return ret;
    }

    states.resize(nblocks*nthreads);

    // Build the per-thread states on the host BEFORE uploading them.
    // (Previously the upload happened first, so the device received
    // whatever resize() left in the array.)
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: move the caller's state past all of them
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        printf("dbuff_randn: cuda error %s\n",cudaGetErrorString(err));
        return ret;
    }

    err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    if(err==cudaSuccess)
    {
        dbuff_randn_kf<<<nblocks,nthreads>>>(dbuffer,size,dstates);

        // launch-configuration errors show up here ...
        err = cudaGetLastError();
        if(err==cudaSuccess)
        {
            // ... errors raised while the kernel runs show up at the sync
            err = cudaDeviceSynchronize();
        }
    }

    if(err!=cudaSuccess)
    {
        // message previously named dbuff_randf (copy/paste slip)
        printf("dbuff_randn: cuda error %s\n",cudaGetErrorString(err));
    }

    cudaFree(dstates); dstates=NULL;

    return ret;
}
|
||||
|
||||
/*
 * Kernel: writes one randint(low,high,...) result into every element of
 * dbuffer[0..size).
 *
 *  dbuffer  : output buffer, indexed up to size-1
 *  size     : number of elements to fill
 *  low,high : forwarded unchanged to randint()
 *             (NOTE(review): whether high is inclusive depends on randint,
 *             which is not visible here - confirm)
 *  dstates  : generator states, read at the thread's global index without a
 *             bounds check, so it must hold one entry per launched thread
 *
 * Each thread works on a private copy of its state; the advanced copy is
 * not stored back into dstates.
 */
__global__ void dbuff_randint_kf(int *dbuffer, int size, int low, int high, randstate_t *dstates)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int stride = blockDim.x*gridDim.x;

    // private working copy of this thread's generator state
    randstate_t local_state = dstates[tid];

    // grid-stride walk over the buffer
    int pos = tid;
    while(pos<size)
    {
        dbuffer[pos] = randint(low,high,&local_state);
        pos += stride;
    }
}
|
||||
|
||||
/*
 * Fills a device buffer with values produced by randint(low,high,...), one
 * generator state per launched thread.
 *
 *  dbuffer  : device pointer passed to the kernel; the kernel writes
 *             elements 0..size-1
 *  size     : number of elements to generate
 *  low,high : forwarded unchanged to the kernel, which passes them to
 *             randint()
 *  state    : host generator state; every per-thread state is a copy of it
 *             advanced by the thread index, and afterwards *state itself is
 *             advanced by the total number of states handed out
 *             (presumably so the next call does not reuse them)
 *
 * Returns amscu_success. CUDA failures are reported through printf.
 * NOTE(review): no failure code is visible from this file, so the return
 * value does not reflect CUDA errors - confirm the intended error code.
 */
__host__ int dbuff_randint(int *dbuffer, int size, int low, int high, randstate_t *state)
{
    int ret = amscu_success;

    int nthreads = 128;
    int nblocks = (size+nthreads)/nthreads;
    int I;
    cuarray<randstate_t> states;
    randstate_t *dstates = NULL;
    cudaError_t err = cudaSuccess;

    if(nblocks<=0)
    {
        // non-positive block count (e.g. negative size): nothing to
        // generate, and a launch with zero blocks is an invalid configuration
        return ret;
    }

    states.resize(nblocks*nthreads);

    // Build the per-thread states on the host BEFORE uploading them.
    // (Previously the upload happened first, so the device received
    // whatever resize() left in the array.)
    for(I=0;I<nblocks*nthreads;I++)
    {
        states[I] = *state;
        rand_state_increment(I,&(states[I]));
    }
    // I == nblocks*nthreads here: move the caller's state past all of them
    rand_state_increment(I,state);

    err = cudaMalloc(&dstates,sizeof(randstate_t)*states.length);
    if(err!=cudaSuccess)
    {
        printf("dbuff_randint: cuda error %s\n",cudaGetErrorString(err));
        return ret;
    }

    err = cudaMemcpy(dstates,states.data,sizeof(randstate_t)*states.length,cudaMemcpyHostToDevice);
    if(err==cudaSuccess)
    {
        dbuff_randint_kf<<<nblocks,nthreads>>>(dbuffer,size,low,high,dstates);

        // launch-configuration errors show up here ...
        err = cudaGetLastError();
        if(err==cudaSuccess)
        {
            // ... errors raised while the kernel runs show up at the sync
            err = cudaDeviceSynchronize();
        }
    }

    if(err!=cudaSuccess)
    {
        // message previously named dbuff_randf (copy/paste slip)
        printf("dbuff_randint: cuda error %s\n",cudaGetErrorString(err));
    }

    cudaFree(dstates); dstates=NULL;

    return ret;
}
|
||||
|
||||
|
||||
}; //end namespaces
|
||||
};
|
||||
Reference in New Issue
Block a user