From 8667e2b2ec2719c2423f9003eac6dfeac038598d Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 29 Sep 2020 18:46:41 +0800 Subject: [PATCH] Entering kernel level --- src/core/kernels/boundconds.cuh | 47 ++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 1fa2c4c..22d353b 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -1,5 +1,45 @@ #pragma once +static __global__ void +kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex) +{ + const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x; + const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y; + const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z; + + // If within the start-end range (this allows threadblock dims that are not + // divisible by end - start) + if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z) + return; + + // If destination index is inside the computational domain, return since + // the boundary conditions are only applied to the ghost zones + if (i_dst >= DCONST(AC_nx_min) && i_dst < DCONST(AC_nx_max) && j_dst >= DCONST(AC_ny_min) && + j_dst < DCONST(AC_ny_max) && k_dst >= DCONST(AC_nz_min) && k_dst < DCONST(AC_nz_max)) + return; + + // Find the source index + // Map to nx, ny, nz coordinates + int i_src, j_src, k_src; + int bsize = STENCIL_ORDER/(int) 2; + + if (bindex.x == 1) + { + if (i_dst < bsize) + { + i_src = 2*bsize - i_dst + } else if (i_dst >= DCONST(AC_nx_min) - bsize) + { + i_src = i_dst - 2*bsize BRAIN NOT WORKING CONTINUE TOMORROW + } + } + + const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src); + const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst); + vtxbuf[dst_idx] = vtxbuf[src_idx]; +} + + static __global__ void kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf) { @@ -61,8 +101,9 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in return AC_SUCCESS; } -AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const int bound_direction); +AcResult +acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, + AcReal* vtxbuf, const int bindex); { const dim3 tpb(8, 2, 8); const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), @@ -71,7 +112,7 @@ AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, if (DCONST(AC_bype) == BOUNDCOND_SYM) { - kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bound_direction); + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex); ERRCHK_CUDA_KERNEL(); }