From b81180ff19cf320ca06c68459da896e9dcd44305 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 15 Sep 2020 19:08:32 +0800 Subject: [PATCH 01/21] Desperate but perhaps wrongminded attempts. Periodic boundary conditions are etched too deep inside... --- src/core/node.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/core/node.cc b/src/core/node.cc index 70c3115..552cf3f 100644 --- a/src/core/node.cc +++ b/src/core/node.cc @@ -782,6 +782,20 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream, return AC_SUCCESS; } +AcResult +acNodeGeneralBoundcondStep(const Node node, const Stream stream, + const VertexBufferHandle vtxbuf_handle) +{ + local_boundcondstep(node, stream, vtxbuf_handle); + acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle); + + //MV: assume that global step communication is handles by acNodePeriodicBoundcondStep as above. + //global_boundcondstep(node, stream, vtxbuf_handle); + + + return AC_SUCCESS; +} + AcResult acNodePeriodicBoundconds(const Node node, const Stream stream) { @@ -791,6 +805,15 @@ acNodePeriodicBoundconds(const Node node, const Stream stream) return AC_SUCCESS; } +AcResult +acNodeGeneralBoundconds(const Node node, const Stream stream) +{ + for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { + acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i); + } + return AC_SUCCESS; +} + static AcReal simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcReal* results, const int& n) From 67aa87731bd7bb12f59fd0aee437baf549b34e3d Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 18 Sep 2020 15:25:14 +0800 Subject: [PATCH 02/21] Crafting code. Attempting to figure out the MPI domain decomposition etc. 
--- src/core/device.cc | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/core/device.cc b/src/core/device.cc index a6ec793..52f20ed 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -433,6 +433,27 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 return AC_SUCCESS; } +AcResult +acDeviceGeneralBoundcondStep(const Device device, const Stream stream, + const VertexBufferHandle vtxbuf_handle, const int3 start, + const int3 end) +{ + cudaSetDevice(device->id); + return acKernelGeneralBoundconds(device->streams[stream], start, end, + device->vba.in[vtxbuf_handle]); +} + +AcResult +acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, + const int3 end) +{ + for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { + acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end); + } + return AC_SUCCESS; +} + + AcResult acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype, const VertexBufferHandle vtxbuf_handle, AcReal* result) @@ -1652,6 +1673,13 @@ acGridIntegrate(const Stream stream, const AcReal dt) acGridLoadScalarUniform(stream, AC_dt, dt); acDeviceSynchronizeStream(device, stream); + // Check the position in MPI frame + int nprocs, pid; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + const uint3_64 decomposition = decompose(nprocs); + const int3 pid3d = getPid3D(pid, decomposition); + // Corners #if MPI_INCL_CORNERS // Do not rm: required for corners @@ -1805,6 +1833,21 @@ acGridIntegrate(const Stream stream, const AcReal dt) acSyncCommData(sidexz_data); acSyncCommData(sideyz_data); #endif // MPI_COMM_ENABLED + + + // Set outer boudaries after substep computation. 
+ const int3 m1 = (int3){0, 0, 0}; + const int3 m2 = nn; + const int3 pid3d = getPid3D(pid, decomposition); + // If we are are a boundary element + if ((pid3d.x == 0) || (pid3d.x == decomposition.x - 1) || + (pid3d.y == 0) || (pid3d.y == decomposition.y - 1) || + (pid3d.z == 0) || (pid3d.z == decomposition.z - 1) ||) + { + acDeviceGeneralBoundconds(device, stream, m1, m2); + } + acGridSynchronizeStream(stream); + #if MPI_COMPUTE_ENABLED { // Front const int3 m1 = (int3){NGHOST, NGHOST, NGHOST}; @@ -1840,8 +1883,12 @@ acGridIntegrate(const Stream stream, const AcReal dt) acDeviceSwapBuffers(device); acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done //////////////////////////////////////////// + } + + + return AC_SUCCESS; } From f736aa1cd1655ad8adbea3aebed3a3eee84d2fa7 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 18 Sep 2020 16:55:36 +0800 Subject: [PATCH 03/21] Attemptiong to make kernels to go where they should. --- src/core/device.cc | 11 ++++++----- src/core/kernels/boundconds.cuh | 17 +++++++++++++++++ src/core/kernels/kernels.h | 3 +++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index 52f20ed..a68fbd9 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -436,19 +436,19 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream, const VertexBufferHandle vtxbuf_handle, const int3 start, - const int3 end) + const int3 end, const int bound_direction) { cudaSetDevice(device->id); return acKernelGeneralBoundconds(device->streams[stream], start, end, - device->vba.in[vtxbuf_handle]); + device->vba.in[vtxbuf_handle], bound_direction); } AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, - const int3 end) + const int3 end, const int bound_direction) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { - 
acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end); + acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, bound_direction); } return AC_SUCCESS; } @@ -1844,7 +1844,8 @@ acGridIntegrate(const Stream stream, const AcReal dt) (pid3d.y == 0) || (pid3d.y == decomposition.y - 1) || (pid3d.z == 0) || (pid3d.z == decomposition.z - 1) ||) { - acDeviceGeneralBoundconds(device, stream, m1, m2); + //TODO get bound_direction + acDeviceGeneralBoundconds(device, stream, m1, m2, bound_direction); } acGridSynchronizeStream(stream); diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 6a31d58..1fa2c4c 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -60,3 +60,20 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in ERRCHK_CUDA_KERNEL(); return AC_SUCCESS; } + +AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, + AcReal* vtxbuf, const int bound_direction); +{ + const dim3 tpb(8, 2, 8); + const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), + (unsigned int)ceil((end.y - start.y) / (float)tpb.y), + (unsigned int)ceil((end.z - start.z) / (float)tpb.z)); + + if (DCONST(AC_bype) == BOUNDCOND_SYM) + { + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bound_direction); + ERRCHK_CUDA_KERNEL(); + } + + return AC_SUCCESS; +} diff --git a/src/core/kernels/kernels.h b/src/core/kernels/kernels.h index fc9c745..980bafa 100644 --- a/src/core/kernels/kernels.h +++ b/src/core/kernels/kernels.h @@ -43,6 +43,9 @@ extern "C" { /** */ AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const int3 end, AcReal* vtxbuf); +/** */ +AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, + AcReal* vtxbuf, const int bound_direction); /** */ AcResult acKernelDummy(void); From 1868031f1e74c36f455e8b913726c8f61e995e36 Mon Sep 17 
00:00:00 2001 From: Miikka Vaisala Date: Fri, 18 Sep 2020 17:25:34 +0800 Subject: [PATCH 04/21] Working on marking the active edges. --- src/core/device.cc | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index a68fbd9..d1640c9 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -1840,12 +1840,24 @@ acGridIntegrate(const Stream stream, const AcReal dt) const int3 m2 = nn; const int3 pid3d = getPid3D(pid, decomposition); // If we are are a boundary element - if ((pid3d.x == 0) || (pid3d.x == decomposition.x - 1) || - (pid3d.y == 0) || (pid3d.y == decomposition.y - 1) || - (pid3d.z == 0) || (pid3d.z == decomposition.z - 1) ||) + int3 bindex = (int3){0, 0, 0}; + + // 1 is top edge, 2 bottom edge, 3 both edges, 0 is no boundary + if (pid3d.x == 0) { bindex.x = 1; } + else if (pid3d.x == decomposition.x - 1) { bindex.x = 2; } + else if ((pid3d.x == 0) && (pid3d.x == decomposition.x - 1)) { bindex.x = 3; } + + if (pid3d.y == 0) { bindex.y = 1; } + else if (pid3d.y == decomposition.y - 1) { bindex.y = 2; } + else if ((pid3d.y == 0) && (pid3d.y == decomposition.y - 1)) { bindex.y = 3; } + + if (pid3d.z == 0) { bindex.z = 1; } + else if (pid3d.z == decomposition.z - 1) { bindex.z = 2; } + else if ((pid3d.z == 0) && (pid3d.z == decomposition.z - 1)) { bindex.z = 3; } + { - //TODO get bound_direction - acDeviceGeneralBoundconds(device, stream, m1, m2, bound_direction); + //TODO get bound_direction -> bindex + acDeviceGeneralBoundconds(device, stream, m1, m2, bindex); } acGridSynchronizeStream(stream); From 2f85cbba1a0929d6cd638b65202238746290ad06 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Mon, 21 Sep 2020 15:54:33 +0800 Subject: [PATCH 05/21] Last mimnnut modification before the meeting. 
--- src/core/device.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index d1640c9..29ef9c8 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -436,19 +436,19 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream, const VertexBufferHandle vtxbuf_handle, const int3 start, - const int3 end, const int bound_direction) + const int3 end, const int3 bindex) { cudaSetDevice(device->id); return acKernelGeneralBoundconds(device->streams[stream], start, end, - device->vba.in[vtxbuf_handle], bound_direction); + device->vba.in[vtxbuf_handle], bindex); } AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, - const int3 end, const int bound_direction) + const int3 end, const int3 bindex) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { - acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, bound_direction); + acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, bindex); } return AC_SUCCESS; } From 9386129f1b98e33a698b6214e5f46e94e0c061d9 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 29 Sep 2020 16:31:16 +0800 Subject: [PATCH 06/21] Cleaning and improving the boundary condition draft. --- src/core/device.cc | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index 29ef9c8..cf20e85 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -1435,6 +1435,41 @@ acGridStoreMesh(const Stream stream, AcMesh* host_mesh) return AC_SUCCESS; } +AcResult +acGridGeneralBoundconds(const Device device, const Stream stream) +{ + // Non-periodic Boundary conditions + + // Set outer boudaries after substep computation. 
+ const int3 m1 = (int3){0, 0, 0}; + const int3 m2 = grid.nn; + const int3 pid3d = getPid3D(pid, grid.decomposition); + // If we are are a boundary element + int3 bindex = (int3){0, 0, 0}; + + // Check if there are active boundary condition edges. + // 0 is no boundary, 1 both edges, 2 is top edge, 3 bottom edge + if ((pid3d.x == 0) && (pid3d.x == decomposition.x - 1)) { bindex.x = 1; } + else if (pid3d.x == 0) { bindex.x = 2; } + else if (pid3d.x == decomposition.x - 1) { bindex.x = 3; } + + if ((pid3d.y == 0) && (pid3d.y == decomposition.y - 1)) { bindex.y = 1; } + else if (pid3d.y == 0) { bindex.y = 2; } + else if (pid3d.y == decomposition.y - 1) { bindex.y = 3; } + + if ((pid3d.z == 0) && (pid3d.z == decomposition.z - 1)) { bindex.z = 1; } + else if (pid3d.z == 0) { bindex.z = 2; } + else if (pid3d.z == decomposition.z - 1) { bindex.z = 3; } + + + if (bindex.x != 1) && (bindex.y != 1) && (bindex.z != 1) { + acDeviceGeneralBoundconds(device, stream, m1, m2, bindex); + } + acGridSynchronizeStream(stream); + + return AC_SUCCESS; +} + /* // Unused AcResult @@ -1834,32 +1869,8 @@ acGridIntegrate(const Stream stream, const AcReal dt) acSyncCommData(sideyz_data); #endif // MPI_COMM_ENABLED - - // Set outer boudaries after substep computation. 
- const int3 m1 = (int3){0, 0, 0}; - const int3 m2 = nn; - const int3 pid3d = getPid3D(pid, decomposition); - // If we are are a boundary element - int3 bindex = (int3){0, 0, 0}; - - // 1 is top edge, 2 bottom edge, 3 both edges, 0 is no boundary - if (pid3d.x == 0) { bindex.x = 1; } - else if (pid3d.x == decomposition.x - 1) { bindex.x = 2; } - else if ((pid3d.x == 0) && (pid3d.x == decomposition.x - 1)) { bindex.x = 3; } - - if (pid3d.y == 0) { bindex.y = 1; } - else if (pid3d.y == decomposition.y - 1) { bindex.y = 2; } - else if ((pid3d.y == 0) && (pid3d.y == decomposition.y - 1)) { bindex.y = 3; } - - if (pid3d.z == 0) { bindex.z = 1; } - else if (pid3d.z == decomposition.z - 1) { bindex.z = 2; } - else if ((pid3d.z == 0) && (pid3d.z == decomposition.z - 1)) { bindex.z = 3; } - - { - //TODO get bound_direction -> bindex - acDeviceGeneralBoundconds(device, stream, m1, m2, bindex); - } - acGridSynchronizeStream(stream); + // Invoke outer edge boundary conditions. + acGridGeneralBoundconds(device, stream) #if MPI_COMPUTE_ENABLED { // Front From 711cc4d350b1ed8c19286269a187e9fb9ead4073 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 29 Sep 2020 16:36:43 +0800 Subject: [PATCH 07/21] Moving code in wrong place. --- src/core/device.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index cf20e85..d931925 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -1439,11 +1439,17 @@ AcResult acGridGeneralBoundconds(const Device device, const Stream stream) { // Non-periodic Boundary conditions + // Check the position in MPI frame + int nprocs, pid; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + const uint3_64 decomposition = decompose(nprocs); + const int3 pid3d = getPid3D(pid, decomposition); // Set outer boudaries after substep computation. 
const int3 m1 = (int3){0, 0, 0}; const int3 m2 = grid.nn; - const int3 pid3d = getPid3D(pid, grid.decomposition); + const int3 pid3d = getPid3D(pid, decomposition); // If we are are a boundary element int3 bindex = (int3){0, 0, 0}; @@ -1708,12 +1714,6 @@ acGridIntegrate(const Stream stream, const AcReal dt) acGridLoadScalarUniform(stream, AC_dt, dt); acDeviceSynchronizeStream(device, stream); - // Check the position in MPI frame - int nprocs, pid; - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &pid); - const uint3_64 decomposition = decompose(nprocs); - const int3 pid3d = getPid3D(pid, decomposition); // Corners #if MPI_INCL_CORNERS From 8667e2b2ec2719c2423f9003eac6dfeac038598d Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 29 Sep 2020 18:46:41 +0800 Subject: [PATCH 08/21] Entering kernel level --- src/core/kernels/boundconds.cuh | 47 ++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 1fa2c4c..22d353b 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -1,5 +1,45 @@ #pragma once +static __global__ void +kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex) +{ + const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x; + const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y; + const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z; + + // If within the start-end range (this allows threadblock dims that are not + // divisible by end - start) + if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z) + return; + + // If destination index is inside the computational domain, return since + // the boundary conditions are only applied to the ghost zones + if (i_dst >= DCONST(AC_nx_min) && i_dst < DCONST(AC_nx_max) && j_dst >= DCONST(AC_ny_min) && + j_dst < DCONST(AC_ny_max) && k_dst >= DCONST(AC_nz_min) && k_dst < 
DCONST(AC_nz_max)) + return; + + // Find the source index + // Map to nx, ny, nz coordinates + int i_src, j_src, k_src; + int bsize = STENCIL_ORDER/(int) 2; + + if (bindex.x == 1) + { + if (i_dst < bsize) + { + i_src = 2*bsize - i_dst + } else if (i_dst >= DCONST(AC_nx_min) - bsize) + { + i_src = i_dst - 2*bsize BRAIN NOT WORKING CONTINUE TOMORROW + } + } + + const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src); + const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst); + vtxbuf[dst_idx] = vtxbuf[src_idx]; +} + + static __global__ void kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf) { @@ -61,8 +101,9 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in return AC_SUCCESS; } -AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const int bound_direction); +AcResult +acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, + AcReal* vtxbuf, const int bindex); { const dim3 tpb(8, 2, 8); const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), @@ -71,7 +112,7 @@ AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, if (DCONST(AC_bype) == BOUNDCOND_SYM) { - kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bound_direction); + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex); ERRCHK_CUDA_KERNEL(); } From 681035275d70e1fd0dd1342afb6514b19322ecd0 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 30 Sep 2020 14:38:27 +0800 Subject: [PATCH 09/21] Draft of symmetric/antisymmetric boundary condition. 
--- src/core/kernels/boundconds.cuh | 54 +++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 22d353b..4b35065 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -1,7 +1,7 @@ #pragma once static __global__ void -kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex) +kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex, const int sign) { const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x; const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y; @@ -20,23 +20,54 @@ kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, co // Find the source index // Map to nx, ny, nz coordinates - int i_src, j_src, k_src; + int i_src, j_src, k_src, boundloc; int bsize = STENCIL_ORDER/(int) 2; - if (bindex.x == 1) + if (bindex.x != 0) { - if (i_dst < bsize) + // Pick up the mirroring value. + if ((i_dst < bsize) && ((bindex.x == 3) || (bindex.x ==1))) { - i_src = 2*bsize - i_dst - } else if (i_dst >= DCONST(AC_nx_min) - bsize) + boundloc = bsize; //Location of central border point. + i_src = 2*boundloc - i_dst; + } else if ((i_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.x == 2) || (bindex.x ==1))) { - i_src = i_dst - 2*bsize BRAIN NOT WORKING CONTINUE TOMORROW + boundloc = DCONST(AC_nx_min) - bsize - 1; //Location of central border point. + i_src = 2*boundloc - i_dst; + } + } + if (bindex.y != 0) + { + // Pick up the mirroring value. + if ((j_dst < bsize) && ((bindex.y == 3) || (bindex.y ==1))) + { + boundloc = bsize; //Location of central border point. + j_src = 2*boundloc - j_dst; + } else if ((j_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.y == 2) || (bindex.y ==1))) + { + boundloc = DCONST(AC_ny_min) - bsize - 1; //Location of central border point. 
+ i_src = 2*boundloc - j_dst; + } + } + if (bindex.z != 0) + { + // Pick up the mirroring value. + if ((k_dst < bsize) && ((bindex.z == 3) || (bindex.z ==1))) + { + boundloc = bsize; //Location of central border point. + k_src = 2*boundloc - k_dst; + } else if ((i_dst >= DCONST(AC_nz_min) - bsize) && ((bindex.z == 2) || (bindex.z ==1))) + { + boundloc = DCONST(AC_nz_min) - bsize - 1; //Location of central border point. + k_src = 2*boundloc - k_dst; } } + + const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src); const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst); - vtxbuf[dst_idx] = vtxbuf[src_idx]; + vtxbuf[dst_idx] = sign*vtxbuf[src_idx]; // sign = 1 symmetric, sign = -1 antisymmetric } @@ -112,7 +143,12 @@ acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int if (DCONST(AC_bype) == BOUNDCOND_SYM) { - kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex); + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); + ERRCHK_CUDA_KERNEL(); + } + else if (DCONST(AC_bype) == BOUNDCOND_ASYM) + { + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); ERRCHK_CUDA_KERNEL(); } From cf6b21e3ab5b4d3ee6cd4d511e9b4c47ed56f972 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 6 Oct 2020 17:49:36 +0800 Subject: [PATCH 10/21] Created separate acGridIntegrateNonperiodic() from +acGridIntegrate() this is to avoid some potential issues with an upcoming merge by Oskar. 
--- src/core/device.cc | 215 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/src/core/device.cc b/src/core/device.cc index d931925..9f5a8e1 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -1714,6 +1714,220 @@ acGridIntegrate(const Stream stream, const AcReal dt) acGridLoadScalarUniform(stream, AC_dt, dt); acDeviceSynchronizeStream(device, stream); +// Corners +#if MPI_INCL_CORNERS + // Do not rm: required for corners + const int3 corner_b0s[] = { + (int3){0, 0, 0}, + (int3){NGHOST + nn.x, 0, 0}, + (int3){0, NGHOST + nn.y, 0}, + (int3){0, 0, NGHOST + nn.z}, + + (int3){NGHOST + nn.x, NGHOST + nn.y, 0}, + (int3){NGHOST + nn.x, 0, NGHOST + nn.z}, + (int3){0, NGHOST + nn.y, NGHOST + nn.z}, + (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z}, + }; +#endif // MPI_INCL_CORNERS + + // Edges X + const int3 edgex_b0s[] = { + (int3){NGHOST, 0, 0}, + (int3){NGHOST, NGHOST + nn.y, 0}, + + (int3){NGHOST, 0, NGHOST + nn.z}, + (int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z}, + }; + + // Edges Y + const int3 edgey_b0s[] = { + (int3){0, NGHOST, 0}, + (int3){NGHOST + nn.x, NGHOST, 0}, + + (int3){0, NGHOST, NGHOST + nn.z}, + (int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z}, + }; + + // Edges Z + const int3 edgez_b0s[] = { + (int3){0, 0, NGHOST}, + (int3){NGHOST + nn.x, 0, NGHOST}, + + (int3){0, NGHOST + nn.y, NGHOST}, + (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST}, + }; + + // Sides XY + const int3 sidexy_b0s[] = { + (int3){NGHOST, NGHOST, 0}, // + (int3){NGHOST, NGHOST, NGHOST + nn.z}, // + }; + + // Sides XZ + const int3 sidexz_b0s[] = { + (int3){NGHOST, 0, NGHOST}, // + (int3){NGHOST, NGHOST + nn.y, NGHOST}, // + }; + + // Sides YZ + const int3 sideyz_b0s[] = { + (int3){0, NGHOST, NGHOST}, // + (int3){NGHOST + nn.x, NGHOST, NGHOST}, // + }; + + for (int isubstep = 0; isubstep < 3; ++isubstep) { + +#if MPI_COMM_ENABLED +#if MPI_INCL_CORNERS + acPackCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners 
+#endif // MPI_INCL_CORNERS + acPackCommData(device, edgex_b0s, &edgex_data); + acPackCommData(device, edgey_b0s, &edgey_data); + acPackCommData(device, edgez_b0s, &edgez_data); + acPackCommData(device, sidexy_b0s, &sidexy_data); + acPackCommData(device, sidexz_b0s, &sidexz_data); + acPackCommData(device, sideyz_b0s, &sideyz_data); +#endif + +#if MPI_COMM_ENABLED + MPI_Barrier(MPI_COMM_WORLD); + +#if MPI_GPUDIRECT_DISABLED +#if MPI_INCL_CORNERS + acTransferCommDataToHost(device, &corner_data); // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + acTransferCommDataToHost(device, &edgex_data); + acTransferCommDataToHost(device, &edgey_data); + acTransferCommDataToHost(device, &edgez_data); + acTransferCommDataToHost(device, &sidexy_data); + acTransferCommDataToHost(device, &sidexz_data); + acTransferCommDataToHost(device, &sideyz_data); +#endif +#if MPI_INCL_CORNERS + acTransferCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + acTransferCommData(device, edgex_b0s, &edgex_data); + acTransferCommData(device, edgey_b0s, &edgey_data); + acTransferCommData(device, edgez_b0s, &edgez_data); + acTransferCommData(device, sidexy_b0s, &sidexy_data); + acTransferCommData(device, sidexz_b0s, &sidexz_data); + acTransferCommData(device, sideyz_b0s, &sideyz_data); +#endif // MPI_COMM_ENABLED + +#if MPI_COMPUTE_ENABLED + //////////// INNER INTEGRATION ////////////// + { + const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = nn; + acKernelIntegrateSubstep(device->streams[STREAM_16], isubstep, m1, m2, device->vba); + } +//////////////////////////////////////////// +#endif // MPI_COMPUTE_ENABLED + +#if MPI_COMM_ENABLED +#if MPI_INCL_CORNERS + acTransferCommDataWait(corner_data); // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + acTransferCommDataWait(edgex_data); + acTransferCommDataWait(edgey_data); + acTransferCommDataWait(edgez_data); + acTransferCommDataWait(sidexy_data); + 
acTransferCommDataWait(sidexz_data); + acTransferCommDataWait(sideyz_data); + +#if MPI_INCL_CORNERS + acUnpinCommData(device, &corner_data); // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + acUnpinCommData(device, &edgex_data); + acUnpinCommData(device, &edgey_data); + acUnpinCommData(device, &edgez_data); + acUnpinCommData(device, &sidexy_data); + acUnpinCommData(device, &sidexz_data); + acUnpinCommData(device, &sideyz_data); + +#if MPI_INCL_CORNERS + acUnpackCommData(device, corner_b0s, &corner_data); +#endif // MPI_INCL_CORNERS + acUnpackCommData(device, edgex_b0s, &edgex_data); + acUnpackCommData(device, edgey_b0s, &edgey_data); + acUnpackCommData(device, edgez_b0s, &edgez_data); + acUnpackCommData(device, sidexy_b0s, &sidexy_data); + acUnpackCommData(device, sidexz_b0s, &sidexz_data); + acUnpackCommData(device, sideyz_b0s, &sideyz_data); +//////////// OUTER INTEGRATION ////////////// + +// Wait for unpacking +#if MPI_INCL_CORNERS + acSyncCommData(corner_data); // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + acSyncCommData(edgex_data); + acSyncCommData(edgey_data); + acSyncCommData(edgez_data); + acSyncCommData(sidexy_data); + acSyncCommData(sidexz_data); + acSyncCommData(sideyz_data); +#endif // MPI_COMM_ENABLED +#if MPI_COMPUTE_ENABLED + { // Front + const int3 m1 = (int3){NGHOST, NGHOST, NGHOST}; + const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_0], isubstep, m1, m2, device->vba); + } + { // Back + const int3 m1 = (int3){NGHOST, NGHOST, nn.z}; + const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_1], isubstep, m1, m2, device->vba); + } + { // Bottom + const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_2], isubstep, m1, m2, device->vba); + } + { // Top + const int3 m1 = (int3){NGHOST, nn.y, 2 * NGHOST}; + const int3 
m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_3], isubstep, m1, m2, device->vba); + } + { // Left + const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_4], isubstep, m1, m2, device->vba); + } + { // Right + const int3 m1 = (int3){nn.x, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST}; + acKernelIntegrateSubstep(device->streams[STREAM_5], isubstep, m1, m2, device->vba); + } +#endif // MPI_COMPUTE_ENABLED + acDeviceSwapBuffers(device); + acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done + //////////////////////////////////////////// + } + + return AC_SUCCESS; +} + +AcResult +acGridIntegrateNonperiodic(const Stream stream, const AcReal dt) +{ + ERRCHK(grid.initialized); + acGridSynchronizeStream(stream); + + const Device device = grid.device; + const int3 nn = grid.nn; +#if MPI_INCL_CORNERS + CommData corner_data = grid.corner_data; // Do not rm: required for corners +#endif // MPI_INCL_CORNERS + CommData edgex_data = grid.edgex_data; + CommData edgey_data = grid.edgey_data; + CommData edgez_data = grid.edgez_data; + CommData sidexy_data = grid.sidexy_data; + CommData sidexz_data = grid.sidexz_data; + CommData sideyz_data = grid.sideyz_data; + + acGridLoadScalarUniform(stream, AC_dt, dt); + acDeviceSynchronizeStream(device, stream); + // Corners #if MPI_INCL_CORNERS @@ -1916,6 +2130,7 @@ acGridIntegrate(const Stream stream, const AcReal dt) return AC_SUCCESS; } + AcResult acGridPeriodicBoundconds(const Stream stream) { From d060e67bec5e3912240439349e16565e8c207677 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Tue, 6 Oct 2020 18:16:24 +0800 Subject: [PATCH 11/21] Includes also --- include/astaroth.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/astaroth.h 
b/include/astaroth.h index d32b367..8db7608 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -303,9 +303,15 @@ AcResult acGridStoreMesh(const Stream stream, AcMesh* host_mesh); /** */ AcResult acGridIntegrate(const Stream stream, const AcReal dt); +/** */ +AcResult acGridIntegrateNonperiodic(const Stream stream, const AcReal dt); + /** */ AcResult acGridPeriodicBoundconds(const Stream stream); +/** */ +AcResult acGridGeneralBoundconds(const Device device, const Stream stream); + /** TODO */ AcResult acGridReduceScal(const Stream stream, const ReductionType rtype, const VertexBufferHandle vtxbuf_handle, AcReal* result); @@ -562,6 +568,16 @@ AcResult acDevicePeriodicBoundcondStep(const Device device, const Stream stream, AcResult acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 start, const int3 end); +/** */ +AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream, + const VertexBufferHandle vtxbuf_handle, const int3 start, + const int3 end, const int3 bindex); + +/** */ +AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, + const int3 end, const int3 bindex); + + /** */ AcResult acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype, const VertexBufferHandle vtxbuf_handle, AcReal* result); From cb15668f2d364be854a0d5bd8a769b82e429f482 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 11:58:15 +0800 Subject: [PATCH 12/21] Figuring out compilations. 
--- acc/mhd_solver/stencil_kernel.ac | 5 ++++- include/astaroth.h | 6 ++++++ src/core/device.cc | 4 ++++ src/core/kernels/boundconds.cuh | 10 +++++++--- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/acc/mhd_solver/stencil_kernel.ac b/acc/mhd_solver/stencil_kernel.ac index e0efa94..57097f8 100644 --- a/acc/mhd_solver/stencil_kernel.ac +++ b/acc/mhd_solver/stencil_kernel.ac @@ -20,9 +20,12 @@ uniform int AC_max_steps; uniform int AC_save_steps; uniform int AC_bin_steps; -uniform int AC_bc_type; uniform int AC_start_step; +// In3 params +uniform int3 AC_bc_type_bot; +uniform int3 AC_bc_type_top; + // Real params uniform Scalar AC_dt; uniform Scalar AC_max_time; diff --git a/include/astaroth.h b/include/astaroth.h index 8db7608..ace32c0 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -304,7 +304,13 @@ AcResult acGridStoreMesh(const Stream stream, AcMesh* host_mesh); AcResult acGridIntegrate(const Stream stream, const AcReal dt); /** */ +/* MV: Commented out for a while, but save for the future when standalone_MPI + works with periodic boundary conditions. +AcResult +acGridIntegrateNonperiodic(const Stream stream, const AcReal dt) + AcResult acGridIntegrateNonperiodic(const Stream stream, const AcReal dt); +*/ /** */ AcResult acGridPeriodicBoundconds(const Stream stream); diff --git a/src/core/device.cc b/src/core/device.cc index 9f5a8e1..ad46148 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -1907,6 +1907,8 @@ acGridIntegrate(const Stream stream, const AcReal dt) return AC_SUCCESS; } +/* MV: Commented out for a while, but save for the future when standalone_MPI + works with periodic boundary conditions. 
AcResult acGridIntegrateNonperiodic(const Stream stream, const AcReal dt) { @@ -2130,6 +2132,8 @@ acGridIntegrateNonperiodic(const Stream stream, const AcReal dt) return AC_SUCCESS; } +*/ + AcResult acGridPeriodicBoundconds(const Stream stream) diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 4b35065..01e89fa 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -1,5 +1,9 @@ #pragma once +// Temporary defines untils we can figure out smarter swithches. +#define BOUNDCOND_SYM 1 +#define BOUNDCOND_ASYM 2 + static __global__ void kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex, const int sign) { @@ -134,19 +138,19 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const int bindex); + AcReal* vtxbuf, const int3 bindex) { const dim3 tpb(8, 2, 8); const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), (unsigned int)ceil((end.y - start.y) / (float)tpb.y), (unsigned int)ceil((end.z - start.z) / (float)tpb.z)); - if (DCONST(AC_bype) == BOUNDCOND_SYM) + if (DCONST(AC_bc_type) == BOUNDCOND_SYM) { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); ERRCHK_CUDA_KERNEL(); } - else if (DCONST(AC_bype) == BOUNDCOND_ASYM) + else if (DCONST(AC_bc_type) == BOUNDCOND_ASYM) { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); ERRCHK_CUDA_KERNEL(); From efd3cc40cd98d05da6244b6ba5150b7d7646a43f Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 14:11:14 +0800 Subject: [PATCH 13/21] Compiles without the API function call. 
--- include/astaroth.h | 9 +++++++-- src/core/device.cc | 11 +++++++---- src/core/kernels/boundconds.cuh | 13 ++++++------- src/core/kernels/kernels.h | 3 ++- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/astaroth.h b/include/astaroth.h index ace32c0..76f5174 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -49,6 +49,11 @@ typedef struct { typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult; +// Neming the associated number of the boundary condition types +typedef enum { AC_BOUNDCOND_PERIODIC = 0, + AC_BOUNDCOND_SYMMETRIC = 1, + AC_BOUNDCOND_ANTISYMMETRIC = 2 } AcBoundcond; + #define AC_GEN_ID(X) X, typedef enum { AC_FOR_RTYPES(AC_GEN_ID) // @@ -577,11 +582,11 @@ AcResult acDevicePeriodicBoundconds(const Device device, const Stream stream, co /** */ AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream, const VertexBufferHandle vtxbuf_handle, const int3 start, - const int3 end, const int3 bindex); + const int3 end, const AcMeshInfo config, const int3 bindex); /** */ AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, - const int3 end, const int3 bindex); + const int3 end, const AcMeshInfo config, const int3 bindex); /** */ diff --git a/src/core/device.cc b/src/core/device.cc index ad46148..9130f34 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -436,19 +436,19 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream, const VertexBufferHandle vtxbuf_handle, const int3 start, - const int3 end, const int3 bindex) + const int3 end, const AcMeshInfo config, const int3 bindex) { cudaSetDevice(device->id); return acKernelGeneralBoundconds(device->streams[stream], start, end, - device->vba.in[vtxbuf_handle], bindex); + device->vba.in[vtxbuf_handle], config, bindex); } AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const 
int3 start, - const int3 end, const int3 bindex) + const int3 end, const AcMeshInfo config, const int3 bindex) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { - acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, bindex); + acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, config, bindex); } return AC_SUCCESS; } @@ -1435,6 +1435,8 @@ acGridStoreMesh(const Stream stream, AcMesh* host_mesh) return AC_SUCCESS; } +/* MV: Commented out for a while, but save for the future when standalone_MPI + works with periodic boundary conditions. AcResult acGridGeneralBoundconds(const Device device, const Stream stream) { @@ -1475,6 +1477,7 @@ acGridGeneralBoundconds(const Device device, const Stream stream) return AC_SUCCESS; } +*/ /* // Unused diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 01e89fa..64b4a1e 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -1,9 +1,5 @@ #pragma once -// Temporary defines untils we can figure out smarter swithches. 
-#define BOUNDCOND_SYM 1 -#define BOUNDCOND_ASYM 2 - static __global__ void kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex, const int sign) { @@ -138,19 +134,22 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const int3 bindex) + AcReal* vtxbuf, const AcMeshInfo config, const int3 bindex) { const dim3 tpb(8, 2, 8); const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), (unsigned int)ceil((end.y - start.y) / (float)tpb.y), (unsigned int)ceil((end.z - start.z) / (float)tpb.z)); - if (DCONST(AC_bc_type) == BOUNDCOND_SYM) + int3 bc_top = config.int3_params[AC_bc_type_top]; + int3 bc_bot = config.int3_params[AC_bc_type_bot]; + + if (bc_top.x == AC_BOUNDCOND_SYMMETRIC) { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); ERRCHK_CUDA_KERNEL(); } - else if (DCONST(AC_bc_type) == BOUNDCOND_ASYM) + else if (bc_bot.x == AC_BOUNDCOND_ANTISYMMETRIC) { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); ERRCHK_CUDA_KERNEL(); diff --git a/src/core/kernels/kernels.h b/src/core/kernels/kernels.h index 980bafa..bc94185 100644 --- a/src/core/kernels/kernels.h +++ b/src/core/kernels/kernels.h @@ -45,7 +45,8 @@ AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, AcReal* vtxbuf); /** */ AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const int bound_direction); + AcReal* vtxbuf, const AcMeshInfo config, const int3 bindex); + /** */ AcResult acKernelDummy(void); From 8732480cd99fc6bc253fc3d06d40a91a839391e0 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 15:37:37 +0800 Subject: [PATCH 14/21] Added acIntegrateGBC() --- include/astaroth.h | 5 ++ src/core/astaroth.cc | 13 +++++ src/core/node.cc | 112 
++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 124 insertions(+), 6 deletions(-) diff --git a/include/astaroth.h b/include/astaroth.h index 76f5174..ac20d34 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -232,6 +232,11 @@ AcResult acStore(AcMesh* host_mesh); * substep and the user is responsible for calling acBoundcondStep before reading the data. */ AcResult acIntegrate(const AcReal dt); +/** Performs Runge-Kutta 3 integration. Note: Boundary conditions are not applied after the final + * substep and the user is responsible for calling acBoundcondStep before reading the data. + * Has customizable boundary conditions. */ +AcResult acIntegrateGBC(const AcMeshInfo config, const AcReal dt); + /** Applies periodic boundary conditions for the Mesh distributed among the devices visible to * the caller*/ AcResult acBoundcondStep(void); diff --git a/src/core/astaroth.cc b/src/core/astaroth.cc index 1c8d0f7..c565699 100644 --- a/src/core/astaroth.cc +++ b/src/core/astaroth.cc @@ -86,6 +86,13 @@ acIntegrate(const AcReal dt) return acNodeIntegrate(nodes[0], dt); } +AcResult +acIntegrateGBC(const AcMeshInfo config, const AcReal dt) +{ + return acNodeIntegrateGBC(nodes[0], config, dt); +} + + AcResult acIntegrateStep(const int isubstep, const AcReal dt) { @@ -109,6 +116,12 @@ acBoundcondStep(void) return acNodePeriodicBoundconds(nodes[0], STREAM_DEFAULT); } +AcResult +acBoundcondStepGBC(void) //TODO ADAPT +{ + return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT); +} + AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle) { diff --git a/src/core/node.cc b/src/core/node.cc index 552cf3f..58acd25 100644 --- a/src/core/node.cc +++ b/src/core/node.cc @@ -656,6 +656,28 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand return AC_SUCCESS; } +static AcResult +local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf) //TODO ADAPT +{ + 
acNodeSynchronizeStream(node, stream); + + if (node->num_devices > 1) { + // Local boundary conditions + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { + const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE + const int3 d1 = (int3){node->subgrid.m.x, node->subgrid.m.y, d0.z + node->subgrid.n.z}; + acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1); + } + } + else { + acDeviceGeneralBoundcondStep(node->devices[0], stream, vtxbuf, (int3){0, 0, 0}, + node->subgrid.m); + } + return AC_SUCCESS; +} + + static AcResult global_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle) { @@ -768,6 +790,85 @@ acNodeIntegrate(const Node node, const AcReal dt) return AC_SUCCESS; } +AcResult +acNodeIntegrateGBG(const Node node, const AcReal dt) //TODO ADAPT +{ + acNodeSynchronizeStream(node, STREAM_ALL); + // xxx|OOO OOOOOOOOO OOO|xxx + // ^ ^ ^ ^ + // n0 n1 n2 n3 + // const int3 n0 = (int3){NGHOST, NGHOST, NGHOST}; + // const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST}; + // const int3 n2 = node->grid.n; + // const int3 n3 = n0 + node->grid.n; + + for (int isubstep = 0; isubstep < 3; ++isubstep) { + acNodeSynchronizeStream(node, STREAM_ALL); + for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) { + local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf); + } + acNodeSynchronizeStream(node, STREAM_ALL); + + // Inner inner + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { + const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = node->subgrid.n; + acDeviceIntegrateSubstep(node->devices[i], STREAM_16, isubstep, m1, m2, dt); + } + + for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) { + acNodeSynchronizeVertexBuffer(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf); + global_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf); + } + for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; 
++vtxbuf) { + acNodeSynchronizeStream(node, (Stream)vtxbuf); + } + + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Front + const int3 m1 = (int3){NGHOST, NGHOST, NGHOST}; + const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_0, isubstep, m1, m2, dt); + } + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Back + const int3 m1 = (int3){NGHOST, NGHOST, node->subgrid.n.z}; + const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_1, isubstep, m1, m2, dt); + } + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Bottom + const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_2, isubstep, m1, m2, dt); + } + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Top + const int3 m1 = (int3){NGHOST, node->subgrid.n.y, 2 * NGHOST}; + const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_3, isubstep, m1, m2, dt); + } + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Left + const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST, + node->subgrid.n.z - 2 * NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_4, isubstep, m1, m2, dt); + } + // #pragma omp parallel for + for (int i = 0; i < node->num_devices; ++i) { // Right + const int3 m1 = (int3){node->subgrid.n.x, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST, + node->subgrid.n.z - 2 * NGHOST}; + acDeviceIntegrateSubstep(node->devices[i], STREAM_5, isubstep, m1, m2, dt); + } + 
acNodeSwapBuffers(node); + } + acNodeSynchronizeStream(node, STREAM_ALL); + return AC_SUCCESS; +} + AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle) @@ -783,21 +884,20 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream, } AcResult -acNodeGeneralBoundcondStep(const Node node, const Stream stream, +acNodeGeneralBoundcondStep(const Node node, const Stream stream, //TODO ADAPT const VertexBufferHandle vtxbuf_handle) { - local_boundcondstep(node, stream, vtxbuf_handle); + local_boundcondstep_GBC(node, stream, vtxbuf_handle); acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle); - //MV: assume that global step communication is handles by acNodePeriodicBoundcondStep as above. - //global_boundcondstep(node, stream, vtxbuf_handle); + global_boundcondstep(node, stream, vtxbuf_handle); return AC_SUCCESS; } AcResult -acNodePeriodicBoundconds(const Node node, const Stream stream) +acNodePeriodicBoundconds(const Node node, const Stream stream) //TODO ADAPT { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i); @@ -806,7 +906,7 @@ acNodePeriodicBoundconds(const Node node, const Stream stream) } AcResult -acNodeGeneralBoundconds(const Node node, const Stream stream) +acNodeGeneralBoundconds(const Node node, const Stream stream) //TODO ADAPT { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i); From f87d65408cd91c69c9c49b538e8dd36c80a89580 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 15:42:54 +0800 Subject: [PATCH 15/21] Added acBoundcondStepGBC() for flexible boundary conditions. 
--- include/astaroth.h | 4 ++++ src/core/astaroth.cc | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/astaroth.h b/include/astaroth.h index ac20d34..2f66354 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -241,6 +241,10 @@ AcResult acIntegrateGBC(const AcMeshInfo config, const AcReal dt); * the caller*/ AcResult acBoundcondStep(void); +/** Applies general outer boundary conditions for the Mesh distributed among the devices visible to + * the caller*/ +AcResult acBoundcondStepGBC(const AcMeshInfo config); + /** Does a scalar reduction with the data stored in some vertex buffer */ AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle); diff --git a/src/core/astaroth.cc b/src/core/astaroth.cc index c565699..086e512 100644 --- a/src/core/astaroth.cc +++ b/src/core/astaroth.cc @@ -117,9 +117,9 @@ acBoundcondStep(void) } AcResult -acBoundcondStepGBC(void) //TODO ADAPT +acBoundcondStepGBC(const AcMeshInfo config) { - return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT); + return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT, config); } AcReal From 06bacf298ae33dd3a0c034f85bc974a77f6783d8 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 16:07:33 +0800 Subject: [PATCH 16/21] Created local_boundcondstep_GBC() --- src/core/node.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/node.cc b/src/core/node.cc index 58acd25..423bbd1 100644 --- a/src/core/node.cc +++ b/src/core/node.cc @@ -657,22 +657,24 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand } static AcResult -local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf) //TODO ADAPT +local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf, const MeshInfo config) { acNodeSynchronizeStream(node, stream); + int3 bindex = {-1, -1, -1} //Dummy for node level. Relevant only for MPI. 
+ if (node->num_devices > 1) { // Local boundary conditions // #pragma omp parallel for for (int i = 0; i < node->num_devices; ++i) { const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE const int3 d1 = (int3){node->subgrid.m.x, node->subgrid.m.y, d0.z + node->subgrid.n.z}; - acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1); + acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1, config, bindex); } } else { acDeviceGeneralBoundcondStep(node->devices[0], stream, vtxbuf, (int3){0, 0, 0}, - node->subgrid.m); + node->subgrid.m, config, bindex); } return AC_SUCCESS; } From 4add619e2fcfaaf3e19798a24c4b9600f0e14ded Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 16:22:47 +0800 Subject: [PATCH 17/21] Calling boundconds compiles again. --- include/astaroth.h | 10 ++++++++++ src/core/node.cc | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/astaroth.h b/include/astaroth.h index 2f66354..512ecd1 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -453,6 +453,9 @@ AcResult acNodeIntegrateSubstep(const Node node, const Stream stream, const int /** */ AcResult acNodeIntegrate(const Node node, const AcReal dt); +/** */ +AcResult acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt); + /** */ AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle); @@ -460,6 +463,13 @@ AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream, /** */ AcResult acNodePeriodicBoundconds(const Node node, const Stream stream); +/** */ +AcResult acNodeGeneralBoundcondStep(const Node node, const Stream stream, + const VertexBufferHandle vtxbuf_handle, const AcMeshInfo config); + +/** */ +AcResult acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config); + /** */ AcResult acNodeReduceScal(const Node node, const Stream stream, const ReductionType 
rtype, const VertexBufferHandle vtxbuf_handle, AcReal* result); diff --git a/src/core/node.cc b/src/core/node.cc index 423bbd1..fdc4f49 100644 --- a/src/core/node.cc +++ b/src/core/node.cc @@ -657,11 +657,11 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand } static AcResult -local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf, const MeshInfo config) +local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf, const AcMeshInfo config) { acNodeSynchronizeStream(node, stream); - int3 bindex = {-1, -1, -1} //Dummy for node level. Relevant only for MPI. + int3 bindex = {-1, -1, -1}; //Dummy for node level. Relevant only for MPI. if (node->num_devices > 1) { // Local boundary conditions @@ -793,7 +793,7 @@ acNodeIntegrate(const Node node, const AcReal dt) } AcResult -acNodeIntegrateGBG(const Node node, const AcReal dt) //TODO ADAPT +acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt) { acNodeSynchronizeStream(node, STREAM_ALL); // xxx|OOO OOOOOOOOO OOO|xxx @@ -807,7 +807,7 @@ acNodeIntegrateGBG(const Node node, const AcReal dt) //TODO ADAPT for (int isubstep = 0; isubstep < 3; ++isubstep) { acNodeSynchronizeStream(node, STREAM_ALL); for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) { - local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf); + local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, config); } acNodeSynchronizeStream(node, STREAM_ALL); @@ -886,10 +886,10 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream, } AcResult -acNodeGeneralBoundcondStep(const Node node, const Stream stream, //TODO ADAPT - const VertexBufferHandle vtxbuf_handle) +acNodeGeneralBoundcondStep(const Node node, const Stream stream, + const VertexBufferHandle vtxbuf_handle, const AcMeshInfo config) { - local_boundcondstep_GBC(node, stream, vtxbuf_handle); + local_boundcondstep_GBC(node, 
stream, vtxbuf_handle, config); acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle); global_boundcondstep(node, stream, vtxbuf_handle); @@ -899,7 +899,7 @@ acNodeGeneralBoundcondStep(const Node node, const Stream stream, //TODO ADAPT } AcResult -acNodePeriodicBoundconds(const Node node, const Stream stream) //TODO ADAPT +acNodePeriodicBoundconds(const Node node, const Stream stream) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i); @@ -908,10 +908,10 @@ acNodePeriodicBoundconds(const Node node, const Stream stream) //TODO ADAPT } AcResult -acNodeGeneralBoundconds(const Node node, const Stream stream) //TODO ADAPT +acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { - acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i); + acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i, config); } return AC_SUCCESS; } From 288693fab59099e6d6539b52ec37e6331e1069d0 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 16:31:13 +0800 Subject: [PATCH 18/21] Flexible boundary conditions called from simulation.cc --- samples/standalone/simulation.cc | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/samples/standalone/simulation.cc b/samples/standalone/simulation.cc index 0bf193e..1aa5bf2 100644 --- a/samples/standalone/simulation.cc +++ b/samples/standalone/simulation.cc @@ -367,7 +367,8 @@ run_simulation(const char* config_path) #endif } - acBoundcondStep(); + //acBoundcondStep(); + acBoundcondStepGBC(mesh_info); acStore(mesh); if (start_step == 0) { save_mesh(*mesh, 0, t_step); @@ -426,7 +427,11 @@ run_simulation(const char* config_path) loadForcingParamsToDevice(forcing_params); #endif - acIntegrate(dt); + /* Uses now flexible bokundary conditions */ + //acIntegrate(dt); + acIntegrateGBC(mesh_info, dt); + + t_step += dt; @@ -472,7 +477,8 @@ 
run_simulation(const char* config_path) acBoundcondStep(); acStore(mesh); */ - acBoundcondStep(); + //acBoundcondStep(); + acBoundcondStepGBC(mesh_info); acStore(mesh); save_mesh(*mesh, i, t_step); @@ -493,7 +499,8 @@ run_simulation(const char* config_path) if (dt < dt_typical/AcReal(1e5)) { if (dtcounter > 10) { printf("dt = %e TOO LOW! Ending run at t = %#e \n", double(dt), double(t_step)); - acBoundcondStep(); + //acBoundcondStep(); + acBoundcondStepGBC(mesh_info); acStore(mesh); save_mesh(*mesh, i, t_step); break; @@ -507,7 +514,8 @@ run_simulation(const char* config_path) // End loop if nan is found if (found_nan > 0) { printf("Found nan at t = %e \n", double(t_step)); - acBoundcondStep(); + //acBoundcondStep(); + acBoundcondStepGBC(mesh_info); acStore(mesh); save_mesh(*mesh, i, t_step); break; @@ -522,7 +530,8 @@ run_simulation(const char* config_path) if (found_stop == 1) { printf("Found STOP file at t = %e \n", double(t_step)); - acBoundcondStep(); + //acBoundcondStep(); + acBoundcondStepGBC(mesh_info); acStore(mesh); save_mesh(*mesh, i, t_step); break; From d4ee066b3c403d4a45a69450e1b22a011a52ee55 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Fri, 20 Nov 2020 17:00:48 +0800 Subject: [PATCH 19/21] Periodic boundary conditions work with switchable system. Still some issue with the custom alternatives. Need to look into kernels again. 
--- acc/mhd_solver/stencil_kernel.ac | 9 ++-- config/astaroth.conf | 9 ++++ src/core/kernels/boundconds.cuh | 87 +++++++++++++++++++++++++------- 3 files changed, 84 insertions(+), 21 deletions(-) diff --git a/acc/mhd_solver/stencil_kernel.ac b/acc/mhd_solver/stencil_kernel.ac index 57097f8..c26a5f7 100644 --- a/acc/mhd_solver/stencil_kernel.ac +++ b/acc/mhd_solver/stencil_kernel.ac @@ -22,9 +22,12 @@ uniform int AC_save_steps; uniform int AC_bin_steps; uniform int AC_start_step; -// In3 params -uniform int3 AC_bc_type_bot; -uniform int3 AC_bc_type_top; +uniform int AC_bc_type_top_x; +uniform int AC_bc_type_bot_x; +uniform int AC_bc_type_top_y; +uniform int AC_bc_type_bot_y; +uniform int AC_bc_type_top_z; +uniform int AC_bc_type_bot_z; // Real params uniform Scalar AC_dt; diff --git a/config/astaroth.conf b/config/astaroth.conf index 190948b..480bcdc 100644 --- a/config/astaroth.conf +++ b/config/astaroth.conf @@ -13,6 +13,15 @@ AC_dsx = 0.04908738521 AC_dsy = 0.04908738521 AC_dsz = 0.04908738521 +// 0 = periodic bc, 1 = symmetric bc, 2 = antisymmetric bc +AC_bc_type_top_x = 0 +AC_bc_type_top_y = 0 +AC_bc_type_top_z = 0 +AC_bc_type_bot_x = 0 +AC_bc_type_bot_y = 0 +AC_bc_type_bot_z = 0 + + /* * ============================================================================= * Run-time params diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 64b4a1e..6a0c1ab 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -23,47 +23,83 @@ kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, co int i_src, j_src, k_src, boundloc; int bsize = STENCIL_ORDER/(int) 2; - if (bindex.x != 0) - { + //if (bindex.x != 0) + //{ + // // Pick up the mirroring value. + // if ((i_dst < bsize) && ((bindex.x == 3) || (bindex.x ==1))) + // { + // boundloc = bsize; //Location of central border point. 
+ // i_src = 2*boundloc - i_dst; + // } else if ((i_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.x == 2) || (bindex.x ==1))) + // { + // boundloc = DCONST(AC_nx_min) - bsize - 1; //Location of central border point. + // i_src = 2*boundloc - i_dst; + // } + //} + //if (bindex.y != 0) + //{ + // // Pick up the mirroring value. + // if ((j_dst < bsize) && ((bindex.y == 3) || (bindex.y ==1))) + // { + // boundloc = bsize; //Location of central border point. + // j_src = 2*boundloc - j_dst; + // } else if ((j_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.y == 2) || (bindex.y ==1))) + // { + // boundloc = DCONST(AC_ny_min) - bsize - 1; //Location of central border point. + // i_src = 2*boundloc - j_dst; + // } + //} + //if (bindex.z != 0) + //{ + // // Pick up the mirroring value. + // if ((k_dst < bsize) && ((bindex.z == 3) || (bindex.z ==1))) + // { + // boundloc = bsize; //Location of central border point. + // k_src = 2*boundloc - k_dst; + // } else if ((i_dst >= DCONST(AC_nz_min) - bsize) && ((bindex.z == 2) || (bindex.z ==1))) + // { + // boundloc = DCONST(AC_nz_min) - bsize - 1; //Location of central border point. + // k_src = 2*boundloc - k_dst; + // } + //} + + if (bindex.x < 0) + { + // Pick up the mirroring value. - if ((i_dst < bsize) && ((bindex.x == 3) || (bindex.x ==1))) + if ((i_dst < bsize)) { boundloc = bsize; //Location of central border point. i_src = 2*boundloc - i_dst; - } else if ((i_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.x == 2) || (bindex.x ==1))) + } else if ((i_dst >= DCONST(AC_nx_min) - bsize)) { boundloc = DCONST(AC_nx_min) - bsize - 1; //Location of central border point. i_src = 2*boundloc - i_dst; } - } - if (bindex.y != 0) - { + // Pick up the mirroring value. - if ((j_dst < bsize) && ((bindex.y == 3) || (bindex.y ==1))) + if ((j_dst < bsize)) { boundloc = bsize; //Location of central border point. 
j_src = 2*boundloc - j_dst; - } else if ((j_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.y == 2) || (bindex.y ==1))) + } else if ((j_dst >= DCONST(AC_nx_min) - bsize)) { boundloc = DCONST(AC_ny_min) - bsize - 1; //Location of central border point. i_src = 2*boundloc - j_dst; } - } - if (bindex.z != 0) - { + // Pick up the mirroring value. - if ((k_dst < bsize) && ((bindex.z == 3) || (bindex.z ==1))) + if ((k_dst < bsize)) { boundloc = bsize; //Location of central border point. k_src = 2*boundloc - k_dst; - } else if ((i_dst >= DCONST(AC_nz_min) - bsize) && ((bindex.z == 2) || (bindex.z ==1))) + } else if ((i_dst >= DCONST(AC_nz_min) - bsize)) { boundloc = DCONST(AC_nz_min) - bsize - 1; //Location of central border point. k_src = 2*boundloc - k_dst; } - } - + } const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src); const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst); @@ -141,8 +177,10 @@ acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int (unsigned int)ceil((end.y - start.y) / (float)tpb.y), (unsigned int)ceil((end.z - start.z) / (float)tpb.z)); - int3 bc_top = config.int3_params[AC_bc_type_top]; - int3 bc_bot = config.int3_params[AC_bc_type_bot]; + int3 bc_top = {config.int_params[AC_bc_type_top_x], config.int_params[AC_bc_type_top_y], + config.int_params[AC_bc_type_top_z]}; + int3 bc_bot = {config.int_params[AC_bc_type_bot_x], config.int_params[AC_bc_type_bot_y], + config.int_params[AC_bc_type_bot_z]}; if (bc_top.x == AC_BOUNDCOND_SYMMETRIC) { @@ -153,6 +191,19 @@ acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); ERRCHK_CUDA_KERNEL(); + } + else if (bc_bot.x == AC_BOUNDCOND_PERIODIC) + { + kernel_periodic_boundconds<<>>(start, end, vtxbuf); + ERRCHK_CUDA_KERNEL(); + } + else + { + printf("ERROR: Boundary condition not recognized!\n"); + printf("ERROR: bc_top = %i, %i, %i \n", bc_top.x, bc_top.y, bc_top.z); + printf("ERROR: 
bc_bot = %i, %i, %i \n", bc_bot.x, bc_bot.y, bc_bot.z); + + return AC_FAILURE; } return AC_SUCCESS; From 2f0f6ceac205789c57dda51e74925d7991f90b6a Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Mon, 23 Nov 2020 15:19:47 +0800 Subject: [PATCH 20/21] Functioning symmetric antisymmetric boundary condition. --- src/core/kernels/boundconds.cuh | 139 ++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 59 deletions(-) diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 6a0c1ab..541b22b 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -20,90 +20,111 @@ kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, co // Find the source index // Map to nx, ny, nz coordinates - int i_src, j_src, k_src, boundloc; + int i_src, j_src, k_src, boundlocx0, boundlocx1, boundlocy0, boundlocy1, boundlocz0, boundlocz1; int bsize = STENCIL_ORDER/(int) 2; //if (bindex.x != 0) - //{ - // // Pick up the mirroring value. - // if ((i_dst < bsize) && ((bindex.x == 3) || (bindex.x ==1))) - // { - // boundloc = bsize; //Location of central border point. - // i_src = 2*boundloc - i_dst; - // } else if ((i_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.x == 2) || (bindex.x ==1))) - // { - // boundloc = DCONST(AC_nx_min) - bsize - 1; //Location of central border point. - // i_src = 2*boundloc - i_dst; - // } - //} //if (bindex.y != 0) - //{ - // // Pick up the mirroring value. - // if ((j_dst < bsize) && ((bindex.y == 3) || (bindex.y ==1))) - // { - // boundloc = bsize; //Location of central border point. - // j_src = 2*boundloc - j_dst; - // } else if ((j_dst >= DCONST(AC_nx_min) - bsize) && ((bindex.y == 2) || (bindex.y ==1))) - // { - // boundloc = DCONST(AC_ny_min) - bsize - 1; //Location of central border point. - // i_src = 2*boundloc - j_dst; - // } - //} //if (bindex.z != 0) - //{ - // // Pick up the mirroring value. 
- // if ((k_dst < bsize) && ((bindex.z == 3) || (bindex.z ==1))) - // { - // boundloc = bsize; //Location of central border point. - // k_src = 2*boundloc - k_dst; - // } else if ((i_dst >= DCONST(AC_nz_min) - bsize) && ((bindex.z == 2) || (bindex.z ==1))) - // { - // boundloc = DCONST(AC_nz_min) - bsize - 1; //Location of central border point. - // k_src = 2*boundloc - k_dst; - // } - //} + + //Location of central border point. + boundlocx0 = bsize; + boundlocy0 = bsize; + boundlocz0 = bsize; + boundlocx1 = DCONST(AC_nx_max) - 1; + boundlocy1 = DCONST(AC_ny_max) - 1; + boundlocz1 = DCONST(AC_nz_max) - 1; + + //Defaults + i_src = i_dst; + j_src = j_dst; + k_src = k_dst; if (bindex.x < 0) - { + { // Pick up the mirroring value. - if ((i_dst < bsize)) + if ((i_dst < boundlocx0)) { - boundloc = bsize; //Location of central border point. - i_src = 2*boundloc - i_dst; - } else if ((i_dst >= DCONST(AC_nx_min) - bsize)) + i_src = 2.0f*boundlocx0 - i_dst; + + } else if ((i_dst > boundlocx1)) { - boundloc = DCONST(AC_nx_min) - bsize - 1; //Location of central border point. - i_src = 2*boundloc - i_dst; + i_src = 2.0f*boundlocx1 - i_dst; } // Pick up the mirroring value. - if ((j_dst < bsize)) + if ((j_dst < boundlocy0)) { - boundloc = bsize; //Location of central border point. - j_src = 2*boundloc - j_dst; - } else if ((j_dst >= DCONST(AC_nx_min) - bsize)) + j_src = 2.0f*boundlocy0 - j_dst; + } else if ((j_dst > boundlocx1)) { - boundloc = DCONST(AC_ny_min) - bsize - 1; //Location of central border point. - i_src = 2*boundloc - j_dst; + j_src = 2.0f*boundlocy1 - j_dst; } // Pick up the mirroring value. - if ((k_dst < bsize)) + if ((k_dst < boundlocz0)) { - boundloc = bsize; //Location of central border point. - k_src = 2*boundloc - k_dst; - } else if ((i_dst >= DCONST(AC_nz_min) - bsize)) + k_src = 2.0f*boundlocz0 - k_dst; + } else if ((k_dst > boundlocz1)) { - boundloc = DCONST(AC_nz_min) - bsize - 1; //Location of central border point. 
- k_src = 2*boundloc - k_dst; + k_src = 2.0f*boundlocz1 - k_dst; + } + + //Edges + if ( (i_dst < boundlocx0) && (j_dst < boundlocy0) ) + { + i_src = 2.0f*boundlocx0 - i_dst; + j_src = 2.0f*boundlocy0 - j_dst; + //if ((k_dst == 50)) printf("i_dst %i j_dst %i k_dst %i i_src %i j_src %i k_src %i bsize %i \n", i_dst, j_dst, k_dst, i_src, j_src, k_src, bsize); + } else if ((i_dst < boundlocx0) && (k_dst < boundlocz0) ) + { + i_src = 2.0f*boundlocx0 - i_dst; + k_src = 2.0f*boundlocz0 - k_dst; + } else if ( (j_dst < boundlocy0) && (k_dst < boundlocz0) ) + { + j_src = 2.0f*boundlocy0 - j_dst; + k_src = 2.0f*boundlocz0 - k_dst; + + } else if ((i_dst > boundlocx1) && (j_dst > boundlocx1) ) + { + i_src = 2.0f*boundlocx1 - i_dst; + j_src = 2.0f*boundlocy1 - j_dst; + } else if ( (i_dst > boundlocx1) && (k_dst > boundlocz1) ) + { + i_src = 2.0f*boundlocx1 - i_dst; + k_src = 2.0f*boundlocz1 - k_dst; + } else if ( (j_dst > boundlocy1) && (k_dst > boundlocz1) ) + { + j_src = 2.0f*boundlocy1 - j_dst; + k_src = 2.0f*boundlocz1 - k_dst; + } else if ( (i_dst > boundlocx1) && (k_dst < boundlocz0) ) + { + i_src = 2.0f*boundlocx1 - i_dst; + k_src = 2.0f*boundlocz0 - k_dst; + } else if ( (i_dst > boundlocx1) && (j_dst < bsize) ) + { + i_src = 2.0f*boundlocx1 - i_dst; + j_src = 2.0f*boundlocy0 - j_dst; + } else if ( (i_dst < boundlocx0) && (k_dst > boundlocz1) ) + { + i_src = 2.0f*boundlocx0 - i_dst; + k_src = 2.0f*boundlocz1 - k_dst; + } else if ( (i_dst < boundlocx0) && (j_dst > boundlocy1) ) + { + i_src = 2.0f*boundlocx0 - i_dst; + j_src = 2.0f*boundlocy1 - j_dst; + } else if ( (j_dst > boundlocy1) && (k_dst < boundlocz0) ) + { + j_src = 2.0f*boundlocy1 - j_dst; + k_src = 2.0f*boundlocz0 - k_dst; } } const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src); const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst); - vtxbuf[dst_idx] = sign*vtxbuf[src_idx]; // sign = 1 symmetric, sign = -1 antisymmetric + vtxbuf[dst_idx] = sign*vtxbuf[src_idx] *0.0 + 1.0; // sign = 1 symmetric, sign = 
-1 antisymmetric } @@ -187,12 +208,12 @@ acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); ERRCHK_CUDA_KERNEL(); } - else if (bc_bot.x == AC_BOUNDCOND_ANTISYMMETRIC) + else if (bc_top.x == AC_BOUNDCOND_ANTISYMMETRIC) { kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); ERRCHK_CUDA_KERNEL(); } - else if (bc_bot.x == AC_BOUNDCOND_PERIODIC) + else if (bc_top.x == AC_BOUNDCOND_PERIODIC) { kernel_periodic_boundconds<<>>(start, end, vtxbuf); ERRCHK_CUDA_KERNEL(); From 543c565e5d676914ff93998d0dd878a75e6ff84e Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Mon, 23 Nov 2020 15:43:52 +0800 Subject: [PATCH 21/21] Dummy at the moment, but now the boundary condition kernel caller can see what vertex buffer name is in use. --- src/core/device.cc | 2 +- src/core/kernels/boundconds.cuh | 55 ++++++++++++++++++++------------- src/core/kernels/kernels.h | 3 +- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index 9130f34..0d2127e 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -440,7 +440,7 @@ acDeviceGeneralBoundcondStep(const Device device, const Stream stream, { cudaSetDevice(device->id); return acKernelGeneralBoundconds(device->streams[stream], start, end, - device->vba.in[vtxbuf_handle], config, bindex); + device->vba.in[vtxbuf_handle], vtxbuf_handle, config, bindex); } AcResult diff --git a/src/core/kernels/boundconds.cuh b/src/core/kernels/boundconds.cuh index 541b22b..f0518c0 100644 --- a/src/core/kernels/boundconds.cuh +++ b/src/core/kernels/boundconds.cuh @@ -191,7 +191,8 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const AcMeshInfo config, const int3 bindex) + AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle, + const AcMeshInfo config, 
const int3 bindex) { const dim3 tpb(8, 2, 8); const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x), @@ -203,28 +204,38 @@ acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int int3 bc_bot = {config.int_params[AC_bc_type_bot_x], config.int_params[AC_bc_type_bot_y], config.int_params[AC_bc_type_bot_z]}; - if (bc_top.x == AC_BOUNDCOND_SYMMETRIC) - { - kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); - ERRCHK_CUDA_KERNEL(); - } - else if (bc_top.x == AC_BOUNDCOND_ANTISYMMETRIC) - { - kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); - ERRCHK_CUDA_KERNEL(); - } - else if (bc_top.x == AC_BOUNDCOND_PERIODIC) - { - kernel_periodic_boundconds<<>>(start, end, vtxbuf); - ERRCHK_CUDA_KERNEL(); - } - else - { - printf("ERROR: Boundary condition not recognized!\n"); - printf("ERROR: bc_top = %i, %i, %i \n", bc_top.x, bc_top.y, bc_top.z); - printf("ERROR: bc_bot = %i, %i, %i \n", bc_bot.x, bc_bot.y, bc_bot.z); +//#if AC_MPI_ENABLED +// printf( "WARNING : NON-PERIODIC BOUNDARY CONDITIONS NOT SUPPORTER BY MPI! 
Only working at node level.\n"); +// return AC_FAILURE; +//#endif + + if ( vtxbuf_handle != -1) // This is a dummy to make swithing boundary condition with respect to more possible later + { + + if (bc_top.x == AC_BOUNDCOND_SYMMETRIC) + { + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, 1); + ERRCHK_CUDA_KERNEL(); + } + else if (bc_top.x == AC_BOUNDCOND_ANTISYMMETRIC) + { + kernel_symmetric_boundconds<<>>(start, end, vtxbuf, bindex, -1); + ERRCHK_CUDA_KERNEL(); + } + else if (bc_top.x == AC_BOUNDCOND_PERIODIC) + { + kernel_periodic_boundconds<<>>(start, end, vtxbuf); + ERRCHK_CUDA_KERNEL(); + } + else + { + printf("ERROR: Boundary condition not recognized!\n"); + printf("ERROR: bc_top = %i, %i, %i \n", bc_top.x, bc_top.y, bc_top.z); + printf("ERROR: bc_bot = %i, %i, %i \n", bc_bot.x, bc_bot.y, bc_bot.z); + + return AC_FAILURE; + } - return AC_FAILURE; } return AC_SUCCESS; diff --git a/src/core/kernels/kernels.h b/src/core/kernels/kernels.h index bc94185..a9e05f9 100644 --- a/src/core/kernels/kernels.h +++ b/src/core/kernels/kernels.h @@ -45,7 +45,8 @@ AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, AcReal* vtxbuf); /** */ AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end, - AcReal* vtxbuf, const AcMeshInfo config, const int3 bindex); + AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle, + const AcMeshInfo config, const int3 bindex); /** */