From d6d59205535ecc769308e4661b4dcbaadb8364bb Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 31 Mar 2020 14:23:36 +0300 Subject: [PATCH] Pulled improvements to device.cc from the benchmark branch to master --- src/core/device.cc | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/core/device.cc b/src/core/device.cc index 8c78b09..59ad2e8 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -495,6 +495,10 @@ decompose(const int target) return (int3){4, 2, 2}; if (target == 32) return (int3){4, 4, 2}; + if (target == 128) + return (int3){8, 4, 4}; + if (target == 256) + return (int3){8, 8, 4}; int decomposition[] = {1, 1, 1}; @@ -1216,7 +1220,7 @@ AcResult acGridIntegrate(const Stream stream, const AcReal dt) { ERRCHK(grid.initialized); - acGridSynchronizeStream(stream); + //acGridSynchronizeStream(stream); const Device device = grid.device; const int3 nn = grid.nn; @@ -1227,6 +1231,8 @@ acGridIntegrate(const Stream stream, const AcReal dt) CommData sidexy_data = grid.sidexy_data; CommData sidexz_data = grid.sidexz_data; CommData sideyz_data = grid.sideyz_data; + + acDeviceSynchronizeStream(device, stream); // Corners const int3 corner_a0s[] = { @@ -1339,13 +1345,7 @@ acGridIntegrate(const Stream stream, const AcReal dt) acPackCommData(device, sidexz_a0s, &sidexz_data); acPackCommData(device, sideyz_a0s, &sideyz_data); - //////////// INNER INTEGRATION ////////////// - { - const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST}; - const int3 m2 = nn; - acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt); - } - //////////////////////////////////////////// + MPI_Barrier(MPI_COMM_WORLD); #if MPI_GPUDIRECT_DISABLED acTransferCommDataToHost(device, &corner_data); @@ -1364,6 +1364,14 @@ acGridIntegrate(const Stream stream, const AcReal dt) acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data); acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data); acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data); + + //////////// INNER INTEGRATION ////////////// + { + const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST}; + const int3 m2 = nn; + acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt); + } + //////////////////////////////////////////// acTransferCommDataWait(corner_data); acTransferCommDataWait(edgex_data);