Better overlap of computation and communication when the inner integration is launched first

This commit is contained in:
jpekkila
2020-03-26 18:00:01 +02:00
parent 4c82e3c563
commit 28792770f2

View File

@@ -10,7 +10,7 @@
#include "kernels/kernels.h" #include "kernels/kernels.h"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#define MPI_GPUDIRECT_DISABLED (0) #define MPI_GPUDIRECT_DISABLED (1)
AcResult AcResult
acDevicePrintInfo(const Device device) acDevicePrintInfo(const Device device)
@@ -491,6 +491,11 @@ getPid3D(const int pid, const int3 decomposition)
static int3 static int3
decompose(const int target) decompose(const int target)
{ {
if (target == 16)
return (int3){4, 2, 2};
if (target == 32)
return (int3){4, 4, 2};
int decomposition[] = {1, 1, 1}; int decomposition[] = {1, 1, 1};
int axis = 0; int axis = 0;
@@ -1334,6 +1339,14 @@ acGridIntegrate(const Stream stream, const AcReal dt)
acPackCommData(device, sidexz_a0s, &sidexz_data); acPackCommData(device, sidexz_a0s, &sidexz_data);
acPackCommData(device, sideyz_a0s, &sideyz_data); acPackCommData(device, sideyz_a0s, &sideyz_data);
//////////// INNER INTEGRATION //////////////
{
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
const int3 m2 = nn;
acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt);
}
////////////////////////////////////////////
#if MPI_GPUDIRECT_DISABLED #if MPI_GPUDIRECT_DISABLED
acTransferCommDataToHost(device, &corner_data); acTransferCommDataToHost(device, &corner_data);
acTransferCommDataToHost(device, &edgex_data); acTransferCommDataToHost(device, &edgex_data);
@@ -1344,14 +1357,6 @@ acGridIntegrate(const Stream stream, const AcReal dt)
acTransferCommDataToHost(device, &sideyz_data); acTransferCommDataToHost(device, &sideyz_data);
#endif #endif
//////////// INNER INTEGRATION //////////////
{
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
const int3 m2 = nn;
acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt);
}
////////////////////////////////////////////
acTransferCommData(device, corner_a0s, corner_b0s, &corner_data); acTransferCommData(device, corner_a0s, corner_b0s, &corner_data);
acTransferCommData(device, edgex_a0s, edgex_b0s, &edgex_data); acTransferCommData(device, edgex_a0s, edgex_b0s, &edgex_data);
acTransferCommData(device, edgey_a0s, edgey_b0s, &edgey_data); acTransferCommData(device, edgey_a0s, edgey_b0s, &edgey_data);