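Optimized MPI synchronization in acGridIntegrate: replaced acGridSynchronizeStream with acDeviceSynchronizeStream, added an MPI_Barrier after packing the comm data, and moved the inner integration to just before the acTransferCommDataWait calls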

This commit is contained in:
Johannes Pekkila
2020-03-31 12:36:25 +02:00
parent 24e65ab02d
commit 742dcc2697

View File

@@ -1220,7 +1220,7 @@ AcResult
acGridIntegrate(const Stream stream, const AcReal dt) acGridIntegrate(const Stream stream, const AcReal dt)
{ {
ERRCHK(grid.initialized); ERRCHK(grid.initialized);
acGridSynchronizeStream(stream); //acGridSynchronizeStream(stream);
const Device device = grid.device; const Device device = grid.device;
const int3 nn = grid.nn; const int3 nn = grid.nn;
@@ -1231,6 +1231,8 @@ acGridIntegrate(const Stream stream, const AcReal dt)
CommData sidexy_data = grid.sidexy_data; CommData sidexy_data = grid.sidexy_data;
CommData sidexz_data = grid.sidexz_data; CommData sidexz_data = grid.sidexz_data;
CommData sideyz_data = grid.sideyz_data; CommData sideyz_data = grid.sideyz_data;
acDeviceSynchronizeStream(device, stream);
// Corners // Corners
const int3 corner_a0s[] = { const int3 corner_a0s[] = {
@@ -1343,13 +1345,7 @@ acGridIntegrate(const Stream stream, const AcReal dt)
acPackCommData(device, sidexz_a0s, &sidexz_data); acPackCommData(device, sidexz_a0s, &sidexz_data);
acPackCommData(device, sideyz_a0s, &sideyz_data); acPackCommData(device, sideyz_a0s, &sideyz_data);
//////////// INNER INTEGRATION ////////////// MPI_Barrier(MPI_COMM_WORLD);
{
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
const int3 m2 = nn;
acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt);
}
////////////////////////////////////////////
#if MPI_GPUDIRECT_DISABLED #if MPI_GPUDIRECT_DISABLED
acTransferCommDataToHost(device, &corner_data); acTransferCommDataToHost(device, &corner_data);
@@ -1368,6 +1364,14 @@ acGridIntegrate(const Stream stream, const AcReal dt)
acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data); acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data);
acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data); acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data);
acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data); acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data);
//////////// INNER INTEGRATION //////////////
{
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
const int3 m2 = nn;
acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt);
}
////////////////////////////////////////////
acTransferCommDataWait(corner_data); acTransferCommDataWait(corner_data);
acTransferCommDataWait(edgex_data); acTransferCommDataWait(edgex_data);