Created separate acGridIntegrateNonperiodic()
from +acGridIntegrate() this is to avoid some potential issues with an upcoming merge by Oskar.
This commit is contained in:
@@ -1714,6 +1714,220 @@ acGridIntegrate(const Stream stream, const AcReal dt)
|
|||||||
acGridLoadScalarUniform(stream, AC_dt, dt);
|
acGridLoadScalarUniform(stream, AC_dt, dt);
|
||||||
acDeviceSynchronizeStream(device, stream);
|
acDeviceSynchronizeStream(device, stream);
|
||||||
|
|
||||||
|
// Corners
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
// Do not rm: required for corners
|
||||||
|
const int3 corner_b0s[] = {
|
||||||
|
(int3){0, 0, 0},
|
||||||
|
(int3){NGHOST + nn.x, 0, 0},
|
||||||
|
(int3){0, NGHOST + nn.y, 0},
|
||||||
|
(int3){0, 0, NGHOST + nn.z},
|
||||||
|
|
||||||
|
(int3){NGHOST + nn.x, NGHOST + nn.y, 0},
|
||||||
|
(int3){NGHOST + nn.x, 0, NGHOST + nn.z},
|
||||||
|
(int3){0, NGHOST + nn.y, NGHOST + nn.z},
|
||||||
|
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z},
|
||||||
|
};
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
|
||||||
|
// Edges X
|
||||||
|
const int3 edgex_b0s[] = {
|
||||||
|
(int3){NGHOST, 0, 0},
|
||||||
|
(int3){NGHOST, NGHOST + nn.y, 0},
|
||||||
|
|
||||||
|
(int3){NGHOST, 0, NGHOST + nn.z},
|
||||||
|
(int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Edges Y
|
||||||
|
const int3 edgey_b0s[] = {
|
||||||
|
(int3){0, NGHOST, 0},
|
||||||
|
(int3){NGHOST + nn.x, NGHOST, 0},
|
||||||
|
|
||||||
|
(int3){0, NGHOST, NGHOST + nn.z},
|
||||||
|
(int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Edges Z
|
||||||
|
const int3 edgez_b0s[] = {
|
||||||
|
(int3){0, 0, NGHOST},
|
||||||
|
(int3){NGHOST + nn.x, 0, NGHOST},
|
||||||
|
|
||||||
|
(int3){0, NGHOST + nn.y, NGHOST},
|
||||||
|
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sides XY
|
||||||
|
const int3 sidexy_b0s[] = {
|
||||||
|
(int3){NGHOST, NGHOST, 0}, //
|
||||||
|
(int3){NGHOST, NGHOST, NGHOST + nn.z}, //
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sides XZ
|
||||||
|
const int3 sidexz_b0s[] = {
|
||||||
|
(int3){NGHOST, 0, NGHOST}, //
|
||||||
|
(int3){NGHOST, NGHOST + nn.y, NGHOST}, //
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sides YZ
|
||||||
|
const int3 sideyz_b0s[] = {
|
||||||
|
(int3){0, NGHOST, NGHOST}, //
|
||||||
|
(int3){NGHOST + nn.x, NGHOST, NGHOST}, //
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int isubstep = 0; isubstep < 3; ++isubstep) {
|
||||||
|
|
||||||
|
#if MPI_COMM_ENABLED
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acPackCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acPackCommData(device, edgex_b0s, &edgex_data);
|
||||||
|
acPackCommData(device, edgey_b0s, &edgey_data);
|
||||||
|
acPackCommData(device, edgez_b0s, &edgez_data);
|
||||||
|
acPackCommData(device, sidexy_b0s, &sidexy_data);
|
||||||
|
acPackCommData(device, sidexz_b0s, &sidexz_data);
|
||||||
|
acPackCommData(device, sideyz_b0s, &sideyz_data);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MPI_COMM_ENABLED
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
#if MPI_GPUDIRECT_DISABLED
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acTransferCommDataToHost(device, &corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acTransferCommDataToHost(device, &edgex_data);
|
||||||
|
acTransferCommDataToHost(device, &edgey_data);
|
||||||
|
acTransferCommDataToHost(device, &edgez_data);
|
||||||
|
acTransferCommDataToHost(device, &sidexy_data);
|
||||||
|
acTransferCommDataToHost(device, &sidexz_data);
|
||||||
|
acTransferCommDataToHost(device, &sideyz_data);
|
||||||
|
#endif
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acTransferCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acTransferCommData(device, edgex_b0s, &edgex_data);
|
||||||
|
acTransferCommData(device, edgey_b0s, &edgey_data);
|
||||||
|
acTransferCommData(device, edgez_b0s, &edgez_data);
|
||||||
|
acTransferCommData(device, sidexy_b0s, &sidexy_data);
|
||||||
|
acTransferCommData(device, sidexz_b0s, &sidexz_data);
|
||||||
|
acTransferCommData(device, sideyz_b0s, &sideyz_data);
|
||||||
|
#endif // MPI_COMM_ENABLED
|
||||||
|
|
||||||
|
#if MPI_COMPUTE_ENABLED
|
||||||
|
//////////// INNER INTEGRATION //////////////
|
||||||
|
{
|
||||||
|
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
|
||||||
|
const int3 m2 = nn;
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_16], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
////////////////////////////////////////////
|
||||||
|
#endif // MPI_COMPUTE_ENABLED
|
||||||
|
|
||||||
|
#if MPI_COMM_ENABLED
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acTransferCommDataWait(corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acTransferCommDataWait(edgex_data);
|
||||||
|
acTransferCommDataWait(edgey_data);
|
||||||
|
acTransferCommDataWait(edgez_data);
|
||||||
|
acTransferCommDataWait(sidexy_data);
|
||||||
|
acTransferCommDataWait(sidexz_data);
|
||||||
|
acTransferCommDataWait(sideyz_data);
|
||||||
|
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acUnpinCommData(device, &corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acUnpinCommData(device, &edgex_data);
|
||||||
|
acUnpinCommData(device, &edgey_data);
|
||||||
|
acUnpinCommData(device, &edgez_data);
|
||||||
|
acUnpinCommData(device, &sidexy_data);
|
||||||
|
acUnpinCommData(device, &sidexz_data);
|
||||||
|
acUnpinCommData(device, &sideyz_data);
|
||||||
|
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acUnpackCommData(device, corner_b0s, &corner_data);
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acUnpackCommData(device, edgex_b0s, &edgex_data);
|
||||||
|
acUnpackCommData(device, edgey_b0s, &edgey_data);
|
||||||
|
acUnpackCommData(device, edgez_b0s, &edgez_data);
|
||||||
|
acUnpackCommData(device, sidexy_b0s, &sidexy_data);
|
||||||
|
acUnpackCommData(device, sidexz_b0s, &sidexz_data);
|
||||||
|
acUnpackCommData(device, sideyz_b0s, &sideyz_data);
|
||||||
|
//////////// OUTER INTEGRATION //////////////
|
||||||
|
|
||||||
|
// Wait for unpacking
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
acSyncCommData(corner_data); // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
acSyncCommData(edgex_data);
|
||||||
|
acSyncCommData(edgey_data);
|
||||||
|
acSyncCommData(edgez_data);
|
||||||
|
acSyncCommData(sidexy_data);
|
||||||
|
acSyncCommData(sidexz_data);
|
||||||
|
acSyncCommData(sideyz_data);
|
||||||
|
#endif // MPI_COMM_ENABLED
|
||||||
|
#if MPI_COMPUTE_ENABLED
|
||||||
|
{ // Front
|
||||||
|
const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
|
||||||
|
const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_0], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
{ // Back
|
||||||
|
const int3 m1 = (int3){NGHOST, NGHOST, nn.z};
|
||||||
|
const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_1], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
{ // Bottom
|
||||||
|
const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
|
||||||
|
const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_2], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
{ // Top
|
||||||
|
const int3 m1 = (int3){NGHOST, nn.y, 2 * NGHOST};
|
||||||
|
const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_3], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
{ // Left
|
||||||
|
const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
|
||||||
|
const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_4], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
{ // Right
|
||||||
|
const int3 m1 = (int3){nn.x, 2 * NGHOST, 2 * NGHOST};
|
||||||
|
const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
|
||||||
|
acKernelIntegrateSubstep(device->streams[STREAM_5], isubstep, m1, m2, device->vba);
|
||||||
|
}
|
||||||
|
#endif // MPI_COMPUTE_ENABLED
|
||||||
|
acDeviceSwapBuffers(device);
|
||||||
|
acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done
|
||||||
|
////////////////////////////////////////////
|
||||||
|
}
|
||||||
|
|
||||||
|
return AC_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
acGridIntegrateNonperiodic(const Stream stream, const AcReal dt)
|
||||||
|
{
|
||||||
|
ERRCHK(grid.initialized);
|
||||||
|
acGridSynchronizeStream(stream);
|
||||||
|
|
||||||
|
const Device device = grid.device;
|
||||||
|
const int3 nn = grid.nn;
|
||||||
|
#if MPI_INCL_CORNERS
|
||||||
|
CommData corner_data = grid.corner_data; // Do not rm: required for corners
|
||||||
|
#endif // MPI_INCL_CORNERS
|
||||||
|
CommData edgex_data = grid.edgex_data;
|
||||||
|
CommData edgey_data = grid.edgey_data;
|
||||||
|
CommData edgez_data = grid.edgez_data;
|
||||||
|
CommData sidexy_data = grid.sidexy_data;
|
||||||
|
CommData sidexz_data = grid.sidexz_data;
|
||||||
|
CommData sideyz_data = grid.sideyz_data;
|
||||||
|
|
||||||
|
acGridLoadScalarUniform(stream, AC_dt, dt);
|
||||||
|
acDeviceSynchronizeStream(device, stream);
|
||||||
|
|
||||||
|
|
||||||
// Corners
|
// Corners
|
||||||
#if MPI_INCL_CORNERS
|
#if MPI_INCL_CORNERS
|
||||||
@@ -1916,6 +2130,7 @@ acGridIntegrate(const Stream stream, const AcReal dt)
|
|||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acGridPeriodicBoundconds(const Stream stream)
|
acGridPeriodicBoundconds(const Stream stream)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user