Removed old unused functions for MPi integration and comm
This commit is contained in:
@@ -1207,544 +1207,6 @@ acGridStoreMesh(const Stream stream, AcMesh* host_mesh)
|
|||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AcResult
|
|
||||||
acDeviceIntegrateMPI(const Device device, const AcReal dt)
|
|
||||||
{
|
|
||||||
// Configure
|
|
||||||
const int3 nn = (int3){
|
|
||||||
device->local_config.int_params[AC_nx],
|
|
||||||
device->local_config.int_params[AC_ny],
|
|
||||||
device->local_config.int_params[AC_nz],
|
|
||||||
};
|
|
||||||
|
|
||||||
// Corners
|
|
||||||
const int3 corner_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
(int3){nn.x, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
const int3 corner_b0s[] = {
|
|
||||||
(int3){0, 0, 0},
|
|
||||||
(int3){NGHOST + nn.x, 0, 0},
|
|
||||||
(int3){0, NGHOST + nn.y, 0},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, 0},
|
|
||||||
|
|
||||||
(int3){0, 0, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, 0, NGHOST + nn.z},
|
|
||||||
(int3){0, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 corner_dims = (int3){NGHOST, NGHOST, NGHOST};
|
|
||||||
|
|
||||||
// Edges X
|
|
||||||
const int3 edgex_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 edgex_b0s[] = {
|
|
||||||
(int3){NGHOST, 0, 0},
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, 0},
|
|
||||||
|
|
||||||
(int3){NGHOST, 0, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 edgex_dims = (int3){nn.x, NGHOST, NGHOST};
|
|
||||||
|
|
||||||
// Edges Y
|
|
||||||
const int3 edgey_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 edgey_b0s[] = {
|
|
||||||
(int3){0, NGHOST, 0},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, 0},
|
|
||||||
|
|
||||||
(int3){0, NGHOST, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 edgey_dims = (int3){NGHOST, nn.y, NGHOST};
|
|
||||||
|
|
||||||
// Edges Z
|
|
||||||
const int3 edgez_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 edgez_b0s[] = {
|
|
||||||
(int3){0, 0, NGHOST},
|
|
||||||
(int3){NGHOST + nn.x, 0, NGHOST},
|
|
||||||
|
|
||||||
(int3){0, NGHOST + nn.y, NGHOST},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 edgez_dims = (int3){NGHOST, NGHOST, nn.z};
|
|
||||||
|
|
||||||
// Sides XY
|
|
||||||
const int3 sidexy_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 sidexy_b0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, 0}, //
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST + nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 sidexy_dims = (int3){nn.x, nn.y, NGHOST};
|
|
||||||
|
|
||||||
// Sides XZ
|
|
||||||
const int3 sidexz_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sidexz_b0s[] = {
|
|
||||||
(int3){NGHOST, 0, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sidexz_dims = (int3){nn.x, NGHOST, nn.z};
|
|
||||||
|
|
||||||
// Sides YZ
|
|
||||||
const int3 sideyz_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sideyz_b0s[] = {
|
|
||||||
(int3){0, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sideyz_dims = (int3){NGHOST, nn.y, nn.z};
|
|
||||||
|
|
||||||
// Alloc
|
|
||||||
CommData corner_data = acCreateCommData(device, corner_dims, ARRAY_SIZE(corner_a0s));
|
|
||||||
CommData edgex_data = acCreateCommData(device, edgex_dims, ARRAY_SIZE(edgex_a0s));
|
|
||||||
CommData edgey_data = acCreateCommData(device, edgey_dims, ARRAY_SIZE(edgey_a0s));
|
|
||||||
CommData edgez_data = acCreateCommData(device, edgez_dims, ARRAY_SIZE(edgez_a0s));
|
|
||||||
CommData sidexy_data = acCreateCommData(device, sidexy_dims, ARRAY_SIZE(sidexy_a0s));
|
|
||||||
CommData sidexz_data = acCreateCommData(device, sidexz_dims, ARRAY_SIZE(sidexz_a0s));
|
|
||||||
CommData sideyz_data = acCreateCommData(device, sideyz_dims, ARRAY_SIZE(sideyz_a0s));
|
|
||||||
|
|
||||||
// Warmup
|
|
||||||
for (int i = 0; i < 10; ++i) {
|
|
||||||
acPackCommData(device, corner_a0s, &corner_data);
|
|
||||||
acPackCommData(device, edgex_a0s, &edgex_data);
|
|
||||||
acPackCommData(device, edgey_a0s, &edgey_data);
|
|
||||||
acPackCommData(device, edgez_a0s, &edgez_data);
|
|
||||||
acPackCommData(device, sidexy_a0s, &sidexy_data);
|
|
||||||
acPackCommData(device, sidexz_a0s, &sidexz_data);
|
|
||||||
acPackCommData(device, sideyz_a0s, &sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToHost(device, &corner_data);
|
|
||||||
acTransferCommDataToHost(device, &edgex_data);
|
|
||||||
acTransferCommDataToHost(device, &edgey_data);
|
|
||||||
acTransferCommDataToHost(device, &edgez_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexy_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexz_data);
|
|
||||||
acTransferCommDataToHost(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
acTransferCommData(device, corner_a0s, corner_b0s, &corner_data);
|
|
||||||
acTransferCommData(device, edgex_a0s, edgex_b0s, &edgex_data);
|
|
||||||
acTransferCommData(device, edgey_a0s, edgey_b0s, &edgey_data);
|
|
||||||
acTransferCommData(device, edgez_a0s, edgez_b0s, &edgez_data);
|
|
||||||
acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data);
|
|
||||||
acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data);
|
|
||||||
acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data);
|
|
||||||
|
|
||||||
acTransferCommDataWait(corner_data);
|
|
||||||
acTransferCommDataWait(edgex_data);
|
|
||||||
acTransferCommDataWait(edgey_data);
|
|
||||||
acTransferCommDataWait(edgez_data);
|
|
||||||
acTransferCommDataWait(sidexy_data);
|
|
||||||
acTransferCommDataWait(sidexz_data);
|
|
||||||
acTransferCommDataWait(sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToDevice(device, &corner_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgex_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgey_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgez_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexy_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexz_data);
|
|
||||||
acTransferCommDataToDevice(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
acUnpackCommData(device, corner_b0s, &corner_data);
|
|
||||||
acUnpackCommData(device, edgex_b0s, &edgex_data);
|
|
||||||
acUnpackCommData(device, edgey_b0s, &edgey_data);
|
|
||||||
acUnpackCommData(device, edgez_b0s, &edgez_data);
|
|
||||||
acUnpackCommData(device, sidexy_b0s, &sidexy_data);
|
|
||||||
acUnpackCommData(device, sidexz_b0s, &sidexz_data);
|
|
||||||
acUnpackCommData(device, sideyz_b0s, &sideyz_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Communicate
|
|
||||||
Timer ttot;
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
timer_reset(&ttot);
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
const int num_iterations = 1;
|
|
||||||
for (int i = 0; i < num_iterations; ++i) {
|
|
||||||
for (int isubstep = 0; isubstep < 3; ++isubstep) {
|
|
||||||
acPackCommData(device, corner_a0s, &corner_data);
|
|
||||||
acPackCommData(device, edgex_a0s, &edgex_data);
|
|
||||||
acPackCommData(device, edgey_a0s, &edgey_data);
|
|
||||||
acPackCommData(device, edgez_a0s, &edgez_data);
|
|
||||||
acPackCommData(device, sidexy_a0s, &sidexy_data);
|
|
||||||
acPackCommData(device, sidexz_a0s, &sidexz_data);
|
|
||||||
acPackCommData(device, sideyz_a0s, &sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToHost(device, &corner_data);
|
|
||||||
acTransferCommDataToHost(device, &edgex_data);
|
|
||||||
acTransferCommDataToHost(device, &edgey_data);
|
|
||||||
acTransferCommDataToHost(device, &edgez_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexy_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexz_data);
|
|
||||||
acTransferCommDataToHost(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//////////// INNER INTEGRATION //////////////
|
|
||||||
{
|
|
||||||
const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
|
|
||||||
const int3 m2 = nn;
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_16, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
////////////////////////////////////////////
|
|
||||||
|
|
||||||
acTransferCommData(device, corner_a0s, corner_b0s, &corner_data);
|
|
||||||
acTransferCommData(device, edgex_a0s, edgex_b0s, &edgex_data);
|
|
||||||
acTransferCommData(device, edgey_a0s, edgey_b0s, &edgey_data);
|
|
||||||
acTransferCommData(device, edgez_a0s, edgez_b0s, &edgez_data);
|
|
||||||
acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data);
|
|
||||||
acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data);
|
|
||||||
acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data);
|
|
||||||
|
|
||||||
acTransferCommDataWait(corner_data);
|
|
||||||
acTransferCommDataWait(edgex_data);
|
|
||||||
acTransferCommDataWait(edgey_data);
|
|
||||||
acTransferCommDataWait(edgez_data);
|
|
||||||
acTransferCommDataWait(sidexy_data);
|
|
||||||
acTransferCommDataWait(sidexz_data);
|
|
||||||
acTransferCommDataWait(sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToDevice(device, &corner_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgex_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgey_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgez_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexy_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexz_data);
|
|
||||||
acTransferCommDataToDevice(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
acUnpackCommData(device, corner_b0s, &corner_data);
|
|
||||||
acUnpackCommData(device, edgex_b0s, &edgex_data);
|
|
||||||
acUnpackCommData(device, edgey_b0s, &edgey_data);
|
|
||||||
acUnpackCommData(device, edgez_b0s, &edgez_data);
|
|
||||||
acUnpackCommData(device, sidexy_b0s, &sidexy_data);
|
|
||||||
acUnpackCommData(device, sidexz_b0s, &sidexz_data);
|
|
||||||
acUnpackCommData(device, sideyz_b0s, &sideyz_data);
|
|
||||||
//////////// OUTER INTEGRATION //////////////
|
|
||||||
acDeviceSynchronizeStream(device, STREAM_ALL); // Wait for unpacking
|
|
||||||
{ // Front
|
|
||||||
const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
|
|
||||||
const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_0, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
{ // Back
|
|
||||||
const int3 m1 = (int3){NGHOST, NGHOST, nn.z};
|
|
||||||
const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_1, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
{ // Bottom
|
|
||||||
const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
|
|
||||||
const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_2, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
{ // Top
|
|
||||||
const int3 m1 = (int3){NGHOST, nn.y, 2 * NGHOST};
|
|
||||||
const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_3, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
{ // Left
|
|
||||||
const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
|
|
||||||
const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_4, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
{ // Right
|
|
||||||
const int3 m1 = (int3){nn.x, 2 * NGHOST, 2 * NGHOST};
|
|
||||||
const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
|
|
||||||
acDeviceIntegrateSubstep(device, STREAM_5, isubstep, m1, m2, dt);
|
|
||||||
}
|
|
||||||
acDeviceSwapBuffers(device);
|
|
||||||
acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done
|
|
||||||
////////////////////////////////////////////
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
const double msec = timer_diff_nsec(ttot) / 1e6;
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
int pid, nprocs;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|
||||||
if (!pid) {
|
|
||||||
printf("--- Total communication time per step w/ integration: %f ms\n",
|
|
||||||
msec / num_iterations);
|
|
||||||
|
|
||||||
// Write out to file
|
|
||||||
FILE* fp = fopen("benchmark.result", "a+");
|
|
||||||
fprintf(fp, "%d, %f\n", nprocs, msec);
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dealloc
|
|
||||||
acDestroyCommData(device, &corner_data);
|
|
||||||
acDestroyCommData(device, &edgex_data);
|
|
||||||
acDestroyCommData(device, &edgey_data);
|
|
||||||
acDestroyCommData(device, &edgez_data);
|
|
||||||
acDestroyCommData(device, &sidexy_data);
|
|
||||||
acDestroyCommData(device, &sidexz_data);
|
|
||||||
acDestroyCommData(device, &sideyz_data);
|
|
||||||
|
|
||||||
return AC_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static AcResult
|
|
||||||
acDeviceCommunicateHalosMPI(const Device device)
|
|
||||||
{
|
|
||||||
// Configure
|
|
||||||
const int3 nn = (int3){
|
|
||||||
device->local_config.int_params[AC_nx],
|
|
||||||
device->local_config.int_params[AC_ny],
|
|
||||||
device->local_config.int_params[AC_nz],
|
|
||||||
};
|
|
||||||
|
|
||||||
// Corners
|
|
||||||
const int3 corner_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
(int3){nn.x, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
const int3 corner_b0s[] = {
|
|
||||||
(int3){0, 0, 0},
|
|
||||||
(int3){NGHOST + nn.x, 0, 0},
|
|
||||||
(int3){0, NGHOST + nn.y, 0},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, 0},
|
|
||||||
|
|
||||||
(int3){0, 0, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, 0, NGHOST + nn.z},
|
|
||||||
(int3){0, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 corner_dims = (int3){NGHOST, NGHOST, NGHOST};
|
|
||||||
|
|
||||||
// Edges X
|
|
||||||
const int3 edgex_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 edgex_b0s[] = {
|
|
||||||
(int3){NGHOST, 0, 0},
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, 0},
|
|
||||||
|
|
||||||
(int3){NGHOST, 0, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 edgex_dims = (int3){nn.x, NGHOST, NGHOST};
|
|
||||||
|
|
||||||
// Edges Y
|
|
||||||
const int3 edgey_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 edgey_b0s[] = {
|
|
||||||
(int3){0, NGHOST, 0},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, 0},
|
|
||||||
|
|
||||||
(int3){0, NGHOST, NGHOST + nn.z},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z},
|
|
||||||
};
|
|
||||||
const int3 edgey_dims = (int3){NGHOST, nn.y, NGHOST};
|
|
||||||
|
|
||||||
// Edges Z
|
|
||||||
const int3 edgez_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 edgez_b0s[] = {
|
|
||||||
(int3){0, 0, NGHOST},
|
|
||||||
(int3){NGHOST + nn.x, 0, NGHOST},
|
|
||||||
|
|
||||||
(int3){0, NGHOST + nn.y, NGHOST},
|
|
||||||
(int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 edgez_dims = (int3){NGHOST, NGHOST, nn.z};
|
|
||||||
|
|
||||||
// Sides XY
|
|
||||||
const int3 sidexy_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 sidexy_b0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, 0}, //
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST + nn.z}, //
|
|
||||||
};
|
|
||||||
const int3 sidexy_dims = (int3){nn.x, nn.y, NGHOST};
|
|
||||||
|
|
||||||
// Sides XZ
|
|
||||||
const int3 sidexz_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sidexz_b0s[] = {
|
|
||||||
(int3){NGHOST, 0, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST + nn.y, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sidexz_dims = (int3){nn.x, NGHOST, nn.z};
|
|
||||||
|
|
||||||
// Sides YZ
|
|
||||||
const int3 sideyz_a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sideyz_b0s[] = {
|
|
||||||
(int3){0, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST + nn.x, NGHOST, NGHOST}, //
|
|
||||||
};
|
|
||||||
const int3 sideyz_dims = (int3){NGHOST, nn.y, nn.z};
|
|
||||||
|
|
||||||
// Alloc
|
|
||||||
CommData corner_data = acCreateCommData(device, corner_dims, ARRAY_SIZE(corner_a0s));
|
|
||||||
CommData edgex_data = acCreateCommData(device, edgex_dims, ARRAY_SIZE(edgex_a0s));
|
|
||||||
CommData edgey_data = acCreateCommData(device, edgey_dims, ARRAY_SIZE(edgey_a0s));
|
|
||||||
CommData edgez_data = acCreateCommData(device, edgez_dims, ARRAY_SIZE(edgez_a0s));
|
|
||||||
CommData sidexy_data = acCreateCommData(device, sidexy_dims, ARRAY_SIZE(sidexy_a0s));
|
|
||||||
CommData sidexz_data = acCreateCommData(device, sidexz_dims, ARRAY_SIZE(sidexz_a0s));
|
|
||||||
CommData sideyz_data = acCreateCommData(device, sideyz_dims, ARRAY_SIZE(sideyz_a0s));
|
|
||||||
|
|
||||||
// Communicate
|
|
||||||
Timer ttot;
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
timer_reset(&ttot);
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
acPackCommData(device, corner_a0s, &corner_data);
|
|
||||||
acPackCommData(device, edgex_a0s, &edgex_data);
|
|
||||||
acPackCommData(device, edgey_a0s, &edgey_data);
|
|
||||||
acPackCommData(device, edgez_a0s, &edgez_data);
|
|
||||||
acPackCommData(device, sidexy_a0s, &sidexy_data);
|
|
||||||
acPackCommData(device, sidexz_a0s, &sidexz_data);
|
|
||||||
acPackCommData(device, sideyz_a0s, &sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToHost(device, &corner_data);
|
|
||||||
acTransferCommDataToHost(device, &edgex_data);
|
|
||||||
acTransferCommDataToHost(device, &edgey_data);
|
|
||||||
acTransferCommDataToHost(device, &edgez_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexy_data);
|
|
||||||
acTransferCommDataToHost(device, &sidexz_data);
|
|
||||||
acTransferCommDataToHost(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
acTransferCommData(device, corner_a0s, corner_b0s, &corner_data);
|
|
||||||
acTransferCommData(device, edgex_a0s, edgex_b0s, &edgex_data);
|
|
||||||
acTransferCommData(device, edgey_a0s, edgey_b0s, &edgey_data);
|
|
||||||
acTransferCommData(device, edgez_a0s, edgez_b0s, &edgez_data);
|
|
||||||
acTransferCommData(device, sidexy_a0s, sidexy_b0s, &sidexy_data);
|
|
||||||
acTransferCommData(device, sidexz_a0s, sidexz_b0s, &sidexz_data);
|
|
||||||
acTransferCommData(device, sideyz_a0s, sideyz_b0s, &sideyz_data);
|
|
||||||
|
|
||||||
acTransferCommDataWait(corner_data);
|
|
||||||
acTransferCommDataWait(edgex_data);
|
|
||||||
acTransferCommDataWait(edgey_data);
|
|
||||||
acTransferCommDataWait(edgez_data);
|
|
||||||
acTransferCommDataWait(sidexy_data);
|
|
||||||
acTransferCommDataWait(sidexz_data);
|
|
||||||
acTransferCommDataWait(sideyz_data);
|
|
||||||
|
|
||||||
#if MPI_GPUDIRECT_DISABLED
|
|
||||||
acTransferCommDataToDevice(device, &corner_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgex_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgey_data);
|
|
||||||
acTransferCommDataToDevice(device, &edgez_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexy_data);
|
|
||||||
acTransferCommDataToDevice(device, &sidexz_data);
|
|
||||||
acTransferCommDataToDevice(device, &sideyz_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
acUnpackCommData(device, corner_b0s, &corner_data);
|
|
||||||
acUnpackCommData(device, edgex_b0s, &edgex_data);
|
|
||||||
acUnpackCommData(device, edgey_b0s, &edgey_data);
|
|
||||||
acUnpackCommData(device, edgez_b0s, &edgez_data);
|
|
||||||
acUnpackCommData(device, sidexy_b0s, &sidexy_data);
|
|
||||||
acUnpackCommData(device, sidexz_b0s, &sidexz_data);
|
|
||||||
acUnpackCommData(device, sideyz_b0s, &sideyz_data);
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
const double msec = timer_diff_nsec(ttot) / 1e6;
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
int pid, nprocs;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|
||||||
if (!pid) {
|
|
||||||
printf("--- Total communication time per substep (comm): %f ms\n", msec);
|
|
||||||
|
|
||||||
// Write out to file
|
|
||||||
FILE* fp = fopen("benchmark.result", "a+");
|
|
||||||
fprintf(fp, "%d, %f\n", nprocs, msec);
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dealloc
|
|
||||||
acDestroyCommData(device, &corner_data);
|
|
||||||
acDestroyCommData(device, &edgex_data);
|
|
||||||
acDestroyCommData(device, &edgey_data);
|
|
||||||
acDestroyCommData(device, &edgez_data);
|
|
||||||
acDestroyCommData(device, &sidexy_data);
|
|
||||||
acDestroyCommData(device, &sidexz_data);
|
|
||||||
acDestroyCommData(device, &sideyz_data);
|
|
||||||
|
|
||||||
return AC_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acGridIntegrate(const Stream stream, const AcReal dt)
|
acGridIntegrate(const Stream stream, const AcReal dt)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user