Added a function for getting pid of a neighboring process when decomposing in 3D

This commit is contained in:
Johannes Pekkila
2019-10-23 19:26:35 +02:00
parent 474bdf185d
commit 8894b7c7d6

View File

@@ -233,6 +233,7 @@ acDeviceDestroy(Device device)
{
cudaSetDevice(device->id);
printf("Destroying device %d (%p)\n", device->id, device);
acDeviceSynchronizeStream(device, STREAM_ALL);
// Memory
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
@@ -791,6 +792,31 @@ acDeviceReduceVec(const Device device, const Stream stream, const ReductionType
*/
#include <mpi.h>
static int
mod(const int a, const int b)
{
const int r = a % b;
return r < 0 ? r + b : r;
}
static int
get_neighbor(const int3 offset)
{
// The number of nodes is n^3 = m = num_processes
// Require that the problem size is always equivalent among processes ((floor(cbrt(m))^3 == m)
// Require that mesh dimension is (n 2^w), where w is some integer
int pid, num_processes;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
const int n = floor(cbrt(num_processes));
ERRCHK_ALWAYS(ceil(cbrt(num_processes)) == n);
ERRCHK_ALWAYS(n * n * n == num_processes);
return mod(pid + offset.x, n) + offset.y * n + offset.z * n * n;
}
static void
acDeviceDistributeMeshMPI(const AcMesh src, AcMesh* dst)
{
@@ -1176,7 +1202,7 @@ acDeviceRunMPITest(void)
acLoadConfig(AC_DEFAULT_CONFIG, &info);
// Large mesh dim
const int nn = 128;
const int nn = 256;
info.int_params[AC_nx] = info.int_params[AC_ny] = info.int_params[AC_nz] = nn;
acUpdateConfig(&info);
@@ -1220,12 +1246,15 @@ acDeviceRunMPITest(void)
acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
// Benchmark
const int num_iters = 10;
const int num_iters = 100;
Timer total_time;
timer_reset(&total_time);
for (int i = 0; i < num_iters; ++i) {
acDeviceBoundStepMPI(device);
// acDeviceBoundStepMPI(device);
acDeviceIntegrateStepMPI(device, FLT_EPSILON); // TODO recheck
}
acDeviceSynchronizeStream(device, STREAM_ALL);
MPI_Barrier(MPI_COMM_WORLD);
if (pid == 0) {
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
@@ -1233,7 +1262,7 @@ acDeviceRunMPITest(void)
printf("Time per step: %f ms\n", ms_elapsed / num_iters);
}
////////////////////////////// Timer end
acDeviceBoundStepMPI(device);
acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
acDeviceDestroy(device);
////////////////////////////////////////////////////////////////////////////////////////////////