From 26bbfa089dc9127e0282f2c05d10922fce69829e Mon Sep 17 00:00:00 2001 From: jpekkila Date: Thu, 17 Oct 2019 18:17:37 +0300 Subject: [PATCH] Better multi-node communication: fire and forget. --- src/core/device.cu | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/core/device.cu b/src/core/device.cu index d6581ff..ffcb9be 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -873,7 +873,59 @@ acDeviceCommunicateHalosMPI(const Device device) const size_t count = device->local_config.int_params[AC_mx] * device->local_config.int_params[AC_my] * NGHOST; + MPI_Request request_front[NUM_VTXBUF_HANDLES]; + MPI_Status status_front[NUM_VTXBUF_HANDLES]; + MPI_Request request_back[NUM_VTXBUF_HANDLES]; + MPI_Status status_back[NUM_VTXBUF_HANDLES]; + for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { + { // Front + // ...|ooooxxx|... -> xxx|ooooooo|... + const size_t src_idx = acVertexBufferIdx(0, 0, device->local_config.int_params[AC_nz], + device->local_config); + const size_t dst_idx = acVertexBufferIdx(0, 0, 0, device->local_config); + const int send_pid = (pid + 1) % num_processes; + MPI_Isend(&device->vba.in[i][src_idx], count, datatype, send_pid, i, MPI_COMM_WORLD, + &request_front[i]); + } + + { // Back + // ...|ooooooo|xxx <- ...|xxxoooo|... + const size_t src_idx = acVertexBufferIdx(0, 0, NGHOST, device->local_config); + const size_t dst_idx = acVertexBufferIdx( + 0, 0, NGHOST + device->local_config.int_params[AC_nz], device->local_config); + const int send_pid = (pid + num_processes - 1) % num_processes; + + MPI_Isend(&device->vba.in[i][src_idx], count, datatype, send_pid, + NUM_VTXBUF_HANDLES + i, MPI_COMM_WORLD, &request_back[i]); + } + } + for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { + { // Front + // ...|ooooxxx|... -> xxx|ooooooo|... + const size_t src_idx = acVertexBufferIdx(0, 0, device->local_config.int_params[AC_nz], + device->local_config); + const size_t dst_idx = acVertexBufferIdx(0, 0, 0, device->local_config); + const int recv_pid = (pid + num_processes - 1) % num_processes; + MPI_Recv(&device->vba.in[i][dst_idx], count, datatype, recv_pid, i, MPI_COMM_WORLD, + &status_front[i]); + } + { // Back + // ...|ooooooo|xxx <- ...|xxxoooo|... + const size_t src_idx = acVertexBufferIdx(0, 0, NGHOST, device->local_config); + const size_t dst_idx = acVertexBufferIdx( + 0, 0, NGHOST + device->local_config.int_params[AC_nz], device->local_config); + const int recv_pid = (pid + 1) % num_processes; + + MPI_Recv(&device->vba.in[i][dst_idx], count, datatype, recv_pid, NUM_VTXBUF_HANDLES + i, + MPI_COMM_WORLD, &status_back[i]); + } + } + for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { + MPI_Wait(&request_front[i], &status_front[i]); + MPI_Wait(&request_back[i], &status_back[i]); + } + /* for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { { // Front // ...|ooooxxx|... -> xxx|ooooooo|... @@ -913,6 +965,7 @@ acDeviceCommunicateHalosMPI(const Device device) MPI_Wait(&request, &status); } } + */ return AC_SUCCESS; }