From f1203431107fa0eda906c6c701bda93a10c7b79a Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 21 Oct 2019 16:23:24 +0300 Subject: [PATCH] Bugfix: peer access was not disabled when Node was destroyed, leading to cudaErrorPeerAccessAlreadyEnabled error when creating new Nodes --- src/core/node.cu | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/core/node.cu b/src/core/node.cu index 05695ad..e90cdef 100644 --- a/src/core/node.cu +++ b/src/core/node.cu @@ -309,6 +309,29 @@ acNodeDestroy(Node node) { acNodeSynchronizeStream(node, STREAM_ALL); + // Disable peer access + for (int i = 0; i < node->num_devices; ++i) { + const int front = (i + 1) % node->num_devices; + const int back = (i - 1 + node->num_devices) % node->num_devices; + + int can_access_front, can_access_back; + cudaDeviceCanAccessPeer(&can_access_front, i, front); + cudaDeviceCanAccessPeer(&can_access_back, i, back); +#if VERBOSE_PRINTING + printf("Trying to disable peer access from %d to %d (can access: %d) and %d (can access: " + "%d)\n", + i, front, can_access_front, back, can_access_back); +#endif + + cudaSetDevice(i); + if (can_access_front) { + ERRCHK_CUDA_ALWAYS(cudaDeviceDisablePeerAccess(front)); + } + if (can_access_back) { + ERRCHK_CUDA_ALWAYS(cudaDeviceDisablePeerAccess(back)); + } + } + // #pragma omp parallel for for (int i = 0; i < node->num_devices; ++i) { acDeviceDestroy(node->devices[i]);