From 361725adc628991727789231ac610b85c2916cf5 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 17 Jun 2019 17:52:14 +0300 Subject: [PATCH] Cleaned reductions a bit --- src/core/kernels/kernels.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index f930bc8..b90fcec 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -926,14 +926,13 @@ _kernel_reduce(AcReal* src, AcReal* result) while (offset > 0) { if (threadIdx.x < offset) { - tmp = reduce(tmp, smem[threadIdx.x + offset]); - smem[threadIdx.x] = tmp; + smem[threadIdx.x] = reduce(smem[threadIdx.x], smem[threadIdx.x + offset]); } offset /= 2; __syncthreads(); } if (threadIdx.x == 0) - src[idx] = tmp; + src[idx] = smem[threadIdx.x]; } template @@ -1080,6 +1079,7 @@ reduce_vec(const cudaStream_t stream, } AcReal result; - cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost); + cudaMemcpyAsync(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); return result; }