From b08d5b26f5dc1c2ba317dfafa2a48f7fc8067701 Mon Sep 17 00:00:00 2001
From: jpekkila
Date: Wed, 10 Jul 2019 15:05:57 +0300
Subject: [PATCH] cudaMemcpyToSymbol -> cudaMemcpyToSymbolAsync

---
Reviewer notes (this area is skipped by `git am`; not part of the commit message):
  * Both loadDeviceConstant overloads (AcIntParam and AcRealParam) gain a
    `stream_type` parameter between `device` and `param`, in device.cu and in
    the device.cuh declarations. No default is given and the old signatures
    are removed, so every existing caller must be updated to pass a StreamType.
  * The copy into d_mesh_info is now issued with cudaMemcpyToSymbolAsync on
    device->streams[stream_type] instead of cudaMemcpyToSymbol. The byte
    offset computation and the AC_SUCCESS return are unchanged.
  * `value` is a by-value parameter, so `&value` refers to storage that ends
    with the call. NOTE(review): presumably safe because the runtime stages
    pageable host memory before an async copy returns -- confirm against the
    CUDA "API synchronization behavior" notes.
  * NOTE(review): nothing in this patch orders the copy against work on other
    streams; callers whose kernels read d_mesh_info from a different stream
    presumably have to synchronize themselves -- confirm at the call sites.
  * The unchanged context comment in the AcIntParam overload still says
    "invoking cudaMemcpyToSymbol"; it is left as-is here because it is a
    context line, but it is stale after this change.
  * cudaSetDevice(device->id) is called without checking its return value in
    both overloads (pre-existing, untouched by this patch).
  * NOTE(review): the line layout and indentation inside the hunks below were
    re-derived from whitespace-collapsed text -- confirm against the tree
    before applying.

 src/core/device.cu  | 16 ++++++++++------
 src/core/device.cuh |  6 ++++--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/core/device.cu b/src/core/device.cu
index 61476c2..b5fc87a 100644
--- a/src/core/device.cu
+++ b/src/core/device.cu
@@ -387,7 +387,8 @@ swapBuffers(const Device device)
 }
 
 AcResult
-loadDeviceConstant(const Device device, const AcIntParam param, const int value)
+loadDeviceConstant(const Device device, const StreamType stream_type, const AcIntParam param,
+                   const int value)
 {
     cudaSetDevice(device->id);
     // CUDA 10 apparently creates only a single name for a device constant (d_mesh_info here)
@@ -395,18 +396,21 @@ loadDeviceConstant(const Device device, const AcIntParam param, const int value)
     // Therefore we have to obfuscate the code a bit and compute the offset address before
     // invoking cudaMemcpyToSymbol.
     const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info;
-    ERRCHK_CUDA_ALWAYS(
-        cudaMemcpyToSymbol(d_mesh_info, &value, sizeof(value), offset, cudaMemcpyHostToDevice));
+    ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
+                                               cudaMemcpyHostToDevice,
+                                               device->streams[stream_type]));
     return AC_SUCCESS;
 }
 
 AcResult
-loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value)
+loadDeviceConstant(const Device device, const StreamType stream_type, const AcRealParam param,
+                   const AcReal value)
 {
     cudaSetDevice(device->id);
     const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info;
-    ERRCHK_CUDA_ALWAYS(
-        cudaMemcpyToSymbol(d_mesh_info, &value, sizeof(value), offset, cudaMemcpyHostToDevice));
+    ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
+                                               cudaMemcpyHostToDevice,
+                                               device->streams[stream_type]));
     return AC_SUCCESS;
 }
 
diff --git a/src/core/device.cuh b/src/core/device.cuh
index bd2a308..103e8ad 100644
--- a/src/core/device.cuh
+++ b/src/core/device.cuh
@@ -82,10 +82,12 @@ AcResult copyMeshDeviceToDevice(const Device src, const StreamType stream_type,
 AcResult swapBuffers(const Device device);
 
 /** */
-AcResult loadDeviceConstant(const Device device, const AcIntParam param, const int value);
+AcResult loadDeviceConstant(const Device device, const StreamType stream_type,
+                            const AcIntParam param, const int value);
 
 /** */
-AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value);
+AcResult loadDeviceConstant(const Device device, const StreamType stream_type,
+                            const AcRealParam param, const AcReal value);
 
 /** */
 AcResult loadGlobalGrid(const Device device, const Grid grid);