From a3ca6cf1327ff072e0b30717e9d9db12210ac2dd Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 1 Jul 2019 13:56:05 +0300 Subject: [PATCH] Added skeletons for packing parts of the ghost zones into buffers to speed up data transfers --- src/core/device.cu | 21 +++++++++++++++++++++ src/core/device.cuh | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/src/core/device.cu b/src/core/device.cu index 6df10f7..dda8494 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -42,6 +42,10 @@ __constant__ Grid globalGrid; #define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) #include "kernels/kernels.cuh" +#if PACKED_DATA_TRANSFERS // Defined in device.cuh +// #include "kernels/pack_unpack.cuh" +#endif + struct device_s { int id; AcMeshInfo local_config; @@ -53,6 +57,11 @@ struct device_s { VertexBufferArray vba; AcReal* reduce_scratchpad; AcReal* reduce_result; + +#if PACKED_DATA_TRANSFERS +// Declare memory for buffers needed for packed data transfers here +// AcReal* data_packing_buffer; +#endif }; AcResult @@ -154,6 +163,10 @@ createDevice(const int id, const AcMeshInfo device_config, Device* device_handle cudaMalloc(&device->reduce_scratchpad, AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(device_config))); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal))); +#if PACKED_DATA_TRANSFERS +// Allocate data required for packed transfers here (cudaMalloc) +#endif + // Device constants ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0, cudaMemcpyHostToDevice)); @@ -184,6 +197,10 @@ destroyDevice(Device device) cudaFree(device->reduce_scratchpad); cudaFree(device->reduce_result); +#if PACKED_DATA_TRANSFERS +// Free data required for packed tranfers here (cudaFree) +#endif + // Concurrency for (int i = 0; i < NUM_STREAM_TYPES; ++i) cudaStreamDestroy(device->streams[i]); @@ -373,3 +390,7 @@ loadGlobalGrid(const Device device, const Grid grid) cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid), 0, cudaMemcpyHostToDevice)); return AC_SUCCESS; } + +#if PACKED_DATA_TRANSFERS +// Functions for calling packed data transfers +#endif diff --git a/src/core/device.cuh b/src/core/device.cuh index 28dbd50..1e9becc 100644 --- a/src/core/device.cuh +++ b/src/core/device.cuh @@ -98,3 +98,8 @@ AcResult loadDeviceConstant(const Device device, const AcRealParam param, const /** */ AcResult loadGlobalGrid(const Device device, const Grid grid); + +// #define PACKED_DATA_TRANSFERS (1) %JP: placeholder for optimized ghost zone packing and transfers +#if PACKED_DATA_TRANSFERS +// Declarations used for packed data transfers +#endif