From 37268476838081e74f1deaea85d669eecad4296f Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 6 Aug 2019 16:39:15 +0300 Subject: [PATCH] Made globalGridN and d_multigpu_offsets built-in parameters. Note the renaming from globalGrid.n to globalGridN. --- acc/mhd_solver/stencil_defines.h | 2 +- acc/mhd_solver/stencil_process.sps | 6 +++--- include/astaroth_defines.h | 4 +++- src/core/device.cu | 10 ++-------- src/core/node.cu | 9 +++++++-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/acc/mhd_solver/stencil_defines.h b/acc/mhd_solver/stencil_defines.h index 0ab07c2..5124820 100644 --- a/acc/mhd_solver/stencil_defines.h +++ b/acc/mhd_solver/stencil_defines.h @@ -30,7 +30,7 @@ #define LMAGNETIC (1) #define LENTROPY (1) #define LTEMPERATURE (0) -#define LFORCING (0) +#define LFORCING (1) #define LUPWD (0) #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps index eb0fc0c..af30d79 100644 --- a/acc/mhd_solver/stencil_process.sps +++ b/acc/mhd_solver/stencil_process.sps @@ -248,9 +248,9 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto Vector forcing(int3 globalVertexIdx, Scalar dt) { - Vector a = Scalar(.5) * (Vector){globalGrid.n.x * dsx, - globalGrid.n.y * dsy, - globalGrid.n.z * dsz}; // source (origin) + Vector a = Scalar(.5) * (Vector){globalGridN.x * dsx, + globalGridN.y * dsy, + globalGridN.z * dsz}; // source (origin) Vector xx = (Vector){(globalVertexIdx.x - nx_min) * dsx, (globalVertexIdx.y - ny_min) * dsy, (globalVertexIdx.z - nz_min) * dsz}; // sink (current index) diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index 808dd1f..345dd5d 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -86,7 +86,9 @@ typedef struct { FUNC(AC_nxy),\ FUNC(AC_nxyz),\ -#define AC_FOR_BUILTIN_INT3_PARAM_TYPES(FUNC) +#define AC_FOR_BUILTIN_INT3_PARAM_TYPES(FUNC)\ + FUNC(AC_global_grid_n),\ + FUNC(AC_multigpu_offset), #define AC_FOR_BUILTIN_REAL_PARAM_TYPES(FUNC) diff --git a/src/core/device.cu b/src/core/device.cu index 9846e85..a21ab4d 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -40,13 +40,14 @@ typedef struct { } VertexBufferArray; __constant__ AcMeshInfo d_mesh_info; -__constant__ int3 d_multigpu_offset; #define DCONST_INT(X) (d_mesh_info.int_params[X]) #define DCONST_INT3(X) (d_mesh_info.int3_params[X]) #define DCONST_REAL(X) (d_mesh_info.real_params[X]) #define DCONST_REAL3(X) (d_mesh_info.real3_params[X]) #define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy)) #define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) +#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n]) +#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset]) #include "kernels/kernels.cuh" static dim3 rk3_tpb(32, 1, 4); @@ -122,13 +123,6 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0, cudaMemcpyHostToDevice)); - // Multi-GPU offset. This is used to compute globalVertexIdx. - // Might be better to calculate this in astaroth.cu instead of here, s.t. - // everything related to the decomposition is limited to the multi-GPU layer - const int3 multigpu_offset = (int3){0, 0, device->id * device->local_config.int_params[AC_nz]}; - ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_multigpu_offset, &multigpu_offset, - sizeof(multigpu_offset), 0, cudaMemcpyHostToDevice)); - printf("Created device %d (%p)\n", device->id, device); *device_handle = device; diff --git a/src/core/node.cu b/src/core/node.cu index 6a37f95..5166fa1 100644 --- a/src/core/node.cu +++ b/src/core/node.cu @@ -152,17 +152,22 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle) ERRCHK_ALWAYS(node->subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(node->subgrid.n.z >= STENCIL_ORDER); +#if VERBOSE_PRINTING // clang-format off - #if VERBOSE_PRINTING printf("Grid m "); printInt3(node->grid.m); printf("\n"); printf("Grid n "); printInt3(node->grid.n); printf("\n"); printf("Subrid m "); printInt3(node->subgrid.m); printf("\n"); printf("Subrid n "); printInt3(node->subgrid.n); printf("\n"); - #endif // clang-format on +#endif // Initialize the devices for (int i = 0; i < node->num_devices; ++i) { + const int3 multinode_offset = (int3){0, 0, 0}; // Placeholder + const int3 multigpu_offset = (int3){0, 0, i * node->subgrid.n.z}; + subgrid_config.int3_params[AC_global_grid_n] = node->grid.n; + subgrid_config.int3_params[AC_multigpu_offset] = multinode_offset + multigpu_offset; + acDeviceCreate(i, subgrid_config, &node->devices[i]); acDevicePrintInfo(node->devices[i]); }