Made globalGridN and d_multigpu_offset built-in parameters. Note the renaming from globalGrid.n to globalGridN.
This commit is contained in:
@@ -30,7 +30,7 @@
|
|||||||
#define LMAGNETIC (1)
|
#define LMAGNETIC (1)
|
||||||
#define LENTROPY (1)
|
#define LENTROPY (1)
|
||||||
#define LTEMPERATURE (0)
|
#define LTEMPERATURE (0)
|
||||||
#define LFORCING (0)
|
#define LFORCING (1)
|
||||||
#define LUPWD (0)
|
#define LUPWD (0)
|
||||||
|
|
||||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||||
|
@@ -248,9 +248,9 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
|
|||||||
Vector
|
Vector
|
||||||
forcing(int3 globalVertexIdx, Scalar dt)
|
forcing(int3 globalVertexIdx, Scalar dt)
|
||||||
{
|
{
|
||||||
Vector a = Scalar(.5) * (Vector){globalGrid.n.x * dsx,
|
Vector a = Scalar(.5) * (Vector){globalGridN.x * dsx,
|
||||||
globalGrid.n.y * dsy,
|
globalGridN.y * dsy,
|
||||||
globalGrid.n.z * dsz}; // source (origin)
|
globalGridN.z * dsz}; // source (origin)
|
||||||
Vector xx = (Vector){(globalVertexIdx.x - nx_min) * dsx,
|
Vector xx = (Vector){(globalVertexIdx.x - nx_min) * dsx,
|
||||||
(globalVertexIdx.y - ny_min) * dsy,
|
(globalVertexIdx.y - ny_min) * dsy,
|
||||||
(globalVertexIdx.z - nz_min) * dsz}; // sink (current index)
|
(globalVertexIdx.z - nz_min) * dsz}; // sink (current index)
|
||||||
|
@@ -86,7 +86,9 @@ typedef struct {
|
|||||||
FUNC(AC_nxy),\
|
FUNC(AC_nxy),\
|
||||||
FUNC(AC_nxyz),\
|
FUNC(AC_nxyz),\
|
||||||
|
|
||||||
#define AC_FOR_BUILTIN_INT3_PARAM_TYPES(FUNC)
|
#define AC_FOR_BUILTIN_INT3_PARAM_TYPES(FUNC)\
|
||||||
|
FUNC(AC_global_grid_n),\
|
||||||
|
FUNC(AC_multigpu_offset),
|
||||||
|
|
||||||
#define AC_FOR_BUILTIN_REAL_PARAM_TYPES(FUNC)
|
#define AC_FOR_BUILTIN_REAL_PARAM_TYPES(FUNC)
|
||||||
|
|
||||||
|
@@ -40,13 +40,14 @@ typedef struct {
|
|||||||
} VertexBufferArray;
|
} VertexBufferArray;
|
||||||
|
|
||||||
__constant__ AcMeshInfo d_mesh_info;
|
__constant__ AcMeshInfo d_mesh_info;
|
||||||
__constant__ int3 d_multigpu_offset;
|
|
||||||
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||||
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
||||||
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||||
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||||
|
#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n])
|
||||||
|
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
|
||||||
#include "kernels/kernels.cuh"
|
#include "kernels/kernels.cuh"
|
||||||
|
|
||||||
static dim3 rk3_tpb(32, 1, 4);
|
static dim3 rk3_tpb(32, 1, 4);
|
||||||
@@ -122,13 +123,6 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
|||||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
|
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
|
||||||
cudaMemcpyHostToDevice));
|
cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
// Multi-GPU offset. This is used to compute globalVertexIdx.
|
|
||||||
// Might be better to calculate this in astaroth.cu instead of here, s.t.
|
|
||||||
// everything related to the decomposition is limited to the multi-GPU layer
|
|
||||||
const int3 multigpu_offset = (int3){0, 0, device->id * device->local_config.int_params[AC_nz]};
|
|
||||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_multigpu_offset, &multigpu_offset,
|
|
||||||
sizeof(multigpu_offset), 0, cudaMemcpyHostToDevice));
|
|
||||||
|
|
||||||
printf("Created device %d (%p)\n", device->id, device);
|
printf("Created device %d (%p)\n", device->id, device);
|
||||||
*device_handle = device;
|
*device_handle = device;
|
||||||
|
|
||||||
|
@@ -152,17 +152,22 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle)
|
|||||||
ERRCHK_ALWAYS(node->subgrid.n.y >= STENCIL_ORDER);
|
ERRCHK_ALWAYS(node->subgrid.n.y >= STENCIL_ORDER);
|
||||||
ERRCHK_ALWAYS(node->subgrid.n.z >= STENCIL_ORDER);
|
ERRCHK_ALWAYS(node->subgrid.n.z >= STENCIL_ORDER);
|
||||||
|
|
||||||
|
#if VERBOSE_PRINTING
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#if VERBOSE_PRINTING
|
|
||||||
printf("Grid m "); printInt3(node->grid.m); printf("\n");
|
printf("Grid m "); printInt3(node->grid.m); printf("\n");
|
||||||
printf("Grid n "); printInt3(node->grid.n); printf("\n");
|
printf("Grid n "); printInt3(node->grid.n); printf("\n");
|
||||||
printf("Subrid m "); printInt3(node->subgrid.m); printf("\n");
|
printf("Subrid m "); printInt3(node->subgrid.m); printf("\n");
|
||||||
printf("Subrid n "); printInt3(node->subgrid.n); printf("\n");
|
printf("Subrid n "); printInt3(node->subgrid.n); printf("\n");
|
||||||
#endif
|
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
#endif
|
||||||
|
|
||||||
// Initialize the devices
|
// Initialize the devices
|
||||||
for (int i = 0; i < node->num_devices; ++i) {
|
for (int i = 0; i < node->num_devices; ++i) {
|
||||||
|
const int3 multinode_offset = (int3){0, 0, 0}; // Placeholder
|
||||||
|
const int3 multigpu_offset = (int3){0, 0, i * node->subgrid.n.z};
|
||||||
|
subgrid_config.int3_params[AC_global_grid_n] = node->grid.n;
|
||||||
|
subgrid_config.int3_params[AC_multigpu_offset] = multinode_offset + multigpu_offset;
|
||||||
|
|
||||||
acDeviceCreate(i, subgrid_config, &node->devices[i]);
|
acDeviceCreate(i, subgrid_config, &node->devices[i]);
|
||||||
acDevicePrintInfo(node->devices[i]);
|
acDevicePrintInfo(node->devices[i]);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user