diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 956a24f..fecfdcb 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -74,6 +74,14 @@ printInt3(const int3 vec) AcResult acInit(const AcMeshInfo& config) +{ + Acresult res=acGetDevice(); + res=acInitialize(config); + return AC_SUCCESS; +} + +AcResult +acCheckDeviceAvail() { // Check devices cudaGetDeviceCount(&num_devices); @@ -89,6 +97,11 @@ acInit(const AcMeshInfo& config) WARNING("MULTIGPU_ENABLED was false. Using only one device"); num_devices = 1; // Use only one device if multi-GPU is not enabled } + return AC_SUCCESS; +} +AcResult +acInitialize(const AcMeshInfo& config) +{ // Check that num_devices is divisible with AC_nz. This makes decomposing the // problem domain to multiple GPUs much easier since we do not have to worry // about remainders @@ -108,14 +121,14 @@ acInit(const AcMeshInfo& config) ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER); - +#ifndef PENCIL_ASTAROTH // clang-format off printf("Grid m "); printInt3(grid.m); printf("\n"); printf("Grid n "); printInt3(grid.n); printf("\n"); printf("Subrid m "); printInt3(subgrid.m); printf("\n"); printf("Subrid n "); printInt3(subgrid.n); printf("\n"); // clang-format on - +#endif // Initialize the devices for (int i = 0; i < num_devices; ++i) { createDevice(i, subgrid_config, &devices[i]); @@ -259,9 +272,9 @@ acStore(AcMesh* host_mesh) AcResult acIntegrateStep(const int& isubstep, const AcReal& dt) { - const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2}; - const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y, - STENCIL_ORDER / 2 + subgrid.n.z}; + const int3 start = (int3){NGHOST, NGHOST, NGHOST}; + const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, + NGHOST + subgrid.n.z}; for (int i = 0; i < num_devices; ++i) { rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt); } @@ -294,7 +307,7 @@ acBoundcondStep(void) else { // Local boundary conditions for (int i = 0; i < num_devices; ++i) { - const int3 d0 = (int3){0, 0, STENCIL_ORDER / 2}; // DECOMPOSITION OFFSET HERE + const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z}; boundcondStep(devices[i], STREAM_PRIMARY, d0, d1); } @@ -393,7 +406,7 @@ acBoundcondStep(void) */ // Exchange halos for (int i = 0; i < num_devices; ++i) { - const int num_vertices = subgrid.m.x * subgrid.m.y * STENCIL_ORDER / 2; + const int num_vertices = subgrid.m.x * subgrid.m.y * NGHOST; // ...|ooooxxx|... -> xxx|ooooooo|... { const int3 src = (int3){0, 0, subgrid.n.z}; @@ -403,8 +416,8 @@ acBoundcondStep(void) } // ...|ooooooo|xxx <- ...|xxxoooo|... { - const int3 src = (int3){0, 0, STENCIL_ORDER / 2}; - const int3 dst = (int3){0, 0, STENCIL_ORDER / 2 + subgrid.n.z}; + const int3 src = (int3){0, 0, NGHOST}; + const int3 dst = (int3){0, 0, NGHOST + subgrid.n.z}; copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src, devices[i], dst, num_vertices); }