diff --git a/include/astaroth.h b/include/astaroth.h
index 76f5174..ac20d34 100644
--- a/include/astaroth.h
+++ b/include/astaroth.h
@@ -232,6 +232,11 @@ AcResult acStore(AcMesh* host_mesh);
  * substep and the user is responsible for calling acBoundcondStep before reading the data. */
 AcResult acIntegrate(const AcReal dt);
 
+/** Performs Runge-Kutta 3 integration using general (customizable) boundary conditions.
+ * Note: Boundary conditions are not applied after the final substep and the user is
+ * responsible for calling acBoundcondStep before reading the data. */
+AcResult acIntegrateGBC(const AcMeshInfo config, const AcReal dt);
+
 /** Applies periodic boundary conditions for the Mesh distributed among the devices visible to
  * the caller*/
 AcResult acBoundcondStep(void);
diff --git a/src/core/astaroth.cc b/src/core/astaroth.cc
index 1c8d0f7..c565699 100644
--- a/src/core/astaroth.cc
+++ b/src/core/astaroth.cc
@@ -86,6 +86,13 @@ acIntegrate(const AcReal dt)
     return acNodeIntegrate(nodes[0], dt);
 }
 
+AcResult
+acIntegrateGBC(const AcMeshInfo config, const AcReal dt)
+{
+    return acNodeIntegrateGBC(nodes[0], config, dt);
+}
+
+
 AcResult
 acIntegrateStep(const int isubstep, const AcReal dt)
 {
@@ -109,6 +116,12 @@ acBoundcondStep(void)
     return acNodePeriodicBoundconds(nodes[0], STREAM_DEFAULT);
 }
 
+AcResult
+acBoundcondStepGBC(void) //TODO ADAPT
+{
+    return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT);
+}
+
 AcReal
 acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle)
 {
diff --git a/src/core/node.cc b/src/core/node.cc
index 552cf3f..58acd25 100644
--- a/src/core/node.cc
+++ b/src/core/node.cc
@@ -656,6 +656,28 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand
     return AC_SUCCESS;
 }
 
+static AcResult
+local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf) //TODO ADAPT
+{
+    acNodeSynchronizeStream(node, stream);
+
+    if (node->num_devices > 1) {
+        // Local boundary conditions
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) {
+            const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
+            const int3 d1 = (int3){node->subgrid.m.x, node->subgrid.m.y, d0.z + node->subgrid.n.z};
+            acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1);
+        }
+    }
+    else {
+        acDeviceGeneralBoundcondStep(node->devices[0], stream, vtxbuf, (int3){0, 0, 0},
+                                     node->subgrid.m);
+    }
+    return AC_SUCCESS;
+}
+
+
 static AcResult
 global_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle)
 {
@@ -768,6 +790,85 @@ acNodeIntegrate(const Node node, const AcReal dt)
     return AC_SUCCESS;
 }
 
+AcResult
+acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt) //TODO ADAPT: config not used yet
+{
+    acNodeSynchronizeStream(node, STREAM_ALL);
+    // xxx|OOO OOOOOOOOO OOO|xxx
+    //     ^   ^         ^  ^
+    //     n0  n1        n2 n3
+    // const int3 n0 = (int3){NGHOST, NGHOST, NGHOST};
+    // const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
+    // const int3 n2 = node->grid.n;
+    // const int3 n3 = n0 + node->grid.n;
+
+    for (int isubstep = 0; isubstep < 3; ++isubstep) {
+        acNodeSynchronizeStream(node, STREAM_ALL);
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
+        }
+        acNodeSynchronizeStream(node, STREAM_ALL);
+
+        // Inner inner
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) {
+            const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = node->subgrid.n;
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_16, isubstep, m1, m2, dt);
+        }
+
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            acNodeSynchronizeVertexBuffer(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
+            global_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
+        }
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            acNodeSynchronizeStream(node, (Stream)vtxbuf);
+        }
+
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Front
+            const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_0, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Back
+            const int3 m1 = (int3){NGHOST, NGHOST, node->subgrid.n.z};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_1, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Bottom
+            const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_2, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Top
+            const int3 m1 = (int3){NGHOST, node->subgrid.n.y, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_3, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Left
+            const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
+                                        node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_4, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Right
+            const int3 m1 = (int3){node->subgrid.n.x, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
+                                        node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_5, isubstep, m1, m2, dt);
+        }
+        acNodeSwapBuffers(node);
+    }
+    acNodeSynchronizeStream(node, STREAM_ALL);
+    return AC_SUCCESS;
+}
+
 AcResult
 acNodePeriodicBoundcondStep(const Node node, const Stream stream,
                             const VertexBufferHandle vtxbuf_handle)
@@ -783,21 +884,20 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream,
 }
 
 AcResult
-acNodeGeneralBoundcondStep(const Node node, const Stream stream,
+acNodeGeneralBoundcondStep(const Node node, const Stream stream, //TODO ADAPT
                            const VertexBufferHandle vtxbuf_handle)
 {
-    local_boundcondstep(node, stream, vtxbuf_handle);
+    local_boundcondstep_GBC(node, stream, vtxbuf_handle);
 
     acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle);
 
-    //MV: assume that global step communication is handles by acNodePeriodicBoundcondStep as above.
-    //global_boundcondstep(node, stream, vtxbuf_handle);
+    global_boundcondstep(node, stream, vtxbuf_handle);
 
     return AC_SUCCESS;
 }
 
 AcResult
-acNodePeriodicBoundconds(const Node node, const Stream stream)
+acNodePeriodicBoundconds(const Node node, const Stream stream) //TODO ADAPT
 {
     for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
         acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i);
@@ -806,7 +906,7 @@ acNodePeriodicBoundconds(const Node node, const Stream stream)
 }
 
 AcResult
-acNodeGeneralBoundconds(const Node node, const Stream stream)
+acNodeGeneralBoundconds(const Node node, const Stream stream) //TODO ADAPT
 {
     for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
         acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i);
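
Usage sketch (not part of the diff): a minimal host-side driver showing the intended call sequence for the new general-boundary-condition (GBC) entry points. Only acIntegrateGBC and acBoundcondStepGBC come from this change; acInit/acLoad/acStore/acQuit and the config/mesh setup are assumed from the existing Astaroth host API, the timestep and step count are placeholders, and the sketch further assumes acBoundcondStepGBC is declared in astaroth.h (the header hunk above only declares acIntegrateGBC).

    #include "astaroth.h"

    int main(void)
    {
        AcMeshInfo config;
        // ... fill in mesh dimensions and boundary-condition parameters ...

        // Device setup elided: acInit(config) and acLoad(host_mesh) from the
        // existing API are assumed to have been called at this point.

        const AcReal dt = (AcReal)1e-3; // placeholder timestep

        for (int step = 0; step < 100; ++step) {
            // RK3 substeps with general boundary conditions. As with acIntegrate(),
            // boundary conditions are not applied after the final substep...
            acIntegrateGBC(config, dt);
            // ...so apply them explicitly before reading the data back.
            acBoundcondStepGBC();
        }

        // acStore(&host_mesh) and acQuit() would follow here.
        return 0;
    }

acBoundcondStepGBC() routes through acNodeGeneralBoundconds(), i.e. the same local_boundcondstep_GBC + global_boundcondstep path that acNodeIntegrateGBC uses between substeps, so the state read back after the loop receives the same boundary treatment.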
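For reference, the region split inside acNodeIntegrateGBC mirrors acNodeIntegrate: the bulk of each device's subgrid is integrated on STREAM_16, and the six ghost-adjacent shells are issued on STREAM_0..STREAM_5 after the per-buffer synchronization and global_boundcondstep calls. With NGHOST = 3 and a hypothetical per-device subgrid of n = (128, 128, 32) (numbers chosen only for illustration), the substep launches cover:

    inner:  m1 = (  6,   6,  6), m2 = (128, 128,  32)       // STREAM_16
    front:  m1 = (  3,   3,  3), m2 = m1 + (128, 128,  3)   // STREAM_0
    back:   m1 = (  3,   3, 32), m2 = m1 + (128, 128,  3)   // STREAM_1
    bottom: m1 = (  3,   3,  6), m2 = m1 + (128,   3, 26)   // STREAM_2
    top:    m1 = (  3, 128,  6), m2 = m1 + (128,   3, 26)   // STREAM_3
    left:   m1 = (  3,   6,  6), m2 = m1 + (  3, 122, 26)   // STREAM_4
    right:  m1 = (128,   6,  6), m2 = m1 + (  3, 122, 26)   // STREAM_5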