Added the optimized implementation of acNodeIntegrate, where boundary conditions are applied before integration instead of after.

This commit is contained in:
jpekkila
2019-08-05 20:10:13 +03:00
parent 8df49370c8
commit b73c2675e8
3 changed files with 128 additions and 21 deletions

View File

@@ -104,7 +104,27 @@ typedef enum {
NUM_REDUCTION_TYPES NUM_REDUCTION_TYPES
} ReductionType; } ReductionType;
typedef enum { STREAM_DEFAULT, NUM_STREAM_TYPES } Stream; typedef enum {
STREAM_DEFAULT,
STREAM_0,
STREAM_1,
STREAM_2,
STREAM_3,
STREAM_4,
STREAM_5,
STREAM_6,
STREAM_7,
STREAM_8,
STREAM_9,
STREAM_10,
STREAM_11,
STREAM_12,
STREAM_13,
STREAM_14,
STREAM_15,
STREAM_16,
NUM_STREAM_TYPES
} Stream;
#define STREAM_ALL (NUM_STREAM_TYPES) #define STREAM_ALL (NUM_STREAM_TYPES)
#define AC_GEN_ID(X) X #define AC_GEN_ID(X) X

View File

@@ -73,7 +73,8 @@ acStore(AcMesh* host_mesh)
AcResult AcResult
acIntegrate(const AcReal dt) acIntegrate(const AcReal dt)
{ {
return acNodeIntegrate(nodes[0], dt); acNodeIntegrate(nodes[0], dt);
return acBoundcondStep();
} }
AcResult AcResult

View File

@@ -478,25 +478,6 @@ acNodeIntegrateSubstep(const Node node, const Stream stream, const int isubstep,
return AC_SUCCESS; return AC_SUCCESS;
} }
// Previous (serial) implementation of acNodeIntegrate.
//
// Runs three integration substeps on `node` with time step `dt`. Every
// call is issued on STREAM_DEFAULT. Each substep does, in this order:
//   1. acNodePeriodicBoundconds
//   2. acNodeIntegrateSubstep over [NGHOST, NGHOST + node->grid.n) in x, y, z
//   3. acNodeSwapBuffers
// A final acNodePeriodicBoundconds call follows the loop.
//
// Always returns AC_SUCCESS; the results of the called functions are not
// inspected.
AcResult
acNodeIntegrate(const Node node, const AcReal dt)
{
// Synchronize on STREAM_ALL before starting the substeps.
acNodeSynchronizeStream(node, STREAM_ALL);
for (int isubstep = 0; isubstep < 3; ++isubstep) {
acNodePeriodicBoundconds(node, STREAM_DEFAULT);
acNodeSynchronizeStream(node, STREAM_DEFAULT); // DEBUG
// Integration range: starts NGHOST cells in from the origin on every axis
// and spans node->grid.n cells.
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
const int3 end = start + node->grid.n;
acNodeIntegrateSubstep(node, STREAM_DEFAULT, isubstep, start, end, dt);
acNodeSwapBuffers(node);
acNodeSynchronizeStream(node, STREAM_DEFAULT); // DEBUG
}
// Boundary conditions are applied once more after the last substep.
acNodePeriodicBoundconds(node, STREAM_DEFAULT); // DEBUG
acNodeSynchronizeStream(node, STREAM_ALL);
return AC_SUCCESS;
}
static AcResult static AcResult
local_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf) local_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf)
{ {
@@ -550,6 +531,111 @@ global_boundcondstep(const Node node, const Stream stream, const VertexBufferHan
return AC_SUCCESS; return AC_SUCCESS;
} }
// Runs three integration substeps on `node` with time step `dt`, applying
// boundary conditions before each substep's integration rather than after.
//
// Each substep does, in this order:
//   1. local_boundcondstep for every vertex buffer, buffer `v` on (Stream)v.
//   2. Synchronize on STREAM_ALL.
//   3. Launch integration of the "inner inner" region of each device's
//      subgrid on STREAM_16.
//   4. Issue the per-buffer halo transfers between neighbouring devices,
//      again on (Stream)v.
//   5. Synchronize streams (Stream)0 .. (Stream)(2 * NUM_VTXBUF_HANDLES - 1).
//   6. Launch integration of the "inner outer" slabs (STREAM_0) and the six
//      outer slabs of the whole grid (STREAM_1 .. STREAM_6).
//   7. acNodeSwapBuffers.
//
// Always returns AC_SUCCESS; the results of the called functions are not
// inspected.
//
// NOTE(review): the (Stream)vtxbuf casts assume NUM_VTXBUF_HANDLES (and, in
// step 5, 2 * NUM_VTXBUF_HANDLES) does not exceed NUM_STREAM_TYPES — confirm.
AcResult
acNodeIntegrate(const Node node, const AcReal dt)
{
acNodeSynchronizeStream(node, STREAM_ALL);
// Index landmarks along one axis (x = ghost cells, O = computational cells;
// drawn for a ghost width of three):
//
// xxx|OOO OOOOOOOOO OOO|xxx
//     ^   ^         ^   ^
//     n0  n1        n2  n3
//
// n0 = NGHOST, n1 = 2 * NGHOST, n2 = grid.n, n3 = NGHOST + grid.n.
const int3 n0 = (int3){NGHOST, NGHOST, NGHOST};
const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
const int3 n2 = node->grid.n;
const int3 n3 = n0 + node->grid.n;
for (int isubstep = 0; isubstep < 3; ++isubstep) {
acNodeSynchronizeStream(node, STREAM_ALL);
// Local boundary step, one stream per vertex buffer.
for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
local_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
}
acNodeSynchronizeStream(node, STREAM_ALL);
// Inner inner
// For device i: the part of its z-slab that lies at least 2 * NGHOST
// inside the slab on every side. Issued on STREAM_16 before the halo
// transfers below; presumably so the two can overlap — TODO confirm.
for (int i = 0; i < node->num_devices; ++i) {
const int3 m1 = n1 + (int3){0, 0, i * node->subgrid.n.z};
const int3 m2 = m1 + node->subgrid.n - (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
acNodeIntegrateSubstep(node, STREAM_16, isubstep, m1, m2, dt);
}
// Halo exchange between neighbouring devices along z. The neighbour index
// wraps around modulo num_devices.
for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
// Number of vertices in NGHOST xy-planes of the subgrid (m.x * m.y each).
const int num_vertices = node->subgrid.m.x * node->subgrid.m.y * NGHOST;
for (int device_id = 0; device_id < node->num_devices; ++device_id) {
// ...|ooooxxx|... -> xxx|ooooooo|...
// From z = subgrid.n.z on this device to z = 0 on device_id + 1
// (the last argument is presumably the destination device — verify).
{
const int3 src = (int3){0, 0, node->subgrid.n.z};
const int3 dst = (int3){0, 0, 0};
acDeviceTransferVertexBufferWithOffset(
node->devices[device_id], (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, src,
dst, num_vertices, node->devices[(device_id + 1) % node->num_devices]);
}
// ...|ooooooo|xxx <- ...|xxxoooo|...
// From z = NGHOST on this device to z = NGHOST + subgrid.n.z on
// device_id - 1.
{
const int3 src = (int3){0, 0, NGHOST};
const int3 dst = (int3){0, 0, NGHOST + node->subgrid.n.z};
acDeviceTransferVertexBufferWithOffset(
node->devices[device_id], (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, src,
dst, num_vertices,
node->devices[(device_id - 1 + node->num_devices) % node->num_devices]);
}
}
}
// NOTE(review): the transfers above only use streams
// 0 .. NUM_VTXBUF_HANDLES - 1, but this loop synchronizes twice as many
// stream indices — confirm whether the factor of 2 is intentional.
for (int vtxbuf = 0; vtxbuf < 2 * NUM_VTXBUF_HANDLES; ++vtxbuf) {
acNodeSynchronizeStream(node, (Stream)vtxbuf);
}
// Inner outer
// Slabs of thickness 2 * NGHOST in z that straddle the boundary between
// device i and device i + 1; x and y are limited to the inner range.
for (int i = 0; i < node->num_devices - 1; ++i) {
const int3 m1 = n1 + (int3){0, 0, (i + 1) * node->subgrid.n.z - 2 * NGHOST};
const int3 m2 = m1 + (int3){node->subgrid.n.x - 2 * NGHOST,
node->subgrid.n.y - 2 * NGHOST, 2 * NGHOST};
acNodeIntegrateSubstep(node, STREAM_0, isubstep, m1, m2, dt);
}
// Outer
// Six slabs of thickness NGHOST covering the shell [n0, n3) minus
// [n1, n2), each on its own stream.
// Front
// z in [n0.z, n1.z), full x and y extent.
{
const int3 m1 = (int3){n0.x, n0.y, n0.z};
const int3 m2 = (int3){n3.x, n3.y, n1.z};
acNodeIntegrateSubstep(node, STREAM_1, isubstep, m1, m2, dt);
}
// Back
// z in [n2.z, n3.z), full x and y extent.
{
const int3 m1 = (int3){n0.x, n0.y, n2.z};
const int3 m2 = (int3){n3.x, n3.y, n3.z};
acNodeIntegrateSubstep(node, STREAM_2, isubstep, m1, m2, dt);
}
// Top
// y in [n0.y, n1.y), full x extent, z restricted to [n1.z, n2.z).
{
const int3 m1 = (int3){n0.x, n0.y, n1.z};
const int3 m2 = (int3){n3.x, n1.y, n2.z};
acNodeIntegrateSubstep(node, STREAM_3, isubstep, m1, m2, dt);
}
// Bottom
// y in [n2.y, n3.y), full x extent, z restricted to [n1.z, n2.z).
{
const int3 m1 = (int3){n0.x, n2.y, n1.z};
const int3 m2 = (int3){n3.x, n3.y, n2.z};
acNodeIntegrateSubstep(node, STREAM_4, isubstep, m1, m2, dt);
}
// Left
// x in [n0.x, n1.x), y and z restricted to [n1, n2).
{
const int3 m1 = (int3){n0.x, n1.y, n1.z};
const int3 m2 = (int3){n1.x, n2.y, n2.z};
acNodeIntegrateSubstep(node, STREAM_5, isubstep, m1, m2, dt);
}
// Right
// x in [n2.x, n3.x), y and z restricted to [n1, n2).
{
const int3 m1 = (int3){n2.x, n1.y, n1.z};
const int3 m2 = (int3){n3.x, n2.y, n2.z};
acNodeIntegrateSubstep(node, STREAM_6, isubstep, m1, m2, dt);
}
// NOTE(review): buffers are swapped without first synchronizing the
// integration streams used above; the next synchronization is the
// STREAM_ALL call at the top of the next iteration (or after the loop).
// Confirm that acNodeSwapBuffers is safe while those launches are pending.
acNodeSwapBuffers(node);
}
acNodeSynchronizeStream(node, STREAM_ALL);
return AC_SUCCESS;
}
AcResult AcResult
acNodePeriodicBoundcondStep(const Node node, const Stream stream, acNodePeriodicBoundcondStep(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle) const VertexBufferHandle vtxbuf_handle)