Added the optimized implementation of acNodeIntegrate, where boundary conditions are applied before integration instead of after
This commit is contained in:
@@ -104,7 +104,27 @@ typedef enum {
|
||||
NUM_REDUCTION_TYPES
|
||||
} ReductionType;
|
||||
|
||||
/**
 * Identifiers for the streams that work can be enqueued on.
 *
 * STREAM_DEFAULT is the first enumerator (value 0), followed by the numbered
 * streams STREAM_0 .. STREAM_16. NUM_STREAM_TYPES is a count sentinel and
 * must stay last: it equals the number of usable stream identifiers.
 *
 * Note that code which casts a zero-based integer index to Stream maps
 * index 0 to STREAM_DEFAULT and index k (k >= 1) to STREAM_(k-1).
 */
typedef enum {
    STREAM_DEFAULT,
    STREAM_0,
    STREAM_1,
    STREAM_2,
    STREAM_3,
    STREAM_4,
    STREAM_5,
    STREAM_6,
    STREAM_7,
    STREAM_8,
    STREAM_9,
    STREAM_10,
    STREAM_11,
    STREAM_12,
    STREAM_13,
    STREAM_14,
    STREAM_15,
    STREAM_16,
    NUM_STREAM_TYPES
} Stream;

// Value one past the last real stream identifier. It is passed to
// acNodeSynchronizeStream in place of a single stream; presumably this
// selects every stream at once -- confirm against that function.
#define STREAM_ALL (NUM_STREAM_TYPES)
|
||||
|
||||
// Identity macro: expands to its argument unchanged.
#define AC_GEN_ID(X) X
|
||||
|
@@ -73,7 +73,8 @@ acStore(AcMesh* host_mesh)
|
||||
/**
 * Advance the simulation on the first node (nodes[0]) by one time step.
 *
 * @param dt  Length of the time step.
 * @return    The result of the trailing acBoundcondStep() call.
 *
 * acNodeIntegrate applies boundary conditions before each substep rather
 * than after it, so a boundary-condition step is run here once integration
 * has finished. The result of acNodeIntegrate itself is not checked.
 */
AcResult
acIntegrate(const AcReal dt)
{
    acNodeIntegrate(nodes[0], dt);
    return acBoundcondStep();
}
|
||||
|
||||
AcResult
|
||||
|
124
src/core/node.cu
124
src/core/node.cu
@@ -478,25 +478,6 @@ acNodeIntegrateSubstep(const Node node, const Stream stream, const int isubstep,
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Advance `node` by one time step `dt` using three substeps, all issued on
 * STREAM_DEFAULT.
 *
 * For each substep: periodic boundary conditions are applied, the region
 * from (NGHOST, NGHOST, NGHOST) to that point plus node->grid.n is
 * integrated, and the buffers are swapped. After the final substep the
 * periodic boundary conditions are applied once more. All streams are
 * synchronized on entry and again before returning.
 *
 * @param node  Node to integrate.
 * @param dt    Length of the time step.
 * @return      Always AC_SUCCESS.
 *
 * NOTE(review): a second definition of acNodeIntegrate appears further down
 * in this view; only one of the two can be kept in a compilable file.
 */
AcResult
acNodeIntegrate(const Node node, const AcReal dt)
{
    acNodeSynchronizeStream(node, STREAM_ALL);

    int substep = 0;
    while (substep < 3) {
        acNodePeriodicBoundconds(node, STREAM_DEFAULT);
        acNodeSynchronizeStream(node, STREAM_DEFAULT); // DEBUG

        // Integration bounds: skip the NGHOST-wide layer on the low side
        // and cover grid.n cells from there.
        const int3 lo = (int3){NGHOST, NGHOST, NGHOST};
        const int3 hi = lo + node->grid.n;
        acNodeIntegrateSubstep(node, STREAM_DEFAULT, substep, lo, hi, dt);

        acNodeSwapBuffers(node);
        acNodeSynchronizeStream(node, STREAM_DEFAULT); // DEBUG

        ++substep;
    }

    acNodePeriodicBoundconds(node, STREAM_DEFAULT); // DEBUG
    acNodeSynchronizeStream(node, STREAM_ALL);
    return AC_SUCCESS;
}
|
||||
|
||||
static AcResult
|
||||
local_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf)
|
||||
{
|
||||
@@ -550,6 +531,111 @@ global_boundcondstep(const Node node, const Stream stream, const VertexBufferHan
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Advance `node` by one time step `dt` using three substeps, applying the
 * boundary conditions BEFORE each substep's integration and splitting the
 * integration into regions issued on separate streams.
 *
 * Per substep:
 *   1. local_boundcondstep is run for every vertex buffer, each on the
 *      stream whose index equals the vertex buffer index.
 *   2. The per-device "inner inner" region is integrated on STREAM_16.
 *   3. NGHOST-thick slabs of every vertex buffer are transferred between
 *      neighbouring devices (with wrap-around), again one stream per
 *      vertex buffer.
 *   4. The transfer streams are synchronized; STREAM_16 is not, so the
 *      integration from step 2 is not waited on here.
 *   5. The remaining regions ("inner outer" slabs across device boundaries
 *      and the six outer faces) are integrated on STREAM_0 .. STREAM_6.
 *   6. Buffers are swapped.
 *
 * No boundary-condition step follows the final substep; callers that need
 * one must run it themselves.
 *
 * @param node  Node to integrate.
 * @param dt    Length of the time step.
 * @return      Always AC_SUCCESS; results of the individual calls are not
 *              checked.
 *
 * NOTE(review): casting a vertex buffer index to Stream assumes
 * NUM_VTXBUF_HANDLES does not exceed the number of Stream identifiers, and
 * those streams numerically overlap STREAM_0.. used in step 5 -- confirm
 * this is intended.
 */
AcResult
acNodeIntegrate(const Node node, const AcReal dt)
{
    acNodeSynchronizeStream(node, STREAM_ALL);

    // Index markers along each axis (presumably x = ghost cells and
    // O = computational cells -- confirm):
    //
    //   xxx|OOO OOOOOOOOO OOO|xxx
    //      ^   ^         ^   ^
    //      n0  n1        n2  n3
    //
    // n0 = NGHOST, n1 = 2 * NGHOST, n2 = grid.n, n3 = NGHOST + grid.n
    const int3 n0 = (int3){NGHOST, NGHOST, NGHOST};
    const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
    const int3 n2 = node->grid.n;
    const int3 n3 = n0 + node->grid.n;

    for (int isubstep = 0; isubstep < 3; ++isubstep) {
        // Boundary conditions first, one stream per vertex buffer
        acNodeSynchronizeStream(node, STREAM_ALL);
        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            local_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
        }
        acNodeSynchronizeStream(node, STREAM_ALL);

        // Inner inner: for each device, the part of its z-slab that starts
        // 2 * NGHOST in and spans subgrid.n - 2 * NGHOST along every axis.
        for (int i = 0; i < node->num_devices; ++i) {
            const int3 m1 = n1 + (int3){0, 0, i * node->subgrid.n.z};
            const int3 m2 = m1 + node->subgrid.n - (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
            acNodeIntegrateSubstep(node, STREAM_16, isubstep, m1, m2, dt);
        }

        // Exchange NGHOST-thick z-slabs between neighbouring devices
        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            // One slab: full subgrid.m.x * subgrid.m.y plane, NGHOST layers deep
            const int num_vertices = node->subgrid.m.x * node->subgrid.m.y * NGHOST;

            for (int device_id = 0; device_id < node->num_devices; ++device_id) {
                // ...|ooooxxx|... -> xxx|ooooooo|...
                // Send to the next device (wrapping around at the end)
                {
                    const int3 src = (int3){0, 0, node->subgrid.n.z};
                    const int3 dst = (int3){0, 0, 0};
                    acDeviceTransferVertexBufferWithOffset(
                        node->devices[device_id], (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, src,
                        dst, num_vertices, node->devices[(device_id + 1) % node->num_devices]);
                }
                // ...|ooooooo|xxx <- ...|xxxoooo|...
                // Send to the previous device; adding num_devices keeps the
                // modulo operand non-negative for device 0
                {
                    const int3 src = (int3){0, 0, NGHOST};
                    const int3 dst = (int3){0, 0, NGHOST + node->subgrid.n.z};
                    acDeviceTransferVertexBufferWithOffset(
                        node->devices[device_id], (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, src,
                        dst, num_vertices,
                        node->devices[(device_id - 1 + node->num_devices) % node->num_devices]);
                }
            }
        }

        // Wait only for the streams the transfers above were enqueued on
        // (indices 0 .. NUM_VTXBUF_HANDLES - 1). A larger bound could reach
        // STREAM_16 and stall on the inner integration, hit STREAM_ALL, or
        // index past the Stream enum.
        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            acNodeSynchronizeStream(node, (Stream)vtxbuf);
        }

        // Inner outer: 2 * NGHOST-thick z-slabs beginning at
        // z = (i + 1) * subgrid.n.z, i.e. the layers on both sides of the
        // boundary between device i and device i + 1; the x/y extent
        // matches the inner region.
        for (int i = 0; i < node->num_devices - 1; ++i) {
            const int3 m1 = n1 + (int3){0, 0, (i + 1) * node->subgrid.n.z - 2 * NGHOST};
            const int3 m2 = m1 + (int3){node->subgrid.n.x - 2 * NGHOST,
                                        node->subgrid.n.y - 2 * NGHOST, 2 * NGHOST};
            acNodeIntegrateSubstep(node, STREAM_0, isubstep, m1, m2, dt);
        }

        // Outer: six face regions of the node-wide domain, one stream each

        // Front: z from n0 to n1, full x/y extent
        {
            const int3 m1 = (int3){n0.x, n0.y, n0.z};
            const int3 m2 = (int3){n3.x, n3.y, n1.z};
            acNodeIntegrateSubstep(node, STREAM_1, isubstep, m1, m2, dt);
        }

        // Back: z from n2 to n3, full x/y extent
        {
            const int3 m1 = (int3){n0.x, n0.y, n2.z};
            const int3 m2 = (int3){n3.x, n3.y, n3.z};
            acNodeIntegrateSubstep(node, STREAM_2, isubstep, m1, m2, dt);
        }

        // Top: y from n0 to n1, full x extent, z between front and back
        {
            const int3 m1 = (int3){n0.x, n0.y, n1.z};
            const int3 m2 = (int3){n3.x, n1.y, n2.z};
            acNodeIntegrateSubstep(node, STREAM_3, isubstep, m1, m2, dt);
        }

        // Bottom: y from n2 to n3, full x extent, z between front and back
        {
            const int3 m1 = (int3){n0.x, n2.y, n1.z};
            const int3 m2 = (int3){n3.x, n3.y, n2.z};
            acNodeIntegrateSubstep(node, STREAM_4, isubstep, m1, m2, dt);
        }

        // Left: x from n0 to n1, y and z restricted to n1..n2
        {
            const int3 m1 = (int3){n0.x, n1.y, n1.z};
            const int3 m2 = (int3){n1.x, n2.y, n2.z};
            acNodeIntegrateSubstep(node, STREAM_5, isubstep, m1, m2, dt);
        }

        // Right: x from n2 to n3, y and z restricted to n1..n2
        {
            const int3 m1 = (int3){n2.x, n1.y, n1.z};
            const int3 m2 = (int3){n3.x, n2.y, n2.z};
            acNodeIntegrateSubstep(node, STREAM_6, isubstep, m1, m2, dt);
        }

        acNodeSwapBuffers(node);
    }
    acNodeSynchronizeStream(node, STREAM_ALL);
    return AC_SUCCESS;
}
|
||||
|
||||
AcResult
|
||||
acNodePeriodicBoundcondStep(const Node node, const Stream stream,
|
||||
const VertexBufferHandle vtxbuf_handle)
|
||||
|
Reference in New Issue
Block a user