Added the revised node interface

This commit is contained in:
jpekkila
2019-08-01 14:04:11 +03:00
parent 4594d73022
commit 328b809efe
2 changed files with 188 additions and 85 deletions

View File

@@ -48,7 +48,7 @@ AcResult acNodeAutoOptimize(const Node node);
/** */ /** */
AcResult acNodeSynchronizeStream(const Node node, const Stream stream); AcResult acNodeSynchronizeStream(const Node node, const Stream stream);
/** */ /** Deprecated ? */
AcResult acNodeSynchronizeVertexBuffer(const Node node, const Stream stream, AcResult acNodeSynchronizeVertexBuffer(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle); // Not in Device const VertexBufferHandle vtxbuf_handle); // Not in Device
@@ -62,7 +62,8 @@ AcResult acNodeSwapBuffers(const Node node);
AcResult acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param, AcResult acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param,
const AcReal value); const AcReal value);
/** */ /** Deprecated ? Might be useful though if the user wants to load only one vtxbuf. But in this case
* the user should supply a AcReal* instead of vtxbuf_handle */
AcResult acNodeLoadVertexBufferWithOffset(const Node node, const Stream stream, AcResult acNodeLoadVertexBufferWithOffset(const Node node, const Stream stream,
const AcMesh host_mesh, const AcMesh host_mesh,
const VertexBufferHandle vtxbuf_handle, const int3 src, const VertexBufferHandle vtxbuf_handle, const int3 src,
@@ -72,14 +73,14 @@ AcResult acNodeLoadVertexBufferWithOffset(const Node node, const Stream stream,
AcResult acNodeLoadMeshWithOffset(const Node node, const Stream stream, const AcMesh host_mesh, AcResult acNodeLoadMeshWithOffset(const Node node, const Stream stream, const AcMesh host_mesh,
const int3 src, const int3 dst, const int num_vertices); const int3 src, const int3 dst, const int num_vertices);
/** */ /** Deprecated ? */
AcResult acNodeLoadVertexBuffer(const Node node, const Stream stream, const AcMesh host_mesh, AcResult acNodeLoadVertexBuffer(const Node node, const Stream stream, const AcMesh host_mesh,
const VertexBufferHandle vtxbuf_handle); const VertexBufferHandle vtxbuf_handle);
/** */ /** */
AcResult acNodeLoadMesh(const Node node, const Stream stream, const AcMesh host_mesh); AcResult acNodeLoadMesh(const Node node, const Stream stream, const AcMesh host_mesh);
/** */ /** Deprecated ? */
AcResult acNodeStoreVertexBufferWithOffset(const Node node, const Stream stream, AcResult acNodeStoreVertexBufferWithOffset(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 src, const VertexBufferHandle vtxbuf_handle, const int3 src,
const int3 dst, const int num_vertices, const int3 dst, const int num_vertices,
@@ -89,41 +90,26 @@ AcResult acNodeStoreVertexBufferWithOffset(const Node node, const Stream stream,
AcResult acNodeStoreMeshWithOffset(const Node node, const Stream stream, const int3 src, AcResult acNodeStoreMeshWithOffset(const Node node, const Stream stream, const int3 src,
const int3 dst, const int num_vertices, AcMesh* host_mesh); const int3 dst, const int num_vertices, AcMesh* host_mesh);
/** */ /** Deprecated ? */
AcResult acNodeStoreVertexBuffer(const Node node, const Stream stream, AcResult acNodeStoreVertexBuffer(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, AcMesh* host_mesh); const VertexBufferHandle vtxbuf_handle, AcMesh* host_mesh);
/** */ /** */
AcResult acNodeStoreMesh(const Node node, const Stream stream, AcMesh* host_mesh); AcResult acNodeStoreMesh(const Node node, const Stream stream, AcMesh* host_mesh);
/** */
AcResult acNodeTransferVertexBufferWithOffset(const Node src_node, const Stream stream,
const VertexBufferHandle vtxbuf_handle,
const int3 src, const int3 dst,
const int num_vertices, Node dst_node);
/** */
AcResult acNodeTransferMeshWithOffset(const Node src_node, const Stream stream, const int3 src,
const int3 dst, const int num_vertices, Node* dst_node);
/** */
AcResult acNodeTransferVertexBuffer(const Node src_node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, Node* dst_node);
/** */
AcResult acNodeTransferMesh(const Node src_node, const Stream stream, Node* dst_node);
/** */ /** */
AcResult acNodeIntegrateSubstep(const Node node, const Stream stream, const int step_number, AcResult acNodeIntegrateSubstep(const Node node, const Stream stream, const int step_number,
const int3 start, const int3 end, const AcReal dt); const int3 start, const int3 end, const AcReal dt);
/** */
AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 start,
const int3 end);
/** */ /** */
AcResult acNodePeriodicBoundconds(const Node node, const Stream stream, const int3 start, AcResult acNodeIntegrate(const Node node, const AcReal dt);
const int3 end);
/** */
AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle);
/** */
AcResult acNodePeriodicBoundconds(const Node node, const Stream stream);
/** */ /** */
AcResult acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype, AcResult acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype,

View File

@@ -370,15 +370,22 @@ AcResult
acNodeLoadVertexBuffer(const Node node, const Stream stream, const AcMesh host_mesh, acNodeLoadVertexBuffer(const Node node, const Stream stream, const AcMesh host_mesh,
const VertexBufferHandle vtxbuf_handle) const VertexBufferHandle vtxbuf_handle)
{ {
WARNING("Not implemented"); const int3 src = (int3){0, 0, 0};
return AC_FAILURE; const int3 dst = src;
const size_t num_vertices = acVertexBufferSize(host_mesh.info);
acNodeLoadVertexBufferWithOffset(node, stream, host_mesh, vtxbuf_handle, src, dst,
num_vertices);
return AC_SUCCESS;
} }
AcResult AcResult
acNodeLoadMesh(const Node node, const Stream stream, const AcMesh host_mesh) acNodeLoadMesh(const Node node, const Stream stream, const AcMesh host_mesh)
{ {
WARNING("Not implemented"); for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
return AC_FAILURE; acNodeLoadVertexBuffer(node, stream, host_mesh, (VertexBufferHandle)i);
}
return AC_SUCCESS;
} }
AcResult AcResult
@@ -386,95 +393,199 @@ acNodeStoreVertexBufferWithOffset(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 src, const VertexBufferHandle vtxbuf_handle, const int3 src,
const int3 dst, const int num_vertices, AcMesh* host_mesh) const int3 dst, const int num_vertices, AcMesh* host_mesh)
{ {
WARNING("Not implemented"); for (int i = 0; i < num_devices; ++i) {
return AC_FAILURE; const int3 d0 = (int3){0, 0, i * subgrid.n.z}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.m.z};
const int3 s0 = src;
const int3 s1 = gridIdx3d(grid, gridIdx(grid, s0) + num_vertices);
const int3 da = max(s0, d0);
const int3 db = min(s1, d1);
if (db.z >= da.z) {
const int copy_cells = gridIdx(subgrid, db) - gridIdx(subgrid, da);
// DECOMPOSITION OFFSET HERE
const int3 da_local = (int3){da.x, da.y, da.z - i * grid.n.z / num_devices};
acDeviceStoreVertexBufferWithOffset(devices[i], stream, vtxbuf_handle, da_local, da,
copy_cells, host_mesh);
}
}
return AC_SUCCESS;
} }
AcResult AcResult
acNodeStoreMeshWithOffset(const Node node, const Stream stream, const int3 src, const int3 dst, acNodeStoreMeshWithOffset(const Node node, const Stream stream, const int3 src, const int3 dst,
const int num_vertices, AcMesh* host_mesh) const int num_vertices, AcMesh* host_mesh)
{ {
WARNING("Not implemented"); for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
return AC_FAILURE; acNodeStoreVertexBufferWithOffset(node, stream, (VertexBufferHandle)i, src, dst,
num_vertices, host_mesh);
}
return AC_SUCCESS;
} }
AcResult AcResult
acNodeStoreVertexBuffer(const Node node, const Stream stream, acNodeStoreVertexBuffer(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, AcMesh* host_mesh) const VertexBufferHandle vtxbuf_handle, AcMesh* host_mesh)
{ {
WARNING("Not implemented"); const int3 src = (int3){0, 0, 0};
return AC_FAILURE; const int3 dst = src;
const size_t num_vertices = acVertexBufferSize(host_mesh.info);
acNodeStoreVertexBufferWithOffset(node, stream, vtxbuf_handle, src, dst, num_vertices,
host_mesh);
return AC_SUCCESS;
} }
AcResult AcResult
acNodeStoreMesh(const Node node, const Stream stream, AcMesh* host_mesh) acNodeStoreMesh(const Node node, const Stream stream, AcMesh* host_mesh)
{ {
WARNING("Not implemented"); for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
return AC_FAILURE; acNodeStoreVertexBuffer(node, stream, (VertexBufferHandle)i, host_mesh);
} }
return AC_SUCCESS;
AcResult
acNodeTransferVertexBufferWithOffset(const Node src_node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 src,
const int3 dst, const int num_vertices, Node dst_node)
{
WARNING("Not implemented");
return AC_FAILURE;
}
AcResult
acNodeTransferMeshWithOffset(const Node src_node, const Stream stream, const int3 src,
const int3 dst, const int num_vertices, Node* dst_node)
{
WARNING("Not implemented");
return AC_FAILURE;
}
AcResult
acNodeTransferVertexBuffer(const Node src_node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, Node* dst_node)
{
WARNING("Not implemented");
return AC_FAILURE;
}
AcResult
acNodeTransferMesh(const Node src_node, const Stream stream, Node* dst_node)
{
WARNING("Not implemented");
return AC_FAILURE;
} }
AcResult AcResult
acNodeIntegrateSubstep(const Node node, const Stream stream, const int step_number, acNodeIntegrateSubstep(const Node node, const Stream stream, const int step_number,
const int3 start, const int3 end, const AcReal dt) const int3 start, const int3 end, const AcReal dt)
{ {
WARNING("Not implemented"); for (int i = 0; i < num_devices; ++i) {
return AC_FAILURE; // DECOMPOSITION OFFSET HERE
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * subgrid.n.z};
const int3 d1 = d0 + (int3){subgrid.n.x, subgrid.n.y, subgrid.n.z};
const int3 da = max(start, d0);
const int3 db = min(end, d1);
if (db.z >= da.z) {
const int3 da_local = da - (int3){0, 0, i * subgrid.n.z};
const int3 db_local = db - (int3){0, 0, i * subgrid.n.z};
acDeviceIntegrateSubstep(devices[i], stream, isubstep, da_local, db_local, dt);
}
}
return AC_SUCCESS;
}
AcResult
acNodeIntegrate(const Node node, const AcReal dt)
{
acNodeSynchronizeStream(STREAM_ALL);
WARNING("Not implementad\n");
acNodeSynchronizeStream(STREAM_ALL);
return AC_SUCCESS;
}
static AcResult
local_boundcondstep(const Node node, const StreamType stream, const VertexBufferHandle vtxbuf)
{
if (num_devices == 1) {
acDeviceBoundcondStep(devices[0], stream, vtxbuf, (int3){0, 0, 0}, subgrid.m);
}
else {
// Local boundary conditions
for (int i = 0; i < num_devices; ++i) {
const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z};
acDeviceBoundcondStep(devices[i], stream, vtxbuf, d0, d1);
}
}
return AC_SUCCESS;
}
static AcResult
global_boundcondstep(const Node node, const StreamType stream, const VertexBufferHandle vtxbuf)
{
if (num_devices > 1) {
const size_t num_vertices = subgrid.m.x * subgrid.m.y * NGHOST;
{
// ...|ooooxxx|... -> xxx|ooooooo|...
const int3 src = (int3){0, 0, subgrid.n.z};
const int3 dst = (int3){0, 0, 0};
const Device src_device = devices[num_devices - 1];
Device dst_device = devices[0];
acDeviceTransferVertexBufferWithOffset(src_device, stream, vtxbuf_handle, src, dst,
num_vertices, dst_device);
}
{
// ...|ooooooo|xxx <- ...|xxxoooo|...
const int3 src = (int3){0, 0, NGHOST};
const int3 dst = (int3){0, 0, NGHOST + subgrid.n.z};
const Device src_device = devices[0];
Device dst_device = devices[num_devices - 1];
acDeviceTransferVertexBufferWithOffset(src_device, stream, vtxbuf_handle, src, dst,
num_vertices, dst_device);
}
}
return AC_SUCCESS;
} }
AcResult AcResult
acNodePeriodicBoundcondStep(const Node node, const Stream stream, acNodePeriodicBoundcondStep(const Node node, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 start, const VertexBufferHandle vtxbuf_handle)
const int3 end)
{ {
WARNING("Not implemented"); local_boundcondstep(node, stream, vtxbuf_handle);
return AC_FAILURE; global_boundcondstep(node, stream, vtxbuf_handle);
acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle);
return AC_SUCCESS;
} }
AcResult AcResult
acNodePeriodicBoundconds(const Node node, const Stream stream, const int3 start, const int3 end) acNodePeriodicBoundconds(const Node node, const Stream stream)
{ {
WARNING("Not implemented"); for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
return AC_FAILURE; acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i);
}
return AC_SUCCESS;
}
static AcReal
simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, const int& n)
{
AcReal res = results[0];
for (int i = 1; i < n; ++i) {
if (rtype == RTYPE_MAX) {
res = max(res, results[i]);
}
else if (rtype == RTYPE_MIN) {
res = min(res, results[i]);
}
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
res = sum(res, results[i]);
}
else {
ERROR("Invalid rtype");
}
}
if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
res = sqrt(inv_n * res);
}
return res;
} }
AcResult AcResult
acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype, acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype,
const VertexBufferHandle vtxbuf_handle, AcReal* result) const VertexBufferHandle vtxbuf_handle, AcReal* result)
{ {
WARNING("Not implemented"); acSynchronizeStream(STREAM_ALL);
return AC_FAILURE;
AcReal results[num_devices];
for (int i = 0; i < num_devices; ++i) {
acDeviceReduceScal(devices[i], STREAM_DEFAULT, rtype, vtxbuffer_handle, &results[i]);
}
return simple_final_reduce_scal(rtype, results, num_devices);
} }
AcResult AcResult
@@ -482,6 +593,12 @@ acNodeReduceVec(const Node node, const Stream stream_type, const ReductionType r
const VertexBufferHandle vtxbuf0, const VertexBufferHandle vtxbuf1, const VertexBufferHandle vtxbuf0, const VertexBufferHandle vtxbuf1,
const VertexBufferHandle vtxbuf2, AcReal* result) const VertexBufferHandle vtxbuf2, AcReal* result)
{ {
WARNING("Not implemented"); acSynchronizeStream(STREAM_ALL);
return AC_FAILURE;
AcReal results[num_devices];
for (int i = 0; i < num_devices; ++i) {
acDeviceReduceScal(devices[i], STREAM_DEFAULT, rtype, a, b, c, &results[i]);
}
return simple_final_reduce_scal(rtype, results, num_devices);
} }