Working 3D decomposition, unoptimized

This commit is contained in:
jpekkila
2020-01-16 21:47:05 +02:00
parent 29b38d3b89
commit 9264b7515a

View File

@@ -1211,10 +1211,10 @@ acDeviceGatherMeshMPI(const AcMesh src, const int3 decomposition, AcMesh* dst)
if (pid == 0) { if (pid == 0) {
for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) { for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
// For pencils // For pencils
for (int k = NGHOST; k < NGHOST + nn.z; ++k) { for (int k = 0; k < mm.z; ++k) {
for (int j = NGHOST; j < NGHOST + nn.y; ++j) { for (int j = 0; j < mm.y; ++j) {
const int i = NGHOST; const int i = 0;
const int count = nn.x; const int count = mm.x;
const int src_idx = acVertexBufferIdx(i, j, k, src.info); const int src_idx = acVertexBufferIdx(i, j, k, src.info);
const int dst_idx = acVertexBufferIdx(i, j, k, dst->info); const int dst_idx = acVertexBufferIdx(i, j, k, dst->info);
memcpy(&dst->vertex_buffer[vtxbuf][dst_idx], // memcpy(&dst->vertex_buffer[vtxbuf][dst_idx], //
@@ -1227,10 +1227,10 @@ acDeviceGatherMeshMPI(const AcMesh src, const int3 decomposition, AcMesh* dst)
for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) { for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
// For pencils // For pencils
for (int k = NGHOST; k < NGHOST + nn.z; ++k) { for (int k = 0; k < mm.z; ++k) {
for (int j = NGHOST; j < NGHOST + nn.y; ++j) { for (int j = 0; j < mm.y; ++j) {
const int i = NGHOST; const int i = 0;
const int count = nn.x; const int count = mm.x;
if (pid != 0) { if (pid != 0) {
// Send // Send
@@ -1716,6 +1716,8 @@ acDeviceCommunicateCornersMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
MPI_Barrier(MPI_COMM_WORLD);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -1826,6 +1828,7 @@ acDeviceCommunicateEdgesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -1912,6 +1915,7 @@ acDeviceCommunicateEdgesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -1998,6 +2002,7 @@ acDeviceCommunicateEdgesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -2103,6 +2108,7 @@ acDeviceCommunicateSidesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -2182,6 +2188,7 @@ acDeviceCommunicateSidesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -2261,6 +2268,7 @@ acDeviceCommunicateSidesMPI(const Device device)
PackedData src_host = acCreatePackedDataHost(dims); PackedData src_host = acCreatePackedDataHost(dims);
PackedData dst_host = acCreatePackedDataHost(dims); PackedData dst_host = acCreatePackedDataHost(dims);
acTransferPackedDataToHost(src, &src_host); acTransferPackedDataToHost(src, &src_host);
acDeviceSynchronizeStream(device, STREAM_ALL);
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
const int3 pid3d = getPid3D(pid, decomp); const int3 pid3d = getPid3D(pid, decomp);
@@ -2564,10 +2572,14 @@ acDeviceRunMPITest(void)
// const float dt = FLT_EPSILON; // TODO // const float dt = FLT_EPSILON; // TODO
// acDeviceIntegrateStepMPI(device, dt); // TODO // acDeviceIntegrateStepMPI(device, dt); // TODO
// acDeviceBoundStepMPI(device); TODO // acDeviceBoundStepMPI(device); TODO
acDeviceSynchronizeStream(device, STREAM_ALL);
MPI_Barrier(MPI_COMM_WORLD);
acDeviceCommunicateHalosMPI(device); acDeviceCommunicateHalosMPI(device);
acDeviceSynchronizeStream(device, STREAM_ALL); acDeviceSynchronizeStream(device, STREAM_ALL);
MPI_Barrier(MPI_COMM_WORLD);
// acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh); // TODO re-enable acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
acDeviceSynchronizeStream(device, STREAM_DEFAULT);
acDeviceGatherMeshMPI(submesh, decomposition, &candidate); acDeviceGatherMeshMPI(submesh, decomposition, &candidate);
if (pid == 0) { if (pid == 0) {
// acModelIntegrateStep(model, FLT_EPSILON); // TODO // acModelIntegrateStep(model, FLT_EPSILON); // TODO