Crafting code. Attempting to figure out the MPI domain decomposition etc.
@@ -433,6 +433,27 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3
    return AC_SUCCESS;
}

AcResult
acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
                             const VertexBufferHandle vtxbuf_handle, const int3 start,
                             const int3 end)
{
    cudaSetDevice(device->id);
    return acKernelGeneralBoundconds(device->streams[stream], start, end,
                                     device->vba.in[vtxbuf_handle]);
}

AcResult
acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
                          const int3 end)
{
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end);
    }
    return AC_SUCCESS;
}

AcResult
acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype,
                   const VertexBufferHandle vtxbuf_handle, AcReal* result)
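For context, a minimal usage sketch of the new entry point, mirroring the call added to acGridIntegrate further below. STREAM_DEFAULT, nn, and the surrounding device handle are taken from context as assumptions; they are not part of this hunk:

// Sketch only: apply the general boundary conditions to all vertex buffers of one
// device over an assumed region [b0, b1), then wait for the queued work to finish.
const int3 b0 = (int3){0, 0, 0};
const int3 b1 = nn; // assumed local mesh extent, matching the call in acGridIntegrate below
acDeviceGeneralBoundconds(device, STREAM_DEFAULT, b0, b1);
acDeviceSynchronizeStream(device, STREAM_DEFAULT);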
@@ -1652,6 +1673,13 @@ acGridIntegrate(const Stream stream, const AcReal dt)
    acGridLoadScalarUniform(stream, AC_dt, dt);
    acDeviceSynchronizeStream(device, stream);

    // Check the position in the MPI frame
    int nprocs, pid;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);
    const uint3_64 decomposition = decompose(nprocs);
    const int3 pid3d = getPid3D(pid, decomposition);

    // Corners
#if MPI_INCL_CORNERS
    // Do not rm: required for corners
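The decomposition helpers used here are not part of this hunk. The sketch below is only a guess at the contract they are assumed to satisfy (a uint3_64 process-grid shape from decompose() and an x-fastest rank-to-coordinate mapping from getPid3D()); the actual implementations in the codebase may differ:

// Hypothetical sketch of the assumed helper contract, not the real implementation.
// Distribute the prime factors of nprocs round-robin over x, y and z.
static uint3_64
decompose_sketch(int nprocs)
{
    uint3_64 p = (uint3_64){1, 1, 1};
    int axis  = 0;
    for (int f = 2; nprocs > 1;) {
        if (nprocs % f == 0) {
            if (axis == 0)
                p.x *= f;
            else if (axis == 1)
                p.y *= f;
            else
                p.z *= f;
            axis = (axis + 1) % 3;
            nprocs /= f;
        }
        else {
            ++f;
        }
    }
    return p;
}

// Hypothetical sketch: flat MPI rank -> 3D coordinates, x running fastest.
static int3
getPid3D_sketch(const int pid, const uint3_64 d)
{
    return (int3){(int)(pid % d.x), (int)((pid / d.x) % d.y), (int)(pid / (d.x * d.y))};
}

Under this convention, 8 ranks decompose to a 2 x 2 x 2 grid and rank 5 maps to (1, 0, 1).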
@@ -1805,6 +1833,21 @@ acGridIntegrate(const Stream stream, const AcReal dt)
    acSyncCommData(sidexz_data);
    acSyncCommData(sideyz_data);
#endif // MPI_COMM_ENABLED

    // Set outer boundaries after substep computation.
    const int3 m1 = (int3){0, 0, 0};
    const int3 m2 = nn;
    const int3 pid3d = getPid3D(pid, decomposition);
    // If we are a boundary element
    if ((pid3d.x == 0) || (pid3d.x == decomposition.x - 1) ||
        (pid3d.y == 0) || (pid3d.y == decomposition.y - 1) ||
        (pid3d.z == 0) || (pid3d.z == decomposition.z - 1))
    {
        acDeviceGeneralBoundconds(device, stream, m1, m2);
    }
    acGridSynchronizeStream(stream);

#if MPI_COMPUTE_ENABLED
    { // Front
        const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
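The condition above flags any rank whose coordinate lies on either face of the process grid. A hypothetical helper expressing the same predicate (the name and the <stdbool.h> usage are illustrative, not part of this commit):

#include <stdbool.h>

// Hypothetical helper: true iff this rank owns part of the global domain boundary.
static bool
is_on_global_boundary(const int3 pid3d, const uint3_64 decomposition)
{
    return (pid3d.x == 0) || (pid3d.x == (int)decomposition.x - 1) ||
           (pid3d.y == 0) || (pid3d.y == (int)decomposition.y - 1) ||
           (pid3d.z == 0) || (pid3d.z == (int)decomposition.z - 1);
}

Note that for small process grids (e.g. 2 x 2 x 2, or any direction decomposed into a single slab) every rank satisfies this test, so every rank ends up calling acDeviceGeneralBoundconds.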
@@ -1840,8 +1883,12 @@ acGridIntegrate(const Stream stream, const AcReal dt)
        acDeviceSwapBuffers(device);
        acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done
        ////////////////////////////////////////////
    }

    return AC_SUCCESS;
}