@@ -20,9 +20,15 @@
uniform int AC_max_steps;
uniform int AC_save_steps;
uniform int AC_bin_steps;
uniform int AC_bc_type;
uniform int AC_start_step;

uniform int AC_bc_type_top_x;
uniform int AC_bc_type_bot_x;
uniform int AC_bc_type_top_y;
uniform int AC_bc_type_bot_y;
uniform int AC_bc_type_top_z;
uniform int AC_bc_type_bot_z;

// Real params
uniform Scalar AC_dt;
uniform Scalar AC_max_time;
@@ -13,6 +13,15 @@ AC_dsx = 0.04908738521
AC_dsy = 0.04908738521
AC_dsz = 0.04908738521

// 0 = periodic bc, 1 = symmetric bc, 2 = antisymmetric bc
AC_bc_type_top_x = 0
AC_bc_type_top_y = 0
AC_bc_type_top_z = 0
AC_bc_type_bot_x = 0
AC_bc_type_bot_y = 0
AC_bc_type_bot_z = 0
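// For illustration only (not part of this patch): with the numbering above, a run with
// symmetric boundaries on every face would instead set
//   AC_bc_type_top_x = 1
//   AC_bc_type_bot_x = 1
//   AC_bc_type_top_y = 1
//   AC_bc_type_bot_y = 1
//   AC_bc_type_top_z = 1
//   AC_bc_type_bot_z = 1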


/*
 * =============================================================================
 * Run-time params
@@ -49,6 +49,11 @@ typedef struct {

typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;

// Names the boundary condition types and their associated numbers
typedef enum { AC_BOUNDCOND_PERIODIC = 0,
               AC_BOUNDCOND_SYMMETRIC = 1,
               AC_BOUNDCOND_ANTISYMMETRIC = 2 } AcBoundcond;

#define AC_GEN_ID(X) X,
typedef enum {
    AC_FOR_RTYPES(AC_GEN_ID) //
@@ -227,10 +232,19 @@ AcResult acStore(AcMesh* host_mesh);
 * substep and the user is responsible for calling acBoundcondStep before reading the data. */
AcResult acIntegrate(const AcReal dt);

/** Performs Runge-Kutta 3 integration. Note: Boundary conditions are not applied after the final
 * substep and the user is responsible for calling acBoundcondStepGBC before reading the data.
 * Has customizable boundary conditions. */
AcResult acIntegrateGBC(const AcMeshInfo config, const AcReal dt);

/** Applies periodic boundary conditions for the Mesh distributed among the devices visible to
 * the caller */
AcResult acBoundcondStep(void);

/** Applies general outer boundary conditions for the Mesh distributed among the devices visible to
 * the caller */
AcResult acBoundcondStepGBC(const AcMeshInfo config);

/** Does a scalar reduction with the data stored in some vertex buffer */
AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle);
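/* Usage sketch (illustrative, not part of this patch): integrate with the configurable
 * boundary conditions and refresh the ghost zones before reading the data back. The
 * AcMeshInfo "mesh_info", the host mesh "mesh" and the time step "dt" are assumed to be
 * set up by the caller, as in the standalone simulation loop further below.
 *
 *     acIntegrateGBC(mesh_info, dt);   // RK3 step using the boundary types in mesh_info
 *     acBoundcondStepGBC(mesh_info);   // apply the general outer boundary conditions
 *     acStore(mesh);                   // copy the result back to the host mesh
 */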
@@ -306,9 +320,21 @@ AcResult acGridStoreMesh(const Stream stream, AcMesh* host_mesh);
/** */
AcResult acGridIntegrate(const Stream stream, const AcReal dt);

/** */
/* MV: Commented out for a while, but saved for the future when standalone_MPI
   works with periodic boundary conditions.
AcResult
acGridIntegrateNonperiodic(const Stream stream, const AcReal dt)

AcResult acGridIntegrateNonperiodic(const Stream stream, const AcReal dt);
*/

/** */
AcResult acGridPeriodicBoundconds(const Stream stream);

/** */
AcResult acGridGeneralBoundconds(const Device device, const Stream stream);

/** TODO */
AcResult acGridReduceScal(const Stream stream, const ReductionType rtype,
                          const VertexBufferHandle vtxbuf_handle, AcReal* result);
@@ -430,6 +456,9 @@ AcResult acNodeIntegrateSubstep(const Node node, const Stream stream, const int
/** */
AcResult acNodeIntegrate(const Node node, const AcReal dt);

/** */
AcResult acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt);

/** */
AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
                                     const VertexBufferHandle vtxbuf_handle);
@@ -437,6 +466,13 @@ AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
/** */
AcResult acNodePeriodicBoundconds(const Node node, const Stream stream);

/** */
AcResult acNodeGeneralBoundcondStep(const Node node, const Stream stream,
                                    const VertexBufferHandle vtxbuf_handle, const AcMeshInfo config);

/** */
AcResult acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config);

/** */
AcResult acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype,
                          const VertexBufferHandle vtxbuf_handle, AcReal* result);
@@ -565,6 +601,16 @@ AcResult acDevicePeriodicBoundcondStep(const Device device, const Stream stream,
AcResult acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 start,
                                    const int3 end);

/** */
AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
                                      const VertexBufferHandle vtxbuf_handle, const int3 start,
                                      const int3 end, const AcMeshInfo config, const int3 bindex);

/** */
AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
                                   const int3 end, const AcMeshInfo config, const int3 bindex);


/** */
AcResult acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype,
                            const VertexBufferHandle vtxbuf_handle, AcReal* result);
@@ -374,7 +374,8 @@ run_simulation(const char* config_path)
#endif
}

acBoundcondStep();
//acBoundcondStep();
acBoundcondStepGBC(mesh_info);
acStore(mesh);
if (start_step == 0) {
    save_mesh(*mesh, 0, t_step);
@@ -440,7 +441,11 @@ run_simulation(const char* config_path)
loadForcingParamsToDevice(forcing_params);
#endif

acIntegrate(dt);
/* Now uses flexible boundary conditions */
//acIntegrate(dt);
acIntegrateGBC(mesh_info, dt);



t_step += dt;

@@ -486,7 +491,8 @@ run_simulation(const char* config_path)
acBoundcondStep();
acStore(mesh);
*/
acBoundcondStep();
//acBoundcondStep();
acBoundcondStepGBC(mesh_info);
acStore(mesh);

save_mesh(*mesh, i, t_step);
@@ -507,7 +513,8 @@ run_simulation(const char* config_path)
if (dt < dt_typical/AcReal(1e5)) {
    if (dtcounter > 10) {
        printf("dt = %e TOO LOW! Ending run at t = %#e \n", double(dt), double(t_step));
        acBoundcondStep();
        //acBoundcondStep();
        acBoundcondStepGBC(mesh_info);
        acStore(mesh);
        save_mesh(*mesh, i, t_step);
        break;
@@ -521,7 +528,8 @@ run_simulation(const char* config_path)
// End loop if nan is found
if (found_nan > 0) {
    printf("Found nan at t = %e \n", double(t_step));
    acBoundcondStep();
    //acBoundcondStep();
    acBoundcondStepGBC(mesh_info);
    acStore(mesh);
    save_mesh(*mesh, i, t_step);
    break;
@@ -536,7 +544,8 @@ run_simulation(const char* config_path)

if (found_stop == 1) {
    printf("Found STOP file at t = %e \n", double(t_step));
    acBoundcondStep();
    //acBoundcondStep();
    acBoundcondStepGBC(mesh_info);
    acStore(mesh);
    save_mesh(*mesh, i, t_step);
    break;

@@ -86,6 +86,13 @@ acIntegrate(const AcReal dt)
    return acNodeIntegrate(nodes[0], dt);
}

AcResult
acIntegrateGBC(const AcMeshInfo config, const AcReal dt)
{
    return acNodeIntegrateGBC(nodes[0], config, dt);
}


AcResult
acIntegrateStep(const int isubstep, const AcReal dt)
{
@@ -109,6 +116,12 @@ acBoundcondStep(void)
    return acNodePeriodicBoundconds(nodes[0], STREAM_DEFAULT);
}

AcResult
acBoundcondStepGBC(const AcMeshInfo config)
{
    return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT, config);
}

AcReal
acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle)
{
@@ -433,6 +433,27 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3
    return AC_SUCCESS;
}

AcResult
acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
                             const VertexBufferHandle vtxbuf_handle, const int3 start,
                             const int3 end, const AcMeshInfo config, const int3 bindex)
{
    cudaSetDevice(device->id);
    return acKernelGeneralBoundconds(device->streams[stream], start, end,
                                     device->vba.in[vtxbuf_handle], vtxbuf_handle, config, bindex);
}

AcResult
acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
                          const int3 end, const AcMeshInfo config, const int3 bindex)
{
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, config, bindex);
    }
    return AC_SUCCESS;
}


AcResult
acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype,
                   const VertexBufferHandle vtxbuf_handle, AcReal* result)
@@ -1433,6 +1454,50 @@ acGridStoreMesh(const Stream stream, AcMesh* host_mesh)
    return AC_SUCCESS;
}

/* MV: Commented out for a while, but saved for the future when standalone_MPI
   works with periodic boundary conditions.
AcResult
acGridGeneralBoundconds(const Device device, const Stream stream)
{
    // Non-periodic boundary conditions
    // Check the position in the MPI frame
    int nprocs, pid;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);
    const uint3_64 decomposition = decompose(nprocs);
    const int3 pid3d = getPid3D(pid, decomposition);

    // Set outer boundaries after substep computation.
    const int3 m1 = (int3){0, 0, 0};
    const int3 m2 = grid.nn;
    // If we are a boundary element
    int3 bindex = (int3){0, 0, 0};

    // Check if there are active boundary condition edges.
    // 0 is no boundary, 1 both edges, 2 is top edge, 3 bottom edge
    if ((pid3d.x == 0) && (pid3d.x == decomposition.x - 1)) { bindex.x = 1; }
    else if (pid3d.x == 0) { bindex.x = 2; }
    else if (pid3d.x == decomposition.x - 1) { bindex.x = 3; }

    if ((pid3d.y == 0) && (pid3d.y == decomposition.y - 1)) { bindex.y = 1; }
    else if (pid3d.y == 0) { bindex.y = 2; }
    else if (pid3d.y == decomposition.y - 1) { bindex.y = 3; }

    if ((pid3d.z == 0) && (pid3d.z == decomposition.z - 1)) { bindex.z = 1; }
    else if (pid3d.z == 0) { bindex.z = 2; }
    else if (pid3d.z == decomposition.z - 1) { bindex.z = 3; }


    if ((bindex.x != 1) && (bindex.y != 1) && (bindex.z != 1)) {
        acDeviceGeneralBoundconds(device, stream, m1, m2, bindex);
    }
    acGridSynchronizeStream(stream);

    return AC_SUCCESS;
}
*/
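/* Worked example of the bindex encoding used above (illustrative, not part of the patch):
 * with a 2 x 1 x 1 decomposition, the rank at pid3d = {0, 0, 0} would get bindex = {2, 1, 1}
 * (one active x edge, both edges active in y and z because those directions are not split),
 * while the rank at pid3d = {1, 0, 0} would get bindex = {3, 1, 1}. */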

/*
// Unused
AcResult
@@ -1864,6 +1929,234 @@ acGridIntegrate(const Stream stream, const AcReal dt)
    return AC_SUCCESS;
}

/* MV: Commented out for a while, but saved for the future when standalone_MPI
   works with periodic boundary conditions.
AcResult
acGridIntegrateNonperiodic(const Stream stream, const AcReal dt)
{
    ERRCHK(grid.initialized);
    acGridSynchronizeStream(stream);

    const Device device = grid.device;
    const int3 nn = grid.nn;
#if MPI_INCL_CORNERS
    CommData corner_data = grid.corner_data; // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
    CommData edgex_data = grid.edgex_data;
    CommData edgey_data = grid.edgey_data;
    CommData edgez_data = grid.edgez_data;
    CommData sidexy_data = grid.sidexy_data;
    CommData sidexz_data = grid.sidexz_data;
    CommData sideyz_data = grid.sideyz_data;

    acGridLoadScalarUniform(stream, AC_dt, dt);
    acDeviceSynchronizeStream(device, stream);


    // Corners
#if MPI_INCL_CORNERS
    // Do not rm: required for corners
    const int3 corner_b0s[] = {
        (int3){0, 0, 0},
        (int3){NGHOST + nn.x, 0, 0},
        (int3){0, NGHOST + nn.y, 0},
        (int3){0, 0, NGHOST + nn.z},

        (int3){NGHOST + nn.x, NGHOST + nn.y, 0},
        (int3){NGHOST + nn.x, 0, NGHOST + nn.z},
        (int3){0, NGHOST + nn.y, NGHOST + nn.z},
        (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z},
    };
#endif // MPI_INCL_CORNERS

    // Edges X
    const int3 edgex_b0s[] = {
        (int3){NGHOST, 0, 0},
        (int3){NGHOST, NGHOST + nn.y, 0},

        (int3){NGHOST, 0, NGHOST + nn.z},
        (int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z},
    };

    // Edges Y
    const int3 edgey_b0s[] = {
        (int3){0, NGHOST, 0},
        (int3){NGHOST + nn.x, NGHOST, 0},

        (int3){0, NGHOST, NGHOST + nn.z},
        (int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z},
    };

    // Edges Z
    const int3 edgez_b0s[] = {
        (int3){0, 0, NGHOST},
        (int3){NGHOST + nn.x, 0, NGHOST},

        (int3){0, NGHOST + nn.y, NGHOST},
        (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST},
    };

    // Sides XY
    const int3 sidexy_b0s[] = {
        (int3){NGHOST, NGHOST, 0}, //
        (int3){NGHOST, NGHOST, NGHOST + nn.z}, //
    };

    // Sides XZ
    const int3 sidexz_b0s[] = {
        (int3){NGHOST, 0, NGHOST}, //
        (int3){NGHOST, NGHOST + nn.y, NGHOST}, //
    };

    // Sides YZ
    const int3 sideyz_b0s[] = {
        (int3){0, NGHOST, NGHOST}, //
        (int3){NGHOST + nn.x, NGHOST, NGHOST}, //
    };

    for (int isubstep = 0; isubstep < 3; ++isubstep) {

#if MPI_COMM_ENABLED
#if MPI_INCL_CORNERS
        acPackCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acPackCommData(device, edgex_b0s, &edgex_data);
        acPackCommData(device, edgey_b0s, &edgey_data);
        acPackCommData(device, edgez_b0s, &edgez_data);
        acPackCommData(device, sidexy_b0s, &sidexy_data);
        acPackCommData(device, sidexz_b0s, &sidexz_data);
        acPackCommData(device, sideyz_b0s, &sideyz_data);
#endif

#if MPI_COMM_ENABLED
        MPI_Barrier(MPI_COMM_WORLD);

#if MPI_GPUDIRECT_DISABLED
#if MPI_INCL_CORNERS
        acTransferCommDataToHost(device, &corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acTransferCommDataToHost(device, &edgex_data);
        acTransferCommDataToHost(device, &edgey_data);
        acTransferCommDataToHost(device, &edgez_data);
        acTransferCommDataToHost(device, &sidexy_data);
        acTransferCommDataToHost(device, &sidexz_data);
        acTransferCommDataToHost(device, &sideyz_data);
#endif
#if MPI_INCL_CORNERS
        acTransferCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acTransferCommData(device, edgex_b0s, &edgex_data);
        acTransferCommData(device, edgey_b0s, &edgey_data);
        acTransferCommData(device, edgez_b0s, &edgez_data);
        acTransferCommData(device, sidexy_b0s, &sidexy_data);
        acTransferCommData(device, sidexz_b0s, &sidexz_data);
        acTransferCommData(device, sideyz_b0s, &sideyz_data);
#endif // MPI_COMM_ENABLED

#if MPI_COMPUTE_ENABLED
        //////////// INNER INTEGRATION //////////////
        {
            const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = nn;
            acKernelIntegrateSubstep(device->streams[STREAM_16], isubstep, m1, m2, device->vba);
        }
        ////////////////////////////////////////////
#endif // MPI_COMPUTE_ENABLED

#if MPI_COMM_ENABLED
#if MPI_INCL_CORNERS
        acTransferCommDataWait(corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acTransferCommDataWait(edgex_data);
        acTransferCommDataWait(edgey_data);
        acTransferCommDataWait(edgez_data);
        acTransferCommDataWait(sidexy_data);
        acTransferCommDataWait(sidexz_data);
        acTransferCommDataWait(sideyz_data);

#if MPI_INCL_CORNERS
        acUnpinCommData(device, &corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acUnpinCommData(device, &edgex_data);
        acUnpinCommData(device, &edgey_data);
        acUnpinCommData(device, &edgez_data);
        acUnpinCommData(device, &sidexy_data);
        acUnpinCommData(device, &sidexz_data);
        acUnpinCommData(device, &sideyz_data);

#if MPI_INCL_CORNERS
        acUnpackCommData(device, corner_b0s, &corner_data);
#endif // MPI_INCL_CORNERS
        acUnpackCommData(device, edgex_b0s, &edgex_data);
        acUnpackCommData(device, edgey_b0s, &edgey_data);
        acUnpackCommData(device, edgez_b0s, &edgez_data);
        acUnpackCommData(device, sidexy_b0s, &sidexy_data);
        acUnpackCommData(device, sidexz_b0s, &sidexz_data);
        acUnpackCommData(device, sideyz_b0s, &sideyz_data);
        //////////// OUTER INTEGRATION //////////////

        // Wait for unpacking
#if MPI_INCL_CORNERS
        acSyncCommData(corner_data); // Do not rm: required for corners
#endif // MPI_INCL_CORNERS
        acSyncCommData(edgex_data);
        acSyncCommData(edgey_data);
        acSyncCommData(edgez_data);
        acSyncCommData(sidexy_data);
        acSyncCommData(sidexz_data);
        acSyncCommData(sideyz_data);
#endif // MPI_COMM_ENABLED

        // Invoke outer edge boundary conditions.
        acGridGeneralBoundconds(device, stream);

#if MPI_COMPUTE_ENABLED
        { // Front
            const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
            const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_0], isubstep, m1, m2, device->vba);
        }
        { // Back
            const int3 m1 = (int3){NGHOST, NGHOST, nn.z};
            const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_1], isubstep, m1, m2, device->vba);
        }
        { // Bottom
            const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_2], isubstep, m1, m2, device->vba);
        }
        { // Top
            const int3 m1 = (int3){NGHOST, nn.y, 2 * NGHOST};
            const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_3], isubstep, m1, m2, device->vba);
        }
        { // Left
            const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_4], isubstep, m1, m2, device->vba);
        }
        { // Right
            const int3 m1 = (int3){nn.x, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
            acKernelIntegrateSubstep(device->streams[STREAM_5], isubstep, m1, m2, device->vba);
        }
#endif // MPI_COMPUTE_ENABLED
        acDeviceSwapBuffers(device);
        acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done
        ////////////////////////////////////////////

    }



    return AC_SUCCESS;
}

*/


AcResult
acGridPeriodicBoundconds(const Stream stream)
{
@@ -1,5 +1,133 @@
#pragma once

static __global__ void
kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex, const int sign)
{
    const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x;
    const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y;
    const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z;

    // If within the start-end range (this allows threadblock dims that are not
    // divisible by end - start)
    if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z)
        return;

    // If destination index is inside the computational domain, return since
    // the boundary conditions are only applied to the ghost zones
    if (i_dst >= DCONST(AC_nx_min) && i_dst < DCONST(AC_nx_max) && j_dst >= DCONST(AC_ny_min) &&
        j_dst < DCONST(AC_ny_max) && k_dst >= DCONST(AC_nz_min) && k_dst < DCONST(AC_nz_max))
        return;

    // Find the source index
    // Map to nx, ny, nz coordinates
    int i_src, j_src, k_src, boundlocx0, boundlocx1, boundlocy0, boundlocy1, boundlocz0, boundlocz1;
    int bsize = STENCIL_ORDER / 2;

    //if (bindex.x != 0)
    //if (bindex.y != 0)
    //if (bindex.z != 0)

    // Location of the central border point.
    boundlocx0 = bsize;
    boundlocy0 = bsize;
    boundlocz0 = bsize;
    boundlocx1 = DCONST(AC_nx_max) - 1;
    boundlocy1 = DCONST(AC_ny_max) - 1;
    boundlocz1 = DCONST(AC_nz_max) - 1;
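    // The mirrored source index below is computed as i_src = 2*boundloc - i_dst, i.e. each ghost
    // cell is paired with the interior cell at the same distance from the boundary point.
    // For example (assuming STENCIL_ORDER == 6, so bsize == 3): the ghost cell i_dst == 1 lies
    // two cells below boundlocx0 == 3 and is therefore mirrored to i_src == 5.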

    // Defaults
    i_src = i_dst;
    j_src = j_dst;
    k_src = k_dst;

    if (bindex.x < 0)
    {

        // Pick up the mirroring value.
        if ((i_dst < boundlocx0))
        {
            i_src = 2*boundlocx0 - i_dst;

        } else if ((i_dst > boundlocx1))
        {
            i_src = 2*boundlocx1 - i_dst;
        }

        // Pick up the mirroring value.
        if ((j_dst < boundlocy0))
        {
            j_src = 2*boundlocy0 - j_dst;
        } else if ((j_dst > boundlocy1))
        {
            j_src = 2*boundlocy1 - j_dst;
        }

        // Pick up the mirroring value.
        if ((k_dst < boundlocz0))
        {
            k_src = 2*boundlocz0 - k_dst;
        } else if ((k_dst > boundlocz1))
        {
            k_src = 2*boundlocz1 - k_dst;
        }

        // Edges
        if ( (i_dst < boundlocx0) && (j_dst < boundlocy0) )
        {
            i_src = 2*boundlocx0 - i_dst;
            j_src = 2*boundlocy0 - j_dst;
            //if ((k_dst == 50)) printf("i_dst %i j_dst %i k_dst %i i_src %i j_src %i k_src %i bsize %i \n", i_dst, j_dst, k_dst, i_src, j_src, k_src, bsize);
        } else if ((i_dst < boundlocx0) && (k_dst < boundlocz0) )
        {
            i_src = 2*boundlocx0 - i_dst;
            k_src = 2*boundlocz0 - k_dst;
        } else if ( (j_dst < boundlocy0) && (k_dst < boundlocz0) )
        {
            j_src = 2*boundlocy0 - j_dst;
            k_src = 2*boundlocz0 - k_dst;

        } else if ((i_dst > boundlocx1) && (j_dst > boundlocy1) )
        {
            i_src = 2*boundlocx1 - i_dst;
            j_src = 2*boundlocy1 - j_dst;
        } else if ( (i_dst > boundlocx1) && (k_dst > boundlocz1) )
        {
            i_src = 2*boundlocx1 - i_dst;
            k_src = 2*boundlocz1 - k_dst;
        } else if ( (j_dst > boundlocy1) && (k_dst > boundlocz1) )
        {
            j_src = 2*boundlocy1 - j_dst;
            k_src = 2*boundlocz1 - k_dst;
        } else if ( (i_dst > boundlocx1) && (k_dst < boundlocz0) )
        {
            i_src = 2*boundlocx1 - i_dst;
            k_src = 2*boundlocz0 - k_dst;
        } else if ( (i_dst > boundlocx1) && (j_dst < boundlocy0) )
        {
            i_src = 2*boundlocx1 - i_dst;
            j_src = 2*boundlocy0 - j_dst;
        } else if ( (i_dst < boundlocx0) && (k_dst > boundlocz1) )
        {
            i_src = 2*boundlocx0 - i_dst;
            k_src = 2*boundlocz1 - k_dst;
        } else if ( (i_dst < boundlocx0) && (j_dst > boundlocy1) )
        {
            i_src = 2*boundlocx0 - i_dst;
            j_src = 2*boundlocy1 - j_dst;
        } else if ( (j_dst > boundlocy1) && (k_dst < boundlocz0) )
        {
            j_src = 2*boundlocy1 - j_dst;
            k_src = 2*boundlocz0 - k_dst;
        }

    }

    const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src);
    const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst);
    vtxbuf[dst_idx] = sign*vtxbuf[src_idx]; // sign = 1 symmetric, sign = -1 antisymmetric
}


static __global__ void
kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf)
{
@@ -60,3 +188,55 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in
    ERRCHK_CUDA_KERNEL();
    return AC_SUCCESS;
}

AcResult
acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
                          AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle,
                          const AcMeshInfo config, const int3 bindex)
{
    const dim3 tpb(8, 2, 8);
    const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
                   (unsigned int)ceil((end.y - start.y) / (float)tpb.y),
                   (unsigned int)ceil((end.z - start.z) / (float)tpb.z));

    int3 bc_top = {config.int_params[AC_bc_type_top_x], config.int_params[AC_bc_type_top_y],
                   config.int_params[AC_bc_type_top_z]};
    int3 bc_bot = {config.int_params[AC_bc_type_bot_x], config.int_params[AC_bc_type_bot_y],
                   config.int_params[AC_bc_type_bot_z]};

    //#if AC_MPI_ENABLED
    //    printf("WARNING: NON-PERIODIC BOUNDARY CONDITIONS NOT SUPPORTED BY MPI! Only working at node level.\n");
    //    return AC_FAILURE;
    //#endif

    if (vtxbuf_handle != -1) // Dummy check for now; makes it possible later to switch the boundary condition per vertex buffer
    {

        if (bc_top.x == AC_BOUNDCOND_SYMMETRIC)
        {
            kernel_symmetric_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf, bindex, 1);
            ERRCHK_CUDA_KERNEL();
        }
        else if (bc_top.x == AC_BOUNDCOND_ANTISYMMETRIC)
        {
            kernel_symmetric_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf, bindex, -1);
            ERRCHK_CUDA_KERNEL();
        }
        else if (bc_top.x == AC_BOUNDCOND_PERIODIC)
        {
            kernel_periodic_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf);
            ERRCHK_CUDA_KERNEL();
        }
        else
        {
            printf("ERROR: Boundary condition not recognized!\n");
            printf("ERROR: bc_top = %i, %i, %i \n", bc_top.x, bc_top.y, bc_top.z);
            printf("ERROR: bc_bot = %i, %i, %i \n", bc_bot.x, bc_bot.y, bc_bot.z);

            return AC_FAILURE;
        }

    }

    return AC_SUCCESS;
}

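/* Usage sketch (illustrative, not part of this patch): the dispatch above keys off the boundary
 * type stored in AcMeshInfo, so a caller selects, e.g., antisymmetric boundaries by setting the
 * corresponding int params before invoking the boundary condition step. "mesh_info" is assumed
 * to be the already-initialized configuration.
 *
 *     mesh_info.int_params[AC_bc_type_top_x] = AC_BOUNDCOND_ANTISYMMETRIC;
 *     mesh_info.int_params[AC_bc_type_bot_x] = AC_BOUNDCOND_ANTISYMMETRIC;
 *     // ... likewise for y and z ...
 *     acBoundcondStepGBC(mesh_info);   // node-level entry point declared in astaroth.h
 */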
@@ -45,6 +45,11 @@ extern "C" {
/** */
AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
                                    AcReal* vtxbuf);
/** */
AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
                                   AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle,
                                   const AcMeshInfo config, const int3 bindex);


/** */
AcResult acKernelDummy(void);
src/core/node.cc
@@ -656,6 +656,30 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand
    return AC_SUCCESS;
}

static AcResult
local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf, const AcMeshInfo config)
{
    acNodeSynchronizeStream(node, stream);

    int3 bindex = {-1, -1, -1}; // Dummy for node level. Relevant only for MPI.

    if (node->num_devices > 1) {
        // Local boundary conditions
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) {
            const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
            const int3 d1 = (int3){node->subgrid.m.x, node->subgrid.m.y, d0.z + node->subgrid.n.z};
            acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1, config, bindex);
        }
    }
    else {
        acDeviceGeneralBoundcondStep(node->devices[0], stream, vtxbuf, (int3){0, 0, 0},
                                     node->subgrid.m, config, bindex);
    }
    return AC_SUCCESS;
}


static AcResult
global_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle)
{
@@ -768,6 +792,85 @@ acNodeIntegrate(const Node node, const AcReal dt)
    return AC_SUCCESS;
}

AcResult
acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt)
{
    acNodeSynchronizeStream(node, STREAM_ALL);
    // xxx|OOO OOOOOOOOO OOO|xxx
    //    ^   ^         ^   ^
    //    n0  n1        n2  n3
    // const int3 n0 = (int3){NGHOST, NGHOST, NGHOST};
    // const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
    // const int3 n2 = node->grid.n;
    // const int3 n3 = n0 + node->grid.n;

    for (int isubstep = 0; isubstep < 3; ++isubstep) {
        acNodeSynchronizeStream(node, STREAM_ALL);
        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, config);
        }
        acNodeSynchronizeStream(node, STREAM_ALL);

        // Inner inner
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) {
            const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = node->subgrid.n;
            acDeviceIntegrateSubstep(node->devices[i], STREAM_16, isubstep, m1, m2, dt);
        }

        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            acNodeSynchronizeVertexBuffer(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
            global_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
        }
        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
            acNodeSynchronizeStream(node, (Stream)vtxbuf);
        }

        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Front
            const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_0, isubstep, m1, m2, dt);
        }
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Back
            const int3 m1 = (int3){NGHOST, NGHOST, node->subgrid.n.z};
            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_1, isubstep, m1, m2, dt);
        }
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Bottom
            const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_2, isubstep, m1, m2, dt);
        }
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Top
            const int3 m1 = (int3){NGHOST, node->subgrid.n.y, 2 * NGHOST};
            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_3, isubstep, m1, m2, dt);
        }
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Left
            const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
                                        node->subgrid.n.z - 2 * NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_4, isubstep, m1, m2, dt);
        }
        // #pragma omp parallel for
        for (int i = 0; i < node->num_devices; ++i) { // Right
            const int3 m1 = (int3){node->subgrid.n.x, 2 * NGHOST, 2 * NGHOST};
            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
                                        node->subgrid.n.z - 2 * NGHOST};
            acDeviceIntegrateSubstep(node->devices[i], STREAM_5, isubstep, m1, m2, dt);
        }
        acNodeSwapBuffers(node);
    }
    acNodeSynchronizeStream(node, STREAM_ALL);
    return AC_SUCCESS;
}

AcResult
acNodePeriodicBoundcondStep(const Node node, const Stream stream,
                            const VertexBufferHandle vtxbuf_handle)
@@ -783,7 +886,20 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream,
}

AcResult
acNodePeriodicBoundconds(const Node node, const Stream stream)
acNodeGeneralBoundcondStep(const Node node, const Stream stream,
                           const VertexBufferHandle vtxbuf_handle, const AcMeshInfo config)
{
    local_boundcondstep_GBC(node, stream, vtxbuf_handle, config);
    acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle);

    global_boundcondstep(node, stream, vtxbuf_handle);


    return AC_SUCCESS;
}

AcResult
acNodePeriodicBoundconds(const Node node, const Stream stream)
{
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i);
@@ -791,6 +907,15 @@ acNodePeriodicBoundconds(const Node node, const Stream stream)
    return AC_SUCCESS;
}

AcResult
acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config)
{
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i, config);
    }
    return AC_SUCCESS;
}

static AcReal
simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcReal* results,
                         const int& n)