@@ -20,9 +20,15 @@
 uniform int AC_max_steps;
 uniform int AC_save_steps;
 uniform int AC_bin_steps;
-uniform int AC_bc_type;
 uniform int AC_start_step;
+
+uniform int AC_bc_type_top_x;
+uniform int AC_bc_type_bot_x;
+uniform int AC_bc_type_top_y;
+uniform int AC_bc_type_bot_y;
+uniform int AC_bc_type_top_z;
+uniform int AC_bc_type_bot_z;
 
 // Real params
 uniform Scalar AC_dt;
 uniform Scalar AC_max_time;
@@ -13,6 +13,15 @@ AC_dsx = 0.04908738521
 AC_dsy = 0.04908738521
 AC_dsz = 0.04908738521
 
+// 0 = periodic bc, 1 = symmetric bc, 2 = antisymmetric bc
+AC_bc_type_top_x = 0
+AC_bc_type_top_y = 0
+AC_bc_type_top_z = 0
+AC_bc_type_bot_x = 0
+AC_bc_type_bot_y = 0
+AC_bc_type_bot_z = 0
+
+
 /*
 * =============================================================================
 * Run-time params
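The defaults above leave every face periodic. As a quick illustration of the encoding from the comment (hypothetical values, not part of the patch), symmetric walls in z with x and y kept periodic would read:

    // Illustrative only: periodic in x and y, symmetric walls in z
    AC_bc_type_top_z = 1
    AC_bc_type_bot_z = 1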
@@ -49,6 +49,11 @@ typedef struct {
 
 typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
 
+// Named constants for the numeric boundary condition types
+typedef enum { AC_BOUNDCOND_PERIODIC = 0,
+               AC_BOUNDCOND_SYMMETRIC = 1,
+               AC_BOUNDCOND_ANTISYMMETRIC = 2 } AcBoundcond;
+
 #define AC_GEN_ID(X) X,
 typedef enum {
     AC_FOR_RTYPES(AC_GEN_ID) //
@@ -227,10 +232,19 @@ AcResult acStore(AcMesh* host_mesh);
  * substep and the user is responsible for calling acBoundcondStep before reading the data. */
 AcResult acIntegrate(const AcReal dt);
 
+/** Performs Runge-Kutta 3 integration. Note: Boundary conditions are not applied after the final
+ * substep and the user is responsible for calling acBoundcondStepGBC before reading the data.
+ * Supports customizable boundary conditions. */
+AcResult acIntegrateGBC(const AcMeshInfo config, const AcReal dt);
+
 /** Applies periodic boundary conditions for the Mesh distributed among the devices visible to
  * the caller*/
 AcResult acBoundcondStep(void);
 
+/** Applies general outer boundary conditions for the Mesh distributed among the devices visible
+ * to the caller. */
+AcResult acBoundcondStepGBC(const AcMeshInfo config);
+
 /** Does a scalar reduction with the data stored in some vertex buffer */
 AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle);
 
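A minimal sketch of the intended call sequence for the new entry points (adapted from the run_simulation changes further below; assumes the library has been initialized and mesh/mesh_info are set up as usual):

    // One simulation step with configurable boundary conditions
    acIntegrateGBC(mesh_info, dt);  // RK3; ghost zones are stale afterwards
    acBoundcondStepGBC(mesh_info);  // apply the configured outer BCs
    acStore(mesh);                  // now safe to read the mesh on the host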
@@ -306,9 +320,21 @@ AcResult acGridStoreMesh(const Stream stream, AcMesh* host_mesh);
 /** */
 AcResult acGridIntegrate(const Stream stream, const AcReal dt);
 
+/** */
+/* MV: Commented out for now, but saved for the future when standalone_MPI
+   works with periodic boundary conditions.
+
+AcResult acGridIntegrateNonperiodic(const Stream stream, const AcReal dt);
+*/
+
 /** */
 AcResult acGridPeriodicBoundconds(const Stream stream);
 
+/** */
+AcResult acGridGeneralBoundconds(const Device device, const Stream stream);
+
 /** TODO */
 AcResult acGridReduceScal(const Stream stream, const ReductionType rtype,
                           const VertexBufferHandle vtxbuf_handle, AcReal* result);
@@ -430,6 +456,9 @@ AcResult acNodeIntegrateSubstep(const Node node, const Stream stream, const int
 /** */
 AcResult acNodeIntegrate(const Node node, const AcReal dt);
 
+/** */
+AcResult acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt);
+
 /** */
 AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
                                      const VertexBufferHandle vtxbuf_handle);
@@ -437,6 +466,13 @@ AcResult acNodePeriodicBoundcondStep(const Node node, const Stream stream,
 /** */
 AcResult acNodePeriodicBoundconds(const Node node, const Stream stream);
 
+/** */
+AcResult acNodeGeneralBoundcondStep(const Node node, const Stream stream,
+                                    const VertexBufferHandle vtxbuf_handle,
+                                    const AcMeshInfo config);
+
+/** */
+AcResult acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config);
+
 /** */
 AcResult acNodeReduceScal(const Node node, const Stream stream, const ReductionType rtype,
                           const VertexBufferHandle vtxbuf_handle, AcReal* result);
@@ -565,6 +601,16 @@ AcResult acDevicePeriodicBoundcondStep(const Device device, const Stream stream,
 AcResult acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3 start,
                                     const int3 end);
 
+/** */
+AcResult acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
+                                      const VertexBufferHandle vtxbuf_handle, const int3 start,
+                                      const int3 end, const AcMeshInfo config, const int3 bindex);
+
+/** */
+AcResult acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
+                                   const int3 end, const AcMeshInfo config, const int3 bindex);
+
+
 /** */
 AcResult acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype,
                             const VertexBufferHandle vtxbuf_handle, AcReal* result);
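For reference, the bindex argument encodes where the subdomain sits relative to the physical boundary: node.cc passes {-1, -1, -1} as an "apply everywhere" dummy, while the (commented-out) MPI path in grid.cc sets per-axis codes 0-3. A node-level call sketch (VTXBUF_LNRHO stands in for whichever vertex buffer handle is actually in use):

    const int3 bindex = (int3){-1, -1, -1}; // node level: no MPI decomposition
    acDeviceGeneralBoundcondStep(device, STREAM_DEFAULT, VTXBUF_LNRHO,
                                 start, end, config, bindex);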
@@ -374,7 +374,8 @@ run_simulation(const char* config_path)
 #endif
     }
 
-    acBoundcondStep();
+    //acBoundcondStep();
+    acBoundcondStepGBC(mesh_info);
     acStore(mesh);
     if (start_step == 0) {
         save_mesh(*mesh, 0, t_step);
@@ -440,7 +441,11 @@ run_simulation(const char* config_path)
         loadForcingParamsToDevice(forcing_params);
 #endif
 
-        acIntegrate(dt);
+        /* Now uses flexible boundary conditions */
+        //acIntegrate(dt);
+        acIntegrateGBC(mesh_info, dt);
+
 
         t_step += dt;
 
@@ -486,7 +491,8 @@ run_simulation(const char* config_path)
             acBoundcondStep();
             acStore(mesh);
             */
-            acBoundcondStep();
+            //acBoundcondStep();
+            acBoundcondStepGBC(mesh_info);
             acStore(mesh);
 
             save_mesh(*mesh, i, t_step);
@@ -507,7 +513,8 @@ run_simulation(const char* config_path)
         if (dt < dt_typical/AcReal(1e5)) {
             if (dtcounter > 10) {
                 printf("dt = %e TOO LOW! Ending run at t = %#e \n", double(dt), double(t_step));
-                acBoundcondStep();
+                //acBoundcondStep();
+                acBoundcondStepGBC(mesh_info);
                 acStore(mesh);
                 save_mesh(*mesh, i, t_step);
                 break;
@@ -521,7 +528,8 @@ run_simulation(const char* config_path)
         // End loop if nan is found
         if (found_nan > 0) {
             printf("Found nan at t = %e \n", double(t_step));
-            acBoundcondStep();
+            //acBoundcondStep();
+            acBoundcondStepGBC(mesh_info);
             acStore(mesh);
             save_mesh(*mesh, i, t_step);
             break;
@@ -536,7 +544,8 @@ run_simulation(const char* config_path)
 
         if (found_stop == 1) {
             printf("Found STOP file at t = %e \n", double(t_step));
-            acBoundcondStep();
+            //acBoundcondStep();
+            acBoundcondStepGBC(mesh_info);
             acStore(mesh);
             save_mesh(*mesh, i, t_step);
             break;
@@ -86,6 +86,13 @@ acIntegrate(const AcReal dt)
     return acNodeIntegrate(nodes[0], dt);
 }
 
+AcResult
+acIntegrateGBC(const AcMeshInfo config, const AcReal dt)
+{
+    return acNodeIntegrateGBC(nodes[0], config, dt);
+}
+
+
 AcResult
 acIntegrateStep(const int isubstep, const AcReal dt)
 {
@@ -109,6 +116,12 @@ acBoundcondStep(void)
     return acNodePeriodicBoundconds(nodes[0], STREAM_DEFAULT);
 }
 
+AcResult
+acBoundcondStepGBC(const AcMeshInfo config)
+{
+    return acNodeGeneralBoundconds(nodes[0], STREAM_DEFAULT, config);
+}
+
 AcReal
 acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuf_handle)
 {
@@ -433,6 +433,27 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3
     return AC_SUCCESS;
 }
 
+AcResult
+acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
+                             const VertexBufferHandle vtxbuf_handle, const int3 start,
+                             const int3 end, const AcMeshInfo config, const int3 bindex)
+{
+    cudaSetDevice(device->id);
+    return acKernelGeneralBoundconds(device->streams[stream], start, end,
+                                     device->vba.in[vtxbuf_handle], vtxbuf_handle, config, bindex);
+}
+
+AcResult
+acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
+                          const int3 end, const AcMeshInfo config, const int3 bindex)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, config,
+                                     bindex);
+    }
+    return AC_SUCCESS;
+}
+
+
 AcResult
 acDeviceReduceScal(const Device device, const Stream stream, const ReductionType rtype,
                    const VertexBufferHandle vtxbuf_handle, AcReal* result)
@@ -1433,6 +1454,50 @@ acGridStoreMesh(const Stream stream, AcMesh* host_mesh)
     return AC_SUCCESS;
 }
 
+/* MV: Commented out for now, but saved for the future when standalone_MPI
+   works with periodic boundary conditions.
+
+AcResult
+acGridGeneralBoundconds(const Device device, const Stream stream)
+{
+    // Non-periodic boundary conditions
+    // Check the position in the MPI frame
+    int nprocs, pid;
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+    const uint3_64 decomposition = decompose(nprocs);
+    const int3 pid3d = getPid3D(pid, decomposition);
+
+    // Set outer boundaries after substep computation.
+    const int3 m1 = (int3){0, 0, 0};
+    const int3 m2 = grid.nn;
+
+    // If we are a boundary element
+    int3 bindex = (int3){0, 0, 0};
+
+    // Check if there are active boundary condition edges.
+    // 0 is no boundary, 1 both edges, 2 is top edge, 3 is bottom edge
+    if ((pid3d.x == 0) && (pid3d.x == decomposition.x - 1)) { bindex.x = 1; }
+    else if (pid3d.x == 0) { bindex.x = 2; }
+    else if (pid3d.x == decomposition.x - 1) { bindex.x = 3; }
+
+    if ((pid3d.y == 0) && (pid3d.y == decomposition.y - 1)) { bindex.y = 1; }
+    else if (pid3d.y == 0) { bindex.y = 2; }
+    else if (pid3d.y == decomposition.y - 1) { bindex.y = 3; }
+
+    if ((pid3d.z == 0) && (pid3d.z == decomposition.z - 1)) { bindex.z = 1; }
+    else if (pid3d.z == 0) { bindex.z = 2; }
+    else if (pid3d.z == decomposition.z - 1) { bindex.z = 3; }
+
+    if ((bindex.x != 1) && (bindex.y != 1) && (bindex.z != 1)) {
+        acDeviceGeneralBoundconds(device, stream, m1, m2, bindex);
+    }
+    acGridSynchronizeStream(stream);
+
+    return AC_SUCCESS;
+}
+*/
+
 /*
 // Unused
 AcResult
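The three per-axis if/else ladders in the saved routine repeat one pattern; a compact equivalent (hypothetical helper, not in the patch, using the same 0-3 encoding):

    // 0 = no physical boundary, 1 = both edges, 2 = top edge, 3 = bottom edge
    static int edge_code(const int coord, const int nprocs_axis)
    {
        const bool lo = (coord == 0);
        const bool hi = (coord == nprocs_axis - 1);
        if (lo && hi) return 1; // undecomposed axis: rank owns both edges
        if (lo)       return 2;
        if (hi)       return 3;
        return 0;               // interior rank
    }
    // e.g. bindex.x = edge_code(pid3d.x, decomposition.x);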
@@ -1864,6 +1929,234 @@ acGridIntegrate(const Stream stream, const AcReal dt)
     return AC_SUCCESS;
 }
 
+/* MV: Commented out for now, but saved for the future when standalone_MPI
+   works with periodic boundary conditions.
+
+AcResult
+acGridIntegrateNonperiodic(const Stream stream, const AcReal dt)
+{
+    ERRCHK(grid.initialized);
+    acGridSynchronizeStream(stream);
+
+    const Device device = grid.device;
+    const int3 nn       = grid.nn;
+#if MPI_INCL_CORNERS
+    CommData corner_data = grid.corner_data; // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+    CommData edgex_data  = grid.edgex_data;
+    CommData edgey_data  = grid.edgey_data;
+    CommData edgez_data  = grid.edgez_data;
+    CommData sidexy_data = grid.sidexy_data;
+    CommData sidexz_data = grid.sidexz_data;
+    CommData sideyz_data = grid.sideyz_data;
+
+    acGridLoadScalarUniform(stream, AC_dt, dt);
+    acDeviceSynchronizeStream(device, stream);
+
+    // Corners
+#if MPI_INCL_CORNERS
+    // Do not rm: required for corners
+    const int3 corner_b0s[] = {
+        (int3){0, 0, 0},
+        (int3){NGHOST + nn.x, 0, 0},
+        (int3){0, NGHOST + nn.y, 0},
+        (int3){0, 0, NGHOST + nn.z},
+
+        (int3){NGHOST + nn.x, NGHOST + nn.y, 0},
+        (int3){NGHOST + nn.x, 0, NGHOST + nn.z},
+        (int3){0, NGHOST + nn.y, NGHOST + nn.z},
+        (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST + nn.z},
+    };
+#endif // MPI_INCL_CORNERS
+
+    // Edges X
+    const int3 edgex_b0s[] = {
+        (int3){NGHOST, 0, 0},
+        (int3){NGHOST, NGHOST + nn.y, 0},
+
+        (int3){NGHOST, 0, NGHOST + nn.z},
+        (int3){NGHOST, NGHOST + nn.y, NGHOST + nn.z},
+    };
+
+    // Edges Y
+    const int3 edgey_b0s[] = {
+        (int3){0, NGHOST, 0},
+        (int3){NGHOST + nn.x, NGHOST, 0},
+
+        (int3){0, NGHOST, NGHOST + nn.z},
+        (int3){NGHOST + nn.x, NGHOST, NGHOST + nn.z},
+    };
+
+    // Edges Z
+    const int3 edgez_b0s[] = {
+        (int3){0, 0, NGHOST},
+        (int3){NGHOST + nn.x, 0, NGHOST},
+
+        (int3){0, NGHOST + nn.y, NGHOST},
+        (int3){NGHOST + nn.x, NGHOST + nn.y, NGHOST},
+    };
+
+    // Sides XY
+    const int3 sidexy_b0s[] = {
+        (int3){NGHOST, NGHOST, 0},             //
+        (int3){NGHOST, NGHOST, NGHOST + nn.z}, //
+    };
+
+    // Sides XZ
+    const int3 sidexz_b0s[] = {
+        (int3){NGHOST, 0, NGHOST},             //
+        (int3){NGHOST, NGHOST + nn.y, NGHOST}, //
+    };
+
+    // Sides YZ
+    const int3 sideyz_b0s[] = {
+        (int3){0, NGHOST, NGHOST},             //
+        (int3){NGHOST + nn.x, NGHOST, NGHOST}, //
+    };
+
+    for (int isubstep = 0; isubstep < 3; ++isubstep) {
+
+#if MPI_COMM_ENABLED
+#if MPI_INCL_CORNERS
+        acPackCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acPackCommData(device, edgex_b0s, &edgex_data);
+        acPackCommData(device, edgey_b0s, &edgey_data);
+        acPackCommData(device, edgez_b0s, &edgez_data);
+        acPackCommData(device, sidexy_b0s, &sidexy_data);
+        acPackCommData(device, sidexz_b0s, &sidexz_data);
+        acPackCommData(device, sideyz_b0s, &sideyz_data);
+#endif
+
+#if MPI_COMM_ENABLED
+        MPI_Barrier(MPI_COMM_WORLD);
+
+#if MPI_GPUDIRECT_DISABLED
+#if MPI_INCL_CORNERS
+        acTransferCommDataToHost(device, &corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acTransferCommDataToHost(device, &edgex_data);
+        acTransferCommDataToHost(device, &edgey_data);
+        acTransferCommDataToHost(device, &edgez_data);
+        acTransferCommDataToHost(device, &sidexy_data);
+        acTransferCommDataToHost(device, &sidexz_data);
+        acTransferCommDataToHost(device, &sideyz_data);
+#endif
+#if MPI_INCL_CORNERS
+        acTransferCommData(device, corner_b0s, &corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acTransferCommData(device, edgex_b0s, &edgex_data);
+        acTransferCommData(device, edgey_b0s, &edgey_data);
+        acTransferCommData(device, edgez_b0s, &edgez_data);
+        acTransferCommData(device, sidexy_b0s, &sidexy_data);
+        acTransferCommData(device, sidexz_b0s, &sidexz_data);
+        acTransferCommData(device, sideyz_b0s, &sideyz_data);
+#endif // MPI_COMM_ENABLED
+
+#if MPI_COMPUTE_ENABLED
+        //////////// INNER INTEGRATION //////////////
+        {
+            const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = nn;
+            acKernelIntegrateSubstep(device->streams[STREAM_16], isubstep, m1, m2, device->vba);
+        }
+        ////////////////////////////////////////////
+#endif // MPI_COMPUTE_ENABLED
+
+#if MPI_COMM_ENABLED
+#if MPI_INCL_CORNERS
+        acTransferCommDataWait(corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acTransferCommDataWait(edgex_data);
+        acTransferCommDataWait(edgey_data);
+        acTransferCommDataWait(edgez_data);
+        acTransferCommDataWait(sidexy_data);
+        acTransferCommDataWait(sidexz_data);
+        acTransferCommDataWait(sideyz_data);
+
+#if MPI_INCL_CORNERS
+        acUnpinCommData(device, &corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acUnpinCommData(device, &edgex_data);
+        acUnpinCommData(device, &edgey_data);
+        acUnpinCommData(device, &edgez_data);
+        acUnpinCommData(device, &sidexy_data);
+        acUnpinCommData(device, &sidexz_data);
+        acUnpinCommData(device, &sideyz_data);
+
+#if MPI_INCL_CORNERS
+        acUnpackCommData(device, corner_b0s, &corner_data);
+#endif // MPI_INCL_CORNERS
+        acUnpackCommData(device, edgex_b0s, &edgex_data);
+        acUnpackCommData(device, edgey_b0s, &edgey_data);
+        acUnpackCommData(device, edgez_b0s, &edgez_data);
+        acUnpackCommData(device, sidexy_b0s, &sidexy_data);
+        acUnpackCommData(device, sidexz_b0s, &sidexz_data);
+        acUnpackCommData(device, sideyz_b0s, &sideyz_data);
+        //////////// OUTER INTEGRATION //////////////
+
+        // Wait for unpacking
+#if MPI_INCL_CORNERS
+        acSyncCommData(corner_data); // Do not rm: required for corners
+#endif // MPI_INCL_CORNERS
+        acSyncCommData(edgex_data);
+        acSyncCommData(edgey_data);
+        acSyncCommData(edgez_data);
+        acSyncCommData(sidexy_data);
+        acSyncCommData(sidexz_data);
+        acSyncCommData(sideyz_data);
+#endif // MPI_COMM_ENABLED
+
+        // Invoke outer edge boundary conditions.
+        acGridGeneralBoundconds(device, stream);
+
+#if MPI_COMPUTE_ENABLED
+        { // Front
+            const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
+            const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_0], isubstep, m1, m2, device->vba);
+        }
+        { // Back
+            const int3 m1 = (int3){NGHOST, NGHOST, nn.z};
+            const int3 m2 = m1 + (int3){nn.x, nn.y, NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_1], isubstep, m1, m2, device->vba);
+        }
+        { // Bottom
+            const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_2], isubstep, m1, m2, device->vba);
+        }
+        { // Top
+            const int3 m1 = (int3){NGHOST, nn.y, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){nn.x, NGHOST, nn.z - 2 * NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_3], isubstep, m1, m2, device->vba);
+        }
+        { // Left
+            const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_4], isubstep, m1, m2, device->vba);
+        }
+        { // Right
+            const int3 m1 = (int3){nn.x, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, nn.y - 2 * NGHOST, nn.z - 2 * NGHOST};
+            acKernelIntegrateSubstep(device->streams[STREAM_5], isubstep, m1, m2, device->vba);
+        }
+#endif // MPI_COMPUTE_ENABLED
+        acDeviceSwapBuffers(device);
+        acDeviceSynchronizeStream(device, STREAM_ALL); // Wait until inner and outer done
+        ////////////////////////////////////////////
+    }
+
+    return AC_SUCCESS;
+}
+*/
+
 AcResult
 acGridPeriodicBoundconds(const Stream stream)
 {
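Like acGridIntegrate, the saved routine overlaps halo exchange with interior work. Stripped of the per-region boilerplate, one substep reduces to the following control flow (integrate_inner and integrate_outer_slabs are shorthand for the blocks above, not real functions):

    acPackCommData(device, side_b0s, &side_data);     // stage halo zones
    acTransferCommData(device, side_b0s, &side_data); // start async exchange
    integrate_inner(device, isubstep);                // needs no halo data
    acTransferCommDataWait(side_data);                // halos have arrived
    acUnpackCommData(device, side_b0s, &side_data);
    acGridGeneralBoundconds(device, stream);          // physical boundaries
    integrate_outer_slabs(device, isubstep);          // six halo-adjacent slabs
    acDeviceSwapBuffers(device);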
@@ -1,5 +1,133 @@
 #pragma once
 
+static __global__ void
+kernel_symmetric_boundconds(const int3 start, const int3 end, AcReal* vtxbuf, const int3 bindex,
+                            const int sign)
+{
+    const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x;
+    const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y;
+    const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z;
+
+    // If within the start-end range (this allows threadblock dims that are not
+    // divisible by end - start)
+    if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z)
+        return;
+
+    // If the destination index is inside the computational domain, return, since
+    // the boundary conditions are only applied to the ghost zones
+    if (i_dst >= DCONST(AC_nx_min) && i_dst < DCONST(AC_nx_max) && j_dst >= DCONST(AC_ny_min) &&
+        j_dst < DCONST(AC_ny_max) && k_dst >= DCONST(AC_nz_min) && k_dst < DCONST(AC_nz_max))
+        return;
+
+    // Find the source index
+    // Map to nx, ny, nz coordinates
+    int i_src, j_src, k_src, boundlocx0, boundlocx1, boundlocy0, boundlocy1, boundlocz0, boundlocz1;
+    const int bsize = STENCIL_ORDER / 2;
+
+    //if (bindex.x != 0)
+    //if (bindex.y != 0)
+    //if (bindex.z != 0)
+
+    // Location of the central border point.
+    boundlocx0 = bsize;
+    boundlocy0 = bsize;
+    boundlocz0 = bsize;
+    boundlocx1 = DCONST(AC_nx_max) - 1;
+    boundlocy1 = DCONST(AC_ny_max) - 1;
+    boundlocz1 = DCONST(AC_nz_max) - 1;
+
+    // Defaults
+    i_src = i_dst;
+    j_src = j_dst;
+    k_src = k_dst;
+
+    if (bindex.x < 0)
+    {
+        // Pick up the mirroring value.
+        if (i_dst < boundlocx0)
+        {
+            i_src = 2 * boundlocx0 - i_dst;
+        } else if (i_dst > boundlocx1)
+        {
+            i_src = 2 * boundlocx1 - i_dst;
+        }
+
+        // Pick up the mirroring value.
+        if (j_dst < boundlocy0)
+        {
+            j_src = 2 * boundlocy0 - j_dst;
+        } else if (j_dst > boundlocy1)
+        {
+            j_src = 2 * boundlocy1 - j_dst;
+        }
+
+        // Pick up the mirroring value.
+        if (k_dst < boundlocz0)
+        {
+            k_src = 2 * boundlocz0 - k_dst;
+        } else if (k_dst > boundlocz1)
+        {
+            k_src = 2 * boundlocz1 - k_dst;
+        }
+
+        // Edges
+        if ((i_dst < boundlocx0) && (j_dst < boundlocy0))
+        {
+            i_src = 2 * boundlocx0 - i_dst;
+            j_src = 2 * boundlocy0 - j_dst;
+        } else if ((i_dst < boundlocx0) && (k_dst < boundlocz0))
+        {
+            i_src = 2 * boundlocx0 - i_dst;
+            k_src = 2 * boundlocz0 - k_dst;
+        } else if ((j_dst < boundlocy0) && (k_dst < boundlocz0))
+        {
+            j_src = 2 * boundlocy0 - j_dst;
+            k_src = 2 * boundlocz0 - k_dst;
+
+        } else if ((i_dst > boundlocx1) && (j_dst > boundlocy1))
+        {
+            i_src = 2 * boundlocx1 - i_dst;
+            j_src = 2 * boundlocy1 - j_dst;
+        } else if ((i_dst > boundlocx1) && (k_dst > boundlocz1))
+        {
+            i_src = 2 * boundlocx1 - i_dst;
+            k_src = 2 * boundlocz1 - k_dst;
+        } else if ((j_dst > boundlocy1) && (k_dst > boundlocz1))
+        {
+            j_src = 2 * boundlocy1 - j_dst;
+            k_src = 2 * boundlocz1 - k_dst;
+        } else if ((i_dst > boundlocx1) && (k_dst < boundlocz0))
+        {
+            i_src = 2 * boundlocx1 - i_dst;
+            k_src = 2 * boundlocz0 - k_dst;
+        } else if ((i_dst > boundlocx1) && (j_dst < boundlocy0))
+        {
+            i_src = 2 * boundlocx1 - i_dst;
+            j_src = 2 * boundlocy0 - j_dst;
+        } else if ((i_dst < boundlocx0) && (k_dst > boundlocz1))
+        {
+            i_src = 2 * boundlocx0 - i_dst;
+            k_src = 2 * boundlocz1 - k_dst;
+        } else if ((i_dst < boundlocx0) && (j_dst > boundlocy1))
+        {
+            i_src = 2 * boundlocx0 - i_dst;
+            j_src = 2 * boundlocy1 - j_dst;
+        } else if ((j_dst > boundlocy1) && (k_dst < boundlocz0))
+        {
+            j_src = 2 * boundlocy1 - j_dst;
+            k_src = 2 * boundlocz0 - k_dst;
+        }
+    }
+
+    const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src);
+    const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst);
+    vtxbuf[dst_idx] = sign * vtxbuf[src_idx]; // sign = 1 symmetric, sign = -1 antisymmetric
+}
+
+
 static __global__ void
 kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf)
 {
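The kernel's mirroring rule is plain reflection about the boundary cell, i_src = 2*boundloc - i_dst. A worked check for the bottom x face, assuming STENCIL_ORDER = 6 (three ghost cells, boundary cell at index 3):

    // i_dst = 2  ->  i_src = 2*3 - 2 = 4
    // i_dst = 1  ->  i_src = 2*3 - 1 = 5
    // i_dst = 0  ->  i_src = 2*3 - 0 = 6
    // sign = +1 copies the mirrored value (symmetric, zero normal gradient);
    // sign = -1 negates it (antisymmetric, zero value at the boundary cell).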
@@ -60,3 +188,55 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in
     ERRCHK_CUDA_KERNEL();
     return AC_SUCCESS;
 }
+
+AcResult
+acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
+                          AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle,
+                          const AcMeshInfo config, const int3 bindex)
+{
+    const dim3 tpb(8, 2, 8);
+    const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
+                   (unsigned int)ceil((end.y - start.y) / (float)tpb.y),
+                   (unsigned int)ceil((end.z - start.z) / (float)tpb.z));
+
+    int3 bc_top = {config.int_params[AC_bc_type_top_x], config.int_params[AC_bc_type_top_y],
+                   config.int_params[AC_bc_type_top_z]};
+    int3 bc_bot = {config.int_params[AC_bc_type_bot_x], config.int_params[AC_bc_type_bot_y],
+                   config.int_params[AC_bc_type_bot_z]};
+
+    //#if AC_MPI_ENABLED
+    //    printf("WARNING: NON-PERIODIC BOUNDARY CONDITIONS NOT SUPPORTED BY MPI! Only working at node level.\n");
+    //    return AC_FAILURE;
+    //#endif
+
+    if (vtxbuf_handle != -1) // Dummy check; makes switching boundary conditions per buffer possible later
+    {
+        if (bc_top.x == AC_BOUNDCOND_SYMMETRIC)
+        {
+            kernel_symmetric_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf, bindex, 1);
+            ERRCHK_CUDA_KERNEL();
+        }
+        else if (bc_top.x == AC_BOUNDCOND_ANTISYMMETRIC)
+        {
+            kernel_symmetric_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf, bindex, -1);
+            ERRCHK_CUDA_KERNEL();
+        }
+        else if (bc_top.x == AC_BOUNDCOND_PERIODIC)
+        {
+            kernel_periodic_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf);
+            ERRCHK_CUDA_KERNEL();
+        }
+        else
+        {
+            printf("ERROR: Boundary condition not recognized!\n");
+            printf("ERROR: bc_top = %i, %i, %i \n", bc_top.x, bc_top.y, bc_top.z);
+            printf("ERROR: bc_bot = %i, %i, %i \n", bc_bot.x, bc_bot.y, bc_bot.z);
+
+            return AC_FAILURE;
+        }
+    }
+
+    return AC_SUCCESS;
+}
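The rounded-up launch geometry covers extents that are not multiples of the block size; the out-of-range threads exit at the kernel's i_dst >= end.x guard. Illustrative numbers (not from the source):

    // end - start = (34, 6, 34) with tpb = (8, 2, 8) gives
    // bpg = (ceil(34/8), ceil(6/2), ceil(34/8)) = (5, 3, 5);
    // 5 blocks * 8 threads = 40 >= 34 in x, so the last 6 threads return early.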
@@ -45,6 +45,11 @@ extern "C" {
 /** */
 AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
                                     AcReal* vtxbuf);
+/** */
+AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
+                                   AcReal* vtxbuf, const VertexBufferHandle vtxbuf_handle,
+                                   const AcMeshInfo config, const int3 bindex);
+
+
 /** */
 AcResult acKernelDummy(void);
src/core/node.cc
@@ -656,6 +656,30 @@ local_boundcondstep(const Node node, const Stream stream, const VertexBufferHand
     return AC_SUCCESS;
 }
 
+static AcResult
+local_boundcondstep_GBC(const Node node, const Stream stream, const VertexBufferHandle vtxbuf,
+                        const AcMeshInfo config)
+{
+    acNodeSynchronizeStream(node, stream);
+
+    int3 bindex = {-1, -1, -1}; // Dummy for the node level; relevant only for MPI.
+
+    if (node->num_devices > 1) {
+        // Local boundary conditions
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) {
+            const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
+            const int3 d1 = (int3){node->subgrid.m.x, node->subgrid.m.y, d0.z + node->subgrid.n.z};
+            acDeviceGeneralBoundcondStep(node->devices[i], stream, vtxbuf, d0, d1, config, bindex);
+        }
+    }
+    else {
+        acDeviceGeneralBoundcondStep(node->devices[0], stream, vtxbuf, (int3){0, 0, 0},
+                                     node->subgrid.m, config, bindex);
+    }
+    return AC_SUCCESS;
+}
+
+
 static AcResult
 global_boundcondstep(const Node node, const Stream stream, const VertexBufferHandle vtxbuf_handle)
 {
@@ -768,6 +792,85 @@ acNodeIntegrate(const Node node, const AcReal dt)
     return AC_SUCCESS;
 }
 
+AcResult
+acNodeIntegrateGBC(const Node node, const AcMeshInfo config, const AcReal dt)
+{
+    acNodeSynchronizeStream(node, STREAM_ALL);
+    // xxx|OOO OOOOOOOOO OOO|xxx
+    //    ^   ^         ^   ^
+    //    n0  n1        n2  n3
+    // const int3 n0 = (int3){NGHOST, NGHOST, NGHOST};
+    // const int3 n1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
+    // const int3 n2 = node->grid.n;
+    // const int3 n3 = n0 + node->grid.n;
+
+    for (int isubstep = 0; isubstep < 3; ++isubstep) {
+        acNodeSynchronizeStream(node, STREAM_ALL);
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            local_boundcondstep_GBC(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf, config);
+        }
+        acNodeSynchronizeStream(node, STREAM_ALL);
+
+        // Inner inner
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) {
+            const int3 m1 = (int3){2 * NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = node->subgrid.n;
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_16, isubstep, m1, m2, dt);
+        }
+
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            acNodeSynchronizeVertexBuffer(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
+            global_boundcondstep(node, (Stream)vtxbuf, (VertexBufferHandle)vtxbuf);
+        }
+        for (int vtxbuf = 0; vtxbuf < NUM_VTXBUF_HANDLES; ++vtxbuf) {
+            acNodeSynchronizeStream(node, (Stream)vtxbuf);
+        }
+
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Front
+            const int3 m1 = (int3){NGHOST, NGHOST, NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_0, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Back
+            const int3 m1 = (int3){NGHOST, NGHOST, node->subgrid.n.z};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, node->subgrid.n.y, NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_1, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Bottom
+            const int3 m1 = (int3){NGHOST, NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_2, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Top
+            const int3 m1 = (int3){NGHOST, node->subgrid.n.y, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){node->subgrid.n.x, NGHOST, node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_3, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Left
+            const int3 m1 = (int3){NGHOST, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
+                                        node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_4, isubstep, m1, m2, dt);
+        }
+        // #pragma omp parallel for
+        for (int i = 0; i < node->num_devices; ++i) { // Right
+            const int3 m1 = (int3){node->subgrid.n.x, 2 * NGHOST, 2 * NGHOST};
+            const int3 m2 = m1 + (int3){NGHOST, node->subgrid.n.y - 2 * NGHOST,
+                                        node->subgrid.n.z - 2 * NGHOST};
+            acDeviceIntegrateSubstep(node->devices[i], STREAM_5, isubstep, m1, m2, dt);
+        }
+        acNodeSwapBuffers(node);
+    }
+    acNodeSynchronizeStream(node, STREAM_ALL);
+    return AC_SUCCESS;
+}
+
 AcResult
 acNodePeriodicBoundcondStep(const Node node, const Stream stream,
                             const VertexBufferHandle vtxbuf_handle)
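The inner block plus the six outer loops tile the whole computational domain [NGHOST, NGHOST + nn) exactly once, where nn = node->subgrid.n and NG = NGHOST. A sanity check of the bounds used above (not from the source):

    // z: front [NG, 2NG) + back [nn.z, nn.z + NG) + middle [2NG, nn.z)
    // y (middle z): bottom [NG, 2NG) + top [nn.y, nn.y + NG) + middle [2NG, nn.y)
    // x (middle y,z): left [NG, 2NG) + right [nn.x, nn.x + NG) + inner [2NG, nn.x)
    // => every interior cell is integrated exactly once per substep.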
@@ -783,7 +886,20 @@ acNodePeriodicBoundcondStep(const Node node, const Stream stream,
 }
 
 AcResult
-acNodePeriodicBoundconds(const Node node, const Stream stream)
+acNodeGeneralBoundcondStep(const Node node, const Stream stream,
+                           const VertexBufferHandle vtxbuf_handle, const AcMeshInfo config)
+{
+    local_boundcondstep_GBC(node, stream, vtxbuf_handle, config);
+    acNodeSynchronizeVertexBuffer(node, stream, vtxbuf_handle);
+
+    global_boundcondstep(node, stream, vtxbuf_handle);
+
+
+    return AC_SUCCESS;
+}
+
+AcResult
+acNodePeriodicBoundconds(const Node node, const Stream stream)
 {
     for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
         acNodePeriodicBoundcondStep(node, stream, (VertexBufferHandle)i);
@@ -791,6 +907,15 @@ acNodePeriodicBoundconds(const Node node, const Stream stream)
     return AC_SUCCESS;
 }
 
+AcResult
+acNodeGeneralBoundconds(const Node node, const Stream stream, const AcMeshInfo config)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        acNodeGeneralBoundcondStep(node, stream, (VertexBufferHandle)i, config);
+    }
+    return AC_SUCCESS;
+}
+
 static AcReal
 simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcReal* results,
                          const int& n)