Merge branch 'master' into forcing

This commit is contained in:
Miikka Vaisala
2019-07-02 16:43:10 +08:00
17 changed files with 152 additions and 113 deletions

View File

@@ -77,7 +77,7 @@ acCheckDeviceAvailability(void)
{
int device_count; // Separate from num_devices to avoid side effects
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));
if (device_count > 0)
if (device_count > 0)
return AC_SUCCESS;
else
return AC_FAILURE;
@@ -120,12 +120,14 @@ acInit(const AcMeshInfo& config)
ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);
#if VERBOSE_PRINTING
// clang-format off
printf("Grid m "); printInt3(grid.m); printf("\n");
printf("Grid n "); printInt3(grid.n); printf("\n");
printf("Subrid m "); printInt3(subgrid.m); printf("\n");
printf("Subrid n "); printInt3(subgrid.n); printf("\n");
// clang-format on
#endif
// Initialize the devices
for (int i = 0; i < num_devices; ++i) {
@@ -271,8 +273,7 @@ AcResult
acIntegrateStep(const int& isubstep, const AcReal& dt)
{
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y,
NGHOST + subgrid.n.z};
const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, NGHOST + subgrid.n.z};
for (int i = 0; i < num_devices; ++i) {
rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt);
}

View File

@@ -42,6 +42,10 @@ __constant__ Grid globalGrid;
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
#include "kernels/kernels.cuh"
#if PACKED_DATA_TRANSFERS // Defined in device.cuh
// #include "kernels/pack_unpack.cuh"
#endif
struct device_s {
int id;
AcMeshInfo local_config;
@@ -53,6 +57,11 @@ struct device_s {
VertexBufferArray vba;
AcReal* reduce_scratchpad;
AcReal* reduce_result;
#if PACKED_DATA_TRANSFERS
// Declare memory for buffers needed for packed data transfers here
// AcReal* data_packing_buffer;
#endif
};
AcResult
@@ -154,6 +163,10 @@ createDevice(const int id, const AcMeshInfo device_config, Device* device_handle
cudaMalloc(&device->reduce_scratchpad, AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(device_config)));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
#if PACKED_DATA_TRANSFERS
// Allocate data required for packed transfers here (cudaMalloc)
#endif
// Device constants
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
cudaMemcpyHostToDevice));
@@ -184,6 +197,10 @@ destroyDevice(Device device)
cudaFree(device->reduce_scratchpad);
cudaFree(device->reduce_result);
#if PACKED_DATA_TRANSFERS
// Free data required for packed transfers here (cudaFree)
#endif
// Concurrency
for (int i = 0; i < NUM_STREAM_TYPES; ++i)
cudaStreamDestroy(device->streams[i]);
@@ -373,3 +390,7 @@ loadGlobalGrid(const Device device, const Grid grid)
cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid), 0, cudaMemcpyHostToDevice));
return AC_SUCCESS;
}
#if PACKED_DATA_TRANSFERS
// Functions for calling packed data transfers
#endif

View File

@@ -98,3 +98,8 @@ AcResult loadDeviceConstant(const Device device, const AcRealParam param, const
/** */
AcResult loadGlobalGrid(const Device device, const Grid grid);
// #define PACKED_DATA_TRANSFERS (1) %JP: placeholder for optimized ghost zone packing and transfers
#if PACKED_DATA_TRANSFERS
// Declarations used for packed data transfers
#endif

View File

@@ -426,7 +426,7 @@ cross(const AcReal3& a, const AcReal3& b)
}
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal a)
is_valid(const AcReal& a)
{
return !isnan(a) && !isinf(a);
}
@@ -546,7 +546,7 @@ normalized(const AcReal3& vec)
// Sinusoidal forcing
// https://arxiv.org/pdf/1704.04676.pdf
// NOTE: This method of forcing is deprecated. However, it will remain in here
// until a corresponding scheme exists in the new code.
// until a corresponding scheme exists in the new code.
__constant__ AcReal3 forcing_vec;
__constant__ AcReal forcing_phi;
static __device__ __forceinline__ AcReal3

View File

@@ -152,7 +152,7 @@ update_config(AcMeshInfo* config)
config->real_params[AC_G_CONST];
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
#if VERBOSE_PRINTING
#if VERBOSE_PRINTING // Defined in astaroth.h
printf("###############################################################\n");
printf("Config dimensions recalculated:\n");
print(*config);

View File

@@ -745,7 +745,7 @@ solve_alpha_step(const int step_number, const ModelScalar dt, const int i, const
ModelScalar rate_of_change[NUM_VTXBUF_HANDLES] = {0};
rate_of_change[VTXBUF_LNRHO] = continuity(uu, lnrho);
#if LINDUCTION
#if LMAGNETIC
const ModelVectorData aa = read_data(i, j, k, in.vertex_buffer,
(int3){VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ});
const ModelVector aa_res = induction(uu, aa);