Merge branch 'master' into forcing

Now I need to test what works...

Conflicts:
	acc/mhd_solver/stencil_process.sps
This commit is contained in:
Miikka Vaisala
2019-06-27 11:22:31 +08:00
6 changed files with 120 additions and 82 deletions

View File

@@ -48,8 +48,8 @@ message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID}) message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0) # Because of GCC bug 48891
message(FATAL_ERROR "GCC version 5.4 or higher required") message(FATAL_ERROR "GCC version 6.0 or higher required")
endif() endif()
endif() endif()

View File

@@ -49,6 +49,7 @@ extern "C" {
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove #define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0) #define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0) #define GENERATE_BENCHMARK_DATA (0)
#define VERBOSE_PRINTING (1)
// Device info // Device info
#define REGISTERS_PER_THREAD (255) #define REGISTERS_PER_THREAD (255)
@@ -57,27 +58,26 @@ extern "C" {
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK) #define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
#define NUM_ITERATIONS (10) #define NUM_ITERATIONS (10)
#define WARP_SIZE (32) #define WARP_SIZE (32)
/* /*
* ============================================================================= * =============================================================================
* Compile-time constants used during simulation (user definable) * Compile-time constants used during simulation (user definable)
* ============================================================================= * =============================================================================
*/ */
#define STENCIL_ORDER (6) // USER_PROVIDED_DEFINES must be defined in user.h if the user wants to override the following
// logical switches
#include "user.h"
///////////// PAD TEST #ifndef USER_PROVIDED_DEFINES
// NOTE: works only with nx is divisible by 32 #define STENCIL_ORDER (6)
//#define PAD_LEAD (32 - STENCIL_ORDER/2) #define NGHOST (STENCIL_ORDER/2)
//#define PAD_SIZE (32 - STENCIL_ORDER) #define LHYDRO (1)
///////////// PAD TEST #define LDENSITY (1)
#define LFORCING (1)
// L-prefix inherited from the old Astaroth, no idea what it means #define LINDUCTION (1)
// MV: L means a Logical switch variable, something having a true or false value. #define LENTROPY (1)
// Note: forcing is disabled currently in the files generated by acc (compiler of our DSL) #define LTEMPERATURE (0)
#define LFORCING (1) #define LMAGNETIC LINDUCTION
#define LINDUCTION (1) #endif
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
@@ -190,15 +190,31 @@ extern "C" {
* ============================================================================= * =============================================================================
*/ */
// clang-format off // clang-format off
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\ #ifdef LHYDRO
FUNC(VTXBUF_LNRHO), \ #define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_UUX), \ FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \ FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \ FUNC(VTXBUF_UUZ),
// FUNC(VTXBUF_DYE), #else
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)
#endif
#if LINDUCTION #ifdef LDENSITY
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\ #define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO),
#else
#define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC)
#endif
#ifdef LENTROPY
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_ENTROPY),
#else
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
#endif
#ifdef LMAGNETIC
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_AX), \ FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \ FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ), FUNC(VTXBUF_AZ),
@@ -206,25 +222,22 @@ extern "C" {
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) #define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
#endif #endif
#if LENTROPY #define AC_FOR_VTXBUF_HANDLES(FUNC) AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\ AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_ENTROPY), AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \
#else AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
//MR: Temperature must not have an additional variable slot, but should sit in the
// same slot as entropy.
#ifndef USER_PROVIDED
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
#endif #endif
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
// clang-format on // clang-format on
/* /*
@@ -280,6 +293,7 @@ typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYP
typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam; typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam;
typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam; typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam;
//typedef enum { AC_FOR_VEC_PARAM_TYPES(AC_GEN_ID), NUM_VEC_PARAM_TYPES } AcVecParam;
extern const char* intparam_names[]; // Defined in astaroth.cu extern const char* intparam_names[]; // Defined in astaroth.cu
extern const char* realparam_names[]; // Defined in astaroth.cu extern const char* realparam_names[]; // Defined in astaroth.cu
@@ -287,6 +301,7 @@ extern const char* realparam_names[]; // Defined in astaroth.cu
typedef struct { typedef struct {
int int_params[NUM_INT_PARAM_TYPES]; int int_params[NUM_INT_PARAM_TYPES];
AcReal real_params[NUM_REAL_PARAM_TYPES]; AcReal real_params[NUM_REAL_PARAM_TYPES];
//AcReal* vec_params[NUM_VEC_PARAM_TYPES];
} AcMeshInfo; } AcMeshInfo;
/* /*
@@ -335,6 +350,11 @@ typedef struct {
* Astaroth interface * Astaroth interface
* ============================================================================= * =============================================================================
*/ */
/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more,
AC_FAILURE otherwise. */
AcResult acCheckDeviceAvailability(void);
/** Starting point of all GPU computation. Handles the allocation and /** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words, initialization of *all memory needed on all GPUs in the node*. In other words,
sets up everything GPU-side so that calling any other GPU interface function sets up everything GPU-side so that calling any other GPU interface function

16
include/user.h Normal file
View File

@@ -0,0 +1,16 @@
// User-side overrides. Everything below is active only when PENCIL_ASTAROTH is
// defined; otherwise this header contributes nothing.
#ifdef PENCIL_ASTAROTH
#include "../cparam.inc_c.h"
// Ghost-zone width and stencil order are both expressed through nghost.
// NOTE(review): nghost is presumably provided by cparam.inc_c.h -- confirm.
#define NGHOST nghost
#define STENCIL_ORDER (2*nghost)
#include "PC_moduleflags.h"
// Defined with an empty expansion.
#define CONFIG_PATH
#define AC_MULTIGPU_ENABLED (false)
// Translate "DOUBLE_PRECISION is defined / not defined" into a 1/0 valued macro.
#ifdef DOUBLE_PRECISION
#define AC_DOUBLE_PRECISION 1
#else
#define AC_DOUBLE_PRECISION 0
#endif
// Marks that the user supplied the definitions above, so code guarded by
// #ifndef USER_PROVIDED_DEFINES is skipped.
#define USER_PROVIDED_DEFINES
#endif

View File

@@ -4,13 +4,7 @@
#----------------------Find CUDA-----------------------------------------------# #----------------------Find CUDA-----------------------------------------------#
find_package(CUDA) find_package(CUDA 9 REQUIRED)
if (NOT CUDA_FOUND)
# find_package(CUDA REQUIRED) gives a confusing error message if it fails,
# therefore we print the reason here explicitly
message(FATAL_ERROR "CUDA not found")
endif()
#----------------------CUDA settings-------------------------------------------# #----------------------CUDA settings-------------------------------------------#

View File

@@ -36,20 +36,9 @@ const char* realparam_names[] = {AC_FOR_REAL_PARAM_TYPES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)}; const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
static const int MAX_NUM_DEVICES = 32; static const int MAX_NUM_DEVICES = 32;
static int num_devices = 1; static int num_devices = 0;
static Device devices[MAX_NUM_DEVICES] = {}; static Device devices[MAX_NUM_DEVICES] = {};
/**
 * Builds a Grid from the integer parameters of a mesh configuration.
 *
 * The m extent is read from AC_mx/AC_my/AC_mz and the n extent from
 * AC_nx/AC_ny/AC_nz of config.int_params. No other Grid member is assigned.
 */
static Grid
createGrid(const AcMeshInfo& config)
{
    const int* params = config.int_params;

    Grid result;
    result.m = (int3){params[AC_mx], params[AC_my], params[AC_mz]};
    result.n = (int3){params[AC_nx], params[AC_ny], params[AC_nz]};
    return result;
}
static Grid grid; // A grid consists of num_devices subgrids static Grid grid; // A grid consists of num_devices subgrids
static Grid subgrid; static Grid subgrid;
@@ -72,11 +61,33 @@ printInt3(const int3 vec)
printf("(%d, %d, %d)", vec.x, vec.y, vec.z); printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
} }
/**
 * Builds a Grid from the integer parameters of a mesh configuration.
 *
 * The m extent is read from AC_mx/AC_my/AC_mz and the n extent from
 * AC_nx/AC_ny/AC_nz of config.int_params. No other Grid member is assigned.
 */
static Grid
createGrid(const AcMeshInfo& config)
{
    const int* params = config.int_params;

    Grid result;
    result.m = (int3){params[AC_mx], params[AC_my], params[AC_mz]};
    result.n = (int3){params[AC_nx], params[AC_ny], params[AC_nz]};
    return result;
}
/**
 * Checks whether the CUDA runtime reports at least one device.
 *
 * The count is kept in a local variable rather than in the file-scope
 * num_devices, so calling this function does not modify that state.
 *
 * @return AC_SUCCESS if one or more devices are reported, AC_FAILURE otherwise.
 */
AcResult
acCheckDeviceAvailability(void)
{
    // Zero-initialised so the comparison below is well defined even if the
    // query fails without writing the count (in case ERRCHK_CUDA_ALWAYS does
    // not abort on error -- its definition is not visible here).
    int device_count = 0; // Separate from num_devices to avoid side effects
    ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));

    return device_count > 0 ? AC_SUCCESS : AC_FAILURE;
}
AcResult AcResult
acInit(const AcMeshInfo& config) acInit(const AcMeshInfo& config)
{ {
// Check devices // Get num_devices
cudaGetDeviceCount(&num_devices); ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
if (num_devices < 1) { if (num_devices < 1) {
ERROR("No CUDA devices found!"); ERROR("No CUDA devices found!");
return AC_FAILURE; return AC_FAILURE;
@@ -259,9 +270,9 @@ acStore(AcMesh* host_mesh)
AcResult AcResult
acIntegrateStep(const int& isubstep, const AcReal& dt) acIntegrateStep(const int& isubstep, const AcReal& dt)
{ {
const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2}; const int3 start = (int3){NGHOST, NGHOST, NGHOST};
const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y, const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y,
STENCIL_ORDER / 2 + subgrid.n.z}; NGHOST + subgrid.n.z};
for (int i = 0; i < num_devices; ++i) { for (int i = 0; i < num_devices; ++i) {
rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt); rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt);
} }
@@ -275,12 +286,12 @@ acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& sta
/* /*
// A skeleton function for computing integrations with arbitrary subblocks // A skeleton function for computing integrations with arbitrary subblocks
// Uncommenting the following should work with a single GPU. // Uncommenting the following should work with a single GPU.
const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2}; const int3 start = (int3){NGHOST, NGHOST, NGHOST};
const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y, const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y,
STENCIL_ORDER / 2 + subgrid.n.z}; NGHOST + subgrid.n.z};
rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt);
*/ */
return AC_FAILURE; rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt);
return AC_SUCCESS;
} }
AcResult AcResult
@@ -294,7 +305,7 @@ acBoundcondStep(void)
else { else {
// Local boundary conditions // Local boundary conditions
for (int i = 0; i < num_devices; ++i) { for (int i = 0; i < num_devices; ++i) {
const int3 d0 = (int3){0, 0, STENCIL_ORDER / 2}; // DECOMPOSITION OFFSET HERE const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z}; const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z};
boundcondStep(devices[i], STREAM_PRIMARY, d0, d1); boundcondStep(devices[i], STREAM_PRIMARY, d0, d1);
} }
@@ -393,7 +404,7 @@ acBoundcondStep(void)
*/ */
// Exchange halos // Exchange halos
for (int i = 0; i < num_devices; ++i) { for (int i = 0; i < num_devices; ++i) {
const int num_vertices = subgrid.m.x * subgrid.m.y * STENCIL_ORDER / 2; const int num_vertices = subgrid.m.x * subgrid.m.y * NGHOST;
// ...|ooooxxx|... -> xxx|ooooooo|... // ...|ooooxxx|... -> xxx|ooooooo|...
{ {
const int3 src = (int3){0, 0, subgrid.n.z}; const int3 src = (int3){0, 0, subgrid.n.z};
@@ -403,8 +414,8 @@ acBoundcondStep(void)
} }
// ...|ooooooo|xxx <- ...|xxxoooo|... // ...|ooooooo|xxx <- ...|xxxoooo|...
{ {
const int3 src = (int3){0, 0, STENCIL_ORDER / 2}; const int3 src = (int3){0, 0, NGHOST};
const int3 dst = (int3){0, 0, STENCIL_ORDER / 2 + subgrid.n.z}; const int3 dst = (int3){0, 0, NGHOST + subgrid.n.z};
copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src, copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src,
devices[i], dst, num_vertices); devices[i], dst, num_vertices);
} }

View File

@@ -152,15 +152,12 @@ update_config(AcMeshInfo* config)
config->real_params[AC_G_CONST]; config->real_params[AC_G_CONST];
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star])); config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
const bool print_config = true; #if VERBOSE_PRINTING
if (print_config) { printf("###############################################################\n");
printf("###############################################################" printf("Config dimensions recalculated:\n");
"\n"); print(*config);
printf("Config dimensions recalculated:\n"); printf("###############################################################\n");
print(*config); #endif
printf("###############################################################"
"\n");
}
} }
/** /**