diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a48d9..349201e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,8 +48,8 @@ message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID}) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4) - message(FATAL_ERROR "GCC version 5.4 or higher required") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0) # Because of GCC bug 48891 + message(FATAL_ERROR "GCC version 6.0 or higher required") endif() endif() diff --git a/include/astaroth.h b/include/astaroth.h index d9a347f..572d194 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -49,6 +49,7 @@ extern "C" { #define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove #define BOUNDCONDS_OPTIMIZE (0) #define GENERATE_BENCHMARK_DATA (0) +#define VERBOSE_PRINTING (1) // Device info #define REGISTERS_PER_THREAD (255) @@ -57,27 +58,26 @@ extern "C" { #define MAX_TB_DIM (MAX_THREADS_PER_BLOCK) #define NUM_ITERATIONS (10) #define WARP_SIZE (32) - /* * ============================================================================= * Compile-time constants used during simulation (user definable) * ============================================================================= */ -#define STENCIL_ORDER (6) +// USER_PROVIDED_DEFINES must be defined in user.h if the user wants to override the following +// logical switches +#include "user.h" -///////////// PAD TEST -// NOTE: works only with nx is divisible by 32 -//#define PAD_LEAD (32 - STENCIL_ORDER/2) -//#define PAD_SIZE (32 - STENCIL_ORDER) -///////////// PAD TEST - -// L-prefix inherited from the old Astaroth, no idea what it means -// MV: L means a Logical switch variale, something having true of false value. 
-// Note: forcing is disabled currently in the files generated by acc (compiler of our DSL) -#define LFORCING (1) -#define LINDUCTION (1) -#define LENTROPY (1) -#define LTEMPERATURE (0) +#ifndef USER_PROVIDED_DEFINES + #define STENCIL_ORDER (6) + #define NGHOST (STENCIL_ORDER/2) + #define LHYDRO (1) + #define LDENSITY (1) + #define LFORCING (1) + #define LINDUCTION (1) + #define LENTROPY (1) + #define LTEMPERATURE (0) + #define LMAGNETIC LINDUCTION +#endif #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter @@ -190,15 +190,31 @@ extern "C" { * ============================================================================= */ // clang-format off -#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\ - FUNC(VTXBUF_LNRHO), \ +#ifdef LHYDRO +#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \ FUNC(VTXBUF_UUX), \ FUNC(VTXBUF_UUY), \ - FUNC(VTXBUF_UUZ), \ - // FUNC(VTXBUF_DYE), + FUNC(VTXBUF_UUZ), +#else +#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) +#endif -#if LINDUCTION -#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\ +#ifdef LDENSITY +#define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \ + FUNC(VTXBUF_LNRHO), +#else +#define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) +#endif + +#ifdef LENTROPY +#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \ + FUNC(VTXBUF_ENTROPY), +#else +#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) +#endif + +#ifdef LMAGNETIC +#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \ FUNC(VTXBUF_AX), \ FUNC(VTXBUF_AY), \ FUNC(VTXBUF_AZ), @@ -206,25 +222,22 @@ extern "C" { #define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) #endif -#if LENTROPY -#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\ - FUNC(VTXBUF_ENTROPY), -#else -#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) +#define AC_FOR_VTXBUF_HANDLES(FUNC) AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \ + AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \ + AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \ + AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \ + +//MR: Temperature must not have an additional variable slot, but should sit on the +// same as entropy. 
+#ifndef USER_PROVIDED_DEFINES // skipped when user.h supplies the switches + #if LTEMPERATURE + #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\ + FUNC(VTXBUF_TEMPERATURE), + #else + #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) + #endif #endif -#if LTEMPERATURE -#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\ - FUNC(VTXBUF_TEMPERATURE), -#else -#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) -#endif - -#define AC_FOR_VTXBUF_HANDLES(FUNC)\ - AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\ - AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\ - AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\ - AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) // clang-format on /* @@ -280,6 +293,7 @@ typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYP typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam; typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam; +//typedef enum { AC_FOR_VEC_PARAM_TYPES(AC_GEN_ID), NUM_VEC_PARAM_TYPES } AcVecParam; extern const char* intparam_names[]; // Defined in astaroth.cu extern const char* realparam_names[]; // Defined in astaroth.cu @@ -287,6 +301,7 @@ extern const char* realparam_names[]; // Defined in astaroth.cu typedef struct { int int_params[NUM_INT_PARAM_TYPES]; AcReal real_params[NUM_REAL_PARAM_TYPES]; + //AcReal* vec_params[NUM_VEC_PARAM_TYPES]; } AcMeshInfo; /* @@ -335,6 +350,11 @@ typedef struct { * Astaroth interface * ============================================================================= */ + +/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more, + AC_FAILURE otherwise. */ +AcResult acCheckDeviceAvailability(void); + /** Starting point of all GPU computation. Handles the allocation and initialization of *all memory needed on all GPUs in the node*. 
In other words, setups everything GPU-side so that calling any other GPU interface function diff --git a/include/user.h b/include/user.h new file mode 100644 index 0000000..c2075b4 --- /dev/null +++ b/include/user.h @@ -0,0 +1,16 @@ +#ifdef PENCIL_ASTAROTH + + #include "../cparam.inc_c.h" + #define NGHOST nghost + #define STENCIL_ORDER (2*nghost) + #include "PC_moduleflags.h" + #define CONFIG_PATH + #define AC_MULTIGPU_ENABLED (false) + #ifdef DOUBLE_PRECISION + #define AC_DOUBLE_PRECISION 1 + #else + #define AC_DOUBLE_PRECISION 0 + #endif + #define USER_PROVIDED_DEFINES +#endif + diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e875faa..9b67f11 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -4,13 +4,7 @@ #----------------------Find CUDA-----------------------------------------------# -find_package(CUDA) -if (NOT CUDA_FOUND) - # find_package(CUDA REQUIRED) gives a confusing error message if it fails, - # therefore we print the reason here explicitly - message(FATAL_ERROR "CUDA not found") -endif() - +find_package(CUDA 9 REQUIRED) #----------------------CUDA settings-------------------------------------------# diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 4ea78be..d676613 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -36,20 +36,9 @@ const char* realparam_names[] = {AC_FOR_REAL_PARAM_TYPES(AC_GEN_STR)}; const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)}; static const int MAX_NUM_DEVICES = 32; -static int num_devices = 1; +static int num_devices = 0; static Device devices[MAX_NUM_DEVICES] = {}; -static Grid -createGrid(const AcMeshInfo& config) -{ - Grid grid; - - grid.m = (int3){config.int_params[AC_mx], config.int_params[AC_my], config.int_params[AC_mz]}; - grid.n = (int3){config.int_params[AC_nx], config.int_params[AC_ny], config.int_params[AC_nz]}; - - return grid; -} - static Grid grid; // A grid consists of num_devices subgrids static Grid subgrid; @@ -72,11 +61,33 @@ 
printInt3(const int3 vec) printf("(%d, %d, %d)", vec.x, vec.y, vec.z); } +static Grid +createGrid(const AcMeshInfo& config) +{ + Grid grid; + + grid.m = (int3){config.int_params[AC_mx], config.int_params[AC_my], config.int_params[AC_mz]}; + grid.n = (int3){config.int_params[AC_nx], config.int_params[AC_ny], config.int_params[AC_nz]}; + + return grid; +} + +AcResult +acCheckDeviceAvailability(void) +{ + int device_count = 0; // Separate from num_devices to avoid side effects + // A failed query (e.g. cudaErrorNoDevice) also means "no usable device" + if (cudaGetDeviceCount(&device_count) != cudaSuccess) + return AC_FAILURE; + + return (device_count > 0) ? AC_SUCCESS : AC_FAILURE; +} + AcResult acInit(const AcMeshInfo& config) { - // Check devices - cudaGetDeviceCount(&num_devices); + // Get num_devices + ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices)); if (num_devices < 1) { ERROR("No CUDA devices found!"); return AC_FAILURE; @@ -259,9 +270,9 @@ acStore(AcMesh* host_mesh) AcResult acIntegrateStep(const int& isubstep, const AcReal& dt) { - const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2}; - const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y, - STENCIL_ORDER / 2 + subgrid.n.z}; + const int3 start = (int3){NGHOST, NGHOST, NGHOST}; + const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, + NGHOST + subgrid.n.z}; for (int i = 0; i < num_devices; ++i) { rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt); } @@ -275,12 +286,12 @@ acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& sta /* // A skeleton function for computing integrations with arbitrary subblocks // Uncommenting the following should work with a single GPU. 
- const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2}; - const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y, - STENCIL_ORDER / 2 + subgrid.n.z}; - rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt); + const int3 start = (int3){NGHOST, NGHOST, NGHOST}; + const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, + NGHOST + subgrid.n.z}; */ - return AC_FAILURE; + rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt); + return AC_SUCCESS; } AcResult @@ -294,7 +305,7 @@ acBoundcondStep(void) else { // Local boundary conditions for (int i = 0; i < num_devices; ++i) { - const int3 d0 = (int3){0, 0, STENCIL_ORDER / 2}; // DECOMPOSITION OFFSET HERE + const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z}; boundcondStep(devices[i], STREAM_PRIMARY, d0, d1); } @@ -393,7 +404,7 @@ acBoundcondStep(void) */ // Exchange halos for (int i = 0; i < num_devices; ++i) { - const int num_vertices = subgrid.m.x * subgrid.m.y * STENCIL_ORDER / 2; + const int num_vertices = subgrid.m.x * subgrid.m.y * NGHOST; // ...|ooooxxx|... -> xxx|ooooooo|... { const int3 src = (int3){0, 0, subgrid.n.z}; @@ -403,8 +414,8 @@ acBoundcondStep(void) } // ...|ooooooo|xxx <- ...|xxxoooo|... 
{ - const int3 src = (int3){0, 0, STENCIL_ORDER / 2}; - const int3 dst = (int3){0, 0, STENCIL_ORDER / 2 + subgrid.n.z}; + const int3 src = (int3){0, 0, NGHOST}; + const int3 dst = (int3){0, 0, NGHOST + subgrid.n.z}; copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src, devices[i], dst, num_vertices); } diff --git a/src/standalone/config_loader.cc b/src/standalone/config_loader.cc index 637bcd4..36e33e3 100644 --- a/src/standalone/config_loader.cc +++ b/src/standalone/config_loader.cc @@ -152,15 +152,12 @@ update_config(AcMeshInfo* config) config->real_params[AC_G_CONST]; config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star])); - const bool print_config = true; - if (print_config) { - printf("###############################################################" - "\n"); - printf("Config dimensions recalculated:\n"); - print(*config); - printf("###############################################################" - "\n"); - } +#if VERBOSE_PRINTING + printf("###############################################################\n"); + printf("Config dimensions recalculated:\n"); + print(*config); + printf("###############################################################\n"); +#endif } /**