Merge branch 'master' into forcing
Now I need to test what works... Conflicts: acc/mhd_solver/stencil_process.sps
This commit is contained in:
@@ -48,8 +48,8 @@ message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
|
|||||||
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
|
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
|
||||||
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4)
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0) # Because of GCC bug 48891
|
||||||
message(FATAL_ERROR "GCC version 5.4 or higher required")
|
message(FATAL_ERROR "GCC version 6.0 or higher required")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ extern "C" {
|
|||||||
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
|
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
|
||||||
#define BOUNDCONDS_OPTIMIZE (0)
|
#define BOUNDCONDS_OPTIMIZE (0)
|
||||||
#define GENERATE_BENCHMARK_DATA (0)
|
#define GENERATE_BENCHMARK_DATA (0)
|
||||||
|
#define VERBOSE_PRINTING (1)
|
||||||
|
|
||||||
// Device info
|
// Device info
|
||||||
#define REGISTERS_PER_THREAD (255)
|
#define REGISTERS_PER_THREAD (255)
|
||||||
@@ -57,27 +58,26 @@ extern "C" {
|
|||||||
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
|
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
|
||||||
#define NUM_ITERATIONS (10)
|
#define NUM_ITERATIONS (10)
|
||||||
#define WARP_SIZE (32)
|
#define WARP_SIZE (32)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
* Compile-time constants used during simulation (user definable)
|
* Compile-time constants used during simulation (user definable)
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
#define STENCIL_ORDER (6)
|
// USER_PROVIDED_DEFINES must be defined in user.h if the user wants to override the following
|
||||||
|
// logical switches
|
||||||
|
#include "user.h"
|
||||||
|
|
||||||
///////////// PAD TEST
|
#ifndef USER_PROVIDED_DEFINES
|
||||||
// NOTE: works only with nx is divisible by 32
|
#define STENCIL_ORDER (6)
|
||||||
//#define PAD_LEAD (32 - STENCIL_ORDER/2)
|
#define NGHOST (STENCIL_ORDER/2)
|
||||||
//#define PAD_SIZE (32 - STENCIL_ORDER)
|
#define LHYDRO (1)
|
||||||
///////////// PAD TEST
|
#define LDENSITY (1)
|
||||||
|
#define LFORCING (1)
|
||||||
// L-prefix inherited from the old Astaroth, no idea what it means
|
#define LINDUCTION (1)
|
||||||
// MV: L means a Logical switch variable, something having a true or false value.
|
#define LENTROPY (1)
|
||||||
// Note: forcing is disabled currently in the files generated by acc (compiler of our DSL)
|
#define LTEMPERATURE (0)
|
||||||
#define LFORCING (1)
|
#define LMAGNETIC LINDUCTION
|
||||||
#define LINDUCTION (1)
|
#endif
|
||||||
#define LENTROPY (1)
|
|
||||||
#define LTEMPERATURE (0)
|
|
||||||
|
|
||||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||||
|
|
||||||
@@ -190,15 +190,31 @@ extern "C" {
|
|||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
|
#ifdef LHYDRO
|
||||||
FUNC(VTXBUF_LNRHO), \
|
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \
|
||||||
FUNC(VTXBUF_UUX), \
|
FUNC(VTXBUF_UUX), \
|
||||||
FUNC(VTXBUF_UUY), \
|
FUNC(VTXBUF_UUY), \
|
||||||
FUNC(VTXBUF_UUZ), \
|
FUNC(VTXBUF_UUZ),
|
||||||
// FUNC(VTXBUF_DYE),
|
#else
|
||||||
|
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if LINDUCTION
|
#ifdef LDENSITY
|
||||||
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
|
#define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO),
|
||||||
|
#else
|
||||||
|
#define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef LENTROPY
|
||||||
|
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_ENTROPY),
|
||||||
|
#else
|
||||||
|
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef LMAGNETIC
|
||||||
|
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \
|
||||||
FUNC(VTXBUF_AX), \
|
FUNC(VTXBUF_AX), \
|
||||||
FUNC(VTXBUF_AY), \
|
FUNC(VTXBUF_AY), \
|
||||||
FUNC(VTXBUF_AZ),
|
FUNC(VTXBUF_AZ),
|
||||||
@@ -206,25 +222,22 @@ extern "C" {
|
|||||||
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
|
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if LENTROPY
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \
|
||||||
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
|
AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \
|
||||||
FUNC(VTXBUF_ENTROPY),
|
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \
|
||||||
#else
|
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \
|
||||||
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
|
|
||||||
|
//MR: Temperature must not have an additional variable slot, but should sit on the
|
||||||
|
// same slot as entropy.
|
||||||
|
#ifndef USER_PROVIDED
|
||||||
|
#if LTEMPERATURE
|
||||||
|
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
|
||||||
|
FUNC(VTXBUF_TEMPERATURE),
|
||||||
|
#else
|
||||||
|
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if LTEMPERATURE
|
|
||||||
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
|
|
||||||
FUNC(VTXBUF_TEMPERATURE),
|
|
||||||
#else
|
|
||||||
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
|
|
||||||
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
|
|
||||||
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
|
|
||||||
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
|
|
||||||
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
|
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -280,6 +293,7 @@ typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYP
|
|||||||
typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam;
|
typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam;
|
||||||
|
|
||||||
typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam;
|
typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam;
|
||||||
|
//typedef enum { AC_FOR_VEC_PARAM_TYPES(AC_GEN_ID), NUM_VEC_PARAM_TYPES } AcVecParam;
|
||||||
|
|
||||||
extern const char* intparam_names[]; // Defined in astaroth.cu
|
extern const char* intparam_names[]; // Defined in astaroth.cu
|
||||||
extern const char* realparam_names[]; // Defined in astaroth.cu
|
extern const char* realparam_names[]; // Defined in astaroth.cu
|
||||||
@@ -287,6 +301,7 @@ extern const char* realparam_names[]; // Defined in astaroth.cu
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
int int_params[NUM_INT_PARAM_TYPES];
|
int int_params[NUM_INT_PARAM_TYPES];
|
||||||
AcReal real_params[NUM_REAL_PARAM_TYPES];
|
AcReal real_params[NUM_REAL_PARAM_TYPES];
|
||||||
|
//AcReal* vec_params[NUM_VEC_PARAM_TYPES];
|
||||||
} AcMeshInfo;
|
} AcMeshInfo;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -335,6 +350,11 @@ typedef struct {
|
|||||||
* Astaroth interface
|
* Astaroth interface
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more,
|
||||||
|
AC_FAILURE otherwise. */
|
||||||
|
AcResult acCheckDeviceAvailability(void);
|
||||||
|
|
||||||
/** Starting point of all GPU computation. Handles the allocation and
|
/** Starting point of all GPU computation. Handles the allocation and
|
||||||
initialization of *all memory needed on all GPUs in the node*. In other words,
|
initialization of *all memory needed on all GPUs in the node*. In other words,
|
||||||
setups everything GPU-side so that calling any other GPU interface function
|
setups everything GPU-side so that calling any other GPU interface function
|
||||||
|
16
include/user.h
Normal file
16
include/user.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifdef PENCIL_ASTAROTH
|
||||||
|
|
||||||
|
#include "../cparam.inc_c.h"
|
||||||
|
#define NGHOST nghost
|
||||||
|
#define STENCIL_ORDER (2*nghost)
|
||||||
|
#include "PC_moduleflags.h"
|
||||||
|
#define CONFIG_PATH
|
||||||
|
#define AC_MULTIGPU_ENABLED (false)
|
||||||
|
#ifdef DOUBLE_PRECISION
|
||||||
|
#define AC_DOUBLE_PRECISION 1
|
||||||
|
#else
|
||||||
|
#define AC_DOUBLE_PRECISION 0
|
||||||
|
#endif
|
||||||
|
#define USER_PROVIDED_DEFINES
|
||||||
|
#endif
|
||||||
|
|
@@ -4,13 +4,7 @@
|
|||||||
|
|
||||||
#----------------------Find CUDA-----------------------------------------------#
|
#----------------------Find CUDA-----------------------------------------------#
|
||||||
|
|
||||||
find_package(CUDA)
|
find_package(CUDA 9 REQUIRED)
|
||||||
if (NOT CUDA_FOUND)
|
|
||||||
# find_package(CUDA REQUIRED) gives a confusing error message if it fails,
|
|
||||||
# therefore we print the reason here explicitly
|
|
||||||
message(FATAL_ERROR "CUDA not found")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
#----------------------CUDA settings-------------------------------------------#
|
#----------------------CUDA settings-------------------------------------------#
|
||||||
|
|
||||||
|
@@ -36,20 +36,9 @@ const char* realparam_names[] = {AC_FOR_REAL_PARAM_TYPES(AC_GEN_STR)};
|
|||||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||||
|
|
||||||
static const int MAX_NUM_DEVICES = 32;
|
static const int MAX_NUM_DEVICES = 32;
|
||||||
static int num_devices = 1;
|
static int num_devices = 0;
|
||||||
static Device devices[MAX_NUM_DEVICES] = {};
|
static Device devices[MAX_NUM_DEVICES] = {};
|
||||||
|
|
||||||
static Grid
|
|
||||||
createGrid(const AcMeshInfo& config)
|
|
||||||
{
|
|
||||||
Grid grid;
|
|
||||||
|
|
||||||
grid.m = (int3){config.int_params[AC_mx], config.int_params[AC_my], config.int_params[AC_mz]};
|
|
||||||
grid.n = (int3){config.int_params[AC_nx], config.int_params[AC_ny], config.int_params[AC_nz]};
|
|
||||||
|
|
||||||
return grid;
|
|
||||||
}
|
|
||||||
|
|
||||||
static Grid grid; // A grid consists of num_devices subgrids
|
static Grid grid; // A grid consists of num_devices subgrids
|
||||||
static Grid subgrid;
|
static Grid subgrid;
|
||||||
|
|
||||||
@@ -72,11 +61,33 @@ printInt3(const int3 vec)
|
|||||||
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Grid
|
||||||
|
createGrid(const AcMeshInfo& config)
|
||||||
|
{
|
||||||
|
Grid grid;
|
||||||
|
|
||||||
|
grid.m = (int3){config.int_params[AC_mx], config.int_params[AC_my], config.int_params[AC_mz]};
|
||||||
|
grid.n = (int3){config.int_params[AC_nx], config.int_params[AC_ny], config.int_params[AC_nz]};
|
||||||
|
|
||||||
|
return grid;
|
||||||
|
}
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
acCheckDeviceAvailability(void)
|
||||||
|
{
|
||||||
|
int device_count; // Separate from num_devices to avoid side effects
|
||||||
|
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));
|
||||||
|
if (device_count > 0)
|
||||||
|
return AC_SUCCESS;
|
||||||
|
else
|
||||||
|
return AC_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acInit(const AcMeshInfo& config)
|
acInit(const AcMeshInfo& config)
|
||||||
{
|
{
|
||||||
// Check devices
|
// Get num_devices
|
||||||
cudaGetDeviceCount(&num_devices);
|
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
|
||||||
if (num_devices < 1) {
|
if (num_devices < 1) {
|
||||||
ERROR("No CUDA devices found!");
|
ERROR("No CUDA devices found!");
|
||||||
return AC_FAILURE;
|
return AC_FAILURE;
|
||||||
@@ -259,9 +270,9 @@ acStore(AcMesh* host_mesh)
|
|||||||
AcResult
|
AcResult
|
||||||
acIntegrateStep(const int& isubstep, const AcReal& dt)
|
acIntegrateStep(const int& isubstep, const AcReal& dt)
|
||||||
{
|
{
|
||||||
const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2};
|
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
|
||||||
const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y,
|
const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y,
|
||||||
STENCIL_ORDER / 2 + subgrid.n.z};
|
NGHOST + subgrid.n.z};
|
||||||
for (int i = 0; i < num_devices; ++i) {
|
for (int i = 0; i < num_devices; ++i) {
|
||||||
rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt);
|
rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt);
|
||||||
}
|
}
|
||||||
@@ -275,12 +286,12 @@ acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& sta
|
|||||||
/*
|
/*
|
||||||
// A skeleton function for computing integrations with arbitrary subblocks
|
// A skeleton function for computing integrations with arbitrary subblocks
|
||||||
// Uncommenting the following should work with a single GPU.
|
// Uncommenting the following should work with a single GPU.
|
||||||
const int3 start = (int3){STENCIL_ORDER / 2, STENCIL_ORDER / 2, STENCIL_ORDER / 2};
|
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
|
||||||
const int3 end = (int3){STENCIL_ORDER / 2 + subgrid.n.x, STENCIL_ORDER / 2 + subgrid.n.y,
|
const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y,
|
||||||
STENCIL_ORDER / 2 + subgrid.n.z};
|
NGHOST + subgrid.n.z};
|
||||||
rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt);
|
|
||||||
*/
|
*/
|
||||||
return AC_FAILURE;
|
rkStep(devices[0], STREAM_PRIMARY, isubstep, start, end, dt);
|
||||||
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
@@ -294,7 +305,7 @@ acBoundcondStep(void)
|
|||||||
else {
|
else {
|
||||||
// Local boundary conditions
|
// Local boundary conditions
|
||||||
for (int i = 0; i < num_devices; ++i) {
|
for (int i = 0; i < num_devices; ++i) {
|
||||||
const int3 d0 = (int3){0, 0, STENCIL_ORDER / 2}; // DECOMPOSITION OFFSET HERE
|
const int3 d0 = (int3){0, 0, NGHOST}; // DECOMPOSITION OFFSET HERE
|
||||||
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z};
|
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z};
|
||||||
boundcondStep(devices[i], STREAM_PRIMARY, d0, d1);
|
boundcondStep(devices[i], STREAM_PRIMARY, d0, d1);
|
||||||
}
|
}
|
||||||
@@ -393,7 +404,7 @@ acBoundcondStep(void)
|
|||||||
*/
|
*/
|
||||||
// Exchange halos
|
// Exchange halos
|
||||||
for (int i = 0; i < num_devices; ++i) {
|
for (int i = 0; i < num_devices; ++i) {
|
||||||
const int num_vertices = subgrid.m.x * subgrid.m.y * STENCIL_ORDER / 2;
|
const int num_vertices = subgrid.m.x * subgrid.m.y * NGHOST;
|
||||||
// ...|ooooxxx|... -> xxx|ooooooo|...
|
// ...|ooooxxx|... -> xxx|ooooooo|...
|
||||||
{
|
{
|
||||||
const int3 src = (int3){0, 0, subgrid.n.z};
|
const int3 src = (int3){0, 0, subgrid.n.z};
|
||||||
@@ -403,8 +414,8 @@ acBoundcondStep(void)
|
|||||||
}
|
}
|
||||||
// ...|ooooooo|xxx <- ...|xxxoooo|...
|
// ...|ooooooo|xxx <- ...|xxxoooo|...
|
||||||
{
|
{
|
||||||
const int3 src = (int3){0, 0, STENCIL_ORDER / 2};
|
const int3 src = (int3){0, 0, NGHOST};
|
||||||
const int3 dst = (int3){0, 0, STENCIL_ORDER / 2 + subgrid.n.z};
|
const int3 dst = (int3){0, 0, NGHOST + subgrid.n.z};
|
||||||
copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src,
|
copyMeshDeviceToDevice(devices[(i + 1) % num_devices], STREAM_PRIMARY, src,
|
||||||
devices[i], dst, num_vertices);
|
devices[i], dst, num_vertices);
|
||||||
}
|
}
|
||||||
|
@@ -152,15 +152,12 @@ update_config(AcMeshInfo* config)
|
|||||||
config->real_params[AC_G_CONST];
|
config->real_params[AC_G_CONST];
|
||||||
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
|
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
|
||||||
|
|
||||||
const bool print_config = true;
|
#if VERBOSE_PRINTING
|
||||||
if (print_config) {
|
printf("###############################################################\n");
|
||||||
printf("###############################################################"
|
printf("Config dimensions recalculated:\n");
|
||||||
"\n");
|
print(*config);
|
||||||
printf("Config dimensions recalculated:\n");
|
printf("###############################################################\n");
|
||||||
print(*config);
|
#endif
|
||||||
printf("###############################################################"
|
|
||||||
"\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Reference in New Issue
Block a user