From cda17c9b085b3198b42b9edd6f184ad5d24ec912 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Wed, 26 Jun 2019 18:50:15 +0300 Subject: [PATCH] VERBOSE_PRINTING flag is now globally used in the whole program and should be used to suppress development/debugging-related printing. Also added comments to the new interface function acCheckDeviceAvailability and made it free from side effects. --- include/astaroth.h | 8 ++++++-- src/core/astaroth.cu | 26 ++++++++++++-------------- src/standalone/config_loader.cc | 15 +++++---------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/include/astaroth.h b/include/astaroth.h index 1220ef0..483f9ba 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -49,6 +49,7 @@ extern "C" { #define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove #define BOUNDCONDS_OPTIMIZE (0) #define GENERATE_BENCHMARK_DATA (0) +#define VERBOSE_PRINTING (1) // Device info #define REGISTERS_PER_THREAD (255) @@ -347,13 +348,16 @@ typedef struct { * Astaroth interface * ============================================================================= */ + +/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more, + AC_FAILURE otherwise. */ +AcResult acCheckDeviceAvailability(void); + /** Starting point of all GPU computation. Handles the allocation and initialization of *all memory needed on all GPUs in the node*. In other words, setups everything GPU-side so that calling any other GPU interface function afterwards does not result in illegal memory accesses. */ AcResult acInit(const AcMeshInfo& mesh_info); -AcResult acCheckDeviceAvail(); -AcResult acInitialize(const AcMeshInfo& mesh_info); /** Splits the host_mesh and distributes it among the GPUs in the node */ AcResult acLoad(const AcMesh& host_mesh); diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index dd6e410..74dd55a 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -73,18 +73,21 @@ printInt3(const int3 vec) } AcResult -acInit(const AcMeshInfo& config) +acCheckDeviceAvailability(void) { - AcResult res=acGetDevice(); - res=acInitialize(config); - return AC_SUCCESS; + int device_count; // Separate from num_devices to avoid side effects + ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count)); + if (device_count > 0) + return AC_SUCCESS; + else + return AC_FAILURE; } AcResult -acCheckDeviceAvail() +acInit(const AcMeshInfo& config) { - // Check devices - cudaGetDeviceCount(&num_devices); + // Get num_devices + ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices)); if (num_devices < 1) { ERROR("No CUDA devices found!"); return AC_FAILURE; @@ -97,11 +100,6 @@ acCheckDeviceAvail() WARNING("MULTIGPU_ENABLED was false. Using only one device"); num_devices = 1; // Use only one device if multi-GPU is not enabled } - return AC_SUCCESS; -} -AcResult -acInitialize(const AcMeshInfo& config) -{ // Check that num_devices is divisible with AC_nz. This makes decomposing the // problem domain to multiple GPUs much easier since we do not have to worry // about remainders @@ -121,14 +119,14 @@ acInitialize(const AcMeshInfo& config) ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER); -#ifndef PENCIL_ASTAROTH + // clang-format off printf("Grid m "); printInt3(grid.m); printf("\n"); printf("Grid n "); printInt3(grid.n); printf("\n"); printf("Subrid m "); printInt3(subgrid.m); printf("\n"); printf("Subrid n "); printInt3(subgrid.n); printf("\n"); // clang-format on -#endif + // Initialize the devices for (int i = 0; i < num_devices; ++i) { createDevice(i, subgrid_config, &devices[i]); diff --git a/src/standalone/config_loader.cc b/src/standalone/config_loader.cc index a3a9abe..36e33e3 100644 --- a/src/standalone/config_loader.cc +++ b/src/standalone/config_loader.cc @@ -152,16 +152,11 @@ update_config(AcMeshInfo* config) config->real_params[AC_G_CONST]; config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star])); - const bool print_config = true; -#ifdef VERBOSE_PRINT - if (print_config) { - printf("###############################################################" - "\n"); - printf("Config dimensions recalculated:\n"); - print(*config); - printf("###############################################################" - "\n"); - } +#if VERBOSE_PRINTING + printf("###############################################################\n"); + printf("Config dimensions recalculated:\n"); + print(*config); + printf("###############################################################\n"); #endif }