From d966afe830ce79c8e769c46bb35abbf2e9aedded Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 21 Aug 2020 21:19:42 +0300 Subject: [PATCH] Added VERBOSE CMake option and made various prints optional to clean the output. VERBOSE is off by default; pass cmake -DVERBOSE=ON to re-enable various non-critical warning and status prints (important warnings are still visible regardless of the flag). --- CMakeLists.txt | 4 ++++ include/astaroth.h | 1 - samples/benchmark/main.cc | 2 +- samples/mpitest/main.cc | 2 ++ src/core/device.cc | 29 +++++++++++++++++++++++------ src/core/kernels/integration.cuh | 5 ++++- src/core/kernels/kernels.cu | 10 +++++++++- src/core/node.cc | 10 +++++----- src/utils/config_loader.c | 6 ++++-- src/utils/modelsolver.c | 2 ++ src/utils/verification.c | 1 + 11 files changed, 55 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2812d1b..61e560c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ option(DOUBLE_PRECISION "Generates double precision code." OF option(BUILD_SAMPLES "Builds projects in samples subdirectory." ON) option(MPI_ENABLED "Enables additional functions for MPI communciation." OFF) option(MULTIGPU_ENABLED "Enables multi-GPU on a single node. Uses peer-to-peer communication instead of MPI. Affects Legacy & Node layers only." 
ON) +option(VERBOSE "Enables various status and warning messages" OFF) ## Options (DEPRECATED) # option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) @@ -77,6 +78,9 @@ add_custom_target(dsl_headers ALL DEPENDS ${DSL_HEADERS}) if (DOUBLE_PRECISION) add_definitions(-DAC_DOUBLE_PRECISION=1) endif () +if (VERBOSE) + add_definitions(-DAC_VERBOSE=1) +endif () if (MPI_ENABLED) find_package(MPI REQUIRED COMPONENTS C) include_directories(${MPI_C_INCLUDE_DIRS}) diff --git a/include/astaroth.h b/include/astaroth.h index 9b1a820..c3320dc 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -25,7 +25,6 @@ // Library flags #define STENCIL_ORDER (6) #define NGHOST (STENCIL_ORDER / 2) -#define VERBOSE_PRINTING (1) // Built-in types and parameters #if AC_DOUBLE_PRECISION == 1 diff --git a/samples/benchmark/main.cc b/samples/benchmark/main.cc index 4b22cc9..f205b04 100644 --- a/samples/benchmark/main.cc +++ b/samples/benchmark/main.cc @@ -99,7 +99,7 @@ main(int argc, char** argv) info.int_params[AC_ny] = ny; info.int_params[AC_nz] = nz; acUpdateBuiltinParams(&info); - printf("Updated mesh dimensions to (%d, %d, %d)\n", nx, ny, nz); + printf("Benchmark mesh dimensions: (%d, %d, %d)\n", nx, ny, nz); } else { fprintf(stderr, "Could not parse arguments. 
Usage: ./benchmark .\n"); diff --git a/samples/mpitest/main.cc b/samples/mpitest/main.cc index 606ea3b..f3ad473 100644 --- a/samples/mpitest/main.cc +++ b/samples/mpitest/main.cc @@ -86,6 +86,7 @@ main(void) if (pid == 0) { printf("---Test: Scalar reductions---\n"); printf("Warning: testing only RTYPE_MAX and RTYPE_MIN\n"); + fflush(stdout); } for (size_t i = 0; i < 2; ++i) { // NOTE: 2 instead of NUM_RTYPES const VertexBufferHandle v0 = VTXBUF_UUX; @@ -104,6 +105,7 @@ main(void) if (pid == 0) { printf("---Test: Vector reductions---\n"); printf("Warning: testing only RTYPE_MAX and RTYPE_MIN\n"); + fflush(stdout); } for (size_t i = 0; i < 2; ++i) { // NOTE: 2 instead of NUM_RTYPES const VertexBufferHandle v0 = VTXBUF_UUX; diff --git a/src/core/device.cc b/src/core/device.cc index ae38bfe..2721c59 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -123,15 +123,20 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand device->id = id; device->local_config = device_config; +#if AC_VERBOSE acDevicePrintInfo(device); +#endif - // Check that the code was compiled for the proper GPU architecture +// Check that the code was compiled for the proper GPU architecture +#if AC_VERBOSE printf("Trying to run a dummy kernel. If this fails, make sure that your\n" - "device supports the CUDA architecture you are compiling for.\n" - "Running dummy kernel... "); + "device supports the CUDA architecture you are compiling for.\n"); +#endif + printf("Testing CUDA... 
"); fflush(stdout); acKernelDummy(); - printf("Success!\n"); + printf("\x1B[32m%s\x1B[0m\n", "OK!"); + fflush(stdout); // Concurrency for (int i = 0; i < NUM_STREAMS; ++i) { @@ -162,7 +167,9 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand acDeviceLoadDefaultUniforms(device); acDeviceLoadMeshInfo(device, device_config); +#if AC_VERBOSE printf("Created device %d (%p)\n", device->id, device); +#endif *device_handle = device; // Autoptimize @@ -175,7 +182,9 @@ AcResult acDeviceDestroy(Device device) { cudaSetDevice(device->id); +#if AC_VERBOSE printf("Destroying device %d (%p)\n", device->id, device); +#endif acDeviceSynchronizeStream(device, STREAM_ALL); // Memory @@ -761,8 +770,10 @@ static AcResult acDeviceDistributeMeshMPI(const AcMesh src, const uint3_64 decomposition, AcMesh* dst) { MPI_Barrier(MPI_COMM_WORLD); +#if AC_VERBOSE printf("Distributing mesh...\n"); fflush(stdout); +#endif MPI_Datatype datatype = MPI_FLOAT; if (sizeof(AcReal) == 8) @@ -837,8 +848,10 @@ static AcResult acDeviceGatherMeshMPI(const AcMesh src, const uint3_64 decomposition, AcMesh* dst) { MPI_Barrier(MPI_COMM_WORLD); +#if AC_VERBOSE printf("Gathering mesh...\n"); fflush(stdout); +#endif MPI_Datatype datatype = MPI_FLOAT; if (sizeof(AcReal) == 8) @@ -1215,15 +1228,19 @@ acGridInit(const AcMeshInfo info) char processor_name[MPI_MAX_PROCESSOR_NAME]; int name_len; MPI_Get_processor_name(processor_name, &name_len); - printf("Processor %s. Process %d of %d.\n", processor_name, pid, nprocs); // Decompose AcMeshInfo submesh_info = info; const uint3_64 decomposition = decompose(nprocs); const int3 pid3d = getPid3D(pid, decomposition); + MPI_Barrier(MPI_COMM_WORLD); + printf("Processor %s. 
Process %d of %d: (%d, %d, %d)\n", processor_name, pid, nprocs, pid3d.x, + pid3d.y, pid3d.z); printf("Decomposition: %lu, %lu, %lu\n", decomposition.x, decomposition.y, decomposition.z); - printf("Process %d: (%d, %d, %d)\n", pid, pid3d.x, pid3d.y, pid3d.z); + fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + ERRCHK_ALWAYS(info.int_params[AC_nx] % decomposition.x == 0); ERRCHK_ALWAYS(info.int_params[AC_ny] % decomposition.y == 0); ERRCHK_ALWAYS(info.int_params[AC_nz] % decomposition.z == 0); diff --git a/src/core/kernels/integration.cuh b/src/core/kernels/integration.cuh index 40c6b15..9a264c9 100644 --- a/src/core/kernels/integration.cuh +++ b/src/core/kernels/integration.cuh @@ -130,6 +130,7 @@ static dim3 rk3_tpb(32, 1, 4); AcResult acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferArray vba) { + printf("Autotuning... "); // RK3 dim3 best_dims(0, 0, 0); float best_time = INFINITY; @@ -192,7 +193,9 @@ acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferAr } } } -#if VERBOSE_PRINTING + printf("\x1B[32m%s\x1B[0m\n", "OK!"); + fflush(stdout); +#if AC_VERBOSE printf("Auto-optimization done. The best threadblock dimensions for rkStep: (%d, %d, %d) %f " "ms\n", best_dims.x, best_dims.y, best_dims.z, double(best_time) / num_iterations); diff --git a/src/core/kernels/kernels.cu b/src/core/kernels/kernels.cu index ccbd500..e8ac357 100644 --- a/src/core/kernels/kernels.cu +++ b/src/core/kernels/kernels.cu @@ -105,13 +105,15 @@ acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcReal { cudaSetDevice(device->id); if (param < 0 || param >= NUM_REAL_PARAMS) { - fprintf(stderr, "WARNING: invalid AcRealParam %d. Skipping.\n", param); + fprintf(stderr, "WARNING: invalid AcRealParam %d.\n", param); return AC_FAILURE; } if (!is_valid(value)) { +#if AC_VERBOSE fprintf(stderr, "WARNING: Passed an invalid value %g to device constant %s. 
Skipping.\n", (double)value, realparam_names[param]); +#endif return AC_FAILURE; } @@ -132,9 +134,11 @@ acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal } if (!is_valid(value)) { +#if AC_VERBOSE fprintf(stderr, "WARNING: Passed an invalid value (%g, %g, %g) to device constant %s. Skipping.\n", (double)value.x, (double)value.y, (double)value.z, real3param_names[param]); +#endif return AC_FAILURE; } @@ -155,8 +159,10 @@ acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntPara } if (!is_valid(value)) { +#if AC_VERBOSE fprintf(stderr, "WARNING: Passed an invalid value %d to device constant %s. Skipping.\n", value, intparam_names[param]); +#endif return AC_FAILURE; } @@ -177,10 +183,12 @@ acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Pa } if (!is_valid(value.x) || !is_valid(value.y) || !is_valid(value.z)) { +#if AC_VERBOSE fprintf(stderr, "WARNING: Passed an invalid value (%d, %d, %def) to device constant %s. 
" "Skipping.\n", value.x, value.y, value.z, int3param_names[param]); +#endif return AC_FAILURE; } diff --git a/src/core/node.cc b/src/core/node.cc index adf3b52..7a0ca03 100644 --- a/src/core/node.cc +++ b/src/core/node.cc @@ -155,7 +155,7 @@ gridIdx3d(const GridDims grid, const int idx) idx / (grid.m.x * grid.m.y)}; } -static void +static inline void printInt3(const int3 vec) { printf("(%d, %d, %d)", vec.x, vec.y, vec.z); @@ -240,7 +240,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle) AcMeshInfo subgrid_config = node->config; subgrid_config.int_params[AC_nz] /= node->num_devices; update_builtin_params(&subgrid_config); -#if VERBOSE_PRINTING // Defined in astaroth.h +#if AC_VERBOSE printf("###############################################################\n"); printf("Config dimensions recalculated:\n"); print(subgrid_config); @@ -253,7 +253,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle) ERRCHK_ALWAYS(node->subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(node->subgrid.n.z >= STENCIL_ORDER); -#if VERBOSE_PRINTING +#if AC_VERBOSE // clang-format off printf("GridDims m "); printInt3(node->grid.m); printf("\n"); printf("GridDims n "); printInt3(node->grid.n); printf("\n"); @@ -284,7 +284,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle) int can_access_front, can_access_back; cudaDeviceCanAccessPeer(&can_access_front, i, front); cudaDeviceCanAccessPeer(&can_access_back, i, back); -#if VERBOSE_PRINTING +#if AC_VERBOSE printf( "Trying to enable peer access from %d to %d (can access: %d) and %d (can access: %d)\n", i, front, can_access_front, back, can_access_back); @@ -318,7 +318,7 @@ acNodeDestroy(Node node) int can_access_front, can_access_back; cudaDeviceCanAccessPeer(&can_access_front, i, front); cudaDeviceCanAccessPeer(&can_access_back, i, back); -#if VERBOSE_PRINTING +#if AC_VERBOSE printf("Trying to disable peer access from %d to %d (can access: %d) and %d (can access: " 
"%d)\n", i, front, can_access_front, back, can_access_back); diff --git a/src/utils/config_loader.c b/src/utils/config_loader.c index 50ed84c..5799157 100644 --- a/src/utils/config_loader.c +++ b/src/utils/config_loader.c @@ -52,7 +52,7 @@ parse_config(const char* path, AcMeshInfo* config) FILE* fp; fp = fopen(path, "r"); // For knowing which .conf file will be used - printf("Config file path: \n %s \n ", path); + printf("Config file path: %s\n", path); ERRCHK_ALWAYS(fp != NULL); const size_t BUF_SIZE = 128; @@ -90,7 +90,7 @@ acLoadConfig(const char* config_path, AcMeshInfo* config) parse_config(config_path, config); acUpdateBuiltinParams(config); -#if VERBOSE_PRINTING // Defined in astaroth.h +#if AC_VERBOSE printf("###############################################################\n"); printf("Config dimensions loaded:\n"); acPrintMeshInfo(*config); @@ -101,8 +101,10 @@ acLoadConfig(const char* config_path, AcMeshInfo* config) ERRCHK_ALWAYS(sizeof(*config) % sizeof(uint32_t) == 0); for (size_t i = 0; i < sizeof(*config) / sizeof(uint32_t); ++i) { if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) { +#if AC_VERBOSE fprintf(stderr, "Some config values may be uninitialized. " "See that all are defined in astaroth.conf\n"); +#endif retval = AC_FAILURE; } } diff --git a/src/utils/modelsolver.c b/src/utils/modelsolver.c index 3937e5e..86cff90 100644 --- a/src/utils/modelsolver.c +++ b/src/utils/modelsolver.c @@ -949,6 +949,7 @@ solve_beta_step(const AcMesh in, const int step_number, const Scalar dt, const i static void checkConfiguration(const AcMeshInfo info) { +#if AC_VERBOSE for (int i = 0; i < NUM_REAL_PARAMS; ++i) { if (!is_valid(info.real_params[i])) { fprintf(stderr, "WARNING: Passed an invalid value %g to model solver (%s). 
Skipping.\n", @@ -973,6 +974,7 @@ checkConfiguration(const AcMeshInfo info) (double)info.real3_params[i].z, realparam_names[i]); } } +#endif ERRCHK_ALWAYS(is_valid(info.real_params[AC_inv_dsx])); ERRCHK_ALWAYS(is_valid(info.real_params[AC_inv_dsy])); diff --git a/src/utils/verification.c b/src/utils/verification.c index 5a5cad1..e57498c 100644 --- a/src/utils/verification.c +++ b/src/utils/verification.c @@ -149,6 +149,7 @@ AcResult acVerifyMesh(const char* label, const AcMesh model, const AcMesh candidate) { printf("---Test: %s---\n", label); + fflush(stdout); printf("Errors at the point of the maximum absolute error:\n"); int errors_found = 0;