Added VERBOSE CMake option and made various prints optional to clean the output. VERBOSE is off by default; pass cmake -DVERBOSE=ON to re-enable various non-critical warning and status prints (important warnings are still visible regardless of the flag).
This commit is contained in:
@@ -42,6 +42,7 @@ option(DOUBLE_PRECISION "Generates double precision code." OF
|
||||
option(BUILD_SAMPLES "Builds projects in samples subdirectory." ON)
|
||||
option(MPI_ENABLED "Enables additional functions for MPI communciation." OFF)
|
||||
option(MULTIGPU_ENABLED "Enables multi-GPU on a single node. Uses peer-to-peer communication instead of MPI. Affects Legacy & Node layers only." ON)
|
||||
option(VERBOSE "Enables various status and warning messages" OFF)
|
||||
|
||||
## Options (DEPRECATED)
|
||||
# option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
|
||||
@@ -77,6 +78,9 @@ add_custom_target(dsl_headers ALL DEPENDS ${DSL_HEADERS})
|
||||
if (DOUBLE_PRECISION)
|
||||
add_definitions(-DAC_DOUBLE_PRECISION=1)
|
||||
endif ()
|
||||
if (VERBOSE)
|
||||
add_definitions(-DAC_VERBOSE=1)
|
||||
endif ()
|
||||
if (MPI_ENABLED)
|
||||
find_package(MPI REQUIRED COMPONENTS C)
|
||||
include_directories(${MPI_C_INCLUDE_DIRS})
|
||||
|
@@ -25,7 +25,6 @@
|
||||
// Library flags
|
||||
#define STENCIL_ORDER (6)
|
||||
#define NGHOST (STENCIL_ORDER / 2)
|
||||
#define VERBOSE_PRINTING (1)
|
||||
|
||||
// Built-in types and parameters
|
||||
#if AC_DOUBLE_PRECISION == 1
|
||||
|
@@ -99,7 +99,7 @@ main(int argc, char** argv)
|
||||
info.int_params[AC_ny] = ny;
|
||||
info.int_params[AC_nz] = nz;
|
||||
acUpdateBuiltinParams(&info);
|
||||
printf("Updated mesh dimensions to (%d, %d, %d)\n", nx, ny, nz);
|
||||
printf("Benchmark mesh dimensions: (%d, %d, %d)\n", nx, ny, nz);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Could not parse arguments. Usage: ./benchmark <nx> <ny> <nz>.\n");
|
||||
|
@@ -86,6 +86,7 @@ main(void)
|
||||
if (pid == 0) {
|
||||
printf("---Test: Scalar reductions---\n");
|
||||
printf("Warning: testing only RTYPE_MAX and RTYPE_MIN\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
for (size_t i = 0; i < 2; ++i) { // NOTE: 2 instead of NUM_RTYPES
|
||||
const VertexBufferHandle v0 = VTXBUF_UUX;
|
||||
@@ -104,6 +105,7 @@ main(void)
|
||||
if (pid == 0) {
|
||||
printf("---Test: Vector reductions---\n");
|
||||
printf("Warning: testing only RTYPE_MAX and RTYPE_MIN\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
for (size_t i = 0; i < 2; ++i) { // NOTE: 2 instead of NUM_RTYPES
|
||||
const VertexBufferHandle v0 = VTXBUF_UUX;
|
||||
|
@@ -123,15 +123,20 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
|
||||
device->id = id;
|
||||
device->local_config = device_config;
|
||||
#if AC_VERBOSE
|
||||
acDevicePrintInfo(device);
|
||||
#endif
|
||||
|
||||
// Check that the code was compiled for the proper GPU architecture
|
||||
// Check that the code was compiled for the proper GPU architecture
|
||||
#if AC_VERBOSE
|
||||
printf("Trying to run a dummy kernel. If this fails, make sure that your\n"
|
||||
"device supports the CUDA architecture you are compiling for.\n"
|
||||
"Running dummy kernel... ");
|
||||
"device supports the CUDA architecture you are compiling for.\n");
|
||||
#endif
|
||||
printf("Testing CUDA... ");
|
||||
fflush(stdout);
|
||||
acKernelDummy();
|
||||
printf("Success!\n");
|
||||
printf("\x1B[32m%s\x1B[0m\n", "OK!");
|
||||
fflush(stdout);
|
||||
|
||||
// Concurrency
|
||||
for (int i = 0; i < NUM_STREAMS; ++i) {
|
||||
@@ -162,7 +167,9 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
acDeviceLoadDefaultUniforms(device);
|
||||
acDeviceLoadMeshInfo(device, device_config);
|
||||
|
||||
#if AC_VERBOSE
|
||||
printf("Created device %d (%p)\n", device->id, device);
|
||||
#endif
|
||||
*device_handle = device;
|
||||
|
||||
// Autoptimize
|
||||
@@ -175,7 +182,9 @@ AcResult
|
||||
acDeviceDestroy(Device device)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
#if AC_VERBOSE
|
||||
printf("Destroying device %d (%p)\n", device->id, device);
|
||||
#endif
|
||||
acDeviceSynchronizeStream(device, STREAM_ALL);
|
||||
|
||||
// Memory
|
||||
@@ -761,8 +770,10 @@ static AcResult
|
||||
acDeviceDistributeMeshMPI(const AcMesh src, const uint3_64 decomposition, AcMesh* dst)
|
||||
{
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
#if AC_VERBOSE
|
||||
printf("Distributing mesh...\n");
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
@@ -837,8 +848,10 @@ static AcResult
|
||||
acDeviceGatherMeshMPI(const AcMesh src, const uint3_64 decomposition, AcMesh* dst)
|
||||
{
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
#if AC_VERBOSE
|
||||
printf("Gathering mesh...\n");
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
@@ -1215,15 +1228,19 @@ acGridInit(const AcMeshInfo info)
|
||||
char processor_name[MPI_MAX_PROCESSOR_NAME];
|
||||
int name_len;
|
||||
MPI_Get_processor_name(processor_name, &name_len);
|
||||
printf("Processor %s. Process %d of %d.\n", processor_name, pid, nprocs);
|
||||
|
||||
// Decompose
|
||||
AcMeshInfo submesh_info = info;
|
||||
const uint3_64 decomposition = decompose(nprocs);
|
||||
const int3 pid3d = getPid3D(pid, decomposition);
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
printf("Processor %s. Process %d of %d: (%d, %d, %d)\n", processor_name, pid, nprocs, pid3d.x,
|
||||
pid3d.y, pid3d.z);
|
||||
printf("Decomposition: %lu, %lu, %lu\n", decomposition.x, decomposition.y, decomposition.z);
|
||||
printf("Process %d: (%d, %d, %d)\n", pid, pid3d.x, pid3d.y, pid3d.z);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
ERRCHK_ALWAYS(info.int_params[AC_nx] % decomposition.x == 0);
|
||||
ERRCHK_ALWAYS(info.int_params[AC_ny] % decomposition.y == 0);
|
||||
ERRCHK_ALWAYS(info.int_params[AC_nz] % decomposition.z == 0);
|
||||
|
@@ -130,6 +130,7 @@ static dim3 rk3_tpb(32, 1, 4);
|
||||
AcResult
|
||||
acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferArray vba)
|
||||
{
|
||||
printf("Autotuning... ");
|
||||
// RK3
|
||||
dim3 best_dims(0, 0, 0);
|
||||
float best_time = INFINITY;
|
||||
@@ -192,7 +193,9 @@ acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferAr
|
||||
}
|
||||
}
|
||||
}
|
||||
#if VERBOSE_PRINTING
|
||||
printf("\x1B[32m%s\x1B[0m\n", "OK!");
|
||||
fflush(stdout);
|
||||
#if AC_VERBOSE
|
||||
printf("Auto-optimization done. The best threadblock dimensions for rkStep: (%d, %d, %d) %f "
|
||||
"ms\n",
|
||||
best_dims.x, best_dims.y, best_dims.z, double(best_time) / num_iterations);
|
||||
|
@@ -105,13 +105,15 @@ acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcReal
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
if (param < 0 || param >= NUM_REAL_PARAMS) {
|
||||
fprintf(stderr, "WARNING: invalid AcRealParam %d. Skipping.\n", param);
|
||||
fprintf(stderr, "WARNING: invalid AcRealParam %d.\n", param);
|
||||
return AC_FAILURE;
|
||||
}
|
||||
|
||||
if (!is_valid(value)) {
|
||||
#if AC_VERBOSE
|
||||
fprintf(stderr, "WARNING: Passed an invalid value %g to device constant %s. Skipping.\n",
|
||||
(double)value, realparam_names[param]);
|
||||
#endif
|
||||
return AC_FAILURE;
|
||||
}
|
||||
|
||||
@@ -132,9 +134,11 @@ acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal
|
||||
}
|
||||
|
||||
if (!is_valid(value)) {
|
||||
#if AC_VERBOSE
|
||||
fprintf(stderr,
|
||||
"WARNING: Passed an invalid value (%g, %g, %g) to device constant %s. Skipping.\n",
|
||||
(double)value.x, (double)value.y, (double)value.z, real3param_names[param]);
|
||||
#endif
|
||||
return AC_FAILURE;
|
||||
}
|
||||
|
||||
@@ -155,8 +159,10 @@ acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntPara
|
||||
}
|
||||
|
||||
if (!is_valid(value)) {
|
||||
#if AC_VERBOSE
|
||||
fprintf(stderr, "WARNING: Passed an invalid value %d to device constant %s. Skipping.\n",
|
||||
value, intparam_names[param]);
|
||||
#endif
|
||||
return AC_FAILURE;
|
||||
}
|
||||
|
||||
@@ -177,10 +183,12 @@ acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Pa
|
||||
}
|
||||
|
||||
if (!is_valid(value.x) || !is_valid(value.y) || !is_valid(value.z)) {
|
||||
#if AC_VERBOSE
|
||||
fprintf(stderr,
|
||||
"WARNING: Passed an invalid value (%d, %d, %def) to device constant %s. "
|
||||
"Skipping.\n",
|
||||
value.x, value.y, value.z, int3param_names[param]);
|
||||
#endif
|
||||
return AC_FAILURE;
|
||||
}
|
||||
|
||||
|
@@ -155,7 +155,7 @@ gridIdx3d(const GridDims grid, const int idx)
|
||||
idx / (grid.m.x * grid.m.y)};
|
||||
}
|
||||
|
||||
static void
|
||||
static inline void
|
||||
printInt3(const int3 vec)
|
||||
{
|
||||
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
||||
@@ -240,7 +240,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle)
|
||||
AcMeshInfo subgrid_config = node->config;
|
||||
subgrid_config.int_params[AC_nz] /= node->num_devices;
|
||||
update_builtin_params(&subgrid_config);
|
||||
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||
#if AC_VERBOSE
|
||||
printf("###############################################################\n");
|
||||
printf("Config dimensions recalculated:\n");
|
||||
print(subgrid_config);
|
||||
@@ -253,7 +253,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle)
|
||||
ERRCHK_ALWAYS(node->subgrid.n.y >= STENCIL_ORDER);
|
||||
ERRCHK_ALWAYS(node->subgrid.n.z >= STENCIL_ORDER);
|
||||
|
||||
#if VERBOSE_PRINTING
|
||||
#if AC_VERBOSE
|
||||
// clang-format off
|
||||
printf("GridDims m "); printInt3(node->grid.m); printf("\n");
|
||||
printf("GridDims n "); printInt3(node->grid.n); printf("\n");
|
||||
@@ -284,7 +284,7 @@ acNodeCreate(const int id, const AcMeshInfo node_config, Node* node_handle)
|
||||
int can_access_front, can_access_back;
|
||||
cudaDeviceCanAccessPeer(&can_access_front, i, front);
|
||||
cudaDeviceCanAccessPeer(&can_access_back, i, back);
|
||||
#if VERBOSE_PRINTING
|
||||
#if AC_VERBOSE
|
||||
printf(
|
||||
"Trying to enable peer access from %d to %d (can access: %d) and %d (can access: %d)\n",
|
||||
i, front, can_access_front, back, can_access_back);
|
||||
@@ -318,7 +318,7 @@ acNodeDestroy(Node node)
|
||||
int can_access_front, can_access_back;
|
||||
cudaDeviceCanAccessPeer(&can_access_front, i, front);
|
||||
cudaDeviceCanAccessPeer(&can_access_back, i, back);
|
||||
#if VERBOSE_PRINTING
|
||||
#if AC_VERBOSE
|
||||
printf("Trying to disable peer access from %d to %d (can access: %d) and %d (can access: "
|
||||
"%d)\n",
|
||||
i, front, can_access_front, back, can_access_back);
|
||||
|
@@ -52,7 +52,7 @@ parse_config(const char* path, AcMeshInfo* config)
|
||||
FILE* fp;
|
||||
fp = fopen(path, "r");
|
||||
// For knowing which .conf file will be used
|
||||
printf("Config file path: \n %s \n ", path);
|
||||
printf("Config file path: %s\n", path);
|
||||
ERRCHK_ALWAYS(fp != NULL);
|
||||
|
||||
const size_t BUF_SIZE = 128;
|
||||
@@ -90,7 +90,7 @@ acLoadConfig(const char* config_path, AcMeshInfo* config)
|
||||
|
||||
parse_config(config_path, config);
|
||||
acUpdateBuiltinParams(config);
|
||||
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||
#if AC_VERBOSE
|
||||
printf("###############################################################\n");
|
||||
printf("Config dimensions loaded:\n");
|
||||
acPrintMeshInfo(*config);
|
||||
@@ -101,8 +101,10 @@ acLoadConfig(const char* config_path, AcMeshInfo* config)
|
||||
ERRCHK_ALWAYS(sizeof(*config) % sizeof(uint32_t) == 0);
|
||||
for (size_t i = 0; i < sizeof(*config) / sizeof(uint32_t); ++i) {
|
||||
if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) {
|
||||
#if AC_VERBOSE
|
||||
fprintf(stderr, "Some config values may be uninitialized. "
|
||||
"See that all are defined in astaroth.conf\n");
|
||||
#endif
|
||||
retval = AC_FAILURE;
|
||||
}
|
||||
}
|
||||
|
@@ -949,6 +949,7 @@ solve_beta_step(const AcMesh in, const int step_number, const Scalar dt, const i
|
||||
static void
|
||||
checkConfiguration(const AcMeshInfo info)
|
||||
{
|
||||
#if AC_VERBOSE
|
||||
for (int i = 0; i < NUM_REAL_PARAMS; ++i) {
|
||||
if (!is_valid(info.real_params[i])) {
|
||||
fprintf(stderr, "WARNING: Passed an invalid value %g to model solver (%s). Skipping.\n",
|
||||
@@ -973,6 +974,7 @@ checkConfiguration(const AcMeshInfo info)
|
||||
(double)info.real3_params[i].z, realparam_names[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
ERRCHK_ALWAYS(is_valid(info.real_params[AC_inv_dsx]));
|
||||
ERRCHK_ALWAYS(is_valid(info.real_params[AC_inv_dsy]));
|
||||
|
@@ -149,6 +149,7 @@ AcResult
|
||||
acVerifyMesh(const char* label, const AcMesh model, const AcMesh candidate)
|
||||
{
|
||||
printf("---Test: %s---\n", label);
|
||||
fflush(stdout);
|
||||
printf("Errors at the point of the maximum absolute error:\n");
|
||||
|
||||
int errors_found = 0;
|
||||
|
Reference in New Issue
Block a user