The VERBOSE_PRINTING flag is now used throughout the whole program and should be used to suppress development/debugging-related printing. Also added comments to the new interface function acCheckDeviceAvailability and made it free of side effects.

This commit is contained in:
jpekkila
2019-06-26 18:50:15 +03:00
parent 0bc8b7e827
commit cda17c9b08
3 changed files with 23 additions and 26 deletions

View File

@@ -49,6 +49,7 @@ extern "C" {
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0)
// Compile-time switch for development/debugging-related printing. Such output
// is guarded with `#if VERBOSE_PRINTING`, so set this to (0) to suppress it.
#define VERBOSE_PRINTING (1)
// Device info
#define REGISTERS_PER_THREAD (255)
@@ -347,13 +348,16 @@ typedef struct {
* Astaroth interface
* =============================================================================
*/
/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more,
AC_FAILURE otherwise. */
AcResult acCheckDeviceAvailability(void);
/** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words,
sets up everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info);
AcResult acCheckDeviceAvail();
AcResult acInitialize(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh);

View File

@@ -73,18 +73,21 @@ printInt3(const int3 vec)
}
AcResult
acInit(const AcMeshInfo& config)
acCheckDeviceAvailability(void)
{
AcResult res=acGetDevice();
res=acInitialize(config);
return AC_SUCCESS;
int device_count; // Separate from num_devices to avoid side effects
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));
if (device_count > 0)
return AC_SUCCESS;
else
return AC_FAILURE;
}
AcResult
acCheckDeviceAvail()
acInit(const AcMeshInfo& config)
{
// Check devices
cudaGetDeviceCount(&num_devices);
// Get num_devices
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
if (num_devices < 1) {
ERROR("No CUDA devices found!");
return AC_FAILURE;
@@ -97,11 +100,6 @@ acCheckDeviceAvail()
WARNING("MULTIGPU_ENABLED was false. Using only one device");
num_devices = 1; // Use only one device if multi-GPU is not enabled
}
return AC_SUCCESS;
}
AcResult
acInitialize(const AcMeshInfo& config)
{
// Check that num_devices is divisible with AC_nz. This makes decomposing the
// problem domain to multiple GPUs much easier since we do not have to worry
// about remainders
@@ -121,14 +119,14 @@ acInitialize(const AcMeshInfo& config)
ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER);
ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);
#ifndef PENCIL_ASTAROTH
// clang-format off
printf("Grid m "); printInt3(grid.m); printf("\n");
printf("Grid n "); printInt3(grid.n); printf("\n");
printf("Subrid m "); printInt3(subgrid.m); printf("\n");
printf("Subrid n "); printInt3(subgrid.n); printf("\n");
// clang-format on
#endif
// Initialize the devices
for (int i = 0; i < num_devices; ++i) {
createDevice(i, subgrid_config, &devices[i]);

View File

@@ -152,16 +152,11 @@ update_config(AcMeshInfo* config)
config->real_params[AC_G_CONST];
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
const bool print_config = true;
#ifdef VERBOSE_PRINT
if (print_config) {
printf("###############################################################"
"\n");
printf("Config dimensions recalculated:\n");
print(*config);
printf("###############################################################"
"\n");
}
#if VERBOSE_PRINTING
printf("###############################################################\n");
printf("Config dimensions recalculated:\n");
print(*config);
printf("###############################################################\n");
#endif
}