The VERBOSE_PRINTING flag is now used throughout the program and should be used to suppress development/debugging-related printing. Also added comments to the new interface function acCheckDeviceAvailability and made it free of side effects.

This commit is contained in:
jpekkila
2019-06-26 18:50:15 +03:00
parent 0bc8b7e827
commit cda17c9b08
3 changed files with 23 additions and 26 deletions

View File

@@ -49,6 +49,7 @@ extern "C" {
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove #define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0) #define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0) #define GENERATE_BENCHMARK_DATA (0)
#define VERBOSE_PRINTING (1)
// Device info // Device info
#define REGISTERS_PER_THREAD (255) #define REGISTERS_PER_THREAD (255)
@@ -347,13 +348,16 @@ typedef struct {
* Astaroth interface * Astaroth interface
* ============================================================================= * =============================================================================
*/ */
/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more,
AC_FAILURE otherwise. */
AcResult acCheckDeviceAvailability(void);
/** Starting point of all GPU computation. Handles the allocation and /** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words, initialization of *all memory needed on all GPUs in the node*. In other words,
sets up everything GPU-side so that calling any other GPU interface function sets up everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */ afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info); AcResult acInit(const AcMeshInfo& mesh_info);
AcResult acCheckDeviceAvail();
AcResult acInitialize(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */ /** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh); AcResult acLoad(const AcMesh& host_mesh);

View File

@@ -73,18 +73,21 @@ printInt3(const int3 vec)
} }
AcResult AcResult
acInit(const AcMeshInfo& config) acCheckDeviceAvailability(void)
{ {
AcResult res=acGetDevice(); int device_count; // Separate from num_devices to avoid side effects
res=acInitialize(config); ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));
return AC_SUCCESS; if (device_count > 0)
return AC_SUCCESS;
else
return AC_FAILURE;
} }
AcResult AcResult
acCheckDeviceAvail() acInit(const AcMeshInfo& config)
{ {
// Check devices // Get num_devices
cudaGetDeviceCount(&num_devices); ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
if (num_devices < 1) { if (num_devices < 1) {
ERROR("No CUDA devices found!"); ERROR("No CUDA devices found!");
return AC_FAILURE; return AC_FAILURE;
@@ -97,11 +100,6 @@ acCheckDeviceAvail()
WARNING("MULTIGPU_ENABLED was false. Using only one device"); WARNING("MULTIGPU_ENABLED was false. Using only one device");
num_devices = 1; // Use only one device if multi-GPU is not enabled num_devices = 1; // Use only one device if multi-GPU is not enabled
} }
return AC_SUCCESS;
}
AcResult
acInitialize(const AcMeshInfo& config)
{
// Check that num_devices is divisible with AC_nz. This makes decomposing the // Check that num_devices is divisible with AC_nz. This makes decomposing the
// problem domain to multiple GPUs much easier since we do not have to worry // problem domain to multiple GPUs much easier since we do not have to worry
// about remainders // about remainders
@@ -121,14 +119,14 @@ acInitialize(const AcMeshInfo& config)
ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER);
ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);
#ifndef PENCIL_ASTAROTH
// clang-format off // clang-format off
printf("Grid m "); printInt3(grid.m); printf("\n"); printf("Grid m "); printInt3(grid.m); printf("\n");
printf("Grid n "); printInt3(grid.n); printf("\n"); printf("Grid n "); printInt3(grid.n); printf("\n");
printf("Subrid m "); printInt3(subgrid.m); printf("\n"); printf("Subrid m "); printInt3(subgrid.m); printf("\n");
printf("Subrid n "); printInt3(subgrid.n); printf("\n"); printf("Subrid n "); printInt3(subgrid.n); printf("\n");
// clang-format on // clang-format on
#endif
// Initialize the devices // Initialize the devices
for (int i = 0; i < num_devices; ++i) { for (int i = 0; i < num_devices; ++i) {
createDevice(i, subgrid_config, &devices[i]); createDevice(i, subgrid_config, &devices[i]);

View File

@@ -152,16 +152,11 @@ update_config(AcMeshInfo* config)
config->real_params[AC_G_CONST]; config->real_params[AC_G_CONST];
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star])); config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
const bool print_config = true; #if VERBOSE_PRINTING
#ifdef VERBOSE_PRINT printf("###############################################################\n");
if (print_config) { printf("Config dimensions recalculated:\n");
printf("###############################################################" print(*config);
"\n"); printf("###############################################################\n");
printf("Config dimensions recalculated:\n");
print(*config);
printf("###############################################################"
"\n");
}
#endif #endif
} }