The VERBOSE_PRINTING flag is now used throughout the whole program and should be used to suppress development/debugging-related printing. Also added comments to the new interface function acCheckDeviceAvailability and made it free of side effects.
This commit is contained in:
@@ -49,6 +49,7 @@ extern "C" {
|
|||||||
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
|
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
|
||||||
#define BOUNDCONDS_OPTIMIZE (0)
|
#define BOUNDCONDS_OPTIMIZE (0)
|
||||||
#define GENERATE_BENCHMARK_DATA (0)
|
#define GENERATE_BENCHMARK_DATA (0)
|
||||||
|
#define VERBOSE_PRINTING (1)
|
||||||
|
|
||||||
// Device info
|
// Device info
|
||||||
#define REGISTERS_PER_THREAD (255)
|
#define REGISTERS_PER_THREAD (255)
|
||||||
@@ -347,13 +348,16 @@ typedef struct {
|
|||||||
* Astaroth interface
|
* Astaroth interface
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** Checks whether there are any CUDA devices available. Returns AC_SUCCESS if there is 1 or more,
|
||||||
|
AC_FAILURE otherwise. */
|
||||||
|
AcResult acCheckDeviceAvailability(void);
|
||||||
|
|
||||||
/** Starting point of all GPU computation. Handles the allocation and
|
/** Starting point of all GPU computation. Handles the allocation and
|
||||||
initialization of *all memory needed on all GPUs in the node*. In other words,
|
initialization of *all memory needed on all GPUs in the node*. In other words,
|
||||||
sets up everything GPU-side so that calling any other GPU interface function
|
sets up everything GPU-side so that calling any other GPU interface function
|
||||||
afterwards does not result in illegal memory accesses. */
|
afterwards does not result in illegal memory accesses. */
|
||||||
AcResult acInit(const AcMeshInfo& mesh_info);
|
AcResult acInit(const AcMeshInfo& mesh_info);
|
||||||
AcResult acCheckDeviceAvail();
|
|
||||||
AcResult acInitialize(const AcMeshInfo& mesh_info);
|
|
||||||
|
|
||||||
/** Splits the host_mesh and distributes it among the GPUs in the node */
|
/** Splits the host_mesh and distributes it among the GPUs in the node */
|
||||||
AcResult acLoad(const AcMesh& host_mesh);
|
AcResult acLoad(const AcMesh& host_mesh);
|
||||||
|
@@ -73,18 +73,21 @@ printInt3(const int3 vec)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acInit(const AcMeshInfo& config)
|
acCheckDeviceAvailability(void)
|
||||||
{
|
{
|
||||||
AcResult res=acGetDevice();
|
int device_count; // Separate from num_devices to avoid side effects
|
||||||
res=acInitialize(config);
|
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count));
|
||||||
return AC_SUCCESS;
|
if (device_count > 0)
|
||||||
|
return AC_SUCCESS;
|
||||||
|
else
|
||||||
|
return AC_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acCheckDeviceAvail()
|
acInit(const AcMeshInfo& config)
|
||||||
{
|
{
|
||||||
// Check devices
|
// Get num_devices
|
||||||
cudaGetDeviceCount(&num_devices);
|
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
|
||||||
if (num_devices < 1) {
|
if (num_devices < 1) {
|
||||||
ERROR("No CUDA devices found!");
|
ERROR("No CUDA devices found!");
|
||||||
return AC_FAILURE;
|
return AC_FAILURE;
|
||||||
@@ -97,11 +100,6 @@ acCheckDeviceAvail()
|
|||||||
WARNING("MULTIGPU_ENABLED was false. Using only one device");
|
WARNING("MULTIGPU_ENABLED was false. Using only one device");
|
||||||
num_devices = 1; // Use only one device if multi-GPU is not enabled
|
num_devices = 1; // Use only one device if multi-GPU is not enabled
|
||||||
}
|
}
|
||||||
return AC_SUCCESS;
|
|
||||||
}
|
|
||||||
AcResult
|
|
||||||
acInitialize(const AcMeshInfo& config)
|
|
||||||
{
|
|
||||||
// Check that AC_nz is divisible by num_devices. This makes decomposing the
|
// Check that AC_nz is divisible by num_devices. This makes decomposing the
|
||||||
// problem domain to multiple GPUs much easier since we do not have to worry
|
// problem domain to multiple GPUs much easier since we do not have to worry
|
||||||
// about remainders
|
// about remainders
|
||||||
@@ -121,14 +119,14 @@ acInitialize(const AcMeshInfo& config)
|
|||||||
ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER);
|
ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER);
|
||||||
ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
|
ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
|
||||||
ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);
|
ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);
|
||||||
#ifndef PENCIL_ASTAROTH
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
printf("Grid m "); printInt3(grid.m); printf("\n");
|
printf("Grid m "); printInt3(grid.m); printf("\n");
|
||||||
printf("Grid n "); printInt3(grid.n); printf("\n");
|
printf("Grid n "); printInt3(grid.n); printf("\n");
|
||||||
printf("Subrid m "); printInt3(subgrid.m); printf("\n");
|
printf("Subrid m "); printInt3(subgrid.m); printf("\n");
|
||||||
printf("Subrid n "); printInt3(subgrid.n); printf("\n");
|
printf("Subrid n "); printInt3(subgrid.n); printf("\n");
|
||||||
// clang-format on
|
// clang-format on
|
||||||
#endif
|
|
||||||
// Initialize the devices
|
// Initialize the devices
|
||||||
for (int i = 0; i < num_devices; ++i) {
|
for (int i = 0; i < num_devices; ++i) {
|
||||||
createDevice(i, subgrid_config, &devices[i]);
|
createDevice(i, subgrid_config, &devices[i]);
|
||||||
|
@@ -152,16 +152,11 @@ update_config(AcMeshInfo* config)
|
|||||||
config->real_params[AC_G_CONST];
|
config->real_params[AC_G_CONST];
|
||||||
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
|
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
|
||||||
|
|
||||||
const bool print_config = true;
|
#if VERBOSE_PRINTING
|
||||||
#ifdef VERBOSE_PRINT
|
printf("###############################################################\n");
|
||||||
if (print_config) {
|
printf("Config dimensions recalculated:\n");
|
||||||
printf("###############################################################"
|
print(*config);
|
||||||
"\n");
|
printf("###############################################################\n");
|
||||||
printf("Config dimensions recalculated:\n");
|
|
||||||
print(*config);
|
|
||||||
printf("###############################################################"
|
|
||||||
"\n");
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user