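Finalized benchmarking: larger meshes (nn = 512), warmup steps and peer access in the MPI test, 90th-percentile reporting, and writing results to a .bench file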
This commit is contained in:
@@ -1008,8 +1008,8 @@ acDeviceRunMPITest(void)
|
|||||||
acLoadConfig(AC_DEFAULT_CONFIG, &info);
|
acLoadConfig(AC_DEFAULT_CONFIG, &info);
|
||||||
|
|
||||||
// Large mesh dim
|
// Large mesh dim
|
||||||
const int nn = 128;
|
const int nn = 512;
|
||||||
const int num_iters = 10;
|
const int num_iters = 100;
|
||||||
info.int_params[AC_nx] = info.int_params[AC_ny] = nn;
|
info.int_params[AC_nx] = info.int_params[AC_ny] = nn;
|
||||||
info.int_params[AC_nz] = BENCH_STRONG_SCALING ? nn : nn * num_processes;
|
info.int_params[AC_nz] = BENCH_STRONG_SCALING ? nn : nn * num_processes;
|
||||||
info.real_params[AC_inv_dsx] = AcReal(1.0) / info.real_params[AC_dsx];
|
info.real_params[AC_inv_dsx] = AcReal(1.0) / info.real_params[AC_dsx];
|
||||||
@@ -1064,6 +1064,14 @@ acDeviceRunMPITest(void)
|
|||||||
acDeviceCreate(pid % devices_per_node, submesh_info, &device);
|
acDeviceCreate(pid % devices_per_node, submesh_info, &device);
|
||||||
acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
|
acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
|
||||||
|
|
||||||
|
// Enable peer access
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
const int front = (device->id + 1) % devices_per_node;
|
||||||
|
const int back = (device->id + devices_per_node - 1) % devices_per_node;
|
||||||
|
cudaSetDevice(device->id);
|
||||||
|
WARNCHK_CUDA_ALWAYS(cudaDeviceEnablePeerAccess(front, 0));
|
||||||
|
WARNCHK_CUDA_ALWAYS(cudaDeviceEnablePeerAccess(back, 0));
|
||||||
|
|
||||||
// Verification start ///////////////////////////////////////////////////////////////////////
|
// Verification start ///////////////////////////////////////////////////////////////////////
|
||||||
#if BENCH_STRONG_SCALING
|
#if BENCH_STRONG_SCALING
|
||||||
{
|
{
|
||||||
@@ -1094,6 +1102,13 @@ acDeviceRunMPITest(void)
|
|||||||
#endif
|
#endif
|
||||||
// Verification end ///////////////////////////////////////////////////////////////////////
|
// Verification end ///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Warmup
|
||||||
|
for (int i = 0; i < 10; ++i)
|
||||||
|
acDeviceIntegrateStepMPI(device, 0);
|
||||||
|
|
||||||
|
acDeviceSynchronizeStream(device, STREAM_ALL);
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
|
||||||
// Benchmark start ///////////////////////////////////////////////////////////////////////
|
// Benchmark start ///////////////////////////////////////////////////////////////////////
|
||||||
std::vector<double> results;
|
std::vector<double> results;
|
||||||
results.reserve(num_iters);
|
results.reserve(num_iters);
|
||||||
@@ -1114,7 +1129,7 @@ acDeviceRunMPITest(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
|
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
|
||||||
const double nth_percentile = 0.95;
|
const double nth_percentile = 0.90;
|
||||||
std::sort(results.begin(), results.end(),
|
std::sort(results.begin(), results.end(),
|
||||||
[](const double& a, const double& b) { return a < b; });
|
[](const double& a, const double& b) { return a < b; });
|
||||||
|
|
||||||
|
@@ -49,14 +49,15 @@ smaller_than(const double& a, const double& b)
|
|||||||
int
|
int
|
||||||
run_benchmark(const char* config_path)
|
run_benchmark(const char* config_path)
|
||||||
{
|
{
|
||||||
const int nn = 256;
|
const int nn = 512;
|
||||||
const int num_iters = 100;
|
const int num_iters = 100;
|
||||||
|
#define BENCH_STRONG_SCALING (1)
|
||||||
|
const int num_processes = acGetNumDevicesPerNode();
|
||||||
|
|
||||||
AcMeshInfo mesh_info;
|
AcMeshInfo mesh_info;
|
||||||
load_config(config_path, &mesh_info);
|
load_config(config_path, &mesh_info);
|
||||||
mesh_info.int_params[AC_nx] = nn;
|
mesh_info.int_params[AC_nx] = mesh_info.int_params[AC_ny] = nn;
|
||||||
mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
|
mesh_info.int_params[AC_nz] = BENCH_STRONG_SCALING ? nn : nn * num_processes;
|
||||||
mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
|
|
||||||
update_config(&mesh_info);
|
update_config(&mesh_info);
|
||||||
|
|
||||||
AcMesh* mesh = acmesh_create(mesh_info);
|
AcMesh* mesh = acmesh_create(mesh_info);
|
||||||
@@ -74,6 +75,7 @@ run_benchmark(const char* config_path)
|
|||||||
}
|
}
|
||||||
acSynchronize();
|
acSynchronize();
|
||||||
const AcReal dt = FLT_EPSILON;
|
const AcReal dt = FLT_EPSILON;
|
||||||
|
printf("Using dt = %g\n", dt);
|
||||||
|
|
||||||
Timer total_time;
|
Timer total_time;
|
||||||
timer_reset(&total_time);
|
timer_reset(&total_time);
|
||||||
@@ -89,13 +91,25 @@ run_benchmark(const char* config_path)
|
|||||||
}
|
}
|
||||||
acSynchronize();
|
acSynchronize();
|
||||||
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
|
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
|
||||||
const double nth_percentile = 0.95;
|
const double nth_percentile = 0.90;
|
||||||
std::sort(results.begin(), results.end(), smaller_than);
|
std::sort(results.begin(), results.end(), smaller_than);
|
||||||
|
|
||||||
printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
|
printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
|
||||||
printf("Total time: %f ms\n", ms_elapsed);
|
printf("Total time: %f ms\n", ms_elapsed);
|
||||||
printf("%dth percentile per step: %f ms\n", int(100 * nth_percentile),
|
printf("Time per step: %f ms\n", ms_elapsed / num_iters);
|
||||||
results[int(nth_percentile * num_iters)]);
|
|
||||||
|
const size_t nth_index = int(nth_percentile * num_iters);
|
||||||
|
printf("%dth percentile per step: %f ms\n", int(100 * nth_percentile), results[nth_index]);
|
||||||
|
|
||||||
|
// Write out
|
||||||
|
char buf[256];
|
||||||
|
sprintf(buf, "nprocs_%d_result_%s.bench", num_processes,
|
||||||
|
BENCH_STRONG_SCALING ? "strong" : "weak");
|
||||||
|
FILE* fp = fopen(buf, "w");
|
||||||
|
ERRCHK_ALWAYS(fp);
|
||||||
|
fprintf(fp, "num_processes, percentile (%dth)\n", int(100 * nth_percentile));
|
||||||
|
fprintf(fp, "%d, %g\n", num_processes, results[nth_index]);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
acQuit();
|
acQuit();
|
||||||
acmesh_destroy(mesh);
|
acmesh_destroy(mesh);
|
||||||
|
Reference in New Issue
Block a user