Fetched improvements to benchmarks from the mpi-paper-benchmarks branch
This commit is contained in:
@@ -149,30 +149,6 @@ main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
/*
|
|
||||||
// Basic
|
|
||||||
const size_t num_iters = 100;
|
|
||||||
|
|
||||||
// Warmup
|
|
||||||
for (size_t i = 0; i < num_iters / 10; ++i)
|
|
||||||
acGridIntegrate(STREAM_DEFAULT, FLT_EPSILON);
|
|
||||||
|
|
||||||
// Benchmark
|
|
||||||
Timer t;
|
|
||||||
const AcReal dt = FLT_EPSILON;
|
|
||||||
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
timer_reset(&t);
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < num_iters; ++i)
|
|
||||||
acGridIntegrate(STREAM_DEFAULT, dt);
|
|
||||||
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
if (!pid)
|
|
||||||
timer_diff_print(t);
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Percentiles
|
// Percentiles
|
||||||
const size_t num_iters = 1000;
|
const size_t num_iters = 1000;
|
||||||
@@ -217,47 +193,6 @@ main(int argc, char** argv)
|
|||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
const size_t num_iters = 1000;
|
|
||||||
const double nth_percentile = 0.90;
|
|
||||||
|
|
||||||
std::vector<double> results; // ms
|
|
||||||
results.reserve(num_iters);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < num_iters; ++i) {
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
timer_reset(&t);
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
acGridIntegrate(STREAM_DEFAULT, dt);
|
|
||||||
acGridSynchronizeStream(STREAM_ALL);
|
|
||||||
results.push_back(timer_diff_nsec(t) / 1e6);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write benchmark to file
|
|
||||||
if (!pid) {
|
|
||||||
std::sort(results.begin(), results.end(),
|
|
||||||
[](const double& a, const double& b) { return a < b; });
|
|
||||||
fprintf(stdout,
|
|
||||||
"Integration step time %g ms (%gth "
|
|
||||||
"percentile)--------------------------------------\n",
|
|
||||||
results[nth_percentile * num_iters], 100 * nth_percentile);
|
|
||||||
|
|
||||||
char path[4096] = "";
|
|
||||||
if (test == TEST_STRONG_SCALING)
|
|
||||||
strncpy(path, "strong_scaling.csv", sizeof(path));
|
|
||||||
else if (test == TEST_WEAK_SCALING)
|
|
||||||
strncpy(path, "weak_scaling.csv", sizeof(path));
|
|
||||||
else
|
|
||||||
ERROR("Invalid test type");
|
|
||||||
|
|
||||||
FILE* fp = fopen(path, "a");
|
|
||||||
ERRCHK_ALWAYS(fp);
|
|
||||||
// Format
|
|
||||||
// nprocs, measured (ms)
|
|
||||||
fprintf(fp, "%d, %g\n", nprocs, results[nth_percentile * num_iters]);
|
|
||||||
|
|
||||||
fclose(fp);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
acGridQuit();
|
acGridQuit();
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
|
@@ -2,11 +2,12 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
int
|
int
|
||||||
main(void)
|
main(void)
|
||||||
{
|
{
|
||||||
const int max_nprocs = 128;
|
const int max_nprocs = 64;
|
||||||
for (int nprocs = 1; nprocs <= max_nprocs; nprocs *= 2) {
|
for (int nprocs = 1; nprocs <= max_nprocs; nprocs *= 2) {
|
||||||
char filename[4096];
|
char filename[4096];
|
||||||
sprintf(filename, "benchmark_%d.sh", nprocs);
|
sprintf(filename, "benchmark_%d.sh", nprocs);
|
||||||
@@ -18,10 +19,11 @@ main(void)
|
|||||||
fprintf(fp, "#!/bin/bash\n");
|
fprintf(fp, "#!/bin/bash\n");
|
||||||
fprintf(fp, "#BATCH --job-name=astaroth\n");
|
fprintf(fp, "#BATCH --job-name=astaroth\n");
|
||||||
fprintf(fp, "#SBATCH --account=project_2000403\n");
|
fprintf(fp, "#SBATCH --account=project_2000403\n");
|
||||||
fprintf(fp, "#SBATCH --time=00:14:59\n");
|
fprintf(fp, "#SBATCH --time=03:00:00\n");
|
||||||
fprintf(fp, "#SBATCH --mem=32000\n");
|
fprintf(fp, "#SBATCH --mem=32000\n");
|
||||||
fprintf(fp, "#SBATCH --partition=gpu\n");
|
fprintf(fp, "#SBATCH --partition=gpu\n");
|
||||||
fprintf(fp, "#SBATCH --cpus-per-task=10\n");
|
fprintf(fp, "#SBATCH --output=benchmark-%d-%%j.out\n", nprocs);
|
||||||
|
// fprintf(fp, "#SBATCH --cpus-per-task=10\n");
|
||||||
|
|
||||||
// nprocs, nodes, gpus
|
// nprocs, nodes, gpus
|
||||||
const int max_gpus_per_node = 4;
|
const int max_gpus_per_node = 4;
|
||||||
@@ -30,30 +32,62 @@ main(void)
|
|||||||
fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
|
fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
|
||||||
fprintf(fp, "#SBATCH -n %d\n", nprocs);
|
fprintf(fp, "#SBATCH -n %d\n", nprocs);
|
||||||
fprintf(fp, "#SBATCH -N %d\n", nodes);
|
fprintf(fp, "#SBATCH -N %d\n", nodes);
|
||||||
//fprintf(fp, "#SBATCH --exclusive\n");
|
// fprintf(fp, "#SBATCH --exclusive\n");
|
||||||
if (nprocs > 4)
|
if (nprocs >= 4)
|
||||||
fprintf(fp, "#SBATCH --ntasks-per-socket=2\n");
|
fprintf(fp, "#SBATCH --ntasks-per-socket=2\n");
|
||||||
|
|
||||||
// Modules
|
// Modules
|
||||||
// OpenMPI
|
// OpenMPI
|
||||||
fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake openmpi nccl\n");
|
fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake openmpi/4.0.3-cuda nccl\n");
|
||||||
|
//fprintf(fp, "export UCX_TLS=rc,sm,cuda_copy,gdr_copy,cuda_ipc\n"); // https://www.open-mpi.org/fa
|
||||||
|
//fprintf(fp, "export PSM2_CUDA=1\nexport PSM2_GPUDIRECT=1\n");
|
||||||
|
//if (nprocs >= 32)
|
||||||
|
// fprintf(fp, "export UCX_TLS=ud_x,cuda_copy,gdr_copy,cuda_ipc\n"); // https://www.open-mpi.org/fa
|
||||||
|
|
||||||
// HPCX
|
// HPCX
|
||||||
//fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
|
//fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
|
||||||
fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n");
|
//fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n"); // Workaround for bug in hpcx-mpi/2.5.0
|
||||||
|
|
||||||
// Profile and run
|
// Profile and run
|
||||||
//fprintf(fp, "mkdir -p profile_%d\n", nprocs);
|
// fprintf(fp, "mkdir -p profile_%d\n", nprocs);
|
||||||
|
|
||||||
|
/*
|
||||||
const int nx = 256; // max size 1792;
|
const int nx = 256; // max size 1792;
|
||||||
const int ny = nx;
|
const int ny = nx;
|
||||||
const int nz = nx;
|
const int nz = nx;
|
||||||
/*
|
|
||||||
fprintf(fp,
|
fprintf(fp,
|
||||||
//"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
|
//"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
|
||||||
//"%d\n",
|
//"%d\n",
|
||||||
"srun ./benchmark %d %d %d\n", nx, ny, nz);
|
"srun ./benchmark %d %d %d\n", nx, ny, nz);
|
||||||
*/
|
*/
|
||||||
fprintf(fp, "srun ./benchmark %d %d %d\n", nx, ny, nz);
|
// fprintf(fp, "srun ./benchmark %d %d %d\n", nx, ny, nz);
|
||||||
|
|
||||||
|
const char* files[] = {
|
||||||
|
"benchmark_decomp_1D", "benchmark_decomp_2D", "benchmark_decomp_3D",
|
||||||
|
"benchmark_decomp_1D_comm", "benchmark_decomp_2D_comm", "benchmark_decomp_3D_comm",
|
||||||
|
"benchmark_meshsize_256", "benchmark_meshsize_512", "benchmark_meshsize_1024",
|
||||||
|
"benchmark_meshsize_1792", "benchmark_stencilord_2", "benchmark_stencilord_4",
|
||||||
|
"benchmark_stencilord_6", "benchmark_stencilord_8", "benchmark_timings_control",
|
||||||
|
"benchmark_timings_comp", "benchmark_timings_comm", "benchmark_timings_default",
|
||||||
|
"benchmark_timings_corners", "benchmark_weak_128", "benchmark_weak_256",
|
||||||
|
"benchmark_weak_448",
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); ++i) {
|
||||||
|
int nn = 256;
|
||||||
|
if (strcmp(files[i], "benchmark_meshsize_512") == 0)
|
||||||
|
nn = 512;
|
||||||
|
else if (strcmp(files[i], "benchmark_meshsize_1024") == 0)
|
||||||
|
nn = 1024;
|
||||||
|
else if (strcmp(files[i], "benchmark_meshsize_1792") == 0)
|
||||||
|
nn = 1792;
|
||||||
|
else if (strcmp(files[i], "benchmark_weak_128") == 0)
|
||||||
|
nn = 128;
|
||||||
|
else if (strcmp(files[i], "benchmark_weak_448") == 0)
|
||||||
|
nn = 448;
|
||||||
|
|
||||||
|
fprintf(fp, "$(cd %s && srun ./benchmark %d %d %d && cd ..)\n", files[i], nn, nn, nn);
|
||||||
|
}
|
||||||
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user