Pulled useful changes from the benchmark branch. GPUDirect RDMA (unpinned) is now the default for MPI communication.
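For orientation, "GPUDirect RDMA (unpinned)" here means ordinary cudaMalloc'd device buffers are handed directly to a CUDA-aware MPI library instead of being staged through pinned host memory. The sketch below is not this repository's code; it only illustrates that calling convention, the buffer and neighbor names are made up for the example, and it assumes a CUDA-aware MPI build such as the hpcx-mpi or OpenMPI modules loaded by the generated batch script further down.

// Illustrative sketch only (not from this commit): a CUDA-aware MPI exchange
// using ordinary, unpinned device allocations. Buffer and peer names are hypothetical.
#include <mpi.h>
#include <cuda_runtime.h>

int
main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    const int count = 1 << 20;
    double *d_send, *d_recv;
    cudaMalloc((void**)&d_send, count * sizeof(double)); // plain device memory, not pinned host staging
    cudaMalloc((void**)&d_recv, count * sizeof(double));
    cudaMemset(d_send, 0, count * sizeof(double)); // contents do not matter for the example

    // Pair up with a neighboring rank and exchange buffers. The device pointers
    // go straight to MPI; with GPUDirect RDMA the interconnect reads/writes GPU
    // memory without a host-side copy.
    const int peer = rank ^ 1;
    if (peer < nprocs) {
        MPI_Request reqs[2];
        MPI_Irecv(d_recv, count, MPI_DOUBLE, peer, 0, MPI_COMM_WORLD, &reqs[0]);
        MPI_Isend(d_send, count, MPI_DOUBLE, peer, 0, MPI_COMM_WORLD, &reqs[1]);
        MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }

    cudaFree(d_send);
    cudaFree(d_recv);
    MPI_Finalize();
    return 0;
}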
@@ -207,24 +207,18 @@ main(int argc, char** argv)
                results[nth_percentile * num_iters], 100 * nth_percentile);

        char path[4096] = "";
        if (test == TEST_STRONG_SCALING)
            strncpy(path, "strong_scaling.csv", sizeof(path));
        else if (test == TEST_WEAK_SCALING)
            strncpy(path, "weak_scaling.csv", sizeof(path));
        else
            ERROR("Invalid test type");
        sprintf(path, "%s_%d.csv", test == TEST_STRONG_SCALING ? "strong" : "weak", nprocs);

        FILE* fp = fopen(path, "a");
        ERRCHK_ALWAYS(fp);
        // Format
        // nprocs, measured (ms)
        fprintf(fp, "%d, %g\n", nprocs, results[nth_percentile * num_iters]);

        // nprocs, min, 50th perc, 90th perc, max
        fprintf(fp, "%d, %g, %g, %g, %g\n", nprocs, results[0], results[0.5 * num_iters], results[nth_percentile * num_iters], results[num_iters-1]);
        fclose(fp);
    }

    /*
    const size_t num_iters = 100;
    const size_t num_iters = 1000;
    const double nth_percentile = 0.90;

    std::vector<double> results; // ms
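A note on the percentile columns above: indexing results with 0, 0.5 * num_iters, nth_percentile * num_iters, and num_iters - 1 only yields min, median, 90th percentile, and max if the per-iteration timings were sorted in ascending order beforehand; that sort happens outside the lines shown in this hunk. A standalone sketch of the convention, reusing the hunk's names (the zero-filled vector is just a placeholder for the real measurements):

#include <algorithm>
#include <cstdio>
#include <vector>

int
main(void)
{
    const size_t num_iters      = 1000;
    const double nth_percentile = 0.90;

    // One wall-clock measurement (ms) per iteration; filled by the benchmark
    // loop in the real code, zero-initialized here to keep the sketch runnable.
    std::vector<double> results(num_iters, 0.0);

    // Ascending sort makes position k correspond to the k/num_iters quantile.
    std::sort(results.begin(), results.end());

    printf("min %g, median %g, %gth percentile %g, max %g (ms)\n",
           results[0],
           results[(size_t)(0.5 * num_iters)],
           100 * nth_percentile,
           results[(size_t)(nth_percentile * num_iters)],
           results[num_iters - 1]);
    return 0;
}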
@@ -21,6 +21,7 @@ main(void)
        fprintf(fp, "#SBATCH --time=00:14:59\n");
        fprintf(fp, "#SBATCH --mem=32000\n");
        fprintf(fp, "#SBATCH --partition=gpu\n");
        fprintf(fp, "#SBATCH --cpus-per-task=10\n");

        // nprocs, nodes, gpus
        const int max_gpus_per_node = 4;
@@ -29,22 +30,30 @@ main(void)
        fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
        fprintf(fp, "#SBATCH -n %d\n", nprocs);
        fprintf(fp, "#SBATCH -N %d\n", nodes);
        fprintf(fp, "#SBATCH --exclusive\n");
        //fprintf(fp, "#SBATCH --exclusive\n");
        if (nprocs > 4)
            fprintf(fp, "#SBATCH --ntasks-per-socket=2\n");

        // Modules
        fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
        // OpenMPI
        fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake openmpi nccl\n");
        // HPCX
        //fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
        fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n");

        // Profile and run
        fprintf(fp, "mkdir -p profile_%d\n", nprocs);
        //fprintf(fp, "mkdir -p profile_%d\n", nprocs);

        const int nx = 256; // max size 1792;
        const int ny = nx;
        const int nz = nx;
        /*
        fprintf(fp,
                //"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
                //"%d\n",
                "srun ./benchmark %d %d %d\n", nx, ny, nz);
        */
        fprintf(fp, "srun ./benchmark %d %d %d\n", nx, ny, nz);

        fclose(fp);
    }
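For reference, the generator above would emit roughly the following job script for nprocs = 8 (so nodes = 2 and gpus_per_node = 4) with the default nx = ny = nz = 256. This assumes the uncommented --exclusive, hpcx-mpi, and mkdir fprintf calls are the ones removed by this commit (their commented-out counterparts appear alongside them in the hunk), and anything written before the first shown hunk (the shebang and earlier #SBATCH directives) is omitted:

#SBATCH --time=00:14:59
#SBATCH --mem=32000
#SBATCH --partition=gpu
#SBATCH --cpus-per-task=10
#SBATCH --gres=gpu:v100:4
#SBATCH -n 8
#SBATCH -N 2
#SBATCH --ntasks-per-socket=2
module load gcc/8.3.0 cuda/10.1.168 cmake openmpi nccl
export UCX_MEMTYPE_CACHE=n
srun ./benchmark 256 256 256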