Added the (hopefully final) basic test case used for the benchmarks

This commit is contained in:
jpekkila
2020-06-07 21:59:33 +03:00
parent 17a4f31451
commit 9840b817d0
4 changed files with 50 additions and 25 deletions

View File

@@ -56,11 +56,12 @@ morton3D(const uint64_t pid)
{
uint64_t i, j, k;
i = j = k = 0;
for (int bit = 0; bit <= 21; ++bit) {
const uint64_t mask = 0x1l << 3 * bit;
i |= ((pid & (mask << 0)) >> 2 * bit) >> 0;
k |= ((pid & (mask << 0)) >> 2 * bit) >> 0;
j |= ((pid & (mask << 1)) >> 2 * bit) >> 1;
k |= ((pid & (mask << 2)) >> 2 * bit) >> 2;
i |= ((pid & (mask << 2)) >> 2 * bit) >> 2;
}
return (uint3_64){i, j, k};
@@ -174,7 +175,7 @@ main(int argc, char** argv)
*/
// Percentiles
const size_t num_iters = 100;
const size_t num_iters = 1000;
const double nth_percentile = 0.90;
std::vector<double> results; // ms
results.reserve(num_iters);

View File

@@ -29,6 +29,7 @@ main(void)
fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
fprintf(fp, "#SBATCH -n %d\n", nprocs);
fprintf(fp, "#SBATCH -N %d\n", nodes);
fprintf(fp, "#SBATCH --exclusive\n");
// Modules
fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
@@ -37,13 +38,13 @@ main(void)
// Profile and run
fprintf(fp, "mkdir -p profile_%d\n", nprocs);
const int nx = 1792;
const int nx = 256; // max size 1792;
const int ny = nx;
const int nz = nx;
fprintf(fp,
"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
"%d\n",
nprocs, nx, ny, nz);
//"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
//"%d\n",
"srun ./benchmark %d %d %d\n", nx, ny, nz);
fclose(fp);
}