Added the (hopefully final) basic test case used for the benchmarks

2020-06-07 21:59:33 +03:00
parent 17a4f31451
commit 9840b817d0
4 changed files with 50 additions and 25 deletions
--- a/samples/benchmark/main.cc
+++ b/samples/benchmark/main.cc
@@ -56,11 +56,12 @@ morton3D(const uint64_t pid)
 {
    uint64_t i, j, k;
    i = j = k = 0;
+
    for (int bit = 0; bit <= 21; ++bit) {
        const uint64_t mask = 0x1l << 3 * bit;
-        i |= ((pid & (mask << 0)) >> 2 * bit) >> 0;
+        k |= ((pid & (mask << 0)) >> 2 * bit) >> 0;
        j |= ((pid & (mask << 1)) >> 2 * bit) >> 1;
-        k |= ((pid & (mask << 2)) >> 2 * bit) >> 2;
+        i |= ((pid & (mask << 2)) >> 2 * bit) >> 2;
    }

    return (uint3_64){i, j, k};
@@ -174,7 +175,7 @@ main(int argc, char** argv)
    */

    // Percentiles
-    const size_t num_iters      = 100;
+    const size_t num_iters      = 1000;
    const double nth_percentile = 0.90;
    std::vector<double> results; // ms
    results.reserve(num_iters);
--- a/samples/genbenchmarkscripts/main.c
+++ b/samples/genbenchmarkscripts/main.c
@@ -29,6 +29,7 @@ main(void)
        fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
        fprintf(fp, "#SBATCH -n %d\n", nprocs);
        fprintf(fp, "#SBATCH -N %d\n", nodes);
+        fprintf(fp, "#SBATCH --exclusive\n");

        // Modules
        fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
@@ -37,13 +38,13 @@ main(void)
        // Profile and run
        fprintf(fp, "mkdir -p profile_%d\n", nprocs);

-        const int nx = 1792;
+        const int nx = 256; // max size 1792;
        const int ny = nx;
        const int nz = nx;
        fprintf(fp,
-                "srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
-                "%d\n",
-                nprocs, nx, ny, nz);
+                //"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d "
+                //"%d\n",
+                "srun ./benchmark %d %d %d\n", nx, ny, nz);

        fclose(fp);
    }