Files
frontier-gpu-bandwidth/results/implicit_mapped_GPUWrHost.json
2023-10-10 13:23:18 -04:00

1200 lines
41 KiB
JSON

{
"context": {
"date": "2023-10-10T13:17:42-04:00",
"host_name": "frontier00741",
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
"num_cpus": 128,
"mhz_per_cpu": 1795,
"cpu_scaling_enabled": false,
"caches": [
{
"type": "Data",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Instruction",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Unified",
"level": 2,
"size": 524288,
"num_sharing": 2
},
{
"type": "Unified",
"level": 3,
"size": 33554432,
"num_sharing": 16
}
],
"load_avg": [2.33,4.27,7.21],
"library_build_type": "release"
},
"benchmarks": [
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 52613,
"real_time": 1.3306196793273351e+04,
"cpu_time": 2.0417493072054429e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0782650096311820e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 52613,
"real_time": 1.3300892996205879e+04,
"cpu_time": 2.0429356261760397e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0794924830749309e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 52613,
"real_time": 1.3302283200872382e+04,
"cpu_time": 2.0441628190751337e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0791706492396575e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 52613,
"real_time": 1.3304591109098894e+04,
"cpu_time": 2.0434165415391642e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0786365145778745e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 52613,
"real_time": 1.3293467127319691e+04,
"cpu_time": 2.0402218140003402e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0812127195787936e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3301486245354041e+04,
"cpu_time": 2.0424972215992242e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0793554752204877e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_median",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3302283200872382e+04,
"cpu_time": 2.0429356261760393e+04,
"time_unit": "ns",
"bytes": 4.0960000000000000e+03,
"bytes_per_second": 3.0791706492396575e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 4.9275444455396196e+00,
"cpu_time": 1.5450257565623494e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 1.1410858880908132e+05,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 0,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 3.7045066653816362e-04,
"cpu_time": 7.5643958788479187e-04,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 3.7055997505747835e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 52606,
"real_time": 1.3301542443827195e+04,
"cpu_time": 2.0655706839524035e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1586842537961721e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 52606,
"real_time": 1.3305887178059214e+04,
"cpu_time": 2.0655983195833174e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1566732758024776e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 52606,
"real_time": 1.3302569747431944e+04,
"cpu_time": 2.0632565125651076e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1582086435453296e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 52606,
"real_time": 1.3294035660672711e+04,
"cpu_time": 2.0610971391096064e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1621618965820229e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 52606,
"real_time": 1.3305873130043401e+04,
"cpu_time": 2.0670776375318404e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1566797758677268e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3301981632006893e+04,
"cpu_time": 2.0645200585484552e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1584815691187465e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_median",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3302569747431942e+04,
"cpu_time": 2.0655706839524035e+04,
"time_unit": "ns",
"bytes": 8.1920000000000000e+03,
"bytes_per_second": 6.1582086435453296e+08,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 4.8495946542418906e+00,
"cpu_time": 2.3516269256781630e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 2.2459276088066597e+05,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 1,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 3.6457685692280000e-04,
"cpu_time": 1.1390671240712334e-03,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 3.6468853297681346e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 52631,
"real_time": 1.3304213196133955e+04,
"cpu_time": 2.0817869943569371e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2314895859276364e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 52631,
"real_time": 1.3297664440957693e+04,
"cpu_time": 2.0802262373886118e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2320960626391044e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 52631,
"real_time": 1.3294788606614275e+04,
"cpu_time": 2.0806361535976899e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2323625809175196e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 52631,
"real_time": 1.3297339042672187e+04,
"cpu_time": 2.0788243924683138e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2321262131786275e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 52631,
"real_time": 1.3297865139036545e+04,
"cpu_time": 2.0793954912504021e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2320774672247167e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3298374085082931e+04,
"cpu_time": 2.0801738538123911e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2320303819775207e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_median",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3297664440957693e+04,
"cpu_time": 2.0802262373886111e+04,
"time_unit": "ns",
"bytes": 1.6384000000000000e+04,
"bytes_per_second": 1.2320960626391044e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 3.4923060864635134e+00,
"cpu_time": 1.1451617506076158e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 3.2346989621910721e+05,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 2,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 2.6261150905514889e-04,
"cpu_time": 5.5051252014770146e-04,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 2.6255025927194149e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 52641,
"real_time": 1.3303220245301385e+04,
"cpu_time": 2.1281443361638259e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4631630083380342e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 52641,
"real_time": 1.3298491159102583e+04,
"cpu_time": 2.1285288216409230e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4640389355427651e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 52641,
"real_time": 1.3299138695366622e+04,
"cpu_time": 2.1259317167227116e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4639189612644820e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 52641,
"real_time": 1.3301007920854543e+04,
"cpu_time": 2.1301811477745461e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4635727002780981e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 52641,
"real_time": 1.3299730704492045e+04,
"cpu_time": 2.1309309093672269e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4638092851708984e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3300317745023436e+04,
"cpu_time": 2.1287433863338469e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4637005781188555e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_median",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3299730704492049e+04,
"cpu_time": 2.1285288216409233e+04,
"time_unit": "ns",
"bytes": 3.2768000000000000e+04,
"bytes_per_second": 2.4638092851708984e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.8689596991158961e+00,
"cpu_time": 1.9469720142743476e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 3.4616765602811595e+05,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 3,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 1.4051992854194800e-04,
"cpu_time": 9.1461095159405361e-04,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 1.4050719438172567e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 51034,
"real_time": 1.3720347545104036e+04,
"cpu_time": 2.3503861837206565e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7765553885977068e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 51034,
"real_time": 1.3720279803984004e+04,
"cpu_time": 2.3506104361797970e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7765789718785543e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 51034,
"real_time": 1.3734347556406927e+04,
"cpu_time": 2.3491187482854628e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7716864402072134e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 51034,
"real_time": 1.3723028327251232e+04,
"cpu_time": 2.3482363071677748e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7756222924832420e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 51034,
"real_time": 1.3715694856741407e+04,
"cpu_time": 2.3495123211976304e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7781757092524090e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3722739617897523e+04,
"cpu_time": 2.3495727993102642e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7757237604838257e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_median",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.3720347545104034e+04,
"cpu_time": 2.3495123211976308e+04,
"time_unit": "ns",
"bytes": 6.5536000000000000e+04,
"bytes_per_second": 4.7765553885977068e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 7.0040871259057820e+00,
"cpu_time": 9.6621887475303758e+00,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 2.4364572323601330e+06,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 4,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 5.1040006011415437e-04,
"cpu_time": 4.1123172477851245e-04,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 5.1017549476381290e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 38978,
"real_time": 1.7968485754897763e+04,
"cpu_time": 3.1748902149930731e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2945490114142218e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 38978,
"real_time": 1.7958427993333866e+04,
"cpu_time": 3.1733720252449948e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2986343820658293e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 38978,
"real_time": 1.7973468434851544e+04,
"cpu_time": 3.1713126943403811e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2925267860845480e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 38978,
"real_time": 1.7970139272219414e+04,
"cpu_time": 3.1765658627944067e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2938778055342169e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 38978,
"real_time": 1.7960823707052594e+04,
"cpu_time": 3.1759489814767414e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2976608499604940e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.7966269032471035e+04,
"cpu_time": 3.1744179557699194e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2954497670118628e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_median",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 1.7968485754897767e+04,
"cpu_time": 3.1748902149930735e+04,
"time_unit": "ns",
"bytes": 1.3107200000000000e+05,
"bytes_per_second": 7.2945490114142218e+09,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 6.3807811375348384e+00,
"cpu_time": 2.1164352273604386e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 2.5911822512513474e+06,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 5,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 3.5515337803317088e-04,
"cpu_time": 6.6671599545155714e-04,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 3.5517786209261608e-04,
"own_numa": NaN,
"wr_gpu": NaN
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 0,
"threads": 1,
"iterations": 30312,
"real_time": 2.3097334131614200e+04,
"cpu_time": 4.5072066640274476e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1349534907632198e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 1,
"threads": 1,
"iterations": 30312,
"real_time": 2.3098034811769248e+04,
"cpu_time": 4.5092631730007786e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1349190618867218e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 2,
"threads": 1,
"iterations": 30312,
"real_time": 2.3092193061830068e+04,
"cpu_time": 4.5194068355766532e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1352061681543249e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 3,
"threads": 1,
"iterations": 30312,
"real_time": 2.3094934982585437e+04,
"cpu_time": 4.5199293910002714e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1350713920505415e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "iteration",
"repetitions": 5,
"repetition_index": 4,
"threads": 1,
"iterations": 30312,
"real_time": 2.3095478893421987e+04,
"cpu_time": 4.5219164258379526e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1350446605143288e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_mean",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "mean",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 2.3095595176244191e+04,
"cpu_time": 4.5155444978886211e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1350389546738274e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_median",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "median",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 2.3095478893421987e+04,
"cpu_time": 4.5194068355766540e+04,
"time_unit": "ns",
"bytes": 2.6214400000000000e+05,
"bytes_per_second": 1.1350446605143288e+10,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_stddev",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "stddev",
"aggregate_unit": "time",
"iterations": 5,
"real_time": 2.2912631354625681e+00,
"cpu_time": 6.7771845493547289e+01,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 1.1260933400744363e+06,
"own_numa": 0.0000000000000000e+00,
"wr_gpu": 0.0000000000000000e+00
},
{
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_cv",
"family_index": 0,
"per_family_instance_index": 6,
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
"run_type": "aggregate",
"repetitions": 5,
"threads": 1,
"aggregate_name": "cv",
"aggregate_unit": "percentage",
"iterations": 5,
"real_time": 9.9207797763070001e-05,
"cpu_time": 1.5008565528528413e-03,
"time_unit": "ns",
"bytes": 0.0000000000000000e+00,
"bytes_per_second": 9.9211867173143700e-05,
"own_numa": NaN,
"wr_gpu": NaN
}