3195 lines
110 KiB
JSON
3195 lines
110 KiB
JSON
{
|
|
"context": {
|
|
"date": "2023-10-10T13:17:42-04:00",
|
|
"host_name": "frontier00741",
|
|
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
|
|
"num_cpus": 128,
|
|
"mhz_per_cpu": 1795,
|
|
"cpu_scaling_enabled": false,
|
|
"caches": [
|
|
{
|
|
"type": "Data",
|
|
"level": 1,
|
|
"size": 32768,
|
|
"num_sharing": 2
|
|
},
|
|
{
|
|
"type": "Instruction",
|
|
"level": 1,
|
|
"size": 32768,
|
|
"num_sharing": 2
|
|
},
|
|
{
|
|
"type": "Unified",
|
|
"level": 2,
|
|
"size": 524288,
|
|
"num_sharing": 2
|
|
},
|
|
{
|
|
"type": "Unified",
|
|
"level": 3,
|
|
"size": 33554432,
|
|
"num_sharing": 16
|
|
}
|
|
],
|
|
"load_avg": [2.33,4.27,7.21],
|
|
"library_build_type": "release"
|
|
},
|
|
"benchmarks": [
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 52613,
|
|
"real_time": 1.3306196793273351e+04,
|
|
"cpu_time": 2.0417493072054429e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0782650096311820e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 52613,
|
|
"real_time": 1.3300892996205879e+04,
|
|
"cpu_time": 2.0429356261760397e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0794924830749309e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 52613,
|
|
"real_time": 1.3302283200872382e+04,
|
|
"cpu_time": 2.0441628190751337e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0791706492396575e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 52613,
|
|
"real_time": 1.3304591109098894e+04,
|
|
"cpu_time": 2.0434165415391642e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0786365145778745e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 52613,
|
|
"real_time": 1.3293467127319691e+04,
|
|
"cpu_time": 2.0402218140003402e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0812127195787936e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3301486245354041e+04,
|
|
"cpu_time": 2.0424972215992242e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0793554752204877e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3302283200872382e+04,
|
|
"cpu_time": 2.0429356261760393e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 4.0960000000000000e+03,
|
|
"bytes_per_second": 3.0791706492396575e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 4.9275444455396196e+00,
|
|
"cpu_time": 1.5450257565623494e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.1410858880908132e+05,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 0,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):12/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 3.7045066653816362e-04,
|
|
"cpu_time": 7.5643958788479187e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.7055997505747835e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 52606,
|
|
"real_time": 1.3301542443827195e+04,
|
|
"cpu_time": 2.0655706839524035e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1586842537961721e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 52606,
|
|
"real_time": 1.3305887178059214e+04,
|
|
"cpu_time": 2.0655983195833174e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1566732758024776e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 52606,
|
|
"real_time": 1.3302569747431944e+04,
|
|
"cpu_time": 2.0632565125651076e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1582086435453296e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 52606,
|
|
"real_time": 1.3294035660672711e+04,
|
|
"cpu_time": 2.0610971391096064e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1621618965820229e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 52606,
|
|
"real_time": 1.3305873130043401e+04,
|
|
"cpu_time": 2.0670776375318404e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1566797758677268e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3301981632006893e+04,
|
|
"cpu_time": 2.0645200585484552e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1584815691187465e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3302569747431942e+04,
|
|
"cpu_time": 2.0655706839524035e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 8.1920000000000000e+03,
|
|
"bytes_per_second": 6.1582086435453296e+08,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 4.8495946542418906e+00,
|
|
"cpu_time": 2.3516269256781630e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.2459276088066597e+05,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 1,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):13/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 3.6457685692280000e-04,
|
|
"cpu_time": 1.1390671240712334e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.6468853297681346e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 52631,
|
|
"real_time": 1.3304213196133955e+04,
|
|
"cpu_time": 2.0817869943569371e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2314895859276364e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 52631,
|
|
"real_time": 1.3297664440957693e+04,
|
|
"cpu_time": 2.0802262373886118e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2320960626391044e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 52631,
|
|
"real_time": 1.3294788606614275e+04,
|
|
"cpu_time": 2.0806361535976899e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2323625809175196e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 52631,
|
|
"real_time": 1.3297339042672187e+04,
|
|
"cpu_time": 2.0788243924683138e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2321262131786275e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 52631,
|
|
"real_time": 1.3297865139036545e+04,
|
|
"cpu_time": 2.0793954912504021e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2320774672247167e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3298374085082931e+04,
|
|
"cpu_time": 2.0801738538123911e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2320303819775207e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3297664440957693e+04,
|
|
"cpu_time": 2.0802262373886111e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6384000000000000e+04,
|
|
"bytes_per_second": 1.2320960626391044e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.4923060864635134e+00,
|
|
"cpu_time": 1.1451617506076158e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.2346989621910721e+05,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 2,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):14/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 2.6261150905514889e-04,
|
|
"cpu_time": 5.5051252014770146e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.6255025927194149e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 52641,
|
|
"real_time": 1.3303220245301385e+04,
|
|
"cpu_time": 2.1281443361638259e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4631630083380342e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 52641,
|
|
"real_time": 1.3298491159102583e+04,
|
|
"cpu_time": 2.1285288216409230e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4640389355427651e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 52641,
|
|
"real_time": 1.3299138695366622e+04,
|
|
"cpu_time": 2.1259317167227116e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4639189612644820e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 52641,
|
|
"real_time": 1.3301007920854543e+04,
|
|
"cpu_time": 2.1301811477745461e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4635727002780981e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 52641,
|
|
"real_time": 1.3299730704492045e+04,
|
|
"cpu_time": 2.1309309093672269e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4638092851708984e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3300317745023436e+04,
|
|
"cpu_time": 2.1287433863338469e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4637005781188555e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3299730704492049e+04,
|
|
"cpu_time": 2.1285288216409233e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 3.2768000000000000e+04,
|
|
"bytes_per_second": 2.4638092851708984e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.8689596991158961e+00,
|
|
"cpu_time": 1.9469720142743476e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.4616765602811595e+05,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 3,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):15/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 1.4051992854194800e-04,
|
|
"cpu_time": 9.1461095159405361e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.4050719438172567e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 51034,
|
|
"real_time": 1.3720347545104036e+04,
|
|
"cpu_time": 2.3503861837206565e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7765553885977068e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 51034,
|
|
"real_time": 1.3720279803984004e+04,
|
|
"cpu_time": 2.3506104361797970e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7765789718785543e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 51034,
|
|
"real_time": 1.3734347556406927e+04,
|
|
"cpu_time": 2.3491187482854628e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7716864402072134e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 51034,
|
|
"real_time": 1.3723028327251232e+04,
|
|
"cpu_time": 2.3482363071677748e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7756222924832420e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 51034,
|
|
"real_time": 1.3715694856741407e+04,
|
|
"cpu_time": 2.3495123211976304e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7781757092524090e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3722739617897523e+04,
|
|
"cpu_time": 2.3495727993102642e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7757237604838257e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.3720347545104034e+04,
|
|
"cpu_time": 2.3495123211976308e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 6.5536000000000000e+04,
|
|
"bytes_per_second": 4.7765553885977068e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 7.0040871259057820e+00,
|
|
"cpu_time": 9.6621887475303758e+00,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.4364572323601330e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 4,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):16/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 5.1040006011415437e-04,
|
|
"cpu_time": 4.1123172477851245e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 5.1017549476381290e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 38978,
|
|
"real_time": 1.7968485754897763e+04,
|
|
"cpu_time": 3.1748902149930731e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2945490114142218e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 38978,
|
|
"real_time": 1.7958427993333866e+04,
|
|
"cpu_time": 3.1733720252449948e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2986343820658293e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 38978,
|
|
"real_time": 1.7973468434851544e+04,
|
|
"cpu_time": 3.1713126943403811e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2925267860845480e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 38978,
|
|
"real_time": 1.7970139272219414e+04,
|
|
"cpu_time": 3.1765658627944067e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2938778055342169e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 38978,
|
|
"real_time": 1.7960823707052594e+04,
|
|
"cpu_time": 3.1759489814767414e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2976608499604940e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.7966269032471035e+04,
|
|
"cpu_time": 3.1744179557699194e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2954497670118628e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.7968485754897767e+04,
|
|
"cpu_time": 3.1748902149930735e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3107200000000000e+05,
|
|
"bytes_per_second": 7.2945490114142218e+09,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.3807811375348384e+00,
|
|
"cpu_time": 2.1164352273604386e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.5911822512513474e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 5,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):17/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 3.5515337803317088e-04,
|
|
"cpu_time": 6.6671599545155714e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.5517786209261608e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 30312,
|
|
"real_time": 2.3097334131614200e+04,
|
|
"cpu_time": 4.5072066640274476e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1349534907632198e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 30312,
|
|
"real_time": 2.3098034811769248e+04,
|
|
"cpu_time": 4.5092631730007786e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1349190618867218e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 30312,
|
|
"real_time": 2.3092193061830068e+04,
|
|
"cpu_time": 4.5194068355766532e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1352061681543249e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 30312,
|
|
"real_time": 2.3094934982585437e+04,
|
|
"cpu_time": 4.5199293910002714e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1350713920505415e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 30312,
|
|
"real_time": 2.3095478893421987e+04,
|
|
"cpu_time": 4.5219164258379526e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1350446605143288e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.3095595176244191e+04,
|
|
"cpu_time": 4.5155444978886211e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1350389546738274e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.3095478893421987e+04,
|
|
"cpu_time": 4.5194068355766540e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6214400000000000e+05,
|
|
"bytes_per_second": 1.1350446605143288e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.2912631354625681e+00,
|
|
"cpu_time": 6.7771845493547289e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.1260933400744363e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 6,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):18/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 9.9207797763070001e-05,
|
|
"cpu_time": 1.5008565528528413e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 9.9211867173143700e-05,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 21237,
|
|
"real_time": 3.2961222086962647e+04,
|
|
"cpu_time": 6.8156828977727593e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5906206348076357e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 21237,
|
|
"real_time": 3.2970548260706557e+04,
|
|
"cpu_time": 6.8216041531289651e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5901707058503267e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 21237,
|
|
"real_time": 3.2976801415748079e+04,
|
|
"cpu_time": 6.8193017281160210e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5898691731503899e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 21237,
|
|
"real_time": 3.2962047690767336e+04,
|
|
"cpu_time": 6.8210511560013256e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5905807943687094e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 21237,
|
|
"real_time": 3.2961580673439152e+04,
|
|
"cpu_time": 6.8164303055987184e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5906033305692703e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.2966440025524753e+04,
|
|
"cpu_time": 6.8188140481235576e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5903689277492664e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.2962047690767336e+04,
|
|
"cpu_time": 6.8193017281160210e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 5.2428800000000000e+05,
|
|
"bytes_per_second": 1.5905807943687094e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.9708104316588475e+00,
|
|
"cpu_time": 2.6699016971253826e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.3624216309558800e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 7,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):19/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 2.1145171957486443e-04,
|
|
"cpu_time": 3.9154927503267262e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.1142400183298796e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 13266,
|
|
"real_time": 5.2775078919960768e+04,
|
|
"cpu_time": 1.1250615535956579e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9868771804022903e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 13266,
|
|
"real_time": 5.2764586005406316e+04,
|
|
"cpu_time": 1.1262464458012964e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9872722964083561e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 13266,
|
|
"real_time": 5.2756615466702096e+04,
|
|
"cpu_time": 1.1279118181818175e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9875725361150585e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 13266,
|
|
"real_time": 5.2770204045635961e+04,
|
|
"cpu_time": 1.1296532948891872e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9870607267184067e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 13266,
|
|
"real_time": 5.2766645865699305e+04,
|
|
"cpu_time": 1.1294324981154836e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9871947189306221e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 5.2766626060680908e+04,
|
|
"cpu_time": 1.1276611221166886e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9871954917149471e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 5.2766645865699298e+04,
|
|
"cpu_time": 1.1279118181818177e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0485760000000000e+06,
|
|
"bytes_per_second": 1.9871947189306221e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.8670775134915960e+00,
|
|
"cpu_time": 1.9955224487373715e+02,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.5862411244738954e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 8,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):20/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 1.3014054576077218e-04,
|
|
"cpu_time": 1.7696118183019864e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.3014527937772104e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 7591,
|
|
"real_time": 9.2221364105251152e+04,
|
|
"cpu_time": 1.9643949598208437e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2740414006526138e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 7591,
|
|
"real_time": 9.2226256172176654e+04,
|
|
"cpu_time": 1.9653569661441131e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2739207759716923e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 7591,
|
|
"real_time": 9.2221411166539910e+04,
|
|
"cpu_time": 1.9642581860097559e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2740402401919613e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 7591,
|
|
"real_time": 9.2209142642536128e+04,
|
|
"cpu_time": 1.9642320129100201e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2743428036522949e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 7591,
|
|
"real_time": 9.2205963422872912e+04,
|
|
"cpu_time": 1.9643737755236329e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2744212219572922e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 9.2216827501875363e+04,
|
|
"cpu_time": 1.9645231800816732e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2741532884851711e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 9.2221364105251152e+04,
|
|
"cpu_time": 1.9643737755236329e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 2.0971520000000000e+06,
|
|
"bytes_per_second": 2.2740414006526138e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 8.7689202473802528e+00,
|
|
"cpu_time": 4.7142378466514508e+01,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.1625518176820641e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 9,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):21/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 9.5090239871914093e-05,
|
|
"cpu_time": 2.3996855290124198e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 9.5092614408703931e-05,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 4196,
|
|
"real_time": 1.6699423211059280e+05,
|
|
"cpu_time": 3.7165227216396388e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5116460293205219e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 4196,
|
|
"real_time": 1.6697093377642694e+05,
|
|
"cpu_time": 3.7121869470924750e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5119964925249485e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 4196,
|
|
"real_time": 1.6704639775804032e+05,
|
|
"cpu_time": 3.7205727859866450e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5108616865089622e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 4196,
|
|
"real_time": 1.6695709462413317e+05,
|
|
"cpu_time": 3.7206392612011568e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5122047130986221e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 4196,
|
|
"real_time": 1.6696925839181506e+05,
|
|
"cpu_time": 3.7263947426120221e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5120216981245258e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.6698758333220167e+05,
|
|
"cpu_time": 3.7192632917063881e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5117461239155163e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.6697093377642697e+05,
|
|
"cpu_time": 3.7205727859866444e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 4.1943040000000000e+06,
|
|
"bytes_per_second": 2.5119964925249485e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.5517303507519230e+01,
|
|
"cpu_time": 5.2927081141717542e+02,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 5.3412968454337008e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 10,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):22/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 2.1269427821385880e-04,
|
|
"cpu_time": 1.4230528196199506e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.1265273566371622e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 2162,
|
|
"real_time": 3.2362471260920865e+05,
|
|
"cpu_time": 7.6086096808510472e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5920789337647461e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 2162,
|
|
"real_time": 3.2385360870306159e+05,
|
|
"cpu_time": 7.6403697548566130e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5902468814826267e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 2162,
|
|
"real_time": 3.2381787432810408e+05,
|
|
"cpu_time": 7.6293607631822373e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5905327238052204e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 2162,
|
|
"real_time": 3.2395479499464069e+05,
|
|
"cpu_time": 7.6423365356151876e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5894378257740482e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 2162,
|
|
"real_time": 3.2391327836703259e+05,
|
|
"cpu_time": 7.6305557446808484e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5897697193181133e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.2383285380040959e+05,
|
|
"cpu_time": 7.6302464958371874e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5904132168289509e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 3.2385360870306159e+05,
|
|
"cpu_time": 7.6305557446808484e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 8.3886080000000000e+06,
|
|
"bytes_per_second": 2.5902468814826267e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.2778465458092390e+02,
|
|
"cpu_time": 1.3394787744381345e+03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.0225017158792449e+07,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 11,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):23/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 3.9460064993801530e-04,
|
|
"cpu_time": 1.7554855864340821e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.9472533155576560e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 1093,
|
|
"real_time": 6.4030860468475975e+05,
|
|
"cpu_time": 1.5422251427264416e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6201765644333096e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 1093,
|
|
"real_time": 6.4019633339690720e+05,
|
|
"cpu_time": 1.5421269990850834e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6206360650301487e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 1093,
|
|
"real_time": 6.4018975049921905e+05,
|
|
"cpu_time": 1.5421097200366000e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6206630123205116e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 1093,
|
|
"real_time": 6.4018037592376990e+05,
|
|
"cpu_time": 1.5392733851784063e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6207013883846020e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 1093,
|
|
"real_time": 6.4030084634380380e+05,
|
|
"cpu_time": 1.5422688755718253e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6202083123581604e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.4023518216969189e+05,
|
|
"cpu_time": 1.5416008245196715e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6204770685053467e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.4019633339690720e+05,
|
|
"cpu_time": 1.5421269990850834e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 1.6777216000000000e+07,
|
|
"bytes_per_second": 2.6206360650301487e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.3795842125627587e+01,
|
|
"cpu_time": 1.3027733529701841e+03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.6110676044867164e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 12,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):24/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 9.9644386785227826e-05,
|
|
"cpu_time": 8.4507826685685592e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 9.9640925534830296e-05,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 552,
|
|
"real_time": 1.2669229008547585e+06,
|
|
"cpu_time": 3.0502753387681046e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6484983401406460e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 552,
|
|
"real_time": 1.2666994152823463e+06,
|
|
"cpu_time": 3.0563120235507386e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6489656184550098e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 552,
|
|
"real_time": 1.2664808560604826e+06,
|
|
"cpu_time": 3.0523025289855087e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6494227559328831e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 552,
|
|
"real_time": 1.2670275348194782e+06,
|
|
"cpu_time": 3.0611593931159587e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6482796212302303e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 552,
|
|
"real_time": 1.2647185358218849e+06,
|
|
"cpu_time": 3.0810984202898555e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6531145902905941e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.2663698485677899e+06,
|
|
"cpu_time": 3.0602295409420333e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6496561852098728e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.2666994152823463e+06,
|
|
"cpu_time": 3.0563120235507386e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 3.3554432000000000e+07,
|
|
"bytes_per_second": 2.6489656184550098e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 9.4686175155303465e+02,
|
|
"cpu_time": 1.2385933072334949e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.9828937794469979e+07,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 13,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):25/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 7.4769764348376960e-04,
|
|
"cpu_time": 4.0473869383412901e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 7.4835889671849543e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 276,
|
|
"real_time": 2.5333063238286884e+06,
|
|
"cpu_time": 6.3108529601448970e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6490623486296616e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 276,
|
|
"real_time": 2.5300186513788132e+06,
|
|
"cpu_time": 6.3209407572463332e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6525047142805412e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 276,
|
|
"real_time": 2.5328795553819425e+06,
|
|
"cpu_time": 6.3274877862319350e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6495086928790184e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 276,
|
|
"real_time": 2.5344555792601211e+06,
|
|
"cpu_time": 6.2937752789855171e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6478611244625153e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 276,
|
|
"real_time": 2.5340672244952209e+06,
|
|
"cpu_time": 6.3178111231884155e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6482669185450634e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.5329454668689580e+06,
|
|
"cpu_time": 6.3141735811594194e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6494407597593601e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.5333063238286888e+06,
|
|
"cpu_time": 6.3178111231884155e+06,
|
|
"time_unit": "ns",
|
|
"bytes": 6.7108864000000000e+07,
|
|
"bytes_per_second": 2.6490623486296616e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.7492563122584434e+03,
|
|
"cpu_time": 1.2878389810379260e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.8308999194232326e+07,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 14,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):26/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 6.9060164742541666e-04,
|
|
"cpu_time": 2.0396002176447147e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 6.9105146536264776e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 138,
|
|
"real_time": 5.0558920450292639e+06,
|
|
"cpu_time": 1.2831195268115934e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6546794671369045e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 138,
|
|
"real_time": 5.0564067256029537e+06,
|
|
"cpu_time": 1.2827766746376827e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6544092531242165e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 138,
|
|
"real_time": 5.0571938686450757e+06,
|
|
"cpu_time": 1.2825944268115975e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6539960991442009e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 138,
|
|
"real_time": 5.0556807529073264e+06,
|
|
"cpu_time": 1.2818127210144922e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6547904141854008e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 138,
|
|
"real_time": 5.0558662480688179e+06,
|
|
"cpu_time": 1.2801847253623167e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6546930123253746e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 5.0562079280506885e+06,
|
|
"cpu_time": 1.2820976149275366e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6545136491832199e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 5.0558920450292639e+06,
|
|
"cpu_time": 1.2825944268115977e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 1.3421772800000000e+08,
|
|
"bytes_per_second": 2.6546794671369045e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 6.1360414982758527e+02,
|
|
"cpu_time": 1.1718104654839601e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 3.2211107045365577e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 15,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):27/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 1.2135658947557307e-04,
|
|
"cpu_time": 9.1397913219750456e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.2134466535998701e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 69,
|
|
"real_time": 1.0100161184327327e+07,
|
|
"cpu_time": 2.5814307347826120e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6577343777101109e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 69,
|
|
"real_time": 1.0097395277757576e+07,
|
|
"cpu_time": 2.5787414159420196e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6584623916952770e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 69,
|
|
"real_time": 1.0099725635803264e+07,
|
|
"cpu_time": 2.5806926695652127e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6578489919409622e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 69,
|
|
"real_time": 1.0099199182097463e+07,
|
|
"cpu_time": 2.5868804782608852e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6579875409908463e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 69,
|
|
"real_time": 1.0099013619448828e+07,
|
|
"cpu_time": 2.5855631666666530e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6580363797415134e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.0099098979886893e+07,
|
|
"cpu_time": 2.5826616930434763e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6580139364157421e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.0099199182097461e+07,
|
|
"cpu_time": 2.5814307347826120e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 2.6843545600000000e+08,
|
|
"bytes_per_second": 2.6579875409908463e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.0536725890616965e+03,
|
|
"cpu_time": 3.4270097078257219e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.7734108575831312e+06,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 16,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):28/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 1.0433332628585620e-04,
|
|
"cpu_time": 1.3269293911225527e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.0434147163739099e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 35,
|
|
"real_time": 2.0186294828142438e+07,
|
|
"cpu_time": 5.1467226800000228e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6595812484197395e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 35,
|
|
"real_time": 2.0185914476002965e+07,
|
|
"cpu_time": 5.1488342057142846e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6596313614537140e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 35,
|
|
"real_time": 2.0195094176701136e+07,
|
|
"cpu_time": 5.1817182285714939e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6584224233001209e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 35,
|
|
"real_time": 2.0210578452263560e+07,
|
|
"cpu_time": 5.1889857685714394e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6563856807367683e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 35,
|
|
"real_time": 2.0202862098813057e+07,
|
|
"cpu_time": 5.1876610628571436e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6574002701901421e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.0196148806384631e+07,
|
|
"cpu_time": 5.1707843891428776e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6582841968200974e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 2.0195094176701136e+07,
|
|
"cpu_time": 5.1817182285714939e+07,
|
|
"time_unit": "ns",
|
|
"bytes": 5.3687091200000000e+08,
|
|
"bytes_per_second": 2.6584224233001209e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 1.0679859839636831e+04,
|
|
"cpu_time": 2.1192201235430557e+05,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.4055240624769112e+07,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 17,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):29/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 5.2880675132778757e-04,
|
|
"cpu_time": 4.0984499914419048e-03,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 5.2873355834497776e-04,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 0,
|
|
"threads": 1,
|
|
"iterations": 17,
|
|
"real_time": 4.0506263866144069e+07,
|
|
"cpu_time": 1.0457281752941166e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6508043979278339e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 1,
|
|
"threads": 1,
|
|
"iterations": 17,
|
|
"real_time": 4.0494555936140172e+07,
|
|
"cpu_time": 1.0461779129411753e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6515708079211647e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 2,
|
|
"threads": 1,
|
|
"iterations": 17,
|
|
"real_time": 4.0568456711137995e+07,
|
|
"cpu_time": 1.0457464858823621e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6467406232517742e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 3,
|
|
"threads": 1,
|
|
"iterations": 17,
|
|
"real_time": 4.0443346561754450e+07,
|
|
"cpu_time": 1.0446937582353024e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6549282274662006e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "iteration",
|
|
"repetitions": 5,
|
|
"repetition_index": 4,
|
|
"threads": 1,
|
|
"iterations": 17,
|
|
"real_time": 4.0510471910238266e+07,
|
|
"cpu_time": 1.0454226788235299e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6505290443892159e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time_mean",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "mean",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 4.0504618997082993e+07,
|
|
"cpu_time": 1.0455538022352973e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6509146201912380e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time_median",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "median",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 4.0506263866144076e+07,
|
|
"cpu_time": 1.0457281752941164e+08,
|
|
"time_unit": "ns",
|
|
"bytes": 1.0737418240000000e+09,
|
|
"bytes_per_second": 2.6508043979278339e+10,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time_stddev",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "stddev",
|
|
"aggregate_unit": "time",
|
|
"iterations": 5,
|
|
"real_time": 4.4631245919956767e+04,
|
|
"cpu_time": 5.5088931637860565e+04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 2.9207316630822491e+07,
|
|
"own_numa": 0.0000000000000000e+00,
|
|
"wr_gpu": 0.0000000000000000e+00
|
|
},
|
|
{
|
|
"name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time_cv",
|
|
"family_index": 0,
|
|
"per_family_instance_index": 18,
|
|
"run_name": "Comm_implicit_mapped_GPUWrHost/0/0/log2(N):30/manual_time",
|
|
"run_type": "aggregate",
|
|
"repetitions": 5,
|
|
"threads": 1,
|
|
"aggregate_name": "cv",
|
|
"aggregate_unit": "percentage",
|
|
"iterations": 5,
|
|
"real_time": 1.1018804033972264e-03,
|
|
"cpu_time": 5.2688758359527299e-04,
|
|
"time_unit": "ns",
|
|
"bytes": 0.0000000000000000e+00,
|
|
"bytes_per_second": 1.1017826228109703e-03,
|
|
"own_numa": NaN,
|
|
"wr_gpu": NaN
|
|
}
|
|
]
|
|
}
|