frontier scripts
This commit is contained in:
302
.gitignore
vendored
302
.gitignore
vendored
@@ -1,301 +1 @@
|
||||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.log
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
.pdf
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
# fixme
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
*.slg
|
||||
*.slo
|
||||
*.sls
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplot
|
||||
*.gnuplot
|
||||
*.table
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.glog
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# newpax
|
||||
*.newpax
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# svg
|
||||
svg-inkscape/
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# titletoc
|
||||
*.ptc
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
# easy-todo
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
||||
.venv
|
108
results/figs.py
Normal file
108
results/figs.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from pathlib import Path
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Directory holding this script and the benchmark *.json result files.
RESULTS_DIR = Path(__file__).parent

# Benchmark families that each get their own "<pattern>.pdf" figure.
# NOTE(review): result files for other families may exist next to this
# script but are not listed here -- confirm whether that is intentional.
PATTERNS = (
    "hipManaged_HostToGPUWriteDst",
    "hipMemcpyAsync_GPUToGPU",
    "hipMemcpyAsync_GPUToPageable",
    "hipMemcpyAsync_GPUToPinned",
    "implicit_managed_GPURdHost_coarse",
    "implicit_managed_GPURdHost_fine",
    "implicit_managed_GPUWrGPU_coarse",
    "implicit_managed_GPUWrGPU_fine",
    "implicit_mapped_GPURdHost",
    "prefetch_managed_GPUToGPU",
    "prefetch_managed_GPUToHost",
)


def _is_skipped(run_name):
    """Return True for run names that are excluded from the analysis."""
    return (
        "Comm_prefetch_managed_GPUToGPU/0/0" in run_name
        or "/0/0/" in run_name
    )


def collect_samples(benchmarks, samples=None):
    """Append the per-iteration measurements in *benchmarks* to *samples*.

    Args:
        benchmarks: iterable of benchmark records (dicts) carrying
            "run_type", "run_name", "bytes" and "real_time" keys.
        samples: optional mapping ``run_name -> ([bytes...], [real_time...])``
            to extend; a new dict is created when omitted.

    Returns:
        The (possibly newly created) *samples* mapping.
    """
    if samples is None:
        samples = {}
    for benchmark in benchmarks:
        # Only raw iterations are used; any other run type is ignored so the
        # statistics are recomputed here from the individual repetitions.
        if benchmark["run_type"] != "iteration":
            continue
        run_name = benchmark["run_name"]
        if _is_skipped(run_name):
            continue
        xs, ys = samples.setdefault(run_name, ([], []))
        xs.append(int(benchmark["bytes"]))
        ys.append(benchmark["real_time"])
    return samples


def load_samples(results_dir):
    """Read every ``*.json`` file in *results_dir* and collect its samples.

    Files that are not valid JSON are reported and skipped. Files are visited
    in sorted order so that the resulting plots are reproducible.
    """
    samples = {}
    for path in sorted(results_dir.iterdir()):
        if path.suffix != ".json":
            continue

        print(f"load {path}")
        try:
            with open(path, "r") as f:
                res = json.load(f)
        except json.JSONDecodeError:
            print("SKIP - incorrect formatting")
            continue

        collect_samples(res["benchmarks"], samples)
    return samples


def aggregate(samples):
    """Reduce the raw samples of each run to summary statistics.

    Args:
        samples: mapping ``run_name -> ([bytes...], [real_time...])``.

    Returns:
        Mapping ``run_name -> (bytes, time_mean, time_stddev, bw_mean,
        bw_stddev)``. Bandwidth is bytes divided by real_time, so its unit
        follows whatever time unit the benchmark reported. Standard
        deviations are population deviations (``np.std`` default).

    Raises:
        ValueError: if the repetitions of one run disagree on the byte count.
    """
    stats = {}
    for name, (xs, ys) in samples.items():
        b = xs[0]  # bytes
        if any(x != b for x in xs):
            raise ValueError(f"inconsistent byte counts for {name}: {xs}")
        bws = [b / y for y in ys]
        stats[name] = (
            b,
            float(np.mean(ys)),
            float(np.std(ys)),
            float(np.mean(bws)),
            float(np.std(bws)),
        )
    return stats


def build_series(stats):
    """Group the per-run statistics into one plottable series per name/f1/f2.

    The series key is the first three "/"-separated fields of the run name;
    the second and third fields must parse as integers. Points of a series
    are ordered by byte count.

    Returns:
        Mapping ``key -> (x, t, terr, bw, bwerr)`` where each element is a
        tuple with one entry per point.
    """
    grouped = {}
    for run_name, point in stats.items():
        family, f1, f2 = run_name.split("/")[0:3]
        # expect these to be ints
        key = "/".join((family, str(int(f1)), str(int(f2))))
        grouped.setdefault(key, []).append(point)

    series = {}
    for key, points in grouped.items():
        points.sort(key=lambda p: p[0])
        # [(x,y,z), ...] -> ([x...], [y...], [z...])
        series[key] = tuple(zip(*points))
    return series


def plot_pattern(pattern, series):
    """Plot bandwidth vs. bytes for every series whose key contains *pattern*.

    The figure is written to ``<pattern>.pdf`` in the current working
    directory.
    """
    plt.clf()
    for name, (x, t, terr, bw, bwerr) in series.items():
        if pattern not in name:
            continue
        plt.errorbar(x, bw, yerr=bwerr, label=name)

    output_path = f"{pattern}.pdf"
    print(f"write {output_path}")
    plt.xscale('log')
    # The legend sits outside the axes, so it must be passed to savefig to
    # be included in the tight bounding box.
    lgd = plt.legend(bbox_to_anchor=(1.04, 1))
    plt.tight_layout()
    plt.savefig(output_path, bbox_extra_artists=(lgd,), bbox_inches='tight')


def main():
    """Load all results, aggregate them, and write one figure per pattern."""
    series = build_series(aggregate(load_samples(RESULTS_DIR)))
    for pattern in PATTERNS:
        plot_pattern(pattern, series)


if __name__ == "__main__":
    main()
|
25272
results/hipManaged_HostToGPUWriteDst.json
Normal file
25272
results/hipManaged_HostToGPUWriteDst.json
Normal file
File diff suppressed because it is too large
Load Diff
2030
results/hipMemcpyAsync_GPUToGPU.json
Normal file
2030
results/hipMemcpyAsync_GPUToGPU.json
Normal file
File diff suppressed because it is too large
Load Diff
30584
results/hipMemcpyAsync_GPUToPageable.json
Normal file
30584
results/hipMemcpyAsync_GPUToPageable.json
Normal file
File diff suppressed because it is too large
Load Diff
3858
results/hipMemcpyAsync_GPUToPinned.json
Normal file
3858
results/hipMemcpyAsync_GPUToPinned.json
Normal file
File diff suppressed because it is too large
Load Diff
15326
results/hipMemcpyAsync_PageableToGPU.json
Normal file
15326
results/hipMemcpyAsync_PageableToGPU.json
Normal file
File diff suppressed because it is too large
Load Diff
25272
results/implicit_managed_GPURdHost_coarse.json
Normal file
25272
results/implicit_managed_GPURdHost_coarse.json
Normal file
File diff suppressed because it is too large
Load Diff
25272
results/implicit_managed_GPURdHost_fine.json
Normal file
25272
results/implicit_managed_GPURdHost_fine.json
Normal file
File diff suppressed because it is too large
Load Diff
25272
results/implicit_managed_GPUWrGPU_coarse.json
Normal file
25272
results/implicit_managed_GPUWrGPU_coarse.json
Normal file
File diff suppressed because it is too large
Load Diff
25272
results/implicit_managed_GPUWrGPU_fine.json
Normal file
25272
results/implicit_managed_GPUWrGPU_fine.json
Normal file
File diff suppressed because it is too large
Load Diff
38
results/implicit_managed_HostWrGPU_fine.json
Normal file
38
results/implicit_managed_HostWrGPU_fine.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2023-10-09T16:54:29-04:00",
|
||||
"host_name": "frontier01017",
|
||||
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
|
||||
"num_cpus": 128,
|
||||
"mhz_per_cpu": 1813,
|
||||
"cpu_scaling_enabled": false,
|
||||
"caches": [
|
||||
{
|
||||
"type": "Data",
|
||||
"level": 1,
|
||||
"size": 32768,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Instruction",
|
||||
"level": 1,
|
||||
"size": 32768,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Unified",
|
||||
"level": 2,
|
||||
"size": 524288,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Unified",
|
||||
"level": 3,
|
||||
"size": 33554432,
|
||||
"num_sharing": 16
|
||||
}
|
||||
],
|
||||
"load_avg": [12.35,15.21,12.5],
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
1698
results/implicit_mapped_GPURdHost.json
Normal file
1698
results/implicit_mapped_GPURdHost.json
Normal file
File diff suppressed because it is too large
Load Diff
6180
results/implicit_mapped_GPUWrGPU.json
Normal file
6180
results/implicit_mapped_GPUWrGPU.json
Normal file
File diff suppressed because it is too large
Load Diff
38
results/implicit_mapped_HostWrGPU.json
Normal file
38
results/implicit_mapped_HostWrGPU.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2023-10-09T16:41:39-04:00",
|
||||
"host_name": "frontier06647",
|
||||
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
|
||||
"num_cpus": 128,
|
||||
"mhz_per_cpu": 1798,
|
||||
"cpu_scaling_enabled": false,
|
||||
"caches": [
|
||||
{
|
||||
"type": "Data",
|
||||
"level": 1,
|
||||
"size": 32768,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Instruction",
|
||||
"level": 1,
|
||||
"size": 32768,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Unified",
|
||||
"level": 2,
|
||||
"size": 524288,
|
||||
"num_sharing": 2
|
||||
},
|
||||
{
|
||||
"type": "Unified",
|
||||
"level": 3,
|
||||
"size": 33554432,
|
||||
"num_sharing": 16
|
||||
}
|
||||
],
|
||||
"load_avg": [1.31,5.45,4.52],
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
27928
results/prefetch_managed_GPUToGPU.json
Normal file
27928
results/prefetch_managed_GPUToGPU.json
Normal file
File diff suppressed because it is too large
Load Diff
27928
results/prefetch_managed_GPUToHost.json
Normal file
27928
results/prefetch_managed_GPUToHost.json
Normal file
File diff suppressed because it is too large
Load Diff
23444
results/prefetch_managed_HostToGPU.json
Normal file
23444
results/prefetch_managed_HostToGPU.json
Normal file
File diff suppressed because it is too large
Load Diff
2
results/requirements.txt
Normal file
2
results/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
matplotlib
|
||||
numpy
|
@@ -20,49 +20,27 @@ export HSA_XNACK=1
|
||||
mkdir -p $SCOPE_RESULTS
|
||||
module list > $SCOPE_RESULTS/modules.r1.$SLURM_JOBID.txt 2>&1
|
||||
env > $SCOPE_RESULTS/env.r1.$SLURM_JOBID.txt
|
||||
rocm-smi > $SCOPE_RESULTS/rocm-smi.r1.$SLURM_JOBID.txt 2>&1
|
||||
lscpu > $SCOPE_RESULTS/lscpu.r1.$SLURM_JOBID.txt 2>&1
|
||||
|
||||
date
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipMemcpyAsync_GPUToPinned/0/.*' \
|
||||
--benchmark_filter='.*hipMemcpyAsync_GPUToPinned/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToPinned.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipMemcpyAsync_PinnedToGPU/0/.*' \
|
||||
--benchmark_filter='.*hipMemcpyAsync_PinnedToGPU/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PinnedToGPU"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipMemcpyAsync_GPUToGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToGPU.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipMemcpyAsync_GPUToPageable/0/.*' \
|
||||
--benchmark_filter='.*hipMemcpyAsync_GPUToPageable/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToPageable.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipMemcpyAsync_PageableToGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PageableToGPU.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*prefetch_managed_GPUToHost/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToHost.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*prefetch_managed_HostToGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_HostToGPU.json"
|
||||
|
||||
date
|
||||
|
34
scripts/run_10.sh
Executable file
34
scripts/run_10.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r10
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r10.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r10.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r10.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r10.$SLURM_JOBID.txt" 2>&1

date

# Run the benchmarks matching the filter (5 repetitions each) and write the
# results as JSON.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*hipMemcpyAsync_GPUToGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToGPU.json"

date
|
@@ -20,49 +20,15 @@ export HSA_XNACK=1
|
||||
mkdir -p $SCOPE_RESULTS
|
||||
module list > $SCOPE_RESULTS/modules.r2.$SLURM_JOBID.txt 2>&1
|
||||
env > $SCOPE_RESULTS/env.r2.$SLURM_JOBID.txt
|
||||
rocm-smi > $SCOPE_RESULTS/rocm-smi.r2.$SLURM_JOBID.txt 2>&1
|
||||
lscpu > $SCOPE_RESULTS/lscpu.r2.$SLURM_JOBID.txt 2>&1
|
||||
|
||||
date
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_mapped_GPURdHost/0/.*' \
|
||||
--benchmark_filter='.*implicit_mapped_GPURdHost/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPURdHost.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_HostWrGPU_fine/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_fine.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_HostWrGPU_coarse/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_coarse.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_GPUWrHost_fine/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_fine.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_GPUWrHost_coarse/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_coarse.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_mapped_GPUWrHost/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrHost.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_mapped_GPUWrGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrGPU.json"
|
||||
|
||||
date
|
||||
|
@@ -20,6 +20,8 @@ export HSA_XNACK=1
|
||||
mkdir -p $SCOPE_RESULTS
|
||||
module list > $SCOPE_RESULTS/modules.r3.$SLURM_JOBID.txt 2>&1
|
||||
env > $SCOPE_RESULTS/env.r3.$SLURM_JOBID.txt
|
||||
rocm-smi > $SCOPE_RESULTS/rocm-smi.r3.$SLURM_JOBID.txt 2>&1
|
||||
lscpu > $SCOPE_RESULTS/lscpu.r3.$SLURM_JOBID.txt 2>&1
|
||||
|
||||
date
|
||||
|
||||
@@ -37,32 +39,20 @@ srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_GPURdHost_fine/0/.*' \
|
||||
--benchmark_filter='.*implicit_managed_GPURdHost_fine/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPURdHost_fine.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_managed_GPURdHost_coarse/0/.*' \
|
||||
--benchmark_filter='.*implicit_managed_GPURdHost_coarse/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPURdHost_coarse.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*hipManaged_HostToGPUWriteDst/0/.*' \
|
||||
--benchmark_filter='.*hipManaged_HostToGPUWriteDst/0/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/hipManaged_HostToGPUWriteDst.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*prefetch_managed_GPUToGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToGPU.json"
|
||||
|
||||
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
|
||||
--benchmark_repetitions=5 \
|
||||
--benchmark_filter='.*implicit_mapped_HostWrGPU/0/.*' \
|
||||
--benchmark_out_format=json \
|
||||
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_HostWrGPU.json"
|
||||
|
||||
date
|
||||
|
34
scripts/run_4.sh
Executable file
34
scripts/run_4.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r4
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r4.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r4.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r4.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r4.$SLURM_JOBID.txt" 2>&1

date

# Run the benchmarks matching the filter (5 repetitions each) and write the
# results as JSON.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*hipMemcpyAsync_PageableToGPU/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PageableToGPU.json"

date
|
58
scripts/run_5.sh
Executable file
58
scripts/run_5.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r5
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r5.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r5.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r5.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r5.$SLURM_JOBID.txt" 2>&1

date

# Each invocation runs one benchmark family (5 repetitions each) and writes
# its own JSON file.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_HostWrGPU_fine/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_fine.json"

srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_HostWrGPU_coarse/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_coarse.json"

srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_GPUWrHost_fine/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_fine.json"

srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_GPUWrHost_coarse/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_coarse.json"

srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_GPUWrHost/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrHost.json"

date
|
34
scripts/run_6.sh
Executable file
34
scripts/run_6.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r6
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r6.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r6.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r6.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r6.$SLURM_JOBID.txt" 2>&1

date

# Run the benchmarks matching the filter (5 repetitions each) and write the
# results as JSON.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_GPUToGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToGPU.json"

date
|
40
scripts/run_7.sh
Executable file
40
scripts/run_7.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r7
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r7.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r7.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r7.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r7.$SLURM_JOBID.txt" 2>&1

date

# Each invocation runs one benchmark family (5 repetitions each) and writes
# its own JSON file.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_GPUToHost/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToHost.json"

srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_HostToGPU/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_HostToGPU.json"

date
|
34
scripts/run_8.sh
Executable file
34
scripts/run_8.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r8
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r8.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r8.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r8.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r8.$SLURM_JOBID.txt" 2>&1

date

# Run the benchmarks matching the filter (5 repetitions each) and write the
# results as JSON.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_HostWrGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_HostWrGPU.json"

date
|
34
scripts/run_9.sh
Executable file
34
scripts/run_9.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash

#SBATCH -A CSC465
#SBATCH -J r9
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Same allocation, requested interactively:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

# Source checkout, build tree, and output directory used by this job.
export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# NOTE(review): presumably enables XNACK for the managed-memory benchmarks --
# confirm against the ROCm documentation.
export HSA_XNACK=1

# Snapshot the software/hardware environment alongside the results.
# Expansions are quoted so paths are never word-split or globbed.
mkdir -p "$SCOPE_RESULTS"
module list > "$SCOPE_RESULTS/modules.r9.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r9.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r9.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r9.$SLURM_JOBID.txt" 2>&1

date

# Run the benchmarks matching the filter (5 repetitions each) and write the
# results as JSON.
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_GPUWrGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrGPU.json"

date
|
Reference in New Issue
Block a user