frontier scripts

This commit is contained in:
Carl Pearson
2023-10-09 17:04:07 -04:00
parent d19fcf17c4
commit 90b1286205
29 changed files with 265804 additions and 380 deletions

302
.gitignore vendored
View File

@@ -1,301 +1 @@
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib
.venv

108
results/figs.py Normal file
View File

@@ -0,0 +1,108 @@
from pathlib import Path
import json
from dataclasses import dataclass
import numpy as np
import matplotlib.pyplot as plt

# Directory containing this script; benchmark result *.json files live
# alongside it.
RESULTS_DIR = Path(__file__).parent

## Read all data
#
# data maps run_name -> ([bytes...], [real_time...]), accumulating every
# repetition found in every parseable *.json file in RESULTS_DIR.
data = {}
for entry in RESULTS_DIR.iterdir():
    if entry.suffix != ".json":
        continue
    print(f"load {entry}")
    with open(entry, "r") as f:
        try:
            res = json.load(f)
        except json.JSONDecodeError:
            # Some result files were cut off mid-write; skip anything that
            # does not parse rather than aborting the whole figure run.
            print("SKIP - incorrect formatting")
            continue
    for benchmark in res["benchmarks"]:
        # "iteration" rows are the raw repetitions; google-benchmark's own
        # aggregate rows ("mean", "stddev", ...) are recomputed below.
        if benchmark["run_type"] != "iteration":
            continue
        run_name = benchmark["run_name"]
        # Skip the degenerate 0/0 configurations (and the specific
        # Comm_prefetch_managed_GPUToGPU/0/0 name, which lacks the
        # trailing slash matched by the generic pattern).
        if "Comm_prefetch_managed_GPUToGPU/0/0" in run_name:
            continue
        if "/0/0/" in run_name:
            continue
        xs, ys = data.get(run_name, ([], []))
        ys.append(benchmark["real_time"])
        xs.append(int(benchmark["bytes"]))
        data[run_name] = (xs, ys)

## compute aggregates
#
# Replace each raw (sizes, times) pair with a 5-tuple:
#   (bytes, mean time, time stddev, mean bandwidth, bandwidth stddev)
for name, (xs, ys) in data.items():
    # Every repetition of a given run_name must move the same byte count.
    # (raise, not assert: assert is stripped under `python -O`)
    if not all(x_i == xs[0] for x_i in xs):
        raise ValueError(f"inconsistent byte counts for {name}: {xs}")
    b = xs[0]  # bytes transferred per repetition
    bws = [b / y for y in ys]  # bytes per unit of real_time
    data[name] = (b, np.mean(ys), np.std(ys), np.mean(bws), np.std(bws))

## split data by name
#
# Collapse "<bench>/<f1>/<f2>/<size>/..." down to "<bench>/<f1>/<f2>" so
# that all transfer sizes of one configuration land in the same series.
series = {}
for name, point in data.items():
    bench, f1, f2 = name.split("/")[0:3]
    # f1/f2 are expected to be integer device ordinals; int() validates.
    key = "/".join((bench, str(int(f1)), str(int(f2))))
    series.setdefault(key, []).append(point)

# Sort each series by transfer size, then transpose the list of 5-tuples
# [(x, t, terr, bw, bwerr), ...] into five parallel tuples for plotting.
for name, points in series.items():
    points.sort(key=lambda p: p[0])
    series[name] = tuple(zip(*points))

# NOTE: an earlier version also drew every series onto one figure here, but
# that figure was cleared by plt.clf() before ever being saved, so the
# dead plotting work has been removed; saved outputs are unchanged.

# One figure per benchmark family: every series whose name contains the
# pattern goes onto a log-x bandwidth-vs-size errorbar plot.
for pattern in [
    "hipManaged_HostToGPUWriteDst",
    "hipMemcpyAsync_GPUToGPU",
    "hipMemcpyAsync_GPUToPageable",
    "hipMemcpyAsync_GPUToPinned",
    "implicit_managed_GPURdHost_coarse",
    "implicit_managed_GPURdHost_fine",
    "implicit_managed_GPUWrGPU_coarse",
    "implicit_managed_GPUWrGPU_fine",
    "implicit_mapped_GPURdHost",
    "prefetch_managed_GPUToGPU",
    "prefetch_managed_GPUToHost",
]:
    plt.clf()
    for name, (x, t, terr, bw, bwerr) in series.items():
        if pattern not in name:
            continue
        plt.errorbar(x, bw, yerr=bwerr, label=name)
    output_path = f"{pattern}.pdf"
    print(f"write {output_path}")
    plt.xscale('log')
    # Anchor the legend outside the axes so long series names don't
    # cover the data; pass it to savefig so it isn't clipped.
    lgd = plt.legend(bbox_to_anchor=(1.04, 1))
    plt.tight_layout()
    plt.savefig(output_path, bbox_extra_artists=(lgd,), bbox_inches='tight')

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,38 @@
{
"context": {
"date": "2023-10-09T16:54:29-04:00",
"host_name": "frontier01017",
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
"num_cpus": 128,
"mhz_per_cpu": 1813,
"cpu_scaling_enabled": false,
"caches": [
{
"type": "Data",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Instruction",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Unified",
"level": 2,
"size": 524288,
"num_sharing": 2
},
{
"type": "Unified",
"level": 3,
"size": 33554432,
"num_sharing": 16
}
],
"load_avg": [12.35,15.21,12.5],
"library_build_type": "release"
},
"benchmarks": [

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,38 @@
{
"context": {
"date": "2023-10-09T16:41:39-04:00",
"host_name": "frontier06647",
"executable": "/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth/build/comm_scope",
"num_cpus": 128,
"mhz_per_cpu": 1798,
"cpu_scaling_enabled": false,
"caches": [
{
"type": "Data",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Instruction",
"level": 1,
"size": 32768,
"num_sharing": 2
},
{
"type": "Unified",
"level": 2,
"size": 524288,
"num_sharing": 2
},
{
"type": "Unified",
"level": 3,
"size": 33554432,
"num_sharing": 16
}
],
"load_avg": [1.31,5.45,4.52],
"library_build_type": "release"
},
"benchmarks": [

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2
results/requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
matplotlib
numpy

View File

@@ -20,49 +20,27 @@ export HSA_XNACK=1
mkdir -p $SCOPE_RESULTS
module list > $SCOPE_RESULTS/modules.r1.$SLURM_JOBID.txt 2>&1
env > $SCOPE_RESULTS/env.r1.$SLURM_JOBID.txt
rocm-smi > $SCOPE_RESULTS/rocm-smi.r1.$SLURM_JOBID.txt 2>&1
lscpu > $SCOPE_RESULTS/lscpu.r1.$SLURM_JOBID.txt 2>&1
date
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipMemcpyAsync_GPUToPinned/0/.*' \
--benchmark_filter='.*hipMemcpyAsync_GPUToPinned/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToPinned.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipMemcpyAsync_PinnedToGPU/0/.*' \
--benchmark_filter='.*hipMemcpyAsync_PinnedToGPU/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PinnedToGPU"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipMemcpyAsync_GPUToGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToGPU.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipMemcpyAsync_GPUToPageable/0/.*' \
--benchmark_filter='.*hipMemcpyAsync_GPUToPageable/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToPageable.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipMemcpyAsync_PageableToGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PageableToGPU.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*prefetch_managed_GPUToHost/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToHost.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*prefetch_managed_HostToGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_HostToGPU.json"
date

34
scripts/run_10.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r10
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 10: comm_scope hipMemcpyAsync GPU-to-GPU sweep
# from GPU 0 to every peer ('/0/.*' keeps all destination devices).
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r10.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r10.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r10.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r10.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*hipMemcpyAsync_GPUToGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_GPUToGPU.json"
date

View File

@@ -20,49 +20,15 @@ export HSA_XNACK=1
mkdir -p $SCOPE_RESULTS
module list > $SCOPE_RESULTS/modules.r2.$SLURM_JOBID.txt 2>&1
env > $SCOPE_RESULTS/env.r2.$SLURM_JOBID.txt
rocm-smi > $SCOPE_RESULTS/rocm-smi.r2.$SLURM_JOBID.txt 2>&1
lscpu > $SCOPE_RESULTS/lscpu.r2.$SLURM_JOBID.txt 2>&1
date
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_mapped_GPURdHost/0/.*' \
--benchmark_filter='.*implicit_mapped_GPURdHost/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPURdHost.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_HostWrGPU_fine/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_fine.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_HostWrGPU_coarse/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_coarse.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_GPUWrHost_fine/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_fine.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_GPUWrHost_coarse/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_coarse.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_mapped_GPUWrHost/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrHost.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_mapped_GPUWrGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrGPU.json"
date

View File

@@ -20,6 +20,8 @@ export HSA_XNACK=1
mkdir -p $SCOPE_RESULTS
module list > $SCOPE_RESULTS/modules.r3.$SLURM_JOBID.txt 2>&1
env > $SCOPE_RESULTS/env.r3.$SLURM_JOBID.txt
rocm-smi > $SCOPE_RESULTS/rocm-smi.r3.$SLURM_JOBID.txt 2>&1
lscpu > $SCOPE_RESULTS/lscpu.r3.$SLURM_JOBID.txt 2>&1
date
@@ -37,32 +39,20 @@ srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_GPURdHost_fine/0/.*' \
--benchmark_filter='.*implicit_managed_GPURdHost_fine/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPURdHost_fine.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_managed_GPURdHost_coarse/0/.*' \
--benchmark_filter='.*implicit_managed_GPURdHost_coarse/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_managed_GPURdHost_coarse.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*hipManaged_HostToGPUWriteDst/0/.*' \
--benchmark_filter='.*hipManaged_HostToGPUWriteDst/0/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/hipManaged_HostToGPUWriteDst.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*prefetch_managed_GPUToGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToGPU.json"
srun -c 56 -n 1 --gpus 8 $SCOPE_BUILD/comm_scope \
--benchmark_repetitions=5 \
--benchmark_filter='.*implicit_mapped_HostWrGPU/0/.*' \
--benchmark_out_format=json \
--benchmark_out="$SCOPE_RESULTS/implicit_mapped_HostWrGPU.json"
date

34
scripts/run_4.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r4
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 4: comm_scope hipMemcpyAsync pageable-host to
# GPU 0 ('/0/0/' pins the src/dst configuration).
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r4.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r4.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r4.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r4.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*hipMemcpyAsync_PageableToGPU/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/hipMemcpyAsync_PageableToGPU.json"
date

58
scripts/run_5.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r5
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 5: comm_scope implicit managed/mapped
# host<->GPU benchmarks, all pinned to the 0/0 src/dst configuration.
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r5.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r5.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r5.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r5.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_HostWrGPU_fine/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_fine.json"
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_HostWrGPU_coarse/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_HostWrGPU_coarse.json"
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_GPUWrHost_fine/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_fine.json"
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_managed_GPUWrHost_coarse/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_managed_GPUWrHost_coarse.json"
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_GPUWrHost/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrHost.json"
date

34
scripts/run_6.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r6
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 6: comm_scope managed-memory prefetch
# GPU-to-GPU sweep from GPU 0 to every peer ('/0/.*').
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r6.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r6.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r6.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r6.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_GPUToGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToGPU.json"
date

40
scripts/run_7.sh Executable file
View File

@@ -0,0 +1,40 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r7
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 7: comm_scope managed-memory prefetch
# GPU<->host benchmarks, pinned to the 0/0 src/dst configuration.
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r7.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r7.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r7.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r7.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_GPUToHost/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_GPUToHost.json"
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*prefetch_managed_HostToGPU/0/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/prefetch_managed_HostToGPU.json"
date

34
scripts/run_8.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r8
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 8: comm_scope implicit mapped host-write-GPU
# sweep from GPU 0 ('/0/.*').
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r8.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r8.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r8.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r8.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_HostWrGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_HostWrGPU.json"
date

34
scripts/run_9.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -A CSC465
#SBATCH -J r9
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 1

# Frontier GPU-bandwidth run 9: comm_scope implicit mapped GPU-write-GPU
# sweep from GPU 0 to every peer ('/0/.*').
# Interactive equivalent:
# salloc -A CSC465 -N 1 -p batch -t 2:00:00

export ROOT=/lustre/orion/csc465/scratch/cpearson/frontier-gpu-bandwidth
export SCOPE_SRC=${ROOT}/comm_scope
export SCOPE_BUILD=${ROOT}/build
export SCOPE_RESULTS=${ROOT}/run

module load PrgEnv-amd/8.3.3
# Enable XNACK so unified/managed memory can page-migrate on the GPUs.
export HSA_XNACK=1

mkdir -p "$SCOPE_RESULTS"
# Record the software/hardware environment next to the results
# for reproducibility.
module list > "$SCOPE_RESULTS/modules.r9.$SLURM_JOBID.txt" 2>&1
env > "$SCOPE_RESULTS/env.r9.$SLURM_JOBID.txt"
rocm-smi > "$SCOPE_RESULTS/rocm-smi.r9.$SLURM_JOBID.txt" 2>&1
lscpu > "$SCOPE_RESULTS/lscpu.r9.$SLURM_JOBID.txt" 2>&1

date
srun -c 56 -n 1 --gpus 8 "$SCOPE_BUILD/comm_scope" \
  --benchmark_repetitions=5 \
  --benchmark_filter='.*implicit_mapped_GPUWrGPU/0/.*' \
  --benchmark_out_format=json \
  --benchmark_out="$SCOPE_RESULTS/implicit_mapped_GPUWrGPU.json"
date