From 54204ea0867bd681173db66153c4569b3336faca Mon Sep 17 00:00:00 2001
From: Carl Pearson
Date: Wed, 24 Nov 2021 11:55:07 -0800
Subject: [PATCH] download to central repo and symlink

---
 download.py | 62 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/download.py b/download.py
index e5d9d71..a3cc7ca 100755
--- a/download.py
+++ b/download.py
@@ -4,6 +4,33 @@ import sys
 
 import datasets
 
+def ensure_dir(path):
+    """Create directory `path` (and missing parents); no-op if it already exists."""
+    print("ensure", path)
+    os.makedirs(path, exist_ok=True)
+
+def ensure_matrix_download(dir, mat):
+    """Download and extract `mat` under `dir` unless its .mtx file is already there."""
+    if os.path.exists(dir / mat.name / (mat.name + ".mtx")):
+        print(f"SKIP {mat.name}: already exists")
+        return
+    mat.download(format='MM', destpath=dir, extract=True)
+
+def ensure_matrix_link(downDir, linkDir, mat):
+    """Symlink linkDir/<name>.mtx to the downloaded downDir/<name>/<name>.mtx."""
+    files = os.listdir(downDir / mat.name)
+    for f in files:
+        if f == mat.name + ".mtx":
+            # the file lives in downDir/<name>/, the directory listed above
+            src = downDir / mat.name / f
+            dst = linkDir / (mat.name + ".mtx")
+            print(f"{src} <- {dst}")
+            try:
+                os.symlink(src, dst)
+            except FileExistsError:
+                pass # link already exists
+            return
+
 dataset = None
 for ds in datasets.DATASETS:
     if ds.name == sys.argv[1]:
@@ -13,38 +40,21 @@ mats = dataset.mats
 print(len(mats))
 
 
+# scratch directory
 scratchPath = Path(os.environ["SCRATCH"])
 
-downPath = scratchPath / dataset.name
-print("ensure", downPath)
-try:
-    os.makedirs(downPath)
-except FileExistsError:
-    pass # dir already exists
+# where matrices will be downloaded
+downDir = scratchPath / "suitesparse"
+# where the matrix will be linked to
+linkDir = scratchPath / dataset.name
 
+ensure_dir(downDir)
+ensure_dir(linkDir)
 
 for mat in mats:
     print(mat.name)
 
-    if os.path.exists(downPath / mat.name / (mat.name + ".mtx")):
-        print(f"skipping {mat.name}: already exists")
-        continue
-
-    mat.download(format='MM', destpath=downPath, extract=True)
-
-    # TODO: check download for a type that is not 'real' and remove if so
-
-    # TODO: check for non-coordinate and remove, if so
-
-    # many mats include rhs/whatever in extracted. toss that.
-    files = os.listdir(downPath / mat.name)
-    for f in files:
-        if f != (mat.name + ".mtx"):
-            print("DELETE ", f)
-            os.remove(downPath / mat.name / f)
-
-    files = os.listdir(downPath / mat.name)
-    if len(files) == 0:
-        os.rmdir(downPath / mat.name)
+    ensure_matrix_download(downDir, mat)
+    ensure_matrix_link(downDir, linkDir, mat)
 
 # blacklist: