download to central repo and symlink

This commit is contained in:
Carl Pearson
2021-11-24 11:55:07 -08:00
parent e029b66edb
commit 54204ea086

View File

@@ -4,6 +4,33 @@ import sys
import datasets import datasets
def ensure_dir(path):
    """Create directory *path* (and any missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: directory to create; anything ``os.makedirs`` accepts.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    print("ensure", path)
    # exist_ok=True tolerates an existing directory but still raises when a
    # regular file occupies the path, instead of silently pretending the
    # directory is there.
    os.makedirs(path, exist_ok=True)
def ensure_matrix_download(dir, mat):
    """Download *mat* into *dir* unless its .mtx file is already there.

    The presence check looks for ``dir/<mat.name>/<mat.name>.mtx``.

    Args:
        dir: directory the matrix is downloaded into (must support ``/``).
        mat: object with a ``name`` attribute and a ``download`` method.
    """
    mtx_path = dir / mat.name / (mat.name + ".mtx")
    if not os.path.exists(mtx_path):
        mat.download(format='MM', destpath=dir, extract=True)
        return
    print(f"SKIP {mat.name}: already exists")
# link matrix in downDir to linkDir
def ensure_matrix_link(downDir, linkDir, mat):
    """Symlink the downloaded .mtx file of *mat* into *linkDir*.

    The link ``linkDir/<mat.name>.mtx`` is pointed at
    ``downDir/<mat.name>/<mat.name>.mtx``. Nothing happens when that file is
    not present in the matrix's download directory, and an existing entry at
    the link location is left untouched.

    Args:
        downDir: root directory holding one sub-directory per matrix.
        linkDir: directory in which the symlink is created.
        mat: object with a ``name`` attribute.

    Raises:
        FileNotFoundError: if ``downDir/<mat.name>`` does not exist.
    """
    mtx_name = mat.name + ".mtx"
    mat_dir = downDir / mat.name
    if mtx_name not in os.listdir(mat_dir):
        return

    # The file lives inside the per-matrix sub-directory; linking to
    # downDir / mtx_name would produce a dangling symlink.
    src = mat_dir / mtx_name
    dst = linkDir / mtx_name
    print(f"{src} <- {dst}")
    try:
        os.symlink(src, dst)
    except FileExistsError:
        pass  # link already exists
dataset = None dataset = None
for ds in datasets.DATASETS: for ds in datasets.DATASETS:
if ds.name == sys.argv[1]: if ds.name == sys.argv[1]:
@@ -13,38 +40,21 @@ mats = dataset.mats
print(len(mats)) print(len(mats))
# scratch directory
scratchPath = Path(os.environ["SCRATCH"]) scratchPath = Path(os.environ["SCRATCH"])
downPath = scratchPath / dataset.name # where matrices will be downloaded
print("ensure", downPath) downDir = scratchPath / "suitesparse"
try: # where the matrix will be linked to
os.makedirs(downPath) linkDir = scratchPath / dataset.name
except FileExistsError: ensure_dir(downDir)
pass # dir already exists ensure_dir(linkDir)
for mat in mats: for mat in mats:
print(mat.name) print(mat.name)
if os.path.exists(downPath / mat.name / (mat.name + ".mtx")): ensure_matrix_download(downDir, mat)
print(f"skipping {mat.name}: already exists") ensure_matrix_link(downDir, linkDir, mat)
continue
mat.download(format='MM', destpath=downPath, extract=True)
# TODO: check download for a type that is not 'real' and remove if so
# TODO: check for non-coordinate and remove, if so
# many mats include rhs/whatever in extracted. toss that.
files = os.listdir(downPath / mat.name)
for f in files:
if f != (mat.name + ".mtx"):
print("DELETE ", f)
os.remove(downPath / mat.name / f)
files = os.listdir(downPath / mat.name)
if len(files) == 0:
os.rmdir(downPath / mat.name)
# blacklist: # blacklist: