download to central repo and symlink
This commit is contained in:
62
download.py
62
download.py
@@ -4,6 +4,33 @@ import sys
|
|||||||
|
|
||||||
import datasets
|
import datasets
|
||||||
|
|
||||||
|
def ensure_dir(path):
    """Create directory *path*, including missing parents, if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory to create (str or path-like).

    Raises:
        FileExistsError: If *path* exists but is not a directory. Silently
            accepting that case would let later code assume a directory
            that is not actually there.
    """
    print("ensure", path)
    # exist_ok=True tolerates an existing directory but still raises when
    # the path is occupied by a regular file.
    os.makedirs(path, exist_ok=True)
|
||||||
|
|
||||||
|
def ensure_matrix_download(dir, mat):
    """Download *mat* into *dir* unless its ``.mtx`` file is already there.

    The presence check looks for ``dir / mat.name / (mat.name + ".mtx")``.

    Args:
        dir: Download root; must support the ``/`` path operator.
        mat: Object exposing ``name`` and a
            ``download(format=, destpath=, extract=)`` method.
    """
    mtx_name = mat.name + ".mtx"
    expected = dir / mat.name / mtx_name

    if not os.path.exists(expected):
        mat.download(format='MM', destpath=dir, extract=True)
        return

    print(f"SKIP {mat.name}: already exists")
|
||||||
|
|
||||||
|
# link matrix in downDir to linkDir
def ensure_matrix_link(downDir, linkDir, mat):
    """Symlink the ``.mtx`` file of *mat* from *downDir* into *linkDir*.

    The file is looked up at ``downDir / mat.name / (mat.name + ".mtx")``
    and linked as ``linkDir / (mat.name + ".mtx")``. Nothing happens when
    the matrix directory does not contain that file, and an already
    existing link is left untouched.

    Args:
        downDir: Download root; must support the ``/`` path operator.
        linkDir: Directory that receives the symlink; same requirement.
        mat: Object exposing a ``name`` attribute.

    Raises:
        FileNotFoundError: If ``downDir / mat.name`` does not exist.
    """
    mtx_name = mat.name + ".mtx"
    mat_dir = downDir / mat.name

    if mtx_name not in os.listdir(mat_dir):
        return

    # The file lives inside the per-matrix subdirectory; linking to
    # downDir / mtx_name would create a dangling symlink.
    src = mat_dir / mtx_name
    dst = linkDir / mtx_name
    print(f"{src} <- {dst}")
    try:
        os.symlink(src, dst)
    except FileExistsError:
        pass  # link already exists
|
||||||
|
|
||||||
dataset = None
|
dataset = None
|
||||||
for ds in datasets.DATASETS:
|
for ds in datasets.DATASETS:
|
||||||
if ds.name == sys.argv[1]:
|
if ds.name == sys.argv[1]:
|
||||||
@@ -13,38 +40,21 @@ mats = dataset.mats
|
|||||||
|
|
||||||
print(len(mats))
|
print(len(mats))
|
||||||
|
|
||||||
|
# scratch directory
|
||||||
scratchPath = Path(os.environ["SCRATCH"])
|
scratchPath = Path(os.environ["SCRATCH"])
|
||||||
downPath = scratchPath / dataset.name
|
# where matrices will be downloaded
|
||||||
print("ensure", downPath)
|
downDir = scratchPath / "suitesparse"
|
||||||
try:
|
# where the matrix will be linked to
|
||||||
os.makedirs(downPath)
|
linkDir = scratchPath / dataset.name
|
||||||
except FileExistsError:
|
ensure_dir(downDir)
|
||||||
pass # dir already exists
|
ensure_dir(linkDir)
|
||||||
|
|
||||||
for mat in mats:
|
for mat in mats:
|
||||||
|
|
||||||
print(mat.name)
|
print(mat.name)
|
||||||
|
|
||||||
if os.path.exists(downPath / mat.name / (mat.name + ".mtx")):
|
ensure_matrix_download(downDir, mat)
|
||||||
print(f"skipping {mat.name}: already exists")
|
ensure_matrix_link(downDir, linkDir, mat)
|
||||||
continue
|
|
||||||
|
|
||||||
mat.download(format='MM', destpath=downPath, extract=True)
|
|
||||||
|
|
||||||
# TODO: check download for a type that is not 'real' and remove if so
|
|
||||||
|
|
||||||
# TODO: check for non-coordinate and remove, if so
|
|
||||||
|
|
||||||
# many mats include rhs/whatever in extracted. toss that.
|
|
||||||
files = os.listdir(downPath / mat.name)
|
|
||||||
for f in files:
|
|
||||||
if f != (mat.name + ".mtx"):
|
|
||||||
print("DELETE ", f)
|
|
||||||
os.remove(downPath / mat.name / f)
|
|
||||||
|
|
||||||
files = os.listdir(downPath / mat.name)
|
|
||||||
if len(files) == 0:
|
|
||||||
os.rmdir(downPath / mat.name)
|
|
||||||
|
|
||||||
|
|
||||||
# blacklist:
|
# blacklist:
|
||||||
|
Reference in New Issue
Block a user