initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
__pycache__
|
||||
poetry.lock
|
26
README.md
Normal file
26
README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# ss-downloader
|
||||
|
||||
Install poetry & Python 3.8+
|
||||
|
||||
```
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
```
|
||||
|
||||
## how to use
|
||||
|
||||
```
|
||||
source load-env.sh
|
||||
poetry run python list.py
|
||||
poetry run python download.py
|
||||
```
|
||||
|
||||
## how this was done
|
||||
|
||||
```
|
||||
poetry init
|
||||
poetry add ssgetpy
|
||||
```
|
||||
|
||||
```
|
||||
poetry install
|
||||
```
|
97
datasets.py
Normal file
97
datasets.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import collections
|
||||
|
||||
import ssgetpy
|
||||
|
||||
# A named collection of matrices: `name` labels the dataset, `mats` holds its matrix records.
Dataset = collections.namedtuple("Dataset", "name mats")
|
||||
|
||||
|
||||
def filter_reject_blacklist(mats, suffixes=("_b",)):
    """Return the matrices whose name does not end in a blacklisted suffix.

    Args:
        mats: Iterable of matrix records exposing a ``name`` attribute.
        suffixes: Name suffix (a single string) or iterable of suffixes to
            reject. Defaults to ``("_b",)``.

    Returns:
        A new list with the accepted matrices, in their original order.
    """
    # A bare string would be split into characters by tuple(); wrap it instead.
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    rejected = tuple(suffixes)
    # str.endswith accepts a tuple, so all suffixes are tested in one call.
    return [mat for mat in mats if not mat.name.endswith(rejected)]
|
||||
|
||||
def filter_reject_large(mats, max_rows=1_000_000, max_cols=1_000_000, max_nnz=20_000_000):
    """Return the matrices that do not exceed any of the size limits.

    A matrix is rejected when its ``rows``, ``cols`` or ``nnz`` attribute is
    strictly greater than the corresponding limit; values equal to a limit
    are kept.

    Args:
        mats: Iterable of matrix records exposing ``rows``, ``cols`` and ``nnz``.
        max_rows: Largest accepted row count.
        max_cols: Largest accepted column count.
        max_nnz: Largest accepted number of nonzeros.

    Returns:
        A new list with the accepted matrices, in their original order.
    """
    return [
        mat
        for mat in mats
        if mat.rows <= max_rows and mat.cols <= max_cols and mat.nnz <= max_nnz
    ]
|
||||
|
||||
def filter_reject_small(mats, min_rows=1_000, min_cols=1_000, min_nnz=20_000):
    """Return the matrices that reach all of the minimum sizes.

    A matrix is rejected when its ``rows``, ``cols`` or ``nnz`` attribute is
    strictly less than the corresponding minimum; values equal to a minimum
    are kept.

    Args:
        mats: Iterable of matrix records exposing ``rows``, ``cols`` and ``nnz``.
        min_rows: Smallest accepted row count.
        min_cols: Smallest accepted column count.
        min_nnz: Smallest accepted number of nonzeros.

    Returns:
        A new list with the accepted matrices, in their original order.
    """
    return [
        mat
        for mat in mats
        if mat.rows >= min_rows and mat.cols >= min_cols and mat.nnz >= min_nnz
    ]
|
||||
|
||||
## all real-valued matrices
# One search over every matrix with dtype 'real', minus the blacklisted names.
# NOTE(review): the very large `limit` is presumably there so the search result
# is not truncated -- confirm against ssgetpy's default limit.
REAL_MATS = Dataset(
    "all_reals",
    filter_reject_blacklist(ssgetpy.search(limit=1_000_000, dtype="real")),
)
|
||||
|
||||
## certain matrices with regular structure
# Values passed as `kind=` to ssgetpy.search below, one search per entry.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]

# Accumulate the real-valued matrices of every listed kind into one list.
mats = []
for kind in kinds:
    mats += ssgetpy.search(
        kind=kind,
        dtype='real',
        limit=1_000_000,
    )

# Bound exactly once, after the searches, with blacklisted names removed.
REGULAR_REAL_MATS = Dataset(
    name="regular_reals",
    mats=filter_reject_blacklist(mats),
)
|
||||
|
||||
## keep "small" matrices
# "Small" means only the upper size limits of filter_reject_large are applied.
REGULAR_REAL_SMALL_MATS = Dataset(
    "regular_reals_small",
    filter_reject_large(REGULAR_REAL_MATS.mats),
)
REAL_SMALL_MATS = Dataset(
    "reals_small",
    filter_reject_large(REAL_MATS.mats),
)

## keep "medium" matrices
# "Medium" means the lower bounds (filter_reject_small) are applied first,
# then the upper limits (filter_reject_large).
REGULAR_REAL_MED_MATS = Dataset(
    "regular_reals_med",
    filter_reject_large(filter_reject_small(REGULAR_REAL_MATS.mats)),
)
REAL_MED_MATS = Dataset(
    "reals_med",
    filter_reject_large(filter_reject_small(REAL_MATS.mats)),
)
|
||||
|
||||
## export all datasets
# First row: datasets built from every real matrix; second row: the
# regular-structure subsets. Order matches the original listing.
DATASETS = [
    REAL_MATS, REAL_SMALL_MATS, REAL_MED_MATS,
    REGULAR_REAL_MATS, REGULAR_REAL_SMALL_MATS, REGULAR_REAL_MED_MATS,
]
|
49
download.py
Executable file
49
download.py
Executable file
@@ -0,0 +1,49 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import datasets
|
||||
|
||||
# Matrices to download. `datasets` exports REAL_MATS, a Dataset namedtuple;
# the matrix list is its `.mats` field (len() of the Dataset itself would be 2).
mats = datasets.REAL_MATS.mats

print(len(mats))

# Everything is stored under $SCRATCH/suitesparse; a missing SCRATCH variable
# raises KeyError here, before anything is downloaded.
scratchPath = Path(os.environ["SCRATCH"])
downPath = scratchPath / "suitesparse"
print("ensure", downPath)
os.makedirs(downPath, exist_ok=True)

for mat in mats:

    print(mat.name)

    # Each matrix ends up at <downPath>/<name>/<name>.mtx
    matDir = downPath / mat.name
    mtxName = mat.name + ".mtx"

    if os.path.exists(matDir / mtxName):
        print(f"skipping {mat.name}: already exists")
        continue

    mat.download(format='MM', destpath=downPath, extract=True)

    # TODO: check download for a type that is not 'real' and remove if so

    # TODO: check for non-coordinate and remove, if so

    # many mats include rhs/whatever in extracted. toss that.
    for f in os.listdir(matDir):
        if f != mtxName:
            print("DELETE ", f)
            os.remove(matDir / f)

    # Drop the directory when nothing (not even the .mtx) is left in it.
    if not os.listdir(matDir):
        os.rmdir(matDir)
|
||||
|
||||
|
||||
# blacklist:
|
||||
# cavity(\d+)_[bx].mtx
|
||||
# circuit(\d+)_[bx].mtx
|
||||
# other things that end in _b.mtx?
|
||||
#
|
4
list.py
Normal file
4
list.py
Normal file
@@ -0,0 +1,4 @@
|
||||
import datasets
|
||||
|
||||
# Print one summary line per exported dataset: its name and matrix count.
for dataset in datasets.DATASETS:
    count = len(dataset.mats)
    print(f"{dataset.name}: {count} matrices")
|
17
load-env.sh
Normal file
17
load-env.sh
Normal file
@@ -0,0 +1,17 @@
|
||||
#! /bin/bash
# Loads the Python module matching the current NERSC system, selected by
# $NERSC_HOST. Does nothing when $NERSC_HOST matches neither system.

# Not read anywhere else in this script.
host=$(hostname)

case "$NERSC_HOST" in
    cori)
        echo \$NERSC_HOST matched cori
        module load cray-python/3.8.5.0

        which python
        ;;
    perlmutter)
        echo \$NERSC_HOST matched perlmutter

        module load cray-python/3.9.4.1

        which python
        ;;
esac
|
||||
|
15
pyproject.toml
Normal file
15
pyproject.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[tool.poetry]
|
||||
name = "ss-downloader"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Carl Pearson <cwpears@sandia.gov>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
ssgetpy = "^1.0rc2"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
Reference in New Issue
Block a user