initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
__pycache__
|
||||||
|
poetry.lock
|
26
README.md
Normal file
26
README.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# ss-downloader
|
||||||
|
|
||||||
|
Install poetry & Python 3.8+
|
||||||
|
|
||||||
|
```
|
||||||
|
curl -sSL https://install.python-poetry.org | python3 -
|
||||||
|
```
|
||||||
|
|
||||||
|
## how to use
|
||||||
|
|
||||||
|
```
|
||||||
|
source load-env.sh
|
||||||
|
poetry run python list.py
|
||||||
|
poetry run python download.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## how this was done
|
||||||
|
|
||||||
|
```
|
||||||
|
poetry init
|
||||||
|
poetry add ssgetpy
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
poetry install
|
||||||
|
```
|
97
datasets.py
Normal file
97
datasets.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
import collections
|
||||||
|
|
||||||
|
import ssgetpy
|
||||||
|
|
||||||
|
# A named group of matrices: `name` labels the dataset, `mats` holds its matrix entries.
Dataset = collections.namedtuple("Dataset", ["name", "mats"])
|
||||||
|
|
||||||
|
|
||||||
|
def filter_reject_blacklist(mats, suffixes=("_b",)):
    """Return the matrices whose name does not end in a blacklisted suffix.

    Args:
        mats: iterable of objects exposing a ``name`` string attribute
            (in this file, the results of ``ssgetpy.search``).
        suffixes: name suffix, or iterable of suffixes, to reject.
            Defaults to ``("_b",)``, the original hard-coded rule.

    Returns:
        A new list with the surviving matrices in their original order;
        the input is not modified.
    """
    # A bare string would otherwise be split into single characters by tuple().
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    rejected = tuple(suffixes)
    return [mat for mat in mats if not mat.name.endswith(rejected)]
|
||||||
|
|
||||||
|
def filter_reject_large(mats, max_rows=1_000_000, max_cols=1_000_000,
                        max_nnz=20_000_000):
    """Return the matrices that do not exceed any of the size limits.

    A matrix is rejected when its ``rows``, ``cols`` or ``nnz`` attribute is
    strictly greater than the corresponding limit; values equal to a limit
    are kept.

    Args:
        mats: iterable of objects exposing ``rows``, ``cols`` and ``nnz``.
        max_rows: largest accepted row count.
        max_cols: largest accepted column count.
        max_nnz: largest accepted ``nnz`` value.

    Returns:
        A new list with the surviving matrices in their original order.
    """
    return [
        mat
        for mat in mats
        if mat.rows <= max_rows and mat.cols <= max_cols and mat.nnz <= max_nnz
    ]
|
||||||
|
|
||||||
|
def filter_reject_small(mats, min_rows=1_000, min_cols=1_000, min_nnz=20_000):
    """Return the matrices that reach every minimum size limit.

    A matrix is rejected when its ``rows``, ``cols`` or ``nnz`` attribute is
    strictly less than the corresponding limit; values equal to a limit are
    kept.

    Args:
        mats: iterable of objects exposing ``rows``, ``cols`` and ``nnz``.
        min_rows: smallest accepted row count.
        min_cols: smallest accepted column count.
        min_nnz: smallest accepted ``nnz`` value.

    Returns:
        A new list with the surviving matrices in their original order.
    """
    return [
        mat
        for mat in mats
        if mat.rows >= min_rows and mat.cols >= min_cols and mat.nnz >= min_nnz
    ]
|
||||||
|
|
||||||
|
## all real-valued matrices
# One search over the whole collection for real-valued matrices, with
# blacklisted names removed.
REAL_MATS = Dataset(
    name="all_reals",
    mats=filter_reject_blacklist(
        ssgetpy.search(dtype='real', limit=1_000_000)
    ),
)
|
||||||
|
|
||||||
|
## certain matrices with regular structure
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]
# Run one search per kind and concatenate the results. The earlier
# placeholder `REGULAR_REAL_MATS = Dataset(..., mats=[])` was overwritten
# below before anything could read it, so it has been dropped.
mats = []
for kind in kinds:
    mats += ssgetpy.search(
        kind=kind,
        dtype='real',
        limit=1_000_000,
    )
REGULAR_REAL_MATS = Dataset(
    name="regular_reals",
    mats=filter_reject_blacklist(mats),
)
|
||||||
|
|
||||||
|
## keep "small" matrices
# Same two base datasets with every over-sized matrix removed.
REGULAR_REAL_SMALL_MATS = Dataset(
    "regular_reals_small",
    filter_reject_large(REGULAR_REAL_MATS.mats),
)
REAL_SMALL_MATS = Dataset(
    "reals_small",
    filter_reject_large(REAL_MATS.mats),
)
|
||||||
|
|
||||||
|
## keep "medium" matrices
# Drop the under-sized matrices first, then the over-sized ones.
REGULAR_REAL_MED_MATS = Dataset(
    "regular_reals_med",
    filter_reject_large(filter_reject_small(REGULAR_REAL_MATS.mats)),
)
REAL_MED_MATS = Dataset(
    "reals_med",
    filter_reject_large(filter_reject_small(REAL_MATS.mats)),
)
|
||||||
|
|
||||||
|
## export all datasets
# Every dataset defined above, in the order they are listed here.
DATASETS = [
    REAL_MATS,
    REAL_SMALL_MATS,
    REAL_MED_MATS,
    REGULAR_REAL_MATS,
    REGULAR_REAL_SMALL_MATS,
    REGULAR_REAL_MED_MATS,
]
|
49
download.py
Executable file
49
download.py
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
|
||||||
|
# Download every matrix of the "all real-valued" dataset into
# $SCRATCH/suitesparse, keeping only <name>/<name>.mtx for each one.
#
# datasets.py exports REAL_MATS (there is no ALL_REAL_MATS), and a Dataset is
# a (name, mats) namedtuple, so the matrices themselves are in `.mats`.
mats = datasets.REAL_MATS.mats

print(len(mats))

# SCRATCH must be set in the environment; a missing variable raises KeyError.
scratchPath = Path(os.environ["SCRATCH"])
downPath = scratchPath / "suitesparse"
print("ensure", downPath)
# exist_ok tolerates an existing directory but still fails if the path is
# occupied by something that is not a directory.
os.makedirs(downPath, exist_ok=True)

for mat in mats:

    print(mat.name)

    matDir = downPath / mat.name
    mtxName = mat.name + ".mtx"

    if os.path.exists(matDir / mtxName):
        print(f"skipping {mat.name}: already exists")
        continue

    mat.download(format='MM', destpath=downPath, extract=True)

    # TODO: check download for a type that is not 'real' and remove if so

    # TODO: check for non-coordinate and remove, if so

    # Many archives also extract extra files (right-hand sides etc.);
    # keep only the matrix file itself.
    for f in os.listdir(matDir):
        if f != mtxName:
            print("DELETE ", f)
            os.remove(matDir / f)

    # If nothing is left (no <name>.mtx was extracted), remove the empty dir.
    if not os.listdir(matDir):
        os.rmdir(matDir)
|
||||||
|
|
||||||
|
|
||||||
|
# blacklist:
|
||||||
|
# cavity(\d+)_[bx].mtx
|
||||||
|
# circuit(\d+)_[bx].mtx
|
||||||
|
# other things that end in _b.mtx?
|
||||||
|
#
|
4
list.py
Normal file
4
list.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import datasets
|
||||||
|
|
||||||
|
# Print one summary line (dataset name and matrix count) per exported dataset.
for ds in datasets.DATASETS:
    print(f"{ds.name}: {len(ds.mats)} matrices")
|
17
load-env.sh
Normal file
17
load-env.sh
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#! /bin/bash
|
||||||
|
|
||||||
|
host=$(hostname)

# Load the Python module that matches the NERSC system named by $NERSC_HOST.
# Any other value (or an unset variable) loads nothing.
case "$NERSC_HOST" in
    cori)
        echo \$NERSC_HOST matched cori
        module load cray-python/3.8.5.0

        which python
        ;;
    perlmutter)
        echo \$NERSC_HOST matched perlmutter

        module load cray-python/3.9.4.1

        which python
        ;;
esac
|
||||||
|
|
15
pyproject.toml
Normal file
15
pyproject.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "ss-downloader"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = ""
|
||||||
|
authors = ["Carl Pearson <cwpears@sandia.gov>"]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.8"
|
||||||
|
ssgetpy = "^1.0rc2"
|
||||||
|
|
||||||
|
[tool.poetry.dev-dependencies]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
Reference in New Issue
Block a user