initial commit

This commit is contained in:
Carl Pearson
2021-11-23 07:33:20 -08:00
commit 7e68d4f712
7 changed files with 210 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
__pycache__
poetry.lock

26
README.md Normal file
View File

@@ -0,0 +1,26 @@
# ss-downloader
Install poetry & Python 3.8+
```
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
```
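## how to use
`download.py` writes into `$SCRATCH/suitesparse`, so the `SCRATCH` environment variable must be set before running it.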
```
source load-env.sh
poetry run python list.py
poetry run python download.py
```
## how this was done
```
poetry init
poetry add ssgetpy
```
```
poetry install
```

97
datasets.py Normal file
View File

@@ -0,0 +1,97 @@
import collections
import ssgetpy
Dataset = collections.namedtuple("Dataset", ["name", "mats"])
def filter_reject_blacklist(mats, suffixes=("_b",)):
    """Return the matrices whose name does not end with a rejected suffix.

    Args:
        mats: Iterable of objects exposing a ``name`` string attribute.
        suffixes: A suffix, or an iterable of suffixes, to reject. The
            default ``("_b",)`` keeps the original hard-coded rule.

    Returns:
        A new list, in input order, of the matrices that were kept.
        ``mats`` itself is not modified.
    """
    # A bare string would otherwise be split into single characters by tuple().
    if isinstance(suffixes, str):
        rejected = (suffixes,)
    else:
        rejected = tuple(suffixes)
    # str.endswith accepts a tuple; an empty tuple matches nothing.
    return [mat for mat in mats if not mat.name.endswith(rejected)]
def filter_reject_large(mats, max_rows=1_000_000, max_cols=1_000_000,
                        max_nnz=20_000_000):
    """Return the matrices that do not exceed any of the size limits.

    A matrix is rejected when its ``rows``, ``cols`` or ``nnz`` attribute is
    strictly greater than the corresponding limit; a value equal to the limit
    is kept.

    Args:
        mats: Iterable of objects exposing ``rows``, ``cols`` and ``nnz``.
        max_rows: Largest accepted row count.
        max_cols: Largest accepted column count.
        max_nnz: Largest accepted ``nnz`` value.

    Returns:
        A new list, in input order, of the matrices that were kept.
    """
    return [
        mat
        for mat in mats
        if mat.rows <= max_rows and mat.cols <= max_cols and mat.nnz <= max_nnz
    ]
def filter_reject_small(mats):
filtered = []
for mat in mats:
if mat.rows < 1_000 or mat.cols < 1_000 or mat.nnz < 20_000:
continue
filtered += [mat]
return filtered
## all real-valued matrices
# Built at import time: one ssgetpy query for dtype='real', passed through
# filter_reject_blacklist.
# NOTE(review): limit=1_000_000 is presumably meant to exceed the size of the
# collection so that no result is cut off -- confirm against ssgetpy.
REAL_MATS = Dataset(
    "all_reals",
    filter_reject_blacklist(ssgetpy.search(dtype='real', limit=1_000_000)),
)
## certain matrices with regular structure
# Values passed as the ``kind`` filter of ssgetpy.search, one query per entry.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]

# Concatenate the real-valued search results of every kind, in `kinds` order.
# These queries run at import time.
mats = []
for kind in kinds:
    mats += ssgetpy.search(
        kind=kind,
        dtype='real',
        limit=1_000_000,
    )

# Defined once, from the collected results. An earlier placeholder with an
# empty matrix list was always overwritten here and has been removed.
REGULAR_REAL_MATS = Dataset(
    name="regular_reals",
    mats=filter_reject_blacklist(mats),
)
## keep "small" matrices
# "small" only drops what filter_reject_large rejects; there is no lower bound.
REGULAR_REAL_SMALL_MATS = Dataset(
    "regular_reals_small",
    filter_reject_large(REGULAR_REAL_MATS.mats),
)

REAL_SMALL_MATS = Dataset(
    "reals_small",
    filter_reject_large(REAL_MATS.mats),
)
## keep "medium" matrices
# "medium" = survives filter_reject_small first, then filter_reject_large.
_regular_not_small = filter_reject_small(REGULAR_REAL_MATS.mats)
REGULAR_REAL_MED_MATS = Dataset(
    "regular_reals_med",
    filter_reject_large(_regular_not_small),
)

_real_not_small = filter_reject_small(REAL_MATS.mats)
REAL_MED_MATS = Dataset(
    "reals_med",
    filter_reject_large(_real_not_small),
)
## export all datasets
# Every Dataset defined in this module, for callers that want to loop over
# all of them.
DATASETS = [
    REAL_MATS,
    REAL_SMALL_MATS,
    REAL_MED_MATS,
    REGULAR_REAL_MATS,
    REGULAR_REAL_SMALL_MATS,
    REGULAR_REAL_MED_MATS,
]

49
download.py Executable file
View File

@@ -0,0 +1,49 @@
import os
from pathlib import Path
import sys
import datasets
# Download every matrix of the "all_reals" dataset into $SCRATCH/suitesparse,
# keeping only <name>/<name>.mtx from each extracted archive.
#
# datasets.py exposes Dataset(name, mats) tuples and has no ALL_REAL_MATS
# attribute, so select REAL_MATS and take its list of matrices.
mats = datasets.REAL_MATS.mats
print(len(mats))

# Raises KeyError when SCRATCH is not set in the environment.
scratchPath = Path(os.environ["SCRATCH"])
downPath = scratchPath / "suitesparse"
print("ensure", downPath)
os.makedirs(downPath, exist_ok=True)  # fine if the directory already exists

for mat in mats:
    print(mat.name)
    matDir = downPath / mat.name
    mtxName = mat.name + ".mtx"

    if os.path.exists(matDir / mtxName):
        print(f"skipping {mat.name}: already exists")
        continue

    mat.download(format='MM', destpath=downPath, extract=True)

    # TODO: check download for a type that is not 'real' and remove if so
    # TODO: check for non-coordinate and remove, if so

    # many mats include rhs/whatever in extracted. toss that.
    for f in os.listdir(matDir):
        if f != mtxName:
            print("DELETE ", f)
            os.remove(matDir / f)

    # Nothing left means the archive had no <name>.mtx; drop the empty dir.
    if not os.listdir(matDir):
        os.rmdir(matDir)
# blacklist:
# cavity(\d+)_[bx].mtx
# circuit(\d+)_[bx].mtx
# other things that end in _b.mtx?
#

4
list.py Normal file
View File

@@ -0,0 +1,4 @@
import datasets
# Print one summary line per exported dataset: its name and matrix count.
for dataset in datasets.DATASETS:
    count = len(dataset.mats)
    print(f"{dataset.name}: {count} matrices")

17
load-env.sh Normal file
View File

@@ -0,0 +1,17 @@
#! /bin/bash
# Load the cray-python module that matches $NERSC_HOST, then show which
# python is now first on PATH. Does nothing when $NERSC_HOST is neither
# "cori" nor "perlmutter".
# `host` is assigned here but not read anywhere in this script.
host=$(hostname)

case "$NERSC_HOST" in
    cori)
        echo \$NERSC_HOST matched cori
        module load cray-python/3.8.5.0
        which python
        ;;
    perlmutter)
        echo \$NERSC_HOST matched perlmutter
        module load cray-python/3.9.4.1
        which python
        ;;
esac

15
pyproject.toml Normal file
View File

@@ -0,0 +1,15 @@
[tool.poetry]
name = "ss-downloader"
version = "0.1.0"
description = ""
authors = ["Carl Pearson <cwpears@sandia.gov>"]
[tool.poetry.dependencies]
python = "^3.8"
ssgetpy = "^1.0rc2"
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"