Files
suitesparse-downloader/lib/datasets.py
Carl William Pearson 4584ce14ee add square datasets
2022-01-28 13:14:30 -07:00

140 lines
3.2 KiB
Python

import collections
import sys
import ssgetpy
from lib import dtypes
# A dataset pairs a label (`name`) with the collection of matrices (`mats`) it holds.
Dataset = collections.namedtuple(
    typename="Dataset",
    field_names=("name", "mats"),
)
def safe_dir_name(s):
    """Return a lower-case version of `s` suitable for use as a directory name.

    Leading and trailing whitespace is stripped, then every space, "/" and
    "-" is replaced by an underscore before the result is lower-cased.
    """
    cleaned = s.strip()
    for separator in (" ", "/", "-"):
        cleaned = cleaned.replace(separator, "_")
    return cleaned.lower()
def mat_is_real(mat):
    """Return whether the entry for `mat` in dtypes.DTYPES equals "real".

    DTYPES is indexed by the matrix's (group, name) pair.
    """
    key = (mat.group, mat.name)
    return dtypes.DTYPES[key] == "real"
def filter_keep_real(mats):
    """Return a new list with the matrices from `mats` accepted by mat_is_real."""
    return list(filter(mat_is_real, mats))
def mat_is_small(mat):
    """Return True when `mat` counts as small.

    A matrix is small when both dimensions are below 1,000, or when it has
    fewer than 20,000 nonzeros.
    """
    both_dims_small = mat.rows < 1_000 and mat.cols < 1_000
    return both_dims_small or mat.nnz < 20_000
def mat_is_large(mat):
    """Return True when `mat` counts as large.

    A matrix is large when both dimensions exceed 1,000,000, or when it has
    more than 20,000,000 nonzeros.
    """
    both_dims_large = mat.rows > 1_000_000 and mat.cols > 1_000_000
    return both_dims_large or mat.nnz > 20_000_000
def filter_reject_large(mats):
    """Return a new list of the matrices in `mats` that mat_is_large does not flag."""
    kept = []
    for candidate in mats:
        if mat_is_large(candidate):
            continue
        kept.append(candidate)
    return kept
def filter_reject_small(mats):
    """Return a new list of the matrices in `mats` that mat_is_small does not flag."""
    kept = []
    for candidate in mats:
        if mat_is_small(candidate):
            continue
        kept.append(candidate)
    return kept
def filter_keep_square(mats):
    """Return a new list with the matrices whose row count equals their column count."""
    return list(filter(lambda m: m.rows == m.cols, mats))
## all real-valued matrices
# The ssgetpy query already asks for dtype 'real'; filter_keep_real then
# applies the mat_is_real check to every result.
REAL_MATS = Dataset(
    "reals",
    filter_keep_real(ssgetpy.search(dtype='real', limit=1_000_000)),
)
## certain matrices with regular structure
# One ssgetpy query is issued per kind string below.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]
# Accumulate the results of every per-kind query into a single list.
mats = []
for kind in kinds:
    mats.extend(ssgetpy.search(kind=kind, dtype='real', limit=1_000_000))
REGULAR_REAL_MATS = Dataset("regular_reals", filter_keep_real(mats))
## keep "small" matrices
# "small" here means "not large": only filter_reject_large is applied.
REGULAR_REAL_SMALL_MATS = Dataset(
    "regular_reals_small",
    filter_reject_large(REGULAR_REAL_MATS.mats),
)
REAL_SMALL_MATS = Dataset(
    "reals_small",
    filter_reject_large(REAL_MATS.mats),
)
# square-only subsets of the two datasets above
REGULAR_SQUARE_REAL_SMALL_MATS = Dataset(
    "regular_square_reals_small",
    filter_keep_square(REGULAR_REAL_SMALL_MATS.mats),
)
SQUARE_REAL_SMALL_MATS = Dataset(
    "square_reals_small",
    filter_keep_square(REAL_SMALL_MATS.mats),
)
## keep "medium" matrices
# "medium" means neither small nor large: small matrices are dropped first,
# then large ones.
REGULAR_REAL_MED_MATS = Dataset(
    "regular_reals_med",
    filter_reject_large(filter_reject_small(REGULAR_REAL_MATS.mats)),
)
REAL_MED_MATS = Dataset(
    "reals_med",
    filter_reject_large(filter_reject_small(REAL_MATS.mats)),
)
## export all datasets
# The unfiltered REAL_MATS and REGULAR_REAL_MATS are not exported.
DATASETS = [
    REAL_SMALL_MATS, REAL_MED_MATS,
    REGULAR_REAL_SMALL_MATS, REGULAR_REAL_MED_MATS,
    REGULAR_SQUARE_REAL_SMALL_MATS, SQUARE_REAL_SMALL_MATS,
]
def get_kinds():
    """return the set of distinct `kind` values over all matrices found by ssgetpy.search"""
    return {entry.kind for entry in ssgetpy.search(limit=1_000_000)}
## add one dataset per matrix kind
# get_kinds() returns a set, and the iteration order of a set of strings can
# differ between interpreter runs; sorting keeps the order in which datasets
# are appended to DATASETS reproducible.
for kind in sorted(get_kinds()):
    d = Dataset(
        name="kind_" + safe_dir_name(kind),
        # real-valued matrices of this kind that are neither small nor large
        mats=filter_reject_large(
            filter_reject_small(
                filter_keep_real(
                    ssgetpy.search(kind=kind, dtype='real', limit=1_000_000)
                )
            )
        ),
    )
    # skip kinds for which nothing survives the filters
    if d.mats:
        DATASETS.append(d)