add unused.py, refactor common code a bit
This commit is contained in:
124
lib/datasets.py
Normal file
124
lib/datasets.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import collections
|
||||
import sys
|
||||
|
||||
import ssgetpy
|
||||
|
||||
from lib import lists
|
||||
|
||||
# A named group of matrices: `name` labels the group, `mats` holds its members.
Dataset = collections.namedtuple("Dataset", "name mats")
|
||||
|
||||
def safe_dir_name(s):
    """Return a lowercase, underscore-separated copy of `s`.

    Leading and trailing whitespace is stripped, then every space, slash
    and hyphen is replaced by an underscore, and the result is lowercased.
    """
    # one translation pass handles all three separator characters
    to_underscore = str.maketrans({" ": "_", "/": "_", "-": "_"})
    return s.strip().translate(to_underscore).lower()
|
||||
|
||||
def mat_is_integer(mat):
    """Return True when `mat.name` is listed in `lists.INTEGER_MATS`."""
    integer_names = lists.INTEGER_MATS
    return mat.name in integer_names
|
||||
|
||||
def filter_reject_integer(mats):
    """Return a new list of the entries of `mats` that `mat_is_integer` rejects.

    The relative order of the surviving entries is preserved.
    """
    return [
        candidate
        for candidate in mats
        if not mat_is_integer(candidate)
    ]
|
||||
|
||||
def mat_is_small(mat):
    """Return True when `mat` counts as small.

    A matrix is small when both `mat.rows` and `mat.cols` are below 1,000,
    or when `mat.nnz` is below 20,000.
    """
    small_shape = mat.rows < 1_000 and mat.cols < 1_000
    return small_shape or mat.nnz < 20_000
|
||||
|
||||
def mat_is_large(mat):
    """Return True when `mat` counts as large.

    A matrix is large when both `mat.rows` and `mat.cols` exceed 1,000,000,
    or when `mat.nnz` exceeds 20,000,000. This mirrors `mat_is_small`,
    which also requires both dimensions to fall on the same side of its
    threshold.
    """
    # Both dimensions are compared with `>`. The previous `cols < 1_000_000`
    # flagged tall-and-narrow matrices as large while letting matrices that
    # are huge in both dimensions through.
    large_shape = mat.rows > 1_000_000 and mat.cols > 1_000_000
    return large_shape or mat.nnz > 20_000_000
|
||||
|
||||
def filter_reject_large(mats):
    """Return a new list of the entries of `mats` that `mat_is_large` rejects.

    The relative order of the surviving entries is preserved.
    """
    return [
        candidate
        for candidate in mats
        if not mat_is_large(candidate)
    ]
|
||||
|
||||
def filter_reject_small(mats):
    """Return a new list of the entries of `mats` that `mat_is_small` rejects.

    The relative order of the surviving entries is preserved.
    """
    return [
        candidate
        for candidate in mats
        if not mat_is_small(candidate)
    ]
|
||||
|
||||
## all real-valued matrices
# Search for dtype 'real' and drop the entries flagged by mat_is_integer.
# NOTE(review): limit=1_000_000 is presumably meant as "no limit" - confirm
# against the ssgetpy documentation.
REAL_MATS = Dataset(
    "reals",
    filter_reject_integer(
        ssgetpy.search(dtype='real', limit=1_000_000)
    ),
)
|
||||
|
||||
## certain matrices with regular structure
# Matrix kinds to query; each string is passed to ssgetpy.search as `kind`.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]

# Concatenate the real-dtype search results of every kind, in list order.
mats = []
for kind in kinds:
    mats.extend(
        ssgetpy.search(kind=kind, dtype='real', limit=1_000_000)
    )

REGULAR_REAL_MATS = Dataset(
    "regular_reals",
    filter_reject_integer(mats),
)
|
||||
|
||||
## keep "small" matrices
# "small" here means: everything that mat_is_large does not flag.
REGULAR_REAL_SMALL_MATS = Dataset(
    "regular_reals_small",
    filter_reject_large(REGULAR_REAL_MATS.mats),
)
REAL_SMALL_MATS = Dataset(
    "reals_small",
    filter_reject_large(REAL_MATS.mats),
)
|
||||
|
||||
## keep "medium" matrices
# "medium" here means: flagged by neither mat_is_small nor mat_is_large.
REGULAR_REAL_MED_MATS = Dataset(
    "regular_reals_med",
    filter_reject_large(
        filter_reject_small(REGULAR_REAL_MATS.mats)
    ),
)
REAL_MED_MATS = Dataset(
    "reals_med",
    filter_reject_large(
        filter_reject_small(REAL_MATS.mats)
    ),
)
|
||||
|
||||
## export all datasets
# Kept as a mutable list: the per-kind datasets are added to it further down.
DATASETS = [
    # all real matrices
    REAL_MATS, REAL_SMALL_MATS, REAL_MED_MATS,
    # real matrices of the selected regular kinds
    REGULAR_REAL_MATS, REGULAR_REAL_SMALL_MATS, REGULAR_REAL_MED_MATS,
]
|
||||
|
||||
def get_kinds():
    """Return the set of distinct `kind` values found by an unfiltered search.

    The search is issued through `ssgetpy.search` with only a result limit
    of 1,000,000 and no other criteria.
    """
    found = ssgetpy.search(limit=1_000_000)
    return {entry.kind for entry in found}
|
||||
|
||||
# Build one "kind_<name>" dataset per matrix kind, keeping only real-dtype
# matrices that pass the integer, small and large filters. Kinds with no
# surviving matrices are not exported.
for kind in get_kinds():
    d = Dataset(
        "kind_" + safe_dir_name(kind),
        filter_reject_large(
            filter_reject_small(
                filter_reject_integer(
                    ssgetpy.search(kind=kind, dtype='real', limit=1_000_000)
                )
            )
        ),
    )
    if d.mats:
        DATASETS.append(d)
|
Reference in New Issue
Block a user