57 lines
1.4 KiB
Python
57 lines
1.4 KiB
Python
from html.parser import HTMLParser
|
|
|
|
|
|
# Parser state shared with MyHTMLParser through ``global`` statements.
# in_kind: True while the most recently opened <td> has an attribute value
# containing "column-kind".
in_kind = False
# kind_match: set once text inside such a cell contains one of ``kinds``;
# the parser clears it again after printing a link.
kind_match = False

# Problem kinds to select. Each entry is matched as a substring of the cell
# text, so "2D/3D" also selects longer labels that contain it.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]
|
|
|
|
class MyHTMLParser(HTMLParser):
    """Scan HTML and print selected link URLs.

    The parser watches for ``<td>`` cells that have an attribute value
    containing ``"column-kind"``. When text inside such a cell contains one
    of the module-level ``kinds`` entries, the next ``<a>`` whose ``href``
    contains ``"MM"`` is printed and the match is cleared.

    State is kept in the module-level globals ``in_kind`` and ``kind_match``
    rather than on the instance, so every parser instance shares it.
    """

    def handle_starttag(self, tag, attrs):
        """Track kind cells and print the link that follows a matched cell.

        Args:
            tag: Tag name as supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for attributes written without a value.
        """
        global in_kind
        global kind_match

        if tag == "td":
            # Every new cell re-evaluates the flag. Value-less attributes
            # (e.g. ``<td nowrap>``) arrive as None and are skipped instead
            # of being searched, which would raise TypeError.
            in_kind = any(
                value is not None and "column-kind" in value
                for _name, value in attrs
            )
        elif tag == "a" and kind_match:
            for name, value in attrs:
                # Compare the attribute name exactly so look-alikes such as
                # ``data-href`` are ignored; skip a bare ``href`` (None).
                if name == "href" and value is not None and "MM" in value:
                    print(value)
                    # Stop reporting until another kind cell matches.
                    kind_match = False

    def handle_endtag(self, tag):
        """Ignore end tags; no state depends on them."""

    def handle_data(self, data):
        """Flag a match when text inside a kind cell contains a wanted kind.

        Args:
            data: Text chunk supplied by ``HTMLParser``.
        """
        global kind_match

        if in_kind and any(needle in data for needle in kinds):
            kind_match = True
|
|
|
|
parser = MyHTMLParser()

# Read the saved page in one go and run it through the parser; selected
# URLs are printed by the parser's callbacks while feed() runs.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default. Undecodable bytes are replaced rather than
# aborting the run; the strings being searched for are plain ASCII.
with open("suitesparse-reals.html", encoding="utf-8", errors="replace") as f:
    parser.feed(f.read())

# Flush any input the parser is still buffering.
parser.close()