initial commit
This commit is contained in:
57
parse.py
Normal file
57
parse.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from html.parser import HTMLParser
|
||||
|
||||
|
||||
# Parser state shared through module globals by MyHTMLParser's handlers.
# in_kind:    set while the parser is inside a <td> flagged as a kind cell.
# kind_match: set once text in such a cell contained one of the wanted kinds.
in_kind = kind_match = False
|
||||
|
||||
# Substrings searched for in the text of a "kind" table cell; a row whose
# kind text contains any of them is selected.
kinds = [
    "2D/3D",
    "Acoustics Problem",
    "Materials Problem",
    "Structural Problem",
    "Computational Fluid Dynamics Problem",
    "Model Reduction Problem",
    "Semiconductor Device Problem",
    # Was misspelled "Chemisty", which can never match the real category.
    "Theoretical/Quantum Chemistry Problem",
    "Thermal Problem",
]
|
||||
|
||||
class MyHTMLParser(HTMLParser):
    """Print the ``MM`` download link of table rows whose kind is wanted.

    State lives in the module-level globals ``in_kind`` (the parser is inside
    a ``<td>`` with an attribute value containing ``column-kind``) and
    ``kind_match`` (text seen in such a cell contained one of the strings in
    the module-level ``kinds`` list).  While ``kind_match`` is set, the first
    ``<a>`` whose ``href`` contains ``"MM"`` is printed and the flag is
    cleared again.
    """

    def handle_starttag(self, tag, attrs):
        """Track kind cells and print the pending row's ``MM`` link.

        Args:
            tag: Lower-cased tag name.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None`` for
                attributes written without a value (e.g. ``<td nowrap>``).
        """
        global in_kind
        global kind_match

        if tag == "td":
            # Each new cell is a kind cell only if one of its attribute
            # values says so.  Valueless attributes carry None and must be
            # skipped, otherwise the substring test raises TypeError.
            in_kind = any(
                value is not None and "column-kind" in value
                for _, value in attrs
            )

        if tag == "a" and kind_match:
            for name, value in attrs:
                # A bare ``href`` (no value) has nothing to inspect.
                if "href" not in name or value is None:
                    continue
                if "MM" in value:
                    print(value)
                    # Only the MM link consumes the match, so earlier links
                    # in the same row do not discard it.
                    kind_match = False

    def handle_endtag(self, tag):
        """Leave the kind cell once its closing ``</td>`` is reached."""
        global in_kind

        # Without this, text following the kind cell (up to the next <td>)
        # would still be tested against ``kinds``.
        if tag == "td":
            in_kind = False

    def handle_data(self, data):
        """Flag a match when text inside a kind cell names a wanted kind.

        Args:
            data: Text content reported by the parser.
        """
        global kind_match

        if in_kind and any(needle in data for needle in kinds):
            kind_match = True
|
||||
|
||||
# Run the parser over the saved listing page; matching links are printed by
# MyHTMLParser as they are encountered.
parser = MyHTMLParser()

# Decode explicitly instead of relying on the platform's locale encoding.
# Undecodable bytes are replaced rather than aborting the run; the kind names
# and URLs being searched for are plain ASCII.
with open("suitesparse-reals.html", encoding="utf-8", errors="replace") as f:
    parser.feed(f.read())

# Flush any input the parser is still buffering at end of file.
parser.close()
|
Reference in New Issue
Block a user