update anatole, work on publications, add talks list

2021-01-27 17:40:20 -07:00
parent 163a470f3f
commit 3a685bf1a6
28 changed files with 204 additions and 780 deletions
--- a/content/publication/20200522_pearson_iwapt/index.md
+++ b/content/publication/20200522_pearson_iwapt/index.md
@@ -21,17 +21,6 @@ publication_types = ["1"]
 publication = "2020 IEEE International Workshop on Automatic Performance Tuning"
 publication_short = "In *iWAPT'20*"

-# Abstract and optional shortened version.
-abstract = """
-High-performance distributed computing systems increasingly feature nodes that have multiple CPU sockets and multiple GPUs.
-The communication bandwidth between these components is non-uniform.
-Furthermore, these systems can expose different communication capabilities between these components.
-For communication-heavy applications, optimally using these capabilities is challenging and essential for performance. 
-Bespoke codes with optimized communication may be non-portable across run-time/software/hardware configurations, and existing stencil frameworks neglect optimized communication.
-This work presents node-aware approaches for automatic data placement and communication implementation for 3D stencil codes on multi-GPU nodes with non-homogeneous communication performance and capabilities.
-Benchmarking results in the Summit system show that choices in placement can result in a 20% improvement in single-node exchange, and communication specialization can yield a further 6x improvement in exchange time in a single node, and a 16% improvement at 1536 GPUs."""
-abstract_short = ""
-

 # Does this page contain LaTeX math? (true/false)
 math = false
@@ -74,3 +63,11 @@ url_source = ""
  # Options: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight
  focal_point = ""
 +++
+
+High-performance distributed computing systems increasingly feature nodes that have multiple CPU sockets and multiple GPUs.
+The communication bandwidth between these components is non-uniform.
+Furthermore, these systems can expose different communication capabilities between these components.
+For communication-heavy applications, optimally using these capabilities is challenging and essential for performance.
+Bespoke codes with optimized communication may be non-portable across run-time/software/hardware configurations, and existing stencil frameworks neglect optimized communication.
+This work presents node-aware approaches for automatic data placement and communication implementation for 3D stencil codes on multi-GPU nodes with non-homogeneous communication performance and capabilities.
+Benchmarking results on the Summit system show that choices in placement can result in a 20% improvement in single-node exchange, and communication specialization can yield a further 6x improvement in exchange time in a single node, and a 16% improvement at 1536 GPUs.