Fig.~\ref{fig:app_breakdown} shows the amount of time the full inverse-solver application spends in MLFMM.
``BW (32T)'' corresponds to a 32-thread OpenMP parallel run on a single XE node, and S822LC corresponds to a 160-thread OpenMP parallel run on the S822LC node.
Non-MLFMM operations account for a minority of the application time, and their share shrinks further as the object reconstructions grow larger.
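For reference, the two CPU configurations differ only in OpenMP thread count. The sketch below illustrates the kind of loop-level OpenMP parallelism such runs rely on; the function and loop are hypothetical stand-ins, not the solver's actual code, with the thread count chosen at run time (e.g.\ 32 on the XE node, 160 on the S822LC node).
\begin{verbatim}
#include <omp.h>

// Hypothetical example loop, not the solver's code.
// The thread count is selected at run time, e.g.
// OMP_NUM_THREADS=32 (XE node) or 160 (S822LC).
void axpy(long n, double a, const double *x, double *y)
{
  #pragma omp parallel for      // iterations are split
  for (long i = 0; i < n; ++i)  // across the threads
    y[i] += a * x[i];
}
\end{verbatim}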
\begin{figure}[ht]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/cpu_matvec.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{
Amount of application time spent in MLFMM for two different execution environments.
MLFMM is the dominant component even with CPU parallelization on a single node.
As object reconstructions grow larger or more challenging, MLFMM time further increases as a proportion of application time.
}
\label{fig:app_breakdown}
\end{figure}

The P100s are connected to the Power8 CPUs via $80$~GB/s NVLink connections.
All evaluations are done on a problem with these parameters. \todo{get from mert}
Fig.~\ref{fig:mlfmm_bw} shows the MLFMM performance scaling on various Blue Waters configurations.
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/mlfmm_bw.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{
MLFMM performance scaling on various Blue Waters configurations.
}
\label{fig:mlfmm_bw}
\end{figure}
Fig.~\ref{fig:mlfmm_minsky} shows the MLFMM performance scaling for various S822LC configurations.
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/mlfmm_minsky.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{
MLFMM performance scaling for various S822LC configurations.
}
\label{fig:mlfmm_minsky}
\end{figure}
\subsection{Computation Kernel Breakdown}
Fig.~\ref{fig:kernel_breakdown} shows the amount of MLFMM execution time spent in computational kernels.
\texttt{P2P} denotes the ``particle-to-particle'' or near-field exchanges.
\texttt{P2M} and \texttt{M2M} are the lowest-level and higher-level aggregations, respectively.
\texttt{L2L} and \texttt{L2P} are the higher-level and lowest-level disaggregations, respectively.
\texttt{M2L} denotes the translations.
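To make the ordering of these kernels concrete, the following sketch outlines one MLFMM matrix-vector product in terms of the kernel names above; the tree structure, level indexing, and function names are hypothetical stand-ins, not the solver's actual interface.
\begin{verbatim}
#include <vector>

// Hypothetical stand-ins for the solver's real tree.
struct Level { std::vector<int> boxes; };
struct Tree  { std::vector<Level> levels; }; // [0]=root

// Kernel stubs named as in the text (empty bodies here).
void p2p(Tree&)  {}  void p2m(Level&) {}
void m2m(Level&) {}  void m2l(Level&) {}
void l2l(Level&) {}  void l2p(Level&) {}

// One MLFMM matrix-vector product, kernel by kernel.
void mlfmm_matvec(Tree &t)
{
  int leaf = (int)t.levels.size() - 1;
  p2p(t);                // near-field exchanges
  p2m(t.levels[leaf]);   // lowest-level aggregation
  for (int l = leaf - 1; l >= 2; --l)
    m2m(t.levels[l]);    // aggregate up the tree
  for (int l = 2; l <= leaf; ++l)
    m2l(t.levels[l]);    // translations at each level
  for (int l = 2; l < leaf; ++l)
    l2l(t.levels[l]);    // disaggregate down the tree
  l2p(t.levels[leaf]);   // lowest-level disaggregation
}
\end{verbatim}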
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/kernels.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{Amount of MLFMM execution time spent in each computational kernel.}
\label{fig:kernel_breakdown}
\end{figure}

%This document is a template for authors preparing papers for the
%CEM'17 Computing and Electromagnetics Workshop in Barcelona, Spain.
%The papers are required to use the IEEE style by following the
%instructions provided in this document. The language is English.
%The papers are expected to be two-pages long.
\section{Text Format}