work

2017-05-07 16:06:02 -07:00
parent a706e8598b
commit 22fdb16fda
1 changed files with 23 additions and 13 deletions
--- a/cem17_template.tex
+++ b/cem17_template.tex
@@ -47,7 +47,7 @@ Fig.~\ref{fig:app_breakdown} shows the amount of time the full inverse-solver ap
 ``BW (32T)'' corresponds to a 32-thread OpenMP parallel run on a single XE node, and S822LC corresponds to a 160-thread OpenMP parallel run on the S822LC node.
 Non-MLFMM operations account for a minority of the execution time, and that share shrinks further as the object reconstructions grow larger.
-\begin{figure}[h]
+\begin{figure}[ht]
 \begin{center}
 \begin{tabular}{c}
 \mbox{\psfig{figure=figures/cpu_matvec.pdf,width=8cm}}
@@ -56,6 +56,7 @@ Non-MLFMM operations are a minority of the time, and become an even smaller prop
  \caption{
  Amount of application time spent in MLFMM for two different execution environments.
  MLFMM is the dominant component even with CPU parallelization on a single node.
  As object reconstructions grow larger or more challenging, MLFMM time further increases as a proportion of application time.
  }
  \label{fig:app_breakdown}
 \end{figure}
@@ -103,27 +104,32 @@ The P100s are connected to the Power8 CPUs via $80$~GB/s NVLink connections.
 All evaluations are done on a problem with these parameters. \todo{get from mert}
-Fig.~\ref{fig:mlfmm_bw} shows the amount of  of MLFMM execution time spent in computational kernels.
+Fig.~\ref{fig:mlfmm_bw} shows the MLFMM performance scaling on various Blue Waters configurations.
-\begin{figure}[b]
+\begin{figure}[htbp]
 \begin{center}
 \begin{tabular}{c}
 \mbox{\psfig{figure=figures/mlfmm_bw.pdf,width=8cm}}
 \end{tabular}
 \end{center}
-  \caption{BW.}
+  \caption{
  MLFMM performance scaling on various Blue Waters (BW) configurations.
  }
  \label{fig:mlfmm_bw}
 \end{figure}
-Fig.~\ref{fig:mlfmm_minsky} shows the amount of MLFMM execution time spent in computational kernels.
+Fig.~\ref{fig:mlfmm_minsky} shows the MLFMM performance scaling for various S822LC configurations.
-\begin{figure}[b]
+
 \begin{figure}[htbp]
 \begin{center}
 \begin{tabular}{c}
 \mbox{\psfig{figure=figures/mlfmm_minsky.pdf,width=8cm}}
 \end{tabular}
 \end{center}
-  \caption{S822LC.}
+  \caption{
  MLFMM performance scaling for various S822LC configurations.
  }
  \label{fig:mlfmm_minsky}
 \end{figure}
@@ -132,8 +138,12 @@ Fig.~\ref{fig:mlfmm_minsky} shows the amount of MLFMM execution time spent in co
 \subsection{Computation Kernel Breakdown}
 Fig.~\ref{fig:kernel_breakdown} shows the amount of MLFMM execution time spent in computational kernels.
 \texttt{P2P} denotes the ``particle-to-particle'' (nearfield) exchanges.
 \texttt{P2M} and \texttt{M2M} are the lowest-level and higher-level aggregations, respectively. 
 \texttt{L2L} and \texttt{L2P} are the higher-level and lowest-level disaggregations, respectively.
 \texttt{M2L} denotes the translations.
-\begin{figure}[b]
+\begin{figure}[htbp]
 \begin{center}
 \begin{tabular}{c}
 \mbox{\psfig{figure=figures/kernels.pdf,width=8cm}}
@@ -145,11 +155,11 @@ Fig.~\ref{fig:kernel_breakdown} shows the amount of  of MLFMM execution time spe
-This document is a template for authors preparing papers for the
+%This document is a template for authors preparing papers for the
-CEM'17 Computing and Electromagnetics Workshop in Barcelona, Spain.
+%CEM'17 Computing and Electromagnetics Workshop in Barcelona, Spain.
-The papers are required to use the IEEE style by following the
+%The papers are required to use the IEEE style by following the
-instructions provided in this document. The language is English.
+%instructions provided in this document. The language is English.
-The papers are expected to be two-pages long.
+%The papers are expected to be two-pages long.
 \section{Text Format}