This commit is contained in:
Carl Pearson
2017-05-07 16:06:02 -07:00
parent a706e8598b
commit 22fdb16fda

View File

@@ -47,7 +47,7 @@ Fig.~\ref{fig:app_breakdown} shows the amount of time the full inverse-solver ap
``BW (32T)'' corresponds to a 32-thread OpenMP parallel run on a single XE node, and S822LC corresponds to a 160-thread OpenMP parallel run on the S822LC node.
Non-MLFMM operations are a minority of the time, and become an even smaller proportion of the time as the object reconstructions grow larger.
\begin{figure}[h]
\begin{figure}[ht]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/cpu_matvec.pdf,width=8cm}}
@@ -56,6 +56,7 @@ Non-MLFMM operations are a minority of the time, and become an even smaller prop
\caption{
Amount of application time spent in MLFMM for two different execution environments.
MLFMM is the dominant component even with CPU parallelization on a single node.
As object reconstructions grow larger or more challenging, MLFMM time further increases as a proportion of application time.
}
\label{fig:app_breakdown}
\end{figure}
@@ -103,27 +104,32 @@ The P100s are connected to the Power8 CPUs via $80$~GB/s NVLink connections.
All evaluations are done on a problem with these parameters. \todo{get from mert}
Fig.~\ref{fig:mlfmm_bw} shows the amount of of MLFMM execution time spent in computational kernels.
Fig.~\ref{fig:mlfmm_bw} shows the MLFMM performance scaling on various Blue Waters configurations.
\begin{figure}[b]
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/mlfmm_bw.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{BW.}
\caption{
MLFMM performance scaling on various Blue Waters configurations.
}
\label{fig:mlfmm_bw}
\end{figure}
Fig.~\ref{fig:mlfmm_minsky} shows the amount of MLFMM execution time spent in computational kernels.
Fig.~\ref{fig:mlfmm_minsky} shows the MLFMM performance scaling for various S822LC configurations.
\begin{figure}[b]
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/mlfmm_minsky.pdf,width=8cm}}
\end{tabular}
\end{center}
\caption{S822LC.}
\caption{
MLFMM performance scaling for various S822LC configurations.
}
\label{fig:mlfmm_minsky}
\end{figure}
@@ -132,8 +138,12 @@ Fig.~\ref{fig:mlfmm_minsky} shows the amount of MLFMM execution time spent in co
\subsection{Computation Kernel Breakdown}
Fig.~\ref{fig:kernel_breakdown} shows the amount of MLFMM execution time spent in computational kernels.
\texttt{P2P} is the ``particle-to-particle'' or nearfield exchanges.
\texttt{P2M} and \texttt{M2M} are the lowest-level and higher-level aggregations, respectively.
\texttt{L2L} and \texttt{L2P} are the higher-level and lowest-level disaggregations, respectively.
\texttt{M2L} denotes the translations.
\begin{figure}[b]
\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c}
\mbox{\psfig{figure=figures/kernels.pdf,width=8cm}}
@@ -145,11 +155,11 @@ Fig.~\ref{fig:kernel_breakdown} shows the amount of of MLFMM execution time spe
This document is a template for authors preparing papers for the
CEM'17 Computing and Electromagnetics Workshop in Barcelona, Spain.
The papers are required to use the IEEE style by following the
instructions provided in this document. The language is English.
The papers are expected to be two-pages long.
%This document is a template for authors preparing papers for the
%CEM'17 Computing and Electromagnetics Workshop in Barcelona, Spain.
%The papers are required to use the IEEE style by following the
%instructions provided in this document. The language is English.
%The papers are expected to be two-pages long.
\section{Text Format}