Update hugo, academic-theme

This commit is contained in:
Carl Pearson
2017-10-18 14:06:25 -05:00
parent f2f01d8e4c
commit 057a7ac31e
7 changed files with 209 additions and 32 deletions

View File

@@ -0,0 +1,31 @@
+++
title = "latex-docker"
date = 2017-10-18T12:47:48-05:00
draft = false
# Tags: can be used for filtering projects.
# Example: `tags = ["machine-learning", "deep-learning"]`
tags = ["personal"]
# Project summary to display on homepage.
summary = "Docker images with latex"
# Optional image to display on homepage.
image_preview = ""
# Optional external URL for project (replaces project detail page).
external_link = "https://github.com/cwpearson/latex-docker"
# Does the project detail page use math formatting?
math = false
# Does the project detail page use source code highlighting?
highlight = true
# Featured image
# Place your image in the `static/img/` folder and reference its filename below, e.g. `image = "example.jpg"`.
[header]
image = ""
caption = ""
+++

View File

@@ -1,23 +1,57 @@
+++
title = "Adaptive Cache Bypass and Insertion for Many-Core Accelerators"
date = 2014-01-01
draft = false
date = "2014-01-01"
title = "Adaptive Cache Bypass and Insertion for Many-Core Accelerators"
# Authors. Comma separated list, e.g. `["Bob Smith", "David Jones"]`.
authors = ["Xuhao Chen", "Shengzhao Wu", "Li-Wen Chang", "Wei-Sheng Huang", "Carl Pearson", "Wen-mei Hwu"]
abstract = 'Many-core accelerators, e.g. GPUs, are widely used for accelerating general-purpose compute kernels. With the SIMT execution model, GPUs can hide memory latency through massive multithreading for many regular applications. To support more applications with irregular memory access pattern, cache hierarchy is introduced to GPU architecture to capture input data sharing and mitigate the effect of irregular accesses. However, GPU caches suffer from poor efficiency due to severe contention, which makes it difficult to adopt heuristic management policies, and also limits system performance and energy-efficiency. We propose an adaptive cache management policy specifically for many-core accelerators. The tag array of L2 cache is enhanced with extra bits to track memory access history, and thus the locality information is captured and provided to L1 cache as heuristics to guide its run-time bypass and insertion decisions. By preventing un-reused data from polluting the cache and alleviating contention, cache efficiency is significantly improved. As a result, the system performance is improved by 31% on average for cache sensitive benchmarks, compared to the baseline GPU architecture.'
# Publication type.
# Legend:
# 0 = Uncategorized
# 1 = Conference proceedings
# 2 = Journal
# 3 = Work in progress
# 4 = Technical report
# 5 = Book
# 6 = Book chapter
publication_types = ["1"]
image = ""
image_preview = ""
# Publication name and optional abbreviated version.
publication = "Proceedings of International Workshop on Manycore Embedded Systems."
publication_short = ""
# Abstract and optional shortened version.
abstract = "Many-core accelerators, e.g. GPUs, are widely used for accelerating general-purpose compute kernels. With the SIMT execution model, GPUs can hide memory latency through massive multithreading for many regular applications. To support more applications with irregular memory access pattern, cache hierarchy is introduced to GPU architecture to capture input data sharing and mitigate the effect of irregular accesses. However, GPU caches suffer from poor efficiency due to severe contention, which makes it difficult to adopt heuristic management policies, and also limits system performance and energy-efficiency. We propose an adaptive cache management policy specifically for many-core accelerators. The tag array of L2 cache is enhanced with extra bits to track memory access history, and thus the locality information is captured and provided to L1 cache as heuristics to guide its run-time bypass and insertion decisions. By preventing un-reused data from polluting the cache and alleviating contention, cache efficiency is significantly improved. As a result, the system performance is improved by 31% on average for cache sensitive benchmarks, compared to the baseline GPU architecture."
abstract_short = ""
# Does this page contain LaTeX math? (true/false)
math = false
publication = "*Proceedings of International Workshop on Manycore Embedded Systems.* ACM, 2014."
# Does this page require source code highlighting? (true/false)
highlight = true
# Featured image thumbnail (optional)
image_preview = ""
# Is this a selected publication? (true/false)
selected = false
# Links (optional)
url_pdf = "pdf/2014chen.pdf"
url_preprint = ""
url_code = ""
url_dataset = ""
url_pdf = "pdf/2014chen.pdf"
url_project = ""
url_slides = ""
url_video = ""
url_poster = ""
url_source = ""
selected = false
+++
# Featured image
# Place your image in the `static/img/` folder and reference its filename below, e.g. `image = "example.jpg"`.
[header]
image = ""
caption = ""
+++

View File

@@ -1,23 +1,57 @@
+++
title = "Comparative Performance Evaluation of Multi-GPU MLFMM Implementation for 2-D VIE Problems"
date = 2017-06-21
draft = false
date = "2017-06-21"
title = "Comparative Performance Evaluation of Multi-GPU MLFMM Implementation for 2-D VIE Problems"
# Authors. Comma separated list, e.g. `["Bob Smith", "David Jones"]`.
authors = ["Carl Pearson", "Mert Hidayetoglu", "Wei Ren", "Weng Cho Chew", "Wen-Mei Hwu"]
abstract = 'We compare multi-GPU performance of the multilevel fast multipole method (MLFMM) on two different systems: A shared-memory IBM S822LC workstation with four NVIDIA P100 GPUs, and 16 XK nodes (each is employed with a single NVIDIA K20X GPU) of the Blue Waters supercomputer. MLFMM is implemented for solving scattering problems involving two-dimensional inhomogeneous bodies. Results show that the multi-GPU implementation provides 794 and 969 times speedups on the IBM and Blue Waters systems over their corresponding sequential CPU executions, respectively, where the sequential execution on the IBM system is 1.17 times faster than on the Blue Waters System.'
# Publication type.
# Legend:
# 0 = Uncategorized
# 1 = Conference proceedings
# 2 = Journal
# 3 = Work in progress
# 4 = Technical report
# 5 = Book
# 6 = Book chapter
publication_types = ["1"]
image = ""
image_preview = ""
# Publication name and optional abbreviated version.
publication = "Computing and Electromagnetics International Workshop, IEEE 2017"
publication_short = "CEM"
# Abstract and optional shortened version.
abstract = "We compare multi-GPU performance of the multilevel fast multipole method (MLFMM) on two different systems: A shared-memory IBM S822LC workstation with four NVIDIA P100 GPUs, and 16 XK nodes (each is employed with a single NVIDIA K20X GPU) of the Blue Waters supercomputer. MLFMM is implemented for solving scattering problems involving two-dimensional inhomogeneous bodies. Results show that the multi-GPU implementation provides 794 and 969 times speedups on the IBM and Blue Waters systems over their corresponding sequential CPU executions, respectively, where the sequential execution on the IBM system is 1.17 times faster than on the Blue Waters System."
abstract_short = ""
# Does this page contain LaTeX math? (true/false)
math = false
publication = "*Computing and Electromagnetics International Workshop.* IEEE, 2017."
# Does this page require source code highlighting? (true/false)
highlight = true
# Featured image thumbnail (optional)
image_preview = ""
# Is this a selected publication? (true/false)
selected = false
# Links (optional)
url_pdf = "pdf/20170621_pearson_cem.pdf"
url_preprint = ""
url_code = ""
url_dataset = ""
url_pdf = "pdf/20170621_pearson_cem.pdf"
url_project = ""
url_slides = ""
url_video = ""
url_poster = ""
url_source = ""
selected = true
+++
# Featured image
# Place your image in the `static/img/` folder and reference its filename below, e.g. `image = "example.jpg"`.
[header]
image = ""
caption = ""
+++

View File

@@ -1,23 +1,57 @@
+++
title = "RAI: A Scalable Project Submission System for Parallel Programming Courses"
date = 2017-05-29
draft = false
date = "2017-05-29"
title = "RAI: A Scalable Project Submission System for Parallel Programming Courses"
# Authors. Comma separated list, e.g. `["Bob Smith", "David Jones"]`.
authors = ["Abdul Dakkak", "Carl Pearson", "Cheng Li"]
abstract = 'A major component of many advanced programming courses is an open-ended “end-of-term project” assignment. Delivering and evaluating open-ended parallel programming projects for hundreds or thousands of students brings a need for broad system reconfigurability coupled with challenges of testing and development uniformity, access to esoteric hardware and programming environments, scalability, and security. We present RAI, a secure and extensible system for delivering open-ended programming assignments configured with access to different hardware and software requirements. We describe how the system was used to deliver a programming-competition-style final project in an introductory GPU programming course at the University of Illinois Urbana-Champaign.'
# Publication type.
# Legend:
# 0 = Uncategorized
# 1 = Conference proceedings
# 2 = Journal
# 3 = Work in progress
# 4 = Technical report
# 5 = Book
# 6 = Book chapter
publication_types = ["1"]
image = ""
image_preview = ""
math = false
# Publication name and optional abbreviated version.
publication = "*Parallel and Distributed Processing Symposium Workshops, 2017 IEEE International.* IEEE, 2017."
publication_short = "IPDPS Workshop 2017"
# Abstract and optional shortened version.
abstract = "A major component of many advanced programming courses is an open-ended “end-of-term project” assignment. Delivering and evaluating open-ended parallel programming projects for hundreds or thousands of students brings a need for broad system reconfigurability coupled with challenges of testing and development uniformity, access to esoteric hardware and programming environments, scalability, and security. We present RAI, a secure and extensible system for delivering open-ended programming assignments configured with access to different hardware and software requirements. We describe how the system was used to deliver a programming-competition-style final project in an introductory GPU programming course at the University of Illinois Urbana-Champaign."
abstract_short = ""
# Does this page contain LaTeX math? (true/false)
math = false
# Does this page require source code highlighting? (true/false)
highlight = true
# Featured image thumbnail (optional)
image_preview = ""
# Is this a selected publication? (true/false)
selected = false
# Links (optional)
url_pdf = "pdf/rai-edupar2017.pdf"
url_preprint = ""
url_code = ""
url_dataset = ""
url_pdf = "pdf/rai-edupar2017.pdf"
url_project = ""
url_slides = ""
url_video = ""
url_poster = ""
url_source = ""
selected = true
+++
# Featured image
# Place your image in the `static/img/` folder and reference its filename below, e.g. `image = "example.jpg"`.
[header]
image = ""
caption = ""
+++