Sort is a fundamental kernel used in many database operations. In-memory sorts are now feasible; sort performance is limited by compute flops and main memory bandwidth rather than I/O. In [29], we had earlier presented a competitive analysis of comparison and non-comparison based sorting algorithms on CPUs and GPUs. In this report, we extend this comparison to the Intel Many Integrated Core (MIC) architecture. We evaluate radix sort on Knights Ferry (an implementation of Intel MIC architecture), obtaining a performance gain of 2.2X and 1.7X over the best sort performance on the Intel Core i7 CPU and GTX 280 respectively. We also improve the performance of GPU radix sort by 1.6X over previous results.
%0 Report
%1 IntelManyCoreSort
%A Satish, Nadathur
%A Kim, Changkyu
%A Chhugani, Jatin
%A Nguyen, Anthony D.
%A Lee, Victor W.
%A Kim, Daehyun
%A Dubey, Pradeep
%D 2010
%K Evaluation GPU Intel MIC ManyIntegratedCores MultiCore Sort
%T Fast Sort on CPUs, GPUs and Intel MIC Architectures
%U http://techresearch.intel.com/userfiles/en-us/FASTsort_CPUsGPUs_IntelMICarchitectures.pdf
%X Sort is a fundamental kernel used in many database operations. In-memory sorts are now feasible; sort performance is limited by compute flops and main memory bandwidth rather than I/O. In [29], we had earlier presented a competitive analysis of comparison and non-comparison based sorting algorithms on CPUs and GPUs. In this report, we extend this comparison to the Intel Many Integrated Core (MIC) architecture. We evaluate radix sort on Knights Ferry (an implementation of Intel MIC architecture), obtaining a performance gain of 2.2X and 1.7X over the best sort performance on the Intel Core i7 CPU and GTX 280 respectively. We also improve the performance of GPU radix sort by 1.6X over previous results.
% Intel Labs technical report (2010) on sorting across CPUs, GPUs and Intel MIC.
% Braces inside the title keep acronyms and the proper noun capitalised under
% styles that sentence-case titles. The trailing fields (added-at, biburl,
% interhash, intrahash, timestamp) appear to be export metadata from the
% bookmarking service named in biburl; standard styles ignore unknown fields.
@techreport{IntelManyCoreSort,
  author      = {Satish, Nadathur and Kim, Changkyu and Chhugani, Jatin and Nguyen, Anthony D. and Lee, Victor W. and Kim, Daehyun and Dubey, Pradeep},
  title       = {Fast Sort on {CPUs}, {GPUs} and {Intel} {MIC} Architectures},
  type        = {Technical Report},
  institution = {Intel Labs},
  year        = {2010},
  url         = {http://techresearch.intel.com/userfiles/en-us/FASTsort_CPUsGPUs_IntelMICarchitectures.pdf},
  keywords    = {Evaluation GPU Intel MIC ManyIntegratedCores MultiCore Sort},
  abstract    = {Sort is a fundamental kernel used in many database operations. In-memory sorts are now feasible; sort performance is limited by compute flops and main memory bandwidth rather than I/O. In [29], we had earlier presented a competitive analysis of comparison and non-comparison based sorting algorithms on CPUs and GPUs. In this report, we extend this comparison to the Intel Many Integrated Core (MIC) architecture. We evaluate radix sort on Knights Ferry (an implementation of Intel MIC architecture), obtaining a performance gain of 2.2X and 1.7X over the best sort performance on the Intel Core i7 CPU and GTX 280 respectively. We also improve the performance of GPU radix sort by 1.6X over previous results.},
  added-at    = {2010-07-20T15:36:35.000+0200},
  timestamp   = {2010-07-20T15:36:35.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/2754abf0c567e2065f5189275e63d6d42/gron},
  interhash   = {47dc21b0b50c84218a906c4fcd0459e5},
  intrahash   = {754abf0c567e2065f5189275e63d6d42},
}