C. Beckmann and C. Polychronopoulos. Supercomputing '90: Proceedings of the 1990 ACM/IEEE conference on Supercomputing, pages 180--189. Los Alamitos, CA, USA, IEEE Computer Society Press, (1990)
Abstract
Many recent studies have considered the importance of barrier synchronization overhead on parallel loop performance, especially for large-scale parallel machines. This paper describes a hardware scheme for supporting fast barrier synchronization. It allows barrier synchronization to be performed within a single instruction cycle for moderately sized systems, and is scalable with logarithmic increase in synchronization time. It supports a large number of concurrent barriers, and can also be used to support a number of different barrier synchronization schemes. Simulation results show that under reasonable assumptions, this hardware can decrease parallel loop execution time significantly, especially for statically scheduled loops.
%0 Conference Paper
%1 110433
%A Beckmann, Carl J.
%A Polychronopoulos, Constantine D.
%B Supercomputing '90: Proceedings of the 1990 ACM/IEEE conference on Supercomputing
%C Los Alamitos, CA, USA
%D 1990
%I IEEE Computer Society Press
%K Barrier Fuzzy Hardware Split parallel performance synchronization
%P 180--189
%T Fast Barrier Synchronization Hardware
%U http://portal.acm.org/citation.cfm?id=110433
%X Many recent studies have considered the importance of barrier synchronization overhead on parallel loop performance, especially for large-scale parallel machines. This paper describes a hardware scheme for supporting fast barrier synchronization. It allows barrier synchronization to be performed within a single instruction cycle for moderately sized systems, and is scalable with logarithmic increase in synchronization time. It supports a large number of concurrent barriers, and can also be used to support a number of different barrier synchronization schemes. Simulation results show that under reasonable assumptions, this hardware can decrease parallel loop execution time significantly, especially for statically scheduled loops.
%@ 0-89791-412-0
% Conference paper by Beckmann and Polychronopoulos in the Supercomputing '90 proceedings.
% `address` holds the publisher's city; the conference city is kept in `venue`
% because biblatex treats `address` and `location` as the same field, so
% setting both would make the two values collide.
@inproceedings{110433,
  author      = {Beckmann, Carl J. and Polychronopoulos, Constantine D.},
  title       = {Fast Barrier Synchronization Hardware},
  booktitle   = {Supercomputing '90: Proceedings of the 1990 {ACM/IEEE} conference on Supercomputing},
  pages       = {180--189},
  publisher   = {IEEE Computer Society Press},
  address     = {Los Alamitos, CA, USA},
  venue       = {New York, New York, United States},
  year        = {1990},
  isbn        = {0-89791-412-0},
  url         = {http://portal.acm.org/citation.cfm?id=110433},
  abstract    = {Many recent studies have considered the importance of barrier synchronization overhead on parallel loop performance, especially for large-scale parallel machines. This paper describes a hardware scheme for supporting fast barrier synchronization. It allows barrier synchronization to be performed within a single instruction cycle for moderately sized systems, and is scalable with logarithmic increase in synchronization time. It supports a large number of concurrent barriers, and can also be used to support a number of different barrier synchronization schemes. Simulation results show that under reasonable assumptions, this hardware can decrease parallel loop execution time significantly, especially for statically scheduled loops.},
  keywords    = {Barrier Fuzzy Hardware Split parallel performance synchronization},
  description = {Fast barrier synchronization hardware},
  added-at    = {2010-01-28T15:26:21.000+0100},
  timestamp   = {2010-01-28T15:26:21.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2032060344df75b83f7554436302130d8/gron},
  interhash   = {5d2fc05fc8e31380a1b5e2d9f8ee1942},
  intrahash   = {032060344df75b83f7554436302130d8},
}