Several techniques have been used to reduce the performance impact of process synchronization in fine-grained multiprocessor systems. These existing techniques tend to have long synchronization times or high shared-bus use, or they require complex and expensive hardware. A new technique is presented that uses distributed hardware locking queues to reduce both contention and latency to the minimum values that can be obtained using a shared-bus. This technique is shown to require at most two shared-bus transactions, with one transaction being typical. The latency for process continuation after obtaining a lock is reduced to near zero. Barrier synchronization using this distributed mechanism requires only one shared-bus transaction per processor involved in the barrier. This new technique is scalable and applicable to both new architectures and to existing systems, and is less complex than other hardware solutions.
%0 Conference Paper
%1 4115792
%A Johnson, Donald
%A Lilja, David
%A Riedl, John
%B Parallel Processing, 1994. ICPP 1994. International Conference on
%D 1994
%K Barrier Fuzzy Hardware Split synchronization
%P 268-275
%R 10.1109/ICPP.1994.23
%T A Distributed Hardware Mechanism for Process Synchronization on Shared-Bus Multiprocessors
%U http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=4115792
%V 2
%X Several techniques have been used to reduce the performance impact of process synchronization in fine-grained multiprocessor systems. These existing techniques tend to have long synchronization times or high shared-bus use, or they require complex and expensive hardware. A new technique is presented that uses distributed hardware locking queues to reduce both contention and latency to the minimum values that can be obtained using a shared-bus. This technique is shown to require at most two shared-bus transactions, with one transaction being typical. The latency for process continuation after obtaining a lock is reduced to near zero. Barrier synchronization using this distributed mechanism requires only one shared-bus transaction per processor involved in the barrier. This new technique is scalable and applicable to both new architectures and to existing systems, and is less complex than other hardware solutions.
% Conference paper in the ICPP 1994 proceedings, volume 2. The citation key is
% the IEEE Xplore arnumber that also appears in the url field; it is kept
% unchanged so existing citations continue to resolve. The non-standard export
% fields (added-at, biburl, description, interhash, intrahash, timestamp) are
% preserved verbatim.
@inproceedings{4115792,
  abstract    = {Several techniques have been used to reduce the performance impact of process synchronization in fine-grained multiprocessor systems. These existing techniques tend to have long synchronization times or high shared-bus use, or they require complex and expensive hardware. A new technique is presented that uses distributed hardware locking queues to reduce both contention and latency to the minimum values that can be obtained using a shared-bus. This technique is shown to require at most two shared-bus transactions, with one transaction being typical. The latency for process continuation after obtaining a lock is reduced to near zero. Barrier synchronization using this distributed mechanism requires only one shared-bus transaction per processor involved in the barrier. This new technique is scalable and applicable to both new architectures and to existing systems, and is less complex than other hardware solutions.},
  added-at    = {2010-01-28T16:18:10.000+0100},
  author      = {Johnson, Donald and Lilja, David and Riedl, John},
  biburl      = {https://www.bibsonomy.org/bibtex/2c6f0eb7188acf36c83212e8d505bea14/gron},
  booktitle   = {International Conference on Parallel Processing ({ICPP} 1994)},
  description = {IEEE Xplore# Wrapper Result},
  doi         = {10.1109/ICPP.1994.23},
  interhash   = {e6e8f792053d8dc37f14adfa23574814},
  intrahash   = {c6f0eb7188acf36c83212e8d505bea14},
  keywords    = {Barrier Fuzzy Hardware Split synchronization},
  month       = aug,
  pages       = {268--275},
  timestamp   = {2010-01-28T16:18:10.000+0100},
  title       = {A Distributed Hardware Mechanism for Process Synchronization on Shared-Bus Multiprocessors},
  url         = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=4115792},
  volume      = {2},
  year        = {1994},
}