The performance of superscalar processors depends on many parameters with correlated effects. This paper explores the relations between some of these parameters, and more particularly, the requirement in instruction fetch bandwidth. We introduce new enhancements to increase the bandwidth of conventional instruction fetch engines. However, experiments show that the performance does not increase proportionally to the fetch. Once the measured IPC is half the instruction fetch bandwidth, increasing the fetch bandwidth brings very little improvement. In order to better understand this behavior, we develop a model from the empirical observation that the available instruction parallelism grows as the square root of the instruction window size. From the model, we derive that the fetch bandwidth requirement grows as the square root of the distance between mispredicted branches. We also verify experimentally that, to double the IPC, one should both double the fetch bandwidth and decrease the number of mispredicted branches fourfold.
Description
International Journal of Parallel Programming, Volume 29, Number 1 - SpringerLink
%0 Journal Article
%1 springerlink:10.1023/A:1026431920605
%A Michaud, Pierre
%A Seznec, André
%A Jourdan, Stéphan
%D 2001
%I Springer Netherlands
%J International Journal of Parallel Programming
%K CPU diminishing optimization returns
%N 1
%P 35-58
%R 10.1023/A:1026431920605
%T An Exploration of Instruction Fetch Requirement in Out-of-Order Superscalar Processors
%U http://dx.doi.org/10.1023/A:1026431920605
%V 29
%X The performance of superscalar processors depends on many parameters with correlated effects. This paper explores the relations between some of these parameters, and more particularly, the requirement in instruction fetch bandwidth. We introduce new enhancements to increase the bandwidth of conventional instruction fetch engines. However, experiments show that the performance does not increase proportionally to the fetch. Once the measured IPC is half the instruction fetch bandwidth, increasing the fetch bandwidth brings very little improvement. In order to better understand this behavior, we develop a model from the empirical observation that the available instruction parallelism grows as the square root of the instruction window size. From the model, we derive that the fetch bandwidth requirement grows as the square root of the distance between mispredicted branches. We also verify experimentally that, to double the IPC, one should both double the fetch bandwidth and decrease the number of mispredicted branches fourfold.
@article{springerlink:10.1023/A:1026431920605,
  abstract = {The performance of superscalar processors depends on many parameters with correlated effects. This paper explores the relations between some of these parameters, and more particularly, the requirement in instruction fetch bandwidth. We introduce new enhancements to increase the bandwidth of conventional instruction fetch engines. However, experiments show that the performance does not increase proportionally to the fetch. Once the measured IPC is half the instruction fetch bandwidth, increasing the fetch bandwidth brings very little improvement. In order to better understand this behavior, we develop a model from the empirical observation that the available instruction parallelism grows as the square root of the instruction window size. From the model, we derive that the fetch bandwidth requirement grows as the square root of the distance between mispredicted branches. We also verify experimentally that, to double the IPC, one should both double the fetch bandwidth and decrease the number of mispredicted branches fourfold.},
  added-at = {2012-09-26T15:18:10.000+0200},
  author = {Michaud, Pierre and Seznec, Andr{\'e} and Jourdan, St{\'e}phan},
  biburl = {https://www.bibsonomy.org/bibtex/282afac4f491e8226720cd29e4cf6fc7e/gron},
  description = {International Journal of Parallel Programming, Volume 29, Number 1 - SpringerLink},
  doi = {10.1023/A:1026431920605},
  interhash = {60a0e845b04a7be3c3466c43b3468ca0},
  intrahash = {82afac4f491e8226720cd29e4cf6fc7e},
  issn = {0885-7458},
  journal = {International Journal of Parallel Programming},
  keyword = {Computer Science},
  keywords = {CPU diminishing optimization returns},
  number = {1},
  pages = {35--58},
  publisher = {Springer Netherlands},
  timestamp = {2012-09-26T15:18:11.000+0200},
  title = {An Exploration of Instruction Fetch Requirement in Out-of-Order Superscalar Processors},
  url = {http://dx.doi.org/10.1023/A:1026431920605},
  volume = {29},
  year = {2001}
}