The polyhedral model provides powerful abstractions to optimize
loop nests with regular accesses. Affine transformations in this
model capture a complex sequence of execution-reordering loop
transformations that can improve performance by parallelization
as well as locality enhancement. Although a significant body of
research has addressed affine scheduling and partitioning, the
problem of automatically finding good affine transforms for
communication-optimized coarse-grained parallelization together
with locality optimization for the general case of
arbitrarily-nested loop sequences remains a challenging problem.
We propose an automatic transformation framework to optimize
arbitrarily-nested loop sequences with affine dependences for
parallelism and locality simultaneously. The approach finds good
tiling hyperplanes by embedding a powerful and versatile cost
function into an Integer Linear Programming formulation. These
tiling hyperplanes are used for communication-minimized
coarse-grained parallelization as well as for locality
optimization. The approach enables the minimization of
inter-tile communication volume in the processor space, and
minimization of reuse distances for local execution at each
node. Programs requiring one-dimensional versus
multi-dimensional time schedules (with scheduling-based
approaches) are all handled with the same algorithm.
Synchronization-free parallelism, permutable loops or pipelined
parallelism at various levels can be detected. Preliminary
studies of the framework show promising results.
%0 Book Section
%1 Bondhugula2008-oy
%A Bondhugula, Uday
%A Baskaran, Muthu
%A Krishnamoorthy, Sriram
%A Ramanujam, J
%A Rountev, Atanas
%A Sadayappan, P
%B Compiler Construction
%D 2008
%I Springer Berlin Heidelberg
%K Expose Pluto
%P 132--146
%T Automatic Transformations for Communication-Minimized
Parallelization and Locality Optimization in the Polyhedral
Model
%X The polyhedral model provides powerful abstractions to optimize
loop nests with regular accesses. Affine transformations in this
model capture a complex sequence of execution-reordering loop
transformations that can improve performance by parallelization
as well as locality enhancement. Although a significant body of
research has addressed affine scheduling and partitioning, the
problem of automatically finding good affine transforms for
communication-optimized coarse-grained parallelization together
with locality optimization for the general case of
arbitrarily-nested loop sequences remains a challenging problem.
We propose an automatic transformation framework to optimize
arbitrarily-nested loop sequences with affine dependences for
parallelism and locality simultaneously. The approach finds good
tiling hyperplanes by embedding a powerful and versatile cost
function into an Integer Linear Programming formulation. These
tiling hyperplanes are used for communication-minimized
coarse-grained parallelization as well as for locality
optimization. The approach enables the minimization of
inter-tile communication volume in the processor space, and
minimization of reuse distances for local execution at each
node. Programs requiring one-dimensional versus
multi-dimensional time schedules (with scheduling-based
approaches) are all handled with the same algorithm.
Synchronization-free parallelism, permutable loops or pipelined
parallelism at various levels can be detected. Preliminary
studies of the framework show promising results.
@incollection{Bondhugula2008-oy,
abstract = {The polyhedral model provides powerful abstractions to optimize
loop nests with regular accesses. Affine transformations in this
model capture a complex sequence of execution-reordering loop
transformations that can improve performance by parallelization
as well as locality enhancement. Although a significant body of
research has addressed affine scheduling and partitioning, the
problem of automatically finding good affine transforms for
communication-optimized coarse-grained parallelization together
with locality optimization for the general case of
arbitrarily-nested loop sequences remains a challenging problem.
We propose an automatic transformation framework to optimize
arbitrarily-nested loop sequences with affine dependences for
parallelism and locality simultaneously. The approach finds good
tiling hyperplanes by embedding a powerful and versatile cost
function into an Integer Linear Programming formulation. These
tiling hyperplanes are used for communication-minimized
coarse-grained parallelization as well as for locality
optimization. The approach enables the minimization of
inter-tile communication volume in the processor space, and
minimization of reuse distances for local execution at each
node. Programs requiring one-dimensional versus
multi-dimensional time schedules (with scheduling-based
approaches) are all handled with the same algorithm.
Synchronization-free parallelism, permutable loops or pipelined
parallelism at various levels can be detected. Preliminary
studies of the framework show promising results.},
added-at = {2015-07-14T13:30:28.000+0200},
author = {Bondhugula, Uday and Baskaran, Muthu and Krishnamoorthy, Sriram and Ramanujam, J and Rountev, Atanas and Sadayappan, P},
biburl = {https://www.bibsonomy.org/bibtex/22655989a2999e00a61f961d8c9bfc7ac/christophv},
booktitle = {Compiler Construction},
interhash = {bacdd44acb9333c7fa22977f940ebe10},
intrahash = {2655989a2999e00a61f961d8c9bfc7ac},
keywords = {Expose Pluto},
pages = {132--146},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
timestamp = {2016-01-04T14:22:08.000+0100},
title = {Automatic Transformations for {Communication-Minimized}
Parallelization and Locality Optimization in the Polyhedral
Model},
year = 2008
}