Scene parsing is challenging for unrestricted open vocabulary and diverse
scenes. In this paper, we exploit the capability of global context information
by different-region-based context aggregation through our pyramid pooling
module together with the proposed pyramid scene parsing network (PSPNet). Our
global prior representation is effective to produce good quality results on the
scene parsing task, while PSPNet provides a superior framework for pixel-level
prediction tasks. The proposed approach achieves state-of-the-art performance
on various datasets. It came first in ImageNet scene parsing challenge 2016,
PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields new
record of mIoU accuracy 85.4% on PASCAL VOC 2012 and accuracy 80.2% on
Cityscapes.
%0 Generic
%1 zhao2016pyramid
%A Zhao, Hengshuang
%A Shi, Jianping
%A Qi, Xiaojuan
%A Wang, Xiaogang
%A Jia, Jiaya
%D 2016
%K deep-learning
%T Pyramid Scene Parsing Network
%U http://arxiv.org/abs/1612.01105
%X Scene parsing is challenging for unrestricted open vocabulary and diverse
scenes. In this paper, we exploit the capability of global context information
by different-region-based context aggregation through our pyramid pooling
module together with the proposed pyramid scene parsing network (PSPNet). Our
global prior representation is effective to produce good quality results on the
scene parsing task, while PSPNet provides a superior framework for pixel-level
prediction tasks. The proposed approach achieves state-of-the-art performance
on various datasets. It came first in ImageNet scene parsing challenge 2016,
PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields new
record of mIoU accuracy 85.4% on PASCAL VOC 2012 and accuracy 80.2% on
Cityscapes.
@misc{zhao2016pyramid,
  abstract      = {Scene parsing is challenging for unrestricted open vocabulary and diverse
scenes. In this paper, we exploit the capability of global context information
by different-region-based context aggregation through our pyramid pooling
module together with the proposed pyramid scene parsing network (PSPNet). Our
global prior representation is effective to produce good quality results on the
scene parsing task, while PSPNet provides a superior framework for pixel-level
prediction tasks. The proposed approach achieves state-of-the-art performance
on various datasets. It came first in ImageNet scene parsing challenge 2016,
PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields new
record of mIoU accuracy 85.4\% on PASCAL VOC 2012 and accuracy 80.2\% on
Cityscapes.},
  added-at      = {2017-07-27T18:35:01.000+0200},
  archiveprefix = {arXiv},
  author        = {Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya},
  biburl        = {https://www.bibsonomy.org/bibtex/27e982f0240b9a71c8f21ab75caf5ec5f/axel.vogler},
  description   = {Pyramid Scene Parsing Network},
  eprint        = {1612.01105},
  interhash     = {91dd43390249915cd678007407f7bc14},
  intrahash     = {7e982f0240b9a71c8f21ab75caf5ec5f},
  keywords      = {deep-learning},
  note          = {CVPR 2017},
  timestamp     = {2017-07-27T18:35:01.000+0200},
  title         = {Pyramid Scene Parsing Network},
  url           = {http://arxiv.org/abs/1612.01105},
  year          = {2016}
}