Graph neural networks (GNNs) have been demonstrated to be powerful in modeling graph-structured data. However, training GNNs usually requires abundant task-specific labeled data, which is often arduously expensive to obtain. One effective way to reduce the labeling effort is to pre-train an expressive GNN model on unlabeled data with self-supervision and then transfer the learned model to downstream tasks with only a few labels. In this paper, we present the GPT-GNN framework to initialize GNNs by generative pre-training. GPT-GNN introduces a self-supervised attributed graph generation task to pre-train a GNN so that it can capture the structural and semantic properties of the graph. We factorize the likelihood of the graph generation into two components: 1) Attribute Generation and 2) Edge Generation. By modeling both components, GPT-GNN captures the inherent dependency between node attributes and graph structure during the generative process. Comprehensive experiments on the billion-scale Open Academic Graph and Amazon recommendation data demonstrate that GPT-GNN significantly outperforms state-of-the-art GNN models without pre-training by up to 9.1% across various downstream tasks.
%0 Conference Paper
%1 hu_gpt-gnn_2020
%A Hu, Ziniu
%A Dong, Yuxiao
%A Wang, Kuansan
%A Chang, Kai-Wei
%A Sun, Yizhou
%B arXiv:2006.15437 cs, stat
%D 2020
%K pretrain ssl gnn
%T GPT-GNN: Generative Pre-Training of Graph Neural Networks
%U http://arxiv.org/abs/2006.15437
%X Graph neural networks (GNNs) have been demonstrated to be powerful in modeling graph-structured data. However, training GNNs usually requires abundant task-specific labeled data, which is often arduously expensive to obtain. One effective way to reduce the labeling effort is to pre-train an expressive GNN model on unlabeled data with self-supervision and then transfer the learned model to downstream tasks with only a few labels. In this paper, we present the GPT-GNN framework to initialize GNNs by generative pre-training. GPT-GNN introduces a self-supervised attributed graph generation task to pre-train a GNN so that it can capture the structural and semantic properties of the graph. We factorize the likelihood of the graph generation into two components: 1) Attribute Generation and 2) Edge Generation. By modeling both components, GPT-GNN captures the inherent dependency between node attributes and graph structure during the generative process. Comprehensive experiments on the billion-scale Open Academic Graph and Amazon recommendation data demonstrate that GPT-GNN significantly outperforms state-of-the-art GNN models without pre-training by up to 9.1% across various downstream tasks.
%Z Comment: Published on KDD 2020
@inproceedings{hu_gpt-gnn_2020,
  abstract      = {Graph neural networks (GNNs) have been demonstrated to be powerful in modeling graph-structured data. However, training GNNs usually requires abundant task-specific labeled data, which is often arduously expensive to obtain. One effective way to reduce the labeling effort is to pre-train an expressive GNN model on unlabeled data with self-supervision and then transfer the learned model to downstream tasks with only a few labels. In this paper, we present the GPT-GNN framework to initialize GNNs by generative pre-training. GPT-GNN introduces a self-supervised attributed graph generation task to pre-train a GNN so that it can capture the structural and semantic properties of the graph. We factorize the likelihood of the graph generation into two components: 1) Attribute Generation and 2) Edge Generation. By modeling both components, GPT-GNN captures the inherent dependency between node attributes and graph structure during the generative process. Comprehensive experiments on the billion-scale Open Academic Graph and Amazon recommendation data demonstrate that GPT-GNN significantly outperforms state-of-the-art GNN models without pre-training by up to 9.1\% across various downstream tasks.},
  added-at      = {2021-04-08T07:18:22.000+0200},
  annote        = {Comment: Published on KDD 2020},
  author        = {Hu, Ziniu and Dong, Yuxiao and Wang, Kuansan and Chang, Kai-Wei and Sun, Yizhou},
  biburl        = {https://www.bibsonomy.org/bibtex/26cda4262d6b339d6647c63cbbb67fe0e/mengcao},
  booktitle     = {Proceedings of the 26th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD} '20)},
  publisher     = {ACM},
  pages         = {1857--1867},
  doi           = {10.1145/3394486.3403237},
  eprint        = {2006.15437},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  file          = {Hu 等。 - 2020 - GPT-GNN Generative Pre-Training of Graph Neural N.pdf:files/32/Hu 等。 - 2020 - GPT-GNN Generative Pre-Training of Graph Neural N.pdf:application/pdf},
  interhash     = {0ee2049d8152af095a7faf523ce6386e},
  intrahash     = {6cda4262d6b339d6647c63cbbb67fe0e},
  keywords      = {pretrain ssl gnn},
  language      = {en},
  month         = aug,
  shorttitle    = {{GPT-GNN}},
  timestamp     = {2021-04-25T13:21:04.000+0200},
  title         = {{GPT-GNN}: Generative Pre-Training of Graph Neural Networks},
  url           = {http://arxiv.org/abs/2006.15437},
  urldate       = {2020-11-29},
  year          = {2020}
}