A central challenge in human genomics is to understand the cellular, evolutionary, and clinical
significance of genetic variants. Here we introduce a unified population-genetic and
machine-learning model, called Linear Allele-Specific Selection InferencE (LASSIE), for estimating the
fitness effects of all potential single-nucleotide variants, based on polymorphism data and
predictive genomic features. We applied LASSIE to 51 high-coverage genome sequences annotated
with 33 genomic features, and constructed a map of allele-specific selection coefficients across all
protein-coding sequences in the human genome. We show that this map is informative about both
human evolution and disease.
%0 Journal Article
%1 huang2018estimation
%A Huang, Yi-Fei
%A Siepel, Adam
%D 2018
%I Cold Spring Harbor Laboratory
%K DFE annotation distribution_of_fitness_effects machine_learning methods
%R 10.1101/441337
%T Estimation of allele-specific fitness effects across human protein-coding sequences and implications for disease
%U https://doi.org/10.1101/441337
%X A central challenge in human genomics is to understand the cellular, evolutionary, and clinical
significance of genetic variants. Here we introduce a unified population-genetic and
machine-learning model, called Linear Allele-Specific Selection InferencE (LASSIE), for estimating the
fitness effects of all potential single-nucleotide variants, based on polymorphism data and
predictive genomic features. We applied LASSIE to 51 high-coverage genome sequences annotated
with 33 genomic features, and constructed a map of allele-specific selection coefficients across all
protein-coding sequences in the human genome. We show that this map is informative about both
human evolution and disease.
@comment{
  Preprint record for Huang and Siepel (2018), exported from BibSonomy.
  NOTE(review): the journal field was added because the article type requires it;
  "bioRxiv" is inferred from the 10.1101 DOI prefix and the publisher -- confirm.
  The added-at, biburl, interhash, intrahash and timestamp fields are BibSonomy
  bookkeeping and are ignored by standard bibliography styles.
}
@article{huang2018estimation,
  author    = {Huang, Yi-Fei and Siepel, Adam},
  title     = {Estimation of allele-specific fitness effects across human protein-coding sequences and implications for disease},
  journal   = {bioRxiv},
  publisher = {Cold Spring Harbor Laboratory},
  year      = 2018,
  month     = oct,
  doi       = {10.1101/441337},
  url       = {https://doi.org/10.1101/441337},
  abstract  = {A central challenge in human genomics is to understand the cellular, evolutionary, and clinical
significance of genetic variants. Here we introduce a unified population-genetic and
machine-learning model, called Linear Allele-Specific Selection InferencE (LASSIE), for estimating the
fitness effects of all potential single-nucleotide variants, based on polymorphism data and
predictive genomic features. We applied LASSIE to 51 high-coverage genome sequences annotated
with 33 genomic features, and constructed a map of allele-specific selection coefficients across all
protein-coding sequences in the human genome. We show that this map is informative about both
human evolution and disease.},
  keywords  = {DFE annotation distribution_of_fitness_effects machine_learning methods},
  added-at  = {2019-05-15T00:14:07.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2eb5873ae83ddd31c68b65726a5360e34/peter.ralph},
  interhash = {03906edbd9c6e2539d3698b0769b1528},
  intrahash = {eb5873ae83ddd31c68b65726a5360e34},
  timestamp = {2019-05-15T00:14:07.000+0200},
}