Discovering entity mentions that are out of a Knowledge Base (KB) from texts
plays a critical role in KB maintenance, but has not yet been fully explored.
The current methods are mostly limited to the simple threshold-based approach
and feature-based classification; the datasets for evaluation are relatively
rare. In this work, we propose BLINKout, a new BERT-based Entity Linking (EL)
method which can identify mentions that do not have a corresponding KB entity
by matching them to a special NIL entity. To this end, we integrate novel
techniques including NIL representation, NIL classification, and synonym
enhancement. We also propose Ontology Pruning and Versioning strategies to
construct out-of-KB mentions from normal, in-KB EL datasets. Results on four
datasets of clinical notes and publications show that BLINKout outperforms
existing methods to detect out-of-KB mentions for medical ontologies UMLS and
SNOMED CT.
Beschreibung
[2302.07189] Reveal the Unknown: Out-of-Knowledge-Base Mention Discovery with Entity Linking
%0 Generic
%1 dong2023reveal
%A Dong, Hang
%A Chen, Jiaoyan
%A He, Yuan
%A Liu, Yinan
%A Horrocks, Ian
%D 2023
%K BERT-based BLINK KB-enrichment NIL-entities biomedical_texts clinical_nlp clinical_notes entity_linking knowledge-base-enrichment medmentions myown ontologies out-of-KB unknown_entities
%T Reveal the Unknown: Out-of-Knowledge-Base Mention Discovery with Entity Linking
%U http://arxiv.org/abs/2302.07189
%X Discovering entity mentions that are out of a Knowledge Base (KB) from texts
plays a critical role in KB maintenance, but has not yet been fully explored.
The current methods are mostly limited to the simple threshold-based approach
and feature-based classification; the datasets for evaluation are relatively
rare. In this work, we propose BLINKout, a new BERT-based Entity Linking (EL)
method which can identify mentions that do not have a corresponding KB entity
by matching them to a special NIL entity. To this end, we integrate novel
techniques including NIL representation, NIL classification, and synonym
enhancement. We also propose Ontology Pruning and Versioning strategies to
construct out-of-KB mentions from normal, in-KB EL datasets. Results on four
datasets of clinical notes and publications show that BLINKout outperforms
existing methods to detect out-of-KB mentions for medical ontologies UMLS and
SNOMED CT.
% arXiv preprint: the arXiv identifier is stored in eprint/archiveprefix (not in note) so eprint-aware styles can format and link it.
@misc{dong2023reveal,
  author        = {Dong, Hang and Chen, Jiaoyan and He, Yuan and Liu, Yinan and Horrocks, Ian},
  title         = {Reveal the Unknown: Out-of-Knowledge-Base Mention Discovery with Entity Linking},
  year          = {2023},
  eprint        = {2302.07189},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2302.07189},
  abstract      = {Discovering entity mentions that are out of a Knowledge Base (KB) from texts
plays a critical role in KB maintenance, but has not yet been fully explored.
The current methods are mostly limited to the simple threshold-based approach
and feature-based classification; the datasets for evaluation are relatively
rare. In this work, we propose BLINKout, a new BERT-based Entity Linking (EL)
method which can identify mentions that do not have a corresponding KB entity
by matching them to a special NIL entity. To this end, we integrate novel
techniques including NIL representation, NIL classification, and synonym
enhancement. We also propose Ontology Pruning and Versioning strategies to
construct out-of-KB mentions from normal, in-KB EL datasets. Results on four
datasets of clinical notes and publications show that BLINKout outperforms
existing methods to detect out-of-KB mentions for medical ontologies UMLS and
SNOMED CT.},
  keywords      = {BERT-based BLINK KB-enrichment NIL-entities biomedical_texts clinical_nlp clinical_notes entity_linking knowledge-base-enrichment medmentions myown ontologies out-of-KB unknown_entities},
  description   = {[2302.07189] Reveal the Unknown: Out-of-Knowledge-Base Mention Discovery with Entity Linking},
  added-at      = {2023-02-15T09:57:31.000+0100},
  timestamp     = {2023-02-15T09:57:31.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/28d3d22e7789e0920f1a07da7a1f6bb75/hangdong},
  interhash     = {2d0fe7c2a0caa8bce2f4ff4e2f498b50},
  intrahash     = {8d3d22e7789e0920f1a07da7a1f6bb75},
}