% ---------------------------------------------------------------------------
% Records for one paper: "Inferring Missing Categorical Information in Noisy
% and Sparse Web Markup" (Tempelmeier, Demidova, Dietze; 2018).
%
% All six records below share the same interhash, i.e. they describe the same
% publication. They differ in the account named at the end of biburl and in
% how complete the metadata is. Three of them originally carried the same
% citation key; since a repeated key makes the later copies unreachable, the
% first occurrence keeps the key and the later ones carry a b / c suffix.
%
% Conventions used in this block:
%   - doi holds the bare DOI (no resolver prefix)
%   - page ranges use a double hyphen
%   - percent signs inside abstract are escaped so that styles which print
%     the abstract do not turn the rest of the line into a TeX comment
%   - the proper noun Web is brace-protected in title against sentence-casing
% ---------------------------------------------------------------------------

% Record 1 of 6 - conference version with abstract; biburl account: dietze.
% Keeps the original citation key (first occurrence in the file).
@inproceedings{tempelmeier2018inferring,
  abstract  = {Embedded markup of Web pages has seen widespread adoption throughout the past years driven by standards such as RDFa and Microdata and initiatives such as schema.org, where recent studies show an adoption by 39\% of all Web pages already in 2016. While this constitutes an important information source for tasks such as Web search, Web page classification or knowledge graph augmentation, individual markup nodes are usually sparsely described and often lack essential information. For instance, from 26 million nodes describing events within the Common Crawl in 2016, 59\% of nodes provide less than six statements and only 257,000 nodes (0.96\%) are typed with more specific event subtypes. Nevertheless, given the scale and diversity of Web markup data, nodes that provide missing information can be obtained from the Web in large quantities, in particular for categorical properties. Such data constitutes potential training data for inferring missing information to significantly augment sparsely described nodes. In this work, we introduce a supervised approach for inferring missing categorical properties in Web markup. Our experiments, conducted on properties of events and movies, show a performance of 79\% and 83\% F1 score correspondingly, significantly outperforming existing baselines.},
  added-at  = {2020-01-22T12:05:14.000+0100},
  author    = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/273b7a03a0231fa37ae2b618084a5a103/dietze},
  booktitle = {Proceedings of The Web Conference 2018 (WWW 2018)},
  doi       = {10.1145/3178876.3186028},
  interhash = {6646fbe66f89849bed4802650e9321c8},
  intrahash = {73b7a03a0231fa37ae2b618084a5a103},
  keywords  = {l3s myown},
  publisher = {ACM},
  timestamp = {2020-01-23T10:04:06.000+0100},
  title     = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url       = {https://arxiv.org/pdf/1803.00446.pdf},
  year      = 2018,
}

% Record 2 of 6 - conference version with ISBN and page range, no abstract;
% biburl account: dietze. The DOI was stored with a resolver prefix and is
% now the bare identifier.
@inproceedings{Tempelmeier2018,
  added-at  = {2019-01-22T15:47:53.000+0100},
  author    = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/2a9bea7f56e6feb01de7b2abcac28d04e/dietze},
  booktitle = {WWW '18 Proceedings of the 2018 World Wide Web Conference},
  doi       = {10.1145/3178876.3186028},
  interhash = {6646fbe66f89849bed4802650e9321c8},
  intrahash = {a9bea7f56e6feb01de7b2abcac28d04e},
  isbn      = {978-1-4503-5639-8},
  keywords  = {NEW l3s myown},
  pages     = {1297--1306},
  publisher = {ACM},
  timestamp = {2020-01-22T11:52:22.000+0100},
  title     = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url       = {https://arxiv.org/pdf/1803.00446.pdf},
  year      = 2018,
}

% Record 3 of 6 - conference version; biburl account: dblp.
% The doi field is derived from the DOI URL already present in ee.
% NOTE(review): crossref names conf/www/2018, which is not defined in the part
% of the file visible here. Confirm that the parent record exists (classic
% BibTeX additionally needs it to appear after this entry).
@inproceedings{conf/www/TempelmeierDD18,
  added-at  = {2018-11-21T00:00:00.000+0100},
  author    = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/238cb6d93d3d7ae53dc7f1dce3bd488fc/dblp},
  booktitle = {WWW},
  crossref  = {conf/www/2018},
  doi       = {10.1145/3178876.3186028},
  editor    = {Champin, Pierre-Antoine and Gandon, Fabien and Lalmas, Mounia and Ipeirotis, Panagiotis G.},
  ee        = {https://doi.org/10.1145/3178876.3186028},
  interhash = {6646fbe66f89849bed4802650e9321c8},
  intrahash = {38cb6d93d3d7ae53dc7f1dce3bd488fc},
  keywords  = {dblp},
  pages     = {1297--1306},
  publisher = {ACM},
  timestamp = {2024-04-09T12:54:19.000+0200},
  title     = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url       = {http://dblp.uni-trier.de/db/conf/www/www2018.html#TempelmeierDD18},
  year      = 2018,
}

% Record 4 of 6 - preprint version (journal CoRR); biburl account: dblp.
% eprint / eprinttype are derived from the arXiv URL already present in ee;
% journal and volume are kept so styles without eprint support still render.
@article{journals/corr/abs-1803-00446,
  added-at   = {2018-08-13T00:00:00.000+0200},
  author     = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl     = {https://www.bibsonomy.org/bibtex/2aaf9c8d7cd5137c9c02ca46ec82ea019/dblp},
  ee         = {http://arxiv.org/abs/1803.00446},
  eprint     = {1803.00446},
  eprinttype = {arXiv},
  interhash  = {6646fbe66f89849bed4802650e9321c8},
  intrahash  = {aaf9c8d7cd5137c9c02ca46ec82ea019},
  journal    = {CoRR},
  keywords   = {dblp},
  timestamp  = {2018-08-14T11:54:04.000+0200},
  title      = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url        = {http://dblp.uni-trier.de/db/journals/corr/corr1803.html#abs-1803-00446},
  volume     = {abs/1803.00446},
  year       = 2018,
}

% Record 5 of 6 - conference version with abstract; biburl account:
% ntempelmeier. Originally keyed tempelmeier2018inferring (a repeat of
% record 1); suffixed with b so the key is unique.
@inproceedings{tempelmeier2018inferringb,
  abstract  = {Embedded markup of Web pages has seen widespread adoption throughout the past years driven by standards such as RDFa and Microdata and initiatives such as schema.org, where recent studies show an adoption by 39\% of all Web pages already in 2016. While this constitutes an important information source for tasks such as Web search, Web page classification or knowledge graph augmentation, individual markup nodes are usually sparsely described and often lack essential information. For instance, from 26 million nodes describing events within the Common Crawl in 2016, 59\% of nodes provide less than six statements and only 257,000 nodes (0.96\%) are typed with more specific event subtypes. Nevertheless, given the scale and diversity of Web markup data, nodes that provide missing information can be obtained from the Web in large quantities, in particular for categorical properties. Such data constitutes potential training data for inferring missing information to significantly augment sparsely described nodes. In this work, we introduce a supervised approach for inferring missing categorical properties in Web markup. Our experiments, conducted on properties of events and movies, show a performance of 79\% and 83\% F1 score correspondingly, significantly outperforming existing baselines.},
  added-at  = {2018-04-10T16:06:23.000+0200},
  author    = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/273b7a03a0231fa37ae2b618084a5a103/ntempelmeier},
  booktitle = {Proceedings of The Web Conference 2018 (WWW 2018)},
  doi       = {10.1145/3178876.3186028},
  interhash = {6646fbe66f89849bed4802650e9321c8},
  intrahash = {73b7a03a0231fa37ae2b618084a5a103},
  keywords  = {myown},
  publisher = {ACM},
  timestamp = {2019-01-29T14:37:07.000+0100},
  title     = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url       = {https://arxiv.org/pdf/1803.00446.pdf},
  year      = 2018,
}

% Record 6 of 6 - conference version with abstract; biburl account: demidova.
% Originally keyed tempelmeier2018inferring (a repeat of record 1); suffixed
% with c so the key is unique.
@inproceedings{tempelmeier2018inferringc,
  abstract  = {Embedded markup of Web pages has seen widespread adoption throughout the past years driven by standards such as RDFa and Microdata and initiatives such as schema.org, where recent studies show an adoption by 39\% of all Web pages already in 2016. While this constitutes an important information source for tasks such as Web search, Web page classification or knowledge graph augmentation, individual markup nodes are usually sparsely described and often lack essential information. For instance, from 26 million nodes describing events within the Common Crawl in 2016, 59\% of nodes provide less than six statements and only 257,000 nodes (0.96\%) are typed with more specific event subtypes. Nevertheless, given the scale and diversity of Web markup data, nodes that provide missing information can be obtained from the Web in large quantities, in particular for categorical properties. Such data constitutes potential training data for inferring missing information to significantly augment sparsely described nodes. In this work, we introduce a supervised approach for inferring missing categorical properties in Web markup. Our experiments, conducted on properties of events and movies, show a performance of 79\% and 83\% F1 score correspondingly, significantly outperforming existing baselines.},
  added-at  = {2017-12-22T11:56:26.000+0100},
  author    = {Tempelmeier, Nicolas and Demidova, Elena and Dietze, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/273b7a03a0231fa37ae2b618084a5a103/demidova},
  booktitle = {Proceedings of The Web Conference 2018 (WWW 2018)},
  doi       = {10.1145/3178876.3186028},
  interhash = {6646fbe66f89849bed4802650e9321c8},
  intrahash = {73b7a03a0231fa37ae2b618084a5a103},
  keywords  = {data4urbanmobility myown tempelmeier},
  publisher = {ACM},
  timestamp = {2018-03-02T18:41:46.000+0100},
  title     = {Inferring Missing Categorical Information in Noisy and Sparse {Web} Markup},
  url       = {https://arxiv.org/pdf/1803.00446.pdf},
  year      = 2018,
}