As massive data acquisition and storage becomes increasingly affordable, a wide variety of enterprises are employing statisticians to engage in sophisticated data analysis. In this paper we highlight the emerging practice of Magnetic, Agile, Deep (MAD) data analysis as a radical departure from traditional Enterprise Data Warehouses and Business Intelligence. We present our design philosophy, techniques and experience providing MAD analytics for one of the world's largest advertising networks at Fox Audience Network, using the Greenplum parallel database system. We describe database design methodologies that support the agile working style of analysts in these settings. We present data-parallel algorithms for sophisticated statistical techniques, with a focus on density methods. Finally, we reflect on database system features that enable agile design and flexible algorithm development using both SQL and MapReduce interfaces over a variety of storage mechanisms.
%0 Journal Article
%1 cohen09madskills
%A Cohen, Jeffrey
%A Dolan, Brian
%A Dunlap, Mark
%A Hellerstein, Joseph M.
%A Welton, Caleb
%D 2009
%I VLDB Endowment
%J Proceedings of the VLDB Endowment
%K bigdata data-science
%N 2
%P 1481--1492
%T MAD skills: new analysis practices for big data
%U http://dl.acm.org/citation.cfm?id=1687553.1687576
%V 2
%X As massive data acquisition and storage becomes increasingly affordable, a wide variety of enterprises are employing statisticians to engage in sophisticated data analysis. In this paper we highlight the emerging practice of Magnetic, Agile, Deep (MAD) data analysis as a radical departure from traditional Enterprise Data Warehouses and Business Intelligence. We present our design philosophy, techniques and experience providing MAD analytics for one of the world's largest advertising networks at Fox Audience Network, using the Greenplum parallel database system. We describe database design methodologies that support the agile working style of analysts in these settings. We present data-parallel algorithms for sophisticated statistical techniques, with a focus on density methods. Finally, we reflect on database system features that enable agile design and flexible algorithm development using both SQL and MapReduce interfaces over a variety of storage mechanisms.
@article{cohen09madskills,
  abstract    = {As massive data acquisition and storage becomes increasingly affordable, a wide variety of enterprises are employing statisticians to engage in sophisticated data analysis. In this paper we highlight the emerging practice of Magnetic, Agile, Deep (MAD) data analysis as a radical departure from traditional Enterprise Data Warehouses and Business Intelligence. We present our design philosophy, techniques and experience providing MAD analytics for one of the world's largest advertising networks at Fox Audience Network, using the Greenplum parallel database system. We describe database design methodologies that support the agile working style of analysts in these settings. We present data-parallel algorithms for sophisticated statistical techniques, with a focus on \emph{density} methods. Finally, we reflect on database system features that enable agile design and flexible algorithm development using both SQL and MapReduce interfaces over a variety of storage mechanisms.},
  acmid       = {1687576},
  added-at    = {2013-06-10T07:59:46.000+0200},
  author      = {Cohen, Jeffrey and Dolan, Brian and Dunlap, Mark and Hellerstein, Joseph M. and Welton, Caleb},
  biburl      = {https://www.bibsonomy.org/bibtex/29412927e250f3baca2846d5e665c428e/sb3000},
  description = {MAD skills},
  doi         = {10.14778/1687553.1687576},
  interhash   = {5cc79a10ab83ff7a0b1b6a175ff56cd2},
  intrahash   = {9412927e250f3baca2846d5e665c428e},
  issn        = {2150-8097},
  issue_date  = {August 2009},
  journal     = {Proceedings of the VLDB Endowment},
  keywords    = {bigdata data-science},
  month       = aug,
  number      = {2},
  numpages    = {12},
  pages       = {1481--1492},
  publisher   = {VLDB Endowment},
  timestamp   = {2013-06-10T07:59:46.000+0200},
  title       = {{MAD} Skills: New Analysis Practices for Big Data},
  url         = {http://dl.acm.org/citation.cfm?id=1687553.1687576},
  volume      = {2},
  year        = {2009}
}