@COMMENT{{Automatically generated - DO NOT MODIFY!}}

  AUTHOR = {Tim Leathart and Eibe Frank and Geoffrey Holmes and Bernhard Pfahringer},
  TITLE = {Probability Calibration Trees},
  BOOKTITLE = {Proc 9th Asian Conference on Machine Learning},
  PAGES = {145--160},
  YEAR = 2017,
  SERIES = {Seoul, Korea},
  PUBLISHER = {Proceedings of Machine Learning Research},
  PDF = {http://proceedings.mlr.press/v77/leathart17a/leathart17a.pdf},
  ABSTRACT = {Obtaining accurate and well calibrated probability estimates from classifiers is useful in
  many applications, for example, when minimising the expected cost of classifications. Existing
  methods of calibrating probability estimates are applied globally, ignoring the potential
  for improvements by applying a more fine-grained model. We propose probability
  calibration trees, a modification of logistic model trees that identifies regions of the input
  space in which different probability calibration models are learned to improve performance.
  We compare probability calibration trees to two widely used calibration methods---isotonic
  regression and Platt scaling---and show that our method results in lower root mean squared
  error on average than both methods, for estimates produced by a variety of base learners.}

  AUTHOR = {Paula Branco and Luis Torgo and Rita P. Ribeiro and Eibe Frank and Bernhard Pfahringer and Markus Michael Rau},
  TITLE = {Learning Through Utility Optimization in Regression Tasks},
  BOOKTITLE = {Proc 4th IEEE International Conference on Data Science and Advanced Analytics},
  YEAR = 2017,
  SERIES = {Tokyo, Japan},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2017/Utility_Optimization_DSAA17Copyright.pdf},
  ABSTRACT = {Accounting for misclassification costs is important
  in many practical applications of machine learning, and cost-sensitive
  techniques for classification have been studied extensively.
  Utility-based learning provides a generalization of purely
  cost-based approaches that considers both costs and benefits, enabling
  application to domains with complex cost-benefit settings.
  However, there is little work on utility- or cost-based learning
  for regression. In this paper, we formally define the problem of
  utility-based regression and propose a strategy for maximizing
  the utility of regression models. We verify our findings in a large
  set of experiments that show the advantage of our proposal in
  a diverse set of domains, learning algorithms and cost/benefit settings.}

  AUTHOR = {Michael Geilke and Andreas Karwath and Eibe Frank and Stefan Kramer},
  TITLE = {Online estimation of discrete, continuous, and conditional joint densities using classifier chains},
  JOURNAL = {Data Mining and Knowledge Discovery},
  YEAR = 2017,
  HTTP = {http://em.rdcu.be/wf/click?upn=KP7O1RED-2BlD0F9LDqGVeSJfjQ-2FNjUWKqFf9ptHCS3ic-3D_hbNl33h4MUjROltBqZcQP2xrOyNoYWgVukrnjKiZL-2FlLkymfHB4nYN-2F2VGZmg3s13iyg-2BnJ9PaMGJ1b7DIqF2yAy5H8yaQggp7iTSq0iXph2ULpWUIcZJkBi8DyQ8e3dDmcqbjZLqTiY4G32oQsbs9N4mrhUCdNp3Qah9aM3Y-2FgcVqnw4-2B0iwP-2FFjEiqa0FRzGaUI65rAPCOVuxT56gyiBaYzpzkMS1RicigZB7VyeANsgR1YFya-2Fs61vhM3NNdUBlmaLlZc4R4qhFhvLYenTg-3D-3D},
  ABSTRACT = {We address the problem of estimating discrete, continuous, and conditional
  joint densities online, i.e., the algorithm is only provided the current example and
  its current estimate for its update. The family of proposed online density estimators,
  estimation of densities online (EDO), uses classifier chains to model dependencies
  among features, where each classifier in the chain estimates the probability of one
  particular feature. Because a single chain may not provide a reliable estimate, we also
  consider ensembles of classifier chains and ensembles of weighted classifier chains.
  For all density estimators, we provide consistency proofs and propose algorithms to
  perform certain inference tasks. The empirical evaluation of the estimators is conducted
  in several experiments and on datasets of up to several millions of instances. In the
  discrete case, we compare our estimators to density estimates computed by Bayesian
  structure learners. In the continuous case, we compare them to a state-of-the-art online density estimator.}

  AUTHOR = {Iakovos Gurulian and Konstantinos Markantonakis and Carlton Shepherd and Eibe Frank and Raja Naeem Akram},
  TITLE = {Proximity Assurances Based on Natural and Artificial Ambient Environments},
  BOOKTITLE = {Proc 10th International Conference on Information Technology and Communications Security},
  PAGES = {83--103},
  YEAR = 2017,
  SERIES = {Bucharest, Romania},
  PUBLISHER = {Springer},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2017/proximity_assurances.pdf},
  ABSTRACT = {Relay attacks are passive man-in-the-middle attacks that
  aim to extend the physical distance of devices involved in a transaction
  beyond their operating environment. In the field of smart cards, distance bounding protocols have been proposed in order to counter relay
  attacks. For smartphones, meanwhile, the natural ambient environment
  surrounding the devices has been proposed as a potential Proximity
  and Relay-Attack Detection (PRAD) mechanism. These proposals, however, are not compliant with industry-imposed constraints that stipulate
  maximum transaction completion times, e.g. 500 milliseconds for EMV
  contactless transactions. We evaluated the effectiveness of 17 ambient
  sensors that are widely-available in modern smartphones as a PRAD
  method for time-restricted contactless transactions. In our work, both
  similarity- and machine learning-based analyses demonstrated limited
  effectiveness of natural ambient sensing as a PRAD mechanism under the
  operating requirements for proximity and transaction duration specified
  by EMV and ITSO. To address this, we propose the generation of an
  Artificial Ambient Environment (AAE) as a robust alternative for an
  effective PRAD. The use of infrared light as a potential PRAD mechanism
  is evaluated, and our results indicate a high success rate while remaining
  compliant with industry requirements.}

  AUTHOR = {Rory Mitchell and Eibe Frank},
  TITLE = {Accelerating the {XGBoost} algorithm using {GPU} computing},
  JOURNAL = {PeerJ Computer Science},
  YEAR = 2017,
  VOLUME = 3,
  NUMBER = {e127},
  HTTP = {http://doi.org/10.7717/peerj-cs.127},
  ABSTRACT = {We present a CUDA-based implementation of a decision tree construction algorithm within the gradient boosting library XGBoost. The tree construction algorithm is executed entirely on the graphics processing unit (GPU) and shows high performance with a variety of datasets and settings, including sparse input matrices. Individual boosting iterations are parallelised, combining two approaches. An interleaved approach is used for shallow trees, switching to a more conventional radix sort-based approach for larger depths. We show speedups of between 3× and 6× using a Titan X compared to a 4 core i7 CPU, and 1.2× using a Titan X compared to 2× Xeon CPUs (24 cores). We show that it is possible to process the Higgs dataset (10 million instances, 28 features) entirely within GPU memory. The algorithm is made available as a plug-in within the XGBoost library and fully supports all XGBoost features including classification, regression and ranking tasks.}

  AUTHOR = {Jeff Mo and Eibe Frank and Varvara Vetrova},
  TITLE = {Large-scale automatic species identification},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2017/large-scale-automatic.pdf},
  BOOKTITLE = {Proc 30th Australasian Joint Conference on Artificial Intelligence},
  YEAR = 2017,
  PUBLISHER = {Springer},
  ABSTRACT = {The crowd-sourced Naturewatch GBIF dataset is used to
                  obtain a species classification dataset containing
                  approximately 1.2 million photos of nearly 20
                  thousand different species of biological organisms
                  observed in their natural habitat. We present a
                  general hierarchical species identification system
                  based on deep convolutional neural networks trained
                  on the NatureWatch dataset. The dataset contains
                  images taken under a wide variety of conditions and
                  is heavily imbalanced, with most species associated
                  with only few images. We apply multi-view
                  classification as a way to lend more influence to
                  high frequency details, hierarchical fine-tuning to
                  help with class imbalance and provide
                  regularisation, and automatic specificity control for
                  optimising classification depth. Our system achieves
                  55.8\% accuracy when identifying individual species
                  and around 90\% accuracy at an average taxonomy
                  depth of 5.1 --- equivalent to the taxonomic rank of
                  ``family'' --- when applying automatic specificity control.}

  AUTHOR = {Iakovos Gurulian and Carlton Shepherd and Eibe Frank and Konstantinos Markantonakis and Raja Naeem Akram and Keith Mayes},
  TITLE = {On the Effectiveness of Ambient Sensing for Detecting {NFC} Relay Attacks},
  BOOKTITLE = {Proc 6th IEEE International Conference on Trust, Security and Privacy in Computing and Communications},
  YEAR = 2017,
  SERIES = {Sydney, Australia},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2017/PID4857507.pdf},
  ABSTRACT = {Smartphones with Near-Field Communication
  (NFC) may emulate contactless smart cards, which has resulted
  in the deployment of various access control, transportation and
  payment services, such as Google Pay and Apple Pay. Like
  contactless cards, however, NFC-based smartphone transactions
  are susceptible to relay attacks, and ambient sensing has
  been suggested as a potential countermeasure. In this study,
  we empirically evaluate the suitability of ambient sensors
  as a proximity detection mechanism for smartphone-based
  transactions under EMV constraints. We underpin our study
  using sensing data collected from 17 sensors from an emulated
  relay attack test-bed to assess whether they can thwart such
  attacks effectively. Each sensor, where feasible, was used to
  record 350-400 legitimate and relay (illegitimate) contactless
  transactions at two different physical locations. Our analysis
  provides an empirical foundation upon which to determine the
  efficacy of ambient sensing for providing a strong anti-relay
  mechanism in security-sensitive applications. We demonstrate
  that no single, evaluated mobile ambient sensor is suitable for
  such critical applications under realistic deployment constraints.}

  AUTHOR = {Carlton Shepherd and Iakovos Gurulian and Konstantinos Markantonakis and Eibe Frank and Raja Naeem Akram and Emmanouil Panaousis and Keith Mayes },
  TITLE = {The Applicability of Ambient Sensors as Proximity Evidence for {NFC} Transactions},
  BOOKTITLE = {Proc 6th Workshop on Mobile Security Technologies (MoST)},
  YEAR = 2017,
  SERIES = {San Jose, United States},
  PUBLISHER = {IEEE Computer Society's Technical Committee on Security and Privacy},
  HTTP = {http://www.ieee-security.org/TC/SPW2017/MoST/proceedings/Shepherd_MoST17.pdf},
  ABSTRACT = {Near Field Communication (NFC) has enabled mobile
  phones to emulate contactless smart cards. Similar to
  contactless smart cards, they are also susceptible to relay
  attacks. To counter these, a number of methods have been
  proposed that rely primarily on ambient sensors as a proximity
  detection mechanism (also known as an anti-relay mechanism).
  In this paper, we empirically evaluate a comprehensive set of
  ambient sensors for their effectiveness as a proximity detection
  mechanism for NFC contactless-based applications like banking,
  transport and high-security access controls. We selected
  17 sensors available via the Google Android platform. Each
  sensor, where feasible, was used to record the measurements
  of 1,000 contactless transactions at four different physical
  locations. A total of 252 users, a random sample from the
  university student population, were involved during the field
  trials. After careful analysis, we conclude that no single evaluated
  mobile ambient sensor is suitable for proximity detection
  in NFC-based contactless applications in realistic deployment
  scenarios. Lastly, we identify a number of potential avenues
  that may improve their effectiveness.}

  TITLE = {{WASSA-2017} Shared Task on Emotion Intensity},
  AUTHOR = {Mohammad, Saif M. and Bravo-Marquez, Felipe},
  BOOKTITLE = {Proceedings of the Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis (WASSA)},
  ADDRESS = {Copenhagen, Denmark},
  YEAR = {2017},
  URL = {http://aclanthology.info/papers/W17-5205/w17-5205},
  ABSTRACT = {We present the first shared task on detecting
    the intensity of emotion felt by
    the speaker of a tweet. We create
    the first datasets of tweets annotated for
    anger, fear, joy, and sadness intensities
    using a technique called best–worst scaling
    (BWS). We show that the annotations
    lead to reliable fine-grained intensity
    scores (rankings of tweets by intensity).
    The data was partitioned into training, development,
    and test sets for the competition.
    Twenty-two teams participated in
    the shared task, with the best system obtaining
    a Pearson correlation of 0.747 with
    the gold intensity scores. We summarize
    the machine learning setups, resources,
    and tools used by the participating teams,
    with a focus on the techniques and resources
    that are particularly useful for the
    task. The emotion intensity dataset and the
    shared task are helping improve our understanding
    of how we convey more or less
    intense emotions through language.}

  TITLE = {Emotion Intensities in Tweets},
  AUTHOR = {Mohammad, Saif M. and Bravo-Marquez, Felipe},
  BOOKTITLE = {Proceedings of the sixth joint conference on lexical and computational semantics (*Sem)},
  ADDRESS = {Vancouver, Canada},
  YEAR = {2017},
  URL = {http://www.aclweb.org/anthology/S17-1007},
  ABSTRACT = {This paper examines the task of detecting
    intensity of emotion from text. We create
    the first datasets of tweets annotated
    for anger, fear, joy, and sadness intensities.
    We use a technique called best–worst scaling
    (BWS) that improves annotation consistency
    and obtains reliable fine-grained
    scores. We show that emotion-word hashtags
    often impact emotion intensity, usually
    conveying a more intense emotion. Finally,
    we create a benchmark regression
    system and conduct experiments to determine:
    which features are useful for detecting
    emotion intensity; and, the extent to
    which two emotions are similar in terms
    of how they manifest in language.}

  AUTHOR = {Albert Bifet and
               Jiajin Zhang and
               Wei Fan and
               Cheng He and
               Jianfeng Zhang and
               Jianfeng Qian and
               Geoff Holmes and
               Bernhard Pfahringer},
  TITLE = {Extremely Fast Decision Tree Mining for Evolving Data Streams},
  BOOKTITLE = {Proceedings of the 23rd {ACM} {SIGKDD} International Conference on
               Knowledge Discovery and Data Mining, Halifax, NS, Canada, August 13
               - 17, 2017},
  PAGES = {1733--1742},
  YEAR = {2017},
  URL = {http://doi.acm.org/10.1145/3097983.3098139},
  DOI = {10.1145/3097983.3098139},
  ABSTRACT = {Nowadays real-time industrial applications are generating a huge amount of data continuously every day. To process these large data streams, we need fast and efficient methodologies and systems. A useful feature desired for data scientists and analysts is to have easy to visualize and understand machine learning models. Decision trees are preferred in many real-time applications for this reason, and also, because combined in an ensemble, they are one of the most powerful methods in machine learning.
  In this paper, we present a new system called STREAMDM-C++, that implements decision trees for data streams in C++, and that has been used extensively at Huawei. Streaming decision trees adapt to changes on streams, a huge advantage since standard decision trees are built using a snapshot of data, and can not evolve over time. STREAMDM-C++ is easy to extend, and contains more powerful ensemble methods, and a more efficient and easy to use adaptive decision trees. We compare our new implementation with VFML, the current state of the art implementation in C, and show how our new system outperforms VFML in speed using less resources.}

  AUTHOR = {Jean Paul Barddal and
               Heitor Murilo Gomes and
               Fabr{\'{\i}}cio Enembreck and
               Bernhard Pfahringer},
  TITLE = {A survey on feature drift adaptation: Definition, benchmark, challenges
               and future directions},
  JOURNAL = {Journal of Systems and Software},
  VOLUME = {127},
  PAGES = {278--294},
  YEAR = {2017},
  URL = {http://doi.org/10.1016/j.jss.2016.07.005},
  DOI = {10.1016/j.jss.2016.07.005},
  ABSTRACT = {Data stream mining is a fast growing research topic due to the ubiquity of data in several real-world problems. Given their ephemeral nature, data stream sources are expected to undergo changes in data distribution, a phenomenon called concept drift. This paper focuses on one specific type of drift that has not yet been thoroughly studied, namely feature drift. Feature drift occurs whenever a subset of features becomes, or ceases to be, relevant to the learning task; thus, learners must detect and adapt to these changes accordingly. We survey existing work on feature drift adaptation with both explicit and implicit approaches. Additionally, we benchmark several algorithms and a naive feature drift detection approach using synthetic and real-world datasets. The results from our experiments indicate the need for future research in this area as even naive approaches produced gains in accuracy while reducing resources usage. Finally, we state current research topics, challenges and future directions for feature drift adaptation.}

  AUTHOR = {Robert J. Durrant and
               Kee{-}Eung Kim and
               Geoffrey Holmes and
               Stephen Marsland and
               Masashi Sugiyama and
               Zhi{-}Hua Zhou},
  TITLE = {Foreword: special issue for the journal track of the 8th Asian conference on machine learning {(ACML} 2016)},
  JOURNAL = {Machine Learning},
  VOLUME = {106},
  NUMBER = {5},
  PAGES = {623--625},
  YEAR = {2017},
  URL = {http://doi.org/10.1007/s10994-017-5637-5},
  DOI = {10.1007/s10994-017-5637-5}

  AUTHOR = {Michael Mayo and
               Maisa Daoud},
  TITLE = {Aesthetic Local Search of Wind Farm Layouts},
  JOURNAL = {Information},
  VOLUME = {8},
  NUMBER = {2},
  PAGES = {39},
  YEAR = {2017},
  URL = {http://doi.org/10.3390/info8020039},
  ABSTRACT = {The visual impact of wind farm layouts has seen little consideration in the literature on the wind farm layout optimisation problem to date. Most existing algorithms focus on optimising layouts for power or the cost of energy alone. In this paper, we consider the geometry of wind farm layouts and whether it is possible to bi-optimise a layout for both energy efficiency and the degree of visual impact that the layout exhibits. We develop a novel optimisation approach for solving the problem which measures mathematically the degree of visual impact of a layout. The approach draws inspiration from the field of architecture. To evaluate our ideas, we demonstrate them on three benchmark problems for the wind farm layout optimisation problem in conjunction with two recently-published stochastic local search algorithms. Optimal patterned layouts are shown to be very close in terms of energy efficiency to optimal non-patterned layouts. }

  AUTHOR = {Michael Mayo and
               Sean Goltz},
  TITLE = {Constructing Document Vectors Using Kernel Density Estimates},
  BOOKTITLE = {Modeling Decisions for Artificial Intelligence - 14th International
               Conference, {MDAI} 2017, Kitakyushu, Japan, October 18-20, 2017, Proceedings},
  PAGES = {183--194},
  YEAR = {2017},
  URL = {http://doi.org/10.1007/978-3-319-67422-3_16},
  DOI = {10.1007/978-3-319-67422-3_16},
  ABSTRACT = {Document vector embeddings are numeric fixed length representations of text documents that can be used for machine learning and text mining purposes. We describe in this paper a new technique for generating document vectors. Our novel idea builds on the recently popular notion of neural word vector embeddings and combines this concept with the statistics of kernel density estimation. We show that robust document vectors can be produced using our new algorithm, and perform an experiment involving several challenging text classification datasets to demonstrate its effectiveness.}

  AUTHOR = {Brett Wilson and
               Sarah Wakes and
               Michael Mayo},
  TITLE = {Surrogate modeling a computational fluid dynamics-based wind turbine
               wake simulation using machine learning},
  BOOKTITLE = {2017 {IEEE} Symposium Series on Computational Intelligence, {SSCI}
               2017, Honolulu, HI, USA, November 27 - Dec. 1, 2017},
  PAGES = {1--8},
  YEAR = {2017},
  URL = {https://www.researchcommons.waikato.ac.nz/handle/10289/11785},
  ABSTRACT = {The wind farm layout optimisation problem involves finding the optimal locations for wind turbines on a wind farm site in order to minimise the so-called “wake effect”. The wake effect is the effect of turbulence on wind velocity produced by a turbine's rotating blades. This results in reduction in power production and increased fatigue in downstream turbines inside the wake. This paper uses wind velocity data produced from expensive Computational Fluid Dynamics (CFD) simulations of a rotating wind turbine at various incoming wind speeds to generate ground truth wake data, and explores the ability of machine learning algorithms to create surrogate models for predicting the reduced-velocity wind speeds inside a wake. In an extensive evaluation, we show that (i) given data from a CFD simulation, we can construct a model to interpolate wind velocity inside the wake at any arbitrary 3D point with high levels of accuracy; and (ii) given data from several CFD simulations (the training data) we can also accurately predict wind velocities in the wake of CFD simulations that we have not yet run (i.e. we can extrapolate to simulations where the incoming wind speeds are different to those in the training data). The net effect of these findings are that they pave the way towards the construction of novel and improved wake models for wind turbines, which in turn can be incorporated into existing algorithms for solving wind farm layout optimisation problems more accurately.}