@COMMENT{{Automatically generated - DO NOT MODIFY!}}

  AUTHOR = {Tony C. Smith and Eibe Frank},
  TITLE = {Statistical Genomics: Methods and Protocols},
  CHAPTER = {Introducing Machine Learning Concepts with WEKA},
  YEAR = 2016,
  PUBLISHER = {Springer},
  ADDRESS = {New York, NY},
  PAGES = {353--378},
  URL = {http://dx.doi.org/10.1007/978-1-4939-3578-9_17},
  ABSTRACT = {This chapter presents an introduction to data mining with machine learning. It gives an overview of various types of machine learning, along with some examples. It explains how to download, install, and run the WEKA data mining toolkit on a simple data set, then proceeds to explain how one might approach a bioinformatics problem. Finally, it includes a brief summary of machine learning algorithms for other types of data mining problems, and provides suggestions about where to find additional information.}

  AUTHOR = {Eibe Frank and Mark A. Hall and Ian H. Witten},
  TITLE = {The {WEKA} Workbench},
  HOWPUBLISHED = {Online Appendix for "Data Mining: Practical Machine Learning Tools and Techniques", Morgan Kaufmann, Fourth Edition},
  YEAR = 2016,
  HTTP = {http://www.cs.waikato.ac.nz/ml/weka/Witten_et_al_2016_appendix.pdf}

  AUTHOR = {Ian H. Witten and Eibe Frank and Mark A. Hall and Christopher J. Pal},
  TITLE = {Data Mining: Practical Machine Learning Tools and Techniques},
  PUBLISHER = {Morgan Kaufmann},
  YEAR = 2016,
  ADDRESS = {Burlington, MA},
  EDITION = 4,
  HTTP = {http://www.cs.waikato.ac.nz/~ml/weka/book.html}

  AUTHOR = {Christopher Beckham and Mark Hall and Eibe Frank},
  TITLE = {Weka{P}y{S}cript: Classification, Regression, and Filter Schemes for {WEKA} Implemented in {P}ython},
  JOURNAL = {Journal of Open Research Software},
  YEAR = 2016,
  VOLUME = 4,
  NUMBER = 1,
  PAGES = {e33},
  URL = {http://doi.org/10.5334/jors.108},
  ABSTRACT = {WekaPyScript is a package for the machine learning software WEKA that allows learning algorithms and preprocessing methods for classification and regression to be written in Python, as opposed to WEKA’s implementation language, Java. This opens up WEKA to its machine learning and scientific computing ecosystem. Furthermore, due to Python’s minimalist syntax, learning algorithms and preprocessing methods can be prototyped easily and utilised from within WEKA. WekaPyScript works by running a local Python server using the host’s installation of Python; as a result, any libraries installed in the host installation can be leveraged when writing a script for WekaPyScript. Three example scripts (two learning algorithms and one preprocessing method) are presented.}

  AUTHOR = {Felipe Bravo-Marquez and Eibe Frank and Bernhard Pfahringer},
  TITLE = {From opinion lexicons to sentiment classification of tweets and vice versa: a transfer learning approach},
  BOOKTITLE = {Proc 15th IEEE/WIC/ACM International Conference on Web Intelligence},
  YEAR = 2016,
  SERIES = {Omaha, Nebraska},
  PUBLISHER = {IEEE Computer Society},
  PDF = {http://www.cs.waikato.ac.nz/ml/publications/2016/transfer_wi.pdf},
  ABSTRACT = {Message-level and word-level polarity classification
  are two popular tasks in Twitter sentiment analysis. They have
  been commonly addressed by training supervised models from
  labelled data. The main limitation of these models is the high cost
  of data annotation. Transferring existing labels from a related
  problem domain is one possible solution for this problem. In
  this paper, we propose a simple model for transferring sentiment
  labels from words to tweets and vice versa by representing both
  tweets and words using feature vectors residing in the same
  feature space. Tweets are represented by standard NLP features
  such as unigrams and part-of-speech tags. Words are represented
  by averaging the vectors of the tweets in which they occur. We
  evaluate our approach in two transfer learning problems: 1)
  training a tweet-level polarity classifier from a polarity lexicon,
  and 2) inducing a polarity lexicon from a collection of polarity-annotated
  tweets. Our results show that the proposed approach
  can successfully classify words and tweets after transfer.}

  AUTHOR = {Felipe Bravo-Marquez and Eibe Frank and Saif M. Mohammad and Bernhard Pfahringer},
  TITLE = {Determining Word-Emotion Associations from Tweets by Multi-Label Classification},
  BOOKTITLE = {Proc 15th IEEE/WIC/ACM International Conference on Web Intelligence},
  YEAR = 2016,
  SERIES = {Omaha, Nebraska},
  PUBLISHER = {IEEE Computer Society},
  PDF = {http://www.cs.waikato.ac.nz/ml/publications/2016/emo_lex_wi.pdf},
  ABSTRACT = {The automatic detection of emotions in Twitter posts
  is a challenging task due to the informal nature of the language
  used in this platform. In this paper, we propose a methodology
  for expanding the NRC word-emotion association lexicon for
  the language used in Twitter. We perform this expansion using
  multi-label classification of words and compare different word-level
  features extracted from unlabelled tweets such as unigrams,
  Brown clusters, POS tags, and word2vec embeddings. The results
  show that the expanded lexicon achieves major improvements
  over the original lexicon when classifying tweets into emotional
  categories. In contrast to previous work, our methodology does
  not depend on tweets annotated with emotional hashtags, thus
  enabling the identification of emotional words from any domain-specific
  collection using unlabelled tweets.}

  AUTHOR = {Tim Leathart and Bernhard Pfahringer and Eibe Frank},
  TITLE = {Building Ensembles of Adaptive Nested Dichotomies with Random-Pair Selection},
  BOOKTITLE = {Proc 20th European Conference on Principles and Practice of Knowledge Discovery in Databases and 27th European Conference on Machine Learning},
  YEAR = 2016,
  SERIES = {Riva del Garda, Italy},
  PUBLISHER = {Springer},
  PDF = {http://www.cs.waikato.ac.nz/ml/publications/2016/adaptive_nested_dichotomies.pdf},
  ABSTRACT = {A system of nested dichotomies is a method of decomposing
  a multi-class problem into a collection of binary problems. Such a system
  recursively applies binary splits to divide the set of classes into two
  subsets, and trains a binary classifier for each split. Although ensembles
  of nested dichotomies with random structure have been shown to perform
  well in practice, using a more sophisticated class subset selection
  method can be used to improve classification accuracy. We investigate an
  approach to this problem called random-pair selection, and evaluate its
  effectiveness compared to other published methods of subset selection.
  We show that our method outperforms other methods in many cases
  when forming ensembles of nested dichotomies, and is at least on par in
  all other cases.}

  AUTHOR = {Felipe Bravo-Marquez and Eibe Frank and Bernhard Pfahringer},
  TITLE = {Annotate-Sample-Average ({ASA}): A New Distant Supervision Approach for {T}witter Sentiment Analysis},
  BOOKTITLE = {Proc 22nd European Conference on Artificial Intelligence},
  YEAR = 2016,
  SERIES = {The Hague, Netherlands},
  PUBLISHER = {IOS Press},
  PDF = {http://www.cs.waikato.ac.nz/ml/publications/2016/asa_paper.pdf},
  ABSTRACT = {The classification of tweets into polarity classes is a popular
  task in sentiment analysis. State-of-the-art solutions to this problem
  are based on supervised machine learning models trained from
  manually annotated examples. A drawback of these approaches is the
  high cost involved in data annotation. Two freely available resources
  that can be exploited to solve the problem are: 1) large amounts of
  unlabelled tweets obtained from the Twitter API and 2) prior lexical
  knowledge in the form of opinion lexicons. In this paper, we propose
  Annotate-Sample-Average (ASA), a distant supervision method that
  uses these two resources to generate synthetic training data for Twitter
  polarity classification. Positive and negative training instances are
  generated by sampling and averaging unlabelled tweets containing
  words with the corresponding polarity. Polarity of words is determined
  from a given polarity lexicon. Our experimental results show
  that the training data generated by ASA (after tuning its parameters)
  produces a classifier that performs significantly better than a classifier
  trained from tweets annotated with emoticons and a classifier
  trained, without any sampling and averaging, from tweets annotated
  according to the polarity of their words.}

  AUTHOR = {Felipe Bravo{-}Marquez and
               Eibe Frank and
               Bernhard Pfahringer},
  TITLE = {Building a Twitter opinion lexicon from automatically-annotated tweets},
  JOURNAL = {Knowl.-Based Syst.},
  VOLUME = {108},
  PAGES = {65--78},
  YEAR = {2016},
  URL = {https://doi.org/10.1016/j.knosys.2016.05.018},
  DOI = {10.1016/j.knosys.2016.05.018},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2016/kbs2016.pdf},
  ABSTRACT = {Opinion lexicons, which are lists of terms labeled by sentiment, are widely used resources to support automatic sentiment analysis of textual passages. However, existing resources of this type exhibit some limitations when applied to social media messages such as tweets (posts in Twitter), because they are unable to capture the diversity of informal expressions commonly found in this type of media.
In this article, we present a method that combines information from automatically annotated tweets and existing hand-made opinion lexicons to expand an opinion lexicon in a supervised fashion. The expanded lexicon contains part-of-speech (POS) disambiguated entries with a probability distribution for positive, negative, and neutral polarity classes, similarly to SentiWordNet.
To obtain this distribution using machine learning, we propose word-level attributes based on (a) the morphological information conveyed by POS tags and (b) associations between words and the sentiment expressed in the tweets that contain them. We consider tweets with both hard and soft sentiment labels. The sentiment associations are modeled in two different ways: using point-wise-mutual-information semantic orientation (PMI-SO), and using stochastic gradient descent semantic orientation (SGD-SO), which learns a linear relationship between words and sentiment. The training dataset is labeled by a seed lexicon formed by combining multiple hand-annotated lexicons.
Our experimental results show that our method outperforms the three-dimensional word-level polarity classification performance obtained by using PMI-SO alone. This is significant because PMI-SO is a state-of-the-art measure for establishing word-level sentiment. Additionally, we show that lexicons created with our method achieve significant improvements over SentiWordNet for classifying tweets into polarity classes, and also outperform SentiStrength in the majority of the experiments.}

  AUTHOR = {Henry Gouk and Bernhard Pfahringer and Michael Cree},
  TITLE = {Learning Distance Metrics for Multi-Label Classification},
  BOOKTITLE = {Proc 8th Asian Conference on Machine Learning},
  YEAR = 2016,
  SERIES = {Hamilton, New Zealand},
  PUBLISHER = {JMLR Workshop and Conference Proceedings},
  URL = {http://www.jmlr.org/proceedings/papers/v63/Gouk8.pdf},
  ABSTRACT = {Distance metric learning is a well studied problem in the field of machine learning, where
  it is typically used to improve the accuracy of instance based learning techniques. In this
  paper we propose a distance metric learning algorithm that is specialised for multi-label
  classification tasks, rather than the multiclass setting considered by most work in this
  area. The method trains an embedder that can transform instances into a feature space
  where squared Euclidean distance provides an estimate of the Jaccard distance between
  the corresponding label vectors. In addition to a linear Mahalanobis style metric, we
  also present a nonlinear extension that provides a substantial boost in performance. We
  show that this technique significantly improves upon current approaches for instance based
  multi-label classification, and also enables interesting data visualisations.}

  AUTHOR = {Michael Cree and John Perrone and Gehan Anthonys and Aden Garnett and Henry Gouk},
  TITLE = {Estimating heading direction from monocular video sequences using biologically-based sensors},
  BOOKTITLE = {Image and Vision Computing New Zealand (IVCNZ), International Conference on},
  YEAR = 2016,
  SERIES = {Palmerston North, New Zealand},
  URL = {https://www.cs.waikato.ac.nz/ml/publications/2016/cree2016.pdf},
  ABSTRACT = {The determination of one’s movement through the
  environment (visual odometry or self-motion estimation) from
  monocular sources such as video is an important research problem
  because of its relevance to robotics and autonomous vehicles.
  The traditional computer vision approach to this problem tracks
  visual features across frames in order to obtain 2-D image motion
  estimates from which the camera motion can be derived. We
  present an alternative scheme which uses the properties of motion
  sensitive cells in the primate brain to derive the image motion and
  the camera heading vector. We tested heading estimation using
  a camera mounted on a linear translation table with the line of
  sight of the camera set at a range of angles relative to straight
  ahead (0 degrees to 50 degrees in 10 degree steps). The camera velocity was also
  varied (0.2, 0.4, 0.8, 1.2, 1.6 and 2.0m/s). Our biologically-based
  method produced accurate heading estimates over a wide range of
  test angles and camera speeds. Our approach has the advantage
  of being a one-shot estimator and not requiring iterative search
  techniques for finding the heading.}

  AUTHOR = {Ata Kab{\'{a}}n and
               Jakramate Bootkrajang and
               Robert J. Durrant},
  TITLE = {Toward Large-Scale Continuous {EDA:} {A} Random Matrix Theory Perspective},
  JOURNAL = {Evolutionary Computation},
  VOLUME = {24},
  NUMBER = {2},
  PAGES = {255--291},
  YEAR = {2016},
  URL = {https://doi.org/10.1162/EVCO_a_00150},
  DOI = {10.1162/EVCO_a_00150},
  ABSTRACT = {Estimations of distribution algorithms (EDAs) are a major branch of evolutionary algorithms (EA) with some unique advantages in principle. They are able to take advantage of correlation structure to drive the search more efficiently, and they are able to provide insights about the structure of the search space. However, model building in high dimensions is extremely challenging, and as a result existing EDAs may become less attractive in large-scale problems because of the associated large computational requirements. Large-scale continuous global optimisation is key to many modern-day real-world problems. Scaling up EAs to large-scale problems has become one of the biggest challenges of the field. This paper pins down some fundamental roots of the problem and makes a start at developing a new and generic framework to yield effective and efficient EDA-type algorithms for large-scale continuous global optimisation problems. Our concept is to introduce an ensemble of random projections to low dimensions of the set of fittest search points as a basis for developing a new and generic divide-and-conquer methodology. Our ideas are rooted in the theory of random projections developed in theoretical computer science, and in developing and analysing our framework we exploit some recent results in nonasymptotic random matrix theory.}

  AUTHOR = {Momodou L. Sanyang and
               Robert J. Durrant and
               Ata Kab{\'{a}}n},
  TITLE = {How effective is Cauchy-EDA in high dimensions?},
  BOOKTITLE = {{IEEE} Congress on Evolutionary Computation, {CEC} 2016, Vancouver,
               BC, Canada, July 24-29, 2016},
  PAGES = {3409--3416},
  YEAR = {2016},
  URL = {https://researchcommons.waikato.ac.nz/handle/10289/10506},
  ABSTRACT = {We consider the problem of high dimensional blackbox optimisation via Estimation of Distribution Algorithms (EDA) and the use of heavy-tailed search distributions in this setting. Some authors have suggested that employing a heavy tailed search distribution, such as a Cauchy, may make EDA better explore a high dimensional search space. However, other authors have found Cauchy search distributions are less effective than Gaussian search distributions in high dimensional problems. In this paper, we set out to resolve this controversy. To achieve this we run extensive experiments on a battery of high-dimensional test functions, and develop some theory which shows that small search steps are always more likely to move the search distribution towards the global optimum than large ones and, in particular, large search steps in high-dimensional spaces nearly always do badly in this respect. We hypothesise that, since exploration by large steps is mostly counterproductive in high dimensions, and since the fraction of good directions decays exponentially fast with increasing dimension, instead one should focus mainly on finding the right direction in which to move the search distribution. We propose a minor change to standard Gaussian EDA which implicitly achieves this aim, and our experiments on a sequence of test functions confirm the good performance of our new approach.}

  AUTHOR = {Michael Mayo and
               Chen Zheng},
  TITLE = {BlockCopy-based operators for evolving efficient wind farm layouts},
  BOOKTITLE = {{IEEE} Congress on Evolutionary Computation, {CEC} 2016, Vancouver,
               BC, Canada, July 24-29, 2016},
  PAGES = {1085--1092},
  YEAR = {2016},
  URL = {https://researchcommons.waikato.ac.nz/handle/10289/10777},
  ABSTRACT = {A novel search operator, BlockCopy, is proposed for
  efficiently solving the wind farm layout optimisation problem.
  BlockCopy, which can be used either as mutation or a crossover
  operator, copies patterns of turbines from part of a layout to
  another part. The target layout may be the same as the source, or
  a different layout altogether. The rationale behind this is that it is
  the relative configurations of turbines rather than their individual
  absolute positions on the layouts that count, and BlockCopy, for
  the most part, maintains relative configurations. Our evaluation
  on four benchmark scenarios shows that BlockCopy outperforms
  two other standard approaches (namely, the turbine displacement
  algorithm and random perturbation) from the literature. We also
  evaluate the BlockCopy operator in conjunction with both single-solution
  and population-based strategies.}

  AUTHOR = {Michael Mayo and
               Sara Omranian},
  TITLE = {Towards a New Evolutionary Subsampling Technique for Heuristic Optimisation of Load Disaggregators},
  BOOKTITLE = {Trends and Applications in Knowledge Discovery and Data Mining - {PAKDD}
               2016 Workshops, BDM, MLSDA, PACC, {WDMBF} Auckland, New Zealand, April
               19, 2016, Revised Selected Papers},
  PAGES = {3--14},
  YEAR = {2016},
  URL = {https://doi.org/10.1007/978-3-319-42996-0_1},
  DOI = {10.1007/978-3-319-42996-0_1},
  ABSTRACT = {In this paper we present some preliminary work towards the development of a new evolutionary subsampling technique for solving the non-intrusive load monitoring (NILM) problem. The NILM problem concerns using predictive algorithms to analyse whole-house energy usage measurements, so that individual appliance energy usages can be disaggregated. The motivation is to educate home owners about their energy usage. However, by their very nature, the datasets used in this research are massively imbalanced in their target value distributions. Consequently standard machine learning techniques, which often rely on optimising for root mean squared error (RMSE), typically fail. We therefore propose the target-weighted RMSE (TW-RMSE) metric as an alternative fitness function for optimising load disaggregators, and show in a simple initial study in which random search is utilised that TW-RMSE is a metric that can be optimised, and therefore has the potential to be included in a larger evolutionary subsampling-based solution to this problem.}

  AUTHOR = {Michael Mayo and
               Albert Bifet},
  TITLE = {Deferral classification of evolving temporal dependent data streams},
  BOOKTITLE = {Proceedings of the 31st Annual {ACM} Symposium on Applied Computing,
               Pisa, Italy, April 4-8, 2016},
  PAGES = {952--954},
  YEAR = {2016},
  URL = {http://doi.acm.org/10.1145/2851613.2851890},
  DOI = {10.1145/2851613.2851890},
  ABSTRACT = {Data streams generated in real-time can be strongly temporally dependent. In this case, standard techniques where we suppose that class labels are not correlated may produce sub-optimal performance because the assumption is incorrect. To deal with this problem, we present in this paper a new algorithm to classify temporally correlated data based on deferral learning. This approach is suitable for learning over time-varying streams. We show how simple classifiers such as Naive Bayes can boost their performance using this new meta-learning methodology. We give an empirical validation of our new algorithm over several real and artificial datasets.}

  AUTHOR = {Juan D. Vel{\'{a}}squez and
               Yerko Covacevich and
               Francisco Molina and
               Edison Marrese{-}Taylor and
               Cristi{\'{a}}n Rodr{\'{\i}}guez and
               Felipe Bravo{-}Marquez},
  TITLE = {{DOCODE} 3.0 (DOcument COpy DEtector): {A} system for plagiarism detection
               by applying an information fusion process from multiple documental
               data sources},
  JOURNAL = {Information Fusion},
  VOLUME = {27},
  PAGES = {64--75},
  YEAR = {2016},
  URL = {https://doi.org/10.1016/j.inffus.2015.05.006},
  DOI = {10.1016/j.inffus.2015.05.006},
  PDF = {https://www.cs.waikato.ac.nz/ml/publications/2016/inffus15.pdf},
  ABSTRACT = {Plagiarism refers to the act of presenting external words, thoughts, or ideas as one’s own, without providing references to the sources from which they were taken. The exponential growth of different digital document sources available on the Web has facilitated the spread of this practice, making the accurate detection of it a crucial task for educational institutions. In this article, we present DOCODE 3.0, a Web system for educational institutions that performs automatic analysis of large quantities of digital documents in relation to their degree of originality. Since plagiarism is a complex problem, frequently tackled at different levels, our system applies algorithms in order to perform an information fusion process from multi data source to all these levels. These algorithms have been successfully tested in the scientific community in solving tasks like the identification of plagiarized passages and the retrieval of source candidates from the Web, among other multi data sources as digital libraries, and have proven to be very effective. We integrate these algorithms into a multi-tier, robust and scalable JEE architecture, allowing many different types of clients with different requirements to consume our services. For users, DOCODE produces a number of visualizations and reports from the different outputs to let teachers and professors gain insights on the originality of the documents they review, allowing them to discover, understand and handle possible plagiarism cases and making it easier and much faster to analyze a vast number of documents. Our experience here is so far focused on the Chilean situation and the Spanish language, offering solutions to Chilean educational institutions in any of their preferred Virtual Learning Environments. However, DOCODE can easily be adapted to increase language coverage.}