MOA 12.03
Real Time Analytics for Data Streams
LearnModel.java
Go to the documentation of this file.
00001 /*
00002  *    LearnModel.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.tasks;
00021 
00022 import moa.classifiers.Classifier;
00023 import moa.core.ObjectRepository;
00024 import moa.options.ClassOption;
00025 import moa.options.IntOption;
00026 import moa.streams.InstanceStream;
00027 
00034 public class LearnModel extends MainTask {
00035 
00036     @Override
00037     public String getPurposeString() {
00038         return "Learns a model from a stream.";
00039     }
00040 
00041     private static final long serialVersionUID = 1L;
00042 
00043     public ClassOption learnerOption = new ClassOption("learner", 'l',
00044             "Classifier to train.", Classifier.class, "bayes.NaiveBayes");
00045 
00046     public ClassOption streamOption = new ClassOption("stream", 's',
00047             "Stream to learn from.", InstanceStream.class,
00048             "generators.RandomTreeGenerator");
00049 
00050     public IntOption maxInstancesOption = new IntOption("maxInstances", 'm',
00051             "Maximum number of instances to train on per pass over the data.",
00052             10000000, 0, Integer.MAX_VALUE);
00053 
00054     public IntOption numPassesOption = new IntOption("numPasses", 'p',
00055             "The number of passes to do over the data.", 1, 1,
00056             Integer.MAX_VALUE);
00057 
00058     public IntOption memCheckFrequencyOption = new IntOption(
00059             "memCheckFrequency", 'q',
00060             "How many instances between memory bound checks.", 100000, 0,
00061             Integer.MAX_VALUE);
00062 
00063     public LearnModel() {
00064     }
00065 
00066     public LearnModel(Classifier learner, InstanceStream stream,
00067             int maxInstances, int numPasses) {
00068         this.learnerOption.setCurrentObject(learner);
00069         this.streamOption.setCurrentObject(stream);
00070         this.maxInstancesOption.setValue(maxInstances);
00071         this.numPassesOption.setValue(numPasses);
00072     }
00073 
00074     @Override
00075     public Class<?> getTaskResultType() {
00076         return Classifier.class;
00077     }
00078 
00079     @Override
00080     public Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
00081         Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
00082         InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
00083         learner.setModelContext(stream.getHeader());
00084         int numPasses = this.numPassesOption.getValue();
00085         int maxInstances = this.maxInstancesOption.getValue();
00086         for (int pass = 0; pass < numPasses; pass++) {
00087             long instancesProcessed = 0;
00088             monitor.setCurrentActivity("Training learner"
00089                     + (numPasses > 1 ? (" (pass " + (pass + 1) + "/"
00090                     + numPasses + ")") : "") + "...", -1.0);
00091             if (pass > 0) {
00092                 stream.restart();
00093             }
00094             while (stream.hasMoreInstances()
00095                     && ((maxInstances < 0) || (instancesProcessed < maxInstances))) {
00096                 learner.trainOnInstance(stream.nextInstance());
00097                 instancesProcessed++;
00098                 if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
00099                     if (monitor.taskShouldAbort()) {
00100                         return null;
00101                     }
00102                     long estimatedRemainingInstances = stream.estimatedRemainingInstances();
00103                     if (maxInstances > 0) {
00104                         long maxRemaining = maxInstances - instancesProcessed;
00105                         if ((estimatedRemainingInstances < 0)
00106                                 || (maxRemaining < estimatedRemainingInstances)) {
00107                             estimatedRemainingInstances = maxRemaining;
00108                         }
00109                     }
00110                     monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
00111                             : (double) instancesProcessed
00112                             / (double) (instancesProcessed + estimatedRemainingInstances));
00113                     if (monitor.resultPreviewRequested()) {
00114                         monitor.setLatestResultPreview(learner.copy());
00115                     }
00116                 }
00117             }
00118         }
00119         learner.setModelContext(stream.getHeader());
00120         return learner;
00121     }
00122 }
 All Classes Namespaces Files Functions Variables Enumerations