MOA 12.03
Real Time Analytics for Data Streams
EvaluateInterleavedTestThenTrain.java
Go to the documentation of this file.
00001 /*
00002  *    EvaluateInterleavedTestThenTrain.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.tasks;
00021 
00022 import java.io.File;
00023 import java.io.FileOutputStream;
00024 import java.io.PrintStream;
00025 
00026 import moa.classifiers.Classifier;
00027 import moa.core.Measurement;
00028 import moa.core.ObjectRepository;
00029 import moa.core.TimingUtils;
00030 import moa.evaluation.ClassificationPerformanceEvaluator;
00031 import moa.evaluation.LearningCurve;
00032 import moa.evaluation.LearningEvaluation;
00033 import moa.options.ClassOption;
00034 import moa.options.FileOption;
00035 import moa.options.IntOption;
00036 import moa.streams.InstanceStream;
00037 import weka.core.Instance;
00038 
00045 public class EvaluateInterleavedTestThenTrain extends MainTask {
00046 
00047     @Override
00048     public String getPurposeString() {
00049         return "Evaluates a classifier on a stream by testing then training with each example in sequence.";
00050     }
00051 
00052     private static final long serialVersionUID = 1L;
00053 
00054     public ClassOption learnerOption = new ClassOption("learner", 'l',
00055             "Classifier to train.", Classifier.class, "bayes.NaiveBayes");
00056 
00057     public ClassOption streamOption = new ClassOption("stream", 's',
00058             "Stream to learn from.", InstanceStream.class,
00059             "generators.RandomTreeGenerator");
00060 
00061     public ClassOption evaluatorOption = new ClassOption("evaluator", 'e',
00062             "Classification performance evaluation method.",
00063             ClassificationPerformanceEvaluator.class,
00064             "BasicClassificationPerformanceEvaluator");
00065 
00066     public IntOption instanceLimitOption = new IntOption("instanceLimit", 'i',
00067             "Maximum number of instances to test/train on  (-1 = no limit).",
00068             100000000, -1, Integer.MAX_VALUE);
00069 
00070     public IntOption timeLimitOption = new IntOption("timeLimit", 't',
00071             "Maximum number of seconds to test/train for (-1 = no limit).", -1,
00072             -1, Integer.MAX_VALUE);
00073 
00074     public IntOption sampleFrequencyOption = new IntOption("sampleFrequency",
00075             'f',
00076             "How many instances between samples of the learning performance.",
00077             100000, 0, Integer.MAX_VALUE);
00078 
00079     public IntOption memCheckFrequencyOption = new IntOption(
00080             "memCheckFrequency", 'q',
00081             "How many instances between memory bound checks.", 100000, 0,
00082             Integer.MAX_VALUE);
00083 
00084     public FileOption dumpFileOption = new FileOption("dumpFile", 'd',
00085             "File to append intermediate csv reslts to.", null, "csv", true);
00086 
00087     @Override
00088     public Class<?> getTaskResultType() {
00089         return LearningCurve.class;
00090     }
00091 
00092     @Override
00093     protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
00094         Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
00095         InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
00096         ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption(this.evaluatorOption);
00097         learner.setModelContext(stream.getHeader());
00098         int maxInstances = this.instanceLimitOption.getValue();
00099         long instancesProcessed = 0;
00100         int maxSeconds = this.timeLimitOption.getValue();
00101         int secondsElapsed = 0;
00102         monitor.setCurrentActivity("Evaluating learner...", -1.0);
00103         LearningCurve learningCurve = new LearningCurve(
00104                 "learning evaluation instances");
00105         File dumpFile = this.dumpFileOption.getFile();
00106         PrintStream immediateResultStream = null;
00107         if (dumpFile != null) {
00108             try {
00109                 if (dumpFile.exists()) {
00110                     immediateResultStream = new PrintStream(
00111                             new FileOutputStream(dumpFile, true), true);
00112                 } else {
00113                     immediateResultStream = new PrintStream(
00114                             new FileOutputStream(dumpFile), true);
00115                 }
00116             } catch (Exception ex) {
00117                 throw new RuntimeException(
00118                         "Unable to open immediate result file: " + dumpFile, ex);
00119             }
00120         }
00121         boolean firstDump = true;
00122         boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
00123         long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
00124         long lastEvaluateStartTime = evaluateStartTime;
00125         double RAMHours = 0.0;
00126         while (stream.hasMoreInstances()
00127                 && ((maxInstances < 0) || (instancesProcessed < maxInstances))
00128                 && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
00129             Instance trainInst = stream.nextInstance();
00130             Instance testInst = (Instance) trainInst.copy();
00131             int trueClass = (int) trainInst.classValue();
00132             //testInst.setClassMissing();
00133             double[] prediction = learner.getVotesForInstance(testInst);
00134             //evaluator.addClassificationAttempt(trueClass, prediction, testInst
00135             //          .weight());
00136             evaluator.addResult(testInst, prediction);
00137             learner.trainOnInstance(trainInst);
00138             instancesProcessed++;
00139             if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
00140                   ||  stream.hasMoreInstances() == false) {
00141                 long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
00142                 double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
00143                 double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
00144                 double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
00145                 RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
00146                 RAMHours += RAMHoursIncrement;
00147                 lastEvaluateStartTime = evaluateTime;
00148                 learningCurve.insertEntry(new LearningEvaluation(
00149                         new Measurement[]{
00150                             new Measurement(
00151                             "learning evaluation instances",
00152                             instancesProcessed),
00153                             new Measurement(
00154                             "evaluation time ("
00155                             + (preciseCPUTiming ? "cpu "
00156                             : "") + "seconds)",
00157                             time),
00158                             new Measurement(
00159                             "model cost (RAM-Hours)",
00160                             RAMHours)
00161                         },
00162                         evaluator, learner));
00163                 if (immediateResultStream != null) {
00164                     if (firstDump) {
00165                         immediateResultStream.println(learningCurve.headerToString());
00166                         firstDump = false;
00167                     }
00168                     immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
00169                     immediateResultStream.flush();
00170                 }
00171             }
00172             if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
00173                 if (monitor.taskShouldAbort()) {
00174                     return null;
00175                 }
00176                 long estimatedRemainingInstances = stream.estimatedRemainingInstances();
00177                 if (maxInstances > 0) {
00178                     long maxRemaining = maxInstances - instancesProcessed;
00179                     if ((estimatedRemainingInstances < 0)
00180                             || (maxRemaining < estimatedRemainingInstances)) {
00181                         estimatedRemainingInstances = maxRemaining;
00182                     }
00183                 }
00184                 monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
00185                         : (double) instancesProcessed
00186                         / (double) (instancesProcessed + estimatedRemainingInstances));
00187                 if (monitor.resultPreviewRequested()) {
00188                     monitor.setLatestResultPreview(learningCurve.copy());
00189                 }
00190                 secondsElapsed = (int) TimingUtils.nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread()
00191                         - evaluateStartTime);
00192             }
00193         }
00194         if (immediateResultStream != null) {
00195             immediateResultStream.close();
00196         }
00197         return learningCurve;
00198     }
00199 }
 All Classes Namespaces Files Functions Variables Enumerations