MOA 12.03
Real Time Analytics for Data Streams
EvaluatePrequential.java
Go to the documentation of this file.
00001 /*
00002  *    EvaluatePrequential.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *    @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
00006  *
00007  *    This program is free software; you can redistribute it and/or modify
00008  *    it under the terms of the GNU General Public License as published by
00009  *    the Free Software Foundation; either version 3 of the License, or
00010  *    (at your option) any later version.
00011  *
00012  *    This program is distributed in the hope that it will be useful,
00013  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  *    GNU General Public License for more details.
00016  *
00017  *    You should have received a copy of the GNU General Public License
00018  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00019  *    
00020  */
00021 package moa.tasks;
00022 
00023 import java.io.File;
00024 import java.io.FileOutputStream;
00025 import java.io.PrintStream;
00026 
00027 import moa.classifiers.Classifier;
00028 import moa.core.Measurement;
00029 import moa.core.ObjectRepository;
00030 import moa.core.TimingUtils;
00031 import moa.evaluation.ClassificationPerformanceEvaluator;
00032 import moa.evaluation.WindowClassificationPerformanceEvaluator;
00033 import moa.evaluation.EWMAClassificationPerformanceEvaluator;
00034 import moa.evaluation.FadingFactorClassificationPerformanceEvaluator;
00035 import moa.evaluation.LearningCurve;
00036 import moa.evaluation.LearningEvaluation;
00037 import moa.options.ClassOption;
00038 import moa.options.FileOption;
00039 import moa.options.FloatOption;
00040 import moa.options.IntOption;
00041 import moa.streams.InstanceStream;
00042 import weka.core.Instance;
00043 import weka.core.Utils;
00044 
00052 public class EvaluatePrequential extends MainTask {
00053 
00054     @Override
00055     public String getPurposeString() {
00056         return "Evaluates a classifier on a stream by testing then training with each example in sequence.";
00057     }
00058 
00059     private static final long serialVersionUID = 1L;
00060 
00061     public ClassOption learnerOption = new ClassOption("learner", 'l',
00062             "Classifier to train.", Classifier.class, "bayes.NaiveBayes");
00063 
00064     public ClassOption streamOption = new ClassOption("stream", 's',
00065             "Stream to learn from.", InstanceStream.class,
00066             "generators.RandomTreeGenerator");
00067 
00068     public ClassOption evaluatorOption = new ClassOption("evaluator", 'e',
00069             "Classification performance evaluation method.",
00070             ClassificationPerformanceEvaluator.class,
00071             "WindowClassificationPerformanceEvaluator");
00072 
00073     public IntOption instanceLimitOption = new IntOption("instanceLimit", 'i',
00074             "Maximum number of instances to test/train on  (-1 = no limit).",
00075             100000000, -1, Integer.MAX_VALUE);
00076 
00077     public IntOption timeLimitOption = new IntOption("timeLimit", 't',
00078             "Maximum number of seconds to test/train for (-1 = no limit).", -1,
00079             -1, Integer.MAX_VALUE);
00080 
00081     public IntOption sampleFrequencyOption = new IntOption("sampleFrequency",
00082             'f',
00083             "How many instances between samples of the learning performance.",
00084             100000, 0, Integer.MAX_VALUE);
00085 
00086     public IntOption memCheckFrequencyOption = new IntOption(
00087             "memCheckFrequency", 'q',
00088             "How many instances between memory bound checks.", 100000, 0,
00089             Integer.MAX_VALUE);
00090 
00091     public FileOption dumpFileOption = new FileOption("dumpFile", 'd',
00092             "File to append intermediate csv results to.", null, "csv", true);
00093 
00094     public FileOption outputPredictionFileOption = new FileOption("outputPredictionFile", 'o',
00095             "File to append output predictions to.", null, "pred", true);
00096 
00097     //New for prequential method DEPRECATED
00098     public IntOption widthOption = new IntOption("width",
00099             'w', "Size of Window", 1000);
00100 
00101     public FloatOption alphaOption = new FloatOption("alpha",
00102             'a', "Fading factor or exponential smoothing factor", .01);
00103     //End New for prequential methods
00104 
00105     @Override
00106     public Class<?> getTaskResultType() {
00107         return LearningCurve.class;
00108     }
00109 
00110     @Override
00111     protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
00112         Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
00113         InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
00114         ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption(this.evaluatorOption);
00115         LearningCurve learningCurve = new LearningCurve(
00116                 "learning evaluation instances");
00117 
00118         //New for prequential methods
00119         if (evaluator instanceof WindowClassificationPerformanceEvaluator) {
00120             //((WindowClassificationPerformanceEvaluator) evaluator).setWindowWidth(widthOption.getValue());
00121             if (widthOption.getValue() != 1000) {
00122                 System.out.println("DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w " + widthOption.getValue() + ")");
00123                  return learningCurve;
00124             }
00125         }
00126         if (evaluator instanceof EWMAClassificationPerformanceEvaluator) {
00127             //((EWMAClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
00128             if (alphaOption.getValue() != .01) {
00129                 System.out.println("DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a " + alphaOption.getValue() + ")");
00130                 return learningCurve;
00131             }
00132         }
00133         if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) {
00134             //((FadingFactorClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
00135             if (alphaOption.getValue() != .01) {
00136                 System.out.println("DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a " + alphaOption.getValue() + ")");
00137                 return learningCurve;
00138             }
00139         }
00140         //End New for prequential methods
00141 
00142         learner.setModelContext(stream.getHeader());
00143         int maxInstances = this.instanceLimitOption.getValue();
00144         long instancesProcessed = 0;
00145         int maxSeconds = this.timeLimitOption.getValue();
00146         int secondsElapsed = 0;
00147         monitor.setCurrentActivity("Evaluating learner...", -1.0);
00148 
00149         File dumpFile = this.dumpFileOption.getFile();
00150         PrintStream immediateResultStream = null;
00151         if (dumpFile != null) {
00152             try {
00153                 if (dumpFile.exists()) {
00154                     immediateResultStream = new PrintStream(
00155                             new FileOutputStream(dumpFile, true), true);
00156                 } else {
00157                     immediateResultStream = new PrintStream(
00158                             new FileOutputStream(dumpFile), true);
00159                 }
00160             } catch (Exception ex) {
00161                 throw new RuntimeException(
00162                         "Unable to open immediate result file: " + dumpFile, ex);
00163             }
00164         }
00165         //File for output predictions
00166         File outputPredictionFile = this.outputPredictionFileOption.getFile();
00167         PrintStream outputPredictionResultStream = null;
00168         if (outputPredictionFile != null) {
00169             try {
00170                 if (outputPredictionFile.exists()) {
00171                     outputPredictionResultStream = new PrintStream(
00172                             new FileOutputStream(outputPredictionFile, true), true);
00173                 } else {
00174                     outputPredictionResultStream = new PrintStream(
00175                             new FileOutputStream(outputPredictionFile), true);
00176                 }
00177             } catch (Exception ex) {
00178                 throw new RuntimeException(
00179                         "Unable to open prediction result file: " + outputPredictionFile, ex);
00180             }
00181         }
00182         boolean firstDump = true;
00183         boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
00184         long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
00185         long lastEvaluateStartTime = evaluateStartTime;
00186         double RAMHours = 0.0;
00187         while (stream.hasMoreInstances()
00188                 && ((maxInstances < 0) || (instancesProcessed < maxInstances))
00189                 && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
00190             Instance trainInst = stream.nextInstance();
00191             Instance testInst = (Instance) trainInst.copy();
00192             int trueClass = (int) trainInst.classValue();
00193             //testInst.setClassMissing();
00194             double[] prediction = learner.getVotesForInstance(testInst);
00195             // Output prediction
00196             if (outputPredictionFile != null) {
00197                 outputPredictionResultStream.println(Utils.maxIndex(prediction) + "," + trueClass);
00198             }
00199 
00200             //evaluator.addClassificationAttempt(trueClass, prediction, testInst.weight());
00201             evaluator.addResult(testInst, prediction);
00202             learner.trainOnInstance(trainInst);
00203             instancesProcessed++;
00204             if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
00205                     || stream.hasMoreInstances() == false) {
00206                 long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
00207                 double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
00208                 double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
00209                 double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
00210                 RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
00211                 RAMHours += RAMHoursIncrement;
00212                 lastEvaluateStartTime = evaluateTime;
00213                 learningCurve.insertEntry(new LearningEvaluation(
00214                         new Measurement[]{
00215                             new Measurement(
00216                             "learning evaluation instances",
00217                             instancesProcessed),
00218                             new Measurement(
00219                             "evaluation time ("
00220                             + (preciseCPUTiming ? "cpu "
00221                             : "") + "seconds)",
00222                             time),
00223                             new Measurement(
00224                             "model cost (RAM-Hours)",
00225                             RAMHours)
00226                         },
00227                         evaluator, learner));
00228 
00229                 if (immediateResultStream != null) {
00230                     if (firstDump) {
00231                         immediateResultStream.println(learningCurve.headerToString());
00232                         firstDump = false;
00233                     }
00234                     immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
00235                     immediateResultStream.flush();
00236                 }
00237             }
00238             if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
00239                 if (monitor.taskShouldAbort()) {
00240                     return null;
00241                 }
00242                 long estimatedRemainingInstances = stream.estimatedRemainingInstances();
00243                 if (maxInstances > 0) {
00244                     long maxRemaining = maxInstances - instancesProcessed;
00245                     if ((estimatedRemainingInstances < 0)
00246                             || (maxRemaining < estimatedRemainingInstances)) {
00247                         estimatedRemainingInstances = maxRemaining;
00248                     }
00249                 }
00250                 monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
00251                         : (double) instancesProcessed
00252                         / (double) (instancesProcessed + estimatedRemainingInstances));
00253                 if (monitor.resultPreviewRequested()) {
00254                     monitor.setLatestResultPreview(learningCurve.copy());
00255                 }
00256                 secondsElapsed = (int) TimingUtils.nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread()
00257                         - evaluateStartTime);
00258             }
00259         }
00260         if (immediateResultStream != null) {
00261             immediateResultStream.close();
00262         }
00263         if (outputPredictionResultStream != null) {
00264             outputPredictionResultStream.close();
00265         }
00266         return learningCurve;
00267     }
00268 }
 All Classes Namespaces Files Functions Variables Enumerations