MOA 12.03
Real Time Analytics for Data Streams
VFMLNumericAttributeClassObserver.java
Go to the documentation of this file.
00001 /*
00002  *    VFMLNumericAttributeClassObserver.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby ([email protected])
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.classifiers.core.attributeclassobservers;
00021 
00022 import weka.core.Utils;
00023 
00024 import java.io.Serializable;
00025 import java.util.ArrayList;
00026 import java.util.List;
00027 import moa.classifiers.core.AttributeSplitSuggestion;
00028 import moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest;
00029 import moa.classifiers.core.splitcriteria.SplitCriterion;
00030 
00031 import moa.core.DoubleVector;
00032 import moa.core.ObjectRepository;
00033 import moa.options.AbstractOptionHandler;
00034 import moa.options.IntOption;
00035 import moa.tasks.TaskMonitor;
00036 
00044 public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler
00045         implements NumericAttributeClassObserver {
00046 
00047     private static final long serialVersionUID = 1L;
00048 
00049     protected class Bin implements Serializable {
00050 
00051         private static final long serialVersionUID = 1L;
00052 
00053         public double lowerBound, upperBound;
00054 
00055         public DoubleVector classWeights = new DoubleVector();
00056 
00057         public int boundaryClass;
00058 
00059         public double boundaryWeight;
00060     }
00061 
00062     protected List<Bin> binList = new ArrayList<Bin>();
00063 
00064     public IntOption numBinsOption = new IntOption("numBins", 'n',
00065         "The number of bins.", 10, 1, Integer.MAX_VALUE);
00066 
00067 
00068     @Override
00069     public void observeAttributeClass(double attVal, int classVal, double weight) {
00070         if (Utils.isMissingValue(attVal)) {
00071         } else {
00072             if (this.binList.size() < 1) {
00073                 // create the first bin
00074                 Bin newBin = new Bin();
00075                 newBin.classWeights.addToValue(classVal, weight);
00076                 newBin.boundaryClass = classVal;
00077                 newBin.boundaryWeight = weight;
00078                 newBin.upperBound = attVal;
00079                 newBin.lowerBound = attVal;
00080                 this.binList.add(newBin);
00081             } else {
00082                 // find bin containing new example with binary search
00083                 int index = -1;
00084                 boolean found = false;
00085                 int min = 0;
00086                 int max = this.binList.size() - 1;
00087                 index = 0;
00088                 while ((min <= max) && !found) {
00089                     int i = (min + max) / 2;
00090                     Bin bin = this.binList.get(i);
00091                     if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound))
00092                             || ((i == this.binList.size() - 1)
00093                             && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) {
00094                         found = true;
00095                         index = i;
00096                     } else if (attVal < bin.lowerBound) {
00097                         max = i - 1;
00098                     } else {
00099                         min = i + 1;
00100                     }
00101                 }
00102                 boolean first = false;
00103                 boolean last = false;
00104                 if (!found) {
00105                     // determine if it is before or after the existing range
00106                     Bin bin = this.binList.get(0);
00107                     if (bin.lowerBound > attVal) {
00108                         // go before the first bin
00109                         index = 0;
00110                         first = true;
00111                     } else {
00112                         // if we haven't found it yet value must be > last bins
00113                         // upperBound
00114                         index = this.binList.size() - 1;
00115                         last = true;
00116                     }
00117                 }
00118                 Bin bin = this.binList.get(index); // VLIndex(ct->bins, index);
00119                 if ((bin.lowerBound == attVal)
00120                         || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue())
00121                     // {//1000)
00122                     // {
00123                     // if this is the exact same boundary and class as the bin
00124                     // boundary or we aren't adding new bins any more then
00125                     // increment
00126                     // boundary counts
00127                     bin.classWeights.addToValue(classVal, weight);
00128                     if ((bin.boundaryClass == classVal)
00129                             && (bin.lowerBound == attVal)) {
00130                         // if it is also the same class then special case it
00131                         bin.boundaryWeight += weight;
00132                     }
00133                 } else {
00134                     // create a new bin
00135                     Bin newBin = new Bin();
00136                     newBin.classWeights.addToValue(classVal, weight);
00137                     newBin.boundaryWeight = weight;
00138                     newBin.boundaryClass = classVal;
00139                     newBin.upperBound = bin.upperBound;
00140                     newBin.lowerBound = attVal;
00141 
00142                     double percent = 0.0;
00143                     // estimate initial counts with a linear interpolation
00144                     if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) {
00145                         percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound));
00146                     }
00147 
00148                     // take out the boundry points, they stay with the old bin
00149                     bin.classWeights.addToValue(bin.boundaryClass,
00150                             -bin.boundaryWeight);
00151                     DoubleVector weightToShift = new DoubleVector(
00152                             bin.classWeights);
00153                     weightToShift.scaleValues(percent);
00154                     newBin.classWeights.addValues(weightToShift);
00155                     bin.classWeights.subtractValues(weightToShift);
00156                     // put the boundry examples back in
00157                     bin.classWeights.addToValue(bin.boundaryClass,
00158                             bin.boundaryWeight);
00159 
00160                     // insert the new bin in the right place
00161                     if (last) {
00162                         bin.upperBound = attVal;
00163                         newBin.upperBound = attVal;
00164                         this.binList.add(newBin);
00165                     } else if (first) {
00166                         newBin.upperBound = bin.lowerBound;
00167                         this.binList.add(0, newBin);
00168                     } else {
00169                         newBin.upperBound = bin.upperBound;
00170                         bin.upperBound = attVal;
00171                         this.binList.add(index + 1, newBin);
00172                     }
00173                 }
00174             }
00175         }
00176     }
00177 
00178     @Override
00179     public double probabilityOfAttributeValueGivenClass(double attVal,
00180             int classVal) {
00181         // TODO: NaiveBayes broken until implemented
00182         return 0.0;
00183     }
00184 
00185     @Override
00186     public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(
00187             SplitCriterion criterion, double[] preSplitDist, int attIndex,
00188             boolean binaryOnly) {
00189         AttributeSplitSuggestion bestSuggestion = null;
00190         DoubleVector rightDist = new DoubleVector();
00191         for (Bin bin : this.binList) {
00192             rightDist.addValues(bin.classWeights);
00193         }
00194         DoubleVector leftDist = new DoubleVector();
00195         for (Bin bin : this.binList) {
00196             leftDist.addValues(bin.classWeights);
00197             rightDist.subtractValues(bin.classWeights);
00198             double[][] postSplitDists = new double[][]{
00199                 leftDist.getArrayCopy(), rightDist.getArrayCopy()};
00200             double merit = criterion.getMeritOfSplit(preSplitDist,
00201                     postSplitDists);
00202             if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) {
00203                 bestSuggestion = new AttributeSplitSuggestion(
00204                         new NumericAttributeBinaryTest(attIndex,
00205                         bin.upperBound, false), postSplitDists, merit);
00206             }
00207         }
00208         return bestSuggestion;
00209     }
00210 
00211     @Override
00212     public void getDescription(StringBuilder sb, int indent) {
00213         // TODO Auto-generated method stub
00214     }
00215 
00216     @Override
00217     protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
00218         // TODO Auto-generated method stub
00219     }
00220 }
 All Classes Namespaces Files Functions Variables Enumerations