MOA 12.03
Real Time Analytics for Data Streams
|
00001 /* 00002 * VFMLNumericAttributeClassObserver.java 00003 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 00004 * @author Richard Kirkby ([email protected]) 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 */ 00020 package moa.classifiers.core.attributeclassobservers; 00021 00022 import weka.core.Utils; 00023 00024 import java.io.Serializable; 00025 import java.util.ArrayList; 00026 import java.util.List; 00027 import moa.classifiers.core.AttributeSplitSuggestion; 00028 import moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; 00029 import moa.classifiers.core.splitcriteria.SplitCriterion; 00030 00031 import moa.core.DoubleVector; 00032 import moa.core.ObjectRepository; 00033 import moa.options.AbstractOptionHandler; 00034 import moa.options.IntOption; 00035 import moa.tasks.TaskMonitor; 00036 00044 public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler 00045 implements NumericAttributeClassObserver { 00046 00047 private static final long serialVersionUID = 1L; 00048 00049 protected class Bin implements Serializable { 00050 00051 private static final long serialVersionUID = 1L; 00052 00053 public double lowerBound, upperBound; 00054 00055 public DoubleVector classWeights = new DoubleVector(); 00056 00057 public int boundaryClass; 00058 00059 public double boundaryWeight; 00060 } 00061 00062 protected List<Bin> binList = new ArrayList<Bin>(); 00063 00064 public IntOption numBinsOption = new IntOption("numBins", 'n', 00065 "The number of bins.", 10, 1, Integer.MAX_VALUE); 00066 00067 00068 @Override 00069 public void observeAttributeClass(double attVal, int classVal, double weight) { 00070 if (Utils.isMissingValue(attVal)) { 00071 } else { 00072 if (this.binList.size() < 1) { 00073 // create the first bin 00074 Bin newBin = new Bin(); 00075 newBin.classWeights.addToValue(classVal, weight); 00076 newBin.boundaryClass = classVal; 00077 newBin.boundaryWeight = weight; 00078 newBin.upperBound = attVal; 00079 newBin.lowerBound = attVal; 00080 this.binList.add(newBin); 00081 } else { 00082 // find bin containing new example with binary search 00083 int index = -1; 00084 boolean found = false; 00085 int min = 0; 00086 int max = this.binList.size() - 1; 00087 index = 0; 00088 while ((min <= max) && !found) { 00089 int i = (min + max) / 2; 00090 Bin bin = this.binList.get(i); 00091 if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound)) 00092 || ((i == this.binList.size() - 1) 00093 && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) { 00094 found = true; 00095 index = i; 00096 } else if (attVal < bin.lowerBound) { 00097 max = i - 1; 00098 } else { 00099 min = i + 1; 00100 } 00101 } 00102 boolean first = false; 00103 boolean last = false; 00104 if (!found) { 00105 // determine if it is before or after the existing range 00106 Bin bin = this.binList.get(0); 00107 if (bin.lowerBound > attVal) { 00108 // go before the first bin 00109 index = 0; 00110 first = true; 00111 } else { 00112 // if we haven't found it yet value must be > last bins 00113 // upperBound 00114 index = this.binList.size() - 1; 00115 last = true; 00116 } 00117 } 00118 Bin bin = this.binList.get(index); // VLIndex(ct->bins, index); 00119 if ((bin.lowerBound == attVal) 00120 || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue()) 00121 // {//1000) 00122 // { 00123 // if this is the exact same boundary and class as the bin 00124 // boundary or we aren't adding new bins any more then 00125 // increment 00126 // boundary counts 00127 bin.classWeights.addToValue(classVal, weight); 00128 if ((bin.boundaryClass == classVal) 00129 && (bin.lowerBound == attVal)) { 00130 // if it is also the same class then special case it 00131 bin.boundaryWeight += weight; 00132 } 00133 } else { 00134 // create a new bin 00135 Bin newBin = new Bin(); 00136 newBin.classWeights.addToValue(classVal, weight); 00137 newBin.boundaryWeight = weight; 00138 newBin.boundaryClass = classVal; 00139 newBin.upperBound = bin.upperBound; 00140 newBin.lowerBound = attVal; 00141 00142 double percent = 0.0; 00143 // estimate initial counts with a linear interpolation 00144 if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) { 00145 percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound)); 00146 } 00147 00148 // take out the boundry points, they stay with the old bin 00149 bin.classWeights.addToValue(bin.boundaryClass, 00150 -bin.boundaryWeight); 00151 DoubleVector weightToShift = new DoubleVector( 00152 bin.classWeights); 00153 weightToShift.scaleValues(percent); 00154 newBin.classWeights.addValues(weightToShift); 00155 bin.classWeights.subtractValues(weightToShift); 00156 // put the boundry examples back in 00157 bin.classWeights.addToValue(bin.boundaryClass, 00158 bin.boundaryWeight); 00159 00160 // insert the new bin in the right place 00161 if (last) { 00162 bin.upperBound = attVal; 00163 newBin.upperBound = attVal; 00164 this.binList.add(newBin); 00165 } else if (first) { 00166 newBin.upperBound = bin.lowerBound; 00167 this.binList.add(0, newBin); 00168 } else { 00169 newBin.upperBound = bin.upperBound; 00170 bin.upperBound = attVal; 00171 this.binList.add(index + 1, newBin); 00172 } 00173 } 00174 } 00175 } 00176 } 00177 00178 @Override 00179 public double probabilityOfAttributeValueGivenClass(double attVal, 00180 int classVal) { 00181 // TODO: NaiveBayes broken until implemented 00182 return 0.0; 00183 } 00184 00185 @Override 00186 public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( 00187 SplitCriterion criterion, double[] preSplitDist, int attIndex, 00188 boolean binaryOnly) { 00189 AttributeSplitSuggestion bestSuggestion = null; 00190 DoubleVector rightDist = new DoubleVector(); 00191 for (Bin bin : this.binList) { 00192 rightDist.addValues(bin.classWeights); 00193 } 00194 DoubleVector leftDist = new DoubleVector(); 00195 for (Bin bin : this.binList) { 00196 leftDist.addValues(bin.classWeights); 00197 rightDist.subtractValues(bin.classWeights); 00198 double[][] postSplitDists = new double[][]{ 00199 leftDist.getArrayCopy(), rightDist.getArrayCopy()}; 00200 double merit = criterion.getMeritOfSplit(preSplitDist, 00201 postSplitDists); 00202 if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { 00203 bestSuggestion = new AttributeSplitSuggestion( 00204 new NumericAttributeBinaryTest(attIndex, 00205 bin.upperBound, false), postSplitDists, merit); 00206 } 00207 } 00208 return bestSuggestion; 00209 } 00210 00211 @Override 00212 public void getDescription(StringBuilder sb, int indent) { 00213 // TODO Auto-generated method stub 00214 } 00215 00216 @Override 00217 protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { 00218 // TODO Auto-generated method stub 00219 } 00220 }