MOA 12.03
Real Time Analytics for Data Streams
RandomTreeGenerator.java
Go to the documentation of this file.
00001 /*
00002  *    RandomTreeGenerator.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.streams.generators;
00021 
00022 import weka.core.Attribute;
00023 import weka.core.DenseInstance;
00024 import weka.core.FastVector;
00025 import weka.core.Instance;
00026 import weka.core.Instances;
00027 
00028 import java.io.Serializable;
00029 import java.util.ArrayList;
00030 import java.util.Random;
00031 
00032 import moa.core.InstancesHeader;
00033 import moa.core.ObjectRepository;
00034 import moa.options.AbstractOptionHandler;
00035 import moa.options.FloatOption;
00036 import moa.options.IntOption;
00037 import moa.streams.InstanceStream;
00038 import moa.tasks.TaskMonitor;
00039 
00046 public class RandomTreeGenerator extends AbstractOptionHandler implements
00047         InstanceStream {
00048 
00049     @Override
00050     public String getPurposeString() {
00051         return "Generates a stream based on a randomly generated tree.";
00052     }
00053 
00054     private static final long serialVersionUID = 1L;
00055 
00056     public IntOption treeRandomSeedOption = new IntOption("treeRandomSeed",
00057             'r', "Seed for random generation of tree.", 1);
00058 
00059     public IntOption instanceRandomSeedOption = new IntOption(
00060             "instanceRandomSeed", 'i',
00061             "Seed for random generation of instances.", 1);
00062 
00063     public IntOption numClassesOption = new IntOption("numClasses", 'c',
00064             "The number of classes to generate.", 2, 2, Integer.MAX_VALUE);
00065 
00066     public IntOption numNominalsOption = new IntOption("numNominals", 'o',
00067             "The number of nominal attributes to generate.", 5, 0,
00068             Integer.MAX_VALUE);
00069 
00070     public IntOption numNumericsOption = new IntOption("numNumerics", 'u',
00071             "The number of numeric attributes to generate.", 5, 0,
00072             Integer.MAX_VALUE);
00073 
00074     public IntOption numValsPerNominalOption = new IntOption(
00075             "numValsPerNominal", 'v',
00076             "The number of values to generate per nominal attribute.", 5, 2,
00077             Integer.MAX_VALUE);
00078 
00079     public IntOption maxTreeDepthOption = new IntOption("maxTreeDepth", 'd',
00080             "The maximum depth of the tree concept.", 5, 0, Integer.MAX_VALUE);
00081 
00082     public IntOption firstLeafLevelOption = new IntOption(
00083             "firstLeafLevel",
00084             'l',
00085             "The first level of the tree above maxTreeDepth that can have leaves.",
00086             3, 0, Integer.MAX_VALUE);
00087 
00088     public FloatOption leafFractionOption = new FloatOption("leafFraction",
00089             'f',
00090             "The fraction of leaves per level from firstLeafLevel onwards.",
00091             0.15, 0.0, 1.0);
00092 
00093     protected static class Node implements Serializable {
00094 
00095         private static final long serialVersionUID = 1L;
00096 
00097         public int classLabel;
00098 
00099         public int splitAttIndex;
00100 
00101         public double splitAttValue;
00102 
00103         public Node[] children;
00104     }
00105 
00106     protected Node treeRoot;
00107 
00108     protected InstancesHeader streamHeader;
00109 
00110     protected Random instanceRandom;
00111 
00112     @Override
00113     public void prepareForUseImpl(TaskMonitor monitor,
00114             ObjectRepository repository) {
00115         monitor.setCurrentActivity("Preparing random tree...", -1.0);
00116         generateHeader();
00117         generateRandomTree();
00118         restart();
00119     }
00120 
00121     @Override
00122     public long estimatedRemainingInstances() {
00123         return -1;
00124     }
00125 
00126     @Override
00127     public boolean isRestartable() {
00128         return true;
00129     }
00130 
00131     @Override
00132     public void restart() {
00133         this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
00134     }
00135 
00136     @Override
00137     public InstancesHeader getHeader() {
00138         return this.streamHeader;
00139     }
00140 
00141     @Override
00142     public boolean hasMoreInstances() {
00143         return true;
00144     }
00145 
00146     @Override
00147     public Instance nextInstance() {
00148         double[] attVals = new double[this.numNominalsOption.getValue()
00149                 + this.numNumericsOption.getValue()];
00150         InstancesHeader header = getHeader();
00151         Instance inst = new DenseInstance(header.numAttributes());
00152         for (int i = 0; i < attVals.length; i++) {
00153             attVals[i] = i < this.numNominalsOption.getValue() ? this.instanceRandom.nextInt(this.numValsPerNominalOption.getValue())
00154                     : this.instanceRandom.nextDouble();
00155             inst.setValue(i, attVals[i]);
00156         }
00157         inst.setDataset(header);
00158         inst.setClassValue(classifyInstance(this.treeRoot, attVals));
00159         return inst;
00160     }
00161 
00162     protected int classifyInstance(Node node, double[] attVals) {
00163         if (node.children == null) {
00164             return node.classLabel;
00165         }
00166         if (node.splitAttIndex < this.numNominalsOption.getValue()) {
00167             return classifyInstance(
00168                     node.children[(int) attVals[node.splitAttIndex]], attVals);
00169         }
00170         return classifyInstance(
00171                 node.children[attVals[node.splitAttIndex] < node.splitAttValue ? 0
00172                 : 1], attVals);
00173     }
00174 
00175     protected void generateHeader() {
00176         FastVector attributes = new FastVector();
00177         FastVector nominalAttVals = new FastVector();
00178         for (int i = 0; i < this.numValsPerNominalOption.getValue(); i++) {
00179             nominalAttVals.addElement("value" + (i + 1));
00180         }
00181         for (int i = 0; i < this.numNominalsOption.getValue(); i++) {
00182             attributes.addElement(new Attribute("nominal" + (i + 1),
00183                     nominalAttVals));
00184         }
00185         for (int i = 0; i < this.numNumericsOption.getValue(); i++) {
00186             attributes.addElement(new Attribute("numeric" + (i + 1)));
00187         }
00188         FastVector classLabels = new FastVector();
00189         for (int i = 0; i < this.numClassesOption.getValue(); i++) {
00190             classLabels.addElement("class" + (i + 1));
00191         }
00192         attributes.addElement(new Attribute("class", classLabels));
00193         this.streamHeader = new InstancesHeader(new Instances(
00194                 getCLICreationString(InstanceStream.class), attributes, 0));
00195         this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
00196     }
00197 
00198     protected void generateRandomTree() {
00199         Random treeRand = new Random(this.treeRandomSeedOption.getValue());
00200         ArrayList<Integer> nominalAttCandidates = new ArrayList<Integer>(
00201                 this.numNominalsOption.getValue());
00202         for (int i = 0; i < this.numNominalsOption.getValue(); i++) {
00203             nominalAttCandidates.add(i);
00204         }
00205         double[] minNumericVals = new double[this.numNumericsOption.getValue()];
00206         double[] maxNumericVals = new double[this.numNumericsOption.getValue()];
00207         for (int i = 0; i < this.numNumericsOption.getValue(); i++) {
00208             minNumericVals[i] = 0.0;
00209             maxNumericVals[i] = 1.0;
00210         }
00211         this.treeRoot = generateRandomTreeNode(0, nominalAttCandidates,
00212                 minNumericVals, maxNumericVals, treeRand);
00213     }
00214 
00215     protected Node generateRandomTreeNode(int currentDepth,
00216             ArrayList<Integer> nominalAttCandidates, double[] minNumericVals,
00217             double[] maxNumericVals, Random treeRand) {
00218         if ((currentDepth >= this.maxTreeDepthOption.getValue())
00219                 || ((currentDepth >= this.firstLeafLevelOption.getValue()) && (this.leafFractionOption.getValue() >= (1.0 - treeRand.nextDouble())))) {
00220             Node leaf = new Node();
00221             leaf.classLabel = treeRand.nextInt(this.numClassesOption.getValue());
00222             return leaf;
00223         }
00224         Node node = new Node();
00225         int chosenAtt = treeRand.nextInt(nominalAttCandidates.size()
00226                 + this.numNumericsOption.getValue());
00227         if (chosenAtt < nominalAttCandidates.size()) {
00228             node.splitAttIndex = nominalAttCandidates.get(chosenAtt);
00229             node.children = new Node[this.numValsPerNominalOption.getValue()];
00230             ArrayList<Integer> newNominalCandidates = new ArrayList<Integer>(
00231                     nominalAttCandidates);
00232             newNominalCandidates.remove(new Integer(node.splitAttIndex));
00233             newNominalCandidates.trimToSize();
00234             for (int i = 0; i < node.children.length; i++) {
00235                 node.children[i] = generateRandomTreeNode(currentDepth + 1,
00236                         newNominalCandidates, minNumericVals, maxNumericVals,
00237                         treeRand);
00238             }
00239         } else {
00240             int numericIndex = chosenAtt - nominalAttCandidates.size();
00241             node.splitAttIndex = this.numNominalsOption.getValue()
00242                     + numericIndex;
00243             double minVal = minNumericVals[numericIndex];
00244             double maxVal = maxNumericVals[numericIndex];
00245             node.splitAttValue = ((maxVal - minVal) * treeRand.nextDouble())
00246                     + minVal;
00247             node.children = new Node[2];
00248             double[] newMaxVals = maxNumericVals.clone();
00249             newMaxVals[numericIndex] = node.splitAttValue;
00250             node.children[0] = generateRandomTreeNode(currentDepth + 1,
00251                     nominalAttCandidates, minNumericVals, newMaxVals, treeRand);
00252             double[] newMinVals = minNumericVals.clone();
00253             newMinVals[numericIndex] = node.splitAttValue;
00254             node.children[1] = generateRandomTreeNode(currentDepth + 1,
00255                     nominalAttCandidates, newMinVals, maxNumericVals, treeRand);
00256         }
00257         return node;
00258     }
00259 
00260     @Override
00261     public void getDescription(StringBuilder sb, int indent) {
00262         // TODO Auto-generated method stub
00263     }
00264 }
 All Classes Namespaces Files Functions Variables Enumerations