MOA 12.03
Real Time Analytics for Data Streams
AbstractClusterer.java
Go to the documentation of this file.
00001 /*
00002  *    AbstractClusterer.java
00003  *    Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
00004  *    @author Albert Bifet (abifet@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.clusterers;
00021 
00022 import java.util.LinkedList;
00023 import java.util.List;
00024 import java.util.Random;
00025 import moa.cluster.Clustering;
00026 
00027 import moa.core.InstancesHeader;
00028 import moa.core.Measurement;
00029 import moa.core.ObjectRepository;
00030 import moa.core.StringUtils;
00031 import moa.gui.AWTRenderer;
00032 import moa.options.AbstractOptionHandler;
00033 import moa.options.FlagOption;
00034 import moa.options.IntOption;
00035 import moa.tasks.TaskMonitor;
00036 import weka.core.Instance;
00037 import weka.core.Instances;
00038 
00039 public abstract class AbstractClusterer extends AbstractOptionHandler
00040                 implements Clusterer {
00041         
00042         @Override
00043         public String getPurposeString() {
00044                 return "MOA Clusterer: " + getClass().getCanonicalName();
00045         }
00046 
00047         protected InstancesHeader modelContext;
00048 
00049         protected double trainingWeightSeenByModel = 0.0;
00050 
00051         protected int randomSeed = 1;
00052 
00053         protected IntOption randomSeedOption;
00054 
00055     public FlagOption evaluateMicroClusteringOption;
00056 
00057     protected Random clustererRandom;
00058 
00059     protected Clustering clustering;
00060     
00061         public AbstractClusterer() {
00062                 if (isRandomizable()) {
00063                         this.randomSeedOption = new IntOption("randomSeed", 'r',
00064                                         "Seed for random behaviour of the Clusterer.", 1);
00065                 }
00066 
00067         if( implementsMicroClusterer()){
00068             this.evaluateMicroClusteringOption =
00069                     new FlagOption("evaluateMicroClustering", 'M',
00070                     "Evaluate the underlying microclustering instead of the macro clustering");
00071         }
00072         }
00073 
00074         @Override
00075         public void prepareForUseImpl(TaskMonitor monitor,
00076                         ObjectRepository repository) {
00077                 if (this.randomSeedOption != null) {
00078                         this.randomSeed = this.randomSeedOption.getValue();
00079                 }
00080                 if (!trainingHasStarted()) {
00081                         resetLearning();
00082                 }
00083                 clustering = new Clustering();
00084         }
00085 
00086         public void setModelContext(InstancesHeader ih) {
00087                 if ((ih != null) && (ih.classIndex() < 0)) {
00088                         throw new IllegalArgumentException(
00089                                         "Context for a Clusterer must include a class to learn");
00090                 }
00091                 if (trainingHasStarted()
00092                                 && (this.modelContext != null)
00093                                 && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) {
00094                         throw new IllegalArgumentException(
00095                                         "New context is not compatible with existing model");
00096                 }
00097                 this.modelContext = ih;
00098         }
00099 
00100         public InstancesHeader getModelContext() {
00101                 return this.modelContext;
00102         }
00103 
00104         public void setRandomSeed(int s) {
00105                 this.randomSeed = s;
00106                 if (this.randomSeedOption != null) {
00107                         // keep option consistent
00108                         this.randomSeedOption.setValue(s);
00109                 }
00110         }
00111 
00112         public boolean trainingHasStarted() {
00113                 return this.trainingWeightSeenByModel > 0.0;
00114         }
00115 
00116         public double trainingWeightSeenByModel() {
00117                 return this.trainingWeightSeenByModel;
00118         }
00119 
00120         public void resetLearning() {
00121                 this.trainingWeightSeenByModel = 0.0;
00122                 if (isRandomizable()) {
00123                         this.clustererRandom = new Random(this.randomSeed);
00124                 }
00125                 resetLearningImpl();
00126         }
00127 
00128         public void trainOnInstance(Instance inst) {
00129                 if (inst.weight() > 0.0) {
00130                         this.trainingWeightSeenByModel += inst.weight();
00131                         trainOnInstanceImpl(inst);
00132                 }
00133         }
00134 
00135         public Measurement[] getModelMeasurements() {
00136                 List<Measurement> measurementList = new LinkedList<Measurement>();
00137                 measurementList.add(new Measurement("model training instances",
00138                                 trainingWeightSeenByModel()));
00139                 measurementList.add(new Measurement("model serialized size (bytes)",
00140                                 measureByteSize()));
00141                 Measurement[] modelMeasurements = getModelMeasurementsImpl();
00142                 if (modelMeasurements != null) {
00143                         for (Measurement measurement : modelMeasurements) {
00144                                 measurementList.add(measurement);
00145                         }
00146                 }
00147                 // add average of sub-model measurements
00148                 Clusterer[] subModels = getSubClusterers();
00149                 if ((subModels != null) && (subModels.length > 0)) {
00150                         List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>();
00151                         for (Clusterer subModel : subModels) {
00152                                 if (subModel != null) {
00153                                         subMeasurements.add(subModel.getModelMeasurements());
00154                                 }
00155                         }
00156                         Measurement[] avgMeasurements = Measurement
00157                                         .averageMeasurements(subMeasurements
00158                                                         .toArray(new Measurement[subMeasurements.size()][]));
00159                         for (Measurement measurement : avgMeasurements) {
00160                                 measurementList.add(measurement);
00161                         }
00162                 }
00163                 return measurementList.toArray(new Measurement[measurementList.size()]);
00164         }
00165 
00166         public void getDescription(StringBuilder out, int indent) {
00167                 StringUtils.appendIndented(out, indent, "Model type: ");
00168                 out.append(this.getClass().getName());
00169                 StringUtils.appendNewline(out);
00170                 Measurement.getMeasurementsDescription(getModelMeasurements(), out,
00171                                 indent);
00172                 StringUtils.appendNewlineIndented(out, indent, "Model description:");
00173                 StringUtils.appendNewline(out);
00174                 if (trainingHasStarted()) {
00175                         getModelDescription(out, indent);
00176                 } else {
00177                         StringUtils.appendIndented(out, indent,
00178                                         "Model has not been trained.");
00179                 }
00180         }
00181 
00182         public Clusterer[] getSubClusterers() {
00183                 return null;
00184         }
00185 
00186         @Override
00187         public Clusterer copy() {
00188                 return (Clusterer) super.copy();
00189         }
00190 
00191 //      public boolean correctlyClassifies(Instance inst) {
00192 //              return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst
00193 //                              .classValue();
00194 //      }
00195 
00196         public String getClassNameString() {
00197                 return InstancesHeader.getClassNameString(this.modelContext);
00198         }
00199 
00200         public String getClassLabelString(int classLabelIndex) {
00201                 return InstancesHeader.getClassLabelString(this.modelContext,
00202                                 classLabelIndex);
00203         }
00204 
00205         public String getAttributeNameString(int attIndex) {
00206                 return InstancesHeader.getAttributeNameString(this.modelContext,
00207                                 attIndex);
00208         }
00209 
00210         public String getNominalValueString(int attIndex, int valIndex) {
00211                 return InstancesHeader.getNominalValueString(this.modelContext,
00212                                 attIndex, valIndex);
00213         }
00214 
00215         // originalContext notnull
00216         // newContext notnull
00217         public static boolean contextIsCompatible(InstancesHeader originalContext,
00218                         InstancesHeader newContext) {
00219                 // rule 1: num classes can increase but never decrease
00220                 // rule 2: num attributes can increase but never decrease
00221                 // rule 3: num nominal attribute values can increase but never decrease
00222                 // rule 4: attribute types must stay in the same order (although class
00223                 // can
00224                 // move; is always skipped over)
00225                 // attribute names are free to change, but should always still represent
00226                 // the original attributes
00227                 if (newContext.numClasses() < originalContext.numClasses()) {
00228                         return false; // rule 1
00229                 }
00230                 if (newContext.numAttributes() < originalContext.numAttributes()) {
00231                         return false; // rule 2
00232                 }
00233                 int oPos = 0;
00234                 int nPos = 0;
00235                 while (oPos < originalContext.numAttributes()) {
00236                         if (oPos == originalContext.classIndex()) {
00237                                 oPos++;
00238                                 if (!(oPos < originalContext.numAttributes())) {
00239                                         break;
00240                                 }
00241                         }
00242                         if (nPos == newContext.classIndex()) {
00243                                 nPos++;
00244                         }
00245                         if (originalContext.attribute(oPos).isNominal()) {
00246                                 if (!newContext.attribute(nPos).isNominal()) {
00247                                         return false; // rule 4
00248                                 }
00249                                 if (newContext.attribute(nPos).numValues() < originalContext
00250                                                 .attribute(oPos).numValues()) {
00251                                         return false; // rule 3
00252                                 }
00253                         } else {
00254                                 assert (originalContext.attribute(oPos).isNumeric());
00255                                 if (!newContext.attribute(nPos).isNumeric()) {
00256                                         return false; // rule 4
00257                                 }
00258                         }
00259                         oPos++;
00260                         nPos++;
00261                 }
00262                 return true; // all checks clear
00263         }
00264 
00265         public AWTRenderer getAWTRenderer() {
00266                 // TODO should return a default renderer here
00267                 // - or should null be interpreted as the default?
00268                 return null;
00269         }
00270 
00271         // reason for ...Impl methods:
00272         // ease programmer burden by not requiring them to remember calls to super
00273         // in overridden methods & will produce compiler errors if not overridden
00274 
00275         public abstract void resetLearningImpl();
00276 
00277         public abstract void trainOnInstanceImpl(Instance inst);
00278 
00279         protected abstract Measurement[] getModelMeasurementsImpl();
00280 
00281         public abstract void getModelDescription(StringBuilder out, int indent);
00282 
00283         protected static int modelAttIndexToInstanceAttIndex(int index,
00284                         Instance inst) {
00285                 return inst.classIndex() > index ? index : index + 1;
00286         }
00287 
00288         protected static int modelAttIndexToInstanceAttIndex(int index,
00289                         Instances insts) {
00290                 return insts.classIndex() > index ? index : index + 1;
00291         }
00292 
00293         public boolean  implementsMicroClusterer(){
00294             return false;
00295         }
00296 
00297         public boolean  keepClassLabel(){
00298             return false;
00299         }
00300         
00301         public Clustering getMicroClusteringResult(){
00302             return null;
00303         };
00304 }
 All Classes Namespaces Files Functions Variables Enumerations