MOA 12.03
Real Time Analytics for Data Streams
WekaClusteringAlgorithm.java
Go to the documentation of this file.
00001 /*
00002  *    WekaClusteringAlgorithm.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen (moa@cs.rwth-aachen.de)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.clusterers;
00022 
00023 import moa.cluster.Clustering;
00024 import moa.core.AutoClassDiscovery;
00025 import moa.core.AutoExpandVector;
00026 import moa.core.Measurement;
00027 import moa.options.ClassOption;
00028 import moa.options.IntOption;
00029 import moa.options.MultiChoiceOption;
00030 import moa.options.StringOption;
00031 
00032 import weka.core.Attribute;
00033 import weka.core.DenseInstance;
00034 import weka.core.FastVector;
00035 import weka.core.Instance;
00036 import weka.core.Instances;
00037 import weka.core.Utils;
00038 
00039 
00040 public class WekaClusteringAlgorithm extends AbstractClusterer{
00041 
00042         private static final long serialVersionUID = 1L;
00043 
00044         public IntOption horizonOption = new IntOption("horizon",
00045                         'h', "Range of the window.", 1000);
00046 
00047         public MultiChoiceOption wekaAlgorithmOption;
00048         
00049         public StringOption parameterOption = new StringOption("parameter", 'p', 
00050                         "Parameters that will be passed to the weka algorithm. (e.g. '-N 5' for using SimpleKmeans with 5 clusters)", "-N 5 -S 8");
00051 
00052     
00053     private Class<?>[] clustererClasses;
00054     private Instances instances;
00055     private weka.clusterers.AbstractClusterer clusterer;
00056     
00057     public WekaClusteringAlgorithm() {
00058         clustererClasses = findWekaClustererClasses();
00059         String[] optionLabels = new String[clustererClasses.length];
00060         String[] optionDescriptions = new String[clustererClasses.length];
00061         
00062         for (int i = 0; i < clustererClasses.length; i++) {
00063                         optionLabels[i] = clustererClasses[i].getSimpleName();
00064                         optionDescriptions[i] = clustererClasses[i].getName();
00065 //                      We do have the parameter option info, but not really a place to show it somewhere
00066 /*
00067                         //System.out.println(clustererClasses[i].getSimpleName());
00068                         for (Class c : clustererClasses[i].getInterfaces()) {
00069                                 if (c.equals(weka.core.OptionHandler.class)) {
00070                                         try {
00071                                                         Enumeration options = ((weka.core.OptionHandler)clustererClasses[i].newInstance()).listOptions();
00072                                                         while(options.hasMoreElements()){
00073                                                                 weka.core.Option o = (weka.core.Option)options.nextElement(); 
00074                                                                 System.out.print(o.synopsis()+" ");     
00075                                                         } 
00076                                                         
00077                                                 } catch (InstantiationException e) {
00078                                                         e.printStackTrace();
00079                                                 } catch (IllegalAccessException e) {
00080                                                         e.printStackTrace();
00081                                                 }
00082                                         
00083                                 }
00084                             }
00085 */                          
00086                 }
00087 
00088         if(clustererClasses!=null && clustererClasses.length > 0){
00089                 wekaAlgorithmOption = new MultiChoiceOption("clusterer", 'w', 
00090                                 "Weka cluster algorithm to use.", 
00091                                 optionLabels, optionDescriptions, 6);
00092         }
00093         else{
00094                 horizonOption = null;
00095                 parameterOption = null;
00096                 
00097         }
00098         
00099         }
00100 
00101     @Override
00102     public void resetLearningImpl() {
00103         try {
00104                 instances = null;
00105                 String clistring = clustererClasses[wekaAlgorithmOption.getChosenIndex()].getName();
00106                 clusterer = (weka.clusterers.AbstractClusterer) ClassOption.cliStringToObject(clistring, weka.clusterers.Clusterer.class, null);
00107                 
00108                 String rawOptions = parameterOption.getValue();
00109                 String[] options = rawOptions.split(" ");
00110                 if(clusterer instanceof weka.core.OptionHandler){
00111                         ((weka.core.OptionHandler)clusterer).setOptions(options);
00112                         Utils.checkForRemainingOptions(options);
00113                 }
00114                 } catch (Exception e) {
00115                         e.printStackTrace();
00116                 }
00117     }
00118 
00119     @Override
00120     public void trainOnInstanceImpl(Instance inst) {
00121         if(instances == null){
00122                 instances =  getDataset(inst.numAttributes(), 0);
00123         }
00124         instances.add(inst);
00125     }
00126 
00127     public Clustering getClusteringResult() {
00128         Clustering clustering = null;
00129         
00130         try {
00131                 clusterer.buildClusterer(instances);
00132                 int numClusters = clusterer.numberOfClusters();
00133                 Instances dataset = getDataset(instances.numAttributes(), numClusters);
00134                 Instances newInstances = new Instances(dataset);
00135                 
00136                 for (int i = 0; i < instances.numInstances(); i++) {
00137                         Instance inst = instances.get(i);
00138                         int cnum = clusterer.clusterInstance(inst);
00139                         
00140                         Instance newInst = new DenseInstance(inst);
00141                         newInst.insertAttributeAt(inst.numAttributes());
00142                         newInst.setDataset(dataset);
00143                         newInst.setClassValue(cnum);
00144                         newInstances.add(newInst);
00145                         }
00146                 clustering = new Clustering(newInstances);
00147                         
00148                 } catch (Exception e) {
00149                         e.printStackTrace();
00150                 }
00151         instances = null;
00152 
00153         return clustering;
00154     }
00155 
00156     
00157     public Instances getDataset(int numdim, int numclass) {
00158         FastVector attributes = new FastVector();
00159         for (int i = 0; i < numdim; i++) {
00160             attributes.addElement(new Attribute("att" + (i + 1)));
00161         }
00162         
00163         if(numclass > 0){
00164                 FastVector classLabels = new FastVector();
00165                 for (int i = 0; i < numclass; i++) {
00166                     classLabels.addElement("class" + (i + 1));
00167                 }
00168                 attributes.addElement(new Attribute("class", classLabels));
00169         }
00170 
00171         Instances myDataset = new Instances("horizion", attributes, 0);
00172         if(numclass > 0){
00173                 myDataset.setClassIndex(myDataset.numAttributes() - 1);
00174         }
00175                 
00176         return myDataset;
00177     }  
00178     
00179     private Class<?>[] findWekaClustererClasses() {
00180         AutoExpandVector<Class<?>> finalClasses = new AutoExpandVector<Class<?>>();
00181         Class<?>[] classesFound = AutoClassDiscovery.findClassesOfType("weka.clusterers",
00182                         weka.clusterers.AbstractClusterer.class);
00183         for (Class<?> foundClass : classesFound) {
00184                 finalClasses.add(foundClass);
00185         }
00186         return finalClasses.toArray(new Class<?>[finalClasses.size()]);
00187 }
00188     
00189     
00190     @Override
00191     protected Measurement[] getModelMeasurementsImpl() {
00192         throw new UnsupportedOperationException("Not supported yet.");
00193     }
00194 
00195     @Override
00196     public void getModelDescription(StringBuilder out, int indent) {
00197     }
00198 
00199     public boolean isRandomizable() {
00200         return false;
00201     }
00202 
00203     public double[] getVotesForInstance(Instance inst) {
00204         return null;
00205     }
00206     
00207     @Override
00208     public boolean keepClassLabel(){
00209         return false;
00210     }
00211 
00212         @Override
00213         public String getPurposeString() {
00214                 String purpose = "MOA Clusterer: " + getClass().getCanonicalName();
00215                 if(clustererClasses==null || clustererClasses.length == 0)
00216                         purpose+="\nPlease add weka.jar to the classpath to use Weka clustering algorithms.";           
00217                 return purpose; 
00218 
00219         }    
00220     
00221 }
00222 
00223 
 All Classes Namespaces Files Functions Variables Enumerations