MOA 12.03
Real Time Analytics for Data Streams
CobWeb.java
Go to the documentation of this file.
00001 /*
00002  *    CobWeb.java
00003  *    Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
00004  *    @author Mark Hall (mhall@cs.waikato.ac.nz)
00005  * 
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.clusterers;
00021 
00022 import java.io.Serializable;
00023 
00024 import moa.cluster.Clustering;
00025 import moa.cluster.SphereCluster;
00026 import moa.core.Measurement;
00027 import moa.core.StringUtils;
00028 import moa.options.FloatOption;
00029 import moa.options.IntOption;
00030 import weka.core.AttributeStats;
00031 import weka.core.FastVector;
00032 import weka.core.Instance;
00033 import weka.core.Instances;
00034 import weka.experiment.Stats;
00035 import weka.filters.unsupervised.attribute.Add;
00036 
00037 public class CobWeb extends AbstractClusterer {
00038 
00039     private static final long serialVersionUID = 1L;
00040     public FloatOption acuityOption = new FloatOption("acuity",
00041             'a', "Acuity (minimum standard deviation)", 1.0, 0.0, 90.0);
00042     public FloatOption cutoffOption = new FloatOption("cutoff",
00043             'c', "Cutoff (minimum category utility)", 0.002, 0.0, 90.0); //0.01 * Cobweb.m_normal
00044     public IntOption randomSeedOption = new IntOption("randomSeed", 'r',
00045             "Seed for random noise.", 1);       //42
00046 
00052     private class CNode implements Serializable {
00053 
00055         static final long serialVersionUID = 3452097436933325631L;
00059         private AttributeStats[] m_attStats;
00063         private int m_numAttributes;
00067         protected Instances m_clusterInstances = null;
00071         private FastVector m_children = null;
00075         private double m_totalInstances = 0.0;
00079         private int m_clusterNum = -1;
00080 
00086         public CNode(int numAttributes) {
00087             m_numAttributes = numAttributes;
00088         }
00089 
00096         public CNode(int numAttributes, Instance leafInstance) {
00097             this(numAttributes);
00098             if (m_clusterInstances == null) {
00099                 //System.out.println(leafInstance.numAttributes()+"-"+leafInstance.value(0)+"-"+leafInstance.value(1)+"-"+leafInstance.value(2));
00100                 //System.out.println(leafInstance.numAttributes()+"-"+leafInstance.attribute(0).type()+"-"+leafInstance.attribute(1).type()+"-"+leafInstance.attribute(2).type());
00101                 m_clusterInstances = new Instances(leafInstance.dataset(), 1);
00102             }
00103             m_clusterInstances.add(leafInstance);
00104             updateStats(leafInstance, false);
00105         }
00106 
00112         protected void addInstance(Instance newInstance) {
00113             // Add the instance to this cluster
00114 
00115             if (m_clusterInstances == null) {
00116                 m_clusterInstances = new Instances(newInstance.dataset(), 1);
00117                 m_clusterInstances.add(newInstance);
00118                 updateStats(newInstance, false);
00119                 return;
00120             } else if (m_children == null) {
00121                 /* we are a leaf, so make our existing instance(s) into a child
00122                 and then add the new instance as a child */
00123                 m_children = new FastVector();
00124                 CNode tempSubCluster = new CNode(m_numAttributes,
00125                         m_clusterInstances.instance(0));
00126 
00127                 //      System.out.println("Dumping "+m_clusterInstances.numInstances());
00128                 for (int i = 1; i < m_clusterInstances.numInstances(); i++) {
00129                     tempSubCluster.m_clusterInstances.add(m_clusterInstances.instance(i));
00130                     tempSubCluster.updateStats(m_clusterInstances.instance(i), false);
00131                 }
00132                 m_children = new FastVector();
00133                 m_children.addElement(tempSubCluster);
00134                 m_children.addElement(new CNode(m_numAttributes, newInstance));
00135 
00136                 m_clusterInstances.add(newInstance);
00137                 updateStats(newInstance, false);
00138 
00139                 // here is where we check against cutoff (also check cutoff
00140                 // in findHost)
00141                 if (categoryUtility() < m_cutoff) {
00142                     //    System.out.println("Cutting (leaf add) ");
00143                     m_children = null;
00144                 }
00145                 return;
00146             }
00147 
00148             // otherwise, find the best host for this instance
00149             CNode bestHost = findHost(newInstance, false);
00150             if (bestHost != null) {
00151                 // now add to the best host
00152                 bestHost.addInstance(newInstance);
00153             }
00154         }
00155 
00165         private double[] cuScoresForChildren(Instance newInstance) {
00166             //throws Exception {
00167             // look for a host in existing children
00168             double[] categoryUtils = new double[m_children.size()];
00169 
00170             // look for a home for this instance in the existing children
00171             for (int i = 0; i < m_children.size(); i++) {
00172                 CNode temp = (CNode) m_children.elementAt(i);
00173                 // tentitively add the new instance to this child
00174                 temp.updateStats(newInstance, false);
00175                 categoryUtils[i] = categoryUtility();
00176 
00177                 // remove the new instance from this child
00178                 temp.updateStats(newInstance, true);
00179             }
00180             return categoryUtils;
00181         }
00182 
00183         private double cuScoreForBestTwoMerged(CNode merged,
00184                 CNode a, CNode b,
00185                 Instance newInstance) {//throws Exception {
00186 
00187             double mergedCU = -Double.MAX_VALUE;
00188             // consider merging the best and second
00189             // best.
00190             merged.m_clusterInstances = new Instances(m_clusterInstances, 1);
00191 
00192             merged.addChildNode(a);
00193             merged.addChildNode(b);
00194             merged.updateStats(newInstance, false); // add new instance to stats
00195             // remove the best and second best nodes
00196             m_children.removeElementAt(m_children.indexOf(a));
00197             m_children.removeElementAt(m_children.indexOf(b));
00198             m_children.addElement(merged);
00199             mergedCU = categoryUtility();
00200             // restore the status quo
00201             merged.updateStats(newInstance, true);
00202             m_children.removeElementAt(m_children.indexOf(merged));
00203             m_children.addElement(a);
00204             m_children.addElement(b);
00205             return mergedCU;
00206         }
00207 
00218         private CNode findHost(Instance newInstance,
00219                 boolean structureFrozen) {//throws Exception {
00220 
00221             if (!structureFrozen) {
00222                 updateStats(newInstance, false);
00223             }
00224 
00225             // look for a host in existing children and also consider as a new leaf
00226             double[] categoryUtils = cuScoresForChildren(newInstance);
00227 
00228             // make a temporary new leaf for this instance and get CU
00229             CNode newLeaf = new CNode(m_numAttributes, newInstance);
00230             m_children.addElement(newLeaf);
00231             double bestHostCU = categoryUtility();
00232             CNode finalBestHost = newLeaf;
00233 
00234             // remove new leaf when seaching for best and second best nodes to
00235             // consider for merging and splitting
00236             m_children.removeElementAt(m_children.size() - 1);
00237 
00238             // now determine the best host (and the second best)
00239             int best = 0;
00240             int secondBest = 0;
00241             for (int i = 0; i < categoryUtils.length; i++) {
00242                 if (categoryUtils[i] > categoryUtils[secondBest]) {
00243                     if (categoryUtils[i] > categoryUtils[best]) {
00244                         secondBest = best;
00245                         best = i;
00246                     } else {
00247                         secondBest = i;
00248                     }
00249                 }
00250             }
00251 
00252             CNode a = (CNode) m_children.elementAt(best);
00253             CNode b = (CNode) m_children.elementAt(secondBest);
00254             if (categoryUtils[best] > bestHostCU) {
00255                 bestHostCU = categoryUtils[best];
00256                 finalBestHost = a;
00257                 //      System.out.println("Node is best");
00258             }
00259 
00260             if (structureFrozen) {
00261                 if (finalBestHost == newLeaf) {
00262                     return null; // *this* node is the best host
00263                 } else {
00264                     return finalBestHost;
00265                 }
00266             }
00267 
00268             double mergedCU = -Double.MAX_VALUE;
00269             CNode merged = new CNode(m_numAttributes);
00270             if (a != b) {
00271                 mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance);
00272 
00273                 if (mergedCU > bestHostCU) {
00274                     bestHostCU = mergedCU;
00275                     finalBestHost = merged;
00276                 }
00277             }
00278 
00279             // Consider splitting the best
00280             double splitCU = -Double.MAX_VALUE;
00281             double splitBestChildCU = -Double.MAX_VALUE;
00282             double splitPlusNewLeafCU = -Double.MAX_VALUE;
00283             double splitPlusMergeBestTwoCU = -Double.MAX_VALUE;
00284             if (a.m_children != null) {
00285                 FastVector tempChildren = new FastVector();
00286 
00287                 for (int i = 0; i < m_children.size(); i++) {
00288                     CNode existingChild = (CNode) m_children.elementAt(i);
00289                     if (existingChild != a) {
00290                         tempChildren.addElement(existingChild);
00291                     }
00292                 }
00293                 for (int i = 0; i < a.m_children.size(); i++) {
00294                     CNode promotedChild = (CNode) a.m_children.elementAt(i);
00295                     tempChildren.addElement(promotedChild);
00296                 }
00297                 // also add the new leaf
00298                 tempChildren.addElement(newLeaf);
00299 
00300                 FastVector saveStatusQuo = m_children;
00301                 m_children = tempChildren;
00302                 splitPlusNewLeafCU = categoryUtility(); // split + new leaf
00303                 // remove the new leaf
00304                 tempChildren.removeElementAt(tempChildren.size() - 1);
00305                 // now look for best and second best
00306                 categoryUtils = cuScoresForChildren(newInstance);
00307 
00308                 // now determine the best host (and the second best)
00309                 best = 0;
00310                 secondBest = 0;
00311                 for (int i = 0; i < categoryUtils.length; i++) {
00312                     if (categoryUtils[i] > categoryUtils[secondBest]) {
00313                         if (categoryUtils[i] > categoryUtils[best]) {
00314                             secondBest = best;
00315                             best = i;
00316                         } else {
00317                             secondBest = i;
00318                         }
00319                     }
00320                 }
00321                 CNode sa = (CNode) m_children.elementAt(best);
00322                 CNode sb = (CNode) m_children.elementAt(secondBest);
00323                 splitBestChildCU = categoryUtils[best];
00324 
00325                 // now merge best and second best
00326                 CNode mergedSplitChildren = new CNode(m_numAttributes);
00327                 if (sa != sb) {
00328                     splitPlusMergeBestTwoCU =
00329                             cuScoreForBestTwoMerged(mergedSplitChildren, sa, sb, newInstance);
00330                 }
00331                 splitCU = (splitBestChildCU > splitPlusNewLeafCU)
00332                         ? splitBestChildCU : splitPlusNewLeafCU;
00333                 splitCU = (splitCU > splitPlusMergeBestTwoCU)
00334                         ? splitCU : splitPlusMergeBestTwoCU;
00335 
00336                 if (splitCU > bestHostCU) {
00337                     bestHostCU = splitCU;
00338                     finalBestHost = this;
00339                     //    tempChildren.removeElementAt(tempChildren.size()-1);
00340                 } else {
00341                     // restore the status quo
00342                     m_children = saveStatusQuo;
00343                 }
00344             }
00345 
00346             if (finalBestHost != this) {
00347                 // can commit the instance to the set of instances at this node
00348                 m_clusterInstances.add(newInstance);
00349             } else {
00350                 m_numberSplits++;
00351             }
00352 
00353             if (finalBestHost == merged) {
00354                 m_numberMerges++;
00355                 m_children.removeElementAt(m_children.indexOf(a));
00356                 m_children.removeElementAt(m_children.indexOf(b));
00357                 m_children.addElement(merged);
00358             }
00359 
00360             if (finalBestHost == newLeaf) {
00361                 finalBestHost = new CNode(m_numAttributes);
00362                 m_children.addElement(finalBestHost);
00363             }
00364 
00365             if (bestHostCU < m_cutoff) {
00366                 if (finalBestHost == this) {
00367                     // splitting was the best, but since we are cutting all children
00368                     // recursion is aborted and we still need to add the instance
00369                     // to the set of instances at this node
00370                     m_clusterInstances.add(newInstance);
00371                 }
00372                 m_children = null;
00373                 finalBestHost = null;
00374             }
00375 
00376             if (finalBestHost == this) {
00377                 // splitting is still the best, so downdate the stats as
00378                 // we'll be recursively calling on this node
00379                 updateStats(newInstance, true);
00380             }
00381 
00382             return finalBestHost;
00383         }
00384 
00391         protected void addChildNode(CNode child) {
00392             for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) {
00393                 Instance temp = child.m_clusterInstances.instance(i);
00394                 m_clusterInstances.add(temp);
00395                 updateStats(temp, false);
00396             }
00397 
00398             if (m_children == null) {
00399                 m_children = new FastVector();
00400             }
00401             m_children.addElement(child);
00402         }
00403 
00410         protected double categoryUtility() {// {throws Exception {
00411 
00412             // if (m_children == null) {
00413             //throw new Exception("categoryUtility: No children!");
00414             // }
00415 
00416             double totalCU = 0;
00417 
00418             for (int i = 0; i < m_children.size(); i++) {
00419                 CNode child = (CNode) m_children.elementAt(i);
00420                 totalCU += categoryUtilityChild(child);
00421             }
00422 
00423             totalCU /= (double) m_children.size();
00424             return totalCU;
00425         }
00426 
00434         protected double categoryUtilityChild(CNode child) {//throws Exception {
00435 
00436             double sum = 0;
00437             for (int i = 0; i < m_numAttributes; i++) {
00438                 if (m_clusterInstances.attribute(i).isNominal()) {
00439                     for (int j = 0;
00440                             j < m_clusterInstances.attribute(i).numValues(); j++) {
00441                         double x = child.getProbability(i, j);
00442                         double y = getProbability(i, j);
00443                         sum += (x * x) - (y * y);
00444                     }
00445                 } else {
00446                     // numeric attribute
00447                     sum += ((m_normal / child.getStandardDev(i))
00448                             - (m_normal / getStandardDev(i)));
00449 
00450                 }
00451             }
00452             return (child.m_totalInstances / m_totalInstances) * sum;
00453         }
00454 
00463         protected double getProbability(int attIndex, int valueIndex) {
00464             //throws Exception {
00465 
00466             //  if (!m_clusterInstances.attribute(attIndex).isNominal()) {
00467             //throw new Exception("getProbability: attribute is not nominal");
00468             //  }
00469 
00470             if (m_attStats[attIndex].totalCount <= 0) {
00471                 return 0;
00472             }
00473 
00474             return (double) m_attStats[attIndex].nominalCounts[valueIndex]
00475                     / (double) m_attStats[attIndex].totalCount;
00476         }
00477 
00485         protected double getStandardDev(int attIndex) { //throws Exception {
00486             //  if (!m_clusterInstances.attribute(attIndex).isNumeric()) {
00487             //throw new Exception("getStandardDev: attribute is not numeric");
00488             // }
00489 
00490             m_attStats[attIndex].numericStats.calculateDerived();
00491             double stdDev = m_attStats[attIndex].numericStats.stdDev;
00492             if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) {
00493                 return m_acuity;
00494             }
00495 
00496             return Math.max(m_acuity, stdDev);
00497         }
00498 
00506         protected void updateStats(Instance updateInstance,
00507                 boolean delete) {
00508 
00509             if (m_attStats == null) {
00510                 m_attStats = new AttributeStats[m_numAttributes];
00511                 for (int i = 0; i < m_numAttributes; i++) {
00512                     m_attStats[i] = new AttributeStats();
00513                     if (m_clusterInstances.attribute(i).isNominal()) {
00514                         m_attStats[i].nominalCounts =
00515                                 new int[m_clusterInstances.attribute(i).numValues()];
00516                     } else {
00517                         m_attStats[i].numericStats = new Stats();
00518                     }
00519                 }
00520             }
00521             for (int i = 0; i < m_numAttributes; i++) {
00522                 if (!updateInstance.isMissing(i)) {
00523                     double value = updateInstance.value(i);
00524                     if (m_clusterInstances.attribute(i).isNominal()) {
00525                         m_attStats[i].nominalCounts[(int) value] += (delete)
00526                                 ? (-1.0 * updateInstance.weight())
00527                                 : updateInstance.weight();
00528                         m_attStats[i].totalCount += (delete)
00529                                 ? (-1.0 * updateInstance.weight())
00530                                 : updateInstance.weight();
00531                     } else {
00532                         if (delete) {
00533                             m_attStats[i].numericStats.subtract(value,
00534                                     updateInstance.weight());
00535                         } else {
00536                             m_attStats[i].numericStats.add(value, updateInstance.weight());
00537                         }
00538                     }
00539                 }
00540             }
00541             m_totalInstances += (delete)
00542                     ? (-1.0 * updateInstance.weight())
00543                     : (updateInstance.weight());
00544         }
00545 
00552         private void assignClusterNums(int[] cl_num) { //throws Exception {
00553             // if (m_children != null && m_children.size() < 2) {
00554             //throw new Exception("assignClusterNums: tree not built correctly!");
00555             // }
00556 
00557             m_clusterNum = cl_num[0];
00558             cl_num[0]++;
00559             if (m_children != null) {
00560                 for (int i = 0; i < m_children.size(); i++) {
00561                     CNode child = (CNode) m_children.elementAt(i);
00562                     child.assignClusterNums(cl_num);
00563                 }
00564             }
00565         }
00566 
00573         protected void dumpTree(int depth, StringBuffer text) {
00574 
00575             if (depth == 0) {
00576                 determineNumberOfClusters();
00577             }
00578 
00579             if (m_children == null) {
00580                 text.append("\n");
00581                 for (int j = 0; j < depth; j++) {
00582                     text.append("|   ");
00583                 }
00584                 text.append("leaf " + m_clusterNum + " ["
00585                         + m_clusterInstances.numInstances() + "]");
00586             } else {
00587                 for (int i = 0; i < m_children.size(); i++) {
00588                     text.append("\n");
00589                     for (int j = 0; j < depth; j++) {
00590                         text.append("|   ");
00591                     }
00592                     text.append("node " + m_clusterNum + " ["
00593                             + m_clusterInstances.numInstances()
00594                             + "]");
00595                     ((CNode) m_children.elementAt(i)).dumpTree(depth + 1, text);
00596                 }
00597             }
00598         }
00599 
00607         protected String dumpData() { //throws Exception {
00608             if (m_children == null) {
00609                 return m_clusterInstances.toString();
00610             }
00611 
00612             // construct instances string with cluster numbers attached
00613             CNode tempNode = new CNode(m_numAttributes);
00614             tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1);
00615             for (int i = 0; i < m_children.size(); i++) {
00616                 tempNode.addChildNode((CNode) m_children.elementAt(i));
00617             }
00618             Instances tempInst = tempNode.m_clusterInstances;
00619             tempNode = null;
00620 
00621             Add af = new Add();
00622             af.setAttributeName("Cluster");
00623             String labels = "";
00624             for (int i = 0; i < m_children.size(); i++) {
00625                 CNode temp = (CNode) m_children.elementAt(i);
00626                 labels += ("C" + temp.m_clusterNum);
00627                 if (i < m_children.size() - 1) {
00628                     labels += ",";
00629                 }
00630             }
00631             af.setNominalLabels(labels);
00632             //af.setInputFormat(tempInst);
00633             //tempInst = Filter.useFilter(tempInst, af);
00634             tempInst.setRelationName("Cluster " + m_clusterNum);
00635 
00636             int z = 0;
00637             for (int i = 0; i < m_children.size(); i++) {
00638                 CNode temp = (CNode) m_children.elementAt(i);
00639                 for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) {
00640                     tempInst.instance(z).setValue(m_numAttributes, (double) i);
00641                     z++;
00642                 }
00643             }
00644             return tempInst.toString();
00645         }
00646 
00653         protected void graphTree(StringBuffer text) { //throws Exception {
00654 
00655             text.append("N" + m_clusterNum
00656                     + " [label=\"" + ((m_children == null)
00657                     ? "leaf " : "node ")
00658                     + m_clusterNum + " "
00659                     + " (" + m_clusterInstances.numInstances()
00660                     + ")\" "
00661                     + ((m_children == null)
00662                     ? "shape=box style=filled " : "")
00663                     + (m_saveInstances
00664                     ? "data =\n" + dumpData() + "\n,\n"
00665                     : "")
00666                     + "]\n");
00667             if (m_children != null) {
00668                 for (int i = 0; i < m_children.size(); i++) {
00669                     CNode temp = (CNode) m_children.elementAt(i);
00670                     text.append("N" + m_clusterNum
00671                             + "->"
00672                             + "N" + temp.m_clusterNum
00673                             + "\n");
00674                 }
00675 
00676                 for (int i = 0; i < m_children.size(); i++) {
00677                     CNode temp = (CNode) m_children.elementAt(i);
00678                     temp.graphTree(text);
00679                 }
00680             }
00681         }
00682 
00689         protected void computeTreeClustering(int depth, Clustering clustering) {
00690 
00691             if (depth == 0) {
00692                 determineNumberOfClusters();
00693             }
00694 
00695             if (m_children == null) {
00696                 //Append Cluster
00697                 /*text.append("\n");
00698                 for (int j = 0; j < depth; j++) {
00699                     text.append("|   ");
00700                 }
00701                 text.append("leaf " + m_clusterNum + " ["
00702                         + m_clusterInstances.numInstances() + "]");
00703                 clustering.add(SphereCluster(this.coordinates, .05, m_clusterInstances.numInstances()));*/
00704                     if (depth == 0) {
00705                             double [] centroidCoordinates = new double[m_clusterInstances.numAttributes()];
00706                             for (int j = 0; j < m_clusterInstances.numAttributes()-1; j++) {                                            
00707                                 centroidCoordinates[j] = m_clusterInstances.meanOrMode(j);      
00708                             }
00709                             clustering.add(new SphereCluster(centroidCoordinates, .05, m_clusterInstances.numInstances()));
00710                     }
00711             } else {
00712                 for (int i = 0; i < m_children.size(); i++) {
00713                     /*text.append("\n");
00714                     for (int j = 0; j < depth; j++) {
00715                         text.append("|   ");
00716                     }
00717                     text.append("node " + m_clusterNum + " ["
00718                             + m_clusterInstances.numInstances()
00719                             + "]");*/
00720                     double [] centroidCoordinates = new double[m_clusterInstances.numAttributes()];
00721                     for (int j = 0; j < m_clusterInstances.numAttributes()-1; j++) {                                            
00722                         centroidCoordinates[j] = m_clusterInstances.meanOrMode(j);      
00723                     }
00724                     clustering.add(new SphereCluster(centroidCoordinates, .05, m_clusterInstances.numInstances()));
00725                     ((CNode) m_children.elementAt(i)).computeTreeClustering(depth + 1, clustering);
00726                 }
00727             }
00728         }
00729     }
00733     protected static final double m_normal = 1.0 / (2 * Math.sqrt(Math.PI));
00737     protected double m_acuity = 1.0;
00741     protected double m_cutoff = 0.002;//0.01 * Cobweb.m_normal;
00745     protected CNode m_cobwebTree = null;
00754     protected int m_numberOfClusters = -1;
00756     protected boolean m_numberOfClustersDetermined = false;
00758     protected int m_numberSplits;
00760     protected int m_numberMerges;
00765     protected boolean m_saveInstances = false;
00766     @SuppressWarnings("hiding")
00767     public static final String classifierPurposeString = "Cobweb and Classit clustering algorithms: it always compares the best host, adding a new leaf, merging the two best hosts, and splitting the best host when considering where to place a new instance..";
00768 
00769     @Override
00770     public void resetLearningImpl() {
00771         setAcuity(this.acuityOption.getValue());
00772         setCutoff(this.cutoffOption.getValue());
00773         m_numberOfClusters = -1;
00774         m_cobwebTree = null;
00775         m_numberSplits = 0;
00776         m_numberMerges = 0;
00777     }
00778 
00785     // public void updateClusterer(Instance newInstance) throws Exception {
00786     @Override
00787     public void trainOnInstanceImpl(Instance newInstance) { //throws Exception {
00788         m_numberOfClustersDetermined = false;
00789 
00790         if (m_cobwebTree == null) {
00791             m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
00792         } else {
00793             m_cobwebTree.addInstance(newInstance);
00794         }
00795     }
00796 
00806     public double[] getVotesForInstance(Instance instance) {
00807         //public int clusterInstance(Instance instance) {//throws Exception {
00808         CNode host = m_cobwebTree;
00809         CNode temp = null;
00810 
00811         determineNumberOfClusters();
00812 
00813         if (this.m_numberOfClusters < 1) {
00814             return (new double[0]);
00815         }
00816         double[] ret = new double[this.m_numberOfClusters];
00817 
00818         do {
00819             if (host.m_children == null) {
00820                 temp = null;
00821                 break;
00822             }
00823 
00824             host.updateStats(instance, false);
00825             temp = host.findHost(instance, true);
00826             host.updateStats(instance, true);
00827 
00828             if (temp != null) {
00829                 host = temp;
00830             }
00831         } while (temp != null);
00832 
00833         ret[host.m_clusterNum] = 1.0;
00834         return ret;
00835     }
00836 
00843     protected void determineNumberOfClusters() {
00844         if (!m_numberOfClustersDetermined
00845                 && (m_cobwebTree != null)) {
00846             int[] numClusts = new int[1];
00847             numClusts[0] = 0;
00848             //  try {
00849             m_cobwebTree.assignClusterNums(numClusts);
00850             // }
00851             // catch (Exception e) {
00852 //      e.printStackTrace();
00853 //      numClusts[0] = 0;
00854             // }
00855             m_numberOfClusters = numClusts[0];
00856 
00857             m_numberOfClustersDetermined = true;
00858         }
00859     }
00860 
00866     public int numberOfClusters() {
00867         determineNumberOfClusters();
00868         return m_numberOfClusters;
00869     }
00870 
00871     {
00872 //              return this.observedClassDistribution.getArrayCopy();
00873     }
00874 
00875     @Override
00876     protected Measurement[] getModelMeasurementsImpl() {
00877         return null;
00878     }
00879 
00880     @Override
00881     public void getModelDescription(StringBuilder out, int indent) {
00882         StringBuffer text = new StringBuffer();
00883         if (m_cobwebTree == null) {
00884             StringUtils.appendIndented(out, indent, "Cobweb hasn't been built yet!");
00885             StringUtils.appendNewline(out);
00886         } else {
00887             m_cobwebTree.dumpTree(0, text);
00888             StringUtils.appendIndented(out, indent, "CobWeb - ");
00889             out.append("Number of merges: "
00890                     + m_numberMerges + "\nNumber of splits: "
00891                     + m_numberSplits + "\nNumber of clusters: "
00892                     + numberOfClusters() + "\n" + text.toString());
00893             StringUtils.appendNewline(out);
00894         }
00895     }
00896 
00897     public boolean isRandomizable() {
00898         return false;
00899     }
00900 
00907     public String graph() {// throws Exception {
00908         StringBuffer text = new StringBuffer();
00909 
00910         text.append("digraph CobwebTree {\n");
00911         m_cobwebTree.graphTree(text);
00912         text.append("}\n");
00913         return text.toString();
00914     }
00915 
00920     public void setAcuity(double a) {
00921         m_acuity = a;
00922     }
00923 
00928     public double getAcuity() {
00929         return m_acuity;
00930     }
00931 
00936     public void setCutoff(double c) {
00937         m_cutoff = c;
00938     }
00939 
00944     public double getCutoff() {
00945         return m_cutoff;
00946     }
00947 
00953     public boolean getSaveInstanceData() {
00954 
00955         return m_saveInstances;
00956     }
00957 
00963     public void setSaveInstanceData(boolean newsaveInstances) {
00964 
00965         m_saveInstances = newsaveInstances;
00966     }
00967 
00968     public Clustering getClusteringResult() {
00969         //throw new UnsupportedOperationException("Not supported yet.");
00970         Clustering result = new Clustering();
00971         if (m_cobwebTree == null) {
00972             //StringUtils.appendIndented(out, indent, "Cobweb hasn't been built yet!");
00973             //StringUtils.appendNewline(out);
00974         } else {
00975             m_cobwebTree.computeTreeClustering(0,result);
00976             System.out.println("After Number of clusters: "+numberOfClusters() );    
00977         }
00978         System.out.println("Number of clusters: "+result.size());
00979         return result; 
00980     }
00981 
00982 
00983 }
00984      
00985 
00986 
 All Classes Namespaces Files Functions Variables Enumerations