MOA 12.03
Real Time Analytics for Data Streams
SilhouetteCoefficient.java
Go to the documentation of this file.
00001 /*
00002  *    SilhouetteCoefficient.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen (moa@cs.rwth-aachen.de)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.evaluation; 
00022 
00023 import java.util.ArrayList;
00024 import java.util.HashMap;
00025 import moa.cluster.Cluster;
00026 import moa.cluster.Clustering;
00027 import moa.gui.visualization.DataPoint;
00028 
00029 public class SilhouetteCoefficient extends MeasureCollection{
00030     private double pointInclusionProbThreshold = 0.8;
00031 
00032     public SilhouetteCoefficient() {
00033         super();
00034     }
00035 
00036     @Override
00037     protected boolean[] getDefaultEnabled() {
00038         boolean [] defaults = {false};
00039         return defaults;
00040     }
00041 
00042     @Override
00043     public String[] getNames() {
00044         String[] names = {"SilhCoeff"};
00045         return names;
00046     }
00047 
00048     public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
00049         int numFCluster = clustering.size();
00050 
00051         double [][] pointInclusionProbFC = new double[points.size()][numFCluster];
00052         for (int p = 0; p < points.size(); p++) {
00053             DataPoint point = points.get(p);
00054             for (int fc = 0; fc < numFCluster; fc++) {
00055                 Cluster cl = clustering.get(fc);
00056                 pointInclusionProbFC[p][fc] = cl.getInclusionProbability(point);
00057             }
00058         }
00059 
00060         double silhCoeff = 0.0;
00061         int totalCount = 0;
00062         for (int p = 0; p < points.size(); p++) {
00063             DataPoint point = points.get(p);
00064             ArrayList<Integer> ownClusters = new ArrayList<Integer>();
00065             for (int fc = 0; fc < numFCluster; fc++) {
00066                 if(pointInclusionProbFC[p][fc] > pointInclusionProbThreshold){
00067                     ownClusters.add(fc);
00068                 }
00069             }
00070 
00071             if(ownClusters.size() > 0){
00072                 double[] distanceByClusters = new double[numFCluster];
00073                 int[] countsByClusters = new int[numFCluster];
00074                     //calculate averageDistance of p to all cluster
00075                 for (int p1 = 0; p1 < points.size(); p1++) {
00076                     DataPoint point1 = points.get(p1);
00077                     if(p1!= p && point1.classValue() != -1){
00078                         for (int fc = 0; fc < numFCluster; fc++) {
00079                             if(pointInclusionProbFC[p1][fc] > pointInclusionProbThreshold){
00080                                 double distance = distance(point, point1);
00081                                 distanceByClusters[fc]+=distance;
00082                                 countsByClusters[fc]++;
00083                             }
00084                         }
00085                     }
00086                 }
00087 
00088                 //find closest OWN cluster as clusters might overlap
00089                 double minAvgDistanceOwn = Double.MAX_VALUE;
00090                 int minOwnIndex = -1;
00091                 for (int fc : ownClusters) {
00092                         double normDist = distanceByClusters[fc]/(double)countsByClusters[fc];
00093                         if(normDist < minAvgDistanceOwn){// && pointInclusionProbFC[p][fc] > pointInclusionProbThreshold){
00094                             minAvgDistanceOwn = normDist;
00095                             minOwnIndex = fc;
00096                         }
00097                 }
00098 
00099 
00100                 //find closest other (or other own) cluster
00101                 double minAvgDistanceOther = Double.MAX_VALUE;
00102                 for (int fc = 0; fc < numFCluster; fc++) {
00103                     if(fc != minOwnIndex){
00104                         double normDist = distanceByClusters[fc]/(double)countsByClusters[fc];
00105                         if(normDist < minAvgDistanceOther){
00106                             minAvgDistanceOther = normDist;
00107                         }
00108                     }
00109                 }
00110 
00111                 double silhP = (minAvgDistanceOther-minAvgDistanceOwn)/Math.max(minAvgDistanceOther, minAvgDistanceOwn);
00112                 point.setMeasureValue("SC - own", minAvgDistanceOwn);
00113                 point.setMeasureValue("SC - other", minAvgDistanceOther);
00114                 point.setMeasureValue("SC", silhP);
00115 
00116                 silhCoeff+=silhP;
00117                 totalCount++;
00118                 //System.out.println(point.getTimestamp()+" Silh "+silhP+" / "+avgDistanceOwn+" "+minAvgDistanceOther+" (C"+minIndex+")");
00119             }
00120         }
00121         if(totalCount>0)
00122             silhCoeff/=(double)totalCount;
00123         //normalize from -1, 1 to 0,1
00124         silhCoeff = (silhCoeff+1)/2.0;
00125         addValue(0,silhCoeff);
00126     }
00127 
00128     private double distance(DataPoint inst1, DataPoint inst2){
00129         double distance = 0.0;
00130         int numDims = inst1.numAttributes();
00131         for (int i = 0; i < numDims; i++) {
00132             double d = inst1.value(i) - inst2.value(i);
00133             distance += d * d;
00134         }
00135         return Math.sqrt(distance);
00136     }
00137 }
 All Classes Namespaces Files Functions Variables Enumerations