MOA 12.03
Real Time Analytics for Data Streams
General.java
Go to the documentation of this file.
00001 /*
00002  *    General.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen (moa@cs.rwth-aachen.de)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.evaluation;
00022 
00023 
00024 import java.util.ArrayList;
00025 import moa.cluster.Clustering;
00026 import moa.cluster.SphereCluster;
00027 import moa.gui.visualization.DataPoint;
00028 import weka.core.Instance;
00029 
00030 public class General extends MeasureCollection{
00031     private int numPoints;
00032     private int numFClusters;
00033     private int numDims;
00034     private double pointInclusionProbThreshold = 0.8;
00035     private Clustering clustering;
00036     private ArrayList<DataPoint> points;
00037 
00038 
00039     public General() {
00040         super();
00041     }
00042 
00043 
00044     @Override
00045     protected String[] getNames() {
00046         String[] names = {"GPrecision","GRecall","Redundancy","numCluster","numClasses"};
00047         //String[] names = {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
00048         return names;
00049     }
00050 
00051 //    @Override
00052 //    protected boolean[] getDefaultEnabled() {
00053 //        boolean [] defaults = {false, false, false, false, false ,false};
00054 //        return defaults;
00055 //    }
00056 
00057     @Override
00058     public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception{
00059 
00060         this.points = points;
00061         this.clustering = clustering;
00062         numPoints = points.size();
00063         numFClusters = clustering.size();
00064         numDims = points.get(0).numAttributes()-1;
00065 
00066 
00067         int totalRedundancy = 0;
00068         int trueCoverage = 0;
00069         int totalCoverage = 0;
00070 
00071         int numNoise = 0;
00072         for (int p = 0; p < numPoints; p++) {
00073             int coverage = 0;
00074             for (int c = 0; c < numFClusters; c++) {
00075                 //contained in cluster c?
00076                 if(clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold){
00077                     coverage++;
00078                 }
00079             }
00080 
00081             if(points.get(p).classValue()==-1){
00082                 numNoise++;
00083             }
00084             else{
00085                 if(coverage>0) trueCoverage++;
00086             }
00087 
00088             if(coverage>0) totalCoverage++;  //points covered by clustering (incl. noise)
00089             if(coverage>1) totalRedundancy++; //include noise
00090         }
00091 
00092         addValue("numCluster", clustering.size());
00093         addValue("numClasses", trueClustering.size());
00094         addValue("Redundancy", ((double)totalRedundancy/(double)numPoints));
00095         addValue("GPrecision", (totalCoverage==0?0:((double)trueCoverage/(double)(totalCoverage))));
00096         addValue("GRecall", ((double)trueCoverage/(double)(numPoints-numNoise)));
00097 //        if(isEnabled(3)){
00098 //            addValue("Compactness", computeCompactness());
00099 //        }
00100 //        if(isEnabled(3)){
00101 //            addValue("Overlap", computeOverlap());
00102 //        }
00103     }
00104 
00105     private double computeOverlap(){
00106         for (int c = 0; c < numFClusters; c++) {
00107             if(!(clustering.get(c) instanceof SphereCluster)){
00108                 System.out.println("Overlap only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
00109                 return Double.NaN;
00110             }
00111         }
00112 
00113         boolean[] overlap = new boolean[numFClusters];
00114 
00115         for (int c0 = 0; c0 < numFClusters; c0++) {
00116             if(overlap[c0]) continue;
00117             SphereCluster s0 = (SphereCluster)clustering.get(c0);
00118             for (int c1 = c0; c1 < clustering.size(); c1++) {
00119                 if(c1 == c0) continue;
00120                 SphereCluster s1 = (SphereCluster)clustering.get(c1);
00121                 if(s0.overlapRadiusDegree(s1) > 0){
00122                     overlap[c0] = overlap[c1] = true;
00123                 }
00124             }
00125         }
00126 
00127         double totalOverlap = 0;
00128         for (int c0 = 0; c0 < numFClusters; c0++) {
00129             if(overlap[c0])
00130                 totalOverlap++;
00131         }
00132 
00133 //        if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause();
00134         if(numFClusters>0) totalOverlap/=(double)numFClusters;
00135         return totalOverlap;
00136     }
00137 
00138 
00139     private double computeCompactness(){
00140         if(numFClusters == 0) return 0;
00141         for (int c = 0; c < numFClusters; c++) {
00142             if(!(clustering.get(c) instanceof SphereCluster)){
00143                 System.out.println("Compactness only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
00144                 return Double.NaN;
00145             }
00146         }
00147 
00148         //TODO weight radius by number of dimensions
00149         double totalCompactness = 0;
00150         for (int c = 0; c < numFClusters; c++) {
00151             ArrayList<Instance> containedPoints = new ArrayList<Instance>();
00152             for (int p = 0; p < numPoints; p++) {
00153                 //p in c
00154                 if(clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold){
00155                     containedPoints.add(points.get(p));
00156                 }
00157             }
00158             double compactness = 0;
00159             if(containedPoints.size()>1){
00160                 //cluster not empty
00161                 SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
00162                 double minRadius = minEnclosingCluster.getRadius();
00163                 double cfRadius = ((SphereCluster)clustering.get(c)).getRadius();
00164                 if(Math.abs(minRadius-cfRadius) < 0.1e-10){
00165                     compactness = 1;
00166                 }
00167                 else
00168                     if(minRadius < cfRadius)
00169                         compactness = minRadius/cfRadius;
00170                     else{
00171                         System.out.println("Optimal radius bigger then real one ("+(cfRadius-minRadius)+"), this is really wrong");
00172                         compactness = 1;
00173                     }
00174             }
00175             else{
00176                 double cfRadius = ((SphereCluster)clustering.get(c)).getRadius();
00177                 if(cfRadius==0) compactness = 1;
00178             }
00179 
00180             //weight by weight of cluster???
00181             totalCompactness+=compactness;
00182             clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
00183         }
00184         return (totalCompactness/numFClusters);
00185     }
00186 
00187 
00188 }
00189 
00190 
 All Classes Namespaces Files Functions Variables Enumerations