MOA 12.03
Real Time Analytics for Data Streams
EntropyCollection.java
Go to the documentation of this file.
00001 /*
00002  *    EntropyCollection.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen ([email protected])
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.evaluation;
00022 
00023 import java.util.ArrayList;
00024 import moa.cluster.Clustering;
00025 import moa.gui.visualization.DataPoint;
00026 
00027 
00028 public class EntropyCollection extends MeasureCollection{
00029     private boolean debug = false;
00030     private final double beta = 1;
00031 
00032 
00033     @Override
00034     protected String[] getNames() {
00035         String[] names = {"GT cross entropy","FC cross entropy","Homogeneity","Completeness","V-Measure","VarInformation"};
00036         return names;
00037     }
00038 
00039     @Override
00040     protected boolean[] getDefaultEnabled() {
00041         boolean [] defaults = {false, false, false, false, false, false};
00042         return defaults;
00043     }
00044 
00045     @Override
00046     public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) throws Exception {
00047 
00048         MembershipMatrix mm = new MembershipMatrix(fclustering, points);
00049         int numClasses = mm.getNumClasses();
00050         int numCluster = fclustering.size()+1;
00051         int n = mm.getTotalEntries();
00052 
00053 
00054         double FCentropy = 0;
00055         if(numCluster > 1){
00056             for (int fc = 0; fc < numCluster; fc++){
00057                 double weight = mm.getClusterSum(fc)/(double)n;
00058                 if(weight > 0)
00059                     FCentropy+= weight * Math.log10(weight);
00060             }
00061             FCentropy/=(-1*Math.log10(numCluster));
00062         }
00063         if(debug){
00064             System.out.println("FC entropy "+FCentropy);
00065         }
00066 
00067         double GTentropy = 0;
00068         if(numClasses > 1){
00069             for (int hc = 0; hc < numClasses; hc++){
00070                 double weight = mm.getClassSum(hc)/(double)n;
00071                 if(weight > 0)
00072                     GTentropy+= weight * Math.log10(weight);
00073             }
00074             GTentropy/=(-1*Math.log10(numClasses));
00075         }
00076         if(debug){
00077             System.out.println("GT entropy "+GTentropy);
00078         }
00079 
00080 
00081         //cluster based entropy
00082         double FCcrossEntropy = 0;
00083         
00084         for (int fc = 0; fc < numCluster; fc++){
00085             double e = 0;
00086             int clusterWeight = mm.getClusterSum(fc);
00087             if(clusterWeight>0){
00088                 for (int hc = 0; hc < numClasses; hc++) {
00089                     double p = mm.getClusterClassWeight(fc, hc)/(double)clusterWeight;
00090                     if(p!=0){
00091                         e+=p * Math.log10(p);
00092                     }
00093                 }
00094                 FCcrossEntropy+=((clusterWeight/(double)n) * e);
00095             }
00096         }
00097         if(numCluster > 1){
00098             FCcrossEntropy/=-1*Math.log10(numCluster);
00099         }
00100 
00101         addValue("FC cross entropy", 1-FCcrossEntropy);
00102         if(debug){
00103             System.out.println("FC cross entropy "+(1-FCcrossEntropy));
00104         }
00105 
00106 
00107         //class based entropy
00108         double GTcrossEntropy = 0;
00109         for (int hc = 0; hc < numClasses; hc++){
00110             double e = 0;
00111             int classWeight = mm.getClassSum(hc);
00112             if(classWeight>0){
00113                 for (int fc = 0; fc < numCluster; fc++) {
00114                     double p = mm.getClusterClassWeight(fc, hc)/(double)classWeight;
00115                     if(p!=0){
00116                         e+=p * Math.log10(p);
00117                     }
00118                 }
00119             }
00120             GTcrossEntropy+=((classWeight/(double)n) * e);
00121         }
00122         if(numClasses > 1)
00123             GTcrossEntropy/=-1*Math.log10(numClasses);
00124         addValue("GT cross entropy", 1-GTcrossEntropy);
00125         if(debug){
00126             System.out.println("GT cross entropy "+(1-GTcrossEntropy));
00127         }
00128 
00129         double homogeneity;
00130         if(FCentropy == 0)
00131             homogeneity = 1;
00132         else
00133             homogeneity = 1 - FCcrossEntropy/FCentropy;
00134 
00135         //TODO set err values for now, needs to be debugged
00136         if(homogeneity > 1 || homogeneity < 0)
00137             addValue("Homogeneity",-1);
00138         else
00139             addValue("Homogeneity",homogeneity);
00140 
00141         double completeness;
00142         if(GTentropy == 0)
00143             completeness = 1;
00144         else
00145             completeness = 1 - GTcrossEntropy/GTentropy;
00146         addValue("Completeness",completeness);
00147 
00148         double vmeasure = (1+beta)*homogeneity*completeness/(beta*homogeneity+completeness);
00149 
00150         if(vmeasure > 1 || homogeneity < 0)
00151             addValue("V-Measure",-1);
00152         else
00153             addValue("V-Measure",vmeasure);
00154 
00155 
00156 
00157         double mutual = 0;
00158         for (int i = 0; i < numCluster; i++){
00159                 for (int j = 0; j < numClasses; j++) {
00160                    if(mm.getClusterClassWeight(i, j)==0) continue;
00161                    double m = Math.log10(mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i)/(double)mm.getClassSum(j)*(double)n);
00162                    m*= mm.getClusterClassWeight(i, j)/(double)n;
00163                    if(debug)
00164                         System.out.println("("+j+"/"+ j + "): "+m);
00165                    mutual+=m;
00166                 }
00167         }
00168         if(numClasses > 1)
00169             mutual/=Math.log10(numClasses);
00170 
00171         double varInfo = 1;
00172         if(FCentropy + GTentropy > 0)
00173             varInfo = 2*mutual/(FCentropy + GTentropy);
00174         
00175         if(debug)
00176             System.out.println("mutual "+mutual+ " / VI "+varInfo);
00177         addValue("VarInformation", varInfo);
00178 
00179     }
00180 
00181 }
 All Classes Namespaces Files Functions Variables Enumerations