MOA 12.03
Real Time Analytics for Data Streams
|
00001 /* 00002 * EntropyCollection.java 00003 * Copyright (C) 2010 RWTH Aachen University, Germany 00004 * @author Jansen ([email protected]) 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 */ 00020 00021 package moa.evaluation; 00022 00023 import java.util.ArrayList; 00024 import moa.cluster.Clustering; 00025 import moa.gui.visualization.DataPoint; 00026 00027 00028 public class EntropyCollection extends MeasureCollection{ 00029 private boolean debug = false; 00030 private final double beta = 1; 00031 00032 00033 @Override 00034 protected String[] getNames() { 00035 String[] names = {"GT cross entropy","FC cross entropy","Homogeneity","Completeness","V-Measure","VarInformation"}; 00036 return names; 00037 } 00038 00039 @Override 00040 protected boolean[] getDefaultEnabled() { 00041 boolean [] defaults = {false, false, false, false, false, false}; 00042 return defaults; 00043 } 00044 00045 @Override 00046 public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) throws Exception { 00047 00048 MembershipMatrix mm = new MembershipMatrix(fclustering, points); 00049 int numClasses = mm.getNumClasses(); 00050 int numCluster = fclustering.size()+1; 00051 int n = mm.getTotalEntries(); 00052 00053 00054 double FCentropy = 0; 00055 if(numCluster > 1){ 00056 for (int fc = 0; fc < numCluster; fc++){ 00057 double weight = mm.getClusterSum(fc)/(double)n; 00058 if(weight > 0) 00059 FCentropy+= weight * Math.log10(weight); 00060 } 00061 FCentropy/=(-1*Math.log10(numCluster)); 00062 } 00063 if(debug){ 00064 System.out.println("FC entropy "+FCentropy); 00065 } 00066 00067 double GTentropy = 0; 00068 if(numClasses > 1){ 00069 for (int hc = 0; hc < numClasses; hc++){ 00070 double weight = mm.getClassSum(hc)/(double)n; 00071 if(weight > 0) 00072 GTentropy+= weight * Math.log10(weight); 00073 } 00074 GTentropy/=(-1*Math.log10(numClasses)); 00075 } 00076 if(debug){ 00077 System.out.println("GT entropy "+GTentropy); 00078 } 00079 00080 00081 //cluster based entropy 00082 double FCcrossEntropy = 0; 00083 00084 for (int fc = 0; fc < numCluster; fc++){ 00085 double e = 0; 00086 int clusterWeight = mm.getClusterSum(fc); 00087 if(clusterWeight>0){ 00088 for (int hc = 0; hc < numClasses; hc++) { 00089 double p = mm.getClusterClassWeight(fc, hc)/(double)clusterWeight; 00090 if(p!=0){ 00091 e+=p * Math.log10(p); 00092 } 00093 } 00094 FCcrossEntropy+=((clusterWeight/(double)n) * e); 00095 } 00096 } 00097 if(numCluster > 1){ 00098 FCcrossEntropy/=-1*Math.log10(numCluster); 00099 } 00100 00101 addValue("FC cross entropy", 1-FCcrossEntropy); 00102 if(debug){ 00103 System.out.println("FC cross entropy "+(1-FCcrossEntropy)); 00104 } 00105 00106 00107 //class based entropy 00108 double GTcrossEntropy = 0; 00109 for (int hc = 0; hc < numClasses; hc++){ 00110 double e = 0; 00111 int classWeight = mm.getClassSum(hc); 00112 if(classWeight>0){ 00113 for (int fc = 0; fc < numCluster; fc++) { 00114 double p = mm.getClusterClassWeight(fc, hc)/(double)classWeight; 00115 if(p!=0){ 00116 e+=p * Math.log10(p); 00117 } 00118 } 00119 } 00120 GTcrossEntropy+=((classWeight/(double)n) * e); 00121 } 00122 if(numClasses > 1) 00123 GTcrossEntropy/=-1*Math.log10(numClasses); 00124 addValue("GT cross entropy", 1-GTcrossEntropy); 00125 if(debug){ 00126 System.out.println("GT cross entropy "+(1-GTcrossEntropy)); 00127 } 00128 00129 double homogeneity; 00130 if(FCentropy == 0) 00131 homogeneity = 1; 00132 else 00133 homogeneity = 1 - FCcrossEntropy/FCentropy; 00134 00135 //TODO set err values for now, needs to be debugged 00136 if(homogeneity > 1 || homogeneity < 0) 00137 addValue("Homogeneity",-1); 00138 else 00139 addValue("Homogeneity",homogeneity); 00140 00141 double completeness; 00142 if(GTentropy == 0) 00143 completeness = 1; 00144 else 00145 completeness = 1 - GTcrossEntropy/GTentropy; 00146 addValue("Completeness",completeness); 00147 00148 double vmeasure = (1+beta)*homogeneity*completeness/(beta*homogeneity+completeness); 00149 00150 if(vmeasure > 1 || homogeneity < 0) 00151 addValue("V-Measure",-1); 00152 else 00153 addValue("V-Measure",vmeasure); 00154 00155 00156 00157 double mutual = 0; 00158 for (int i = 0; i < numCluster; i++){ 00159 for (int j = 0; j < numClasses; j++) { 00160 if(mm.getClusterClassWeight(i, j)==0) continue; 00161 double m = Math.log10(mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i)/(double)mm.getClassSum(j)*(double)n); 00162 m*= mm.getClusterClassWeight(i, j)/(double)n; 00163 if(debug) 00164 System.out.println("("+j+"/"+ j + "): "+m); 00165 mutual+=m; 00166 } 00167 } 00168 if(numClasses > 1) 00169 mutual/=Math.log10(numClasses); 00170 00171 double varInfo = 1; 00172 if(FCentropy + GTentropy > 0) 00173 varInfo = 2*mutual/(FCentropy + GTentropy); 00174 00175 if(debug) 00176 System.out.println("mutual "+mutual+ " / VI "+varInfo); 00177 addValue("VarInformation", varInfo); 00178 00179 } 00180 00181 }