MOA 12.03
Real Time Analytics for Data Streams
F1.java
Go to the documentation of this file.
00001 /*
00002  *    F1.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen (moa@cs.rwth-aachen.de)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.evaluation;
00022 
00023 import java.util.ArrayList;
00024 import moa.cluster.Clustering;
00025 import moa.gui.visualization.DataPoint;
00026 
00027 public class F1 extends MeasureCollection{
00028 
00029     @Override
00030     protected String[] getNames() {
00031         String[] names = {"F1-P","F1-R","Purity"};
00032         return names;
00033     }
00034 
00035 //    @Override
00036 //    protected boolean[] getDefaultEnabled() {
00037 //        boolean [] defaults = {false, false, false};
00038 //        return defaults;
00039 //    }
00040     
00041     public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
00042 
00043         if (clustering.size()<0){
00044             addValue(0,0);
00045             addValue(1,0);
00046             return;
00047         }
00048 
00049         MembershipMatrix mm = new MembershipMatrix(clustering, points);
00050         //System.out.println(mm.toString());
00051 
00052         int numClasses = mm.getNumClasses();
00053         if(mm.hasNoiseClass())
00054             numClasses--;
00055 
00056 
00057 
00058         //F1 as defined in P3C, try using F1 optimization
00059         double F1_P = 0.0;
00060         double purity = 0;
00061         int realClusters = 0;
00062         for (int i = 0; i < clustering.size(); i++) {
00063             int max_weight = 0;
00064             int max_weight_index = -1;
00065 
00066             //find max index
00067             for (int j = 0; j < numClasses; j++) {
00068                 if(mm.getClusterClassWeight(i, j) > max_weight){
00069                     max_weight = mm.getClusterClassWeight(i, j);
00070                     max_weight_index = j;
00071                 }
00072             }
00073             if(max_weight_index!=-1){
00074                 realClusters++;
00075                 double precision = mm.getClusterClassWeight(i, max_weight_index)/(double)mm.getClusterSum(i);
00076                 double recall = mm.getClusterClassWeight(i, max_weight_index)/(double) mm.getClassSum(max_weight_index);
00077                 double f1 = 0;
00078                 if(precision > 0 || recall > 0){
00079                     f1 = 2*precision*recall/(precision+recall);
00080                 }
00081                 F1_P += f1;
00082                 purity += precision;
00083 
00084                 //TODO should we move setMeasure stuff into the Cluster interface?
00085                 clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
00086             }
00087         }
00088         if(realClusters > 0){
00089             F1_P/=realClusters;
00090             purity/=realClusters;
00091         }
00092         addValue("F1-P",F1_P);
00093         addValue("Purity",purity);
00094 
00095 
00096 
00097         //F1 as defined in .... mainly maximizes F1 for each class
00098         double F1_R = 0.0;
00099         for (int j = 0; j < numClasses; j++) {
00100             double max_f1 = 0;
00101             for (int i = 0; i < clustering.size(); i++) {
00102                 double precision = mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i);
00103                 double recall = mm.getClusterClassWeight(i, j)/(double)mm.getClassSum(j);
00104                 double f1 = 0;
00105                 if(precision > 0 || recall > 0){
00106                     f1 = 2*precision*recall/(precision+recall);
00107                 }
00108                 if(max_f1 < f1){
00109                     max_f1 = f1;
00110                 }
00111             }
00112             F1_R+= max_f1;
00113         }
00114         F1_R/=numClasses;
00115 
00116         addValue("F1-R",F1_R);
00117     }
00118 
00119 }
 All Classes Namespaces Files Functions Variables Enumerations