MOA 12.03
Real Time Analytics for Data Streams
ClusterGenerator.java
Go to the documentation of this file.
00001 /*
00002  *    ClusterGenerator.java
00003  *    Copyright (C) 2010 RWTH Aachen University, Germany
00004  *    @author Jansen ([email protected])
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 
00021 package moa.clusterers;
00022 
00023 import java.util.ArrayList;
00024 import java.util.Arrays;
00025 import java.util.Random;
00026 import moa.cluster.Clustering;
00027 import moa.cluster.SphereCluster;
00028 import moa.core.Measurement;
00029 import moa.gui.visualization.DataPoint;
00030 import moa.options.FloatOption;
00031 import moa.options.IntOption;
00032 import weka.core.Instance;
00033 
00034 public class ClusterGenerator extends AbstractClusterer{
00035 
00036         private static final long serialVersionUID = 1L;
00037 
00038         public IntOption timeWindowOption = new IntOption("timeWindow",
00039                         't', "Rang of the window.", 1000);
00040 
00041     public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r',
00042                 "The average radii of the centroids in the model.", 0, 0, 1);
00043 
00044     public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R',
00045                 "The average radii of the centroids in the model.", 0, 0, 1);
00046 
00047     public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p',
00048                 "The average radii of the centroids in the model.", 0, 0, 1);
00049 
00050     public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D',
00051                 "Deletes complete clusters from the clustering.", 0, 0, 1);
00052 
00053     public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j',
00054             "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1);
00055 
00056     public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A',
00057                 "Adds additional clusters.", 0, 0, 1);
00058 
00059     private static double err_intervall_width = 0.0;
00060     private ArrayList<DataPoint> points;
00061     private int instanceCounter;
00062     private int windowCounter;
00063     private Random random;
00064     private Clustering sourceClustering = null;
00065 
00066     @Override
00067     public void resetLearningImpl() {
00068         points = new ArrayList<DataPoint>();
00069         instanceCounter = 0;
00070         windowCounter = 0;
00071         random = new Random(227);
00072 
00073         //joinClustersOption.set();
00074         //evaluateMicroClusteringOption.set();
00075     }
00076 
00077     @Override
00078     public void trainOnInstanceImpl(Instance inst) {
00079         if(windowCounter >= timeWindowOption.getValue()){
00080             points.clear();
00081             windowCounter = 0;
00082         }
00083         windowCounter++;
00084         instanceCounter++;
00085         points.add( new DataPoint(inst,instanceCounter));
00086     }
00087 
00088     @Override
00089     public boolean implementsMicroClusterer() {
00090         return true;
00091     }
00092 
00093 
00094     public void setSourceClustering(Clustering source){
00095         sourceClustering = source;
00096     }
00097     
00098     @Override
00099     public Clustering getMicroClusteringResult() {
00100         //System.out.println("Numcluster:"+clustering.size()+" / "+num);
00101         //Clustering source_clustering = new Clustering(points, overlapThreshold, microInitMinPoints);
00102         if(sourceClustering == null){
00103 
00104             System.out.println("You need to set a source clustering for the ClusterGenerator to work");
00105             return null;
00106         }
00107         return alterClustering(sourceClustering);
00108     }
00109 
00110 
00111 
00112     public Clustering getClusteringResult(){
00113         sourceClustering = new Clustering(points);
00114 //        if(sourceClustering == null){
00115 //            System.out.println("You need to set a source clustering for the ClusterGenerator to work");
00116 //            return null;
00117 //        }
00118         return alterClustering(sourceClustering);
00119     }
00120 
00121 
00122     private Clustering alterClustering(Clustering scclustering){
00123         //percentage of the radius that will be cut off
00124         //0: no changes to radius
00125         //1: radius of 0
00126         double errLevelRadiusDecrease = radiusDecreaseOption.getValue();
00127 
00128         //0: no changes to radius
00129         //1: radius 100% bigger
00130         double errLevelRadiusIncrease = radiusIncreaseOption.getValue();
00131 
00132         //0: no changes
00133         //1: distance between centers is 2 * original radius
00134         double errLevelPosition = positionOffsetOption.getValue();
00135 
00136 
00137         int numRemoveCluster = (int)(clusterRemoveOption.getValue()*scclustering.size());
00138 
00139         int numAddCluster = (int)(clusterAddOption.getValue()*scclustering.size());
00140 
00141         for (int c = 0; c < numRemoveCluster; c++) {
00142             int delId = random.nextInt(scclustering.size());
00143             scclustering.remove(delId);
00144         }
00145 
00146         int numCluster = scclustering.size();
00147         double[] err_seeds = new double[numCluster];
00148         double err_seed_sum = 0.0;
00149         double tmp_seed;
00150         for (int i = 0; i < numCluster; i++) {
00151             tmp_seed = random.nextDouble();
00152             err_seeds[i] = err_seed_sum + tmp_seed;
00153             err_seed_sum+= tmp_seed;
00154         }
00155 
00156         double sumWeight = 0;
00157         for (int i = 0; i <numCluster; i++) {
00158             sumWeight+= scclustering.get(i).getWeight();
00159         }
00160 
00161         Clustering clustering = new Clustering();
00162 
00163         for (int i = 0; i <numCluster; i++) {
00164             if(!(scclustering.get(i) instanceof SphereCluster)){
00165                 System.out.println("Not a Sphere Cluster");
00166                 continue;
00167             }
00168             SphereCluster sourceCluster = (SphereCluster)scclustering.get(i);
00169             double[] center = Arrays.copyOf(sourceCluster.getCenter(),sourceCluster.getCenter().length);
00170             double weight = sourceCluster.getWeight();
00171             double radius = sourceCluster.getRadius();
00172 
00173             //move cluster center
00174             if(errLevelPosition >0){
00175                 double errOffset = random.nextDouble()*err_intervall_width/2.0;
00176                 double errOffsetDirection = ((random.nextBoolean())? 1 : -1);
00177                 double level = errLevelPosition + errOffsetDirection * errOffset;
00178                 double[] vector = new double[center.length];
00179                 double vectorLength = 0;
00180                 for (int d = 0; d < center.length; d++) {
00181                     vector[d] = (random.nextBoolean()?1:-1)*random.nextDouble();
00182                     vectorLength += Math.pow(vector[d],2);
00183                 }
00184                 vectorLength = Math.sqrt(vectorLength);
00185 
00186                 
00187                 //max is when clusters are next to each other
00188                 double length = 2 * radius * level;
00189 
00190                 for (int d = 0; d < center.length; d++) {
00191                     //normalize length and then strecht to reach error position
00192                     vector[d]=vector[d]/vectorLength*length;
00193                 }
00194 //                System.out.println("Center "+Arrays.toString(center));
00195 //                System.out.println("Vector "+Arrays.toString(vector));
00196                 //check if error position is within bounds
00197                 double [] newCenter = new double[center.length];
00198                 for (int d = 0; d < center.length; d++) {
00199                     //check bounds, otherwise flip vector
00200                     if(center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1){
00201                         newCenter[d] = center[d] + vector[d];
00202                     }
00203                     else{
00204                         newCenter[d] = center[d] + (-1)*vector[d];
00205                     }
00206                 }
00207                 center = newCenter;
00208                 for (int d = 0; d < center.length; d++) {
00209                     if(newCenter[d] >= 0 && newCenter[d] <= 1){
00210                     }
00211                     else{
00212                         System.out.println("This shouldnt have happend, Cluster center out of bounds:"+Arrays.toString(newCenter));
00213                     }
00214                 }
00215                 //System.out.println("new Center "+Arrays.toString(newCenter));
00216 
00217             }
00218             
00219             //alter radius
00220             if(errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0){
00221                 double errOffset = random.nextDouble()*err_intervall_width/2.0;
00222                 int errOffsetDirection = ((random.nextBoolean())? 1 : -1);
00223 
00224                 if(errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())){
00225                     double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);//*sourceCluster.getWeight()/sumWeight;
00226                     level = (level<0)?0:level;
00227                     level = (level>1)?1:level;
00228                     radius*=(1-level);
00229                 }
00230                 else{
00231                     double level = errLevelRadiusIncrease + errOffsetDirection * errOffset;
00232                     level = (level<0)?0:level;
00233                     level = (level>1)?1:level;
00234                     radius+=radius*level;
00235                 }
00236             }
00237 
00238             SphereCluster newCluster = new SphereCluster(center, radius, weight);
00239             newCluster.setMeasureValue("Source Cluster", "C"+sourceCluster.getId());
00240 
00241             clustering.add(newCluster);
00242         }
00243 
00244         if(joinClustersOption.getValue() > 0){
00245             clustering = joinClusters(clustering);
00246         }
00247 
00248         //add new clusters by copying clusters and set a random center
00249         for (int c = 0; c < numAddCluster; c++) {
00250             int copyId = random.nextInt(clustering.size());
00251             SphereCluster scorg = (SphereCluster)clustering.get(copyId);
00252             int dim = scorg.getCenter().length;
00253             double[] center = new double [dim];
00254             double radius = scorg.getRadius();
00255 
00256             boolean outofbounds = true;
00257             int tryCounter = 0;
00258             while(outofbounds && tryCounter < 20){
00259                 tryCounter++;
00260                 outofbounds = false;
00261                 for (int j = 0; j < center.length; j++) {
00262                      center[j] = random.nextDouble();
00263                      if(center[j]- radius < 0 || center[j] + radius > 1){
00264                         outofbounds = true;
00265                         break;
00266                      }
00267                 }
00268             }
00269             if(outofbounds){
00270                 System.out.println("Coludn't place additional cluster");
00271             }
00272             else{
00273                 SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight()/2);
00274                 scorg.setWeight(scorg.getWeight()-scnew.getWeight());
00275                 clustering.add(scnew);
00276             }
00277         }
00278 
00279         return clustering;
00280 
00281     }
00282 
00283 
00284 
00285     private Clustering joinClusters(Clustering clustering){
00286 
00287         double radiusFactor = joinClustersOption.getValue();
00288         boolean[] merged = new boolean[clustering.size()];
00289 
00290         Clustering mclustering = new Clustering();
00291 
00292         if(radiusFactor >0){
00293             for (int c1 = 0; c1 < clustering.size(); c1++) {
00294                 SphereCluster sc1 = (SphereCluster) clustering.get(c1);
00295                 double minDist = Double.MAX_VALUE;
00296                 double minOver = 1;
00297                 int maxindexCon = -1;
00298                 int maxindexOver = -1;
00299                 for (int c2 = 0; c2 < clustering.size(); c2++) {
00300                     SphereCluster sc2 = (SphereCluster) clustering.get(c2);
00301 //                    double over = sc1.overlapRadiusDegree(sc2);
00302 //                    if(over > 0 && over < minOver){
00303 //                       minOver = over;
00304 //                       maxindexOver = c2;
00305 //                    }
00306                     double dist = sc1.getHullDistance(sc2);
00307                     double threshold = Math.min(sc1.getRadius(), sc2.getRadius())*radiusFactor;
00308                     if(dist > 0 && dist < minDist && dist < threshold){
00309                             minDist = dist;
00310                             maxindexCon = c2;
00311                     }
00312                 }
00313                 int maxindex = -1;
00314                 if(maxindexOver!=-1)
00315                     maxindex = maxindexOver;
00316                 else
00317                     maxindex = maxindexCon;
00318 
00319                 if(maxindex!=-1 && !merged[c1]){
00320                     merged[c1]=true;
00321                     merged[maxindex]=true;
00322                     SphereCluster scnew = new SphereCluster(sc1.getCenter(),sc1.getRadius(),sc1.getWeight());
00323                     SphereCluster sc2 = (SphereCluster) clustering.get(maxindex);
00324                     scnew.merge(sc2);
00325                     mclustering.add(scnew);
00326                 }
00327             }
00328         }
00329 
00330         for (int i = 0; i < merged.length; i++) {
00331             if(!merged[i])
00332                  mclustering.add(clustering.get(i));
00333         }
00334 
00335 
00336         return mclustering;
00337 
00338     }
00339 
00340 
00341 
00342     @Override
00343     protected Measurement[] getModelMeasurementsImpl() {
00344         throw new UnsupportedOperationException("Not supported yet.");
00345     }
00346 
00347     @Override
00348     public void getModelDescription(StringBuilder out, int indent) {
00349         throw new UnsupportedOperationException("Not supported yet.");
00350     }
00351 
00352     @Override
00353     public boolean isRandomizable() {
00354         return false;
00355     }
00356 
00357     @Override
00358     public boolean  keepClassLabel(){
00359         return true;
00360     }
00361 
00362     public double[] getVotesForInstance(Instance inst) {
00363         return null;
00364     }
00365 }
00366 
00367 
 All Classes Namespaces Files Functions Variables Enumerations