MOA 12.03
Real Time Analytics for Data Streams
|
/*
 *    ClusterGenerator.java
 *    Copyright (C) 2010 RWTH Aachen University, Germany
 *    @author Jansen ([email protected])
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package moa.clusterers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import moa.cluster.Clustering;
import moa.cluster.SphereCluster;
import moa.core.Measurement;
import moa.gui.visualization.DataPoint;
import moa.options.FloatOption;
import moa.options.IntOption;
import weka.core.Instance;

/**
 * Produces a clustering by taking a source clustering and deliberately
 * distorting it: clusters can be removed, moved, shrunk, grown, joined and
 * added, each controlled by one of the options below.
 *
 * <p>The source clustering is either built from the data points buffered in the
 * current window ({@link #getClusteringResult()}) or supplied from outside via
 * {@link #setSourceClustering(Clustering)} ({@link #getMicroClusteringResult()}).
 *
 * <p>All random decisions come from a {@link Random} that is re-created with a
 * fixed seed in {@link #resetLearningImpl()}, so the order and number of draws
 * determine the generated output.
 */
public class ClusterGenerator extends AbstractClusterer {

    private static final long serialVersionUID = 1L;

    public IntOption timeWindowOption = new IntOption("timeWindow",
            't', "Rang of the window.", 1000);

    public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r',
            "Fraction by which cluster radii are decreased (0: unchanged, 1: radius of 0).", 0, 0, 1);

    public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R',
            "Fraction by which cluster radii are increased (0: unchanged, 1: radius 100% bigger).", 0, 0, 1);

    public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p',
            "Offset of the cluster centers as a fraction of twice the radius (0: unchanged, 1: moved by 2 * radius).", 0, 0, 1);

    public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D',
            "Deletes complete clusters from the clustering.", 0, 0, 1);

    public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j',
            "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1);

    public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A',
            "Adds additional clusters.", 0, 0, 1);

    /**
     * Width of the random interval around each configured error level. It is
     * 0.0, so the random offsets derived from it are always zero; the draws are
     * still performed so the random sequence keeps its shape.
     */
    private static final double ERR_INTERVAL_WIDTH = 0.0;

    /** Data points seen in the current window. */
    private ArrayList<DataPoint> points;

    /** Number of instances seen since the last reset; used as the data point timestamp. */
    private int instanceCounter;

    /** Number of instances seen in the current window. */
    private int windowCounter;

    /** Source of all random decisions; seeded in {@link #resetLearningImpl()}. */
    private Random random;

    /** Clustering that the generated result is derived from. */
    private Clustering sourceClustering = null;

    /** Clears the point buffer and counters and re-seeds the random generator. */
    @Override
    public void resetLearningImpl() {
        points = new ArrayList<DataPoint>();
        instanceCounter = 0;
        windowCounter = 0;
        random = new Random(227);
    }

    /**
     * Buffers the instance as a data point. Once {@code timeWindow} instances
     * have been collected, the buffer is emptied before the next one is added.
     *
     * @param inst the instance to buffer
     */
    @Override
    public void trainOnInstanceImpl(Instance inst) {
        if (windowCounter >= timeWindowOption.getValue()) {
            points.clear();
            windowCounter = 0;
        }
        windowCounter++;
        instanceCounter++;
        points.add(new DataPoint(inst, instanceCounter));
    }

    @Override
    public boolean implementsMicroClusterer() {
        return true;
    }

    /**
     * Sets the clustering that {@link #getMicroClusteringResult()} distorts.
     *
     * @param source the source clustering
     */
    public void setSourceClustering(Clustering source) {
        sourceClustering = source;
    }

    /**
     * Returns a distorted copy of the current source clustering.
     *
     * @return the altered clustering, or {@code null} (after printing a hint)
     *         when no source clustering is available
     */
    @Override
    public Clustering getMicroClusteringResult() {
        if (sourceClustering == null) {
            System.out.println("You need to set a source clustering for the ClusterGenerator to work");
            return null;
        }
        return alterClustering(sourceClustering);
    }

    /**
     * Builds a new source clustering from the buffered points, stores it as the
     * current source clustering and returns a distorted copy of it.
     *
     * @return the altered clustering
     */
    public Clustering getClusteringResult() {
        sourceClustering = new Clustering(points);
        return alterClustering(sourceClustering);
    }

    /**
     * Derives a distorted clustering from {@code scclustering}.
     *
     * <p>Steps, in order: pick clusters to drop, copy every remaining sphere
     * cluster while optionally moving its center and changing its radius,
     * optionally join close clusters, and finally add extra clusters.
     *
     * <p>The source clustering itself is not modified: dropped clusters are
     * skipped rather than removed, so a clustering supplied through
     * {@link #setSourceClustering(Clustering)} does not shrink across calls.
     *
     * @param scclustering the clustering to derive the result from
     * @return a new clustering holding the altered clusters
     */
    private Clustering alterClustering(Clustering scclustering) {
        // percentage of the radius that will be cut off
        // 0: no changes to radius, 1: radius of 0
        double errLevelRadiusDecrease = radiusDecreaseOption.getValue();

        // 0: no changes to radius, 1: radius 100% bigger
        double errLevelRadiusIncrease = radiusIncreaseOption.getValue();

        // 0: no changes, 1: distance between centers is 2 * original radius
        double errLevelPosition = positionOffsetOption.getValue();

        int sourceSize = scclustering.size();
        int numRemoveCluster = (int) (clusterRemoveOption.getValue() * sourceSize);
        int numAddCluster = (int) (clusterAddOption.getValue() * sourceSize);

        // Indices of the source clusters that survive the removal step.
        ArrayList<Integer> remaining = new ArrayList<Integer>(sourceSize);
        for (int i = 0; i < sourceSize; i++) {
            remaining.add(i);
        }
        for (int c = 0; c < numRemoveCluster; c++) {
            int delId = random.nextInt(remaining.size());
            // delId is an int, so this is remove(int index), not remove(Object)
            remaining.remove(delId);
        }

        int numCluster = remaining.size();

        // One draw per remaining cluster. The values are not used, but the draws
        // are kept so the seeded random sequence, and with it the generated
        // clusterings, stay the same as before.
        for (int i = 0; i < numCluster; i++) {
            random.nextDouble();
        }

        Clustering clustering = new Clustering();

        for (int i = 0; i < numCluster; i++) {
            int sourceIndex = remaining.get(i);
            if (!(scclustering.get(sourceIndex) instanceof SphereCluster)) {
                System.out.println("Not a Sphere Cluster");
                continue;
            }
            SphereCluster sourceCluster = (SphereCluster) scclustering.get(sourceIndex);
            double[] center = Arrays.copyOf(sourceCluster.getCenter(), sourceCluster.getCenter().length);
            double weight = sourceCluster.getWeight();
            double radius = sourceCluster.getRadius();

            if (errLevelPosition > 0) {
                center = moveCenter(center, radius, errLevelPosition);
            }

            if (errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0) {
                radius = alterRadius(radius, errLevelRadiusDecrease, errLevelRadiusIncrease);
            }

            SphereCluster newCluster = new SphereCluster(center, radius, weight);
            newCluster.setMeasureValue("Source Cluster", "C" + sourceCluster.getId());

            clustering.add(newCluster);
        }

        if (joinClustersOption.getValue() > 0) {
            clustering = joinClusters(clustering);
        }

        addClusters(clustering, numAddCluster);

        return clustering;
    }

    /**
     * Moves a cluster center along a random direction by
     * {@code 2 * radius * level}, where {@code level} is the position error
     * level plus a random offset.
     *
     * <p>Per dimension, the step is flipped when applying it would leave the
     * range [0, 1]. A message is printed if the result is still outside.
     *
     * @param center           the original center; not modified
     * @param radius           the radius of the cluster
     * @param errLevelPosition the configured position error level
     * @return the new center
     */
    private double[] moveCenter(double[] center, double radius, double errLevelPosition) {
        double errOffset = random.nextDouble() * ERR_INTERVAL_WIDTH / 2.0;
        double errOffsetDirection = random.nextBoolean() ? 1 : -1;
        double level = errLevelPosition + errOffsetDirection * errOffset;

        double[] vector = new double[center.length];
        double vectorLength = 0;
        for (int d = 0; d < center.length; d++) {
            // sign is drawn before magnitude; the order matters for the random sequence
            vector[d] = (random.nextBoolean() ? 1 : -1) * random.nextDouble();
            vectorLength += Math.pow(vector[d], 2);
        }
        vectorLength = Math.sqrt(vectorLength);

        // max is when clusters are next to each other
        double length = 2 * radius * level;

        for (int d = 0; d < center.length; d++) {
            // normalize length and then stretch to reach error position
            vector[d] = vector[d] / vectorLength * length;
        }

        double[] newCenter = new double[center.length];
        for (int d = 0; d < center.length; d++) {
            // check bounds, otherwise flip vector
            double moved = center[d] + vector[d];
            if (moved >= 0 && moved <= 1) {
                newCenter[d] = moved;
            } else {
                newCenter[d] = center[d] - vector[d];
            }
        }

        for (int d = 0; d < newCenter.length; d++) {
            if (!(newCenter[d] >= 0 && newCenter[d] <= 1)) {
                System.out.println("This shouldnt have happend, Cluster center out of bounds:" + Arrays.toString(newCenter));
            }
        }
        return newCenter;
    }

    /**
     * Shrinks or grows a radius.
     *
     * <p>The radius is shrunk when only a decrease level is set. When both
     * levels are set, a random draw decides between shrinking and growing.
     * Otherwise it is grown. The applied level is clamped to [0, 1].
     *
     * @param radius                 the original radius
     * @param errLevelRadiusDecrease the configured decrease level
     * @param errLevelRadiusIncrease the configured increase level
     * @return the altered radius
     */
    private double alterRadius(double radius, double errLevelRadiusDecrease, double errLevelRadiusIncrease) {
        double errOffset = random.nextDouble() * ERR_INTERVAL_WIDTH / 2.0;
        int errOffsetDirection = random.nextBoolean() ? 1 : -1;

        // nextBoolean() is only drawn when both levels are set
        boolean decrease = errLevelRadiusDecrease > 0
                && (errLevelRadiusIncrease == 0 || random.nextBoolean());

        if (decrease) {
            double level = clampToUnitInterval(errLevelRadiusDecrease + errOffsetDirection * errOffset);
            return radius * (1 - level);
        }
        double level = clampToUnitInterval(errLevelRadiusIncrease + errOffsetDirection * errOffset);
        return radius + radius * level;
    }

    /** Limits {@code level} to the range [0, 1]. */
    private static double clampToUnitInterval(double level) {
        return Math.max(0, Math.min(1, level));
    }

    /**
     * Adds up to {@code numAddCluster} clusters to {@code clustering}.
     *
     * <p>Each new cluster copies the radius of a randomly chosen existing
     * cluster and takes half of its weight. A random center is tried up to 20
     * times until the sphere lies within [0, 1] in every dimension; if none
     * fits, a message is printed and no cluster is added for that round.
     *
     * <p>Nothing is added when {@code clustering} is empty, because there is no
     * cluster to copy from.
     *
     * @param clustering    the clustering to extend in place
     * @param numAddCluster the number of clusters to try to add
     */
    private void addClusters(Clustering clustering, int numAddCluster) {
        for (int c = 0; c < numAddCluster; c++) {
            if (clustering.size() == 0) {
                // Random.nextInt(0) would throw IllegalArgumentException
                return;
            }
            int copyId = random.nextInt(clustering.size());
            SphereCluster scorg = (SphereCluster) clustering.get(copyId);
            int dim = scorg.getCenter().length;
            double[] center = new double[dim];
            double radius = scorg.getRadius();

            boolean outofbounds = true;
            int tryCounter = 0;
            while (outofbounds && tryCounter < 20) {
                tryCounter++;
                outofbounds = false;
                for (int j = 0; j < center.length; j++) {
                    center[j] = random.nextDouble();
                    if (center[j] - radius < 0 || center[j] + radius > 1) {
                        outofbounds = true;
                        break;
                    }
                }
            }
            if (outofbounds) {
                System.out.println("Coludn't place additional cluster");
            } else {
                SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight() / 2);
                scorg.setWeight(scorg.getWeight() - scnew.getWeight());
                clustering.add(scnew);
            }
        }
    }

    /**
     * Joins pairs of clusters whose hull distance is positive and smaller than
     * the smaller of the two radii times the {@code joinClusters} factor.
     *
     * <p>Each cluster takes part in at most one merge: clusters that were
     * already merged are neither used as a starting point nor as a partner.
     * The result holds the merged clusters first, followed by all clusters that
     * were not merged.
     *
     * @param clustering the clustering to join; all clusters are cast to
     *                   {@link SphereCluster}
     * @return a new clustering with the joined clusters
     */
    private Clustering joinClusters(Clustering clustering) {
        double radiusFactor = joinClustersOption.getValue();
        boolean[] merged = new boolean[clustering.size()];

        Clustering mclustering = new Clustering();

        if (radiusFactor > 0) {
            for (int c1 = 0; c1 < clustering.size(); c1++) {
                if (merged[c1]) {
                    continue;
                }
                SphereCluster sc1 = (SphereCluster) clustering.get(c1);
                double minDist = Double.MAX_VALUE;
                int partner = -1;
                for (int c2 = 0; c2 < clustering.size(); c2++) {
                    if (c2 == c1 || merged[c2]) {
                        // a cluster must not be merged into two different clusters
                        continue;
                    }
                    SphereCluster sc2 = (SphereCluster) clustering.get(c2);
                    double dist = sc1.getHullDistance(sc2);
                    double threshold = Math.min(sc1.getRadius(), sc2.getRadius()) * radiusFactor;
                    if (dist > 0 && dist < minDist && dist < threshold) {
                        minDist = dist;
                        partner = c2;
                    }
                }

                if (partner != -1) {
                    merged[c1] = true;
                    merged[partner] = true;
                    SphereCluster scnew = new SphereCluster(sc1.getCenter(), sc1.getRadius(), sc1.getWeight());
                    scnew.merge((SphereCluster) clustering.get(partner));
                    mclustering.add(scnew);
                }
            }
        }

        for (int i = 0; i < merged.length; i++) {
            if (!merged[i]) {
                mclustering.add(clustering.get(i));
            }
        }

        return mclustering;
    }

    @Override
    protected Measurement[] getModelMeasurementsImpl() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public void getModelDescription(StringBuilder out, int indent) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public boolean isRandomizable() {
        return false;
    }

    @Override
    public boolean keepClassLabel() {
        return true;
    }

    /**
     * Not implemented by this clusterer.
     *
     * @param inst the instance to vote for
     * @return always {@code null}
     */
    public double[] getVotesForInstance(Instance inst) {
        return null;
    }
}