MOA 12.03
Real Time Analytics for Data Streams
BucketManager.java
Go to the documentation of this file.
00001 package moa.clusterers.streamkm;
00002 
00003 
00009 public class BucketManager  {
00010 
00011         protected class Bucket {
00012                 int cursize;
00013                 Point[] points;
00014                 Point[] spillover;
00015                 
00016                 public Bucket(int d, int maxsize){
00017                         this.cursize = 0;
00018                         this.points = new Point[maxsize];
00019                         this.spillover = new Point[maxsize];
00020                         for(int i=0; i<maxsize; i++){
00021                                 this.points[i] = new Point(d);
00022                                 this.spillover[i] = new Point(d);
00023                         }
00024                 }
00025                 
00026         };
00027 
00028         protected int numberOfBuckets;
00029         protected int maxBucketsize;
00030         protected Bucket[] buckets;
00031         protected MTRandom clustererRandom;
00032         protected TreeCoreset treeCoreset;
00033         
00034         
00038         public BucketManager(int n,int d,int maxsize, MTRandom random){
00039                 this.clustererRandom = random;
00040                 this.numberOfBuckets = (int) Math.ceil(Math.log((double)n/(double)maxsize) / Math.log(2) )+2;
00041                 this.maxBucketsize = maxsize;
00042                 this.buckets = new Bucket[this.numberOfBuckets];
00043                 for(int i=0; i<this.numberOfBuckets; i++){
00044                         this.buckets[i] = new Bucket(d,maxsize);
00045                 }
00046                 this.treeCoreset = new TreeCoreset();
00047                 //printf("Created manager with %d buckets of dimension %d \n",this.numberOfBuckets,d);
00048         }
00049 
00053         void insertPoint(Point p){
00054                 
00055                 //check if there is enough space in the first bucket
00056                 int cursize = this.buckets[0].cursize;  
00057                 if(cursize >= this.maxBucketsize) {
00058                         //printf("Bucket 0 full \n");
00059                         //start spillover process
00060                         int curbucket  = 0;
00061                         int nextbucket = 1;
00062 
00063                         //check if the next bucket is empty
00064                         if(this.buckets[nextbucket].cursize == 0){
00065                                 //copy the bucket       
00066                                 int i;
00067                                 for(i=0; i<this.maxBucketsize; i++){
00068                                         this.buckets[nextbucket].points[i] = this.buckets[curbucket].points[i].clone();
00069                                         //copyPointWithoutInit: we should not copy coordinates? 
00070                                 }
00071                                 //bucket is now full
00072                                 this.buckets[nextbucket].cursize = this.maxBucketsize;
00073                                 //first bucket is now empty
00074                                 this.buckets[curbucket].cursize = 0;
00075                                 cursize = 0;
00076                         } else {
00077                                 //printf("Bucket %d full \n",nextbucket);
00078                                 //copy bucket to spillover and continue
00079                                 int i;
00080                                 for(i=0;i<this.maxBucketsize;i++){
00081                                         this.buckets[nextbucket].spillover[i] = this.buckets[curbucket].points[i].clone();
00082                                         //copyPointWithoutInit: we should not copy coordinates? 
00083                                 }
00084                                 this.buckets[0].cursize=0;
00085                                 cursize = 0;
00086                                 curbucket++;
00087                                 nextbucket++;
00088                                 /*
00089                                 as long as the next bucket is full output the coreset to the spillover of the next bucket
00090                                 */
00091                                 while(this.buckets[nextbucket].cursize == this.maxBucketsize){
00092                                         //printf("Bucket %d full \n",nextbucket);
00093                                         this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize,
00094                                                 this.maxBucketsize,p.dimension, 
00095                                                 this.buckets[curbucket].points,this.buckets[curbucket].spillover,
00096                                                 this.buckets[nextbucket].spillover, this.clustererRandom);
00097                                         //bucket now empty
00098                                         this.buckets[curbucket].cursize = 0;
00099                                         curbucket++;
00100                                         nextbucket++;
00101                                 }
00102                                 this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize,
00103                                                 this.maxBucketsize,p.dimension, 
00104                                                 this.buckets[curbucket].points,this.buckets[curbucket].spillover,
00105                                                 this.buckets[nextbucket].points, this.clustererRandom);
00106                                 this.buckets[curbucket].cursize = 0;
00107                                 this.buckets[nextbucket].cursize = this.maxBucketsize;
00108                         }
00109                 }
00110                 //insert point into the first bucket
00111                 this.buckets[0].points[cursize] = p.clone();
00112                 //copyPointWithoutInit: we should not copy coordinates? 
00113                 this.buckets[0].cursize++;
00114         }
00115 
00128         Point[] getCoresetFromManager(int d){
00129                 Point[] coreset = new Point[d];
00130                 int i = 0;
00131                 if(this.buckets[this.numberOfBuckets-1].cursize == this.maxBucketsize){
00132                         coreset = this.buckets[this.numberOfBuckets-1].points;
00133 
00134                 } else {
00135                         //find the first nonempty bucket
00136                         for(i=0; i < this.numberOfBuckets; i++){
00137                                 if(this.buckets[i].cursize != 0){
00138                                         coreset = this.buckets[i].points;
00139                                         break;
00140                                 }
00141                         }               
00142                         //as long as there is a nonempty bucket compute a coreset
00143                         int j;
00144                         for(j=i+1; j < this.numberOfBuckets; j++){
00145                                 if(this.buckets[j].cursize != 0){
00146                                         //output the coreset into the spillover of bucket j
00147                                         this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize,
00148                                                 this.maxBucketsize,d, 
00149                                                 this.buckets[j].points,coreset,
00150                                                 this.buckets[j].spillover, this.clustererRandom); 
00151                                         coreset = this.buckets[j].spillover;                    
00152                                 }
00153                         }
00154                 }
00155                 return coreset;
00156         }
00157 }
 All Classes Namespaces Files Functions Variables Enumerations