MOA 12.03
Real Time Analytics for Data Streams
|
00001 package moa.clusterers.streamkm; 00002 00003 00009 public class BucketManager { 00010 00011 protected class Bucket { 00012 int cursize; 00013 Point[] points; 00014 Point[] spillover; 00015 00016 public Bucket(int d, int maxsize){ 00017 this.cursize = 0; 00018 this.points = new Point[maxsize]; 00019 this.spillover = new Point[maxsize]; 00020 for(int i=0; i<maxsize; i++){ 00021 this.points[i] = new Point(d); 00022 this.spillover[i] = new Point(d); 00023 } 00024 } 00025 00026 }; 00027 00028 protected int numberOfBuckets; 00029 protected int maxBucketsize; 00030 protected Bucket[] buckets; 00031 protected MTRandom clustererRandom; 00032 protected TreeCoreset treeCoreset; 00033 00034 00038 public BucketManager(int n,int d,int maxsize, MTRandom random){ 00039 this.clustererRandom = random; 00040 this.numberOfBuckets = (int) Math.ceil(Math.log((double)n/(double)maxsize) / Math.log(2) )+2; 00041 this.maxBucketsize = maxsize; 00042 this.buckets = new Bucket[this.numberOfBuckets]; 00043 for(int i=0; i<this.numberOfBuckets; i++){ 00044 this.buckets[i] = new Bucket(d,maxsize); 00045 } 00046 this.treeCoreset = new TreeCoreset(); 00047 //printf("Created manager with %d buckets of dimension %d \n",this.numberOfBuckets,d); 00048 } 00049 00053 void insertPoint(Point p){ 00054 00055 //check if there is enough space in the first bucket 00056 int cursize = this.buckets[0].cursize; 00057 if(cursize >= this.maxBucketsize) { 00058 //printf("Bucket 0 full \n"); 00059 //start spillover process 00060 int curbucket = 0; 00061 int nextbucket = 1; 00062 00063 //check if the next bucket is empty 00064 if(this.buckets[nextbucket].cursize == 0){ 00065 //copy the bucket 00066 int i; 00067 for(i=0; i<this.maxBucketsize; i++){ 00068 this.buckets[nextbucket].points[i] = this.buckets[curbucket].points[i].clone(); 00069 //copyPointWithoutInit: we should not copy coordinates? 00070 } 00071 //bucket is now full 00072 this.buckets[nextbucket].cursize = this.maxBucketsize; 00073 //first bucket is now empty 00074 this.buckets[curbucket].cursize = 0; 00075 cursize = 0; 00076 } else { 00077 //printf("Bucket %d full \n",nextbucket); 00078 //copy bucket to spillover and continue 00079 int i; 00080 for(i=0;i<this.maxBucketsize;i++){ 00081 this.buckets[nextbucket].spillover[i] = this.buckets[curbucket].points[i].clone(); 00082 //copyPointWithoutInit: we should not copy coordinates? 00083 } 00084 this.buckets[0].cursize=0; 00085 cursize = 0; 00086 curbucket++; 00087 nextbucket++; 00088 /* 00089 as long as the next bucket is full output the coreset to the spillover of the next bucket 00090 */ 00091 while(this.buckets[nextbucket].cursize == this.maxBucketsize){ 00092 //printf("Bucket %d full \n",nextbucket); 00093 this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize, 00094 this.maxBucketsize,p.dimension, 00095 this.buckets[curbucket].points,this.buckets[curbucket].spillover, 00096 this.buckets[nextbucket].spillover, this.clustererRandom); 00097 //bucket now empty 00098 this.buckets[curbucket].cursize = 0; 00099 curbucket++; 00100 nextbucket++; 00101 } 00102 this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize, 00103 this.maxBucketsize,p.dimension, 00104 this.buckets[curbucket].points,this.buckets[curbucket].spillover, 00105 this.buckets[nextbucket].points, this.clustererRandom); 00106 this.buckets[curbucket].cursize = 0; 00107 this.buckets[nextbucket].cursize = this.maxBucketsize; 00108 } 00109 } 00110 //insert point into the first bucket 00111 this.buckets[0].points[cursize] = p.clone(); 00112 //copyPointWithoutInit: we should not copy coordinates? 00113 this.buckets[0].cursize++; 00114 } 00115 00128 Point[] getCoresetFromManager(int d){ 00129 Point[] coreset = new Point[d]; 00130 int i = 0; 00131 if(this.buckets[this.numberOfBuckets-1].cursize == this.maxBucketsize){ 00132 coreset = this.buckets[this.numberOfBuckets-1].points; 00133 00134 } else { 00135 //find the first nonempty bucket 00136 for(i=0; i < this.numberOfBuckets; i++){ 00137 if(this.buckets[i].cursize != 0){ 00138 coreset = this.buckets[i].points; 00139 break; 00140 } 00141 } 00142 //as long as there is a nonempty bucket compute a coreset 00143 int j; 00144 for(j=i+1; j < this.numberOfBuckets; j++){ 00145 if(this.buckets[j].cursize != 0){ 00146 //output the coreset into the spillover of bucket j 00147 this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize, 00148 this.maxBucketsize,d, 00149 this.buckets[j].points,coreset, 00150 this.buckets[j].spillover, this.clustererRandom); 00151 coreset = this.buckets[j].spillover; 00152 } 00153 } 00154 } 00155 return coreset; 00156 } 00157 }