MOA 12.03
Real Time Analytics for Data Streams
|
00001 /* 00002 * ClusTree.java 00003 * Copyright (C) 2010 RWTH Aachen University, Germany 00004 * @author Sanchez Villaamil ([email protected]) 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 */ 00020 00021 package moa.clusterers.clustree; 00022 00023 import java.util.ArrayList; 00024 import java.util.LinkedList; 00025 import moa.clusterers.clustree.util.*; 00026 import moa.cluster.Clustering; 00027 import moa.clusterers.AbstractClusterer; 00028 import moa.core.Measurement; 00029 import moa.options.IntOption; 00030 import weka.core.Instance; 00031 00032 public class ClusTree extends AbstractClusterer{ 00033 private static final long serialVersionUID = 1L; 00034 00035 public IntOption horizonOption = new IntOption("horizon", 00036 'h', "Range of the window.", 1000); 00037 00038 public IntOption maxHeightOption = new IntOption( 00039 "maxHeight", 'H', 00040 "The maximal height of the tree", getDefaultHeight()); 00041 00042 protected int getDefaultHeight() { 00043 return 8; 00044 } 00045 00046 private static int INSERTIONS_BETWEEN_CLEANUPS = 10000; 00050 protected Node root; 00051 // Information about the data represented in this tree. 00055 private int numberDimensions; 00059 protected double negLambda; 00063 private int height; 00067 protected int maxHeight; 00072 private int numRootSplits; 00079 private double weightThreshold = 0.05; 00083 private int numberInsertions; 00084 private long timestamp; 00085 00089 //TODO: Add Option for that 00090 protected boolean breadthFirstStrat = false; 00091 00092 //TODO: cleanup 00093 private Entry alsoUpdate; 00094 00095 @Override 00096 public void resetLearningImpl() { 00097 negLambda = (1.0 / (double) horizonOption.getValue()) 00098 * (Math.log(weightThreshold) / Math.log(2)); 00099 maxHeight = maxHeightOption.getValue(); 00100 numberDimensions = -1; 00101 root = null; 00102 timestamp = 0; 00103 height = 0; 00104 numRootSplits = 0; 00105 numberInsertions = 0; 00106 } 00107 00108 00109 @Override 00110 protected Measurement[] getModelMeasurementsImpl() { 00111 return null; 00112 } 00113 00114 public boolean isRandomizable() { 00115 return false; 00116 } 00117 00118 @Override 00119 public void getModelDescription(StringBuilder out, int indent) { 00120 } 00121 00122 public double[] getVotesForInstance(Instance inst) { 00123 return null; 00124 } 00125 00126 @Override 00127 public boolean implementsMicroClusterer() { 00128 return true; 00129 } 00130 00131 00132 00133 @Override 00134 public void trainOnInstanceImpl(Instance instance) { 00135 timestamp++; 00136 00137 //TODO check if instance contains label 00138 if(root == null){ 00139 numberDimensions = instance.numAttributes(); 00140 root = new Node(numberDimensions, 0); 00141 } 00142 else{ 00143 if(numberDimensions!=instance.numAttributes()) 00144 System.out.println("Wrong dimensionality, expected:"+numberDimensions+ "found:"+instance.numAttributes()); 00145 } 00146 00147 ClusKernel newPointAsKernel = new ClusKernel(instance.toDoubleArray(), numberDimensions); 00148 insert(newPointAsKernel, new SimpleBudget(1000),timestamp); 00149 } 00150 00151 00164 public void insert(ClusKernel newPoint, Budget budget, long timestamp) { 00165 if (breadthFirstStrat){ 00166 insertBreadthFirst(newPoint, budget, timestamp); 00167 } 00168 else{ 00169 Entry rootEntry = new Entry(this.numberDimensions, 00170 root, timestamp, null, null); 00171 ClusKernel carriedBuffer = new ClusKernel(this.numberDimensions); 00172 Entry toInsertHere = insert(newPoint, carriedBuffer, root, rootEntry, 00173 budget, timestamp); 00174 00175 if (toInsertHere != null) { 00176 this.numRootSplits++; 00177 this.height += this.height < this.maxHeight ? 1 : 0; 00178 00179 Node newRoot = new Node(this.numberDimensions, 00180 toInsertHere.getChild().getRawLevel() + 1); 00181 newRoot.addEntry(rootEntry, timestamp); 00182 newRoot.addEntry(toInsertHere, timestamp); 00183 rootEntry.setNode(newRoot); 00184 toInsertHere.setNode(newRoot); 00185 this.root = newRoot; 00186 } 00187 } 00188 00189 this.numberInsertions++; 00190 if (this.numberInsertions % INSERTIONS_BETWEEN_CLEANUPS == 0) { 00191 cleanUp(this.root, 0); 00192 } 00193 } 00194 00203 private Entry insertBreadthFirst(ClusKernel newPoint, Budget budget, long timestamp) { 00204 //check all leaf nodes and get the one with the closest entry to newPoint 00205 Node bestFit = findBestLeafNode(newPoint); 00206 bestFit.makeOlder(timestamp, negLambda); 00207 Entry parent = bestFit.getEntries()[0].getParentEntry(); 00208 // Search for an Entry with a weight under the threshold. 00209 Entry irrelevantEntry = bestFit.getIrrelevantEntry(this.weightThreshold); 00210 int numFreeEntries = bestFit.numFreeEntries(); 00211 Entry newEntry = new Entry(newPoint.getCenter().length, 00212 newPoint, timestamp, parent, bestFit); 00213 //if there is space, add it to the node ( doesn't ever occur, since nodes are created with 3 entries) 00214 if (numFreeEntries>0){ 00215 bestFit.addEntry(newEntry, timestamp); 00216 } 00217 //if outdated cluster in this best fitting node, replace it 00218 else if (irrelevantEntry != null) { 00219 irrelevantEntry.overwriteOldEntry(newEntry); 00220 } 00221 //if there is space/outdated cluster on path to top, split. Else merge without split 00222 else { 00223 if (existsOutdatedEntryOnPath(bestFit)||!this.hasMaximalSize()){ 00224 // We have to split. 00225 insertHereWithSplit(newEntry, bestFit, timestamp); 00226 } 00227 else { 00228 mergeEntryWithoutSplit(bestFit, newEntry, 00229 timestamp); 00230 } 00231 } 00232 //update all nodes on path to top. 00233 if (bestFit.getEntries()[0].getParentEntry()!=null) 00234 updateToTop(bestFit.getEntries()[0].getParentEntry().getNode()); 00235 return null; 00236 } 00243 private boolean existsOutdatedEntryOnPath(Node node) { 00244 if (node == root){ 00245 node.makeOlder(timestamp, negLambda); 00246 return node.getIrrelevantEntry(this.weightThreshold)!=null; 00247 } 00248 do{ 00249 node = node.getEntries()[0].getParentEntry().getNode(); 00250 node.makeOlder(timestamp, negLambda); 00251 for (Entry e : node.getEntries()){ 00252 e.recalculateData(); 00253 } 00254 if (node.numFreeEntries()>0) 00255 return true; 00256 if (node.getIrrelevantEntry(this.weightThreshold)!=null) 00257 return true; 00258 }while(node.getEntries()[0].getParentEntry()!=null); 00259 return false; 00260 } 00261 00266 private void updateToTop(Node toUpdate) { 00267 while(toUpdate!=null){ 00268 for (Entry e: toUpdate.getEntries()) 00269 e.recalculateData(); 00270 if (toUpdate.getEntries()[0].getParentEntry()==null) 00271 break; 00272 toUpdate=toUpdate.getEntries()[0].getParentEntry().getNode(); 00273 } 00274 } 00275 00283 private Entry insertHereWithSplit(Entry toInsert, Node insertNode, 00284 long timestamp) { 00285 //Handle root split 00286 if (insertNode.getEntries()[0].getParentEntry()==null){ 00287 root.makeOlder(timestamp, negLambda); 00288 Entry irrelevantEntry = insertNode.getIrrelevantEntry(this.weightThreshold); 00289 int numFreeEntries = insertNode.numFreeEntries(); 00290 if (irrelevantEntry != null) { 00291 irrelevantEntry.overwriteOldEntry(toInsert); 00292 } 00293 else if (numFreeEntries>0){ 00294 insertNode.addEntry(toInsert, timestamp); 00295 } 00296 else{ 00297 this.numRootSplits++; 00298 this.height += this.height < this.maxHeight ? 1 : 0; 00299 Entry oldRootEntry = new Entry(this.numberDimensions, 00300 root, timestamp, null, null); 00301 Node newRoot = new Node(this.numberDimensions, 00302 this.height); 00303 Entry newRootEntry = split(toInsert, root, oldRootEntry, timestamp); 00304 newRoot.addEntry(oldRootEntry, timestamp); 00305 newRoot.addEntry(newRootEntry, timestamp); 00306 this.root = newRoot; 00307 for (Entry c : oldRootEntry.getChild().getEntries()) 00308 c.setParentEntry(root.getEntries()[0]); 00309 for (Entry c : newRootEntry.getChild().getEntries()) 00310 c.setParentEntry(root.getEntries()[1]); 00311 } 00312 return null; 00313 } 00314 insertNode.makeOlder(timestamp, negLambda); 00315 Entry irrelevantEntry = insertNode.getIrrelevantEntry(this.weightThreshold); 00316 int numFreeEntries = insertNode.numFreeEntries(); 00317 if (irrelevantEntry != null) { 00318 irrelevantEntry.overwriteOldEntry(toInsert); 00319 } 00320 else if (numFreeEntries>0){ 00321 insertNode.addEntry(toInsert, timestamp); 00322 } 00323 else { 00324 // We have to split. 00325 Entry parentEntry = insertNode.getEntries()[0].getParentEntry(); 00326 Entry residualEntry = split(toInsert, insertNode, parentEntry, timestamp); 00327 if (alsoUpdate!=null){ 00328 alsoUpdate = residualEntry; 00329 } 00330 Node nodeForResidualEntry = insertNode.getEntries()[0].getParentEntry().getNode(); 00331 //recursive call 00332 return insertHereWithSplit(residualEntry, nodeForResidualEntry, timestamp); 00333 } 00334 00335 //no Split 00336 return null; 00337 } 00338 00339 00340 // XXX: Document the insertion when the final implementation is done. 00341 private Entry insertHere(Entry newEntry, Node currentNode, 00342 Entry parentEntry, ClusKernel carriedBuffer, Budget budget, 00343 long timestamp) { 00344 00345 int numFreeEntries = currentNode.numFreeEntries(); 00346 00347 // Insert the buffer that we carry. 00348 if (!carriedBuffer.isEmpty()) { 00349 Entry bufferEntry = new Entry(this.numberDimensions, 00350 carriedBuffer, timestamp, parentEntry, currentNode); 00351 00352 if (numFreeEntries <= 1) { 00353 // Distance from buffer to entries. 00354 Entry nearestEntryToCarriedBuffer = 00355 currentNode.nearestEntry(newEntry); 00356 double distanceNearestEntryToBuffer = 00357 nearestEntryToCarriedBuffer.calcDistance(newEntry); 00358 00359 // Distance between buffer and point to insert. 00360 double distanceBufferNewEntry = 00361 newEntry.calcDistance(carriedBuffer); 00362 00363 // Best distance between Entrys in the Node. 00364 BestMergeInNode bestMergeInNode = 00365 calculateBestMergeInNode(currentNode); 00366 00367 // See what the minimal distance is and do the correspoding 00368 // action. 00369 if (distanceNearestEntryToBuffer <= distanceBufferNewEntry 00370 && distanceNearestEntryToBuffer <= bestMergeInNode.distance) { 00371 // Aggregate buffer entry to nearest entry in node. 00372 nearestEntryToCarriedBuffer.aggregateEntry(bufferEntry, 00373 timestamp, this.negLambda); 00374 } else if (distanceBufferNewEntry <= distanceNearestEntryToBuffer 00375 && distanceBufferNewEntry <= bestMergeInNode.distance) { 00376 newEntry.mergeWith(bufferEntry); 00377 } else { 00378 currentNode.mergeEntries(bestMergeInNode.entryPos1, 00379 bestMergeInNode.entryPos2); 00380 currentNode.addEntry(bufferEntry, timestamp); 00381 } 00382 00383 } else { 00384 assert (currentNode.isLeaf()); 00385 currentNode.addEntry(bufferEntry, timestamp); 00386 } 00387 } 00388 00389 // Normally the insertion of the carries buffer does not change the 00390 // number of free entries, but in case of future changes we calculate 00391 // the number again. 00392 numFreeEntries = currentNode.numFreeEntries(); 00393 00394 // Search for an Entry with a weight under the threshold. 00395 Entry irrelevantEntry = currentNode.getIrrelevantEntry(this.weightThreshold); 00396 if (currentNode.isLeaf() && irrelevantEntry != null) { 00397 irrelevantEntry.overwriteOldEntry(newEntry); 00398 } else if (numFreeEntries >= 1) { 00399 currentNode.addEntry(newEntry, timestamp); 00400 } else { 00401 if (currentNode.isLeaf() && (this.hasMaximalSize() 00402 || !budget.hasMoreTime())) { 00403 mergeEntryWithoutSplit(currentNode, newEntry, 00404 timestamp); 00405 } else { 00406 // We have to split. 00407 return split(newEntry, currentNode, parentEntry, timestamp); 00408 } 00409 } 00410 00411 return null; 00412 } 00413 00421 private Node findBestLeafNode(ClusKernel newPoint) { 00422 double minDist = Double.MAX_VALUE; 00423 Node bestFit = null; 00424 for (Node e: collectLeafNodes(root)){ 00425 if (newPoint.calcDistance(e.nearestEntry(newPoint).getData())<minDist){ 00426 bestFit = e; 00427 minDist = newPoint.calcDistance(e.nearestEntry(newPoint).getData()); 00428 } 00429 } 00430 if (bestFit!=null) 00431 return bestFit; 00432 else 00433 return root; 00434 } 00435 00436 private ArrayList<Node> collectLeafNodes(Node curr){ 00437 ArrayList<Node> toReturn = new ArrayList<Node>(); 00438 if (curr==null) 00439 return toReturn; 00440 if (curr.isLeaf()){ 00441 toReturn.add(curr); 00442 return toReturn; 00443 } 00444 else{ 00445 for (Entry e : curr.getEntries()) 00446 toReturn.addAll(collectLeafNodes(e.getChild())); 00447 return toReturn; 00448 } 00449 } 00450 00451 // TODO: Expand all function that work on entries to work with the Budget. 00452 private Entry insert(ClusKernel pointToInsert, ClusKernel carriedBuffer, 00453 Node currentNode, Entry parentEntry, Budget budget, long timestamp) { 00454 assert (currentNode != null); 00455 assert (currentNode.isLeaf() 00456 || currentNode.getEntries()[0].getChild() != null); 00457 00458 currentNode.makeOlder(timestamp, this.negLambda); 00459 00460 // This variable will be changed from to null to an actual reference 00461 // in the following if-else block if we have to insert something here, 00462 // either because this is a leaf, or because of split propagation. 00463 Entry toInsertHere = null; 00464 00465 if (currentNode.isLeaf()) { 00466 // At the end of the function the entry will be inserted. 00467 toInsertHere = new Entry(this.numberDimensions, 00468 pointToInsert, timestamp, parentEntry, currentNode); 00469 } else { 00470 00471 Entry bestEntry = currentNode.nearestEntry(pointToInsert); 00472 bestEntry.aggregateCluster(pointToInsert, timestamp, 00473 this.negLambda); 00474 00475 boolean isCarriedBufferEmpty = carriedBuffer.isEmpty(); 00476 00477 Entry bestBufferEntry = null; 00478 if (!isCarriedBufferEmpty) { 00479 bestBufferEntry = currentNode.nearestEntry(carriedBuffer); 00480 bestBufferEntry.aggregateCluster(carriedBuffer, timestamp, 00481 this.negLambda); 00482 } 00483 00484 if (!budget.hasMoreTime()) { 00485 bestEntry.aggregateToBuffer(pointToInsert, timestamp, 00486 this.negLambda); 00487 if (!isCarriedBufferEmpty) { 00488 bestBufferEntry.aggregateToBuffer(carriedBuffer, 00489 timestamp, this.negLambda); 00490 } 00491 return null; 00492 } 00493 00494 // If the way of the buffer differs from the way of the point to 00495 // be inserted, leave the buffer here. 00496 if (!isCarriedBufferEmpty && (bestEntry != bestBufferEntry)) { 00497 bestBufferEntry.aggregateToBuffer(carriedBuffer, timestamp, 00498 this.negLambda); 00499 carriedBuffer.clear(); 00500 } 00501 // Take the buffer of the best entry for the point to be inserted 00502 // along. 00503 ClusKernel takeAlongBuffer = bestEntry.emptyBuffer(timestamp, 00504 this.negLambda); 00505 carriedBuffer.add(takeAlongBuffer); 00506 00507 // Recursive call. 00508 toInsertHere = insert(pointToInsert, carriedBuffer, 00509 bestEntry.getChild(), bestEntry, budget, timestamp); 00510 } 00511 00512 // If the above block has a new Entry for this place insert it. 00513 if (toInsertHere != null) { 00514 return this.insertHere(toInsertHere, currentNode, parentEntry, 00515 carriedBuffer, budget, timestamp); 00516 } 00517 00518 // If nothing else needs to be done in all the above levels 00519 // return null to signalize it. 00520 return null; 00521 } 00522 00530 private void mergeEntryWithoutSplit(Node node, 00531 Entry newEntry, long timestamp) { 00532 00533 Entry nearestEntryToCarriedBuffer = 00534 node.nearestEntry(newEntry); 00535 double distanceNearestEntryToBuffer = 00536 nearestEntryToCarriedBuffer.calcDistance(newEntry); 00537 00538 BestMergeInNode bestMergeInNode = 00539 calculateBestMergeInNode(node); 00540 00541 if (distanceNearestEntryToBuffer < bestMergeInNode.distance) { 00542 nearestEntryToCarriedBuffer.aggregateEntry(newEntry, timestamp, 00543 this.negLambda); 00544 } else { 00545 node.mergeEntries(bestMergeInNode.entryPos1, 00546 bestMergeInNode.entryPos2); 00547 node.addEntry(newEntry, timestamp); 00548 } 00549 } 00550 00560 private BestMergeInNode calculateBestMergeInNode(Node node) { 00561 assert (node.numFreeEntries() == 0); 00562 00563 Entry[] entries = node.getEntries(); 00564 00565 int toMerge1 = -1; 00566 int toMerge2 = -1; 00567 double distanceBetweenMergeEntries = Double.NaN; 00568 00569 double minDistance = Double.MAX_VALUE; 00570 for (int i = 0; i < entries.length; i++) { 00571 Entry e1 = entries[i]; 00572 for (int j = i + 1; j < entries.length; j++) { 00573 Entry e2 = entries[j]; 00574 double distance = e1.calcDistance(e2); 00575 if (distance < minDistance) { 00576 toMerge1 = i; 00577 toMerge2 = j; 00578 distanceBetweenMergeEntries = distance; 00579 } 00580 } 00581 } 00582 00583 assert (toMerge1 != -1 && toMerge2 != -1); 00584 if (Double.isNaN(distanceBetweenMergeEntries)) { 00585 throw new RuntimeException("The minimal distance between two " 00586 + "Entrys in a Node was Double.MAX_VAUE. That can hardly " 00587 + "be right."); 00588 } 00589 00590 return new BestMergeInNode(toMerge1, toMerge2, 00591 distanceBetweenMergeEntries); 00592 } 00593 00594 private boolean hasMaximalSize() { 00595 // TODO: Improve hasMaximalSize(). For now it just works somehow for testing. 00596 return this.height == this.maxHeight; 00597 } 00598 00613 private Entry split(Entry newEntry, Node node, Entry parentEntry, 00614 long timestamp) { 00615 // The implemented split function only works in trees where node 00616 // have three entries. 00617 // Splitting only makes sense on full nodes. 00618 assert (node.numFreeEntries() == 0); 00619 assert (parentEntry.getChild() == node); 00620 00621 // All the entries we have to separate in two nodes. 00622 Entry[] allEntries = new Entry[4]; 00623 Entry[] nodeEntries = node.getEntries(); 00624 for (int i = 0; i < nodeEntries.length; i++) { 00625 allEntries[i] = new Entry(nodeEntries[i]); 00626 } 00627 allEntries[3] = newEntry; 00628 00629 // Clear the given node, since we are going to refill it later. 00630 node = new Node(this.numberDimensions, node.getRawLevel()); 00631 00632 // Calculate the distance of all the possible pairings, since we want 00633 // to do a (2,2) split. 00634 double select01 = allEntries[0].calcDistance(allEntries[1]) 00635 + allEntries[2].calcDistance(allEntries[3]); 00636 00637 double select02 = allEntries[0].calcDistance(allEntries[2]) 00638 + allEntries[1].calcDistance(allEntries[3]); 00639 00640 double select03 = allEntries[0].calcDistance(allEntries[3]) 00641 + allEntries[1].calcDistance(allEntries[2]); 00642 00643 // See which of the pairings is minimal and distribute the entries 00644 // accordingly. 00645 Node residualNode = new Node(this.numberDimensions, 00646 node.getRawLevel()); 00647 if (select01 < select02) { 00648 if (select01 < select03) {//select01 smallest 00649 node.addEntry(allEntries[0], timestamp); 00650 node.addEntry(allEntries[1], timestamp); 00651 residualNode.addEntry(allEntries[2], timestamp); 00652 residualNode.addEntry(allEntries[3], timestamp); 00653 } else {//select03 smallest 00654 node.addEntry(allEntries[0], timestamp); 00655 node.addEntry(allEntries[3], timestamp); 00656 residualNode.addEntry(allEntries[1], timestamp); 00657 residualNode.addEntry(allEntries[2], timestamp); 00658 } 00659 } else { 00660 if (select02 < select03) {//select02 smallest 00661 node.addEntry(allEntries[0], timestamp); 00662 node.addEntry(allEntries[2], timestamp); 00663 residualNode.addEntry(allEntries[1], timestamp); 00664 residualNode.addEntry(allEntries[3], timestamp); 00665 } else {//select03 smallest 00666 node.addEntry(allEntries[0], timestamp); 00667 node.addEntry(allEntries[3], timestamp); 00668 residualNode.addEntry(allEntries[1], timestamp); 00669 residualNode.addEntry(allEntries[2], timestamp); 00670 } 00671 } 00672 00673 // Set the other node into the tree. 00674 parentEntry.setChild(node); 00675 parentEntry.recalculateData(); 00676 int count = 0; 00677 for (Entry e : node.getEntries()){ 00678 e.setParentEntry(parentEntry); 00679 if (e.getData().getN() != 0) 00680 count++; 00681 } 00682 //System.out.println(count); 00683 // Generate a new entry for the residual node. 00684 Entry residualEntry = new Entry(this.numberDimensions, 00685 residualNode, timestamp, parentEntry, node); 00686 count=0; 00687 for (Entry e: residualNode.getEntries()){ 00688 e.setParentEntry(residualEntry); 00689 if (e.getData().getN() != 0) 00690 count++; 00691 } 00692 //System.out.println(count); 00693 return residualEntry; 00694 } 00695 00701 public int getNumRootSplits() { 00702 return numRootSplits; 00703 } 00704 00711 public int getHeight() { 00712 assert (height <= maxHeight); 00713 return height; 00714 } 00715 00716 private void cleanUp(Node currentNode, int level) { 00717 if (currentNode == null) { 00718 return; 00719 } 00720 00721 Entry[] entries = currentNode.getEntries(); 00722 if (level == this.maxHeight) { 00723 for (int i = 0; i < entries.length; i++) { 00724 Entry e = entries[i]; 00725 e.setChild(null); 00726 } 00727 } else { 00728 for (int i = 0; i < entries.length; i++) { 00729 Entry e = entries[i]; 00730 cleanUp(e.getChild(), level + 1); 00731 } 00732 } 00733 } 00734 00739 //TODO: Microcluster unter dem Threshhold nich zur�ckgeben (WIe bei outdated entries) 00740 @Override 00741 public Clustering getMicroClusteringResult() { 00742 return getClustering(timestamp, -1); 00743 } 00744 00745 @Override 00746 public Clustering getClusteringResult() { 00747 return null; 00748 } 00749 00750 00755 public Clustering getClustering(long currentTime, int targetLevel) { 00756 if (root == null) { 00757 return null; 00758 } 00759 00760 Clustering clusters = new Clustering(); 00761 LinkedList<Node> queue = new LinkedList<Node>(); 00762 queue.add(root); 00763 00764 while (!queue.isEmpty()) { 00765 Node current = queue.remove(); 00766 // if (current == null) 00767 // continue; 00768 int currentLevel = current.getLevel(this); 00769 boolean isLeaf = (current.isLeaf() && currentLevel <= maxHeight) 00770 || currentLevel == maxHeight; 00771 00772 if (currentLevel == targetLevel 00773 || (targetLevel == - 1 && isLeaf)) { 00774 assert (currentLevel <= maxHeight); 00775 00776 Entry[] entries = current.getEntries(); 00777 for (int i = 0; i < entries.length; i++) { 00778 Entry entry = entries[i]; 00779 if (entry == null || entry.isEmpty()) { 00780 continue; 00781 } 00782 // XXX 00783 entry.makeOlder(currentTime, this.negLambda); 00784 if (entry.isIrrelevant(this.weightThreshold)) 00785 continue; 00786 00787 ClusKernel gaussKernel = new ClusKernel(entry.getData()); 00788 00789 // long diff = currentTime - entry.getTimestamp(); 00790 // if (diff > 0) { 00791 // gaussKernel.makeOlder(diff, negLambda); 00792 // } 00793 00794 clusters.add(gaussKernel); 00795 } 00796 } else if (!current.isLeaf()) { 00797 Entry[] entries = current.getEntries(); 00798 for (int i = 0; i < entries.length; i++) { 00799 Entry entry = entries[i]; 00800 00801 if (entry.isEmpty()) { 00802 continue; 00803 } 00804 00805 if (entry.isIrrelevant(weightThreshold)) { 00806 continue; 00807 } 00808 00809 queue.add(entry.getChild()); 00810 } 00811 } 00812 } 00813 00814 return clusters; 00815 } 00816 00817 00818 00819 /************************************************************************** 00820 * LOCAL CLASSES 00821 **************************************************************************/ 00826 class BestMergeInNode { 00827 00831 public int entryPos1; 00835 public int entryPos2; 00839 public double distance; 00840 00848 public BestMergeInNode(int pos1, int pos2, 00849 double distance) { 00850 assert (pos1 != pos2); 00851 00852 this.distance = distance; 00853 00854 if (pos1 < pos2) { 00855 this.entryPos1 = pos1; 00856 this.entryPos2 = pos2; 00857 } else { 00858 this.entryPos1 = pos2; 00859 this.entryPos2 = pos1; 00860 } 00861 } 00862 } 00863 00864 }