MOA 12.03
Real Time Analytics for Data Streams
|
00001 /* 00002 * NaiveBayes.java 00003 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 00004 * @author Richard Kirkby ([email protected]) 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 */ 00020 package moa.classifiers.bayes; 00021 00022 import moa.classifiers.AbstractClassifier; 00023 import moa.classifiers.core.attributeclassobservers.AttributeClassObserver; 00024 import moa.classifiers.core.attributeclassobservers.GaussianNumericAttributeClassObserver; 00025 import moa.classifiers.core.attributeclassobservers.NominalAttributeClassObserver; 00026 import moa.core.AutoExpandVector; 00027 import moa.core.DoubleVector; 00028 import moa.core.Measurement; 00029 import moa.core.StringUtils; 00030 import weka.core.Instance; 00031 00047 public class NaiveBayes extends AbstractClassifier { 00048 00049 private static final long serialVersionUID = 1L; 00050 00051 @Override 00052 public String getPurposeString() { 00053 return "Naive Bayes classifier: performs classic bayesian prediction while making naive assumption that all inputs are independent."; 00054 } 00055 00056 protected DoubleVector observedClassDistribution; 00057 00058 protected AutoExpandVector<AttributeClassObserver> attributeObservers; 00059 00060 @Override 00061 public void resetLearningImpl() { 00062 this.observedClassDistribution = new DoubleVector(); 00063 this.attributeObservers = new AutoExpandVector<AttributeClassObserver>(); 00064 } 00065 00066 @Override 00067 public void trainOnInstanceImpl(Instance inst) { 00068 this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight()); 00069 for (int i = 0; i < inst.numAttributes() - 1; i++) { 00070 int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); 00071 AttributeClassObserver obs = this.attributeObservers.get(i); 00072 if (obs == null) { 00073 obs = inst.attribute(instAttIndex).isNominal() ? newNominalClassObserver() 00074 : newNumericClassObserver(); 00075 this.attributeObservers.set(i, obs); 00076 } 00077 obs.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight()); 00078 } 00079 } 00080 00081 @Override 00082 public double[] getVotesForInstance(Instance inst) { 00083 return doNaiveBayesPrediction(inst, this.observedClassDistribution, 00084 this.attributeObservers); 00085 } 00086 00087 @Override 00088 protected Measurement[] getModelMeasurementsImpl() { 00089 return null; 00090 } 00091 00092 @Override 00093 public void getModelDescription(StringBuilder out, int indent) { 00094 for (int i = 0; i < this.observedClassDistribution.numValues(); i++) { 00095 StringUtils.appendIndented(out, indent, "Observations for "); 00096 out.append(getClassNameString()); 00097 out.append(" = "); 00098 out.append(getClassLabelString(i)); 00099 out.append(":"); 00100 StringUtils.appendNewlineIndented(out, indent + 1, 00101 "Total observed weight = "); 00102 out.append(this.observedClassDistribution.getValue(i)); 00103 out.append(" / prob = "); 00104 out.append(this.observedClassDistribution.getValue(i) 00105 / this.observedClassDistribution.sumOfValues()); 00106 for (int j = 0; j < this.attributeObservers.size(); j++) { 00107 StringUtils.appendNewlineIndented(out, indent + 1, 00108 "Observations for "); 00109 out.append(getAttributeNameString(j)); 00110 out.append(": "); 00111 // TODO: implement observer output 00112 out.append(this.attributeObservers.get(j)); 00113 } 00114 StringUtils.appendNewline(out); 00115 } 00116 } 00117 00118 @Override 00119 public boolean isRandomizable() { 00120 return false; 00121 } 00122 00123 protected AttributeClassObserver newNominalClassObserver() { 00124 return new NominalAttributeClassObserver(); 00125 } 00126 00127 protected AttributeClassObserver newNumericClassObserver() { 00128 return new GaussianNumericAttributeClassObserver(); 00129 } 00130 00131 public static double[] doNaiveBayesPrediction(Instance inst, 00132 DoubleVector observedClassDistribution, 00133 AutoExpandVector<AttributeClassObserver> attributeObservers) { 00134 double[] votes = new double[observedClassDistribution.numValues()]; 00135 double observedClassSum = observedClassDistribution.sumOfValues(); 00136 for (int classIndex = 0; classIndex < votes.length; classIndex++) { 00137 votes[classIndex] = observedClassDistribution.getValue(classIndex) 00138 / observedClassSum; 00139 for (int attIndex = 0; attIndex < inst.numAttributes() - 1; attIndex++) { 00140 int instAttIndex = modelAttIndexToInstanceAttIndex(attIndex, 00141 inst); 00142 AttributeClassObserver obs = attributeObservers.get(attIndex); 00143 if ((obs != null) && !inst.isMissing(instAttIndex)) { 00144 votes[classIndex] *= obs.probabilityOfAttributeValueGivenClass(inst.value(instAttIndex), classIndex); 00145 } 00146 } 00147 } 00148 // TODO: need logic to prevent underflow? 00149 return votes; 00150 } 00151 00152 public void manageMemory(int currentByteSize, int maxByteSize) { 00153 // TODO Auto-generated method stub 00154 } 00155 }