MOA 12.03
Real Time Analytics for Data Streams
NaiveBayes.java
Go to the documentation of this file.
00001 /*
00002  *    NaiveBayes.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.classifiers.bayes;
00021 
00022 import moa.classifiers.AbstractClassifier;
00023 import moa.classifiers.core.attributeclassobservers.AttributeClassObserver;
00024 import moa.classifiers.core.attributeclassobservers.GaussianNumericAttributeClassObserver;
00025 import moa.classifiers.core.attributeclassobservers.NominalAttributeClassObserver;
00026 import moa.core.AutoExpandVector;
00027 import moa.core.DoubleVector;
00028 import moa.core.Measurement;
00029 import moa.core.StringUtils;
00030 import weka.core.Instance;
00031 
00047 public class NaiveBayes extends AbstractClassifier {
00048 
00049     private static final long serialVersionUID = 1L;
00050 
00051     @Override
00052     public String getPurposeString() {
00053         return "Naive Bayes classifier: performs classic bayesian prediction while making naive assumption that all inputs are independent.";
00054     }
00055 
00056     protected DoubleVector observedClassDistribution;
00057 
00058     protected AutoExpandVector<AttributeClassObserver> attributeObservers;
00059 
00060     @Override
00061     public void resetLearningImpl() {
00062         this.observedClassDistribution = new DoubleVector();
00063         this.attributeObservers = new AutoExpandVector<AttributeClassObserver>();
00064     }
00065 
00066     @Override
00067     public void trainOnInstanceImpl(Instance inst) {
00068         this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight());
00069         for (int i = 0; i < inst.numAttributes() - 1; i++) {
00070             int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst);
00071             AttributeClassObserver obs = this.attributeObservers.get(i);
00072             if (obs == null) {
00073                 obs = inst.attribute(instAttIndex).isNominal() ? newNominalClassObserver()
00074                         : newNumericClassObserver();
00075                 this.attributeObservers.set(i, obs);
00076             }
00077             obs.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight());
00078         }
00079     }
00080 
00081     @Override
00082     public double[] getVotesForInstance(Instance inst) {
00083         return doNaiveBayesPrediction(inst, this.observedClassDistribution,
00084                 this.attributeObservers);
00085     }
00086 
00087     @Override
00088     protected Measurement[] getModelMeasurementsImpl() {
00089         return null;
00090     }
00091 
00092     @Override
00093     public void getModelDescription(StringBuilder out, int indent) {
00094         for (int i = 0; i < this.observedClassDistribution.numValues(); i++) {
00095             StringUtils.appendIndented(out, indent, "Observations for ");
00096             out.append(getClassNameString());
00097             out.append(" = ");
00098             out.append(getClassLabelString(i));
00099             out.append(":");
00100             StringUtils.appendNewlineIndented(out, indent + 1,
00101                     "Total observed weight = ");
00102             out.append(this.observedClassDistribution.getValue(i));
00103             out.append(" / prob = ");
00104             out.append(this.observedClassDistribution.getValue(i)
00105                     / this.observedClassDistribution.sumOfValues());
00106             for (int j = 0; j < this.attributeObservers.size(); j++) {
00107                 StringUtils.appendNewlineIndented(out, indent + 1,
00108                         "Observations for ");
00109                 out.append(getAttributeNameString(j));
00110                 out.append(": ");
00111                 // TODO: implement observer output
00112                 out.append(this.attributeObservers.get(j));
00113             }
00114             StringUtils.appendNewline(out);
00115         }
00116     }
00117 
00118     @Override
00119     public boolean isRandomizable() {
00120         return false;
00121     }
00122 
00123     protected AttributeClassObserver newNominalClassObserver() {
00124         return new NominalAttributeClassObserver();
00125     }
00126 
00127     protected AttributeClassObserver newNumericClassObserver() {
00128         return new GaussianNumericAttributeClassObserver();
00129     }
00130 
00131     public static double[] doNaiveBayesPrediction(Instance inst,
00132             DoubleVector observedClassDistribution,
00133             AutoExpandVector<AttributeClassObserver> attributeObservers) {
00134         double[] votes = new double[observedClassDistribution.numValues()];
00135         double observedClassSum = observedClassDistribution.sumOfValues();
00136         for (int classIndex = 0; classIndex < votes.length; classIndex++) {
00137             votes[classIndex] = observedClassDistribution.getValue(classIndex)
00138                     / observedClassSum;
00139             for (int attIndex = 0; attIndex < inst.numAttributes() - 1; attIndex++) {
00140                 int instAttIndex = modelAttIndexToInstanceAttIndex(attIndex,
00141                         inst);
00142                 AttributeClassObserver obs = attributeObservers.get(attIndex);
00143                 if ((obs != null) && !inst.isMissing(instAttIndex)) {
00144                     votes[classIndex] *= obs.probabilityOfAttributeValueGivenClass(inst.value(instAttIndex), classIndex);
00145                 }
00146             }
00147         }
00148         // TODO: need logic to prevent underflow?
00149         return votes;
00150     }
00151 
00152     public void manageMemory(int currentByteSize, int maxByteSize) {
00153         // TODO Auto-generated method stub
00154     }
00155 }
 All Classes Namespaces Files Functions Variables Enumerations