MOA 12.03
Real Time Analytics for Data Streams
AgrawalGenerator.java
Go to the documentation of this file.
00001 /*
00002  *    AgrawalGenerator.java
00003  *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
00004  *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
00005  *
00006  *    This program is free software; you can redistribute it and/or modify
00007  *    it under the terms of the GNU General Public License as published by
00008  *    the Free Software Foundation; either version 3 of the License, or
00009  *    (at your option) any later version.
00010  *
00011  *    This program is distributed in the hope that it will be useful,
00012  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *    GNU General Public License for more details.
00015  *
00016  *    You should have received a copy of the GNU General Public License
00017  *    along with this program. If not, see <http://www.gnu.org/licenses/>.
00018  *    
00019  */
00020 package moa.streams.generators;
00021 
00022 import weka.core.Attribute;
00023 import weka.core.DenseInstance;
00024 import weka.core.FastVector;
00025 import weka.core.Instance;
00026 import weka.core.Instances;
00027 
00028 import java.util.Random;
00029 
00030 import moa.core.InstancesHeader;
00031 import moa.core.ObjectRepository;
00032 import moa.options.AbstractOptionHandler;
00033 import moa.options.FlagOption;
00034 import moa.options.FloatOption;
00035 import moa.options.IntOption;
00036 import moa.streams.InstanceStream;
00037 import moa.tasks.TaskMonitor;
00038 
00059 public class AgrawalGenerator extends AbstractOptionHandler implements
00060         InstanceStream {
00061 
00062     @Override
00063     public String getPurposeString() {
00064         return "Generates one of ten different pre-defined loan functions.";
00065     }
00066 
00067     private static final long serialVersionUID = 1L;
00068 
00069     public IntOption functionOption = new IntOption("function", 'f',
00070             "Classification function used, as defined in the original paper.",
00071             1, 1, 10);
00072 
00073     public IntOption instanceRandomSeedOption = new IntOption(
00074             "instanceRandomSeed", 'i',
00075             "Seed for random generation of instances.", 1);
00076 
00077     public FloatOption peturbFractionOption = new FloatOption("peturbFraction",
00078             'p',
00079             "The amount of peturbation (noise) introduced to numeric values.",
00080             0.05, 0.0, 1.0);
00081 
00082     public FlagOption balanceClassesOption = new FlagOption("balanceClasses",
00083             'b', "Balance the number of instances of each class.");
00084 
00085     protected interface ClassFunction {
00086 
00087         public int determineClass(double salary, double commission, int age,
00088                 int elevel, int car, int zipcode, double hvalue, int hyears,
00089                 double loan);
00090     }
00091 
00092     protected static ClassFunction[] classificationFunctions = {
00093         // function 1
00094         new ClassFunction() {
00095 
00096     @Override
00097     public int determineClass(double salary, double commission,
00098             int age, int elevel, int car, int zipcode,
00099             double hvalue, int hyears, double loan) {
00100         return ((age < 40) || (60 <= age)) ? 0 : 1;
00101     }
00102 },
00103         // function 2
00104         new ClassFunction() {
00105 
00106     @Override
00107     public int determineClass(double salary, double commission,
00108             int age, int elevel, int car, int zipcode,
00109             double hvalue, int hyears, double loan) {
00110         if (age < 40) {
00111             return ((50000 <= salary) && (salary <= 100000)) ? 0
00112                     : 1;
00113         } else if (age < 60) {// && age >= 40
00114             return ((75000 <= salary) && (salary <= 125000)) ? 0
00115                     : 1;
00116         } else {// age >= 60
00117             return ((25000 <= salary) && (salary <= 75000)) ? 0 : 1;
00118         }
00119     }
00120 },
00121         // function 3
00122         new ClassFunction() {
00123 
00124     @Override
00125     public int determineClass(double salary, double commission,
00126             int age, int elevel, int car, int zipcode,
00127             double hvalue, int hyears, double loan) {
00128         if (age < 40) {
00129             return ((elevel == 0) || (elevel == 1)) ? 0 : 1;
00130         } else if (age < 60) { // && age >= 40
00131             return ((elevel == 1) || (elevel == 2) || (elevel == 3)) ? 0
00132                     : 1;
00133         } else { // age >= 60
00134             return ((elevel == 2) || (elevel == 3) || (elevel == 4)) ? 0
00135                     : 1;
00136         }
00137     }
00138 },
00139         // function 4
00140         new ClassFunction() {
00141 
00142     @Override
00143     public int determineClass(double salary, double commission,
00144             int age, int elevel, int car, int zipcode,
00145             double hvalue, int hyears, double loan) {
00146         if (age < 40) {
00147             if ((elevel == 0) || (elevel == 1)) {
00148                 return ((25000 <= salary) && (salary <= 75000)) ? 0
00149                         : 1;
00150             }
00151             return ((50000 <= salary) && (salary <= 100000)) ? 0
00152                     : 1;
00153         } else if (age < 60) {// && age >= 40
00154             if ((elevel == 1) || (elevel == 2) || (elevel == 3)) {
00155                 return ((50000 <= salary) && (salary <= 100000)) ? 0
00156                         : 1;
00157             }
00158             return ((75000 <= salary) && (salary <= 125000)) ? 0
00159                     : 1;
00160         } else {// age >= 60
00161             if ((elevel == 2) || (elevel == 3) || (elevel == 4)) {
00162                 return ((50000 <= salary) && (salary <= 100000)) ? 0
00163                         : 1;
00164             }
00165             return ((25000 <= salary) && (salary <= 75000)) ? 0 : 1;
00166         }
00167     }
00168 },
00169         // function 5
00170         new ClassFunction() {
00171 
00172     @Override
00173     public int determineClass(double salary, double commission,
00174             int age, int elevel, int car, int zipcode,
00175             double hvalue, int hyears, double loan) {
00176         if (age < 40) {
00177             if ((50000 <= salary) && (salary <= 100000)) {
00178                 return ((100000 <= loan) && (loan <= 300000)) ? 0
00179                         : 1;
00180             }
00181             return ((200000 <= loan) && (loan <= 400000)) ? 0 : 1;
00182         } else if (age < 60) {// && age >= 40
00183             if ((75000 <= salary) && (salary <= 125000)) {
00184                 return ((200000 <= loan) && (loan <= 400000)) ? 0
00185                         : 1;
00186             }
00187             return ((300000 <= loan) && (loan <= 500000)) ? 0 : 1;
00188         } else {// age >= 60
00189             if ((25000 <= salary) && (salary <= 75000)) {
00190                 return ((300000 <= loan) && (loan <= 500000)) ? 0
00191                         : 1;
00192             }
00193             return ((100000 <= loan) && (loan <= 300000)) ? 0 : 1;
00194         }
00195     }
00196 },
00197         // function 6
00198         new ClassFunction() {
00199 
00200     @Override
00201     public int determineClass(double salary, double commission,
00202             int age, int elevel, int car, int zipcode,
00203             double hvalue, int hyears, double loan) {
00204         double totalSalary = salary + commission;
00205         if (age < 40) {
00206             return ((50000 <= totalSalary) && (totalSalary <= 100000)) ? 0
00207                     : 1;
00208         } else if (age < 60) {// && age >= 40
00209             return ((75000 <= totalSalary) && (totalSalary <= 125000)) ? 0
00210                     : 1;
00211         } else {// age >= 60
00212             return ((25000 <= totalSalary) && (totalSalary <= 75000)) ? 0
00213                     : 1;
00214         }
00215     }
00216 },
00217         // function 7
00218         new ClassFunction() {
00219 
00220     @Override
00221     public int determineClass(double salary, double commission,
00222             int age, int elevel, int car, int zipcode,
00223             double hvalue, int hyears, double loan) {
00224         double disposable = (2.0 * (salary + commission) / 3.0
00225                 - loan / 5.0 - 20000.0);
00226         return disposable > 0 ? 0 : 1;
00227     }
00228 },
00229         // function 8
00230         new ClassFunction() {
00231 
00232     @Override
00233     public int determineClass(double salary, double commission,
00234             int age, int elevel, int car, int zipcode,
00235             double hvalue, int hyears, double loan) {
00236         double disposable = (2.0 * (salary + commission) / 3.0
00237                 - 5000.0 * elevel - 20000.0);
00238         return disposable > 0 ? 0 : 1;
00239     }
00240 },
00241         // function 9
00242         new ClassFunction() {
00243 
00244     @Override
00245     public int determineClass(double salary, double commission,
00246             int age, int elevel, int car, int zipcode,
00247             double hvalue, int hyears, double loan) {
00248         double disposable = (2.0 * (salary + commission) / 3.0
00249                 - 5000.0 * elevel - loan / 5.0 - 10000.0);
00250         return disposable > 0 ? 0 : 1;
00251     }
00252 },
00253         // function 10
00254         new ClassFunction() {
00255 
00256     @Override
00257     public int determineClass(double salary, double commission,
00258             int age, int elevel, int car, int zipcode,
00259             double hvalue, int hyears, double loan) {
00260         double equity = 0.0;
00261         if (hyears >= 20) {
00262             equity = hvalue * (hyears - 20.0) / 10.0;
00263         }
00264         double disposable = (2.0 * (salary + commission) / 3.0
00265                 - 5000.0 * elevel + equity / 5.0 - 10000.0);
00266         return disposable > 0 ? 0 : 1;
00267     }
00268 }};
00269 
00270     protected InstancesHeader streamHeader;
00271 
00272     protected Random instanceRandom;
00273 
00274     protected boolean nextClassShouldBeZero;
00275 
00276     @Override
00277     protected void prepareForUseImpl(TaskMonitor monitor,
00278             ObjectRepository repository) {
00279         // generate header
00280         FastVector attributes = new FastVector();
00281         attributes.addElement(new Attribute("salary"));
00282         attributes.addElement(new Attribute("commission"));
00283         attributes.addElement(new Attribute("age"));
00284         FastVector elevelLabels = new FastVector();
00285         for (int i = 0; i < 5; i++) {
00286             elevelLabels.addElement("level" + i);
00287         }
00288         attributes.addElement(new Attribute("elevel", elevelLabels));
00289         FastVector carLabels = new FastVector();
00290         for (int i = 0; i < 20; i++) {
00291             carLabels.addElement("car" + (i + 1));
00292         }
00293         attributes.addElement(new Attribute("car", carLabels));
00294         FastVector zipCodeLabels = new FastVector();
00295         for (int i = 0; i < 9; i++) {
00296             zipCodeLabels.addElement("zipcode" + (i + 1));
00297         }
00298         attributes.addElement(new Attribute("zipcode", zipCodeLabels));
00299         attributes.addElement(new Attribute("hvalue"));
00300         attributes.addElement(new Attribute("hyears"));
00301         attributes.addElement(new Attribute("loan"));
00302         FastVector classLabels = new FastVector();
00303         classLabels.addElement("groupA");
00304         classLabels.addElement("groupB");
00305         attributes.addElement(new Attribute("class", classLabels));
00306         this.streamHeader = new InstancesHeader(new Instances(
00307                 getCLICreationString(InstanceStream.class), attributes, 0));
00308         this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
00309         restart();
00310     }
00311 
00312     @Override
00313     public long estimatedRemainingInstances() {
00314         return -1;
00315     }
00316 
00317     @Override
00318     public InstancesHeader getHeader() {
00319         return this.streamHeader;
00320     }
00321 
00322     @Override
00323     public boolean hasMoreInstances() {
00324         return true;
00325     }
00326 
00327     @Override
00328     public boolean isRestartable() {
00329         return true;
00330     }
00331 
00332     @Override
00333     public Instance nextInstance() {
00334         double salary = 0, commission = 0, hvalue = 0, loan = 0;
00335         int age = 0, elevel = 0, car = 0, zipcode = 0, hyears = 0, group = 0;
00336         boolean desiredClassFound = false;
00337         while (!desiredClassFound) {
00338             // generate attributes
00339             salary = 20000.0 + 130000.0 * this.instanceRandom.nextDouble();
00340             commission = (salary >= 75000.0) ? 0
00341                     : (10000.0 + 65000.0 * this.instanceRandom.nextDouble());
00342             // true to c implementation:
00343             // if (instanceRandom.nextDouble() < 0.5 && salary < 75000.0)
00344             // commission = 10000.0 + 65000.0 * instanceRandom.nextDouble();
00345             age = 20 + this.instanceRandom.nextInt(61);
00346             elevel = this.instanceRandom.nextInt(5);
00347             car = this.instanceRandom.nextInt(20);
00348             zipcode = this.instanceRandom.nextInt(9);
00349             hvalue = (9.0 - zipcode) * 100000.0
00350                     * (0.5 + this.instanceRandom.nextDouble());
00351             hyears = 1 + this.instanceRandom.nextInt(30);
00352             loan = this.instanceRandom.nextDouble() * 500000.0;
00353             // determine class
00354             group = classificationFunctions[this.functionOption.getValue() - 1].determineClass(salary, commission, age, elevel, car,
00355                     zipcode, hvalue, hyears, loan);
00356             if (!this.balanceClassesOption.isSet()) {
00357                 desiredClassFound = true;
00358             } else {
00359                 // balance the classes
00360                 if ((this.nextClassShouldBeZero && (group == 0))
00361                         || (!this.nextClassShouldBeZero && (group == 1))) {
00362                     desiredClassFound = true;
00363                     this.nextClassShouldBeZero = !this.nextClassShouldBeZero;
00364                 } // else keep searching
00365             }
00366         }
00367         // perturb values
00368         if (this.peturbFractionOption.getValue() > 0.0) {
00369             salary = perturbValue(salary, 20000, 150000);
00370             if (commission > 0) {
00371                 commission = perturbValue(commission, 10000, 75000);
00372             }
00373             age = (int) Math.round(perturbValue(age, 20, 80));
00374             hvalue = perturbValue(hvalue, (9.0 - zipcode) * 100000.0, 0, 135000);
00375             hyears = (int) Math.round(perturbValue(hyears, 1, 30));
00376             loan = perturbValue(loan, 0, 500000);
00377         }
00378         // construct instance
00379         InstancesHeader header = getHeader();
00380         Instance inst = new DenseInstance(header.numAttributes());
00381         inst.setValue(0, salary);
00382         inst.setValue(1, commission);
00383         inst.setValue(2, age);
00384         inst.setValue(3, elevel);
00385         inst.setValue(4, car);
00386         inst.setValue(5, zipcode);
00387         inst.setValue(6, hvalue);
00388         inst.setValue(7, hyears);
00389         inst.setValue(8, loan);
00390         inst.setDataset(header);
00391         inst.setClassValue(group);
00392         return inst;
00393     }
00394 
00395     protected double perturbValue(double val, double min, double max) {
00396         return perturbValue(val, max - min, min, max);
00397     }
00398 
00399     protected double perturbValue(double val, double range, double min,
00400             double max) {
00401         val += range * (2.0 * (this.instanceRandom.nextDouble() - 0.5))
00402                 * this.peturbFractionOption.getValue();
00403         if (val < min) {
00404             val = min;
00405         } else if (val > max) {
00406             val = max;
00407         }
00408         return val;
00409     }
00410 
00411     @Override
00412     public void restart() {
00413         this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
00414         this.nextClassShouldBeZero = false;
00415     }
00416 
00417     @Override
00418     public void getDescription(StringBuilder sb, int indent) {
00419         // TODO Auto-generated method stub
00420     }
00421 }
 All Classes Namespaces Files Functions Variables Enumerations