/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Ridor.java * Copyright (C) 2001 Xin Xu * */ package weka.classifiers.rules; import java.io.*; import java.util.*; import weka.core.*; import weka.classifiers.*; /** * The implementation of a RIpple-DOwn Rule learner. * * It generates the default rule first and then the exceptions for the default rule * with the least (weighted) error rate. Then it generates the "best" exceptions for * each exception and iterates until pure. Thus it performs a tree-like expansion of * exceptions and the leaf has only default rule but no exceptions.
* The exceptions are a set of rules that predict a class other than the class of the default * rule. IREP is used to find the exceptions.

* There are five inner classes defined in this class.
* The first is Ridor_node, which implements one node in the Ridor tree. It's basically * composed of a default class and a set of exception rules to the default class.
* The second inner class is RidorRule, which implements a single exception rule * using REP.
* The last three inner classes are only used in RidorRule. They are Antd, NumericAntd * and NominalAntd, which all implement a single antecedent in the RidorRule.
* The Antd class is an abstract class, which has two subclasses, NumericAntd and * NominalAntd, to implement the corresponding abstract functions. These two subclasses * implement the functions related to an antecedent with a numeric attribute and a nominal * attribute respectively.

* * * @author: Xin XU (xx5@cs.waikato.ac.nz) * @version $Revision: 1.12 $ */ public class Ridor extends Classifier implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler { /** The number of folds to split data into Grow and Prune for IREP */ private int m_Folds = 3; /** The number of shuffles performed on the data for randomization */ private int m_Shuffle = 1; /** Random object for randomization */ private Random m_Random = null; /** The seed to perform randomization */ private int m_Seed = 1; /** Whether use error rate on all the data */ private boolean m_IsAllErr = false; /** Whether use majority class as default class */ private boolean m_IsMajority = false; /** The root of Ridor */ private Ridor_node m_Root = null; /** The class attribute of the data */ private Attribute m_Class; /** Statistics of the data */ private double m_Cover, m_Err; /** The minimal number of instance weights within a split*/ private double m_MinNo = 2.0; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "The implementation of a RIpple-DOwn Rule learner. " + "It generates a default rule first and then the exceptions for the default rule " + "with the least (weighted) error rate. Then it generates the \"best\" exceptions for " + "each exception and iterates until pure. Thus it performs a tree-like expansion of " + "exceptions." + "The exceptions are a set of rules that predict classes other than the default. " + "IREP is used to generate the exceptions."; } /** * Private class implementing the single node of Ridor. * It consists of a default class label, a set of exceptions to the default rule * and the exceptions to each exception */ private class Ridor_node implements Serializable { /** The default class label */ private double defClass = Double.NaN; /** The set of exceptions of the default rule. 
Each element also has its own exceptions and the consequent of each rule is determined by its exceptions */ private RidorRule[] rules = null; /** The exceptions of the exception rules */ private Ridor_node[] excepts = null; /** The level of this node */ private int level; /** "Get" member functions */ public double getDefClass() { return defClass; } public RidorRule[] getRules() { return rules; } public Ridor_node[] getExcepts() { return excepts; } /** * Builds a ripple-down manner rule learner. * * @param dataByClass the divided data by their class label. The real class * labels of the instances are all set to 0 * @param lvl the level of the parent node * @exception Exception if ruleset of this node cannot be built */ public void findRules(Instances[] dataByClass, int lvl) throws Exception { Vector finalRules = null; int clas = -1; double[] isPure = new double[dataByClass.length]; int numMajority = 0; level = lvl + 1; for(int h=0; h < dataByClass.length; h++){ isPure[h] = dataByClass[h].sumOfWeights(); if(Utils.grOrEq(isPure[h], m_Folds)) numMajority++; // Count how many class labels have enough instances } if(numMajority <= 1){ // The data is pure or not enough defClass = (double)Utils.maxIndex(isPure); return; } double total = Utils.sum(isPure); if(m_IsMajority){ defClass = (double)Utils.maxIndex(isPure); Instances data = new Instances(dataByClass[(int)defClass]); int index = data.classIndex(); for(int j=0; j= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label double classCount = total - isPure[(int)defClass]; finalRules = new Vector(); buildRuleset(data, classCount, finalRules); if(finalRules.size() == 0) // No good rules built return; } else{ double maxAcRt = isPure[Utils.maxIndex(isPure)] / total; // Find default class for(int i=0; i < dataByClass.length; i++){ if(isPure[i] >= m_Folds){ Instances data = new Instances(dataByClass[i]); int index = 
data.classIndex(); for(int j=0; j= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label /* Build a set of rules */ double classCount = data.sumOfWeights() - isPure[i]; Vector ruleset = new Vector(); double wAcRt = buildRuleset(data, classCount, ruleset); if(Utils.gr(wAcRt, maxAcRt)){ finalRules = ruleset; maxAcRt = wAcRt; clas = i; } } } if(finalRules == null){ // No good rules found, set majority class as default defClass = (double)Utils.maxIndex(isPure); return; } defClass = (double)clas; } /* Store the exception rules and default class in this node */ int size = finalRules.size(); rules = new RidorRule[size]; excepts = new Ridor_node[size]; for(int l=0; l < size; l++) rules[l] = (RidorRule)finalRules.elementAt(l); /* Build exceptions for each exception rule */ Instances[] uncovered = dataByClass; if(level == 1) // The error of default rule m_Err = total - uncovered[(int)defClass].sumOfWeights(); uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0); for(int m=0; m < size; m++){ /* The data covered by this rule, they are also deducted from the original data */ Instances[][] dvdData = divide(rules[m], uncovered); Instances[] covered = dvdData[0]; // Data covered by the rule //uncovered = dvdData[1]; // Data not covered by the rule excepts[m] = new Ridor_node(); excepts[m].findRules(covered, level);// Find exceptions on the covered data } } /** * Private function to build a rule set and return the weighted avg of accuracy * rate of rules in the set. 
* * @param insts the data used to build ruleset * @param classCount the counts of the instances with the predicted class but not * yet covered by the ruleset * @param ruleset the ruleset to be built * @return the weighted accuracy rate of the ruleset * @exception if the rules cannot be built properly */ private double buildRuleset(Instances insts, double classCount, Vector ruleset) throws Exception { Instances data = new Instances(insts); double wAcRt = 0; // The weighted accuracy rate of this ruleset double total = data.sumOfWeights(); while( classCount >= m_Folds ){ // Data is not pure RidorRule bestRule = null; double bestWorthRate= -1; // The best worth achieved by double bestWorth = -1; // randomization of the data RidorRule rule = new RidorRule(); rule.setPredictedClass(0); // Predict the classes other than default for(int j = 0; j < m_Shuffle; j++){ if(m_Shuffle > 1) data.randomize(m_Random); rule.buildClassifier(data); double wr, w; // Worth rate and worth if(m_IsAllErr){ wr = (rule.getWorth()+rule.getAccuG()) / (rule.getCoverP()+rule.getCoverG()); w = rule.getWorth() + rule.getAccuG(); } else{ wr = rule.getWorthRate(); w = rule.getWorth(); } if(Utils.gr(wr, bestWorthRate) || (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){ bestRule = rule; bestWorthRate = wr; bestWorth = w; } } if (bestRule == null) throw new Exception("Something wrong here inside findRule()!"); if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds())) break; // No more good rules generated Instances newData = new Instances(data); data = new Instances(newData, 0);// Empty the data classCount = 0; double cover = 0; // Coverage of this rule on whole data for(int l=0; l 0); } /** * Build one rule using the growing data * * @param data the growing data used to build the rule */ private void grow(Instances data){ Instances growData = new Instances(data); m_AccuG = computeDefAccu(growData); m_CoverG = growData.sumOfWeights(); /* Compute the default accurate rate of the growing data */ 
double defAcRt= m_AccuG / m_CoverG; /* Keep the record of which attributes have already been used*/ boolean[] used=new boolean [growData.numAttributes()]; for (int k=0; k 0; z--) if(Utils.sm(worthRt[z], worthRt[z-1])) m_Antds.removeElementAt(z); else break; /* Check whether this rule is a default rule */ if(m_Antds.size() == 1){ defAccu = computeDefAccu(pruneData); defAccuRate = defAccu/total; // Compute def. accuracy if(Utils.sm(worthRt[0], defAccuRate)){ // Becomes a default rule m_Antds.removeAllElements(); } } /* Update the worth parameters of this rule*/ int antdsSize = m_Antds.size(); if(antdsSize != 0){ // Not a default rule m_Worth = worthValue[antdsSize-1]; // WorthValues of the last antecedent m_WorthRate = worthRt[antdsSize-1]; m_CoverP = coverage[antdsSize-1]; Antd last = (Antd)m_Antds.lastElement(); m_CoverG = last.getCover(); m_AccuG = last.getAccu(); } else{ // Default rule m_Worth = defAccu; // Default WorthValues m_WorthRate = defAccuRate; m_CoverP = total; } } /** * Private function to compute default number of accurate instances * in the specified data for m_Class * * @param data the data in question * @return the default accuracy number */ private double computeDefAccu(Instances data){ double defAccu=0; for(int i=0; i 0){ for(int j=0; j< (m_Antds.size()-1); j++) text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and "); text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")"); } text.append(" => " + att + " = " + cl); text.append(" ("+m_CoverG+"/"+(m_CoverG - m_AccuG)+") ["+ m_CoverP+"/"+(m_CoverP - m_Worth)+"]"); return text.toString(); } /** * Prints this rule * * @return a textual description of this rule */ public String toString() { return toString(m_ClassAttribute.name(), m_ClassAttribute.value((int)m_Class)); } } /** * The single antecedent in the rule, which is composed of an attribute and * the corresponding value. 
There are two inherited classes, namely NumericAntd * and NominalAntd in which the attributes are numeric and nominal respectively. */ private abstract class Antd implements Serializable { /* The attribute of the antecedent */ protected Attribute att; /* The attribute value of the antecedent. For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */ protected double value; /* The maximum infoGain achieved by this antecedent test */ protected double maxInfoGain; /* The accurate rate of this antecedent test on the growing data */ protected double accuRate; /* The coverage of this antecedent */ protected double cover; /* The accurate data for this antecedent */ protected double accu; /* Constructor*/ public Antd(Attribute a){ att=a; value=Double.NaN; maxInfoGain = 0; accuRate = Double.NaN; cover = Double.NaN; accu = Double.NaN; } /* The abstract members for inheritance */ public abstract Instances[] splitData(Instances data, double defAcRt, double cla); public abstract boolean isCover(Instance inst); public abstract String toString(); /* Get functions of this antecedent */ public Attribute getAttr(){ return att; } public double getAttrValue(){ return value; } public double getMaxInfoGain(){ return maxInfoGain; } public double getAccuRate(){ return accuRate; } public double getAccu(){ return accu; } public double getCover(){ return cover; } } /** * The antecedent with numeric attribute */ private class NumericAntd extends Antd{ /* The split point for this numeric antecedent */ private double splitPoint; /* Constructor*/ public NumericAntd(Attribute a){ super(a); splitPoint = Double.NaN; } /* Get split point of this numeric antecedent */ public double getSplitPoint(){ return splitPoint; } /** * Implements the splitData function. * This procedure is to split the data into two bags according * to the information gain of the numeric attribute value * The maximum infoGain is also calculated. 
* * @param insts the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances insts, double defAcRt, double cl){ Instances data = new Instances(insts); data.sort(att); int total=data.numInstances();// Total number of instances without // missing value for att int split=1; // Current split position int prev=0; // Previous split position int finalSplit=split; // Final split position maxInfoGain = 0; value = 0; // Compute minimum number of Instances required in each split double minSplit = 0.1 * (data.sumOfWeights()) / 2.0; if (Utils.smOrEq(minSplit,m_MinNo)) minSplit = m_MinNo; else if (Utils.gr(minSplit,25)) minSplit = 25; double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0; for(int x=0; x "; return (att.name() + symbol + Utils.doubleToString(splitPoint, 6)); } } /** * The antecedent with nominal attribute */ private class NominalAntd extends Antd{ /* The parameters of infoGain calculated for each attribute value */ private double[] accurate; private double[] coverage; private double[] infoGain; /* Constructor*/ public NominalAntd(Attribute a){ super(a); int bag = att.numValues(); accurate = new double[bag]; coverage = new double[bag]; infoGain = new double[bag]; } /** * Implements the splitData function. * This procedure is to split the data into bags according * to the nominal attribute value * The infoGain for each bag is also calculated. * * @param data the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances data, double defAcRt, double cl){ int bag = att.numValues(); Instances[] splitData = new Instances[bag]; for(int x=0; x=2 splits have more than the minimal data int count=0; for(int x=0; x * * -F number
* Set number of folds for reduced error pruning. One fold is * used as the pruning set. (Default: 3)

* * -S number
* Set number of shuffles for randomization. (Default: 1)

* * -A
* Set flag of whether to use the error rate of all the data to select * the default class in each step. If not set, the learner will only use * the error rate in the pruning data

* * -M
* Set flag of whether to use the majority class as the default class * in each step instead of choosing default class based on the error rate * (if the flag is not set)

* * -N number
* Set the minimal weights of instances within a split. * (Default: 2)

* * @return an enumeration of all the available options */
  public Enumeration listOptions() {
    Vector newVector = new Vector(6);

    newVector.addElement(new Option("\tSet number of folds for IREP\n"
        + "\tOne fold is used as pruning set.\n"
        + "\t(default 3)", "F", 1, "-F <number of folds>"));

    // The advertised default must match the value setOptions() falls back to (1).
    newVector.addElement(new Option("\tSet number of shuffles to randomize\n"
        + "\tthe data in order to get better rule.\n"
        + "\t(default 1)", "S", 1, "-S <number of shuffles>"));

    newVector.addElement(new Option("\tSet flag of whether use the error rate \n"
        + "\tof all the data to select the default class\n"
        + "\tin each step. If not set, the learner will only use\n"
        + "\tthe error rate in the pruning data", "A", 0, "-A"));

    newVector.addElement(new Option("\t Set flag of whether use the majority class as\n"
        + "\tthe default class in each step instead of \n"
        + "\tchoosing default class based on the error rate\n"
        + "\t(if the flag is not set)", "M", 0, "-M"));

    newVector.addElement(new Option("\tSet the minimal weights of instances\n"
        + "\twithin a split.\n"
        + "\t(default 2.0)", "N", 1, "-N <min. weights>"));

    // setOptions() parses -s, so it is listed here as well.
    // NOTE(review): a command-line front end may consume -s itself before the
    // remaining options reach this classifier -- confirm against the caller.
    newVector.addElement(new Option("\tSet the seed used for randomizing the data.\n"
        + "\t(default 1)", "s", 1, "-s <seed>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. Every option that is absent from the
   * list is reset to its default value: -F 3, -S 1, -s 1, -N 2.0, and the
   * flags -A and -M switched off.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported or a numeric
   *            argument cannot be parsed
   */
  public void setOptions(String[] options) throws Exception {
    String numFoldsString = Utils.getOption('F', options);
    if (numFoldsString.length() != 0) {
      m_Folds = Integer.parseInt(numFoldsString);
    } else {
      m_Folds = 3;
    }

    String numShuffleString = Utils.getOption('S', options);
    if (numShuffleString.length() != 0) {
      m_Shuffle = Integer.parseInt(numShuffleString);
    } else {
      m_Shuffle = 1;
    }

    // Lower-case 's' is the seed; upper-case 'S' above is the shuffle count.
    String seedString = Utils.getOption('s', options);
    if (seedString.length() != 0) {
      m_Seed = Integer.parseInt(seedString);
    } else {
      m_Seed = 1;
    }

    String minNoString = Utils.getOption('N', options);
    if (minNoString.length() != 0) {
      m_MinNo = Double.parseDouble(minNoString);
    } else {
      m_MinNo = 2.0;
    }

    m_IsAllErr = Utils.getFlag('A', options);
    m_IsMajority = Utils.getFlag('M', options);
  }

  /**
   * Gets the current settings of the Classifier. The seed is included so
   * that passing the result back to setOptions restores every setting;
   * unused trailing slots are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    // Four valued options (two slots each) plus at most two flags.
    String[] options = new String[10];
    int current = 0;

    options[current++] = "-F";
    options[current++] = "" + m_Folds;
    options[current++] = "-S";
    options[current++] = "" + m_Shuffle;
    options[current++] = "-N";
    options[current++] = "" + m_MinNo;
    options[current++] = "-s";
    options[current++] = "" + m_Seed;

    if (m_IsAllErr) {
      options[current++] = "-A";
    }
    if (m_IsMajority) {
      options[current++] = "-M";
    }

    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /* Set and get members for parameters */

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String foldsTipText() {
    return "Determines the amount of data used for pruning. One fold is used for "
      + "pruning, the rest for growing the rules.";
  }

  /**
   * Sets the number of folds.
   * @param fold the new number of folds
   */
  public void setFolds(int fold) {
    m_Folds = fold;
  }

  /**
   * Gets the number of folds.
   * @return the current number of folds
   */
  public int getFolds() {
    return m_Folds;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String shuffleTipText() {
    return "Determines how often the data is shuffled before a rule "
      + "is chosen. If > 1, a rule is learned multiple times and the "
      + "most accurate rule is chosen.";
  }

  /**
   * Sets the number of shuffles.
   * @param sh the new number of shuffles
   */
  public void setShuffle(int sh) {
    m_Shuffle = sh;
  }

  /**
   * Gets the number of shuffles.
   * @return the current number of shuffles
   */
  public int getShuffle() {
    return m_Shuffle;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The seed used for randomizing the data.";
  }

  /**
   * Sets the randomization seed.
   * @param s the new seed
   */
  public void setSeed(int s) {
    m_Seed = s;
  }

  /**
   * Gets the randomization seed.
   * @return the current seed
   */
  public int getSeed() {
    return m_Seed;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String wholeDataErrTipText() {
    return "Whether worth of rule is computed based on all the data "
      + "or just based on data covered by rule.";
  }

  /**
   * Sets the flag that corresponds to the -A option.
   * @param a the new flag value
   */
  public void setWholeDataErr(boolean a) {
    m_IsAllErr = a;
  }

  /**
   * Gets the flag that corresponds to the -A option.
   * @return the current flag value
   */
  public boolean getWholeDataErr() {
    return m_IsAllErr;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String majorityClassTipText() {
    return "Whether the majority class is used as default.";
  }

  /**
   * Sets the flag that corresponds to the -M option.
   * @param m the new flag value
   */
  public void setMajorityClass(boolean m) {
    m_IsMajority = m;
  }

  /**
   * Gets the flag that corresponds to the -M option.
   * @return the current flag value
   */
  public boolean getMajorityClass() {
    return m_IsMajority;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNoTipText() {
    return "The minimum total weight of the instances in a rule.";
  }

  /**
   * Sets the minimal weight that corresponds to the -N option.
   * @param m the new minimal weight
   */
  public void setMinNo(double m) {
    m_MinNo = m;
  }

  /**
   * Gets the minimal weight that corresponds to the -N option.
   * @return the current minimal weight
   */
  public double getMinNo() {
    return m_MinNo;
  }

  /**
   * Returns an enumeration of the additional measure names
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector(1);
    newVector.addElement("measureNumRules");
    return newVector.elements();
  }

  /**
   * Returns the value of the named measure. The name is compared
   * case-insensitively; the only supported measure is "measureNumRules".
   *
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @exception IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    // Literal-first comparison: a null name is reported as "not supported"
    // through the documented exception instead of a NullPointerException.
    if ("measureNumRules".equalsIgnoreCase(additionalMeasureName)) {
      return numRules();
    }
    throw new IllegalArgumentException(additionalMeasureName
        + " not supported (Ripple down rule learner)");
  }

  /**
   * Measure the number of rules in total in the model
   *
   * @return the number of rules reported by the root node plus one for
   *         the default rule; 1 if no model has been built
   */
  private double numRules() {
    int size = 0;
    if (m_Root != null) {
      size = m_Root.size();
    }
    return (double) (size + 1); // Add the default rule
  }

  /**
   * Prints the all the rules of the rule learner.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Root == null) {
      return "RIpple DOwn Rule Learner(Ridor): No model built yet.";
    }

    return ("RIpple DOwn Rule Learner(Ridor) rules\n"
        + "--------------------------------------\n\n"
        + m_Root.toString()
        + "\nTotal number of rules (incl. the default rule): "
        + (int) numRules());
  }

  /**
   * Main method.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    try {
      System.out.println(Evaluation.evaluateModel(new Ridor(), args));
    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }
}