/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * BayesNet.java * Copyright (C) 2001 Remco Bouckaert * */ package weka.classifiers.bayes; import java.util.*; import weka.core.*; import weka.estimators.*; import weka.filters.supervised.attribute.Discretize; import weka.filters.unsupervised.attribute.ReplaceMissingValues; import weka.classifiers.*; import weka.classifiers.bayes.net.*; import weka.classifiers.bayes.net.estimate.DiscreteEstimatorBayes; import weka.classifiers.bayes.net.search.*; import weka.classifiers.bayes.net.search.local.*; import weka.classifiers.bayes.net.estimate.*; /** * Base class for a Bayes Network classifier. Provides datastructures (network structure, * conditional probability distributions, etc.) and facilities common to Bayes Network * learning algorithms like K2 and B. * Works with nominal variables and no missing values only. * * For further documentation, see * = options.length) { return new String[0]; } options[i++] = ""; String [] result = new String [options.length - i]; j = i; while ((j < options.length) && !(options[j].equals("-E"))) { result[j - i] = options[j]; options[j] = ""; j++; } while(j < options.length) { result[j - i] = ""; j++; } return result; } } return new String [0]; } /** * Gets the current settings of the classifier. 
* * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { String[] searchOptions = m_SearchAlgorithm.getOptions(); String[] estimatorOptions = m_BayesNetEstimator.getOptions(); String[] options = new String[11 + searchOptions.length + estimatorOptions.length]; int current = 0; if (!m_bUseADTree) { options[current++] = "-D"; } if (m_otherBayesNet != null) { options[current++] = "-B"; options[current++] = ((BIFReader) m_otherBayesNet).getFileName(); } options[current++] = "-Q"; options[current++] = "" + getSearchAlgorithm().getClass().getName(); options[current++] = "--"; for (int iOption = 0; iOption < searchOptions.length; iOption++) { options[current++] = searchOptions[iOption]; } options[current++] = "-E"; options[current++] = "" + getEstimator().getClass().getName(); options[current++] = "--"; for (int iOption = 0; iOption < estimatorOptions.length; iOption++) { options[current++] = estimatorOptions[iOption]; } // Fill up rest with empty strings, not nulls! while (current < options.length) { options[current++] = ""; } return options; } // getOptions /** * Set the SearchAlgorithm used in searching for network structures. * @param newSearchAlgorithm the SearchAlgorithm to use. */ public void setSearchAlgorithm(SearchAlgorithm newSearchAlgorithm) { m_SearchAlgorithm = newSearchAlgorithm; } /** * Get the SearchAlgorithm used as the search algorithm * @return the SearchAlgorithm used as the search algorithm */ public SearchAlgorithm getSearchAlgorithm() { return m_SearchAlgorithm; } /** * Set the Estimator Algorithm used in calculating the CPTs * @param newEstimator the Estimator to use. */ public void setEstimator(BayesNetEstimator newBayesNetEstimator) { m_BayesNetEstimator = newBayesNetEstimator; } /** * Get the BayesNetEstimator used for calculating the CPTs * @return the BayesNetEstimator used. 
*/ public BayesNetEstimator getEstimator() { return m_BayesNetEstimator; } /** * Set whether ADTree structure is used or not * @param bUseADTree */ public void setUseADTree(boolean bUseADTree) { m_bUseADTree = bUseADTree; } /** * Method declaration * @return whether ADTree structure is used or not */ public boolean getUseADTree() { return m_bUseADTree; } /** * Set name of network in BIF file to compare with * @param sBIFFile */ public void setBIFFile(String sBIFFile) { try { m_otherBayesNet = new BIFReader().processFile(sBIFFile); } catch (Throwable t) { m_otherBayesNet = null; } } /** * Get name of network in BIF file to compare with * @return BIF file name */ public String getBIFFile() { if (m_otherBayesNet != null) { return m_otherBayesNet.getFileName(); } return ""; } /** * Returns a description of the classifier. * * @return a description of the classifier as a string. */ public String toString() { StringBuffer text = new StringBuffer(); text.append("Bayes Network Classifier"); text.append("\n" + (m_bUseADTree ? 
"Using " : "not using ") + "ADTree"); if (m_Instances == null) { text.append(": No model built yet."); } else { // flatten BayesNet down to text text.append("\n#attributes="); text.append(m_Instances.numAttributes()); text.append(" #classindex="); text.append(m_Instances.classIndex()); text.append("\nNetwork structure (nodes followed by parents)\n"); for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) { text.append( m_Instances.attribute(iAttribute).name() + "(" + m_Instances.attribute(iAttribute).numValues() + "): "); for (int iParent = 0; iParent < m_ParentSets[iAttribute].getNrOfParents(); iParent++) { text.append(m_Instances.attribute(m_ParentSets[iAttribute].getParent(iParent)).name() + " "); } text.append("\n"); // Description of distributions tends to be too much detail, so it is commented out here // for (int iParent = 0; iParent < m_ParentSets[iAttribute].GetCardinalityOfParents(); iParent++) { // text.append('(' + m_Distributions[iAttribute][iParent].toString() + ')'); // } // text.append("\n"); } text.append("LogScore Bayes: " + measureBayesScore() + "\n"); text.append("LogScore BDeu: " + measureBDeuScore() + "\n"); text.append("LogScore MDL: " + measureMDLScore() + "\n"); text.append("LogScore ENTROPY: " + measureEntropyScore() + "\n"); text.append("LogScore AIC: " + measureAICScore() + "\n"); if (m_otherBayesNet != null) { text.append( "Missing: " + m_otherBayesNet.missingArcs(this) + " Extra: " + m_otherBayesNet.extraArcs(this) + " Reversed: " + m_otherBayesNet.reversedArcs(this) + "\n"); text.append("Divergence: " + m_otherBayesNet.divergence(this) + "\n"); } } return text.toString(); } // toString /** * Returns the type of graph this classifier * represents. * @return Drawable.TREE */ public int graphType() { return Drawable.BayesNet; } /** Returns a BayesNet graph in XMLBIF ver 0.3 format. 
@return - String representing this BayesNet in XMLBIF ver 0.3 */ public String graph() throws Exception { return toXMLBIF03(); } /** * Returns a description of the classifier in XML BIF 0.3 format. * See http://www-2.cs.cmu.edu/~fgcozman/Research/InterchangeFormat/ * for details on XML BIF. * @return an XML BIF 0.3 description of the classifier as a string. */ public String toXMLBIF03() { if (m_Instances == null) { return(""); } StringBuffer text = new StringBuffer(); text.append("\n"); text.append("\n"); text.append("\n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append(" \n"); text.append("]>\n"); text.append("\n"); text.append("\n"); text.append("\n"); text.append("\n"); text.append("" + XMLNormalize(m_Instances.relationName()) + "\n"); for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) { text.append("\n"); text.append("" + XMLNormalize(m_Instances.attribute(iAttribute).name()) + "\n"); for (int iValue = 0; iValue < m_Instances.attribute(iAttribute).numValues(); iValue++) { text.append("" + XMLNormalize(m_Instances.attribute(iAttribute).value(iValue)) + "\n"); } text.append("\n"); } for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) { text.append("\n"); text.append("" + XMLNormalize(m_Instances.attribute(iAttribute).name()) + "\n"); for (int iParent = 0; iParent < m_ParentSets[iAttribute].getNrOfParents(); iParent++) { text.append("" + XMLNormalize(m_Instances.attribute(m_ParentSets[iAttribute].getParent(iParent)).name()) + "\n"); } text.append("\n"); for (int iParent = 0; iParent < m_ParentSets[iAttribute].getCardinalityOfParents(); iParent++) { for (int iValue = 0; iValue < m_Instances.attribute(iAttribute).numValues(); iValue++) { text.append(m_Distributions[iAttribute][iParent].getProbability(iValue)); text.append(' '); } 
text.append('\n'); } text.append("

\n"); text.append("\n"); } text.append("\n"); text.append("\n"); return text.toString(); } // toXMLBIF03 /** XMLNormalize converts the five standard XML entities in a string * g.e. the string V&D's is returned as V&D's * @param sStr string to normalize * @return normalized string */ String XMLNormalize(String sStr) { StringBuffer sStr2 = new StringBuffer(); for (int iStr = 0; iStr < sStr.length(); iStr++) { char c = sStr.charAt(iStr); switch (c) { case '&': sStr2.append("&"); break; case '\'': sStr2.append("'"); break; case '\"': sStr2.append("""); break; case '<': sStr2.append("<"); break; case '>': sStr2.append(">"); break; default: sStr2.append(c); } } return sStr2.toString(); } // XMLNormalize /** * @return a string to describe the UseADTreeoption. */ public String useADTreeTipText() { return "When ADTree (the data structure for increasing speed on counts," + " not to be confused with the classifier under the same name) is used" + " learning time goes down typically. However, because ADTrees are memory" + " intensive, memory problems may occur. Switching this option off makes" + " the structure learning algorithms slower, and run with less memory." + " By default, ADTrees are used."; } /** * @return a string to describe the SearchAlgorithm. */ public String searchAlgorithmTipText() { return "Select method used for searching network structures."; } /** * This will return a string describing the BayesNetEstimator. * @return The string. */ public String estimatorTipText() { return "Select Estimator algorithm for finding the conditional probability tables" + " of the Bayes Network."; } /** * @return a string to describe the BIFFile. */ public String BIFFileTipText() { return "Set the name of a file in BIF XML format. A Bayes network learned" + " from data can be compared with the Bayes network represented by the BIF file." + " Statistics calculated are o.a. the number of missing and extra arcs."; } /** * This will return a string describing the classifier. 
* @return The string. */ public String globalInfo() { return "Bayes Network learning using various search algorithms and quality measures."; } /** * Main method for testing this class. * * @param argv the options */ public static void main(String[] argv) { try { System.out.println(Evaluation.evaluateModel(new BayesNet(), argv)); } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } } // main /** get name of the Bayes network * @return name of the Bayes net */ public String getName() { return m_Instances.relationName(); } /** get number of nodes in the Bayes network * @return number of nodes */ public int getNrOfNodes() { return m_Instances.numAttributes(); } /** get name of a node in the Bayes network * @param iNode: index of the node * @return name of the specified node */ public String getNodeName(int iNode) { return m_Instances.attribute(iNode).name(); } /** get number of values a node can take * @param iNode: index of the node * @return cardinality of the specified node */ public int getCardinality(int iNode) { return m_Instances.attribute(iNode).numValues(); } /** get name of a particular value of a node * @param iNode: index of the node * @param iValue: index of the value * @return cardinality of the specified node */ public String getNodeValue(int iNode, int iValue) { return m_Instances.attribute(iNode).value(iValue); } /** get number of parents of a node in the network structure * @param iNode: index of the node * @return number of parents of the specified node */ public int getNrOfParents(int iNode) { return m_ParentSets[iNode].getNrOfParents(); } /** get node index of a parent of a node in the network structure * @param iNode: index of the node * @param iParent: index of the parents, e.g., 0 is the first parent, 1 the second parent, etc. 
* @return node index of the iParent's parent of the specified node */ public int getParent(int iNode, int iParent) { return m_ParentSets[iNode].getParent(iParent); } /** Get full set of parent sets. * @return parent sets; */ public ParentSet[] getParentSets() { return m_ParentSets; } /** Get full set of estimators. * @return estimators; */ public Estimator[][] getDistributions() { return m_Distributions; } /** get number of values the collection of parents of a node can take * @param iNode: index of the node * @return cardinality of the parent set of the specified node */ public int getParentCardinality(int iNode) { return m_ParentSets[iNode].getCardinalityOfParents(); } /** get particular probability of the conditional probability distribtion * of a node given its parents. * @param iNode: index of the node * @param iParent: index of the parent set, 0 <= iParent <= getParentCardinality(iNode) * @param iValue: index of the value, 0 <= iValue <= getCardinality(iNode) * @return probability */ public double getProbability(int iNode, int iParent, int iValue) { return m_Distributions[iNode][iParent].getProbability(iValue); } /** get the parent set of a node * @param iNode: index of the node * @return Parent set of the specified node. */ public ParentSet getParentSet(int iNode) { return m_ParentSets[iNode]; } /** get ADTree strucrture containing efficient representation of counts. * @return ADTree strucrture */ public ADNode getADTree() { return m_ADTree;} // implementation of AdditionalMeasureProducer interface /** * Returns an enumeration of the measure names. Additional measures * must follow the naming convention of starting with "measure", eg. 
* double measureBlah()
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    String[] measureNames = {
      "measureExtraArcs",
      "measureMissingArcs",
      "measureReversedArcs",
      "measureDivergence",
      "measureBayesScore",
      "measureBDeuScore",
      "measureMDLScore",
      "measureAICScore",
      "measureEntropyScore"
    };
    // size the vector for the actual number of measures
    Vector newVector = new Vector(measureNames.length);
    for (int i = 0; i < measureNames.length; i++) {
      newVector.addElement(measureNames[i]);
    }
    return newVector.elements();
  } // enumerateMeasures

  /**
   * @return number of extra arcs as reported by the comparison network,
   * or 0 when no comparison network is set
   */
  public double measureExtraArcs() {
    if (m_otherBayesNet != null) {
      return m_otherBayesNet.extraArcs(this);
    }
    return 0;
  } // measureExtraArcs

  /**
   * @return number of missing arcs as reported by the comparison network,
   * or 0 when no comparison network is set
   */
  public double measureMissingArcs() {
    if (m_otherBayesNet != null) {
      return m_otherBayesNet.missingArcs(this);
    }
    return 0;
  } // measureMissingArcs

  /**
   * @return number of reversed arcs as reported by the comparison network,
   * or 0 when no comparison network is set
   */
  public double measureReversedArcs() {
    if (m_otherBayesNet != null) {
      return m_otherBayesNet.reversedArcs(this);
    }
    return 0;
  } // measureReversedArcs

  /**
   * @return divergence as reported by the comparison network,
   * or 0 when no comparison network is set
   */
  public double measureDivergence() {
    if (m_otherBayesNet != null) {
      return m_otherBayesNet.divergence(this);
    }
    return 0;
  } // measureDivergence

  /**
   * @return log score of this network on its data using the BAYES score type
   */
  public double measureBayesScore() {
    LocalScoreSearchAlgorithm s = new LocalScoreSearchAlgorithm(this, m_Instances);
    return s.logScore(Scoreable.BAYES);
  } // measureBayesScore

  /**
   * @return log score of this network on its data using the BDeu score type
   */
  public double measureBDeuScore() {
    LocalScoreSearchAlgorithm s = new LocalScoreSearchAlgorithm(this, m_Instances);
    return s.logScore(Scoreable.BDeu);
  } // measureBDeuScore

  /**
   * @return log score of this network on its data using the MDL score type
   */
  public double measureMDLScore() {
    LocalScoreSearchAlgorithm s = new LocalScoreSearchAlgorithm(this, m_Instances);
    return s.logScore(Scoreable.MDL);
  } // measureMDLScore

  /**
   * @return log score of this network on its data using the AIC score type
   */
  public double measureAICScore() {
    LocalScoreSearchAlgorithm s = new LocalScoreSearchAlgorithm(this, m_Instances);
    return s.logScore(Scoreable.AIC);
  } // measureAICScore

  /**
   * @return log score of this network on its data using the ENTROPY score type
   */
  public double measureEntropyScore() {
    LocalScoreSearchAlgorithm s = new LocalScoreSearchAlgorithm(this, m_Instances);
    return s.logScore(Scoreable.ENTROPY);
  } // measureEntropyScore

  /**
   * Returns the value of the named measure
   * @param measureName the name of the measure to query for its value
   * @return the value of the named measure
   * @exception IllegalArgumentException if the named measure is not supported
   * (this includes a null name)
   */
  public double getMeasure(String measureName) {
    // constant-first comparisons so that a null name ends up in the
    // IllegalArgumentException below instead of a NullPointerException
    if ("measureExtraArcs".equals(measureName)) {
      return measureExtraArcs();
    }
    if ("measureMissingArcs".equals(measureName)) {
      return measureMissingArcs();
    }
    if ("measureReversedArcs".equals(measureName)) {
      return measureReversedArcs();
    }
    if ("measureDivergence".equals(measureName)) {
      return measureDivergence();
    }
    if ("measureBayesScore".equals(measureName)) {
      return measureBayesScore();
    }
    if ("measureBDeuScore".equals(measureName)) {
      return measureBDeuScore();
    }
    if ("measureMDLScore".equals(measureName)) {
      return measureMDLScore();
    }
    if ("measureAICScore".equals(measureName)) {
      return measureAICScore();
    }
    if ("measureEntropyScore".equals(measureName)) {
      return measureEntropyScore();
    }
    // honour the documented contract instead of silently reporting 0
    throw new IllegalArgumentException(measureName + " not supported (BayesNet)");
  } // getMeasure
} // class BayesNet