/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * SMOreg.java * Copyright (C) 2002 Sylvain Roy * */ package weka.classifiers.functions; import weka.classifiers.functions.supportVector.*; import weka.classifiers.Classifier; import weka.classifiers.Evaluation; import weka.filters.unsupervised.attribute.NominalToBinary; import weka.filters.unsupervised.attribute.ReplaceMissingValues; import weka.filters.unsupervised.attribute.Normalize; import weka.filters.unsupervised.attribute.Standardize; import weka.filters.Filter; import java.util.*; import java.io.*; import weka.core.*; /** * Implements Alex J.Smola and Bernhard Scholkopf sequential minimal optimization * algorithm for training a support vector regression using polynomial * or RBF kernels. * * This implementation globally replaces all missing values and * transforms nominal attributes into binary ones. It also * normalizes all attributes by default. (Note that the coefficients * in the output are based on the normalized/standardized data, not the * original data.) * * * For more information on the SMO algorithm, see

* * Alex J. Smola, Bernhard Scholkopf (1998). A Tutorial on Support Vector Regression. * NeuroCOLT2 Technical Report Series - NC2-TR-1998-030.

* * S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy, * Improvements to SMO Algorithm for SVM Regression. * Technical Report CD-99-16, Control Division Dept of Mechanical and Production Engineering, * National University of Singapore.

* * * @author Sylvain Roy (sro33@student.canterbury.ac.nz) * @version $Revision: 1.3.2.3 $ */ public class SMOreg extends Classifier implements OptionHandler, WeightedInstancesHandler { /** Feature-space normalization? */ protected boolean m_featureSpaceNormalization = false; /** Use RBF kernel? (default: poly) */ protected boolean m_useRBF = false; /** The size of the cache (a prime number) */ protected int m_cacheSize = 250007; /** Kernel to use **/ protected Kernel m_kernel; /** Use lower-order terms? */ protected boolean m_lowerOrder = false; /** The exponent for the polynomial kernel. */ protected double m_exponent = 1.0; /** Gamma for the RBF kernel. */ protected double m_gamma = 0.01; /** The class index from the training data */ protected int m_classIndex = -1; /** Only numeric attributes in the dataset? */ protected boolean m_onlyNumeric; /** The filter used to make attributes numeric. */ protected NominalToBinary m_NominalToBinary; /** The filter to apply to the training data */ public static final int FILTER_NORMALIZE = 0; public static final int FILTER_STANDARDIZE = 1; public static final int FILTER_NONE = 2; public static final Tag [] TAGS_FILTER = { new Tag(FILTER_NORMALIZE, "Normalize training data"), new Tag(FILTER_STANDARDIZE, "Standardize training data"), new Tag(FILTER_NONE, "No normalization/standardization"), }; /** The filter used to standardize/normalize all values. */ protected Filter m_Filter = null; /** Whether to normalize/standardize/neither */ protected int m_filterType = FILTER_NORMALIZE; /** The filter used to get rid of missing values. */ protected ReplaceMissingValues m_Missing; /** Turn off all checks and conversions? Turning them off assumes that data is purely numeric, doesn't contain any missing values, and has a numeric class. Turning them off also means that no header information will be stored if the machine is linear. Finally, it also assumes that no instance has a weight equal to 0.*/ protected boolean m_checksTurnedOff = false; /** The training data. */ protected Instances m_data; /** The complexity parameter */ protected double m_C = 1.0; /** The Lagrange multipliers */ protected double[] m_alpha; protected double[] m_alpha_; /** The thresholds. */ protected double m_b, m_bLow, m_bUp; /** The indices for m_bLow and m_bUp */ protected int m_iLow, m_iUp; /** Weight vector for linear machine. */ protected double[] m_weights; /** The current set of errors for all non-bound examples. */ protected double[] m_fcache; /** The five different sets used by the algorithm. */ protected SMOset m_I0; // {i: 0 < m_alpha[i] < C || 0 < m_alpha_[i] < C} protected SMOset m_I1; // {i: m_class[i] = 0, m_alpha_[i] = 0} protected SMOset m_I2; // {i: m_class[i] = 0, m_alpha_[i] = C} protected SMOset m_I3; // {i: m_class[i] = C, m_alpha_[i] = 0} /** The parameter epsilon */ protected double m_epsilon = 1e-3; /** The parameter tol */ protected double m_tol = 1.0e-3; /** The parameter eps */ protected double m_eps = 1.0e-12; /** Precision constant for updating sets */ protected static double m_Del = 1e-10; /** The parameters of the linear transforamtion realized * by the filter on the class attribute */ protected double m_Alin; protected double m_Blin; /** Variables to hold weight vector in sparse form. (To reduce storage requirements.) */ protected double[] m_sparseWeights; protected int[] m_sparseIndices; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Implements Alex Smola and Bernhard Scholkopf's sequential minimal " + "optimization algorithm for training a support vector regression model. " + "This implementation globally replaces all missing values and " + "transforms nominal attributes into binary ones. It also " + "normalizes all attributes by default. (Note that the coefficients " + "in the output are based on the normalized/standardized data, not the " + "original data.) " + "For more information on the SMO algorithm, see\n\n" + "Alex J. Smola, Bernhard Scholkopf (1998). \"A Tutorial on Support Vector " + "Regression\". NeuroCOLT2 Technical Report Series - NC2-TR-1998-030.\n\n" + "S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy, " + "\"Improvements to SMO Algorithm for SVM Regression\". " + "Technical Report CD-99-16, Control Division Dept of Mechanical and " + "Production Engineering, National University of Singapore. "; } /** * Method for building the classifier. * * @param insts the set of training instances * @exception Exception if the classifier can't be built successfully */ public void buildClassifier(Instances insts) throws Exception { /* check the set of training instances */ if (!m_checksTurnedOff) { if (insts.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); } if (insts.classAttribute().isNominal()) { throw new UnsupportedClassTypeException("SMOreg can't handle a nominal class! " + "Use SMO for performing " + "classification."); } insts = new Instances(insts); insts.deleteWithMissingClass(); if (insts.numInstances() == 0) { throw new Exception("No training instances without a missing class!"); } /* Removes all the instances with weight equal to 0. MUST be done since condition (6) of Shevade's paper is made with the assertion Ci > 0 (See equation (1a). */ Instances data = new Instances(insts, insts.numInstances()); for(int i = 0; i < insts.numInstances(); i++){ if(insts.instance(i).weight() > 0) data.add(insts.instance(i)); } if (data.numInstances() == 0) { throw new Exception("No training instances left after removing " + "instance with either a weight null or a missing class!"); } insts = data; } m_onlyNumeric = true; if (!m_checksTurnedOff) { for (int i = 0; i < insts.numAttributes(); i++) { if (i != insts.classIndex()) { if (!insts.attribute(i).isNumeric()) { m_onlyNumeric = false; break; } } } } if (!m_checksTurnedOff) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(insts); insts = Filter.useFilter(insts, m_Missing); } else { m_Missing = null; } if (!m_onlyNumeric) { m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(insts); insts = Filter.useFilter(insts, m_NominalToBinary); } else { m_NominalToBinary = null; } m_classIndex = insts.classIndex(); if (m_filterType == FILTER_STANDARDIZE) { m_Filter = new Standardize(); ((Standardize)m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else if (m_filterType == FILTER_NORMALIZE) { m_Filter = new Normalize(); ((Normalize)m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else { m_Filter = null; } m_data = insts; // determine which linear transformation has been // applied to the class by the filter if (m_Filter != null) { Instance witness = (Instance)insts.instance(0).copy(); witness.setValue(m_classIndex, 0); m_Filter.input(witness); m_Filter.batchFinished(); Instance res = m_Filter.output(); m_Blin = res.value(m_classIndex); witness.setValue(m_classIndex, 1); m_Filter.input(witness); m_Filter.batchFinished(); res = m_Filter.output(); m_Alin = res.value(m_classIndex) - m_Blin; } else { m_Alin = 1.0; m_Blin = 0.0; } // Initialize kernel if(m_useRBF) { m_kernel = new RBFKernel(m_data, m_cacheSize, m_gamma); } else { if (m_featureSpaceNormalization) { m_kernel = new NormalizedPolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder); } else { m_kernel = new PolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder); } } // If machine is linear, reserve space for weights if (!m_useRBF && m_exponent == 1.0) { m_weights = new double[m_data.numAttributes()]; } else { m_weights = null; } // Initialize fcache m_fcache = new double[m_data.numInstances()]; // Initialize sets m_I0 = new SMOset(m_data.numInstances()); m_I1 = new SMOset(m_data.numInstances()); m_I2 = new SMOset(m_data.numInstances()); m_I3 = new SMOset(m_data.numInstances()); /* MAIN ROUTINE FOR MODIFICATION 1 */ // Follows the specification of the first modification of Shevade's paper // Initialize alpha array to zero m_alpha = new double[m_data.numInstances()]; m_alpha_ = new double[m_data.numInstances()]; // set I1 to contain all the examples for(int i = 0; i < m_data.numInstances(); i++){ m_I1.insert(i); } // choose any example i from the training set : i = 0 m_bUp = m_data.instance(0).classValue() + m_epsilon; m_bLow = m_data.instance(0).classValue() - m_epsilon; m_iUp = m_iLow = 0; int numChanged = 0; boolean examineAll = true; while(numChanged > 0 || examineAll){ numChanged = 0; if(examineAll){ // loop over all the example for(int I = 0; I < m_alpha.length; I++){ numChanged += examineExample(I); } } else { // loop over I_0 for (int I = m_I0.getNext(-1); I != -1; I = m_I0.getNext(I)) { numChanged += examineExample(I); if(m_bUp > m_bLow - 2 * m_tol){ numChanged = 0; break; } } } if (examineAll) examineAll = false; else if (numChanged == 0) examineAll = true; } /* END OF MAIN ROUTINE FOR MODIFICATION 1 */ // debuggage // checkOptimality(); // checkAlphas(); // displayB(); // Set threshold m_b = (m_bLow + m_bUp) / 2.0; // Save memory m_kernel.clean(); m_fcache = null; m_I0 = m_I1 = m_I2 = m_I3 = null; // If machine is linear, delete training data // and store weight vector in sparse format if (!m_useRBF && m_exponent == 1.0) { // compute weight vector for(int j = 0; j < m_weights.length; j++){ m_weights[j] = 0; } for(int k = 0; k < m_alpha.length; k++){ for(int j = 0; j < m_weights.length; j++){ if(j == m_data.classIndex()) continue; m_weights[j] += (m_alpha[k] - m_alpha_[k]) * m_data.instance(k).value(j); } } // Convert weight vector double[] sparseWeights = new double[m_weights.length]; int[] sparseIndices = new int[m_weights.length]; int counter = 0; for (int ii = 0; ii < m_weights.length; ii++) { if (m_weights[ii] != 0.0) { sparseWeights[counter] = m_weights[ii]; sparseIndices[counter] = ii; counter++; } } m_sparseWeights = new double[counter]; m_sparseIndices = new int[counter]; System.arraycopy(sparseWeights, 0, m_sparseWeights, 0, counter); System.arraycopy(sparseIndices, 0, m_sparseIndices, 0, counter); // Clean out training data if (!m_checksTurnedOff) { m_data = new Instances(m_data, 0); } else { m_data = null; } // Clean out weight vector m_weights = null; // We don't need the alphas in the linear case m_alpha = null; m_alpha_ = null; } } /** * Examines instance. (As defined in Shevade's paper.) * * @param i2 index of instance to examine * @return true if examination was successfull * @exception Exception if something goes wrong */ protected int examineExample(int i2) throws Exception{ // Lagrange multipliers for i2 double alpha2 = m_alpha[i2]; double alpha2_ = m_alpha_[i2]; double F2 = 0; if(m_I0.contains(i2)){ F2 = m_fcache[i2]; } else { // compute F2 = F_i2 and set f-cache[i2] = F2 F2 = m_data.instance(i2).classValue(); for(int j = 0; j < m_alpha.length; j++){ F2 -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i2, j, m_data.instance(i2)); } m_fcache[i2] = F2; // Update (b_low, i_low) or (b_up, i_up) using (F2, i2)... if(m_I1.contains(i2)){ if(F2 + m_epsilon < m_bUp){ m_bUp = F2 + m_epsilon; m_iUp = i2; } else if (F2 - m_epsilon > m_bLow){ m_bLow = F2 - m_epsilon; m_iLow = i2; } } else if(m_I2.contains(i2) && (F2+m_epsilon > m_bLow)){ m_bLow = F2 + m_epsilon; m_iLow = i2; } else if(m_I3.contains(i2) && (F2-m_epsilon < m_bUp)){ m_bUp = F2 - m_epsilon; m_iUp = i2; } } // check optimality using current b_low and b_up and, if // violated, find an index i1 to do joint optimization with i2... boolean optimality = true; int i1 = -1; // case 1 : i2 is in I_0a if(m_I0.contains(i2) && 0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()){ if(m_bLow-(F2-m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_0a choose the better i1 if((F2 - m_epsilon) - m_bUp > m_bLow - (F2 - m_epsilon)){ i1 = m_iUp; } }else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_0a choose the better i1... if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 2 : i2 is in I_0b else if(m_I0.contains(i2) && 0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight()){ if(m_bLow-(F2+m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_0b choose the better i1 if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){ i1 = m_iUp; } }else if((F2 + m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_0b choose the better i1... if(m_bLow-(F2+m_epsilon) > (F2+m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 3 : i2 is in I_1 else if(m_I1.contains(i2)){ if(m_bLow-(F2+m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_1 choose the better i1 if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){ i1 = m_iUp; } } else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_1 choose the better i1... if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 4 : i2 is in I_2 else if(m_I2.contains(i2)){ if((F2+m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; } } // case 5 : i2 is in I_3 else if(m_I3.contains(i2)){ if(m_bLow-(F2-m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; } } else{ throw new RuntimeException("Inconsistent state ! I0, I1, I2 and I3 " + "must cover the whole set of indices."); } if(optimality){ return 0; } if(takeStep(i1, i2)){ return 1; } else { return 0; } } /** * Method solving for the Lagrange multipliers for * two instances. (As defined in Shevade's paper.) * * @param i1 index of the first instance * @param i2 index of the second instance * @return true if multipliers could be found * @exception Exception if something goes wrong */ protected boolean takeStep(int i1, int i2) throws Exception{ if(i1 == i2){ return false; } double alpha1 = m_alpha[i1]; double alpha1_ = m_alpha_[i1]; double alpha2 = m_alpha[i2]; double alpha2_ = m_alpha_[i2]; double F1 = m_fcache[i1]; double F2 = m_fcache[i2]; double k11 = m_kernel.eval(i1, i1, m_data.instance(i1)); double k12 = m_kernel.eval(i1, i2, m_data.instance(i1)); double k22 = m_kernel.eval(i2, i2, m_data.instance(i2)); double eta = -2*k12+k11+k22; double gamma = alpha1 - alpha1_ + alpha2 - alpha2_; // In case of numerical instabilies eta might be sligthly less than 0. // (Theoretically, it cannot happen with a kernel which respects Mercer's condition.) if(eta < 0) eta = 0; boolean case1 = false, case2 = false, case3 = false, case4 = false, finished = false; double deltaphi = F1 - F2; double L, H; boolean changed = false; double a1, a2; while(!finished){ // this loop is passed at most three times // Case variables needed to avoid attempting small changes twices if(!case1 && (alpha1 > 0 || (alpha1_ == 0 && deltaphi > 0)) && (alpha2 > 0 || (alpha2_ == 0 && deltaphi < 0)) ){ // compute L, H (wrt alpha1, alpha2) L = java.lang.Math.max(0, gamma - m_C * m_data.instance(i1).weight()); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), gamma); if(L < H){ if(eta > 0){ a2 = alpha2 - (deltaphi / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = -L*deltaphi; double Hobj = -H*deltaphi; if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1 - (a2 - alpha2); // update alpha1, alpha2 if change is larger than some eps if(java.lang.Math.abs(a1-alpha1) > m_eps || java.lang.Math.abs(a2-alpha2) > m_eps){ alpha1 = a1; alpha2 = a2; changed = true; } } else { finished = true; } case1 = true; } else if(!case2 && (alpha1 > 0 || (alpha1_ == 0 && deltaphi > 2 * m_epsilon)) && (alpha2_ > 0 || (alpha2 == 0 && deltaphi > 2 * m_epsilon)) ){ // compute L, H (wrt alpha1, alpha2_) L = java.lang.Math.max(0, -gamma); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma + m_C*m_data.instance(i1).weight()); if(L < H){ if(eta > 0){ a2 = alpha2_ + ((deltaphi - 2*m_epsilon) / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = L*(-2*m_epsilon + deltaphi); double Hobj = H*(-2*m_epsilon + deltaphi); if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1 + (a2 - alpha2_); // update alpha1, alpha2_ if change is larger than some eps if(java.lang.Math.abs(a1-alpha1) > m_eps || java.lang.Math.abs(a2-alpha2_) > m_eps){ alpha1 = a1; alpha2_ = a2; changed = true; } } else { finished = true; } case2 = true; } else if(!case3 && (alpha1_ > 0 || (alpha1 == 0 && deltaphi < -2 * m_epsilon)) && (alpha2 > 0 || (alpha2_ == 0 && deltaphi < -2 * m_epsilon)) ){ // compute L, H (wrt alpha1_, alpha2) L = java.lang.Math.max(0, gamma); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), m_C * m_data.instance(i1).weight() + gamma); if(L < H){ if(eta > 0){ a2 = alpha2 - ((deltaphi + 2*m_epsilon) / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = -L*(2*m_epsilon + deltaphi); double Hobj = -H*(2*m_epsilon + deltaphi); if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1_ + (a2 - alpha2); // update alpha1_, alpha2 if change is larger than some eps if(java.lang.Math.abs(a1-alpha1_) > m_eps || java.lang.Math.abs(a2-alpha2) > m_eps){ alpha1_ = a1; alpha2 = a2; changed = true; } } else { finished = true; } case3 = true; } else if(!case4 && (alpha1_ > 0 || (alpha1 == 0 && deltaphi < 0)) && (alpha2_ > 0 || (alpha2 == 0 && deltaphi > 0)) ){ // compute L, H (wrt alpha1_, alpha2_) L = java.lang.Math.max(0, -gamma - m_C * m_data.instance(i1).weight()); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma); if(L < H){ if(eta > 0){ a2 = alpha2_ + deltaphi / eta; if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = L*deltaphi; double Hobj = H*deltaphi; if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1_ - (a2 - alpha2_); // update alpha1_, alpha2_ if change is larger than some eps if(java.lang.Math.abs(a1-alpha1_) > m_eps || java.lang.Math.abs(a2-alpha2_) > m_eps){ alpha1_ = a1; alpha2_ = a2; changed = true; } } else { finished = true; } case4 = true; } else { finished = true; } // update deltaphi deltaphi += eta * ((alpha2 - alpha2_) - (m_alpha[i2] - m_alpha_[i2])); } if(changed){ // update f-cache[i] for i in I_0 using new Lagrange multipliers for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) { if (i != i1 && i != i2){ m_fcache[i] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * m_kernel.eval(i1, i, m_data.instance(i1)) + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * m_kernel.eval(i2, i, m_data.instance(i2)); } } // update f-cache[i1] and f-cache[i2] m_fcache[i1] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k11 + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k12; m_fcache[i2] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k12 + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k22; // to prevent precision problems if(alpha1 > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){ alpha1 = m_C * m_data.instance(i1).weight(); } else if (alpha1 <= m_Del * m_C * m_data.instance(i1).weight()){ alpha1 = 0; } if(alpha1_ > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){ alpha1_ = m_C * m_data.instance(i1).weight(); } else if (alpha1_ <= m_Del * m_C * m_data.instance(i1).weight()){ alpha1_ = 0; } if(alpha2 > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){ alpha2 = m_C * m_data.instance(i2).weight(); } else if (alpha2 <= m_Del * m_C * m_data.instance(i2).weight()){ alpha2 = 0; } if(alpha2_ > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){ alpha2_ = m_C * m_data.instance(i2).weight(); } else if (alpha2_ <= m_Del * m_C * m_data.instance(i2).weight()){ alpha2_ = 0; } // Store the changes in alpha, alpha' array m_alpha[i1] = alpha1; m_alpha_[i1] = alpha1_; m_alpha[i2] = alpha2; m_alpha_[i2] = alpha2_; // update I_0, I_1, I_2, I_3 if((0 < alpha1 && alpha1 < m_C * m_data.instance(i1).weight()) || (0 < alpha1_ && alpha1_ < m_C * m_data.instance(i1).weight())){ m_I0.insert(i1); } else { m_I0.delete(i1); } if((alpha1 == 0 && alpha1_ == 0)){ m_I1.insert(i1); } else { m_I1.delete(i1); } if((alpha1 == 0 && alpha1_ == m_C * m_data.instance(i1).weight())){ m_I2.insert(i1); } else { m_I2.delete(i1); } if((alpha1 == m_C * m_data.instance(i1).weight() && alpha1_ == 0)){ m_I3.insert(i1); } else { m_I3.delete(i1); } if((0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()) || (0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight())){ m_I0.insert(i2); } else { m_I0.delete(i2); } if((alpha2 == 0 && alpha2_ == 0)){ m_I1.insert(i2); } else { m_I1.delete(i2); } if((alpha2 == 0 && alpha2_ == m_C * m_data.instance(i2).weight())){ m_I2.insert(i2); } else { m_I2.delete(i2); } if((alpha2 == m_C * m_data.instance(i2).weight() && alpha2_ == 0)){ m_I3.insert(i2); } else { m_I3.delete(i2); } // for debuggage // checkSets(); // checkAlphas(); // Compute (i_low, b_low) and (i_up, b_up) by applying the conditions // mentionned above, using only i1, i2 and indices in I_0 m_bLow = -Double.MAX_VALUE; m_bUp = Double.MAX_VALUE; m_iLow =-1; m_iUp = -1; for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) { if(0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight() && (m_fcache[i] + m_epsilon > m_bLow)){ m_bLow = m_fcache[i] + m_epsilon; m_iLow = i; } else if(0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight() && (m_fcache[i] - m_epsilon > m_bLow)){ m_bLow = m_fcache[i] - m_epsilon; m_iLow = i; } if(0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight() && (m_fcache[i] - m_epsilon < m_bUp)){ m_bUp = m_fcache[i] - m_epsilon; m_iUp = i; } else if(0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight() && (m_fcache[i] + m_epsilon < m_bUp)){ m_bUp = m_fcache[i] + m_epsilon; m_iUp = i; } } if(!m_I0.contains(i1)){ if(m_I2.contains(i1) && (m_fcache[i1] + m_epsilon > m_bLow)){ m_bLow = m_fcache[i1] + m_epsilon; m_iLow = i1; } else if (m_I1.contains(i1) && (m_fcache[i1] - m_epsilon > m_bLow)){ m_bLow = m_fcache[i1] - m_epsilon; m_iLow = i1; } if(m_I3.contains(i1) && (m_fcache[i1] - m_epsilon < m_bUp)){ m_bUp = m_fcache[i1] - m_epsilon; m_iUp = i1; } else if (m_I1.contains(i1) && (m_fcache[i1] + m_epsilon < m_bUp)){ m_bUp = m_fcache[i1] + m_epsilon; m_iUp = i1; } } if(!m_I0.contains(i2)){ if(m_I2.contains(i2) && (m_fcache[i2] + m_epsilon > m_bLow)){ m_bLow = m_fcache[i2] + m_epsilon; m_iLow = i2; } else if (m_I1.contains(i2) && (m_fcache[i2] - m_epsilon > m_bLow)){ m_bLow = m_fcache[i2] - m_epsilon; m_iLow = i2; } if(m_I3.contains(i2) && (m_fcache[i2] - m_epsilon < m_bUp)){ m_bUp = m_fcache[i2] - m_epsilon; m_iUp = i2; } else if (m_I1.contains(i2) && (m_fcache[i2] + m_epsilon < m_bUp)){ m_bUp = m_fcache[i2] + m_epsilon; m_iUp = i2; } } if(m_iLow == -1 || m_iUp == -1){ throw new RuntimeException("Fatal error! The program failled to " + "initialize i_Low, iUp."); } return true; } else { return false; } } /** * Classifies a given instance. * * @param instance the instance to be classified * @return the classification * @exception Exception if instance could not be classified * successfully */ public double classifyInstance(Instance inst) throws Exception { // Filter instance if (!m_checksTurnedOff) { m_Missing.input(inst); m_Missing.batchFinished(); inst = m_Missing.output(); } if (!m_onlyNumeric) { m_NominalToBinary.input(inst); m_NominalToBinary.batchFinished(); inst = m_NominalToBinary.output(); } if (m_Filter != null) { m_Filter.input(inst); m_Filter.batchFinished(); inst = m_Filter.output(); } // classification : double result = m_b; // Is the machine linear? if (!m_useRBF && m_exponent == 1.0) { // Is weight vector stored in sparse format? if (m_sparseWeights == null) { int n1 = inst.numValues(); for (int p = 0; p < n1; p++) { if (inst.index(p) != m_classIndex) { result += m_weights[inst.index(p)] * inst.valueSparse(p); } } } else { int n1 = inst.numValues(); int n2 = m_sparseWeights.length; for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) { int ind1 = inst.index(p1); int ind2 = m_sparseIndices[p2]; if (ind1 == ind2) { if (ind1 != m_classIndex) { result += inst.valueSparse(p1) * m_sparseWeights[p2]; } p1++; p2++; } else if (ind1 > ind2) { p2++; } else { p1++; } } } } else { for (int i = 0; i < m_alpha.length; i++) { result += (m_alpha[i] - m_alpha_[i]) * m_kernel.eval(-1, i, inst); } } // apply the inverse transformation // (due to the normalization/standardization) return (result - m_Blin) / m_Alin; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(11); newVector.addElement(new Option("\tThe amount up to which deviations are\n" + "\ttolerated (epsilon). (default 1e-3)", "S", 1, "-S ")); newVector.addElement(new Option("\tThe complexity constant C. (default 1)", "C", 1, "-C ")); newVector.addElement(new Option("\tThe exponent for the " + "polynomial kernel. (default 1)", "E", 1, "-E ")); newVector.addElement(new Option("\tGamma for the " + "RBF kernel. (default 0.01)", "G", 1, "-G ")); newVector.addElement(new Option("\tWhether to 0=normalize/1=standardize/2=neither. " + "(default 0=normalize)", "N", 1, "-N")); newVector.addElement(new Option("\tFeature-space normalization (only for\n" +"\tnon-linear polynomial kernels).", "F", 0, "-F")); newVector.addElement(new Option("\tUse lower-order terms (only for non-linear\n" +"\tpolynomial kernels).", "O", 0, "-O")); newVector.addElement(new Option("\tUse RBF kernel. " + "(default poly)", "R", 0, "-R")); newVector.addElement(new Option("\tThe size of the kernel cache. " + "(default 250007, use 0 for full cache)", "A", 1, "-A ")); newVector.addElement(new Option("\tThe tolerance parameter. " + "(default 1.0e-3)", "T", 1, "-T ")); newVector.addElement(new Option("\tThe epsilon for round-off error. " + "(default 1.0e-12)", "P", 1, "-P ")); return newVector.elements(); } /** * Parses a given list of options. Valid options are:

* * -S num
* The amount up to which deviation are tolerated (epsilon). (default 1e-3) * Watch out, the value of epsilon is used with the (normalized/standardize) data

* * -C num
* The complexity constant C. (default 1)

* * -E num
* The exponent for the polynomial kernel. (default 1)

* * -G num
* Gamma for the RBF kernel. (default 0.01)

* * -N <0|1|2>
* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)

* * -F
* Feature-space normalization (only for non-linear polynomial kernels).

* * -O
* Use lower-order terms (only for non-linear polynomial kernels).

* * -R
* Use RBF kernel (default poly).

* * -A num
* Sets the size of the kernel cache. Should be a prime number. (default 250007, use 0 for full cache)

* * -T num
* Sets the tolerance parameter. (default 1.0e-3)

* * -P num
* Sets the epsilon for round-off error. (default 1.0e-12)

* * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String epsilonString = Utils.getOption('S', options); if (epsilonString.length() != 0) { m_epsilon = (new Double(epsilonString)).doubleValue(); } else { m_epsilon = 1e-3; } String complexityString = Utils.getOption('C', options); if (complexityString.length() != 0) { m_C = (new Double(complexityString)).doubleValue(); } else { m_C = 1.0; } String exponentsString = Utils.getOption('E', options); if (exponentsString.length() != 0) { m_exponent = (new Double(exponentsString)).doubleValue(); } else { m_exponent = 1.0; } String gammaString = Utils.getOption('G', options); if (gammaString.length() != 0) { m_gamma = (new Double(gammaString)).doubleValue(); } else { m_gamma = 0.01; } String cacheString = Utils.getOption('A', options); if (cacheString.length() != 0) { m_cacheSize = Integer.parseInt(cacheString); } else { m_cacheSize = 250007; } String toleranceString = Utils.getOption('T', options); if (toleranceString.length() != 0) { m_tol = (new Double(toleranceString)).doubleValue(); } else { m_tol = 1.0e-3; } String epsString = Utils.getOption('P', options); if (epsString.length() != 0) { m_eps = (new Double(epsString)).doubleValue(); } else { m_eps = 1.0e-12; } m_useRBF = Utils.getFlag('R', options); String nString = Utils.getOption('N', options); if (nString.length() != 0) { setFilterType(new SelectedTag(Integer.parseInt(nString), TAGS_FILTER)); } else { setFilterType(new SelectedTag(FILTER_NORMALIZE, TAGS_FILTER)); } m_featureSpaceNormalization = Utils.getFlag('F', options); if ((m_useRBF) && (m_featureSpaceNormalization)) { throw new Exception("RBF machine doesn't require feature-space normalization."); } if ((m_exponent == 1.0) && (m_featureSpaceNormalization)) { throw new Exception("Can't use feature-space normalization with linear machine."); } m_lowerOrder = Utils.getFlag('O', options); if ((m_useRBF) && (m_lowerOrder)) { throw new Exception("Can't use lower-order terms with RBF machine."); } if ((m_exponent == 1.0) && (m_lowerOrder)) { throw new Exception("Can't use lower-order terms with linear machine."); } } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [20]; int current = 0; options[current++] = "-S"; options[current++] = "" + m_epsilon; options[current++] = "-C"; options[current++] = "" + m_C; options[current++] = "-E"; options[current++] = "" + m_exponent; options[current++] = "-G"; options[current++] = "" + m_gamma; options[current++] = "-A"; options[current++] = "" + m_cacheSize; options[current++] = "-T"; options[current++] = "" + m_tol; options[current++] = "-P"; options[current++] = "" + m_eps; options[current++] = "-N"; options[current++] = "" + m_filterType; if (m_featureSpaceNormalization) { options[current++] = "-F"; } if (m_lowerOrder) { options[current++] = "-O"; } if (m_useRBF) { options[current++] = "-R"; } while (current < options.length) { options[current++] = ""; } return options; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String filterTypeTipText() { return "Determines how/if the data will be transformed."; } /** * Gets how the training data will be transformed. Will be one of * FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE. * * @return the filtering mode */ public SelectedTag getFilterType() { return new SelectedTag(m_filterType, TAGS_FILTER); } /** * Sets how the training data will be transformed. Should be one of * FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE. * * @param newType the new filtering mode */ public void setFilterType(SelectedTag newType) { if (newType.getTags() == TAGS_FILTER) { m_filterType = newType.getSelectedTag().getID(); } } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String exponentTipText() { return "The exponent for the polynomial kernel."; } /** * Get the value of exponent. * * @return Value of exponent. */ public double getExponent() { return m_exponent; } /** * Set the value of exponent. If linear kernel * is used, rescaling and lower-order terms are * turned off. * * @param v Value to assign to exponent. */ public void setExponent(double v) { if (v == 1.0) { m_featureSpaceNormalization = false; m_lowerOrder = false; } m_exponent = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String gammaTipText() { return "The value of the gamma parameter for RBF kernels."; } /** * Get the value of gamma. * * @return Value of gamma. */ public double getGamma() { return m_gamma; } /** * Set the value of gamma. * * @param v Value to assign to gamma. */ public void setGamma(double v) { m_gamma = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String cTipText() { return "The complexity parameter C."; } /** * Get the value of C. * * @return Value of C. */ public double getC() { return m_C; } /** * Set the value of C. * * @param v Value to assign to C. */ public void setC(double v) { m_C = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String toleranceParameterTipText() { return "The tolerance parameter (shouldn't be changed)."; } /** * Get the value of tolerance parameter. * @return Value of tolerance parameter. */ public double getToleranceParameter() { return m_tol; } /** * Set the value of tolerance parameter. * @param v Value to assign to tolerance parameter. */ public void setToleranceParameter(double v) { m_tol = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String epsTipText() { return "The epsilon for round-off error (shouldn't be changed)."; } /** * Get the value of eps. * @return Value of eps. */ public double getEps() { return m_eps; } /** * Set the value of eps. * @param v Value to assign to epsilon. */ public void setEps(double v) { m_eps = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String epsilonTipText() { return "The amount up to which deviations are tolerated. " + "Watch out, the value of epsilon is used with the (normalized/standardized) " + "data."; } /** * Get the value of epsilon. * @return Value of epsilon. */ public double getEpsilon() { return m_epsilon; } /** * Set the value of epsilon. * @param v Value to assign to epsilon. */ public void setEpsilon(double v) { m_epsilon = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String cacheSizeTipText() { return "The size of the kernel cache (should be a prime number)."; } /** * Get the size of the kernel cache * @return Size of kernel cache. */ public int getCacheSize() { return m_cacheSize; } /** * Set the value of the kernel cache. * @param v Size of kernel cache. */ public void setCacheSize(int v) { m_cacheSize = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String useRBFTipText() { return "Whether to use an RBF kernel instead of a polynomial one."; } /** * Check if the RBF kernel is to be used. * @return true if RBF */ public boolean getUseRBF() { return m_useRBF; } /** * Set if the RBF kernel is to be used. * @param v true if RBF */ public void setUseRBF(boolean v) { if (v) { m_featureSpaceNormalization = false; m_lowerOrder = false; } m_useRBF = v; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String featureSpaceNormalizationTipText() { return "Whether feature-space normalization is performed (only " + "available for non-linear polynomial kernels)."; } /** * Check whether feature spaces is being normalized. * @return true if feature space is normalized. */ public boolean getFeatureSpaceNormalization() throws Exception { return m_featureSpaceNormalization; } /** * Set whether feature space is normalized. * @param v true if feature space is to be normalized. */ public void setFeatureSpaceNormalization(boolean v) throws Exception { if ((m_useRBF) || (m_exponent == 1.0)) { m_featureSpaceNormalization = false; } else { m_featureSpaceNormalization = v; } } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String lowerOrderTermsTipText() { return "Whether lower order polyomials are also used (only " + "available for non-linear polynomial kernels)."; } /** * Check whether lower-order terms are being used. * @return Value of lowerOrder. */ public boolean getLowerOrderTerms() { return m_lowerOrder; } /** * Set whether lower-order terms are to be used. Defaults * to false if a linear machine is built. * @param v Value to assign to lowerOrder. */ public void setLowerOrderTerms(boolean v) { if (m_exponent == 1.0 || m_useRBF) { m_lowerOrder = false; } else { m_lowerOrder = v; } } /** * Turns off checks for missing values, etc. Use with caution. */ public void turnChecksOff() { m_checksTurnedOff = true; } /** * Turns on checks for missing values, etc. */ public void turnChecksOn() { m_checksTurnedOff = false; } /** * Prints out the classifier. * * @return a description of the classifier as a string */ public String toString() { StringBuffer text = new StringBuffer(); int printed = 0; if ((m_alpha == null) && (m_sparseWeights == null)) { return "SMOreg : No model built yet."; } try { text.append("SMOreg\n\n"); text.append("Kernel used : \n"); if(m_useRBF) { text.append(" RBF kernel : K(x,y) = e^-(" + m_gamma + "* ^2)"); } else if (m_exponent == 1){ text.append(" Linear Kernel : K(x,y) = "); } else { if (m_featureSpaceNormalization) { if (m_lowerOrder){ text.append(" Normalized Poly Kernel with lower order : K(x,y) = (+1)^" + m_exponent + "/" + "((+1)^" + m_exponent + "*" + "(+1)^" + m_exponent + ")^(1/2)"); } else { text.append(" Normalized Poly Kernel : K(x,y) = ^" + m_exponent + "/" + "(^" + m_exponent + "*" + "^" + m_exponent + ")^(1/2)"); } } else { if (m_lowerOrder){ text.append(" Poly Kernel with lower order : K(x,y) = ( + 1)^" + m_exponent); } else { text.append(" Poly Kernel : K(x,y) = ^" + m_exponent); } } } text.append("\n\n"); // display the linear transformation String trans = ""; if (m_filterType == FILTER_STANDARDIZE) { //text.append("LINEAR TRANSFORMATION APPLIED : \n"); trans = "(standardized) "; //text.append(trans + m_data.classAttribute().name() + " = " + // m_Alin + " * " + m_data.classAttribute().name() + " + " + m_Blin + "\n\n"); } else if (m_filterType == FILTER_NORMALIZE) { //text.append("LINEAR TRANSFORMATION APPLIED : \n"); trans = "(normalized) "; //text.append(trans + m_data.classAttribute().name() + " = " + // m_Alin + " * " + m_data.classAttribute().name() + " + " + m_Blin + "\n\n"); } // If machine linear, print weight vector if (!m_useRBF && m_exponent == 1.0) { text.append("Machine Linear: showing attribute weights, "); text.append("not support vectors.\n"); // We can assume that the weight vector is stored in sparse // format because the classifier has been built text.append(trans + m_data.classAttribute().name() + " =\n"); for (int i = 0; i < m_sparseWeights.length; i++) { if (m_sparseIndices[i] != (int)m_classIndex) { if (printed > 0) { text.append(" + "); } else { text.append(" "); } text.append(Utils.doubleToString(m_sparseWeights[i], 12, 4) + " * "); if (m_filterType == FILTER_STANDARDIZE) { text.append("(standardized) "); } else if (m_filterType == FILTER_NORMALIZE) { text.append("(normalized) "); } if (!m_checksTurnedOff) { text.append(m_data.attribute(m_sparseIndices[i]).name()+"\n"); } else { text.append("attribute with index " + m_sparseIndices[i] +"\n"); } printed++; } } } else { text.append("Support Vector Expansion :\n"); text.append(trans + m_data.classAttribute().name() + " =\n"); printed = 0; for (int i = 0; i < m_alpha.length; i++) { double val = m_alpha[i] - m_alpha_[i]; if (java.lang.Math.abs(val) < 1e-4) continue; if (printed > 0) { text.append(" + "); } else { text.append(" "); } text.append(Utils.doubleToString(val, 12, 4) + " * K[X(" + i + "), X]\n"); printed++; } } if (m_b > 0) { text.append(" + " + Utils.doubleToString(m_b, 12, 4)); } else { text.append(" - " + Utils.doubleToString(-m_b, 12, 4)); } if (m_useRBF || m_exponent != 1.0) { text.append("\n\nNumber of support vectors: " + printed); } int numEval = 0; int numCacheHits = -1; if(m_kernel != null) { numEval = m_kernel.numEvals(); numCacheHits = m_kernel.numCacheHits(); } text.append("\n\nNumber of kernel evaluations: " + numEval); if (numCacheHits >= 0 && numEval > 0) { double hitRatio = 1 - numEval/(numCacheHits+numEval); text.append(" (" + Utils.doubleToString(hitRatio*100, 7, 3) + "% cached)"); } } catch (Exception e) { return "Can't print the classifier."; } return text.toString(); } /** * Main method for testing this class. */ public static void main(String[] argv) { Classifier scheme; try { scheme = new SMOreg(); System.out.println(Evaluation.evaluateModel(scheme, argv)); } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } } /** * Debuggage function. * Compute the value of the objective function. */ protected double objFun() throws Exception { double res = 0; double t = 0, t2 = 0; for(int i = 0; i < m_alpha.length; i++){ for(int j = 0; j < m_alpha.length; j++){ t += (m_alpha[i] - m_alpha_[i]) * (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i,j,m_data.instance(i)); } t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alpha_[i]) - m_epsilon * (m_alpha[i] + m_alpha_[i]); } res += -0.5 * t + t2; return res; } /** * Debuggage function. * Compute the value of the objective function. */ protected double objFun(int i1, int i2, double alpha1, double alpha1_, double alpha2, double alpha2_) throws Exception { double res = 0; double t = 0, t2 = 0; for(int i = 0; i < m_alpha.length; i++){ double alphai; double alphai_; if(i == i1){ alphai = alpha1; alphai_ = alpha1_; } else if(i == i2){ alphai = alpha2; alphai_ = alpha2_; } else { alphai = m_alpha[i]; alphai_ = m_alpha_[i]; } for(int j = 0; j < m_alpha.length; j++){ double alphaj; double alphaj_; if(j == i1){ alphaj = alpha1; alphaj_ = alpha1_; } else if(j == i2){ alphaj = alpha2; alphaj_ = alpha2_; } else { alphaj = m_alpha[j]; alphaj_ = m_alpha_[j]; } t += (alphai - alphai_) * (alphaj - alphaj_) * m_kernel.eval(i,j,m_data.instance(i)); } t2 += m_data.instance(i).classValue() * (alphai - alphai_) - m_epsilon * (alphai + alphai_); } res += -0.5 * t + t2; return res; } /** * Debuggage function. * Check that the set I0, I1, I2 and I3 cover the whole set of index * and that no attribute appears in two different sets. */ protected void checkSets() throws Exception{ boolean[] test = new boolean[m_data.numInstances()]; for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) { if(test[i]){ throw new Exception("Fatal error! indice " + i + " appears in two different sets."); } else { test[i] = true; } if( !((0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()) || (0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight())) ){ throw new Exception("Warning! I0 contains an incorrect indice."); } } for (int i = m_I1.getNext(-1); i != -1; i = m_I1.getNext(i)) { if(test[i]){ throw new Exception("Fatal error! indice " + i + " appears in two different sets."); } else { test[i] = true; } if( !( m_alpha[i] == 0 && m_alpha_[i] == 0) ){ throw new Exception("Fatal error! I1 contains an incorrect indice."); } } for (int i = m_I2.getNext(-1); i != -1; i = m_I2.getNext(i)) { if(test[i]){ throw new Exception("Fatal error! indice " + i + " appears in two different sets."); } else { test[i] = true; } if( !(m_alpha[i] == 0 && m_alpha_[i] == m_C * m_data.instance(i).weight()) ){ throw new Exception("Fatal error! I2 contains an incorrect indice."); } } for (int i = m_I3.getNext(-1); i != -1; i = m_I3.getNext(i)) { if(test[i]){ throw new Exception("Fatal error! indice " + i + " appears in two different sets."); } else { test[i] = true; } if( !(m_alpha_[i] == 0 && m_alpha[i] == m_C * m_data.instance(i).weight()) ){ throw new Exception("Fatal error! I3 contains an incorrect indice."); } } for (int i = 0; i < test.length; i++){ if(!test[i]){ throw new Exception("Fatal error! indice " + i + " doesn't belong to any set."); } } } /** * Debuggage function * Checks that : * alpha*alpha_=0 * sum(alpha[i] - alpha_[i]) = 0 */ protected void checkAlphas() throws Exception{ double sum = 0; for(int i = 0; i < m_alpha.length; i++){ if(!(0 == m_alpha[i] || m_alpha_[i] == 0)){ throw new Exception("Fatal error! Inconsistent alphas!"); } sum += (m_alpha[i] - m_alpha_[i]); } if(sum > 1e-10){ throw new Exception("Fatal error! Inconsistent alphas' sum = " + sum); } } /** * Debuggage function. * Display the current status of the program. * @param i1 the first current indice * @param i2 the second current indice */ protected void displayStat(int i1, int i2) throws Exception { System.err.println("\n-------- Status : ---------"); System.err.println("\n i, alpha, alpha'\n"); for(int i = 0; i < m_alpha.length; i++){ double result = (m_bLow + m_bUp)/2.0; for (int j = 0; j < m_alpha.length; j++) { result += (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i)); } System.err.print(" " + i + ": (" + m_alpha[i] + ", " + m_alpha_[i] + "), " + (m_data.instance(i).classValue() - m_epsilon) + " <= " + result + " <= " + (m_data.instance(i).classValue() + m_epsilon)); if(i == i1){ System.err.print(" <-- i1"); } if(i == i2){ System.err.print(" <-- i2"); } System.err.println(); } System.err.println("bLow = " + m_bLow + " bUp = " + m_bUp); System.err.println("---------------------------\n"); } /** * Debuggage function * Compute and display bLow, lUp and so on... */ protected void displayB() throws Exception { //double bUp = Double.NEGATIVE_INFINITY; //double bLow = Double.POSITIVE_INFINITY; //int iUp = -1, iLow = -1; for(int i = 0; i < m_data.numInstances(); i++){ double Fi = m_data.instance(i).classValue(); for(int j = 0; j < m_alpha.length; j++){ Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i)); } System.err.print("(" + m_alpha[i] + ", " + m_alpha_[i] + ") : "); System.err.print((Fi - m_epsilon) + ", " + (Fi + m_epsilon)); double fim = Fi - m_epsilon, fip = Fi + m_epsilon; String s = ""; if (m_I0.contains(i)){ if ( 0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()){ s += "(in I0a) bUp = min(bUp, " + fim + ") bLow = max(bLow, " + fim + ")"; } if ( 0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight()){ s += "(in I0a) bUp = min(bUp, " + fip + ") bLow = max(bLow, " + fip + ")"; } } if (m_I1.contains(i)){ s += "(in I1) bUp = min(bUp, " + fip + ") bLow = max(bLow, " + fim + ")"; } if (m_I2.contains(i)){ s += "(in I2) bLow = max(bLow, " + fip + ")"; } if (m_I3.contains(i)){ s += "(in I3) bUp = min(bUp, " + fim + ")"; } System.err.println(" " + s + " {" + (m_alpha[i]-1) + ", " + (m_alpha_[i]-1) + "}"); } System.err.println("\n\n"); } /** * Debuggage function. * Checks if the equations (6), (8a), (8b), (8c), (8d) hold. * (Refers to "Improvements to SMO Algorithm for SVM Regression".) * Prints warnings for each equation which doesn't hold. */ protected void checkOptimality() throws Exception { double bUp = Double.POSITIVE_INFINITY; double bLow = Double.NEGATIVE_INFINITY; int iUp = -1, iLow = -1; for(int i = 0; i < m_data.numInstances(); i++){ double Fi = m_data.instance(i).classValue(); for(int j = 0; j < m_alpha.length; j++){ Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i)); } double fitilde = 0, fibarre = 0; if(m_I0.contains(i) && 0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()){ fitilde = Fi - m_epsilon; fibarre = Fi - m_epsilon; } if(m_I0.contains(i) && 0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight()){ fitilde = Fi + m_epsilon; fibarre = Fi + m_epsilon; } if(m_I1.contains(i)){ fitilde = Fi - m_epsilon; fibarre = Fi + m_epsilon; } if(m_I2.contains(i)){ fitilde = Fi + m_epsilon; fibarre = Double.POSITIVE_INFINITY; } if(m_I3.contains(i)){ fitilde = Double.NEGATIVE_INFINITY; fibarre = Fi - m_epsilon; } if(fibarre < bUp){ bUp = fibarre; iUp = i; } if(fitilde > bLow){ bLow = fitilde; iLow = i; } } if(!(bLow <= bUp + 2 * m_tol)){ System.err.println("Warning! Optimality not reached : inequation (6) doesn't hold!"); } boolean noPb = true; for(int i = 0; i < m_data.numInstances(); i++){ double Fi = m_data.instance(i).classValue(); for(int j = 0; j < m_alpha.length; j++){ Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i)); } double Ei = Fi - ((m_bUp + m_bLow) / 2.0); if((m_alpha[i] > 0) && !(Ei >= m_epsilon - m_tol)){ System.err.println("Warning! Optimality not reached : inequation (8a) doesn't hold for " + i); noPb = false; } if((m_alpha[i] < m_C * m_data.instance(i).weight()) && !(Ei <= m_epsilon + m_tol)){ System.err.println("Warning! Optimality not reached : inequation (8b) doesn't hold for " + i); noPb = false; } if((m_alpha_[i] > 0) && !(Ei <= -m_epsilon + m_tol)){ System.err.println("Warning! Optimality not reached : inequation (8c) doesn't hold for " + i); noPb = false; } if((m_alpha_[i] < m_C * m_data.instance(i).weight()) && !(Ei >= -m_epsilon - m_tol)){ System.err.println("Warning! Optimality not reached : inequation (8d) doesn't hold for " + i); noPb = false; } } if(!noPb){ System.err.println(); //displayStat(-1,-1); //displayB(); } } }