(ns code.learners.nb.nb
  (:use (code.utils utils nway EqualFrequencyBinning))
  (:use (code.learners.kmeans k_means_model k_means_classifier))
  (:use (incanter core stats)))

(defn nb-table
  "Builds the Naive Bayes model (a plain map) for data set D.

  Keys of the returned map:
    :N        - number of rows in D
    :classes  - levels of the last column, as reported by `tabulate`
    :nclasses - number of distinct classes
    :classi   - zero-based index of the class column
    :gclasses - seq of [class rows] pairs, the rows coming from grouping D
                on its last column

  NOTE(review): the pairing in :gclasses assumes `group-by` yields its groups
  in the same order as the class levels reported by `tabulate` - confirm."
  [D]
  (let [tbl     (tabulate D)
        classes (last (tbl :levels))]
    {:N        (nrow D)
     :classes  classes
     :nclasses (count classes)
     :classi   (- (tbl :n-vars) 1)
     :gclasses (map vector classes (group-by D (- (ncol D) 1)))}))

(defn f
  "Frequency lookup against a model built by `nb-table`.

  (f tbl class1)
      => number of training rows whose class is class1.
  (f tbl class1 :value1 v :attribute i)
      => number of training rows of class1 whose i-th column equals v.

  Returns 0 in both forms when class1 has no rows in the model. Previously
  only the second form did so; the first form called `nrow` on nil, which
  broke classification whenever a class was missing from the training data."
  [tbl class1 & options]
  (let [opts (when options (apply hash-map options))
        rows (second (first (filter #(= class1 (first %)) (tbl :gclasses))))]
    (cond
      ;; Class unseen in the training data: its count is zero either way.
      (nil? rows) 0
      ;; No keyword options: plain class frequency.
      (nil? opts) (nrow rows)
      :else
      (let [value (or (:value1 opts) nil)   ; a false value is normalised to nil
            att   (:attribute opts)
            ;; The original wraps a one-row group in a vector before
            ;; filtering; that special case is kept as-is.
            row-seq (if (= (nrow rows) 1) [rows] rows)]
        (nrow (filter #(= (nth % att) value) row-seq))))))

(defn nb-classifier
  "Scores instance `one` (attribute values only, no class label) against every
  class in `classes` using the model `tbl`.

  For each class c:
    prior      = (count(c) + k) / (N + k * nclasses)
    likelihood = (count(c, attribute i = value i) + m * prior) / (count(c) + m)
    score      = prior * product of the likelihoods over all attributes

  Returns a seq of [class score] pairs ordered by descending score, so the
  first pair is the predicted class.

  The scores are multiplied directly rather than summed in log space, as in
  the original; the return values are unchanged from before.
  Throws on division by zero if m is 0 and a class has no training rows."
  [tbl classes one m k]
  (let [nclasses (tbl :nclasses)
        n1       (tbl :N)
        score    (fn [c]
                   (let [class-count (f tbl c)   ; hoisted: same for every attribute
                         prior       (/ (+ class-count k)
                                        (+ n1 (* k nclasses)))
                         likelihoods (map-indexed
                                       (fn [i v]
                                         (/ (+ (f tbl c :value1 v :attribute i)
                                               (* m prior))
                                            (+ class-count m)))
                                       one)]
                     [c (apply * (conj (vec likelihoods) prior))]))]
    ;; reverse + sort-by (not a descending comparator) is kept on purpose:
    ;; it preserves the original ordering of tied scores.
    (reverse (sort-by second (mapv score classes)))))

(defn nb
  "Runs the n-way Naive Bayes experiment over the data sets in D.

  n1 is handed to `nway` unchanged.
  D  is read as (first D) = a label copied into the result and
     (second D) = a seq of data sets.

  Each data set is discretised once, then passed to `nway` with a trainer
  (`nb-table`) and a tester that returns [actual predicted], where actual is
  the last element of the row and predicted is the top-ranked class from
  `nb-classifier` with m = 2 and k = 1. Every result from `nway` is turned
  into stats with `abcd-stats`, the stats of all data sets are combined via
  `org-data` / `overall`, and `quantile` is applied to each.

  Returns ['nb label q1 q2], where q1 and q2 are the first two quantile
  results.
  NOTE(review): the exact meaning of q1/q2 depends on what `overall` returns,
  which is defined outside this file - confirm."
  [n1 D]
  (let [evaluate (fn [dataset]
                   (let [data    (discretizer dataset)   ; discretise once, reuse below
                         classes ((nb-table data) :classes)
                         trainer (fn [train] (nb-table train))
                         tester  (fn [one model]
                                   (vector (last one)
                                           (first (first (nb-classifier model
                                                                        classes
                                                                        (butlast one)
                                                                        2
                                                                        1)))))]
                     (map #(abcd-stats (first %) (second %) classes)
                          (nway n1 data trainer tester))))
        ans      (mapv evaluate (second D))
        ans1     (apply bind-rows
                        (map matrix (org-data (apply vector (apply concat ans)))))
        ans2     (map quantile (overall ans1))]
    (vector 'nb (first D) (first ans2) (second ans2))))