(ns code.utils.EqualFrequencyBinning
  "Equal-frequency binning (discretization) of numeric data.
  Missing values are represented by the symbol ? and pass through unchanged."
  (:use (code.utils utils))
  (:use (incanter core stats)))

;; Legacy scratch state. Only `breaks1` reads or writes these vars now;
;; `breaks` (and therefore `efb`) no longer touches them. They are kept so
;; that any external code referring to them keeps resolving.
(def OUT [])
(def N 1)

(defn- scan-breaks
  "Walks the value x followed by the values in l and collects break points.

  b4 is the value seen just before x, n is the number of values counted
  since the last break and out is the vector of breaks collected so far.
  A value becomes a break when it differs from its predecessor and at least
  max-size values have been counted since the previous break. The final
  value is always appended unless `member?` reports it is already in out.

  Returns [n out]. Uses loop/recur, so stack depth does not grow with the
  length of l."
  [x l b4 max-size n out]
  (loop [x x, more l, prev b4, n n, out out]
    (let [cut? (and (not= x prev) (>= n max-size))
          n    (if cut? 0 n)
          out  (if cut? (conj out x) out)]
      (if (not-empty more)
        (recur (first more) (rest more) x (inc n) out)
        ;; `member?` is not defined in this file (presumably it comes from
        ;; code.utils.utils); it is called exactly as the original code did.
        [n (if (member? x out) out (conj out x))]))))

(defn breaks1
  "Legacy entry point, kept for backward compatibility.

  Continues a break-point scan from the current values of the global vars
  N and OUT and stores the updated values back into them. Called for its
  side effects only; returns nil. Prefer `breaks`, which is pure."
  [x l b4 max-size]
  (let [[n out] (scan-breaks x l b4 max-size N OUT)]
    (alter-var-root #'N (constantly n))
    (alter-var-root #'OUT (constantly out))
    nil))

(defn efb1
  "Maps a single value x to its bin label: the first element of break-at
  that is >= x. The missing-value marker ? is returned unchanged.

  break-at must contain an element >= x; when the break points are exhausted
  the comparison is made against nil and throws, as it always did."
  [x break-at]
  (if (= x '?)
    x
    (loop [cuts break-at]
      (let [cut (first cuts)]
        (if (<= x cut)
          cut
          (recur (rest cuts)))))))

(defn breaks
  "Returns the vector of break points for the values in l, aiming at
  max-size values per bin. A break is never placed between two equal
  adjacent values. `efb` passes a sorted sequence of numbers.

  Pure function: it neither reads nor writes the global OUT / N vars."
  [l max-size]
  (second (scan-breaks (first l) (rest l) nil max-size 1 [])))

(defn sane-numbers
  "Returns the numeric elements of data, sorted ascending."
  [data]
  (sort (filter number? data)))

(defn efb
  "Equal-frequency binning of one sequence of values.

  data - sequence of numbers and/or the missing-value marker ?
  nb   - desired number of bins (defaults to 10)

  Returns a lazy sequence, in the order of data, in which every number is
  replaced by the break point of the bin it falls in and every ? is kept."
  ([data] (efb data 10))
  ([data nb]
   (let [numbers  (sane-numbers data)
         ;; target number of values per bin
         want     (Math/round (/ (count numbers) (* nb 1.0)))
         break-at (breaks numbers want)]
     (map (fn [datum] (efb1 datum break-at)) data))))

(defn efb2
  "Applies `efb` with n bins (default 10) to every sequence produced by
  (Transpose data) and returns the results as a vector, one per sequence.
  `Transpose` is not defined in this file (presumably code.utils.utils)."
  ;; The one-argument arity used to call (efb1 data 10), which handed the
  ;; number 10 to efb1 as its break-point sequence and threw.
  ([data] (efb2 data 10))
  ([data n]
   (vec (map (fn [column] (efb column n)) (Transpose data)))))

(defn discretize
  "Replaces every value of col by its equal-frequency bin label, using the
  default number of bins of `efb`. Returns a vector in the order of col.
  The elements of col must be mutually comparable, because col is sorted."
  [col]
  (let [sorted (sort col)
        ;; value -> bin lookup table; equal values always get the same bin,
        ;; so one map entry per distinct value is enough.
        bin-of (zipmap sorted (efb sorted))]
    (vec (map bin-of col))))

(defn discretizer
  "Discretizes a whole data set: applies `discretize` to every sequence of
  (Transpose data), transposes the result back and wraps it with `matrix`."
  [data]
  (matrix (Transpose (map discretize (Transpose data)))))