(ns code.utils.utils (:use (incanter core stats charts))) ;---------Distance Functions------------------------ ;Distance for discrete data (defn EuclideanDistance [[Oi Oj _]] (let [k (if (= (last Oi) (last Oj)) 0 1)] (Math/sqrt (apply + (map #(Math/pow % 2) (conj (map - (butlast Oi) (butlast Oj)) k )))))) (defn EuclideanDistance2 [[Oi Oj _]] (Math/sqrt (apply + (map #(if (= %1 %2) 0 1) Oi Oj)))) (def memo-EuclideanDistance (memoize EuclideanDistance)) ;Distance for continuous data (defn EuclideanDistance1 [[Oi Oj _]] ; (let [k (if (= (last Oi) (last Oj)) 0 1)] (Math/sqrt (apply + (map #(Math/pow % 2) (map - Oi Oj) )))) ;Euclidean Distance between projections (defn ProjectionED [[Oi Oj xi xj]] (Math/sqrt (- (Math/pow (EuclideanDistance [Oi Oj]) 2) (Math/pow (- xi xj) 2)))) (defn numeric-euclidean-distance! [one two] (if (= (ncol one) (ncol two)) (let [on1 (last one) tw2 (last two) c1 (Math/pow (if (= on1 tw2) 0 1) 2)] (loop [di (transient []) i 0] (if (> (- (count one) 1) i) (recur (conj! di (Math/pow (- (nth one i) (nth two i)) 2)) (inc i)) (Math/sqrt (apply + (conj (persistent! di) c1)))))) (loop [di (transient []) i 0] (if (< i (min (count one) (count two))) (recur (conj! di (Math/pow (- (nth one i) (nth two i)) 2)) (inc i)) (Math/sqrt (apply + (persistent! di))))))) (defn cat-euclidean-distance [one two] (loop [i 0 result (transient [])] (if (>= i (min (count one) (count two))) (Math/sqrt (apply + (persistent! result))) (recur (inc i) (conj! result (if (= (nth one i) (nth two i)) 0 1)))))) ;--------------------------------------------------- (defn mean1 [lst] (map #(/ % (count lst)) (apply #'map (cons #'+ lst)))) (defn FindPos [val lst counter] (if (= (not-empty lst) nil) counter (if (= val (first lst)) (inc counter) (FindPos val (rest lst) (inc counter))))) (defn Transpose [data] (apply vector (apply #'map (cons #'vector data)))) ;----------compress---------------------- (defn n-elts [elt n] (if (> n 1) (list n elt) (list 1 elt))) (defn compr [elt n lst] (if (empty? lst) (list (n-elts elt n)) (let [after (first lst)] (if (= after elt) (compr elt (+ n 1) (rest lst)) (cons (n-elts elt n) (compr after 1 (rest lst))))))) (defn compress [lst] (if (or (= (count lst) 0) (= (count lst) 1)) lst (compr (first lst) 1 (rest lst)))) (defn unique-compress [coll] ; coll = (compress coll) (map #(second %) coll)) ;------------------------------------------- ;----------- Used by seheult and grove------ (defn nr [minimum maximum] (+ minimum (rand (- maximum minimum)))) ;------------------------------------------ ;-------------Normalize-------------- (def *norm* []) (defn normalize [data] (doseq [one (Transpose data)] (def *norm* (conj *norm* (map #(/ % (apply max one)) one)))) (let [result *norm*] (def *norm* []) (apply vector (map #(apply vector %) (Transpose result))))) ;---------------------------------------------------------- ;-------------Rank------------------------------- (defn stat-rank [col] (let [len (count col) ordinal (map #(vector %1 %2) (sort col) (range 1 (+ len 1)))] (loop [r ordinal result (transient [])] (if (empty? r) (apply vector (apply concat (persistent! result))) (recur (if (= (first (first r)) (first (second r))) (rest (rest r)) (rest r)) (conj! result (if (= (first (first r)) (first (second r))) (let [idx1 (/ (+ (second (first r)) (second (second r))) 2.0) ans [idx1 idx1]] ans) [(second (first r))]))))))) ;------------------------------------------------- (defn tab [n] (dotimes [_ n] (print " "))) (defn print-tree ([tree] (print-tree tree 0)) ([tree depth] (println "") (tab (+ depth 1)) (println (first tree)) (let [subtrees (rest tree) c1 (count subtrees)] (loop [subtree subtrees c 0] (tab (+ depth 1)) (print " " '= (first (first subtree))) (if (not (vector? (second (first subtree)))) (println " " '=> (second (first subtree))) (print-tree (second (first subtree)) (+ depth 5))) (if (>= c (- c1 1)) 'done (recur (rest subtree) (inc c))))))) (defn member? [item lst] (if (empty? lst) 'nil (if (= item (first lst)) true (member? item (rest lst))))) (defn shuffle1 [X] (let [len (count X)] (loop [rand-indices []] (let [rdi (rand-int len)] (if (= (count (remove #(= 'nil %) rand-indices)) len) (let [ri (remove #(= 'nil %) rand-indices)] (matrix (map #(nth X %) ri))) (recur (conj rand-indices (if (not (member? rdi rand-indices)) rdi)))))))) (defn shuffle0 [X] (loop [i 0 result (transient [])] (if (= i (count X)) (persistent! result) (recur (inc i) (conj! result (rand-int (count X))))))) (defn shuffle [X] (let [idx (shuffle0 X) bcols (bind-columns idx X) scols (matrix (sort-by first (filter #(> (ncol %) 0) bcols))) Xcols (sel scols :cols (range 1 (ncol scols)))] Xcols)) (defn acc [want got] (/ (count (filter #(not= nil %) (map #(if (= %1 %2) 1) want got))) (* 1.0 (count want)))) (defn abcd [want got goal] (loop [w want g got d 0 c 0 b 0 a 0] (if (empty? w) [a b c d] (recur (rest w) (rest g) (if (= (first g) goal) (if (= (first w) goal) (inc d) d) d) (if (= (first g) goal) (if (not= (first w) goal) (inc c) c) c) (if (not= (first g) goal) (if (= (first w) goal) (inc b) b) b) (if (not= (first g) goal) (if (not= (first w) goal) (inc a) a) a))))) (defn abcd-stat [want got goal] (let [abcds (abcd want got goal) a (nth abcds 0) b (nth abcds 1) c (nth abcds 2) d (nth abcds 3) pd (if (= d 0) 0.0 (* 100.0 (/ d (+ b d)))) pf (if (= c 0) 0.0 (* 100.0 (/ c (+ a c))))] ; acc (format "%5.2f" (* 1.0 (/ (+ a d) (+ a b c d)))) ; pd (format "%5.2f" (if (= d 0) 0.0 (* 1.0 (/ d (+ b d))))) ; pf (format "%5.2f" (if (= c 0) 0.0 (* 1.0 (/ c (+ a c))))) ; prec (format "%5.2f" (if (= d 0) 0.0 (* 1.0 (/ d (+ c d)))))] [pd pf])) ; (println {:FOR goal :A a :B b :C c :D d :ACC acc :PD pd :PF pf :PREC prec}))) ; (println ':FOR goal ':A a ':B b ':C c ':D d ':ACC acc ':PD pd ':PF pf ':PREC prec))) (defn abcd-stats [want got goals] (loop [g goals result []] (if (empty? g) result (recur (rest g) (conj result (abcd-stat want got (first g))))))) (defn abcd-stat1 [want got goal] (let [abcds (abcd want got goal) a (nth abcds 0) b (nth abcds 1) c (nth abcds 2) d (nth abcds 3) pd (if (= d 0) 0.0 (* 100.0 (/ d (+ b d)))) prec (if (= d 0) 0.0 (* 1.0 (/ d (+ c d))))] ; acc (format "%5.2f" (* 1.0 (/ (+ a d) (+ a b c d)))) ; pd (format "%5.2f" (if (= d 0) 0.0 (* 1.0 (/ d (+ b d))))) ; pf (format "%5.2f" (if (= c 0) 0.0 (* 1.0 (/ c (+ a c))))) ; prec (format "%5.2f" (if (= d 0) 0.0 (* 1.0 (/ d (+ c d)))))] [pd pd])) ; (println {:FOR goal :A a :B b :C c :D d :ACC acc :PD pd :PF pf :PREC prec}))) ; (println ':FOR goal ':A a ':B b ':C c ':D d ':ACC acc ':PD pd ':PF pf ':PREC prec))) (defn abcd-stats1 [want got goals] (loop [g goals result []] (if (empty? g) result (recur (rest g) (conj result (abcd-stat1 want got (first g))))))) (defn ug1 [D] (let [C (apply vector (sort (last (Transpose D))))] (loop [c C result (transient [(first C)])] (let [elt (first c)] (if (empty? c) (persistent! result) (recur (rest c) (conj! result (if (not= elt (second c)) (second c))))))))) (defn unique-goals [D] (remove #(= 'nil %) (ug1 D))) (defn k-majority [col] (second (first (reverse (sort-by first (compress (sort col))))))) (defn centroid [cluster] (if (= (nrow cluster) 1) cluster (let [t-cluster (Transpose cluster) C (matrix cluster) C-noclass (sel C :cols (range 0 (- (ncol C) 1))) klass (matrix [(k-majority (last t-cluster))]) ans1 (bind-columns (div (reduce plus C-noclass) (count cluster)) klass) ans2 (if (= (count ans1) 1) (first ans1) ans1)] ans2))) (defn get-nearest [one data distance] (first (sort-by second (pmap #(vector % (distance one %)) data)))) (defn get-nearest-distance [one data distance] (second (first (sort-by second (pmap #(vector % (distance one %)) data))))) (defn get-nearest-distances [one data distance] (map second (sort-by second (pmap #(vector % (distance one %)) data)))) (defn overall [data] (loop [i 0 pds [] pfs []] (if (= i (nrow data)) (matrix [(remove #(= 'none %) (apply concat pfs)) (remove #(= 'none %) (apply concat pds))]) (recur (inc i) (conj pds (if (odd? i) (nth data i) ['none])) (conj pfs (if (even? i) (nth data i) ['none])))))) (defn get-median [col] (nth (quantile col) 2)) (defn fchart [data begin finish x y] (doto (line-chart (sel data :cols 1) (sel data :cols 2) :group-by (sel data :cols 0) :title "Finding Best Number of Features" :legend true :y-label y :x-label x) (set-y-range begin finish) (set-theme :dark) view )) (defn brittle-rank [data begin finish x y] (doto (line-chart (sel data :cols 1) (sel data :cols 2) :group-by (sel data :cols 0) :title "Finding Best Number of Features" :legend true :y-label y :x-label x) (set-y-range begin finish) (set-theme :dark) view )) (defn brittle-chart [data] (doto (bar-chart (sel data :cols 0) (sel data :cols 1) ; :group-by (sel data :cols 1) :title "Brittlness vs. Number of Clusters" ; :legend true :x-label "cluster#" :y-label "brittleness") view clear-background ) ) (defn brittle-chart1 [data] (doto (bar-chart (sel data :cols 1) (sel data :cols 2) :group-by (sel data :cols 0) :title "PD/PF vs. Brittleness" :legend true :x-label "brittleness" :y-label "pd/pf") view clear-background ; (add-text 0 97 "Setosa") ) ) (defn file-result [filename results] (binding [*out* (java.io.FileWriter. filename)] (loop [rlt results ans ""] (if (empty? rlt) (println 'done) (recur (rest rlt) (println (first rlt))))) (flush))) (defn search-distribution [distribution, distance] (loop [i distribution result 0] (if (empty? i) result (recur (rest i) (if (> (first (first i)) distance) (- (second (first i)) 1) result))))) (defn all-distances-between-points [data distance] (loop [dat data result []] (if (empty? dat) (let [answer (matrix (sort (apply concat result))) idx (matrix (range 0 (count answer)))] (bind-columns idx answer)) (recur (rest dat) (conj result (loop [dt (rest dat) ans []] (if (empty? dt) ans (recur (rest dt) (conj ans (distance (first dat) (first dt))))))))))) (defn org-data [list-of-lists] (let [org1 (Transpose list-of-lists) org2 (loop [o org1 results (transient [])] (if (empty? o) (persistent! results) (recur (rest o) (conj! results (Transpose (first o))))))] org2)) (defn make-random-data [D] (loop [i 0 results (transient [])] (if (= i 5) (persistent! results) (recur (inc i) (conj! results (shuffle D))))))