;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;------------------------------- ; MESO: the transient version ;------------------------------- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (ns forensics.mesov4 (:use (forensics utils)) (:use (incanter core))) ;------------------- ; Utils ;------------------- (defn numeric-euclidean-distance! [one two] (let [on1 (last one) tw2 (last two) c1 (Math/pow (if (= on1 tw2) 0 1) 2)] (loop [di [] i 0 j 0] (if (> (- (count one) 1) i) (recur (conj di (Math/pow (- (nth one i) (nth two j)) 2)) (inc i) (inc j)) (Math/sqrt (apply + (conj di c1))))))) (declare nearest!) (defn find-nearest-sphere! [instance spheres distance-fnc & options] (let [opts (if options (apply assoc {} options) nil) dis (if (:dis opts) true false)] (loop [li (transient []) i 0] (if (< i (count spheres)) (recur (conj! li [(distance-fnc instance (nth (nth spheres i) 0)) (nth spheres i)]) (inc i)) (if (= dis true) (nearest! li) (nth (nearest! li) 1)))))) (defn nearest! [li] (loop [j 0 max [Integer/MAX_VALUE]] (if (< j (count li)) (recur (inc j) (if (< (nth max (- (count max) 1)) (nth (nth li j) 0)) max (conj max (nth (nth li j) 0)))) (loop [small (nth max (- (count max) 1)) k 0] (if (= small (nth (nth li k) 0)) (nth li k) (recur small (inc k))))))) (defn remove-sphere! [ns new-s spheres] (loop [n 0 new-spheres (transient [])] (if (< n (count spheres)) (recur (inc n) (if (not (= (nth spheres n) ns )) (conj! new-spheres (nth spheres n)) new-spheres)) (conj! new-spheres new-s)))) (defn add-instance [sphere instance] (bind-rows (matrix (second sphere)) (matrix [instance]))) (declare get-class) (defn find-centroid [instances] (let [centroid (apply vector (butlast (map #(/ % (count instances)) (map #(apply + %) (Transpose instances))))) klass (get-class [centroid instances] centroid)] (conj centroid klass))) ;---------------------------------- ; Activation functions (f) - 7 candidates (6 done) ; v = compression rate - fraction of patterns removed during compression. ; c = a configuration parameter in the range [0. 1.0] ; r = spheres / patterns ;---------------------------------- (defn f1 [c r] (+ (/ 1 2) (/ (Math/tanh (- (/ (* 3 r) c) 3)) 2))) ;----------------------- ; f6 for compression ;----------------------- (defn f6 [v c r] (+ (/ 1 2) (/ (Math/tanh (- (/ (* 3 r) (max v c)) 3)) 2))) ;----------------------------------- ; Grow functions & ; d = distance between the new pattern and the nearest sensitivity sphere ; distance& = current distance ; f = activation function ; grow& = f without compression ; grow&-c = f with compression ;----------------------------------- (defn grow& [[d distance& f c r _]] (/ (* (- d distance&) (/ distance& d) (f c r)) (+ 1 (Math/log (Math/pow (+ (- d distance&) 1) 2))))) (defn grow&-c [[d distance& f c r v]] (/ (* (- d distance&) (/ distance& d) (f c r v)) (+ 1 (Math/log (Math/pow (+ (- d distance&) 1) 2))))) ;-------------------------------- ; Sensitivity Sphere Creation ;-------------------------------- (defn sensitivity-sphere-creation " Argument: D : dataset Options: :distance-fnc (default numeric-euclidean-distance!) :activation-fnc (default f1) :grow-fnc (default grow&) :c (default 0.6) :v (default 3) for compression Examples: (use '(incanter core stats io datasets)) (use '(forensics utils mesov4 row_pruning EqualFrequencyBinning)) (def iris (to-matrix (get-dataset :iris))) (ssc iris) (ssc iris :c 0.7 :v 8) " ([D & options] (let [opts (if options (apply assoc {} options) nil) d (if (:distance-fnc opts) (:distance-fnc opts) numeric-euclidean-distance!) f (if (:activation-fnc opts) (:activation-fnc opts) f1) g (if (:grow-fnc opts) (:grow-fnc opts) grow&) c (if (:c opts) (:c opts) 0.6) v (if (:v opts) (:v opts) 3) first-sphere [(first D) [(first D)]]] (loop [data D delta 0.0 spheres (transient [first-sphere])] (let [one (first data) s-dist (find-nearest-sphere! one spheres d :dis true) nearest-sphere (nth s-dist 1) distance (nth s-dist 0) r (/ (count spheres) (if (= (count data) 0) 1 (count data)))] (if (not (empty? data)) (recur (rest data) (if (<= distance delta) delta (if (= delta 0.0) (* distance (* c c)) (+ delta (g [distance delta f c r v])))) (if (<= distance delta) (let [new-instances (add-instance nearest-sphere one) new-centroid (find-centroid new-instances) new-sphere [new-centroid new-instances]] (remove-sphere! nearest-sphere new-sphere spheres)) (conj! spheres [one [one]]))) (persistent! spheres))))))) (defn ssc [D & options] (sensitivity-sphere-creation D)) ;------------------------------------------------------- ; Meso Tree Stucture for Training ;------------------------------------------------------- (def *subpartitions* []) (defn subpartitions [pivots nearest] (doseq [one pivots] (def *subpartitions* (conj *subpartitions* (apply vector (cons one (remove #(= nil %) (map #(if (= one (second %)) (first %)) nearest))))))) (let [result *subpartitions*] (def *subpartitions* []) result)) (defn create-partitions [spheres q d] (let [pivots (apply vector (take q spheres)) nearest (map #(list %1 %2) spheres (map #(find-nearest-sphere! (first %) pivots d) spheres)) subpartitions (subpartitions pivots nearest)] subpartitions)) (defn meso-tree " Argument: S : spheres Options: :distance-fnc (default numeric-euclidean-distance!) :q (default 8) only powers of 2 numbers (2 4 8) Examples: (use '(incanter core stats io datasets)) (use '(forensics utils mesov4 row_pruning EqualFrequencyBinning)) (def iris (to-matrix (get-dataset :iris))) (def iris-ssc (ssc iris)) (meso-tree iris-ssc) " ([S & options] (let [opts (if options (apply assoc {} options) nil) d (if (:distance-fnc opts) (:distance-fnc opts) numeric-euclidean-distance!) q (if (:q opts) (:q opts) 8)] (if (<= (count S) q) (vector 'a (apply vector S)) (let [partitions (create-partitions S q d)] (loop [part partitions T [] i 0] (if (>= i q) T (recur (apply vector (rest part)) (conj T (vector (first (first part)) (meso-tree (rest (first part)) :q q))) (inc i))))))))) ;----------------------------------------- ; Classifying ;----------------------------------------- (defn get-class1 [sphere instance] (let [s (second sphere) firstvalue (last (first s))] (if (= (count (filter #(= firstvalue (last %)) s)) (count s)) firstvalue (let [d (map #(numeric-euclidean-distance! instance %) s)] (second (first (reverse (compress (sort (last (Transpose (map second (take (if (< (count s) 4) (count s) (int (Math/sqrt (count s)))) (sort-by first (map #(vector %1 %2) d s))))))))))))))) (defn get-class [sphere instance] (let [s (second sphere) firstvalue (last (first s))] (if (= (count (filter #(= firstvalue (last %)) s)) (count s)) firstvalue (get-class (find-nearest-sphere! instance (ssc s) numeric-euclidean-distance!) instance)))) (defn classify2 [tree instance] "Classify one instance" (loop [tr tree] (if (= (first tr) 'a) ; (get-class (last (first (find-nearest-sphere! instance (second tr) numeric-euclidean-distance!))) ; instance) (recur (let [val (find-nearest-sphere! instance (apply vector (map #(first %) tr)) numeric-euclidean-distance!) branch (second (first (filter #(= val (first %)) tr)))] branch))))) (defn classify3 [tree instance] "Classify one instance" (loop [tr tree] (if (= (first tr) 'a) ; (get-class ; (last ; (first (find-nearest-sphere! instance (second tr) numeric-euclidean-distance!) ; instance) (recur (let [val (find-nearest-sphere! instance (apply vector (map #(first %) tr)) numeric-euclidean-distance!) branch (second (first (filter #(= val (first %)) tr)))] branch))))) (defn classify [trees instance] (loop [tr trees result []] (if (empty? tr) (last (first (find-nearest-sphere! instance result numeric-euclidean-distance!))) (recur (rest tr) (conj result (classify3 (first tr) instance)))))) (defn classify-all [trees I] (apply vector (map #(classify trees %) I))) ;----------------------------------------- ; Training and Testing data ;----------------------------------------- (defn meso-acc " Options: :distance-fnc (default numeric-euclidean-distance!) :q (default 8) only powers of 2 numbers (2 4 8) :n (default 10) number of folds :activation-fnc (default f1) :grow-fnc (default grow&) :c (default 0.6) :v (default 3) for compression " ([fold F & options] (let [opts (if options (apply assoc {} options) nil) d (if (:distance-fnc opts) (:distance-fnc opts) numeric-euclidean-distance!) q (if (:q opts) (:q opts) 8) n (if (:n opts) (:n opts) 10) f (if (:activation-fnc opts) (:activation-fnc opts) f1) g (if (:grow-fnc opts) (:grow-fnc opts) grow&) c (if (:c opts) (:c opts) 0.6) v (if (:v opts) (:v opts) 3) matf (matrix fold) col (ncol matf) want (sel matf :cols (- col 1)) test (sel matf :cols (range 0 col)) train (apply vector (apply concat (filter #(not= fold %) F))) spheres (ssc train :d d :q q :n n :f f :g g :c c :v v) tree (meso-tree spheres :q q) got (classify-all tree test) accuracy (acc want got) acc-detect (abcd-stats want got (unique-goals (concat train fold)))] [accuracy acc-detect]))) (defn meso " Argument: D : dataset Options: :distance-fnc (default numeric-euclidean-distance!) :q (default 8) only powers of 2 numbers (2 4 8) for tree :n (default 10) number of folds :activation-fnc (default f1) :grow-fnc (default grow&) :c (default 0.6) :v (default 3) for compression Examples: (use '(incanter core stats io datasets)) (use '(forensics utils mesov4 row_pruning EqualFrequencyBinning)) (def iris (to-matrix (get-dataset :iris))) (meso iris) " ([D & options] (let [opts (if options (apply assoc {} options) nil) d (if (:distance-fnc opts) (:distance-fnc opts) numeric-euclidean-distance!) q (if (:q opts) (:q opts) 8) n (if (:n opts) (:n opts) 10) f (if (:activation-fnc opts) (:activation-fnc opts) f1) g (if (:grow-fnc opts) (:grow-fnc opts) grow&) c (if (:c opts) (:c opts) 0.6) v (if (:v opts) (:v opts) 3) F (folds D n) stats (loop [f F result []] (if (empty? f) result (recur (rest f) (conj result (meso-acc (first f) F :d d :q q :n n :f f :g g :c c :v v))))) accuracy (first (Transpose stats)) t-acc (/ (apply + accuracy) (count accuracy)) pds (reduce plus (matrix (second (Transpose stats))))] (println 'Total 'accuracy t-acc) (dotimes [one (count pds)] (println 'For (nth (unique-goals D) one) 'pd (nth pds one)))))) ;--------------------------------------------- (defn get-train-test [f D] ; (let [f (folds D 10)] {:test f :train (prototype-sort D)}) (defn pro-tree1 [D-sorted] (loop [d D-sorted result []] (if (empty? d) result (recur (rest d) (conj result (ssc (first d))))))) (defn pro-tree [prototypes] (loop [p prototypes result []] (if (empty? p) result (recur (rest p) (conj result (meso-tree (first p))))))) (defn prototype-trainer [fold D] (let [tt (get-train-test fold D) test (tt :test) train (tt :train) trees (pro-tree (pro-tree1 train))] {:test test :trees trees})) (defn pro-acc [Dmap] (let [test (:test Dmap) trees (:trees Dmap) new-test (sel test :cols (range 0 (ncol test))) want (trans (sel test :cols (- (ncol test) 1))) got (classify-all trees new-test) accuracy (acc want got)] accuracy)) (defn knn-meso [D] (let [F (folds D 10)] (loop [f F results []] (if (empty? f) (/ (apply + results) (count results)) (recur (rest f) (conj results (let [train (prototype-trainer (first f) (matrix (apply concat (filter #(not= (first f) %) F)))) result (pro-acc train)] result))))))) ; (let [train (prototype-trainer D) ; result (pro-acc train)] ; result))