(ns code.learners.knn.knn_classifier
  (:use (code.utils utils))
  (:use (incanter core stats)))

(defn knn-classifier
  "Classifies the row `one` from its `kn` nearest rows in `D`.

  Rows of `D` are ranked by `(distance one row)`, the `kn` closest are kept,
  and their labels are passed to `k-majority`, whose result is returned.
  When `(nrow D)` is 1, `(last D)` is returned directly.

  Assumes the class label is the last element of each row, and that
  `Transpose` / `k-majority` come from code.utils -- confirm there."
  [one kn D distance]
  (if (= (nrow D) 1)
    (last D)
    (let [k-nearest (map second
                         (take kn
                               (sort-by first
                                        (map #(vector (distance one %) %) D))))
          ;; With a single neighbour the label is duplicated so that
          ;; `k-majority` still receives a two-element collection.
          klass (if (= kn 1)
                  [(last (first k-nearest)) (last (first k-nearest))]
                  (last (Transpose k-nearest)))]
      (k-majority klass))))

(defn find-ranks
  "Returns a vector of ranks for the sorted values of `data`, where each run
  of tied values shares the average of the 1-based positions it occupies.

  Relies on `compress` (not defined in this file); the code reads the first
  element of each of its entries as the length of a run of equal values --
  presumably a run-length encoding, confirm against code.utils."
  [data]
  (let [sorted-data     (sort data)
        idx-data        (map #(vector %1 %2)
                             (range 1 (+ 1 (count data)))
                             sorted-data)
        compressed-data (compress sorted-data)]
    (loop [i 0
           idx idx-data
           result []]
      (if (empty? idx)
        (apply vector (apply concat result))
        (let [nums (first (nth compressed-data i))]
          (recur (inc i)
                 (drop nums idx)
                 ;; Every member of the run gets the mean of the positions
                 ;; covered by the run; `* 1.0` forces a floating-point rank.
                 (conj result
                       (repeat nums
                               (/ (apply + (map first (take nums idx)))
                                  (* 1.0 nums))))))))))

(defn- nearest-group-distances
  "Shared implementation of `brittleness` and `brittleness!`.

  `model` is grouped with `group-by` on column index `(- (ncol data) 1)`
  unless `(nrow model)` is 1. For every row of `testdata`, the groups whose
  label satisfies `(keep-class? test-label group-label)` are kept and the
  smallest `get-nearest-distance` from the test row to any kept group is
  recorded. When no group qualifies, 2.5 is recorded instead.

  Returns a vector with one entry per row of `testdata`.

  NOTE(review): `group-by` is called as (group-by coll column-index), so it
  is presumably a project helper rather than clojure.core/group-by -- confirm.
  NOTE(review): when `together` is a single row, the final `map` walks the
  elements of that row rather than the row itself -- confirm this is intended."
  [keep-class? testdata data model distance]
  (let [together (if (= (nrow model) 1)
                   model
                   (group-by model (- (ncol data) 1)))
        ;; A group that is a single row is wrapped so it can be treated
        ;; uniformly as a collection of rows.
        as-rows  (fn [group] (if (= (nrow group) 1) [group] group))]
    (loop [td testdata
           results (transient [])]
      (if (empty? td)
        (persistent! results)
        (let [one        (first td)
              candidates (if (= (nrow together) 1)
                           (if (keep-class? (last one) (last together))
                             together
                             [nil])
                           (filter #(keep-class? (last one)
                                                 (last (first (as-rows %))))
                                   together))
              nearest    (if (= (first candidates) nil)
                           2.5 ; fallback used when no group qualifies
                           (first (sort (map #(get-nearest-distance one (as-rows %) distance)
                                             candidates))))]
          (recur (rest td) (conj! results nearest)))))))

(defn brittleness
  "For each row of `testdata`, the distance to the nearest group of `model`
  whose label differs from the row's own label (2.5 when no such group
  exists). Returns a vector, one entry per test row."
  [testdata data model distance]
  (nearest-group-distances not= testdata data model distance))

(defn brittleness!
  "For each row of `testdata`, the distance to the nearest group of `model`
  whose label equals the row's own label (2.5 when no such group exists).
  Returns a vector, one entry per test row."
  [testdata data model distance]
  (nearest-group-distances = testdata data model distance))

(defn dNUN
  "Distance from `one` to its nearest unlike neighbour in `model`.

  Rows of `model` are ordered by `(distance one row)`; the distance of the
  first row whose last element differs from `(last one)` is returned.
  Returns 2.5 when `model` is empty or contains no row with a different
  label."
  [one model distance]
  (let [klass          (last one)
        by-distance    (sort-by second (map #(vector % (distance one %)) model))
        ;; Walk the whole distance-ordered list, not only its head, so a
        ;; same-class nearest neighbour does not hide unlike rows behind it.
        nearest-unlike (first (filter #(not= klass (last (first %))) by-distance))]
    (if nearest-unlike
      (second nearest-unlike)
      2.5)))

(defn brittleness1
  "Maps `dNUN` over `testdata`: the nearest-unlike-neighbour distance of each
  test row with respect to `model`. `data` is accepted but not used."
  [testdata data model distance]
  (map #(dNUN % model distance) testdata))