(ns code.experiments.prep
  "Dataset tables and a k-means clustering helper used to prepare experiments."
  (:use (code.utils utils))
  (:use (code.learners.kmeans k_means_classifier k_means_model))
  (:use (code.data data standard_data))
  (:use (incanter core stats)))

;; Numeric datasets as [n dataset] pairs.
;; NOTE(review): the leading 4 is presumably the cluster count `k` handed to
;; `exp1` -- confirm against the callers.
(def numeric-datasets
  [(vector 4 paint3)
   (vector 4 paint5)
   (vector 4 paint10)
   (vector 4 paint20)
   (vector 4 paint37)
   ; (vector 4 iris)
   (vector 4 wind204)])

;; Categorical datasets as [n dataset] pairs. The trailing number on each line
;; is the entry's zero-based position in this vector.
(def cat-datasets
  [(vector 4 audio)      ;0
   (vector 4 bc)         ;1
   (vector 4 heart)      ;2
   (vector 4 lym)        ;3
   (vector 4 pima)       ;4
   (vector 4 bc-fss)     ;5
   (vector 4 heart-fss)  ;6
   (vector 4 lym-fss)    ;7
   (vector 4 pima-fss)   ;8
   (vector 4 lym5-fss)   ;9
   (vector 4 lym6-fss)   ;10
   (vector 4 lym8-fss)   ;11
   (vector 4 lym10-fss)  ;12
   (vector 4 tumor)      ;13
   (vector 4 splice)])   ;14

;; Same pairs as `numeric-datasets`, with each dataset passed through
;; `make-random-data`. `map` is lazy, so the calls run on first traversal.
(def ndatasets
  (map #(vector (first %) (make-random-data (second %))) numeric-datasets))

;; Same pairs as `cat-datasets`, with each dataset passed through
;; `make-random-data`. `map` is lazy, so the calls run on first traversal.
(def cdatasets
  (map #(vector (first %) (make-random-data (second %))) cat-datasets))

;; Short aliases for the two distance functions.
;; NOTE(review): on Clojure 1.7+ `cat` replaces the referral to
;; clojure.core/cat in this namespace (a load-time warning); the name is kept
;; because callers may depend on it.
(def cat cat-euclidean-distance)
(def numeric numeric-euclidean-distance!)

(defn exp1
  "Clusters `data` into `k` groups with `k-means-model`, using `distance` as
  the distance function, and returns one matrix containing the rows of every
  cluster with an extra last column holding the zero-based index of the
  cluster the row belongs to.

  `data` is shuffled before clustering. Clusters are emitted in the order
  `k-means-model` returns them.
  NOTE(review): assumes `k-means-model` returns a sequence of Incanter
  matrices, one per cluster -- confirm."
  [k data distance]
  (let [clusters (k-means-model k (shuffle data) distance)
        label    (fn [idx cluster]
                   (let [labelled (bind-columns
                                    cluster
                                    (matrix (repeat (nrow cluster) idx)))]
                     ;; A one-row result is wrapped in a vector so that
                     ;; `concat` contributes it as a single row; presumably a
                     ;; one-row matrix would otherwise be walked element by
                     ;; element -- TODO confirm for the Incanter version used.
                     (if (= (nrow labelled) 1)
                       [labelled]
                       labelled)))]
    (matrix (apply concat (map-indexed label clusters)))))