;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LDA and QDA classifiers from chapter 4 of Elements of Statistical Learning
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LDA 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(use '(incanter core stats charts io))

(def training (to-matrix 
                (read-dataset "http://bit.ly/464h4h" 
                              :header true)))
(def testing (to-matrix 
               (read-dataset "http://bit.ly/1btCei" 
                             :header true)))


(def K 11)
(def p 10)
(def N (nrow training))
(def group-counts (map nrow (group-by training 1)))

;; estimate the prior probabilities for each cluster
(def prior-probs (div group-counts N))

;; estimate the centroids for each cluster
(def cluster-centroids 
  (matrix 
    (for [x_k (group-by training 1 :cols (range 2 12))]
      (map mean (trans x_k)))))


;; estimate the covariance matrix to be used for all clusters
(def cluster-cov-mat 
  (let [groups (group-by training 1 :cols (range 2 12))]
    (reduce plus
      (map (fn [group centroid n]
        (reduce plus 
                (map #(div 
                        (mmult (trans (minus % centroid))
                               (minus % centroid))
                        (- N K))
                     group)))
             groups cluster-centroids group-counts))))

;; calculate the inverses of the cluster covariance matrices
(def inv-cluster-cov-mat (solve cluster-cov-mat))


;; define the linear discriminant function (ldf)
(defn ldf [x Sigma-inv mu_k pi_k]
  (+ (mmult x Sigma-inv (trans mu_k))
     (- (mult 1/2 (mmult mu_k Sigma-inv (trans mu_k))))
     (log pi_k)))


;; define a function to calculate the linear quadratic scores.
(defn calculate-scores
  ([data inv-cov-mat centroids priors]
    (matrix 
      (pmap (fn [row]
             (pmap (partial ldf row inv-cov-mat) 
                   centroids 
                   priors))
           (sel data :cols (range 2 12))))))

;; calculate the scores for the training data
(def training-lda-scores 
  (calculate-scores training
                    inv-cluster-cov-mat
                    cluster-centroids
                    prior-probs))


;; calculate the scores for the testing data
(def testing-lda-scores 
  (calculate-scores testing
                    inv-cluster-cov-mat
                    cluster-centroids
                    prior-probs))

;;(bind-columns (sel training :cols 1) (plus 1 (map max-index training-lda-scores)))
;;(bind-columns (sel testing :cols 1) (plus 1 (map max-index testing-lda-scores)))

(defn max-index 
  "Returns the index of the maximum value in the given sequence."
  ([x] 
    (let [max-x (reduce max x)
          n (length x)]
      (loop [i 0]
        (if (= (nth x i) max-x)
          i
          (recur (inc i)))))))


;; define a function to calculate the error rate
(defn error-rate [data scores] 
  (/ (sum (map #(if (= %1 %2) 0 1) 
               (sel data :cols 1) 
               (plus 1 (map max-index scores))))
     (nrow data)))


;; calculate the error rate for the training data (0.316)
(error-rate training training-lda-scores)


;; calculate the error rate for the testing data (0.56)
(error-rate testing testing-lda-scores)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; QDA 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(use '(incanter core stats charts io))

;(def training (to-matrix (read-dataset "/Users/dliebke/Desktop/esl/vowel.train.txt" :header true)))
(def training (to-matrix 
                (read-dataset "http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/vowel.train" 
                              :header true)))
;(def testing (to-matrix (read-dataset "/Users/dliebke/Desktop/esl/vowel.test.txt" :header true)))
(def testing (to-matrix 
               (read-dataset "http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/vowel.test" 
                             :header true)))


(def K 11)
(def p 10)
(def N (nrow training))
(def group-counts (map nrow (group-by training 1)))

;; estimate the prior probabilities for each cluster
(def prior-probs (div group-counts N))

;; estimate the centroids for each cluster
(def cluster-centroids (matrix 
                 (for [x_k (group-by training 1 :cols (range 2 12))]
                   (map mean (trans x_k)))))

;;------------------------------------------------------------------------------
;; CALCULATE THE K COVARIANCE MATRICES NECESSARY FOR QDA
;;------------------------------------------------------------------------------
               
;; estimate the covariance matrices for each cluster
(def cluster-cov-mats 
  (let [groups (group-by training 1 :cols (range 2 12))]
    (map (fn [group centroid n]
      (reduce plus 
              (map #(div 
                      (mmult (trans (minus % centroid))
                             (minus % centroid))
                      (dec n))
                   group)))
           groups cluster-centroids group-counts)))

;; calculate the inverses of the cluster covariance matrices
(def inv-cluster-cov-mats (map solve cluster-cov-mats))


;; define the quadratic discriminant function
(defn qdf [x Sigma_k Sigma-inv_k mu_k pi_k]
  (+ (- (mult 1/2 (log (det Sigma_k))))
     (- (mult 1/2 (mmult (minus x mu_k) 
                         Sigma-inv_k 
                         (trans (minus x mu_k)))))
     (log pi_k)))


(defn calculate-scores 
  ([data cov-mats inv-cov-mats centroids priors]
    (matrix 
      (pmap (fn [row]
             (pmap (partial qdf row) 
                   cov-mats 
                   inv-cov-mats 
                   centroids 
                   priors))
           (sel data :cols (range 2 12))))))

;; calculate the scores for the training data
(def training-qda-scores 
  (calculate-scores training 
                    cluster-cov-mats 
                    inv-cluster-cov-mats 
                    cluster-centroids 
                    prior-probs))

;; calculate the scores for the testing data
(def testing-qda-scores 
  (calculate-scores testing 
                    cluster-cov-mats 
                    inv-cluster-cov-mats 
                    cluster-centroids 
                    prior-probs))


;;(bind-columns (sel training :cols 1) (plus 1 (map max-index training-qda-scores)))
;;(bind-columns (sel testing :cols 1) (plus 1 (map max-index testing-qda-scores)))

(defn max-index 
  "Returns the index of the maximum value in the given sequence."
  ([x] 
    (let [max-x (reduce max x)
          n (length x)]
      (loop [i 0]
        (if (= (nth x i) max-x)
          i
          (recur (inc i)))))))


;; define a function to calculate the error rate
(defn error-rate [data scores] 
  (/ (sum (map #(if (= %1 %2) 0 1) 
               (sel data :cols 1) 
               (plus 1 (map max-index scores))))
     (nrow data)))


;; calculate the error rate for the training data
(error-rate training training-qda-scores)


;; calculate the error rate for the testing data
(error-rate testing testing-qda-scores)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; QDA w/ Eigenvalue Decomposition
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(use '(incanter core stats charts io))

;(def training (to-matrix (read-dataset "/Users/dliebke/Desktop/esl/vowel.train.txt" :header true)))
(def training (to-matrix 
                (read-dataset "http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/vowel.train" 
                              :header true)))
;(def testing (to-matrix (read-dataset "/Users/dliebke/Desktop/esl/vowel.test.txt" :header true)))
(def testing (to-matrix 
               (read-dataset "http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/vowel.test" 
                             :header true)))


(def K 11)
(def p 10)
(def N (nrow training))
(def group-counts (map nrow (group-by training 1)))

;; estimate the prior probabilities for each cluster
(def prior-probs (div group-counts N))

;; estimate the centroids for each cluster
(def cluster-centroids (matrix 
                 (for [x_k (group-by training 1 :cols (range 2 12))]
                   (map mean (trans x_k)))))

;; estimate the covariance matrices for each cluster
(def cluster-cov-matrices 
  (let [groups (group-by training 1 :cols (range 2 12))]
    (map (fn [group centroid n]
      (reduce plus 
              (map #(div 
                      (mmult (trans (minus % centroid))
                             (minus % centroid))
                      (dec n))
                   group)))
           groups cluster-centroids group-counts)))

;;------------------------------------------------------------------------------
;; ADDED TO QDA EXAMPLE TO IMPROVE PERFORMANCE
;;------------------------------------------------------------------------------
;; extract the eigenvalues and eigenvectors from the covariance matrices 
;; for each cluster, to improve performance
(def Sigma-decomp 
  (map decomp-eigenvalue cluster-cov-matrices ))
(def D (map #(diag (:values %)) Sigma-decomp))
(def U (map #(:vectors %) Sigma-decomp))

;;------------------------------------------------------------------------------
;; CHANGED FROM QDA EXAMPLE
;;------------------------------------------------------------------------------
;; define the quadratic discriminant function using the eigenvalues and eigenvectors
(defn qdf [x D_k U_k mu_k pi_k]
  (+ (- (mult 1/2 (sum (map log (diag D_k)))))
     (- (mult 1/2 
              (mmult (trans (mmult (trans U_k) 
                                   (trans (minus x mu_k))))
                     (solve D_k) 
                     (mmult (trans U_k) 
                            (trans (minus x mu_k))))))
     (log pi_k)))

;;------------------------------------------------------------------------------
;; END OF CHANGES
;;------------------------------------------------------------------------------


;; define a function to calculate the quadratic discriminant scores
(defn calculate-scores 
  ([data D U centroids priors]
    (matrix 
      (pmap (fn [row]
             (pmap (partial qdf row) D U centroids priors))
           (sel data :cols (range 2 12))))))


;; calculate the scores for each row of the training data set across all 11 clusters
(def training-qda-scores 
  (calculate-scores training
                    D U
                    cluster-centroids
                    prior-probs))

;; calculate the scores for each row of the testing data set across all 11 clusters
(def testing-qda-scores 
  (calculate-scores testing
                    D U
                    cluster-centroids
                    prior-probs))

;;(bind-columns (sel training :cols 1) (plus 1 (map max-index training-qda-scores)))
;;(bind-columns (sel testing :cols 1) (plus 1 (map max-index testing-qda-scores)))

(defn max-index 
  "Returns the index of the maximum value in the given sequence."
  ([x] 
    (let [max-x (reduce max x)
          n (length x)]
      (loop [i 0]
        (if (= (nth x i) max-x)
          i
          (recur (inc i)))))))


;; define a function to calculate the error rate
(defn error-rate [data scores] 
  (/ (sum (map #(if (= %1 %2) 0 1) 
               (sel data :cols 1) 
               (plus 1 (map max-index scores))))
     (nrow data)))


;; calculate the error rate for the training data
(error-rate training training-qda-scores)


;; calculate the error rate for the testing data
(error-rate testing testing-qda-scores)