;;chet tobrey ;; feb 24 ;; code for equal-frequency-discretization ;test list ; 20 rows so our bins can't exceed 20; (setf *l* '((1 1 1 1 1) (2 2 2 2 2) (0 0 0 0 0) (3 3 3 3 3) (4 4 4 4 4) (5 5 5 5 5) (6 6 6 6 6) (7 7 7 7 7) (40 40 40 40 40) (-100 -100 -100 -100 -100) (20 20 20 20 20) (30 30 30 30 30) (0.2 0.2 0.2 0.2 0.2) (1 2 3 4 5) (23 23 23 23 23) (234 234 234 234 234) (89 89 89 89 89) (3 3 3 3 3 3) (1000 1000 1000 1000 1000) (9876 9876 9876 9876 9876))) (defun equal-f (group &optional(numb 8)) "equal frequency discretization, call on a list of lists ## the bin number can't be higer than the num items in the column" (let (new(old (mysort group))) (setf new (transpose(mapcar #'(lambda (x)(ef x numb))old))) new)) ;XXX example, real simple ;DEMOS (defun demoef() (progn (format t "demo on test list *l* 13 bins ~% ~a ~% ~a ~%" *l* (eql-fun1)) (format t "demo on test list *l* 8 bins ~% ~a ~%" (eql-fun1 8)) (format t "sorting a single column ~a into 5 bins ~% ~a ~%" '(1 2 3 4 5 6 7 8 9 10) (eql-fun2)) (format t "trying on all_databor 17 bins ~% ~a ~%" (eql-fun3)) (format t "all_databor 6 bins ~% ~a ~%" (eql-fun3 6)) )) (defun eql-fun1(&optional (n 13)) " a demo on test list *l*" (equal-f *l* n)) (defun eql-fun2() "a simple demo on a sorted column splits a column of 10 into 5 bins" (let ( (result) (mycol '(1 2 3 4 5 6 7 8 9 10))) (setf result (ef mycol 5)) result)) (defun eql-fun3(&optional(n 17)) "trying on a larger data set" (progn (load "bor/all_databor.lisp") (equal-f (rel-rows (all_databor)) n))) (defun showh (tmp) (let (out) (maphash #'(lambda (k v) (push (cons k v) out)) tmp) (mapc 'print (sort out #'(lambda (a b) (< (car a) (car b))))))) (defun mysort (group) (mapcar #'(lambda (x)(stable-sort x #'<))(transpose group))) (defun ef (l num) (reverse (pull-val (myrank l num) l))) (defun pull-val(table l) (let (new) (dolist (x l) (setf new (cons (gethash x table) new))) new)) (defun myrank (l &optional (numb 8)) "takes sorted list and returns a hash table with the value and bin number" (let ((total 0) (current 0) (binnum 1) (ranks (make-hash-table)) (point (/(length l)numb))) ;(format t " ~a " l) (dotimes (i (length l)) (incf total) (setf (gethash (nth i l) ranks) binnum) ; (format t "pt ~a tot ~a cur ~a f/tp ~a binnum ~a ~%" ; point total current (floor (/ total point)) binnum) (if (> (floor(/ total point)) current) (progn ; (princ "HERE") (incf binnum) (setf current (/ total point))))) showh ranks)) (defun rank (l &optional (ranks (make-hash-table)) (n 0)) "Return a hash of the ranks in the sorted list of numbers 'l'. All numbers in a run of repeated entries get the average rank of that run." (if (null l) ranks (let (repeats sum now) (labels ((walk () (incf n) (pop l)) (new () (setf repeats 1) (setf sum n)) (same () (incf sum n) (incf repeats)) (spin () (when (eql now (car l)) (walk) (same) (spin)))) (setf now (walk)) (new) (spin) ; look for something that is not "now" (setf (gethash now ranks) (/ sum repeats)) (rank l ranks n)))))