#|

Take a space of actual examples and map them into the space of possible examples. What happens? What can we say about any future examples we might see? If you understand that, then you understand data mining.

First, we'll need some examples of actual data. We'll store those in rows on a table. A row of data has two parts: inputs (also called independent variables) that select for some outputs (called the dependent variables).

|#

;; On SBCL only (the #+SBCL reader conditional skips this form elsewhere):
;; muffle STYLE-WARNING conditions while this file is compiled/loaded.
;; NOTE(review): presumably needed because some functions are called before
;; their definitions appear later in the file -- confirm.
#+SBCL (DECLAIM (SB-EXT:MUFFLE-CONDITIONS CL:STYLE-WARNING))

(defun sqrd (x)
  "Return X squared."
  (expt x 2))
(defun norm (x min max)
  "Rescale X relative to the interval MIN..MAX: MIN maps to 0 and MAX to 1.

A zero X is not scaled directly; the midpoint of MIN and MAX is scaled in
its place.  (Presumably zero marks an unknown value -- confirm with the
data conventions used by callers.)

The midpoint is substituted inline rather than by calling NORM again:
when MIN = -MAX the midpoint is itself zero, and a recursive call would
never terminate.

Signals DIVISION-BY-ZERO when MIN and MAX are equal."
  (let ((value (if (zerop x)
                   (/ (+ max min) 2) ; stand-in for a zero X
                   x)))
    (/ (- value min) (- max min))))

(defun distance (one two dims)
  "Return a scaled Euclidean-style distance between the lists ONE and TWO.

DIMS holds one upper bound per position.  Every pair of values is passed
through NORM (lower bound LOW, upper bound taken from DIMS) before the
squared differences are summed.  The root of that sum is divided by the
root of the summed squared widths (upper bound minus LOW) of all
positions.  Iteration stops at the shortest of the three lists."
  (loop with low = 1 ; set to 0 for standard geometry
        for a in one
        for b in two
        for dim in dims
        sum (sqrd (- (norm a low dim)
                     (norm b low dim)))
          into gap
        sum (sqrd (- dim low))
          into span
        finally (return (/ (sqrt gap) (sqrt span)))))

(defun when-we-played-golf ()
  "Build and return the golf example table by handing literal data to DEFTABLE.

The data consists of column names, one range number per column, the
observed rows written with symbolic values, the symbol-to-number synonyms
that DEFTABLE substitutes into the observed rows, and the candidate
\"possible\" rows already written as numbers."
  (let ((names '(outlook   temperature  humidity  windy   play))
        (sizes '(3         3            2         2       2))
        (codes '((rainy  . 1) (overcast . 2) (sunny . 3)
                 (cool   . 1) (mild     . 2) (hot   . 3)
                 (normal . 1) (high     . 2)
                 (FALSE  . 1) (TRUE     . 2)
                 (no     . 1) (yes      . 2)))
        (seen '((sunny     hot          high      FALSE   no)
                (sunny     hot          high      TRUE    no)
                (overcast  hot          high      FALSE   yes)
                (rainy     mild         high      FALSE   yes)
                (rainy     cool         normal    FALSE   yes)
                (rainy     cool         normal    TRUE    no)
                (overcast  cool         normal    TRUE    yes)
                (sunny     mild         high      FALSE   no)
                (sunny     cool         normal    FALSE   yes)
                (rainy     mild         normal    FALSE   yes)
                (sunny     mild         normal    TRUE    yes)
                (overcast  mild         high      TRUE    yes)
                (overcast  hot          normal    FALSE   yes)
                (rainy     mild         high      TRUE    no)))
        (candidates '((3 3 1 1 0) (3 2 2 2 0) (3 1 1 2 0) (2 3 2 1 0)
                      (2 2 1 1 0) (2 1 2 1 0) (1 3 1 2 0)
                      (1 2 2 1 0) (1 1 0 0 0) (2 0 0 2 0))))
    (deftable :columns   names
              :ranges    sizes
              :synonyms  codes
              :observed  seen
              :possibles candidates)))

;; A data table as assembled by DEFTABLE:
;;   columns                - the column names, stored as given
;;   independent-dimensions - every entry of the ranges list except the last
;;   dependent-dimensions   - the last entry of the ranges list
;;   actuals                - ROW structs built from the observed data
;;   possibles              - ROW structs built from the candidate data
;;   nearests               - not filled in by DEFTABLE
;;                            NOTE(review): appears unused in this file -- confirm.
(defstruct table 
  columns 
  independent-dimensions 
  dependent-dimensions 
  actuals 
  possibles 
  nearests)

;; One row of a table: ID is a number handed out by DEFROW, INDEPENDENTS is
;; the list of all values but the last, DEPENDENTS is the last value.
(defstruct row id independents dependents)

(let ((counter 0))
  (defun defrow (new)
    "Wrap the list NEW in a ROW struct.

All elements but the last become the independents; the last element
becomes the dependents.  Each call takes the next id from a counter
shared by every call of DEFROW."
    (let ((id (incf counter)))
      (make-row :id id
                :independents (butlast new)
                :dependents (car (last new))))))

(defun deftable (&key columns ranges synonyms observed possibles)
  "Assemble a TABLE struct from raw data.

RANGES is split into its leading entries (independent dimensions) and its
last entry (dependent dimension).  OBSERVED is translated through the
SYNONYMS alist with SUBLIS and then turned into rows; POSSIBLES is turned
into rows as given.  Observed rows are built first, so they receive the
lower row ids."
  (flet ((rows-from (lists)
           (loop for item in lists
                 collect (defrow item))))
    (make-table :columns columns
                :independent-dimensions (butlast ranges)
                :dependent-dimensions (car (last ranges))
                :actuals (rows-from (sublis synonyms observed))
                :possibles (rows-from possibles))))

(defun nearest (tbl actual)
  "Find the possible row of TBL closest to the row ACTUAL.

Closeness is measured with DISTANCE over the independents of both rows,
using the table's independent dimensions.  On a tie the earliest possible
row wins because only a strictly smaller distance replaces the current
best.  Returns the cons (actual-id . nearest-possible-id)."
  (loop with closest = nil
        with shortest = most-positive-fixnum
        for candidate in (table-possibles tbl)
        for gap = (distance (row-independents actual)
                            (row-independents candidate)
                            (table-independent-dimensions tbl))
        when (< gap shortest)
          do (setf shortest gap
                   closest candidate)
        finally (return (cons (row-id actual) (row-id closest)))))
	  
(defun nearests (tbl)
  "Return the result of NEAREST for every actual row of TBL, in table order."
  (loop for actual in (table-actuals tbl)
        collect (nearest tbl actual)))

(defmacro geta (key alist &optional default)
  "Look KEY up in the association list ALIST and yield the cdr of its entry.

When there is no entry, or the stored value is NIL, the value of DEFAULT
is yielded instead; DEFAULT is only evaluated in that case.

The expansion is built with backquote so the lookup runs at run time on
the values of KEY and ALIST.  Without the quoting, ASSOC would be applied
during macroexpansion to the unevaluated argument forms themselves."
  `(or (cdr (assoc ,key ,alist))
       ,default))

(defun statistics (tbl)
  "Print, for every possible row of TBL, the share of actual rows nearest to it.

Each actual row is assigned to its nearest possible row (see NEARESTS) and
its dependent value is filed under that possible row's id.  One line is
printed per possible row, highest percentage first, showing the rounded
percentage of all actual rows, the possible row's id and the sorted
dependent values collected for it.  Returns T."
  (let ((summary '())
        (total (length (table-actuals tbl)))
        (votes (make-hash-table)))
    ;; File each actual row's dependent value under its nearest possible id.
    (loop for (actual-id . possible-id) in (nearests tbl)
          for owner = (find actual-id (table-actuals tbl) :key #'row-id)
          do (push (row-dependents owner) (gethash possible-id votes)))
    ;; One summary entry per possible row: (id sorted-values percentage).
    (loop for possible-id in (mapcar #'row-id (table-possibles tbl))
          for klasses = (gethash possible-id votes)
          for share = (round (* 100 (/ (length klasses) total)))
          do (push (if klasses
                       (list possible-id (sort klasses #'<) share)
                       (list possible-id '() 0))
                   summary))
    ;; Report, largest share first.
    (dolist (entry (sort summary #'> :key #'third) t)
      (destructuring-bind (possible-id klasses share) entry
        (format t "at ~a% possibility ~a has ~a~%" share possible-id klasses)))))

    
#| 
|#

#|
|#