#!/usr/bin/gawk -f #Min and Max are only used when a histogram or graph is going to be generated. They are only for explanation purposes and have no other use. #Given dist (and its flag) is meaningless for internal (non-leaf) nodes. They must be allowed to be generated from their child node dists. BEGIN { FS = OFS = ","; Seed = 1; TotalBins = 20; TotalRuns = 100; Policy = 1; Factor = 0.1; } /^$/ || /\#.*/ { next; } { LayerIndex = 1; GraphIndex = 2; NameIndex = 1+2; OperationIndex = 2+2; GoalPriorityIndex = 3+2; MinIndex = 4+2; MaxIndex = 5+2; GivenDistFlagIndex = 6+2; IsGivenDistIndex = 7+2; IsGoalDistIndex = 8+2; ChildrenCountIndex = 9+2; SanityCheckUniqueNode[$NameIndex]++; TotalNodes++; Layer[TotalNodes] = $LayerIndex; Graph[TotalNoeds] = $GraphIndex; Index[$NameIndex] = TotalNodes; Name[TotalNodes] = $NameIndex; Operation[TotalNodes] = $OperationIndex; GoalPriority[TotalNodes] = $GoalPriorityIndex; Min[TotalNodes] = $MinIndex; Max[TotalNodes] = $MaxIndex; GivenDistFlag[TotalNodes] = $GivenDistFlagIndex; IsGivenDist[TotalNodes] = $IsGivenDistIndex; IsGoalDist[TotalNodes] = $IsGoalDistIndex; ChildrenCount[TotalNodes] = $ChildrenCountIndex; for (i = 1; i <= ChildrenCount[TotalNodes]; i++) Children[TotalNodes,i] = $(i+ChildrenCountIndex); GivenDistStartIndex = ChildrenCountIndex + ChildrenCount[TotalNodes]; if (IsGivenDist[TotalNodes] == 1) { for (i = 1; i <= TotalBins; i++) GivenDist[TotalNodes,i] = $(GivenDistStartIndex+i); GoalDistStartIndex = GivenDistStartIndex + TotalBins; } else GoalDistStartIndex = GivenDistStartIndex; #keep the goal dist normalized. This is not needed for given dist since it is normalized as needed. if (IsGoalDist[TotalNodes] == 1) { for (i = 1; i <= TotalBins; i++) tempDistArray[i] = $(GoalDistStartIndex+i); normalizeDist(tempDistArray); for (i = 1; i <= TotalBins; i++) GoalDist[TotalNodes,i] = tempDistArray[i]; } } END { srand(Seed); # #this and next block makes everything randomly created # # count = 0; # countLimit = int(TotalNodes*Factor); # # while (count < countLimit) # { # #randomly generate goal distributions # for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) # { # if (rand() < 0.5 && IsGoalDist[nodeCounter] == 0 && count < countLimit && ChildrenCount[nodeCounter] > 0) # { # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # tempArray[binCounter] = int(rand() * 20); # # normalizeDist(tempArray); # # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # GoalDist[nodeCounter,binCounter] = tempArray[binCounter]; # # IsGoalDist[nodeCounter] = 1; # count++; # GoalPriority[nodeCounter] = int(rand() * 100); # } # } # } # # #generate a random goal for the root # Root = findRoot(); # # if (IsGoalDist[Root] == 0) # { # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # tempArray[binCounter] = int(rand() * 20); # # normalizeDist(tempArray); # # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # GoalDist[Root,binCounter] = tempArray[binCounter]; # # IsGoalDist[Root] = 1; # count++; # GoalPriority[Root] = int(rand() * 100); # } # #this block is used to make certain ones with a given priority have a random goal distribution # for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) # { # if (GoalPriority[nodeCounter] != 0) # { # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # tempArray[binCounter] = int(rand() * 20); # # normalizeDist(tempArray); # # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # GoalDist[nodeCounter,binCounter] = tempArray[binCounter]; # # IsGoalDist[nodeCounter] = 1; # } # } #this is where the code starts normally if (sanityCheck() == -1) { print "Error! One or more of the child nodes are missing OR a node is defined recursively OR a node is defined more than once."; exit; } Root = findRoot(); if (Root == -1) { print "Error! You have more than one root node."; exit; } HighestPriorityNode = findMaxPriorityNode(); if (HighestPriorityNode == -1 || GoalPriority[Root] == GoalPriority[HighestPriorityNode]) HighestPriorityNode = Root; reportStart(); for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) MinValue[tempBinCounter] = 10^20; for (run = 1; run <= TotalRuns; run++) { #initialize (possibly old values) for a new run for (tempCounter = 0; tempCounter <= TotalNodes; tempCounter++) Processed[tempCounter] = 0; #process the leaves first for (leafCounter = 1; leafCounter <= TotalNodes; leafCounter++) { if (ChildrenCount[leafCounter] == 0) { processLeafNode(leafCounter); Processed[leafCounter] = 1; Processed[0]++; } } #process the rest (all internal nodes) while (Processed[0] < TotalNodes) { for (internalCounter = 1; internalCounter <= TotalNodes; internalCounter++) { if (Processed[internalCounter] == 0) { processedChildren = 0; for (childCounter = 1; childCounter <= ChildrenCount[internalCounter]; childCounter++) { if (Processed[Index[Children[internalCounter,childCounter]]] == 1) processedChildren++; } #if all of its children are processed, process it then if (processedChildren == ChildrenCount[internalCounter]) { processInternalNode(internalCounter); Processed[internalCounter] = 1; Processed[0]++; } } } } # report(); score(); } reportEnd(); } function findMaxPriorityNode(tempMax,tempNode) { tempMax = 0; tempNode = -1; for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (GoalPriority[nodeCounter] >= tempMax) { tempMax = GoalPriority[nodeCounter]; tempNode = nodeCounter; } } return tempNode; } function findRoot(tempArray) { tempRoot = -1; #find the root as the one that is not a child of any other node for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) for (tempCounter = 1; tempCounter <= ChildrenCount[nodeCounter]; tempCounter++) tempArray[Index[Children[nodeCounter,tempCounter]]]++; for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (tempArray[nodeCounter] == 0) { if (tempRoot == -1) tempRoot = nodeCounter; else return -1; } } return tempRoot; } function sanityCheck() { for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) for (tempCounter = 1; tempCounter <= ChildrenCount[nodeCounter]; tempCounter++) if (Index[Children[nodeCounter,tempCounter]] == 0 || Index[Children[nodeCounter,tempCounter]] == Index[Name[nodeCounter]]) { print Name[nodeCounter]; return -1; } for (element in SanityCheckUniqueNode) if (SanityCheckUniqueNode[element] > 1) return -1; return 1; } function score(tempCounter,tempBinCounter,tempValue,tempScore) { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempScore[tempCounter] = 0; for (tempCounter = 1; tempCounter <= TotalNodes; tempCounter++) { if (IsGoalDist[tempCounter] == 1) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { tempValue = GoalDist[tempCounter,tempBinCounter] - FinalDistArray[Name[tempCounter],tempBinCounter]; if (tempValue < 0) tempValue *= -1; tempScore[tempBinCounter] = tempScore[tempBinCounter] + tempValue * GoalPriority[tempCounter]; } } } for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (tempScore[tempBinCounter] < MinValue[tempBinCounter]) { MinValue[tempBinCounter] = tempScore[tempBinCounter]; for (tempCounter = 1; tempCounter <= TotalNodes; tempCounter++) BestBin[tempCounter,tempBinCounter] = FinalDistArray[Name[tempCounter],tempBinCounter]; } } } function processInternalNode(nodeIndex,tempMinCount,tempMaxCount,tempCounter,tempBinCounter,tempOutArray) { #Having a flag for internal nodes is meaningless since we have to be able to use distributions from child nodes. for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { tempMaxCount[tempBinCounter] = -10^20; tempMinCount[tempBinCounter] = 10^20; } for (tempCounter = 1; tempCounter <= ChildrenCount[nodeIndex]; tempCounter++) { childName = Children[nodeIndex,tempCounter]; for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (tempMaxCount[tempBinCounter] < FinalDistArray[childName,tempBinCounter]) tempMaxCount[tempBinCounter] = FinalDistArray[childName,tempBinCounter]; if (tempMinCount[tempBinCounter] > FinalDistArray[childName,tempBinCounter]) tempMinCount[tempBinCounter] = FinalDistArray[childName,tempBinCounter]; } } if (Operation[nodeIndex] == "null") { childName = Children[nodeIndex,1]; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = FinalDistArray[childName,tempCounter]; } else if (Operation[nodeIndex] == "not") { childName = Children[nodeIndex,1]; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = 1 - FinalDistArray[childName,tempCounter]; } else if (Operation[nodeIndex] == "and") { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = tempMinCount[tempCounter]; } else if (Operation[nodeIndex] == "or") { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = tempMaxCount[tempCounter]; } normalizeDist(tempOutArray); for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) FinalDistArray[Name[nodeIndex],tempBinCounter] = tempOutArray[tempBinCounter]; } function processLeafNode(nodeIndex,tempBinCounter,tempOutArray) { if (IsGivenDist[nodeIndex] == 1 && (GivenDistFlag[nodeIndex] == "fixed" || run == 1)) getDist(nodeIndex,tempOutArray); else generateDist(nodeIndex,tempOutArray); normalizeDist(tempOutArray); for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) FinalDistArray[Name[nodeIndex],tempBinCounter] = tempOutArray[tempBinCounter]; } function getDist(nodeIndex,out,tempBinCounter) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) out[tempBinCounter] = GivenDist[nodeIndex,tempBinCounter]; } function generateDist(nodeIndex,out,tempBinCounter) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (rand() > 0.5 && run > 1) out[tempBinCounter] = BestBin[nodeIndex,tempBinCounter]; else { bound = GoalDist[HighestPriorityNode,tempBinCounter] * 2; out[tempBinCounter] = rand() * bound; } } } function normalizeDist(inputArray,tempCounter,sum) { sum = 0; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) sum += inputArray[tempCounter]; if (sum != 0) for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) inputArray[tempCounter] = inputArray[tempCounter] / sum; } function reportStart() { print "Highest priority node is " Name[HighestPriorityNode]; print "Root is " Name[Root]; for (j = 1; j <= TotalNodes; j++) { printf("%s has %d children: (", Name[j],ChildrenCount[j]); for (i = 1; i <= ChildrenCount[j]; i++) printf("%s,", Children[j,i]); print ") with min: " Min[j] " and max: " Max[j] " and priority: " GoalPriority[j] " and flag '" GivenDistFlag[j] "' with given dist " IsGivenDist[j] " and goal dist " IsGoalDist[j]; if (IsGivenDist[j] == 1) { printf("Given Dist: "); for (i = 1; i <= TotalBins; i++) printf ("%.2f,",GivenDist[j,i]); print ""; } if (IsGoalDist[j] == 1) { printf("Goal Dist: "); for (i = 1; i <= TotalBins; i++) printf ("%.2f,",GoalDist[j,i]); print ""; } } } function report() { print "run: "run; for (j = 1; j <= TotalNodes; j++) { for (i = 1; i <= TotalBins; i++) printf("%.2f,",FinalDistArray[Name[j],i]); printf ("\t%s",Name[j]); if (ChildrenCount[j] > 0 ) { printf(" --> %s (",Operation[j]); comma = ""; for (i = 1; i <= ChildrenCount[j]; i++) { printf("%s",comma); printf("%s", Children[j,i]); comma = "," } printf (")"); } print""; } print ""; } function reportEnd() { print "Final results:" for (j = 1; j <= TotalNodes; j++) { for (i = 1; i <= TotalBins; i++) printf("%.2f,",BestBin[j,i]); printf (" %s",Name[j]); if (ChildrenCount[j] > 0 ) { printf(" --> %s (",Operation[j]); comma = ""; for (i = 1; i <= ChildrenCount[j]; i++) { printf("%s",comma); printf("%s", Children[j,i]); comma = "," } printf (")"); } print""; } print ""; for (j = 1; j <= TotalNodes; j++) { sum = 0; if (IsGoalDist[j] == 1) { for (i = 1; i <= TotalBins; i++) { tempValue = BestBin[j,i] - GoalDist[j,i]; if (tempValue < 0) tempValue *= -1; sum += tempValue; } for (i = 1; i <= TotalBins; i++) printf("%.2f,",BestBin[j,i]); print " --> PREDICTED " Name[j] " ( Priority: " GoalPriority[j] " and Error: " sum " )"; for (i = 1; i <= TotalBins; i++) printf("%.2f,",GoalDist[j,i]); print " --> GOAL " Name[j]; } } }