From 3f31831a605464777be30bd6bcff107dd11339fb Mon Sep 17 00:00:00 2001 From: Divy Patel Date: Mon, 4 Dec 2023 21:59:12 -0600 Subject: [PATCH 1/5] fix: record parsing when reading from storage as a run page Signed-off-by: Divy Patel --- External-Sort/StorageDevice.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/External-Sort/StorageDevice.cpp b/External-Sort/StorageDevice.cpp index f3885ea..04faecd 100644 --- a/External-Sort/StorageDevice.cpp +++ b/External-Sort/StorageDevice.cpp @@ -156,6 +156,9 @@ void StorageDevice::spill_run(char run_bit, uint run, vector record int last_run = this->get_last_run(); run_path = this->device_path + "/run_" + to_string(last_run + 1); } else if (run_bit == 't') { + string trace_str = "STATE -> SPILL_RUNS_" + this->device_path + ": Spill sorted runs to the " + this->device_path + " device"; + + trace.append_trace(trace_str); run_path = this->device_path + "/temp_run"; } else { run_path = this->device_path + "/run_" + to_string(run); From 8e1c03eddf25c53546f051051159a61d72ab067a Mon Sep 17 00:00:00 2001 From: Divy Patel Date: Wed, 6 Dec 2023 15:05:22 -0600 Subject: [PATCH 2/5] feat: add ExternalSort project documentation adhereing to canvas course project instructions Signed-off-by: Divy Patel --- External-Sort/DataRecord.cpp | 2 +- External-Sort/Tree.cpp | 563 ++++++++++++++++++++--------------- External-Sort/Tree.h | 2 +- README.md | 64 +++- TODO | 4 - 5 files changed, 380 insertions(+), 255 deletions(-) diff --git a/External-Sort/DataRecord.cpp b/External-Sort/DataRecord.cpp index 073d798..23b1183 100644 --- a/External-Sort/DataRecord.cpp +++ b/External-Sort/DataRecord.cpp @@ -119,7 +119,7 @@ bool DataRecord::is_smaller_int (const DataRecord incoming_record) const return false; } -bool DataRecord::is_smaller_str(const DataRecord incoming_record) const +bool DataRecord::is_smaller_str (const DataRecord incoming_record) const { int incoming_length = 0 ; char const *incoming_iter = NULL; int current_length = 0 ; char const 
*current_iter = NULL; diff --git a/External-Sort/Tree.cpp b/External-Sort/Tree.cpp index 6544769..3c3aaa1 100755 --- a/External-Sort/Tree.cpp +++ b/External-Sort/Tree.cpp @@ -8,36 +8,45 @@ #define NODE_RECORD_LIST(node) node->list->record_ptr #define NODE_RECORD_LIST_AT(node, idx) node->list->record_ptr[idx] #define NODE_RECORD_LIST_LENGTH(node) node->list->record_count -#define CHECK_SET_EMPTY(node, node_idx) if (node != NULL) {\ - if (node->list->record_ptr.empty()) {\ - node->is_empty = true;\ - }\ - } +#define CHECK_SET_EMPTY(node, node_idx) \ + if (node != NULL) \ + { \ + if (node->list->record_ptr.empty()) \ + { \ + node->is_empty = true; \ + } \ + } #define SET_INTERNAL_EMPTY(node) node->current_record = NULL; -DataRecord* pop_record(RecordList *list) { - DataRecord* top = NULL; - if (list == NULL) { +DataRecord *pop_record(RecordList *list) +{ + DataRecord *top = NULL; + if (list == NULL) + { return NULL; } - if (!list->record_ptr.empty()) { + if (!list->record_ptr.empty()) + { top = new DataRecord(list->record_ptr.front()); // top = &temp; list->record_ptr.pop_front(); - #if DEBUG_PRINT - // top->print(); - #endif +#if DEBUG_PRINT +// top->print(); +#endif list->record_count--; } return top; } -DataRecord* top_record(RecordList *list) { +DataRecord *top_record(RecordList *list) +{ DataRecord *top = NULL; - if (list == NULL) { + if (list == NULL) + { return NULL; } - if (!list->record_ptr.empty()){ + if (!list->record_ptr.empty()) + { top = new DataRecord(list->record_ptr.front()); // return &list->record_ptr.front(); return top; @@ -46,8 +55,8 @@ DataRecord* top_record(RecordList *list) { } /* -* Pass the list of sorted runs as part of structure RecordList -*/ + * Pass the list of sorted runs as part of structure RecordList + */ Tree::Tree(vector sorted_runs) { @@ -56,31 +65,37 @@ Tree::Tree(vector sorted_runs) this->total_nodes = 2 * pow(2, this->tree_depth) - 1; this->heap = new struct Node[this->total_nodes]; this->total_leaves = pow(2, 
this->tree_depth); - llint first_leaf_node = this->total_nodes - ((this->total_nodes - 1)/2) - 1; + llint first_leaf_node = this->total_nodes - ((this->total_nodes - 1) / 2) - 1; lluint jj = 0, current_run = 0; lluint ii = first_leaf_node; - for (lluint ii = 0 ; ii < this->total_nodes ; ii++) { + for (lluint ii = 0; ii < this->total_nodes; ii++) + { this->heap[ii].list = NULL; this->heap[ii].current_record = NULL; } - for ( ; ii < (this->total_leaves*2) - 1 ; ii++) { + for (; ii < (this->total_leaves * 2) - 1; ii++) + { this->heap[ii].current_record = NULL; this->heap[ii].is_empty = false; this->heap[ii].is_leaf = true; this->heap[ii].list = sorted_runs[jj]; current_run++; - if (current_run < sorted_runs.size()) { + if (current_run < sorted_runs.size()) + { jj += 1; // printf("%p\n", (void*)each_run); - } else { + } + else + { ii++; break; } } - if (ii < ((this->total_leaves*2) - 1)) { + if (ii < ((this->total_leaves * 2) - 1)) + { this->heap[ii].current_record = NULL; this->heap[ii].is_empty = true; this->heap[ii].is_leaf = true; @@ -96,18 +111,19 @@ Tree::Tree(vector sorted_runs) * * Leaf nodes can contain only list (which may have 1/more records), * or can be empty as well (can happen after a few runs are merged) * It also stores the count of records in that particular node. - * + * * * By default, we will try to create a full binary tree. 
- * + * */ Tree::Tree(DataRecord *records, llint record_ct, llint initial_run) { // TODO: See if this is optimal division for fanning - this->total_leaves = record_ct/2; + this->total_leaves = record_ct / 2; DataRecord *current_ptr = records; - llint count_of_cols_per_row = ceil(record_ct/this->total_leaves); + llint count_of_cols_per_row = ceil(record_ct / this->total_leaves); - if (initial_run) { + if (initial_run) + { count_of_cols_per_row = 1; this->total_leaves = record_ct; } @@ -117,14 +133,15 @@ Tree::Tree(DataRecord *records, llint record_ct, llint initial_run) this->total_record_count = record_ct; this->heap = new struct Node[this->total_nodes]; - llint first_leaf_node = this->total_nodes - ((this->total_nodes - 1)/2) - 1; + llint first_leaf_node = this->total_nodes - ((this->total_nodes - 1) / 2) - 1; llint current_ct = record_ct; int start = 0; // We always try to generate full binary tree at the beginning // (last leaf may not be balanced) - for (lluint ii = first_leaf_node ; ii < (this->total_leaves*2) - 1 ; ii++) { + for (lluint ii = first_leaf_node; ii < (this->total_leaves * 2) - 1; ii++) + { // Leaf nodes has no current record this->heap[ii].current_record = NULL; this->heap[ii].is_empty = false; @@ -132,7 +149,8 @@ Tree::Tree(DataRecord *records, llint record_ct, llint initial_run) // Assign records to each row this->heap[ii].list = new RecordList; - for (int ii = start; ii < start + count_of_cols_per_row; ii++) { + for (int ii = start; ii < start + count_of_cols_per_row; ii++) + { this->heap[ii].list->record_ptr.push_back(*current_ptr); current_ptr++; } @@ -140,19 +158,24 @@ Tree::Tree(DataRecord *records, llint record_ct, llint initial_run) // this->heap[ii].list->record_ptr = current_ptr; this->heap[ii].list->record_count = count_of_cols_per_row; - if (current_ct > 0) { + if (current_ct > 0) + { start += count_of_cols_per_row; current_ct -= count_of_cols_per_row; - } else { + } + else + { break; } // Sample calculation: - // For 128 records, 
there will be 7 rows == + // For 128 records, there will be 7 rows == // 18 count_of_cols_per_row + 2 remaining // for last row, current_ct will be 2 after subtraction, // so we will just add it to the last row - if (initial_run == 0) { - if ((current_ct > 0) && (current_ct <= 2*count_of_cols_per_row)) { + if (initial_run == 0) + { + if ((current_ct > 0) && (current_ct <= 2 * count_of_cols_per_row)) + { count_of_cols_per_row = current_ct; current_ct = -1; } @@ -160,173 +183,226 @@ Tree::Tree(DataRecord *records, llint record_ct, llint initial_run) } } -llint Tree::capacity(llint level) { - return (1<heap[parent]); - // None of the children are valid == not being used as runs - if (child_left >= unused_leaves_idx) { - return; - } else if (child_left < unused_leaves_idx /*Only left child is valid, special case only valid at leaf nodes */ - && child_right >= unused_leaves_idx) { - struct Node *left_child_node = &this->heap[child_left]; - // If parent has a valid record, it will be the winner, so skip. - // Iteration over the next level will empty this node - // the iteration after that for this level would fill it up. - if (parent_node->current_record) { - return; - } else { - if (IS_LEAF_NODE(left_child_node)) { - parent_node->current_record = pop_record(left_child_node->list); - } - } - } else { /* Both the children are valid. So add the popped record at parent (if empty) */ - if (parent_node->current_record) { - return; - } else { + // None of the children are valid == not being used as runs + if (child_left >= unused_leaves_idx) + { + return; + } + else if (child_left < unused_leaves_idx /*Only left child is valid, special case only valid at leaf nodes */ + && child_right >= unused_leaves_idx) + { + struct Node *left_child_node = &this->heap[child_left]; + // If parent has a valid record, it will be the winner, so skip. + // Iteration over the next level will empty this node + // the iteration after that for this level would fill it up. 
+ if (parent_node->current_record) + { + return; + } + else + { + if (IS_LEAF_NODE(left_child_node)) + { + parent_node->current_record = pop_record(left_child_node->list); + } + } + } + else + { /* Both the children are valid. So add the popped record at parent (if empty) */ + if (parent_node->current_record) + { + return; + } + else + { struct Node *left_child_node = &(this->heap[child_left]); - struct Node *right_child_node = &(this->heap[child_right]); - DataRecord *left_data = NULL, *right_data = NULL; - if (IS_LEAF_NODE(left_child_node)) { - // Both will be a leaf node - left_data = top_record(left_child_node->list); - right_data = top_record(right_child_node->list); - if ((right_data != NULL) & (left_data != NULL)) { - // Compare with OVCs only if both the ovcs exist - if ((left_data->ovc != 0) & (right_data->ovc != 0)) { - if (left_data->ovc < right_data->ovc) { - // If OVC is strictly smaller, the data record is small - parent_node->current_record = pop_record(left_child_node->list); - CHECK_SET_EMPTY(left_child_node, child_left); - delete left_data; delete right_data; - return; - } else if (left_data->ovc > right_data->ovc) { - // If OVC is strictly greater, the data record is larger - parent_node->current_record = pop_record(right_child_node->list); - CHECK_SET_EMPTY(right_child_node, child_right); - delete left_data; delete right_data; - return; - } - } - // If OVC do not exist for either or are equal, we need to check their actual - // values and update OVC based on the new winner - if (left_data->is_smaller_str(*right_data)) { + struct Node *right_child_node = &(this->heap[child_right]); + DataRecord *left_data = NULL, *right_data = NULL; + if (IS_LEAF_NODE(left_child_node)) + { + // Both will be a leaf node + left_data = top_record(left_child_node->list); + right_data = top_record(right_child_node->list); + if ((right_data != NULL) & (left_data != NULL)) + { + // Compare with OVCs only if both the ovcs exist + if ((left_data->ovc != 0) & 
(right_data->ovc != 0)) + { + if (left_data->ovc < right_data->ovc) + { + // If OVC is strictly smaller, the data record is small + parent_node->current_record = pop_record(left_child_node->list); + CHECK_SET_EMPTY(left_child_node, child_left); + delete left_data; + delete right_data; + return; + } + else if (left_data->ovc > right_data->ovc) + { + // If OVC is strictly greater, the data record is larger + parent_node->current_record = pop_record(right_child_node->list); + CHECK_SET_EMPTY(right_child_node, child_right); + delete left_data; + delete right_data; + return; + } + } + // If OVC do not exist for either or are equal, we need to check their actual + // values and update OVC based on the new winner + if (left_data->is_smaller_str(*right_data)) + { // left_data->print(); right_data->print(); - // Left is the winner -> set OVC of right relative to left - right_data->populate_ovc_int(*left_data); - parent_node->current_record = pop_record(left_child_node->list); - CHECK_SET_EMPTY(left_child_node, child_left); - delete left_data; delete right_data; - return; - } else { - // Right is the winner -> set OVC of left relative to right - left_data->populate_ovc_int(*right_data); - parent_node->current_record = pop_record(right_child_node->list); - CHECK_SET_EMPTY(right_child_node, child_right); - delete left_data; delete right_data; - return; - } - } else if (left_data) { - parent_node->current_record = pop_record(left_child_node->list); - CHECK_SET_EMPTY(left_child_node, child_left); + // Left is the winner -> set OVC of right relative to left + right_data->populate_ovc_int(*left_data); + parent_node->current_record = pop_record(left_child_node->list); + CHECK_SET_EMPTY(left_child_node, child_left); + delete left_data; + delete right_data; + return; + } + else + { + // Right is the winner -> set OVC of left relative to right + left_data->populate_ovc_int(*right_data); + parent_node->current_record = pop_record(right_child_node->list); + 
CHECK_SET_EMPTY(right_child_node, child_right); + delete left_data; + delete right_data; + return; + } + } + else if (left_data) + { + parent_node->current_record = pop_record(left_child_node->list); + CHECK_SET_EMPTY(left_child_node, child_left); delete left_data; - return; - } else if (right_data) { - parent_node->current_record = pop_record(right_child_node->list); - CHECK_SET_EMPTY(right_child_node, child_right); + return; + } + else if (right_data) + { + parent_node->current_record = pop_record(right_child_node->list); + CHECK_SET_EMPTY(right_child_node, child_right); delete right_data; - return; - } else { - parent_node->current_record = NULL; - return; - // At this point, both of the left and right should have been reported as empty, so no need to update. - } - } else { - // It is an internal node - left_data = NODE_CURRENT_RECORD(left_child_node); - right_data = NODE_CURRENT_RECORD(right_child_node); - if (left_data && right_data) { - // Compare with the OVCs only, if they exist - if ((left_data->ovc != 0) & (right_data->ovc != 0)) { - if (left_data->ovc < right_data->ovc) { - // If OVC is strictly smaller, the data record is small - parent_node->is_empty = false; - parent_node->current_record = left_data; - SET_INTERNAL_EMPTY(left_child_node); - return; - } else if (left_data->ovc > right_data->ovc) { - // If OVC is strictly greater, the data record is larger - parent_node->is_empty = false; - parent_node->current_record = right_data; - SET_INTERNAL_EMPTY(right_child_node); - return; - } - } - // If OVC are equal or do not exist for either, we need to check - // their actual values and update OVC based on the new winner - if (left_data->is_smaller_str(*right_data)) { - parent_node->is_empty = false; - right_data->populate_ovc_int(*left_data); - parent_node->current_record = left_data; - SET_INTERNAL_EMPTY(left_child_node); - return; - } else { - parent_node->is_empty = false; - left_data->populate_ovc_int(*right_data); - parent_node->current_record = 
right_data; - SET_INTERNAL_EMPTY(right_child_node); - return; - } - } else if (left_data) { - parent_node->is_empty = false; - parent_node->current_record = left_data; - NODE_CURRENT_RECORD(left_child_node) = NULL; - } else if (right_data) { - parent_node->is_empty = false; - parent_node->current_record = right_data; - NODE_CURRENT_RECORD(right_child_node) = NULL; - } else { - parent_node->is_empty = true; - parent_node->current_record = NULL; - } - } - } - } + return; + } + else + { + parent_node->current_record = NULL; + return; + // At this point, both of the left and right should have been reported as empty, so no need to update. + } + } + else + { + // It is an internal node + left_data = NODE_CURRENT_RECORD(left_child_node); + right_data = NODE_CURRENT_RECORD(right_child_node); + if (left_data && right_data) + { + // Compare with the OVCs only, if they exist + if ((left_data->ovc != 0) & (right_data->ovc != 0)) + { + if (left_data->ovc < right_data->ovc) + { + // If OVC is strictly smaller, the data record is small + parent_node->is_empty = false; + parent_node->current_record = left_data; + SET_INTERNAL_EMPTY(left_child_node); + return; + } + else if (left_data->ovc > right_data->ovc) + { + // If OVC is strictly greater, the data record is larger + parent_node->is_empty = false; + parent_node->current_record = right_data; + SET_INTERNAL_EMPTY(right_child_node); + return; + } + } + // If OVC are equal or do not exist for either, we need to check + // their actual values and update OVC based on the new winner + if (left_data->is_smaller_str(*right_data)) + { + parent_node->is_empty = false; + right_data->populate_ovc_int(*left_data); + parent_node->current_record = left_data; + SET_INTERNAL_EMPTY(left_child_node); + return; + } + else + { + parent_node->is_empty = false; + left_data->populate_ovc_int(*right_data); + parent_node->current_record = right_data; + SET_INTERNAL_EMPTY(right_child_node); + return; + } + } + else if (left_data) + { + 
parent_node->is_empty = false; + parent_node->current_record = left_data; + NODE_CURRENT_RECORD(left_child_node) = NULL; + } + else if (right_data) + { + parent_node->is_empty = false; + parent_node->current_record = right_data; + NODE_CURRENT_RECORD(right_child_node) = NULL; + } + else + { + parent_node->is_empty = true; + parent_node->current_record = NULL; + } + } + } + } } -struct Node Tree::leaf(llint index, llint current_slot) { - return this->heap[current_slot*2 + index]; +struct Node Tree::leaf(llint index, llint current_slot) +{ + return this->heap[current_slot * 2 + index]; } -struct Node Tree::parent(llint current_slot) { - return this->heap[current_slot/2]; +struct Node Tree::parent(llint current_slot) +{ + return this->heap[current_slot / 2]; } /* -* Each call runs the tree once, to generate one entry of merged run -*/ -void Tree::run_tree() { + * Each call runs the tree once, to generate one entry of merged run + */ +void Tree::run_tree() +{ llint unused_leaves_idx = (this->total_nodes + 1) / 2 - 1 + this->total_leaves; // Each iteration will give one of the priority queue elements, // run for each of the inner nodes for (llint inner_node_idx = this->total_nodes - pow(2, this->tree_depth) - 1; - inner_node_idx >= 0; - inner_node_idx--) { - this->compare_and_swap(inner_node_idx, unused_leaves_idx); + inner_node_idx >= 0; + inner_node_idx--) + { + this->run_tournament(inner_node_idx, unused_leaves_idx); } #if DEBUG_PRINT - cout<<"The heap in iteration "<print_heap(); #endif // this->heap[0].current_record->print(); @@ -335,73 +411,86 @@ void Tree::run_tree() { } /* -* Prints (index Empty) for empty nodes -* Prints "count -> [heap_index @ list_index :: (datarecord)]" list -* -* For e.g.: -* (1 Empty ) -* (2 Empty ) -* Count: 0 -> -* Count: 1 -> [5 @ 0 :: (6, 6, 6)] -* Count: 3 -> [6 @ 0 :: (7, 7, 7)] [6 @ 1 :: (8, 8, 8)] [6 @ 2 :: (9, 9, 9)] -*/ -void Tree::print_heap() { - cout<<"Tree depth: "<tree_depth+1<<", Total nodes: "<total_nodes<<", Total 
leaves: "<total_leaves<total_nodes; ii++) { + * Prints (index Empty) for empty nodes + * Prints "count -> [heap_index @ list_index :: (datarecord)]" list + * + * For e.g.: + * (1 Empty ) + * (2 Empty ) + * Count: 0 -> + * Count: 1 -> [5 @ 0 :: (6, 6, 6)] + * Count: 3 -> [6 @ 0 :: (7, 7, 7)] [6 @ 1 :: (8, 8, 8)] [6 @ 2 :: (9, 9, 9)] + */ +void Tree::print_heap() +{ + cout << "Tree depth: " << this->tree_depth + 1 << ", Total nodes: " << this->total_nodes << ", Total leaves: " << this->total_leaves << endl; + for (lluint ii = 0; ii < this->total_nodes; ii++) + { // if (!this->heap[ii].is_empty) { - if (this->heap[ii].current_record) { - printf("%lld :: (%.4s, %.4s, %.4s)@(%d:%c)\n", - ii, this->heap[ii].current_record->_record[0], - this->heap[ii].current_record->_record[1], - this->heap[ii].current_record->_record[2], - this->heap[ii].current_record->ovc, - this->heap[ii].current_record->rel); - } else { - RecordList *heap_list = this->heap[ii].list; - if (heap_list == NULL) { - printf("\n(%lld Empty )\n", ii); - continue; - } - lluint jj = 0; - printf("\n(%lld (Count: %lld) -> ", ii, heap_list->record_count); - for (auto current_record : heap_list->record_ptr) { - printf("[%lld @ %lld :: (%.4s, %.4s, %.4s)@(%d:%c)] ", - ii, jj, current_record._record[0], - current_record._record[1], - current_record._record[2], - current_record.ovc, - current_record.rel); - jj++; - } - printf(")\n"); + if (this->heap[ii].current_record) + { + printf("%lld :: (%.4s, %.4s, %.4s)@(%d:%c)\n", + ii, this->heap[ii].current_record->_record[0], + this->heap[ii].current_record->_record[1], + this->heap[ii].current_record->_record[2], + this->heap[ii].current_record->ovc, + this->heap[ii].current_record->rel); + } + else + { + RecordList *heap_list = this->heap[ii].list; + if (heap_list == NULL) + { + printf("\n(%lld Empty )\n", ii); + continue; + } + lluint jj = 0; + printf("\n(%lld (Count: %lld) -> ", ii, heap_list->record_count); + for (auto current_record : heap_list->record_ptr) + { + 
printf("[%lld @ %lld :: (%.4s, %.4s, %.4s)@(%d:%c)] ", + ii, jj, current_record._record[0], + current_record._record[1], + current_record._record[2], + current_record.ovc, + current_record.rel); + jj++; } + printf(")\n"); + } // } else { // printf("\n(%lld Empty )\n", ii); // } } } -vector Tree::get_empty_leaves() { +vector Tree::get_empty_leaves() +{ vector empty_leaf_idx_list; llint first_leaf_idx = pow(2, this->tree_depth) - 1; - for (lluint ii = first_leaf_idx; ii < this->total_nodes; ii++) { + for (lluint ii = first_leaf_idx; ii < this->total_nodes; ii++) + { if ((this->heap[ii].is_empty) && - (this->heap[ii].list->record_ptr.empty())) { - empty_leaf_idx_list.push_back(ii); + (this->heap[ii].list->record_ptr.empty())) + { + empty_leaf_idx_list.push_back(ii); } } return empty_leaf_idx_list; } /* -* Add new records at a leaf node (only if the existing list is exhausted) -*/ -llint Tree::add_run_at_leaf(llint leaf_node_index, DataRecord *record_list, llint record_ct) { + * Add new records at a leaf node (only if the existing list is exhausted) + */ +llint Tree::add_run_at_leaf(llint leaf_node_index, DataRecord *record_list, llint record_ct) +{ this->heap[leaf_node_index].is_empty = false; - if (this->heap[leaf_node_index].list == NULL) { + if (this->heap[leaf_node_index].list == NULL) + { this->heap[leaf_node_index].list = new RecordList; } - for (llint ii = 0 ; ii < record_ct; ii++) { + for (llint ii = 0; ii < record_ct; ii++) + { this->heap[leaf_node_index].list->record_ptr.push_back(*record_list); record_list++; } @@ -410,31 +499,35 @@ llint Tree::add_run_at_leaf(llint leaf_node_index, DataRecord *record_list, llin return 0; } -void Tree::spillover_run() { - for (auto a: this->generated_run) { +void Tree::spillover_run() +{ + for (auto a : this->generated_run) + { delete a; } this->generated_run.clear(); } /* -* Prints a sorted run -*/ -void Tree::print_run() { - for (auto a: this->generated_run) { + * Prints a sorted run + */ +void Tree::print_run() +{ + for 
(auto a : this->generated_run) + { a->print(); } return; } -vector Tree::get_generated_run() +vector Tree::get_generated_run() { return this->generated_run; } -Tree::~Tree () +Tree::~Tree() { - delete [] this->heap; + delete[] this->heap; TRACE(ENABLE_TRACE); } diff --git a/External-Sort/Tree.h b/External-Sort/Tree.h index 563269a..abb6e40 100755 --- a/External-Sort/Tree.h +++ b/External-Sort/Tree.h @@ -53,7 +53,7 @@ class Tree // Tournament tree functions void run_tree(); - void compare_and_swap(llint parent, llint unused_leaf_idx); + void run_tournament(llint parent, llint unused_leaf_idx); vector get_empty_leaves(); llint add_run_at_leaf(llint leaf_node_idx, DataRecord *sorted_run, llint number_of_records); void spillover_run(); diff --git a/README.md b/README.md index 960023f..58e2596 100644 --- a/README.md +++ b/README.md @@ -9,20 +9,20 @@ External Sorting algorithm for Databases having constrained storage hierarchy # Techniques Implemented by our submission and the corresponding Source Files and Lines -- Tournament trees [5] -- Offset-value coding [5] +- Tournament trees [5]: File Tree.cpp @ Line 196 +- Offset-value coding [5]: File DataRecord.cpp @ Line 122 - Minimum count of row & column comparisons [5] -- Cache-size mini runs [5] -- Device-optimized page sizes [5] -- Spilling memory-to-SSD [5] -- Spilling from SSD to disk [5] -- Graceful degradation - - Into merging [5] +- Cache-size mini runs [5]: File SortRecords.cpp @ Line 26 +- Device-optimized page sizes [5]: File SortRecords.cpp @ Line 81 and Line 136 +- Spilling memory-to-SSD [5]: File SortRecords.cpp @ Line 65 +- Spilling from SSD to disk [5]: File SortRecords.cpp @ Line 69 and Line 125 +- Graceful degradation: File SortRecords.cpp @ Line 72, Line 74 and Line 151 + - Into merging [5] - Beyond one merge step [5] -- Optimized merge patterns [5] -- Verifying - - sets of rows & values [5] - - sort order [5] +- Optimized merge patterns [5]: File SortRecords.cpp @ Line 150 and Line 151 +- Verifying: File 
Iterator.cpp @ Line 69 and Line 84 + - sets of rows & values [5]: File Iterator.cpp @ Line 84 + - sort order [5]: File Iterator.cpp @ Line 69 - Replacement selection? - Run size > memory size? @@ -32,9 +32,45 @@ External Sorting algorithm for Databases having constrained storage hierarchy - Quicksort -# Reasons we chose to implement the specific subset of techniques -# Project's state(complete or have what kinds of bugs) +# Reasons we chose to implement the specific subset of techniques +- `Tournament-tree priority queue` was used in order to achieve `high fan-in` for merging our sorted run inputs of records +- `Offset-value coding` was used to achieve `minimum column value comparisons` +- `Cache-size mini runs` were used to be able to fit the sort inputs, for tournament-tree, in the cache. This enabled us to leverage the low-latency accesses when there are `cache hits` +- `Device-optimized page sizes` were used in order to being cognizant about the `access-profile(latency, bandwidth)` of various devices in the storage hierarchy. For `SSD`, we used `8KB(100 MB/s * 0.1 ms ~ 10KB)` and for `HDD`, we used `1MB(100 MB/s * 10 ms ~ 1MB)` +- We achieved graceful-degradation by spilling `cache-size runs from cache to memory`, `spilling memory-size runs from memory to SSD` and `spilling SSD-size runs from SSD to HDD` +- Also `HDD-page size(1MB)` sorted runs were written to `SSD` prior to actually merging runs on the `HDD`. This is to leverage low-latency accesses of flash drives(SSD) +- `Sort-order`, `set of rows` and their `values` were verified as part of sorting the input records. 
This is to verify the `correctness` and `integrity` of our sort algorithm + + # Project's state +- The implementation of the `External-Sort` is complete with all of the techniques which were expected from us as part of the course project +- The sort was tested against `1KB` size records and with `12M` number of records(although it takes ~1hr to complete the sort, for this particular test-case) +- The sort algorithm was tested against `valgrind` to check for any memory leaks introduced while developing. The codebase does not have any memory leaks, from the latest leak-report on the most recent code version + # How to run our programs +- To run our program, first compile the source code using following command, under `External-Sort` directory +``` +$ cd External-Sort +$ make ExternalSort.exe +``` +- After compiling the source code, to execute the External Sort with custom arguments, run following command inside `External-Sort` directory +``` +# Where, +# "-c" gives the total number of records +# "-s" is the individual record size +# "-o" is the trace of your program run +$ ./ExternalSort.exe -c 120 -s 10 -o trace0.txt +``` + +- The program creates three directories on the completion of the sort algorithm: + - `input`: This directory consists of the input table which has records generated by the random-generator in arbitrary order + - `output`: This directory consists of the output table which has records from input table but in a sorted order, sorted using our sort algorithm + - `trace`: This directory consists of trace files generated from the sort. The trace file consists of logs related to SSD and HDD device accesses. 
And the logs related to sort state machine + +- In order to remove all the generated binaries, executables, and the utility directories mentioned above, run the following command +``` +$ make clean +``` # Initial Setup ``` diff --git a/TODO b/TODO index 6376d9b..e69de29 100644 --- a/TODO +++ b/TODO @@ -1,4 +0,0 @@ -- Changes in pick(Sahil) -- Fix sort order for large records, probable problem with OVC(Sahil) -- Add README documentation as mentioned in the canvas(Divy) -- Sort is crashing for larger runs, might be because of memory starvation due to a leak(Divy) \ No newline at end of file From 4642283e70947f4d2c9e99a8577ad0ba64bcbb4c Mon Sep 17 00:00:00 2001 From: Divy Patel Date: Wed, 6 Dec 2023 15:12:54 -0600 Subject: [PATCH 3/5] feat: add ExternalSort project documentation adhereing to canvas course project instructions Signed-off-by: Divy Patel --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 58e2596..6bcde83 100644 --- a/README.md +++ b/README.md @@ -9,20 +9,20 @@ External Sorting algorithm for Databases having constrained storage hierarchy # Techniques Implemented by our submission and the corresponding Source Files and Lines -- Tournament trees [5]: File Tree.cpp @ Line 196 -- Offset-value coding [5]: File DataRecord.cpp @ Line 122 -- Minimum count of row & column comparisons [5] -- Cache-size mini runs [5]: File SortRecords.cpp @ Line 26 -- Device-optimized page sizes [5]: File SortRecords.cpp @ Line 81 and Line 136 -- Spilling memory-to-SSD [5]: File SortRecords.cpp @ Line 65 -- Spilling from SSD to disk [5]: File SortRecords.cpp @ Line 69 and Line 125 -- Graceful degradation: File SortRecords.cpp @ Line 72, Line 74 and Line 151 - - Into merging [5] - - Beyond one merge step [5] -- Optimized merge patterns [5]: File SortRecords.cpp @ Line 150 and Line 151 -- Verifying: File Iterator.cpp @ Line 69 and Line 84 - - sets of rows & values [5]: File Iterator.cpp @ Line 84 - 
- sort order [5]: File Iterator.cpp @ Line 69 +- **Tournament trees**: `File Tree.cpp @ Line 196` +- **Offset-value coding**: `File DataRecord.cpp @ Line 122` +- **Minimum count of row & column comparisons** +- **Cache-size mini runs**: `File SortRecords.cpp @ Line 26` +- **Device-optimized page sizes**: `File SortRecords.cpp @ Line 81 and Line 136` +- **Spilling memory-to-SSD**: `File SortRecords.cpp @ Line 65` +- **Spilling from SSD to disk**: `File SortRecords.cpp @ Line 69 and Line 125` +- **Graceful degradation**: `File SortRecords.cpp @ Line 72, Line 74 and Line 151` + - **Into merging** + - **Beyond one merge step** +- **Optimized merge patterns**: `File SortRecords.cpp @ Line 150 and Line 151` +- **Verifying**: `File Iterator.cpp @ Line 69 and Line 84` + - **sets of rows & values**: `File Iterator.cpp @ Line 84` + - **sort order**: `File Iterator.cpp @ Line 69` - Replacement selection? - Run size > memory size? From 01aa5f83d00824956e557d0454a1f66c34bf8afc Mon Sep 17 00:00:00 2001 From: Divy Patel Date: Wed, 6 Dec 2023 16:26:40 -0600 Subject: [PATCH 4/5] feat: add ExternalSort project documentation adhereing to canvas course project instructions Signed-off-by: Divy Patel --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6bcde83..2b99daf 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,12 @@ External Sorting algorithm for Databases having constrained storage hierarchy - Devaki Kulkarni (9086222321) dgkulkarni2@wisc.edu - Manaswini Gogineni (9085432699) mgogineni@wisc.edu +# Individual Contributions +__Divy__: Cache-size mini runs, Device-optimized page sizes, Spilling memory-to-SSD, Spilling from SSD to disk, Graceful degradation, Optimized merge patterns, Testing and Memory Leak Check +__Sahil__: Tournament trees, Offset-value coding, Minimum count of row & column comparisons, Optimized merge patterns, Large-size records, Testing and Memory Leak Check +__Devaki__: Tournament trees, 
Offset-value coding, Large-size records +__Manaswini__: Verification + # Techniques Implemented by our submission and the corresponding Source Files and Lines - **Tournament trees**: `File Tree.cpp @ Line 196` @@ -33,7 +39,7 @@ External Sorting algorithm for Databases having constrained storage hierarchy # Reasons we chose to implement the specific subset of techniques -- `Tournament-tree priority queue` was used in order to achieve `high fan-in` for merging our sorted run inputs of records +- `Tournament-tree priority queue` was used in order to achieve `high fan-in` for merging our sorted run inputs of records and less number of comparisons than a standard tree-of-winners - `Offset-value coding` was used to achieve `minimum column value comparisons` - `Cache-size mini runs` were used to be able to fit the sort inputs, for tournament-tree, in the cache. This enabled us to leverage the low-latency accesses when there are `cache hits` - `Device-optimized page sizes` were used in order to being cognizant about the `access-profile(latency, bandwidth)` of various devices in the storage hierarchy. 
For `SSD`, we used `8KB(100 MB/s * 0.1 ms ~ 10KB)` and for `HDD`, we used `1MB(100 MB/s * 10 ms ~ 1MB)` From 206bb0ad3a517998cbe67ac69f87faba8257d963 Mon Sep 17 00:00:00 2001 From: Divy Patel Date: Wed, 6 Dec 2023 16:27:34 -0600 Subject: [PATCH 5/5] feat: add ExternalSort project documentation adhereing to canvas course project instructions Signed-off-by: Divy Patel --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2b99daf..b7be76a 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,10 @@ External Sorting algorithm for Databases having constrained storage hierarchy - Manaswini Gogineni (9085432699) mgogineni@wisc.edu # Individual Contributions -__Divy__: Cache-size mini runs, Device-optimized page sizes, Spilling memory-to-SSD, Spilling from SSD to disk, Graceful degradation, Optimized merge patterns, Testing and Memory Leak Check -__Sahil__: Tournament trees, Offset-value coding, Minimum count of row & column comparisons, Optimized merge patterns, Large-size records, Testing and Memory Leak Check -__Devaki__: Tournament trees, Offset-value coding, Large-size records -__Manaswini__: Verification +- __Divy__: Cache-size mini runs, Device-optimized page sizes, Spilling memory-to-SSD, Spilling from SSD to disk, Graceful degradation, Optimized merge patterns, Testing and Memory Leak Check +- __Sahil__: Tournament trees, Offset-value coding, Minimum count of row & column comparisons, Optimized merge patterns, Large-size records, Testing and Memory Leak Check +- __Devaki__: Tournament trees, Offset-value coding, Large-size records +- __Manaswini__: Verification # Techniques Implemented by our submission and the corresponding Source Files and Lines