fix: cleaning up bib and minor typos

Signed-off-by: Matej Focko <mfocko@redhat.com>
Matej Focko 2022-05-18 11:32:26 +02:00
parent 9a32d10615
commit 0afce998e0
Signed by: mfocko
GPG key ID: 7C47D46246790496
8 changed files with 99 additions and 206 deletions


@ -1,4 +1,4 @@
COUNT:=$(shell texcount fi-pdflatex.tex -char -inc | grep "Letters in text:" | tail -n1 | cut -f2- -d:)
COUNT:=$(shell texcount thesis.tex -char -inc | grep "Letters in text:" | tail -n1 | cut -f2- -d:)
count:
@echo Does not count spaces! The number is therefore lower than the actual number of standard pages.


@ -1,7 +1,17 @@
% The example bibliographical entries below were borrowed from the
% <https://www.ctan.org/pkg/biblatex-iso690> package documentation.
@article{wavl,
@ARTICLE{avl,
author = {Adelson-Velskij, Georgij and Landis, Evgenij},
year = {1962},
month = {01},
pages = {263-266},
title = {An algorithm for the organization of information},
volume = {146},
journal = {Doklady Akad. Nauk SSSR}
}
@ARTICLE{wavl,
author = {Haeupler, Bernhard and Sen, Siddhartha and Tarjan, Robert E.},
title = {Rank-Balanced Trees},
year = {2015},
@ -21,7 +31,7 @@
keywords = {data structures, exponential potential function, red-black trees, AVL trees, search trees, Balanced binary trees, amortized complexity}
}
@article{ravl,
@ARTICLE{ravl,
author = {Sen, Siddhartha and Tarjan, Robert E. and Kim, David Hong Kyun},
title = {Deletion Without Rebalancing in Binary Search Trees},
year = {2016},
@ -41,6 +51,14 @@
keywords = {Balanced trees, exponential potential function, amortized complexity, algorithm, data structure, database access methods}
}
@ARTICLE{hypothesis,
author = {MacIver, David R. and Hatfield-Dodds, Zac and {many other contributors}},
doi = {10.21105/joss.01891},
month = {11},
title = {{Hypothesis: A new approach to property-based testing}},
year = {2019}
}
@INPROCEEDINGS{rbtree,
author={Guibas, Leo J. and Sedgewick, Robert},
booktitle={19th Annual Symposium on Foundations of Computer Science (sfcs 1978)},
@ -52,8 +70,20 @@
doi={10.1109/SFCS.1978.3}
}
@inproceedings{btree,
author = {Bayer, R. and McCreight, E.},
@ARTICLE{adams_1993,
title={Functional Pearls Efficient sets—a balancing act},
volume={3},
DOI={10.1017/S0956796800000885},
number={4},
journal={Journal of Functional Programming},
publisher={Cambridge University Press},
author={Adams, Stephen},
year={1993},
pages={553--561}
}
@INPROCEEDINGS{btree,
author = {Bayer, Rudolf and McCreight, Edward},
title = {Organization and Maintenance of Large Ordered Indices},
year = {1970},
isbn = {9781450379410},
@ -70,16 +100,16 @@
series = {SIGFIDET '70}
}
@book{knuth1998art,
@BOOK{knuth1998art,
title={The Art of Computer Programming: Volume 3: Sorting and Searching},
author={Knuth, D.E.},
author={Knuth, Donald E.},
isbn={9780321635785},
url={https://books.google.cz/books?id=cYULBAAAQBAJ},
year={1998},
publisher={Pearson Education}
}
@Book{cormen2009introduction,
@BOOK{cormen2009introduction,
author = {Cormen, Thomas},
title = {Introduction to algorithms},
publisher = {MIT Press},
@ -88,7 +118,7 @@
isbn = {9780262033848}
}
@Book{labyrint,
@BOOK{labyrint,
author = {Mareš, Martin and Valla, Tomáš},
title = {Průvodce labyrintem algoritmů},
publisher = {CZ.NIC, z.s.p.o.},
@ -97,14 +127,6 @@
url = {http://pruvodce.ucw.cz}
}
@article{hypothesis,
author = {MacIver, David R. and Hatfield-Dodds, Zac and {many other contributors}},
doi = {10.21105/joss.01891},
month = {11},
title = {{Hypothesis: A new approach to property-based testing}},
year = {2019}
}
@ONLINE{ib002,
title = {Algoritmy a datové struktury I},
url = {https://is.muni.cz/predmet/fi/jaro2022/IB002},
@ -121,137 +143,10 @@
langid = {czech}
}
@article{adams_1993,
title={Functional Pearls Efficient sets—a balancing act},
volume={3},
DOI={10.1017/S0956796800000885},
number={4},
journal={Journal of Functional Programming},
publisher={Cambridge University Press},
author={Adams, Stephen},
year={1993},
pages={553--561}
}
%% EXAMPLES FOLLOW %%
% BORGMAN, Christine L., 2003. From Gutenberg to the global information
% infrastructure: access to information in the networked world. First. Cambridge
% (Mass): The MIT Press. ISBN 0-262-52345-0 [xviii, 324]
@BOOK{borgman03,
author = {Borgman, Christine L.},
date = {2003},
title = {From Gutenberg to the global information infrastructure},
subtitle = {access to information in the networked world},
edition = {1}, % should not be stated
location = {Cambridge (Mass)},
publisher = {The MIT Press},
pagetotal = {xviii, 324},
isbn = {0-262-52345-0},
langid = {english}
}
% GREENBERG, David, 1998. Camel drivers and gatecrashers: quality control in
% the digital research library. In: HAWKINS, B.L et al. (eds.). The mirage of
% continuity: reconfiguring academic information resources for the 21st
% century. Washington (D.C.): Council on Library and Information Resources;
% Association of American Universities, pp. 105-116
@INCOLLECTION{greenberg98,
crossref = {mirage98},
author = {Greenberg, David},
title = {Camel drivers and gatecrashers},
subtitle = {quality control in the digital research library},
pages = {105-116},
}
@COLLECTION{mirage98,
title = {The mirage of continuity},
subtitle = {reconfiguring academic information
resources for the 21st century},
editor = {Hawkins, B.L and Battin, P},
date = {1998},
location = {Washington (D.C.)},
publisher = {Council on Library and Information Resources;
Association of American Universities},
langid = {english}
}
% LYNCH, C., 2005. Where do we go from here?: the next decade for digital
% libraries. DLib Magazine [online]. Vol. 11, no. 7/8 [visited on 2005-08-15].
% ISSN 1082-9873. Available from:
% http://www.dlib.org/dlib/july05/lynch/07lynch.html
@ARTICLE{lynch05,
crossref = {dlib},
author = {Lynch, C.},
title = {Where do we go from here?},
subtitle = {the next decade for digital libraries},
url = {http://www.dlib.org/dlib/july05/lynch/07lynch.html},
urldate = {2005-08-15}
}
@PERIODICAL{dlib,
journaltitle = {DLib Magazine},
date = {2005},
volume = {11},
number = {7/8},
issn = {1082-9873},
langid = {english},
}
% HÀN, Thé Thành, 2001. Micro-typographic extensions to the TEX typesetting
% system [online]. Brno [visited on 2016-12-09]. Available from:
% http://www.pragma-ade.nl/pdftex/thesis.pdf. PhD thesis. The Faculty of
% Informatics, Masaryk University.
@PHDTHESIS{thanh01,
author = {Hàn Thé, Thành},
title = {Micro-typographic extensions to the \TeX{} typesetting system},
date = {2001},
institution = {The Faculty of Informatics, Masaryk University},
location = {Brno},
url = {http://www.pragma-ade.nl/pdftex/thesis.pdf},
urldate = {2016-12-09},
langid = {english}
}
% SHANNON, Claude Elwood, 1940. A symbolic analysis of relay and switching
% circuits. Available from DOI: 1721.1/11173. Master's thesis. Massachusetts
% Institute of Technology.
@MASTERSTHESIS{shannon40,
author = {Shannon, Claude Elwood},
title = {A symbolic analysis of relay and switching circuits},
date = {1940},
institution = {Massachusetts Institute of Technology},
langid = {english},
doi = {1721.1/11173},
}
% EHLINGER JR., Philip Charles. Device for the treatment of hiccups. US.
% US Patent, 7062320.
@PATENT{ehlinger06,
author = {Ehlinger Jr., Philip Charles},
title = {Device for the treatment of hiccups},
type = {US Patent},
location = {US},
date = {2006},
number = {7062320},
langid = {english}
}
% Masaryk University, 1996-2009 [online]. Brno: Masaryk University
% [visited on 2016-12-09]. Available from: https://www.muni.cz/en.
@ONLINE{muni,
publisher = {Masaryk University},
title = {Masaryk University},
date = {1996/2009},
url = {https://www.muni.cz/en},
urldate = {2016-12-09},
location = {Brno},
langid = {english}
}
@ONLINE{llrb,
author = {Sedgewick, Robert},
title = {Left-leaning Red-Black Trees},
year = {2008},
url = {https://sedgewick.io/wp-content/uploads/2022/03/2008-09LLRB.pdf},
urldate = {2022-05-10}
}


@ -1,34 +1,31 @@
\chapter{Implementation}
For the implementation of rank-balanced trees we have used two programming languages: C\# and Python. C\# implementation has not been finished and therefore is not part of the submitted attachments. However it has given a valuable insight into the role of preconditions and invariants in algorithms while \texttt{null}-checking is enforced, since, for example, static type control cannot be aware of a node \textbf{not} being a \texttt{null} after checking specific set of conditions that forbid such scenario.
For the implementation of rank-balanced trees we have used two programming languages: C\# and Python. The C\# implementation has not been finished and therefore is not part of the submitted attachments. However, it has given valuable insight into the role of preconditions and invariants in algorithms when \texttt{null}-checking is enforced; for example, static type checking cannot be aware of a node \textbf{not} being \texttt{null} after checking a specific set of conditions that forbids such a scenario.
Python has been chosen as the \textit{go-to} language, since it is used for teaching foundations of programming~\cite{ib111} and also introductory course of algorithms and data structures~\cite{ib002} at our faculty.
We have started by implementing a general idea of a rank-balanced tree. Rank-balanced tree is an abstract class that \textbf{does not} implement methods specific to different kinds of trees such as
Python has been chosen as the \textit{go-to} language, since it is used for teaching the foundations of programming~\cite{ib111} and also in the introductory course on algorithms and data structures~\cite{ib002} at our faculty.
We have started by implementing the general idea of a rank-balanced tree. The rank-balanced tree (\texttt{RankedTree}) is an abstract class that \textbf{does not} implement the methods specific to different kinds of trees, such as:
\begin{enumerate}
\item \texttt{is\_correct\_node} is used to check whether node and its subtrees satisfy rank rules
\item \texttt{\_insert\_rebalance} rebalances the tree from the given node after insertion
\item \texttt{\_delete\_rebalance} rebalances the tree from the given nodes (deleted node and parent) after deletion
\item \texttt{is\_correct\_node} that is used to check whether a node and its subtrees satisfy the rank rules.
\item \texttt{\_insert\_rebalance} that rebalances the tree from the given node after insertion.
\item \texttt{\_delete\_rebalance} that rebalances the tree from the given nodes (the deleted node and its parent) after deletion.
\end{enumerate}
Apart from the abstract methods there is provided following interface that is either shared by the specific trees or used by them:
Apart from the abstract methods, \texttt{RankedTree} provides the following interface that is either shared by the specific trees or used by them:
\begin{enumerate}
\item \texttt{\_get\_unwrapped\_graph} that is used to generate DOT format of the tree for purpose of either debugging or sharing
\item \texttt{rank} returns rank of the root of the tree
\item \texttt{is\_correct} calls \texttt{is\_correct\_node} on the root of the tree
\item \texttt{search} is used to look up keys in the tree
\item \texttt{insert} implements generic insertion into the tree followed by a call to tree-specific rebalance function
\item \texttt{delete} is identical to \texttt{insert}, but for the deletion from the tree
\item \texttt{\_get\_unwrapped\_graph} that is used to generate the DOT format of the tree for the purpose of either debugging or sharing.
\item \texttt{rank}, which returns the rank of the root of the tree.
\item \texttt{is\_correct}, which calls \texttt{is\_correct\_node} on the root of the tree.
\item \texttt{search} that is used to look up keys in the tree.
\item \texttt{insert}, which implements the generic insertion into the tree followed by a call to the tree-specific rebalance function.
\item \texttt{delete}, which is identical to \texttt{insert}, but for the deletion from the tree.
\end{enumerate}
Apart from that, we have also implemented a class for the representation of nodes that provides a fairly rich interface, which is utilized during rebalancing and also in the generic methods on the generic tree.
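For illustration, a minimal sketch of how such an abstract class may look in Python follows. The method names mirror the interface described above, while the simplified \texttt{Node} class and the method bodies are assumptions of this sketch rather than the exact implementation (\texttt{insert}, \texttt{delete} and \texttt{\_get\_unwrapped\_graph} are omitted for brevity).
\begin{verbatim}
from abc import ABC, abstractmethod


class Node:
    """Minimal node: a key, a rank and the usual structural pointers."""

    def __init__(self, key, rank=0, parent=None, left=None, right=None):
        self.key, self.rank = key, rank
        self.parent, self.left, self.right = parent, left, right


class RankedTree(ABC):
    def __init__(self):
        self.root = None

    # methods specific to a concrete rank rule
    @abstractmethod
    def is_correct_node(self, node):
        """Check that the node and its subtrees satisfy the rank rules."""

    @abstractmethod
    def _insert_rebalance(self, node):
        """Rebalance the tree from the given node after insertion."""

    @abstractmethod
    def _delete_rebalance(self, node, parent):
        """Rebalance the tree from the given nodes after deletion."""

    # shared interface
    def rank(self):
        return self.root.rank if self.root else -1

    def is_correct(self):
        return self.is_correct_node(self.root)

    def search(self, key):
        node = self.root
        while node is not None and node.key != key:
            node = node.left if key < node.key else node.right
        return node
\end{verbatim}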
\section{Testing and validation}
From the beginning we have employed the techniques of property-based testing (done manually during C\# implementation and uses Hypothesis~\cite{hypothesis} for Python implementation). The elementary requirement for testing and validation of implemented algorithms was a \textbf{correct} \texttt{is\_correct\_node} method that validates properties of a specific rank-balanced tree and a good set of invariants. List of set invariants follows:
From the beginning we have employed the techniques of property-based testing (done manually during the C\# implementation and via Hypothesis~\cite{hypothesis} for the Python implementation). The elementary requirement for testing and validation of the implemented algorithms was a \textbf{correct} \texttt{is\_correct\_node} method that validates the properties of a specific rank-balanced tree, together with a good set of invariants. The invariants we have set are the following (a sketch of one such property-based test follows the list):
\begin{enumerate}
\item for insertion we have set the following invariants
\begin{itemize}
@ -44,4 +41,4 @@ From the beginning we have employed the techniques of property-based testing (do
\end{itemize}
\end{enumerate}
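To illustrate how such invariants translate into Hypothesis~\cite{hypothesis} tests, a sketch of one property for insertion follows; the \texttt{WavlTree} class and the module it is imported from stand for the concrete tree implementation described above, so their exact names are an assumption.
\begin{verbatim}
from hypothesis import given, strategies as st

from wavl import WavlTree  # assumed module and class name


@given(st.lists(st.integers(), unique=True))
def test_insert_keeps_rank_rules_and_membership(keys):
    tree = WavlTree()
    for key in keys:
        tree.insert(key)
        # invariant: the rank rules hold after every single insertion
        assert tree.is_correct()
    # invariant: every inserted key can be found afterwards
    assert all(tree.search(key) is not None for key in keys)
\end{verbatim}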
We also admit abuse of property-based testing to find a \textit{minimal} sequence of operations when WAVL tree relaxation manifests. For that purpose we have implemented a \textit{comparator} class that takes two different instances of rank-balanced trees and provides rudimentary \texttt{insert} and \texttt{delete} interface enriched with \texttt{are\_same} (evaluates isomorphism and ranks) and \texttt{are\_similar} (evaluates just isomorphism) methods. While trying to find minimal counter-example, we have also discovered a bug in rebalance after deletion of WAVL tree that caused enforcement of the AVL rank rules.
We also admit to abusing property-based testing to find a \textit{minimal} sequence of operations in which the WAVL tree relaxation manifests itself. For that purpose we have implemented a \textit{comparator} class that takes two different instances of rank-balanced trees and provides a rudimentary \texttt{insert} and \texttt{delete} interface enriched with the \texttt{are\_same} (evaluates isomorphism and ranks) and \texttt{are\_similar} (evaluates just isomorphism) methods. While trying to find a minimal counter-example, we have also discovered a bug in the rebalance after deletion in the WAVL tree that caused enforcement of the AVL rank rule.
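Roughly, such a comparator can be sketched as follows; the structural comparison is simplified and the attribute names (\texttt{root}, \texttt{key}, \texttt{rank}) are assumptions of this sketch.
\begin{verbatim}
class TreeComparator:
    """Apply the same operations to two rank-balanced trees and compare them."""

    def __init__(self, first, second):
        self.first, self.second = first, second

    def insert(self, key):
        self.first.insert(key)
        self.second.insert(key)

    def delete(self, key):
        self.first.delete(key)
        self.second.delete(key)

    def are_similar(self):
        # isomorphism only: same shape and keys, ranks are ignored
        return self._same(self.first.root, self.second.root, ranks=False)

    def are_same(self):
        # isomorphism and equal ranks
        return self._same(self.first.root, self.second.root, ranks=True)

    def _same(self, a, b, ranks):
        if a is None or b is None:
            return a is b
        if a.key != b.key or (ranks and a.rank != b.rank):
            return False
        return (self._same(a.left, b.left, ranks)
                and self._same(a.right, b.right, ranks))
\end{verbatim}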


@ -1,10 +1,10 @@
\chapter*{Introduction}
\addcontentsline{toc}{chapter}{Introduction}
Data structures have become a regular part of the essential toolbox for problem-solving. In many cases, they also help to improve the existing algorithm's performance, e.g. using a priority queue in Dijkstra's algorithm for the shortest path. This thesis will mainly discuss the implementation of a set (which can also be adjusted to represent a dictionary or map, if you wish).
Data structures have become a regular part of the essential toolbox for problem-solving. In many cases, they also help to improve the existing algorithm's performance, e.g. using a priority queue in Dijkstra's algorithm for the shortest path. This thesis will mainly discuss the implementation of a set.
Currently, the most commonly used implementations of sets use hash tables, but we will talk about another common alternative, implementation via self-balancing search trees. Compared to a hash table, they provide us with \textbf{consistent} time complexity, but at the cost of a requirement for ordering on the elements. The most implemented self-balancing binary tree is a \textit{red-black tree}, as described by Guibas and Sedgewick~\cite{rbtree}. Among other alternatives, we can find (non-binary) \textit{B-tree}~\cite{btree} and \textit{AVL tree}~\cite{knuth1998art}.
Currently, the most commonly used implementations of sets use hash tables, but we will talk about another common alternative, implementation via self-balancing search trees. Compared to a hash table, they provide consistent time complexity, but at the cost of a requirement for ordering on the elements. The most widely implemented self-balancing binary tree is the \textit{red-black tree}, as described by Guibas and Sedgewick~\cite{rbtree}. Among other alternatives, we can find the (non-binary) \textit{B-tree}~\cite{btree} and the \textit{AVL tree}~\cite{avl}.
This thesis analyses and visualizes the \textit{Weak AVL (WAVL)}\cite{wavl} tree that has more relaxed conditions than the AVL tree but still provides better balancing than a red-black tree.
This thesis analyses and visualizes the \textit{Weak AVL (WAVL)} tree~\cite{wavl}, which has more relaxed conditions than the AVL tree, but still provides better balancing than a red-black tree.
We start by revisiting commonly used search trees, explaining the basic ideas behind their self-balancing and bounding their height in the worst-case scenario. Then we state the terminology used and explain the rank-balanced tree. Given a rank-balanced tree, we can delve into the details behind the representation of the previously shown self-balancing binary search trees using ranks, and the WAVL rule gives us a new self-balancing binary search tree. For the WAVL tree, we provide pseudocode and explain the operations used for rebalancing, including diagrams. Later on, we discuss different heuristics that can be used for rebalancing and the implementation of the visualization of the operations on the WAVL tree.


@ -1,4 +1,4 @@
\chapter{Rank-Balanced Trees}
\chapter{Rank-Balanced Trees}\label{chap:rank-balanced-trees}
In comparison to nodes in binary search trees, nodes in rank-balanced trees contain one more piece of information, and that is \textit{rank}. Each type of tree that can be implemented using this representation, e.g. red-black, 2-3-4, AVL or WAVL, has a specific set of rules that ensure the resulting tree is balanced.
@ -10,22 +10,20 @@ In the text and pseudocode we adopt these functions or properties~\cite{wavl}:
\item function $parent(x)$ or property $x.parent$ returns the parent of a node; analogously for the left and right children of a node
\item \textit{rank-difference} of \textit{x} is defined as $r(parent(x)) - r(x)$
\item $x$ is an \textit{i-child} if its rank-difference is $i$
\item $x$ is an $(i, j)$ node if its left and right children have $i$ and $j$ rank-differences respectively;
ordering of the children does not matter
\item $x$ is an $(i, j)$-node if its left and right children have $i$ and $j$ rank-differences respectively; ordering of the children does not matter
\end{itemize}
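The definitions above translate almost directly into code. A small Python sketch follows; the \texttt{Node} attributes (\texttt{rank}, \texttt{parent}, \texttt{left}, \texttt{right}) and the convention that missing children are external nodes of rank $-1$ are assumptions of this sketch.
\begin{verbatim}
def rank(node):
    # missing (external) nodes are treated as having rank -1
    return node.rank if node is not None else -1


def rank_difference(node):
    # r(parent(x)) - r(x)
    return node.parent.rank - node.rank


def is_i_child(node, i):
    return rank_difference(node) == i


def is_i_j_node(node, i, j):
    # ordering of the children does not matter
    diffs = sorted((node.rank - rank(node.left), node.rank - rank(node.right)))
    return diffs == sorted((i, j))
\end{verbatim}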
\section{Rules for the other trees}
\section{Rules for other trees}
As we have mentioned above, it is possible to implement different kinds of self-balancing binary search trees via different rules for ranks.
As we have mentioned at the beginning of \hyperref[chap:rank-balanced-trees]{this chapter}, it is possible to implement different kinds of self-balancing binary search trees via different rules for ranks.
\subsection{AVL tree}\label{chap:avl-rule}
\textbf{AVL Rule}: Every node is (1, 1) or (1, 2).~\cite{wavl}
In case of the AVL trees rank represents height. Here we can notice a very smart way of using the \textit{(i, j) node} definition. If we go back to the definition and want to be explicit about the nodes that are allowed with the \textit{AVL Rule}, then we get (1, 1), (1, 2) \textbf{or} (2, 1) nodes. However it is possible to find implementations of the AVL tree that allow leaning \textbf{to only one side} as opposed to the original requirements given by Adelson-Velsky and Landis. Forbidding interchangeability of (i, j) with (j, i) nodes would still yield AVL trees that lean to one side.
In the case of the AVL tree, the rank represents the height. Here we can notice a very clever use of the \textit{(i, j)-node} definition. If we go back to the definition and want to be explicit about the nodes that are allowed by the \textit{AVL Rule}, then we get (1, 1), (1, 2) \textbf{or} (2, 1) nodes. However, it is possible to find implementations of the AVL tree that allow leaning \textbf{to only one side}, as opposed to the original requirements given by \textit{Adelson-Velsky and Landis}~\cite{avl}. Forbidding the interchangeability of (i, j)-nodes with (j, i)-nodes would still yield AVL trees, only ones that lean to one side.
The meaning of the \textit{AVL Rule} is quite simple, since the rank represents the height in that case. We can draw analogies using the notation used for the AVL trees, where we mark nodes with a trit (or a sign) or use a balance-factor. We have two cases to discuss:
\begin{itemize}
\item \textbf{(1, 1)-node} represents a tree where both of its subtrees have the same height. In this case we are talking about the nodes with balance-factor $0$ (respectively being signed with a $0$).
\item \textbf{(1, 2)-node} represents a tree where one of its subtrees has a greater height. In this case we are talking about the nodes with balance-factor $-1$ or $1$ (respectively being signed with a $-$ or a $+$).
@ -69,7 +67,7 @@ Example of the AVL tree that uses ranks instead of signs or balance-factors can
\draw (258.6bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{Example of the AVL tree using ranks}
\caption{Example of the AVL tree using ranks.}
\label{fig:ranked:avl}
\end{figure}
@ -79,8 +77,7 @@ Example of the AVL tree that uses ranks instead of signs or balance-factors can
In the case of red-black trees, the rank represents the number of black nodes on a path from the node to a leaf (excluding the node itself). Based on that we can discuss the \textit{Red-Black Rule} in detail:
\begin{enumerate}
\item \textit{All rank differences are 0 or 1} inductively enforces monotonically increasing (at most by 1) count of black nodes from the leaves. \\
In detail:
\item \textit{All rank differences are 0 or 1} inductively enforces a monotonically non-decreasing (by at most 1) count of black nodes from the leaves. In detail:
\begin{enumerate}
\item In case the \textbf{current node is black}, the rank difference must be 1, since we have one more black node on the path from the parent to the leaves than from the current node.
\item In case the \textbf{current node is red}, the rank difference must be 0, since from the parent the count of black nodes on the path to leaves has not changed.
@ -89,7 +86,7 @@ In case of red-black trees, rank represents number of black nodes on a path from
\item \textit{No parent of a 0-child is a 0-child} ensures that there are no two consecutive red nodes, since a 0-child node is equivalent to a red node.
\end{enumerate}
Example of the red-black tree that uses ranks instead of colors can be seen in \autoref{fig:ranked:rbt}, red nodes are also colored for convenience.
An example of the red-black tree that uses ranks instead of colors can be seen in \autoref{fig:ranked:rbt}; red nodes are also colored for convenience.
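Stated as code, the check of the \textit{Red-Black Rule} on a single node could look roughly as follows; this is only a sketch mirroring the two conditions above, with the same assumed \texttt{Node} attributes and the convention of rank $-1$ for missing children.
\begin{verbatim}
def rank(node):
    return node.rank if node is not None else -1


def is_rb_correct(node):
    """Red-Black Rule: all rank differences are 0 or 1,
    and no parent of a 0-child is a 0-child."""
    if node is None:
        return True
    for child in (node.left, node.right):
        diff = node.rank - rank(child)
        if diff not in (0, 1):
            return False
        # a 0-child (red node) must not have a parent that is itself a 0-child
        if diff == 0 and node.parent is not None \
                and node.parent.rank - node.rank == 0:
            return False
    return is_rb_correct(node.left) and is_rb_correct(node.right)
\end{verbatim}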
The majority of red-black tree implementations color the nodes of the tree. Following that notation and the \textbf{precise} definition of the red-black tree, it is quite common to ask the following questions:
@ -100,7 +97,7 @@ Majority of the red-black tree implementations color nodes of the tree, followin
If we do not count external nodes themselves, we decrease the count of black nodes on every path to the external nodes by $1$.
\end{enumerate}
Overall they do not really matter as long as they are used consistently, since they affect the counts globally.
Overall they do not really matter, as long as they are used consistently, since they affect the counts globally.
However, it is also possible to color edges instead of the nodes, as is presented in \textit{Průvodce labyrintem algoritmů} by \textit{Mareš and Valla}~\cite{labyrint}. In this representation the color of an edge represents the color of the child node. This representation is much more „natural“ for the representation using ranks, as can be seen in \autoref{fig:ranked:rbt}, where edges connecting nodes with rank-difference $1$ represent \textit{black edges} and edges connecting nodes with rank-difference $0$ represent \textit{red edges}. It is also apparent that using this representation, the root of the tree does not hold any color anymore.
@ -142,7 +139,7 @@ However it is also possible to color edges instead of the nodes as is presented
\draw (306.6bp,61.5bp) node {0};
%
\end{tikzpicture}
\caption{Example of the red-black tree using ranks}
\caption{Example of the red-black tree using ranks.}
\label{fig:ranked:rbt}
\end{figure}
@ -150,26 +147,26 @@ However it is also possible to color edges instead of the nodes as is presented
To show that using rank is mostly an implementation detail, we will describe an implementation of the AVL tree using rank.
Implementation of the insertion is trivial, since it is described by \textit{Haeupler et al.} and is used in the WAVL tree. All we need to implement is the deletion from the AVL tree. We will start by short description of the deletion rebalance as given by \textit{Mareš and Valla} in \textit{Průvodce labyrintem algoritmů}.
The implementation of the insertion is trivial, since it is described by \textit{Haeupler et al.}~\cite{wavl} and is used in the WAVL tree. All we need to implement is the deletion from the AVL tree. We will start with a short description of the deletion rebalance as given by \textit{Mareš and Valla} in \textit{Průvodce labyrintem algoritmů}~\cite{labyrint}.
When propagating the height change, we can encounter 3 cases (we explain them with respect to propagating the deletion from the left subtree; propagation from the right is mirrored and the roles of the trits $+$ and $-$ swap)~\cite{labyrint}; a sketch in code follows the list:
\begin{enumerate}
\item \textit{Node was marked with $-$.} In this case, heights of left and right subtrees are equal now and node is marked with $0$, but propagation must be continued, since the height of the whole subtree has changed.\label{avl:rules:delete:1}
\item \textit{Node was marked with $0$.} In this case, node is marked with $+$ and the height of the subrtree has not changed, therefore we can stop the propagation.\label{avl:rules:delete:2}
\item \textit{Node was marked with $+$.} In this case, node would acquire balance-factor of $+2$, which is not allowed. In this situation we decide based on the mark of the node from which we are propagating the insertion in the following way (let $x$ the current node marked with $+$ and $y$ be the right child of $x$):\label{avl:rules:delete:3}
\item \textit{Node was marked with $0$.} In this case, the node is marked with $+$ and the height of the subtree has not changed, therefore we can stop the propagation.\label{avl:rules:delete:2}
\item \textit{Node was marked with $+$.} In this case, the node would acquire a balance-factor of $+2$, which is not allowed. In this situation we decide based on the mark of the node from which we are propagating the deletion in the following way (let $x$ be the current node marked with $+$ and $y$ be the right child of $x$):\label{avl:rules:delete:3}
\begin{enumerate}
\item $y$ is marked with $+$, then we rotate by $x$ to the left. After that both $x$ and $y$ can be marked with $0$. Height from the point of the parent has changed, so we continue the propagation.\label{avl:rules:delete:3a}
\item $y$ is marked with $0$, then we rotate by $x$ to the left. After the rotation, $x$ can be marked with $+$ and $y$ with $-$. Height of the subtree has not changed, so propagation can be stopped.\label{avl:rules:delete:3b}
\item If $y$ is marked with $+$, then we rotate by $x$ to the left. After that both $x$ and $y$ can be marked with $0$. Height from the point of the parent has changed, so we continue the propagation.\label{avl:rules:delete:3a}
\item If $y$ is marked with $0$, then we rotate by $x$ to the left. After the rotation, $x$ can be marked with $+$ and $y$ with $-$. Height of the subtree has not changed, so propagation can be stopped.\label{avl:rules:delete:3b}
\item If $y$ is marked with $-$, let $z$ be the left child of $y$. We double rotate: first by $z$ to the right and then by $x$ to the left. After the double rotation $x$ can be marked with either $0$ or $-$, $y$ with $0$ or $+$, and $z$ gets $0$. The height of the subtree has changed, therefore we must propagate further.\label{avl:rules:delete:3c}
\end{enumerate}
\end{enumerate}\label{avl:rules:delete}
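A rough Python sketch of one such rebalancing step follows. Only the propagation from the left subtree is shown (the mirrored case is analogous), the balance-factor is derived from the already adjusted ranks of the children, and the rotation helpers (assumed to also update the ranks) as well as the exact function signature are assumptions of this sketch; the return value says whether the height change has to be propagated further.
\begin{verbatim}
def balance_factor(node):
    # ranks play the role of heights; missing children have rank -1
    left = node.left.rank if node.left is not None else -1
    right = node.right.rank if node.right is not None else -1
    return right - left


def delete_fix_node_from_left(tree, x):
    """One step of the deletion rebalance after the left subtree shrank."""
    bf = balance_factor(x)
    if bf == 0:
        # rule 1: node was a "-", both subtrees are now of equal height
        x.rank -= 1
        return True                      # keep propagating
    if bf == 1:
        # rule 2: node was a "0", it becomes a "+"
        return False                     # stop
    # rule 3: node was a "+", it would now acquire a balance-factor of +2
    y = x.right
    y_bf = balance_factor(y)
    if y_bf >= 0:
        tree.rotate_left(x)              # rules 3a and 3b: single rotation
        return y_bf == 1                 # 3a propagates, 3b stops
    tree.rotate_right(y)                 # rule 3c: double rotation
    tree.rotate_left(x)                  # (ranks updated inside the helpers)
    return True
\end{verbatim}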
We have implemented the deletion rebalance by implementing following functions:
With the knowledge of these rules, we have implemented the deletion rebalance using the following functions:
\begin{enumerate}
\item \avlDeleteRebalance{} that handles updating the current node and its parent and iteratively calls subroutine handling previously described \textit{one step of a rebalancing}
\item \avlDeleteFixNode{} that handles one adjustment of rebalancing as described above
\item \avlDeleteRotate{} that handles rotation and updating of ranks, if necessary
\item \avlDeleteRebalance{} that handles updating the current node and its parent and iteratively calls a subroutine handling the previously described \textit{one step of the rebalancing}.
\item \avlDeleteFixNode{} that handles one adjustment of the rebalancing as described above.
\item \avlDeleteRotate{} that handles the rotation and the updating of ranks, if necessary.
\end{enumerate}
\begin{algorithm}
@ -194,7 +191,7 @@ We have implemented the deletion rebalance by implementing following functions:
\caption{\texttt{deleteRebalance} algorithm for the AVL tree}\label{algorithm:avl:deleteRebalance}
\end{algorithm}
\texttt{deleteRebalance}, as can be seen in \autoref{algorithm:avl:deleteRebalance}, is quite straightforward. At the beginning we early return in case there is nothing to be rebalanced, which happens when deleting the last node from the tree. Then we handle case when we are given only parent by correctly setting $y$ and $parent$. Following up on that, as long as we have a node to be checked, we call \autoref{algorithm:avl:deleteFixNode} to fix balancing of the current node. Algorithm for fixing node returns $true$ or $false$ depending on the need to propagate the height change further, which is utilized in the condition of the \texttt{while} loop.
\texttt{deleteRebalance}, as can be seen in \autoref{algorithm:avl:deleteRebalance}, is quite straightforward. At the beginning we return early in case there is nothing to be rebalanced, which happens when deleting the last node from the tree. Then we handle the case where we are given only the parent by correctly setting $y$ and $parent$. Following up on that, as long as we have a node to be checked, we call \autoref{algorithm:avl:deleteFixNode} to fix the balancing of the current node. The algorithm for fixing a node returns $true$ or $false$ depending on the need to propagate the height change further, which is utilized in the condition of the \texttt{while} loop.
\begin{algorithm}
\Proc{$\texttt{deleteFixNode}(T, x, parent)$}{
@ -249,6 +246,6 @@ There are two operations that are not described using helper functions and they
\newpage
\texttt{deleteRotate} is handling only fixes where the rotations are required. Both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} include comments to highlight which rules are handled. This function is also done generically regardless of the subtree from which the height change is being propagated. This is done passing in functions used for rotations (since it is mirrored) and also by passing in the balance-factor required for just one rotation.
\texttt{deleteRotate} handles only the fixes where rotations are required. Both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} include comments to highlight which rules are handled. This function is also written generically, regardless of the subtree from which the height change is being propagated. This is done by passing in the functions used for rotations (since the cases are mirrored) and also by passing in the balance-factor required for just one rotation.
In both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} there is a key difference compared to the AVL tree implementations without ranks. Comparing the \hyperref[avl:rules:delete]{rules for deletion} with the algorithms for the rank-based implementation, it is apparent that during the propagation of the height change, the balance-factors of the affected nodes are already adjusted, since the information comes from either of their subtrees and is calculated using the ranks of their children, which are already adjusted. This fact needs to be reflected in the implementation accordingly, since it shifts the meaning of the rules as they are described above, which are written for implementations that store the trit in the nodes directly and update it manually during rebalancing.


@ -4,23 +4,26 @@ This chapter will briefly discuss the properties and fundamental ideas behind th
\section{Red-black trees}
As mentioned previously, red-black trees are among the most popular implementations in standard libraries. As always, we have a binary search tree, and then each node is given \textit{red} or \textit{black} colour. A red-black tree is kept balanced by enforcing the following set of rules~\cite{rbtree}:
As mentioned previously, red-black trees are among the most popular implementations in standard libraries. As always, we have a binary search tree, and each node is given \textit{red} or \textit{black} colour. A red-black tree is kept balanced by enforcing the following set of rules~\cite{rbtree}:
\begin{enumerate}
\item External nodes are black; internal nodes may be red or black.
\item For each internal node, all paths from it to external nodes contain the same number of black nodes.
\item No path from an internal node to an external node contains two red nodes in a row.
\item External nodes do not hold any data.
\item Root has black colour. This rule is optional, since it increases the count of black nodes from root to each of the external nodes. However it may be beneficial during insertion.
\end{enumerate}
Given this knowledge, we can safely deduce the following relation between the height of the red-black tree and the number of nodes stored in it~\cite{cormen2009introduction}:
\[
\log_2{(n + 1)} \leq h \leq 2 \cdot \log_2{(n + 2)} - 2
\]\label{rb-height}
where the lower bound is given by a perfect binary tree and upper bound by the minimal red-black tree.
There are also other variants of the red-black tree that are considered to be simpler for implementation, e.g. left-leaning red-black tree, as described by Sedgewick.
The lower bound is given by a perfect binary tree and the upper bound by the minimal red-black tree.
Red-black trees are used to implement sets in C++, Java and C\#.
There are also other variants of the red-black tree that are considered simpler to implement, e.g. the left-leaning red-black tree, as described by \textit{Sedgewick}~\cite{llrb}.
Red-black trees are used to implement sets in C++~\cite{llvm}, Java and C\#.
\section{AVL tree}
@ -36,15 +39,15 @@ Then we have an AVL tree, if for every node $n$ in the tree the following holds:
BalanceFactor(n) \in \{ -1, 0, 1 \}
\]
In other words, the heights of left and right subtrees of each node differ at most in 1.
In other words, the heights of the left and right subtrees of each node differ by at most 1.~\cite{avl}
Similarly, we will deduce the height of the AVL tree from original paper, by Adelson-Velsky and Landis, we get:
Similarly, if we deduce the height of the AVL tree from the original paper by \textit{Adelson-Velsky and Landis}~\cite{avl}, we get:
\[
\left( \log_2{(n + 1)} \leq \right) h < \log_{\varphi}{(n + 1)} < \frac{3}{2} \cdot \log_2{(n + 1)}
\]\label{avl-height}
If we compare the upper bounds for the height of the red-black trees and AVL trees, we can see that AVL rules are more strict than red-black rules, but at the cost of rebalancing. However, in both cases, the rebalancing still takes $\log_2{n}$.
If we compare the upper bounds for the height of the red-black trees and AVL trees, we can see that the AVL rules are stricter than the red-black rules, but at the cost of rebalancing. However, in both cases the rebalancing still takes $\log_2{n}$.
Regarding the implementation of AVL trees, we can see them implemented in the standard libraries of Agda and Coq.

thesis.idx (new empty file)


@ -2,7 +2,7 @@
\section{Rank rule}
Based on the rank rules for implementing red-black tree (as described in \ref{chap:rb-rule}) and AVL tree (as described in \ref{chap:avl-rule}), \textit{Haeupler et al.} present a new rank rule:
Based on the rank rules for implementing red-black tree (as described in \ref{chap:rb-rule}) and AVL tree (as described in \ref{chap:avl-rule}), \textit{Haeupler et al.}~\cite{wavl} present a new rank rule:
\textbf{Weak AVL Rule}: All rank differences are 1 or 2, and every leaf has rank 0.~\cite{wavl}
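Written down as code, the rule can be checked node by node roughly as follows; as before, the \texttt{Node} attributes and the convention that missing children have rank $-1$ are assumptions of this sketch.
\begin{verbatim}
def rank(node):
    return node.rank if node is not None else -1


def is_wavl_correct(node):
    """Weak AVL Rule: all rank differences are 1 or 2, every leaf has rank 0."""
    if node is None:
        return True
    if node.left is None and node.right is None and node.rank != 0:
        return False
    for child in (node.left, node.right):
        if node.rank - rank(child) not in (1, 2):
            return False
    return is_wavl_correct(node.left) and is_wavl_correct(node.right)
\end{verbatim}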
@ -15,10 +15,10 @@ Comparing the \textit{Weak AVL Rule} to the \textit{AVL Rule}, we can come to th
\section{Height boundaries}
We have described in \autoref{chap:sb-bst} other common self-balanced binary search trees to be able to draw analogies and explain differences between them. Given the boundaries of height for red-black and AVL tree, we can safely assume that the AVL is more strict with regards to the self-balancing than the red-black tree. Let us show how does WAVL fit among them. \textit{Haeupler et al.} present following bounds~\cite{wavl}:
We have described in \autoref{chap:sb-bst} common self-balancing binary search trees to be able to draw analogies and explain the differences between them. Given the boundaries of height for the red-black and AVL tree, we can safely assume that the AVL tree is stricter with regard to self-balancing than the red-black tree. Let us show how the WAVL tree fits among them. \textit{Haeupler et al.} present the following bounds~\cite{wavl}:
\[ h \leq k \leq 2h \text{ and } k \leq 2 \log_2{n} \]
In those equations we can see $h$ and $n$ in the same context as we used it to lay boundaries for the AVL and red-black trees, but we can also see new variable $k$, which represents the rank of the tree.
In those equations we can see $h$ and $n$ in the same context as we used them to lay the boundaries for the AVL and red-black trees, but we can also see a new variable $k$, which represents the rank of the tree.
One of the core differences between AVL and WAVL lies in the rebalancing after deletion. Insertion into the WAVL tree is realized in the same way as it would be in the AVL tree, and the benefit of the (2, 2)-node is used during the deletion rebalancing.
@ -26,7 +26,8 @@ From the previous 2 statements we can come to 2 conclusions and those are:
\begin{itemize}
\item If we commit only insertions to the WAVL tree, it will always yield a valid AVL tree. In that case it means that the height boundaries are the same as those of the AVL tree (described in \autoref{avl-height}).
\item If we commit deletions too, we can assume the worst-case scenario where \[ h < 2 \log_2{n} \] which also holds for the red-black trees.
\item If we commit deletions too, we can assume the worst-case scenario where \[ h < 2 \log_2{n} \]
This scenario is close to the upper bound of the height for the red-black trees (described in \autoref{rb-height}).
\end{itemize}
From the two conclusions we can safely deduce that the WAVL tree is in the worst-case scenario as efficient as the red-black tree and in the best-case scenario as efficient as the AVL tree.