chore: split to multiple files

Signed-off-by: Matej Focko <mfocko@redhat.com>
This commit is contained in:
Matej Focko 2022-05-17 13:37:59 +02:00
parent fbb29ef738
commit 9a32d10615
Signed by: mfocko
GPG key ID: 7C47D46246790496
10 changed files with 949 additions and 936 deletions

View file

@ -106,19 +106,31 @@
} }
%% Online reference for the IB002 course page (see the `url` field),
%% accessed on 2022-05-01.
%% NOTE(review): every line of this entry appears twice on the same line,
%% which looks like a side-by-side diff rendering -- confirm against the
%% real bibliography file.
@ONLINE{ib002, @ONLINE{ib002,
title = {Algoritmy a datové struktury I}, title = {Algoritmy a datové struktury I},
url = {https://is.muni.cz/predmet/fi/jaro2022/IB002}, url = {https://is.muni.cz/predmet/fi/jaro2022/IB002},
urldate = {2022-05-01}, urldate = {2022-05-01},
location = {Brno}, location = {Brno},
langid = {czech} langid = {czech}
} }
%% Online reference for the IB111 course page (see the `url` field),
%% accessed on 2022-05-01.
%% NOTE(review): every field line of this entry appears twice on the same
%% line, which looks like a side-by-side diff rendering -- confirm against
%% the real bibliography file.
@ONLINE{ib111, @ONLINE{ib111,
title = {Základy programování}, title = {Základy programování},
url = {https://is.muni.cz/predmet/fi/podzim2021/IB111}, url = {https://is.muni.cz/predmet/fi/podzim2021/IB111},
urldate = {2022-05-01}, urldate = {2022-05-01},
location = {Brno}, location = {Brno},
langid = {czech} langid = {czech}
}
%% Journal article by Stephen Adams (1993), Journal of Functional
%% Programming 3(4). The page range is written with a double hyphen so that
%% the style typesets it as a range.
@article{adams_1993,
  author    = {Adams, Stephen},
  title     = {Functional Pearls Efficient sets—a balancing act},
  journal   = {Journal of Functional Programming},
  publisher = {Cambridge University Press},
  volume    = {3},
  number    = {4},
  pages     = {553--561},
  year      = {1993},
  doi       = {10.1017/S0956796800000885}
}
%% EXAMPLES FOLLOW %% %% EXAMPLES FOLLOW %%

View file

@ -1,926 +0,0 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% I, the copyright holder of this work, release this work into the
%% public domain. This applies worldwide. In some countries this may
%% not be legally possible; if so: I grant anyone the right to use
%% this work for any purpose, without any conditions, unless such
%% conditions are required by law.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[
printed, %% The `printed` option enables the default options for the
%% printed version of a document. Replace with `digital`
%% to enable the default options for the digital version
%% of a document.
color, %% The `color` option is enabled here to use color in the
%% document.
table, %% The `table` option causes the coloring of tables.
%% Replace with `notable` to restore plain LaTeX tables.
oneside, %% The `oneside` option enables one-sided typesetting;
%% use it only if you don't have access to a double-sided
%% printer, or if one-sided typesetting is a formal
%% requirement at your faculty. Replace with `twoside` to
%% enable double-sided typesetting (use at least 120 g/m²
%% paper to prevent show-through).
nolof, %% The `nolof` option hides the List of Figures. Replace
%% with `lof` to print the List of Figures.
nolot, %% The `nolot` option hides the List of Tables. Replace
%% with `lot` to print the List of Tables.
%% More options are listed in the user guide at
%% <http://mirrors.ctan.org/macros/latex/contrib/fithesis/guide/mu/fi.pdf>.
]{fithesis3}
%% The following section sets up the locales used in the thesis.
\usepackage[resetfonts]{cmap} %% We need to load the T2A font encoding
\usepackage[T1,T2A]{fontenc} %% to use the Cyrillic fonts with Russian texts.
\usepackage[
main=english, %% By using `czech` or `slovak` as the main locale
%% instead of `english`, you can typeset the thesis
%% in either Czech or Slovak, respectively.
english, czech, slovak %% The additional keys allow
]{babel} %% foreign texts to be typeset as follows:
%%
%% \begin{otherlanguage}{czech} ... \end{otherlanguage}
%% \begin{otherlanguage}{slovak} ... \end{otherlanguage}
%%
%% NOTE(review): the template comments also listed `german` and `russian`
%% here, but neither is among the babel options above, so they would have
%% to be added to the option list before being used with `otherlanguage`.
%%
%% For non-Latin scripts, it may be necessary to load additional
%% fonts:
\usepackage{paratype}
%% \textrussian{<text>} typesets its argument inside a group after
%% switching to the T2A encoding and the PTSerif-TLF family via \usefont.
\def\textrussian#1{{\usefont{T2A}{PTSerif-TLF}{m}{rm}#1}}
%%
%% The following section sets up the metadata of the thesis.
\thesissetup{
date = \the\year/\the\month/\the\day,
university = mu,
faculty = fi,
type = bc,
author = Matej Focko,
gender = m,
advisor = {prof. RNDr. Ivana Černá, CSc.},
title = {Rank-Balanced Trees},
TeXtitle = {Rank-Balanced Trees},
keywords = {algorithms, data structures, rank, trees, balanced trees, study material, visualization, ...},
TeXkeywords = {algorithms, data structures, rank, trees, balanced trees, study material, visualization, \ldots},
abstract = {%
In the thesis we demonstrate the usage of a rank for implementing balanced binary search trees
and algorithms related to a specific rank-balanced tree, the weak AVL tree. The first part
of the thesis consists of a description of the rank-balanced tree followed by a comparison
to other balanced trees that can be implemented using rank, diagrams and pseudocode related
to the weak AVL tree. We also present an implementation of the weak AVL tree in Python, tested using
property-based testing. The final part of the thesis is a web page that allows performing operations
on the weak AVL tree with animations and a step-by-step walk-through of the pseudocode.
},
thanks = {%
\textit{TBD}
},
bib = bibliography.bib,
%% Uncomment the following line (by removing the %% at the
%% beginning) and replace `assignment.pdf` with the filename
%% of your scanned thesis assignment.
%% assignment = assignment.pdf,
}
\usepackage{makeidx} %% The `makeidx` package contains
\makeindex %% helper commands for index typesetting.
%% These additional packages are used within the document:
\usepackage{paralist} %% Compact list environments
\usepackage{amsmath} %% Mathematics
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{url} %% Hyperlinks
\usepackage{tabularx} %% Tables
\usepackage{tabu}
\usepackage{booktabs}
%% Pseudocode typesetting (the `algorithm` environments in the document
%% body).
\usepackage[vlined,longend,linesnumbered]{algorithm2e}
\usepackage{listings} %% Source code highlighting
%% Default style of the listings: typewriter font, separate colors for
%% identifiers, three keyword classes, strings and (italic) comments;
%% long lines are broken.
\lstset{
basicstyle = \ttfamily,
identifierstyle = \color{black},
keywordstyle = \color{blue},
keywordstyle = {[2]\color{cyan}},
keywordstyle = {[3]\color{olive}},
stringstyle = \color{teal},
commentstyle = \itshape\color{magenta},
breaklines = true,
}
\usepackage{floatrow} %% Putting captions above tables
\floatsetup[table]{capposition=top}
%% Cross-reference links; \hyperref[<label>]{<text>} is used by the
%% shorthands defined at the end of this block.
\usepackage{hyperref}
\usepackage[x11names, svgnames, rgb]{xcolor}
%% Tree diagrams (the `tikzpicture` environments in the document body).
\usepackage{tikz}
\usetikzlibrary{decorations,arrows,shapes}
%% Block keywords for the pseudocode: \Fn and \Proc open a block titled
%% `function` / `procedure`, followed by ` is` and closed by `end`.
\SetKwProg{Fn}{function}{ is}{end}
\SetKwProg{Proc}{procedure}{ is}{end}
%% Shorthands for algorithm names: each one typesets the name in typewriter
%% font as a link to the label of the corresponding algorithm.
%% Note that \avlDeleteRebalance and \wavlDeleteRebalance print the same
%% text (`deleteRebalance`) but link to different labels.
\newcommand{\avlDeleteRebalance}{\hyperref[algorithm:avl:deleteRebalance]{\texttt{deleteRebalance}}}
\newcommand{\avlDeleteFixNode}{\hyperref[algorithm:avl:deleteFixNode]{\texttt{deleteFixNode}}}
\newcommand{\avlDeleteRotate}{\hyperref[algorithm:avl:deleteRotate]{\texttt{deleteRotate}}}
\newcommand{\findParentNode}{\hyperref[algorithm:findParentNode]{\texttt{findParentNode}}}
\newcommand{\wavlInsertRebalance}{\hyperref[algorithm:wavl:insertRebalance]{\texttt{insertRebalance}}}
\newcommand{\wavlFixZeroChild}{\hyperref[algorithm:wavl:fix0Child]{\texttt{fix0Child}}}
\newcommand{\wavlDeleteRebalance}{\hyperref[algorithm:wavl:deleteRebalance]{\texttt{deleteRebalance}}}
\newcommand{\wavlBottomUpDelete}{\hyperref[algorithm:wavl:bottomUpDelete]{\texttt{bottomUpDelete}}}
\newcommand{\wavlFixDelete}{\hyperref[algorithm:wavl:fixDelete]{\texttt{fixDelete}}}
\begin{document}
\chapter*{Introduction}
\addcontentsline{toc}{chapter}{Introduction}
Data structures have become a regular part of the essential toolbox for problem-solving. In many cases, they also help to improve the existing algorithm's performance, e.g. using a priority queue in Dijkstra's algorithm for the shortest path. This thesis will mainly discuss the implementation of a set (which can also be adjusted to represent a dictionary or map, if you wish).
Currently, the most commonly used implementations of sets use hash tables, but we will talk about another common alternative: an implementation via self-balancing search trees. Compared to a hash table, they provide us with \textbf{consistent} time complexity, but at the cost of a requirement for ordering on the elements. The most commonly implemented self-balancing binary tree is the \textit{red-black tree}, as described by Guibas and Sedgewick~\cite{rbtree}. Among other alternatives, we can find the (non-binary) \textit{B-tree}~\cite{btree} and the \textit{AVL tree}~\cite{knuth1998art}.
This thesis analyses and visualizes the \textit{Weak AVL (WAVL)} tree~\cite{wavl}, which has more relaxed conditions than the AVL tree but still provides better balancing than a red-black tree.
We start by reviewing commonly used search trees, explaining the basic ideas behind their self-balancing and bounding their height in the worst-case scenario. Then we state the terminology used and explain the rank-balanced tree. Given a rank-balanced tree, we can delve into the details behind the representation of the previously shown self-balancing binary search trees using rank, and show how the WAVL rule gives us a new self-balancing binary search tree. For the WAVL tree, we provide pseudocode and explain the operations used for rebalancing, including diagrams. Later on, we will discuss different heuristics that can be used for rebalancing and the implementation of the visualization of the operations on the WAVL tree.
\chapter{Self-Balancing Search Trees}\label{chap:sb-bst}
This chapter will briefly discuss the properties and fundamental ideas behind the most used self-balancing search trees in standard libraries to give an idea about current options and how WAVL fits among them.
\section{Red-black trees}
As mentioned previously, red-black trees are among the most popular implementations in standard libraries. As always, we have a binary search tree, and then each node is given \textit{red} or \textit{black} colour. A red-black tree is kept balanced by enforcing the following set of rules~\cite{rbtree}:
\begin{enumerate}
\item External nodes are black; internal nodes may be red or black.
\item For each internal node, all paths from it to external nodes contain the same number of black nodes.
\item No path from an internal node to an external node contains two red nodes in a row.
\end{enumerate}
Given this knowledge, we can safely deduce the following relation between the height of the red-black tree and the number of nodes stored in it~\cite{cormen2009introduction}:
\[
\log_2{(n + 1)} \leq h \leq 2 \cdot \log_2{(n + 2)} - 2
\]\label{rb-height}
where the lower bound is given by a perfect binary tree and upper bound by the minimal red-black tree.
There are also other variants of the red-black tree that are considered to be simpler for implementation, e.g. left-leaning red-black tree, as described by Sedgewick.
Red-black trees are used to implement sets in C++, Java and C\#.
\section{AVL tree}
The AVL tree is considered to be the oldest self-balancing binary search tree. For clarity, we define the following function:
\[
BalanceFactor(n) := height(right(n)) - height(left(n))
\]
Then we have an AVL tree, if for every node $n$ in the tree the following holds:
\[
BalanceFactor(n) \in \{ -1, 0, 1 \}
\]
In other words, the heights of the left and right subtrees of each node differ by at most 1.
Similarly, from the original paper by Adelson-Velsky and Landis, we can deduce the following bounds on the height of the AVL tree:
\[
\left( \log_2{(n + 1)} \leq \right) h < \log_{\varphi}{(n + 1)} < \frac{3}{2} \cdot \log_2{(n + 1)}
\]\label{avl-height}
If we compare the upper bounds for the height of the red-black trees and AVL trees, we can see that the AVL rules are stricter than the red-black rules, but at the cost of rebalancing. However, in both cases, the rebalancing still takes $\mathcal{O}(\log{n})$ time.
Regarding the implementation of AVL trees, we can see them implemented in the standard library of Agda or Coq.
\section {B-tree}
\textit{To keep or not to keep…}
Used in Rust.
\chapter{Rank-Balanced Trees}
In comparison to nodes in binary search trees, nodes in rank-balanced trees contain one more piece of information, and that is \textit{rank}. Each type of tree that can be implemented using this representation, e.g. red-black, 2-3-4, AVL or WAVL, has a specific set of rules that ensure the resulting tree is balanced.
\section{Terminology related to rank-balanced trees}
In the text and pseudocode we adopt these functions or properties~\cite{wavl}:
\begin{itemize}
\item function $r(x)$ or property $x.rank$ that returns rank of a node; in case of $r(x)$ there is a special case: $r(nil) = -1$
\item function $parent(x)$ or property $x.parent$ returns the parent of a node; analogously for the left and right children of a node
\item \textit{rank-difference} of \textit{x} is defined as $r(parent(x)) - r(x)$
\item $x$ is an \textit{i-child} if its rank-difference is $i$
\item $x$ is an $(i, j)$ node if its left and right children have $i$ and $j$ rank-differences respectively;
ordering of the children does not matter
\end{itemize}
\section{Rules for the other trees}
As we have mentioned above, it is possible to implement different kinds of self-balancing binary search trees via different rules for ranks.
\subsection{AVL tree}\label{chap:avl-rule}
\textbf{AVL Rule}: Every node is (1, 1) or (1, 2).~\cite{wavl}
In case of the AVL trees rank represents height. Here we can notice a very smart way of using the \textit{(i, j) node} definition. If we go back to the definition and want to be explicit about the nodes that are allowed with the \textit{AVL Rule}, then we get (1, 1), (1, 2) \textbf{or} (2, 1) nodes. However it is possible to find implementations of the AVL tree that allow leaning \textbf{to only one side} as opposed to the original requirements given by Adelson-Velsky and Landis. Forbidding interchangeability of (i, j) with (j, i) nodes would still yield AVL trees that lean to one side.
Meaning of the \textit{AVL Rule} is quite simple, since rank represents height in that case. We can draw analogies using the notation used for the AVL trees, where we mark nodes with a trit (or a sign) or use a balance-factor. We have two cases to discuss:
\begin{itemize}
\item \textbf{(1, 1) node} represents a tree where both of its subtrees have the same height. In this case we are talking about the nodes with balance-factor $0$ (respectively being signed with a $0$).
\item \textbf{(1, 2) node} represents a tree where one of its subtrees has a bigger height. In this case we are talking about the nodes with balance-factor $-1$ or $1$ (respectively being signed with a $-$ or a $+$).
\end{itemize}
Example of the AVL tree that uses ranks instead of signs or balance-factors can be seen in \autoref{fig:ranked:avl}.
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%% Node labels read `value, rank`. The number drawn next to each edge is the
%% rank-difference of the child, i.e. the rank of the parent minus the rank
%% of the child (e.g. the edge from `3, 3` to `1, 1` is labelled 2).
%%
%% Nodes
\node (Node{value=3+ rank=3}) at (140.6bp,279.0bp) [draw,ellipse] {3, 3};
\node (Node{value=1+ rank=1}) at (103.6bp,192.0bp) [draw,ellipse] {1, 1};
\node (Node{value=7+ rank=2}) at (178.6bp,192.0bp) [draw,ellipse] {7, 2};
\node (Node{value=0+ rank=0}) at (28.597bp,105.0bp) [draw,ellipse] {0, 0};
\node (Node{value=2+ rank=0}) at (103.6bp,105.0bp) [draw,ellipse] {2, 0};
\node (Node{value=5+ rank=1}) at (178.6bp,105.0bp) [draw,ellipse] {5, 1};
\node (Node{value=8+ rank=1}) at (253.6bp,105.0bp) [draw,ellipse] {8, 1};
\node (Node{value=4+ rank=0}) at (103.6bp,18.0bp) [draw,ellipse] {4, 0};
\node (Node{value=6+ rank=0}) at (178.6bp,18.0bp) [draw,ellipse] {6, 0};
\node (Node{value=9+ rank=0}) at (253.6bp,18.0bp) [draw,ellipse] {9, 0};
%% Edges (parent -> child), each followed by its rank-difference label
\draw [->] (Node{value=3+ rank=3}) ..controls (128.03bp,249.14bp) and (120.85bp,232.64bp) .. (Node{value=1+ rank=1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (128.6bp,235.5bp) node {2};
\draw [->] (Node{value=3+ rank=3}) ..controls (153.5bp,249.14bp) and (160.88bp,232.64bp) .. (Node{value=7+ rank=2});
\draw (166.6bp,235.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (78.671bp,162.75bp) and (61.893bp,143.74bp) .. (Node{value=0+ rank=0});
\draw (75.597bp,148.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (103.6bp,162.16bp) and (103.6bp,146.55bp) .. (Node{value=2+ rank=0});
\draw (108.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=2}) ..controls (178.6bp,162.16bp) and (178.6bp,146.55bp) .. (Node{value=5+ rank=1});
\draw (183.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=2}) ..controls (203.52bp,162.75bp) and (220.3bp,143.74bp) .. (Node{value=8+ rank=1});
\draw (224.6bp,148.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (153.67bp,75.75bp) and (136.89bp,56.735bp) .. (Node{value=4+ rank=0});
\draw (149.6bp,61.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (178.6bp,75.163bp) and (178.6bp,59.548bp) .. (Node{value=6+ rank=0});
\draw (183.6bp,61.5bp) node {1};
\draw [->] (Node{value=8+ rank=1}) ..controls (253.6bp,75.163bp) and (253.6bp,59.548bp) .. (Node{value=9+ rank=0});
\draw (258.6bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{Example of the AVL tree using ranks}
\label{fig:ranked:avl}
\end{figure}
\subsection{Red-black tree}\label{chap:rb-rule}
\textbf{Red-Black Rule}: All rank differences are 0 or 1, and no parent of a 0-child is a 0-child.~\cite{wavl}
In case of red-black trees, rank represents number of black nodes on a path from the node to a leaf (excluding the node itself). Based on that we can discuss the \textit{Red-Black Rule} in detail:
\begin{enumerate}
\item \textit{All rank differences are 0 or 1} inductively enforces monotonically increasing (at most by 1) count of black nodes from the leaves. \\
In detail:
\begin{enumerate}
\item In case the \textbf{current node is black}, the rank difference must be 1, since we have one more black node on the path from the parent to the leaves than from the current node.
\item In case the \textbf{current node is red}, the rank difference must be 0, since from the parent the count of black nodes on the path to leaves has not changed.
\item And finally all other differences are invalid, since by adding a node to the beginning of a path to the leaf we can either add red node (0-child) or black node (1-child), i.e. there is one more black node on the path or not which implies the change can be only 0 or 1.
\end{enumerate}
\item \textit{No parent of a 0-child is a 0-child} ensures that there are no two consecutive red nodes, since 0-child node is equivalent to the red node.
\end{enumerate}
Example of the red-black tree that uses ranks instead of colors can be seen in \autoref{fig:ranked:rbt}, red nodes are also colored for convenience.
The majority of red-black tree implementations color the nodes of the tree. Following that notation and the \textbf{precise} definition of the red-black tree, it is quite common to ask the following questions:
\begin{enumerate}
\item \textit{Do we count the node itself if it is black?} \\
If we do not count nodes themselves, we decrease the count of black nodes on every path to the external nodes by $1$.
\item \textit{Do we count the external nodes (leaves that do not hold any value)?} \\
If we do not count external nodes themselves, we decrease the count of black nodes on every path to the external nodes by $1$.
\end{enumerate}
Overall they do not really matter as long as they are used consistently, since they affect the counts globally.
However, it is also possible to color edges instead of the nodes, as is presented in \textit{Průvodce labyrintem algoritmů} by \textit{Mareš and Valla}~\cite{labyrint}. In this representation, the color of the edge represents the color of the child node. This representation is much more ``natural'' for the representation using rank, as can be seen in \autoref{fig:ranked:rbt}, where edges connecting nodes with rank-difference $1$ represent \textit{black edges} and edges connecting nodes with rank-difference $0$ represent \textit{red edges}. It is also apparent that, using this representation, the root of the tree does not hold any color anymore.
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%% Node labels read `value, rank`. The number drawn next to each edge is the
%% rank-difference of the child. Nodes and edges drawn in red are the
%% 0-children, i.e. the red nodes of the tree.
%%
%% Nodes (the root is defined exactly once)
\node (Node{value=3+ rank=2}) at (140.6bp,366.0bp) [draw,ellipse] {3, 2};
\node (Node{value=1+ rank=1}) at (103.6bp,279.0bp) [draw,ellipse] {1, 1};
\node (Node{value=5+ rank=1}) at (178.6bp,279.0bp) [draw,ellipse] {5, 1};
\node (Node{value=0+ rank=0}) at (28.597bp,192.0bp) [draw,ellipse] {0, 0};
\node (Node{value=2+ rank=0}) at (103.6bp,192.0bp) [draw,ellipse] {2, 0};
\node (Node{value=4+ rank=0}) at (178.6bp,192.0bp) [draw,ellipse] {4, 0};
\node (Node{value=7+ rank=1}) at (253.6bp,192.0bp) [draw=red,ellipse] {7, 1};
\node (Node{value=6+ rank=0}) at (226.6bp,105.0bp) [draw,ellipse] {6, 0};
\node (Node{value=8+ rank=0}) at (301.6bp,105.0bp) [draw,ellipse] {8, 0};
\node (Node{value=9+ rank=0}) at (301.6bp,18.0bp) [draw=red,ellipse] {9, 0};
%% Edges (parent -> child), each followed by its rank-difference label
\draw [->] (Node{value=3+ rank=2}) ..controls (128.03bp,336.14bp) and (120.85bp,319.64bp) .. (Node{value=1+ rank=1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (129.6bp,322.5bp) node {1};
\draw [->] (Node{value=3+ rank=2}) ..controls (153.5bp,336.14bp) and (160.88bp,319.64bp) .. (Node{value=5+ rank=1});
\draw (167.6bp,322.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (78.671bp,249.75bp) and (61.893bp,230.74bp) .. (Node{value=0+ rank=0});
\draw (75.597bp,235.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (103.6bp,249.16bp) and (103.6bp,233.55bp) .. (Node{value=2+ rank=0});
\draw (108.6bp,235.5bp) node {1};
\draw [->] (Node{value=7+ rank=1}) ..controls (244.49bp,162.33bp) and (239.36bp,146.17bp) .. (Node{value=6+ rank=0});
\draw (246.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=1}) ..controls (269.82bp,162.26bp) and (279.51bp,145.12bp) .. (Node{value=8+ rank=0});
\draw (284.6bp,148.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (178.6bp,249.16bp) and (178.6bp,233.55bp) .. (Node{value=4+ rank=0});
\draw (183.6bp,235.5bp) node {1};
\draw [red,->] (Node{value=5+ rank=1}) ..controls (203.52bp,249.75bp) and (220.3bp,230.74bp) .. (Node{value=7+ rank=1});
\draw (225.6bp,235.5bp) node {0};
\draw [red,->] (Node{value=8+ rank=0}) ..controls (301.6bp,75.163bp) and (301.6bp,59.548bp) .. (Node{value=9+ rank=0});
\draw (306.6bp,61.5bp) node {0};
%
\end{tikzpicture}
\caption{Example of the red-black tree using ranks}
\label{fig:ranked:rbt}
\end{figure}
\section{Implementation of other balanced trees using rank}
To show that using rank is mostly an implementation detail, we will describe an implementation of the AVL tree using rank.
Implementation of the insertion is trivial, since it is described by \textit{Haeupler et al.} and is used in the WAVL tree. All we need to implement is the deletion from the AVL tree. We will start with a short description of the deletion rebalance as given by \textit{Mareš and Valla} in \textit{Průvodce labyrintem algoritmů}.
When propagating the error, we can encounter 3 cases (we explain them with respect to propagating deletion from the left subtree, propagation from right is mirrored and role of trits $+$ and $-$ swaps)~\cite{labyrint}:
\begin{enumerate}
\item \textit{Node was marked with $-$.} In this case, heights of left and right subtrees are equal now and node is marked with $0$, but propagation must be continued, since the height of the whole subtree has changed.\label{avl:rules:delete:1}
\item \textit{Node was marked with $0$.} In this case, node is marked with $+$ and the height of the subtree has not changed, therefore we can stop the propagation.\label{avl:rules:delete:2}
\item \textit{Node was marked with $+$.} In this case, node would acquire balance-factor of $+2$, which is not allowed. In this situation we decide based on the mark of the right child of the current node in the following way (let $x$ be the current node marked with $+$ and $y$ be the right child of $x$):\label{avl:rules:delete:3}
\begin{enumerate}
\item $y$ is marked with $+$, then we rotate by $x$ to the left. After that both $x$ and $y$ can be marked with $0$. Height from the point of the parent has changed, so we continue the propagation.\label{avl:rules:delete:3a}
\item $y$ is marked with $0$, then we rotate by $x$ to the left. After the rotation, $x$ can be marked with $+$ and $y$ with $-$. Height of the subtree has not changed, so propagation can be stopped.\label{avl:rules:delete:3b}
\item $y$ is marked with $-$. Let $z$ be the left child of $y$. We double rotate: first by $y$ to the right and then by $x$ to the left. After the double-rotation $x$ can be marked by either $0$ or $-$, $y$ by $0$ or $+$ and $z$ gets $0$. Height of the subtree has changed, therefore we must propagate further.\label{avl:rules:delete:3c}
\end{enumerate}
\end{enumerate}\label{avl:rules:delete}
We have implemented the deletion rebalance by implementing following functions:
\begin{enumerate}
\item \avlDeleteRebalance{} that handles updating the current node and its parent and iteratively calls subroutine handling previously described \textit{one step of a rebalancing}
\item \avlDeleteFixNode{} that handles one adjustment of rebalancing as described above
\item \avlDeleteRotate{} that handles rotation and updating of ranks, if necessary
\end{enumerate}
\begin{algorithm}
%% Driver of the rebalancing after a deletion from the AVL tree:
%%  * returns immediately when both y and parent are nil,
%%  * when only y is nil, continues with (parent, parent.parent),
%%  * then calls deleteFixNode on the current node and moves one level up
%%    for as long as the node is not nil and deleteFixNode returns true.
%% Every statement is terminated with algorithm2e's end-of-line macro `\;`,
%% including the early return.
\Proc{$\texttt{deleteRebalance}(T, y, parent)$}{
\If{$y = nil \land parent = nil$}{
\Return\;
}
\BlankLine
\If{$y = nil$}{
$(y, parent) \gets (parent, parent.parent)$\;
}
\BlankLine
\While{$y \neq nil \land \avlDeleteFixNode(T, y, parent)$}{
$y \gets parent$\;
\eIf{$parent \neq nil$}{
$parent \gets parent.parent$\;
}{
$parent \gets nil$\;
}
}
}
\caption{\texttt{deleteRebalance} algorithm for the AVL tree}\label{algorithm:avl:deleteRebalance}
\end{algorithm}
\texttt{deleteRebalance}, as can be seen in \autoref{algorithm:avl:deleteRebalance}, is quite straightforward. At the beginning, we return early in case there is nothing to be rebalanced, which happens when deleting the last node from the tree. Then we handle the case when we are given only the parent by correctly setting $y$ and $parent$. Following up on that, as long as we have a node to be checked, we call \autoref{algorithm:avl:deleteFixNode} to fix the balancing of the current node. The algorithm for fixing a node returns $true$ or $false$ depending on the need to propagate the height change further, which is utilized in the condition of the \texttt{while} loop.
\begin{algorithm}
%% One rebalancing step for the node x after a deletion. The result is used
%% as the loop condition in deleteRebalance: true means "continue with the
%% parent", false means "stop".
%%  * balance-factor 0: the rank of x is updated and true is returned,
%%  * balance-factor -1 or 1: false is returned,
%%  * otherwise the result of deleteRotate is returned; for balance-factor 2
%%    it is called with the right child, leaning 1 and
%%    (rotateLeft, rotateRight), else with the left child, leaning -1 and
%%    the two rotation functions swapped.
\Proc{$\texttt{deleteFixNode}(T, x, parent)$}{
\uIf(\tcp*[h]{Handles \hyperref[avl:rules:delete:1]{rule 1}}){balance-factor of $x$ is $0$}{
update rank of $x$\;
\Return{$true$}\;
}
\ElseIf(\tcp*[h]{Handles \hyperref[avl:rules:delete:2]{rule 2}}){balance-factor of $x$ is $-1$ or $1$}{
\Return{$false$}\;
}
\BlankLine
$(l, r) \gets (x.left, x.right)$\;
$(rotateL, rotateR) \gets (\texttt{rotateLeft}, \texttt{rotateRight})$\;
\BlankLine
\tcp{Handles \hyperref[avl:rules:delete:3]{rule 3}}
\eIf{balance-factor of $x$ is $2$}{
\Return{$\avlDeleteRotate(T, x, r, 1, rotateL, rotateR)$}\;
}{
\Return{$\avlDeleteRotate(T, x, l, -1, rotateR, rotateL)$}\;
}
}
\caption{\texttt{deleteFixNode} algorithm for the AVL tree}\label{algorithm:avl:deleteFixNode}
\end{algorithm}
\texttt{deleteFixNode} implements the algorithm as described in \hyperref[avl:rules:delete]{the list} with all possible cases above. We start by checking the balance-factor of the given node. In case there is no need to rotate, the rank gets updated if necessary and then we return the information whether there is a need to propagate further or not. In case the node has acquired balance-factor of $2$ or $-2$, we call \autoref{algorithm:avl:deleteRotate} to fix the balancing locally.
There are two operations that are not described using helper functions and they are done in the following way:
\begin{itemize}
\item Balance-factor of a node $x$ is calculated as \[ rank(x.right) - rank(x.left) \]
\item Updating rank of a node $x$ is done by setting node's rank to \[ 1 + \max \{ rank(x.left), rank(x.right) \} \]
\end{itemize}
\begin{algorithm}
%% Rotation step of the deletion rebalancing. x is the unbalanced node and
%% y the child passed in by deleteFixNode; rotateL/rotateR are passed in so
%% that the same code serves both mirrored cases.
%%  * f (balance-factor of y) is 0 or equal to `leaning`: single rotation
%%    rotateL(T, x),
%%  * otherwise: double rotation, rotateR(T, y) followed by rotateL(T, x).
%% Ranks are updated afterwards; the result is true exactly when f is not 0.
\Proc{$\texttt{deleteRotate}(T, x, y, leaning, rotateL, rotateR)$}{
$f \gets $ balance-factor of $y$\;
\BlankLine
\uIf(\tcp*[h]{Handles rules \hyperref[avl:rules:delete:3a]{3a} \& \hyperref[avl:rules:delete:3b]{3b}}){$f = 0 \lor f = leaning$}{
$rotateL(T, x)$\;
}
\Else(\tcp*[h]{Handles \hyperref[avl:rules:delete:3c]{rule 3c}}){
$rotateR(T, y)$\;
$rotateL(T, x)$\;
}
\BlankLine
update ranks of $x$, $y$ and new root of the subtree\;
\BlankLine
\Return{$f \neq 0$}\;
}
\caption{\texttt{deleteRotate} algorithm for the AVL tree}\label{algorithm:avl:deleteRotate}
\end{algorithm}
\newpage
\texttt{deleteRotate} handles only the fixes where rotations are required. Both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} include comments to highlight which rules are handled. This function is also written generically, regardless of the subtree from which the height change is being propagated. This is done by passing in the functions used for rotations (since they are mirrored) and also by passing in the balance-factor required for just one rotation.
In both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} there is a key difference compared to the AVL tree implementations without ranks. Comparing the \hyperref[avl:rules:delete]{rules for deletion} with algorithms for rank-balanced implementation, it is apparent that during propagation of height change, the balance-factors of immediate nodes are already adjusted, since the information comes from either of its subtrees and it is calculated using ranks of its children that are already adjusted. This fact needs to be reflected in the implementation accordingly, since it shifts the meaning of rules as they are described above and written for the implementations that store the trit in the nodes directly, which is updated manually during rebalancing.
\chapter{Weak AVL Trees}
\section{Rank rule}
Based on the rank rules for implementing red-black tree (as described in \ref{chap:rb-rule}) and AVL tree (as described in \ref{chap:avl-rule}), \textit{Haeupler et al.} present a new rank rule:
\textbf{Weak AVL Rule}: All rank differences are 1 or 2, and every leaf has rank 0.~\cite{wavl}
Comparing the \textit{Weak AVL Rule} to the \textit{AVL Rule}, we can come to these conclusions:
\begin{itemize}
\item \textit{Every leaf has rank 0} holds with the AVL Rule, since every node is (1, 1) or (1, 2) and the rank of a node represents the height of its tree. The rank of \textit{nil} is defined as $-1$ and the height of a tree rooted at a leaf is $0$, therefore leaves are (1, 1)-nodes.
\item \textit{All rank differences are 1 or 2} does not hold in one specific case, and that is (2, 2)-node, which is allowed in the WAVL tree, but not in the AVL tree. This difference will be explained more thoroughly later on.
\end{itemize}
\section{Height boundaries}
We have described in \autoref{chap:sb-bst} other common self-balancing binary search trees to be able to draw analogies and explain differences between them. Given the boundaries of height for red-black and AVL tree, we can safely assume that the AVL is more strict with regards to the self-balancing than the red-black tree. Let us show how WAVL fits among them. \textit{Haeupler et al.} present the following bounds~\cite{wavl}:
\[ h \leq k \leq 2h \text{ and } k \leq 2 \log_2{n} \]
In those inequalities we can see $h$ and $n$ in the same context as we used them to lay boundaries for the AVL and red-black trees, but we can also see a new variable $k$, which represents the rank of the tree.
One of the core differences between AVL and WAVL lies in the rebalancing after deletion. Insertion into the WAVL tree is realized in the same way as it would be in the AVL tree and the benefit of (2, 2)-node is used during deletion rebalancing.
From the previous 2 statements we can come to 2 conclusions and those are:
\begin{itemize}
\item If we commit only insertions to the WAVL tree, it will always yield a valid AVL tree. In that case it means that the height boundaries are same as of the AVL tree (described in \autoref{avl-height}).
\item If we commit deletions too, we can assume the worst-case scenario where \[ h \leq 2 \log_2{n} \] which also holds for the red-black trees.
\end{itemize}
From the two conclusions we can safely deduce that the WAVL tree is in the worst-case scenario as efficient as the red-black tree and in the best-case scenario as efficient as the AVL tree.
\newpage
\section{Insertion into the weak AVL tree}
Inserting values into WAVL tree is equivalent to inserting values into regular binary-search tree followed up by rebalancing that ensures rank rules hold. This part can be clearly seen in \autoref{algorithm:wavl:insert}. We can also see there two early returns, one of them happens during insertion into the empty tree and other during insertion of duplicate key, which we do not allow.
\begin{algorithm}
\Proc{$\texttt{insert}(T, key)$}{
$insertedNode \gets Node(key)$\;
\If{$T.root = nil$}{
$T.root \gets insertedNode$\;
\Return\;
}
\BlankLine
$parent \gets \findParentNode(key, T.root)$\;
\If{$parent = nil$}{
\Return\;
}
$insertedNode.parent \gets parent$\;
\BlankLine
\eIf{$key < parent.key$}{
$parent.left \gets insertedNode$\;
}{
$parent.right \gets insertedNode$\;
}
\BlankLine
$\wavlInsertRebalance(T, insertedNode)$\;
}
\caption{Insert operation on the WAVL tree}\label{algorithm:wavl:insert}
\end{algorithm}
In the \autoref{algorithm:wavl:insert} we have also utilized a helper function that is used to find parent of the newly inserted node and also prevents insertion of duplicate keys within the tree. Pseudocode of that function can be seen in \autoref{algorithm:findParentNode}.
\begin{algorithm}
\Fn{$\texttt{findParentNode}(key, node)$}{
$childNode \gets node$\;
\BlankLine
\While{$childNode \neq nil$}{
$node \gets childNode$\;
\uIf{$key < node.key$}{
$childNode \gets node.left$\;
}
\ElseIf{$node.key < key$}{
$childNode \gets node.right$\;
}
\Else{
\Return{nil}\;
}
}
\BlankLine
\Return{node}\;
}
\caption{Helper function that returns parent for newly inserted node}\label{algorithm:findParentNode}
\end{algorithm}
Rebalancing after insertion in the WAVL tree is equivalent to rebalancing after insertion in the AVL tree. We will start with a short description of the rebalancing within AVL to lay a foundation for analogies and differences compared to the implementation using ranks.
When propagating the error, we can encounter 3 cases (we explain them with respect to propagating insertion from the left subtree, propagation from right is mirrored and role of trits $+$ and $-$ swaps)~\cite{labyrint}:
\begin{enumerate}
\item \textit{Node was marked with $+$.} In this case, heights of left and right subtrees are equal now and node is marked with $0$ and propagation can be stopped.\label{avl:rules:insert:1}
\item \textit{Node was marked with $0$.} In this case, node is marked with $-$, but the height of the tree rooted at the node has changed, which means that we need to propagate the changes further.\label{avl:rules:insert:2}
\item \textit{Node was marked with $-$.} In this case, node would acquire balance-factor of $-2$, which is not allowed. In this situation we decide based on the mark of the node from which we are propagating the insertion in the following way (let $x$ be the node from which the information is being propagated and $z$ the current node marked with $-$):\label{avl:rules:insert:3}
\begin{enumerate}
\item $x$ is marked with $-$, then we rotate by $z$ to the right. After that both $z$ and $x$ can be marked with $0$. Height from the point of the parent has not changed, so we can stop the propagation.\label{avl:rules:insert:3a}
\item $x$ is marked with $+$, then we double rotate: first by $x$ to the left and then by $z$ to the right. Here we need to recalculate the balance-factors for $z$ and $x$, where $z$ gets $-$ or $0$ and $x$ gets $0$ or $+$. Node that was a right child to the $x$ before the double-rotation is now marked with $0$ and propagation can be stopped.\label{avl:rules:insert:3b}
\item $x$ is marked with $0$. This case is trivial, since it cannot happen, because we never propagate the height change from a node that acquired sign $0$.
\end{enumerate}
\end{enumerate}
In the following explanation we have to consider that valid nodes in AVL tree implemented via ranks are (1, 1) and (1, 2) and by the time of evaluating rank-differences of parent, they are already affected by the rebalancing done from the inserted leaf.
Rebalancing of the tree is equivalent to rebalancing of the AVL tree and is executed in the following way:
\begin{algorithm}
\Proc{$\texttt{insertRebalance}(T, node)$}{
\tcp{Handles \hyperref[avl:rules:insert:2]{rule 2}}
\While{$node.parent \neq nil \land (node.parent\, is\, (0, 1)\, or\, (1, 0))$}{
$\texttt{promote}(node.parent)$\;
$node \gets node.parent$\;
}
\BlankLine
\tcp{Handles \hyperref[avl:rules:insert:1]{rule 1}}
\lIf{$node.parent = nil \lor node\, is\, not\, \text{0-child}$}{\Return}
\BlankLine
\tcp{Handles \hyperref[avl:rules:insert:3]{rule 3}}
\eIf{$node = node.parent.left$}{
$\wavlFixZeroChild(T, node, node.right, \texttt{rotateLeft}, \texttt{rotateRight})$\;
}{
$\wavlFixZeroChild(T, node, node.left, \texttt{rotateRight}, \texttt{rotateLeft})$\;
}
\BlankLine
}
\caption{Algorithm containing bottom-up rebalancing after insertion}\label{algorithm:wavl:insertRebalance}
\end{algorithm}
As a first step, which can be seen in \autoref{algorithm:wavl:insertRebalance}, we iteratively check the rank-differences of the parent of the current node. As long as it is a (0, 1) or (1, 0) node, we promote it and propagate further. There is an interesting observation to be made about \textit{how the parent can fulfill such a requirement}. The answer is simple: since we are adding a leaf or are already propagating the change towards the root, we have lowered the rank-difference of one of the parent's children, therefore the parent must have been a (1, 1) node. In terms of the algorithm used in usual implementations of AVL trees, this step corresponds to \hyperref[avl:rules:insert:2]{\textit{rule 2}}. After the promotion the parent becomes a (1, 2) or (2, 1) node, which means that it gets sign $-$ (or $+$ respectively when propagating from the right subtree), which conforms to the usual algorithm.
After this, we might end up in two situations and those are:
\begin{enumerate}
\item Current node is not a 0-child, which means that after propagation and promotions we have gotten to a parent node that is (1, 2) or (2, 1), which refers to the \hyperref[avl:rules:insert:1]{\textit{rule 1}}.
\item Current node is a 0-child, which means that after propagation and promotions we have a node with a parent that is either (0, 2) or (2, 0) node. This case conforms to the \hyperref[avl:rules:insert:3]{\textit{rule 3}} and must be handled further to fix the broken rank rule.
\end{enumerate}
\hyperref[avl:rules:insert:3]{\textit{Rule 3}} is then handled by a helper function that can be seen in \autoref{algorithm:wavl:fix0Child}.
\begin{algorithm}
\Proc{$\texttt{fix0Child}(T, x, y, rotateToLeft, rotateToRight)$}{
$z \gets x.parent$\;
\BlankLine
\uIf(\tcp*[h]{Handles \hyperref[avl:rules:insert:3a]{rule 3a}}){$y = nil \lor y\, is\, \text{2-child}$}{
$rotateToRight(T, z)$\;
\BlankLine
$\texttt{demote}(z)$\;
}
\ElseIf(\tcp*[h]{Handles \hyperref[avl:rules:insert:3b]{rule 3b}}){$y\, is\, \text{1-child}$}{
$rotateToLeft(T, x)$\;
$rotateToRight(T, z)$\;
\BlankLine
$\texttt{promote}(y)$\;
$\texttt{demote}(x)$\;
$\texttt{demote}(z)$\;
}
}
\caption{Generic algorithm for fixing 0-child after insertion}\label{algorithm:wavl:fix0Child}
\end{algorithm}
Here we can see, once again, an interesting pattern. When comparing to the algorithm described above, using the rank representation, we do not need to worry about changing the signs and updating the heights, since by rotating combined with demotion and promotion of the ranks, we are effectively updating the height (represented via rank) of the affected nodes. This observation could be used in \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} where we turned to manual updating of ranks to show the difference.
\section{Deletion from the weak AVL tree}
\begin{algorithm}
\Proc{$\texttt{deleteRebalance}(T, y, parent)$}{
\uIf{$y \text{ is (2, 2)}$}{
$\texttt{demote}(y)$\;
$parent \gets y.parent$\;
}
\ElseIf{$parent \text{ is (2, 2)}$}{
$\texttt{demote}(parent)$\;
$parent \gets parent.parent$\;
}
\BlankLine
\If{$parent = nil$}{
\Return\;
}
$z \gets \text{3-child of } parent$\;
\If{$z \neq nil$}{
$\wavlBottomUpDelete(T, z, parent)$\;
}
}
\caption{Initial phase of algorithm for the rebalance after deletion from the WAVL tree}\label{algorithm:wavl:deleteRebalance}
\end{algorithm}
As described by \textit{Haeupler et al.}, we start the deletion rebalancing by checking for (2, 2) node. If that is the case, we demote it and continue with the deletion rebalancing via \autoref{algorithm:wavl:bottomUpDelete} if we have created a 3-child by the demotion. Demoting the (2, 2) node is imperative, since it enforces part of the \textit{Weak AVL Rule} requiring that leaves have rank equal to zero.
For example, consider the following tree in \autoref{fig:wavl:twoElements}. Deletion of key 2 from that tree would result in having only key 1 in the tree with rank equal to 1, which would be (2, 2) node and leaf at the same time. After the demotion of the remaining key, we acquire the tree as shown in \autoref{fig:wavl:twoElementsAfterDelete}.
In contrast to the \textit{AVL Rule}, WAVL tree allows us to have (2, 2) nodes present. Therefore we can encounter two key differences during deletion rebalancing:
\begin{enumerate}
\item If anywhere during the deletion rebalancing, \textbf{but not} at the start, we encounter (2, 2) node, we can safely stop the rebalancing process, since rest of the tree must be correct and we have fixed errors on the way to the current node from the leaf.
\item Compared to the AVL tree, during deletion rebalancing we need to fix \textbf{3-child} nodes.
\end{enumerate}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=1+ rank=1}) at (28.597bp,105.0bp) [draw,ellipse] {1, 1};
\node (Node{value=2+ rank=0}) at (28.597bp,18.0bp) [draw,ellipse] {2, 0};
\draw [->] (Node{value=1+ rank=1}) ..controls (28.597bp,75.163bp) and (28.597bp,59.548bp) .. (Node{value=2+ rank=0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (33.597bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree containing two elements}
\label{fig:wavl:twoElements}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=1+ rank=1}) at (28.597bp,105.0bp) [draw,ellipse] {1, 0};
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
%
\end{tikzpicture}
\caption{\autoref{fig:wavl:twoElements} after deletion of 2}
\label{fig:wavl:twoElementsAfterDelete}
\end{figure}
\begin{algorithm}
\Proc{$\texttt{bottomUpDelete}(T, x, parent)$}{
\If{$x \text{ is not 3-child} \lor parent = nil$}{
\Return\;
}
\BlankLine
$y \gets nil$\;
\eIf{$parent.left = x$}{
$y \gets parent.right$\;
}{
$y \gets parent.left$\;
}
\BlankLine
\While{$parent \neq nil \land x \text{ is 3-child} \land (y \text{ is 2-child or (2, 2)})$}{
\If{$y \text{ is not 2-child}$}{
$\texttt{demote}(y)$\;
}
$\texttt{demote}(parent)$\;
\BlankLine
$x \gets parent$\;
$parent \gets x.parent$\;
\If{$parent = nil$}{
\Return\;
}
\BlankLine
\eIf{$parent.left = x$}{
$y \gets parent.right$\;
}{
$y \gets parent.left$\;
}
}
\BlankLine
\If{$parent \text{ is not (1, 3)}$}{
\Return\;
}
$p \gets parent$\;
\eIf{$parent.left = x$}{
$\wavlFixDelete(T, x, p.right, p, false, \texttt{rotateLeft}, \texttt{rotateRight})$\;
}{
$\wavlFixDelete(T, x, p.left, p, true, \texttt{rotateRight}, \texttt{rotateLeft})$\;
}
}
\caption{Propagation of the broken rank rule after deletion from the WAVL tree}\label{algorithm:wavl:bottomUpDelete}
\end{algorithm}
\begin{algorithm}
\Proc{$\texttt{fixDelete}(T, x, y, z, reversed, rotateL, rotateR)$}{
$v \gets y.left$\;
$w \gets y.right$\;
\If{$reversed$}{
$(v, w) \gets (w, v)$\;
}
\BlankLine
\uIf{$w \text{ is 1-child} \land y.parent \neq nil$}{
$rotateL(T, y.parent)$\;
\BlankLine
$\texttt{promote}(y)$\;
$\texttt{demote}(z)$\;
\BlankLine
\If{$z$ is a leaf}{
$\texttt{demote}(z)$\;
}
}
\ElseIf{$w \text{ is 2-child} \land v.parent \neq nil$}{
$rotateR(T, v.parent)$\;
$rotateL(T, v.parent)$\;
\BlankLine
$\texttt{promote}(v)$\;
$\texttt{promote}(v)$\;
$\texttt{demote}(y)$\;
$\texttt{demote}(z)$\;
$\texttt{demote}(z)$\;
}
}
\caption{Final phase of the deletion rebalance after deletion from the WAVL tree}\label{algorithm:wavl:fixDelete}
\end{algorithm}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{1}) at (87.197bp,192.0bp) [draw,ellipse] {1, 2};
\node (Node{0}) at (31.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{2}) at (106.2bp,105.0bp) [draw,ellipse] {2, 0};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\draw [->] (Node{1}) ..controls (68.373bp,162.43bp) and (56.68bp,144.68bp) .. (Node{0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (68.197bp,148.5bp) node {1};
\draw [->] (Node{1}) ..controls (93.66bp,162.09bp) and (97.18bp,146.34bp) .. (Node{2});
\draw (102.2bp,148.5bp) node {2};
\draw [->] (Node{0}) ..controls (31.197bp,75.163bp) and (31.197bp,59.548bp) .. (Node{-1});
\draw (36.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree with elements inserted in order $(0, 1, 2, -1)$}\label{fig:wavl:deletionA:before}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{1}) at (31.197bp,192.0bp) [draw,ellipse] {1, 2};
\node (Node{0}) at (31.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\draw [->] (Node{1}) ..controls (31.197bp,162.16bp) and (31.197bp,146.55bp) .. (Node{0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (36.197bp,148.5bp) node {1};
\draw [->] (Node{0}) ..controls (31.197bp,75.163bp) and (31.197bp,59.548bp) .. (Node{-1});
\draw (36.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree from \autoref{fig:wavl:deletionA:before} after deletion of 2, value is replaced by one of its children}\label{fig:wavl:deletionA:replacing}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw,ellipse] {1, 2};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {1};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (98.197bp,61.5bp) node {-1};
%
\end{tikzpicture}
\caption{Rotation by parent}\label{fig:wavl:deletionA:rotation}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw=blue,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw,ellipse] {1, 2};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {0};
%
\end{tikzpicture}
\caption{Promotion of $y$}\label{fig:wavl:deletionA:promotion}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw=blue,ellipse] {1, 1};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{Demotion of $z$}\label{fig:wavl:deletionA:demotion}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw=blue,ellipse] {1, 0};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {2};
%
\end{tikzpicture}
\caption{Second demotion of $z$}\label{fig:wavl:deletionA:secondDemotion}
\end{figure}
\chapter{Implementation}
For the implementation of rank-balanced trees we have used two programming languages: C\# and Python. C\# implementation has not been finished and therefore is not part of the submitted attachments. However it has given a valuable insight into the role of preconditions and invariants in algorithms while \texttt{null}-checking is enforced, since, for example, static type control cannot be aware of a node \textbf{not} being a \texttt{null} after checking specific set of conditions that forbid such scenario.
Python has been chosen as the \textit{go-to} language, since it is used for teaching foundations of programming~\cite{ib111} and also introductory course of algorithms and data structures~\cite{ib002} at our faculty.
We have started by implementing a general idea of a rank-balanced tree. Rank-balanced tree is an abstract class that \textbf{does not} implement methods specific to different kinds of trees such as
\begin{enumerate}
\item \texttt{is\_correct\_node} is used to check whether node and its subtrees satisfy rank rules
\item \texttt{\_insert\_rebalance} rebalances the tree from the given node after insertion
\item \texttt{\_delete\_rebalance} rebalances the tree from the given nodes (deleted node and parent) after deletion
\end{enumerate}
Apart from the abstract methods, the following interface is provided; it is either shared by the specific trees or used by them:
\begin{enumerate}
\item \texttt{\_get\_unwrapped\_graph} that is used to generate DOT format of the tree for purpose of either debugging or sharing
\item \texttt{rank} returns rank of the root of the tree
\item \texttt{is\_correct} calls \texttt{is\_correct\_node} on the root of the tree
\item \texttt{search} is used to look up keys in the tree
\item \texttt{insert} implements generic insertion into the tree followed by a call to tree-specific rebalance function
\item \texttt{delete} is identical to \texttt{insert}, but for the deletion from the tree
\end{enumerate}
Apart from that we have also implemented class for representation of nodes that provides quite rich interface that is utilized during rebalancing and also in generic methods on generic tree.
\section{Testing and validation}
From the beginning we have employed the techniques of property-based testing (done manually for the C\# implementation and using Hypothesis~\cite{hypothesis} for the Python implementation). The elementary requirement for testing and validation of the implemented algorithms was a \textbf{correct} \texttt{is\_correct\_node} method that validates the properties of a specific rank-balanced tree, together with a good set of invariants. The list of chosen invariants follows:
\begin{enumerate}
\item for insertion we have set following invariants
\begin{itemize}
\item Rank rules have not been broken after the insertion.
\item Inserted value can be found in the tree after the insertion.
\item All previously inserted values can still be found in the tree.
\end{itemize}
\item for deletion we have set following invariants
\begin{itemize}
\item Rank rules have not been broken after the deletion.
\item Deleted node can no longer be found in the tree after the deletion.
\item All other values can still be found in the tree.
\end{itemize}
\end{enumerate}
We also admit abuse of property-based testing to find a \textit{minimal} sequence of operations when WAVL tree relaxation manifests. For that purpose we have implemented a \textit{comparator} class that takes two different instances of rank-balanced trees and provides rudimentary \texttt{insert} and \texttt{delete} interface enriched with \texttt{are\_same} (evaluates isomorphism and ranks) and \texttt{are\_similar} (evaluates just isomorphism) methods. While trying to find minimal counter-example, we have also discovered a bug in rebalance after deletion of WAVL tree that caused enforcement of the AVL rank rules.
\chapter{Visualization}
\chapter*{Summary}
\addcontentsline{toc}{chapter}{Summary}
\appendix %% Start the appendices.
\chapter{An appendix}
Here you can insert the appendices of your thesis.
\end{document}

47
implementation.tex Normal file
View file

@ -0,0 +1,47 @@
\chapter{Implementation}
For the implementation of rank-balanced trees we have used two programming languages: C\# and Python. C\# implementation has not been finished and therefore is not part of the submitted attachments. However it has given a valuable insight into the role of preconditions and invariants in algorithms while \texttt{null}-checking is enforced, since, for example, static type control cannot be aware of a node \textbf{not} being a \texttt{null} after checking specific set of conditions that forbid such scenario.
Python has been chosen as the \textit{go-to} language, since it is used for teaching foundations of programming~\cite{ib111} and also introductory course of algorithms and data structures~\cite{ib002} at our faculty.
We have started by implementing a general idea of a rank-balanced tree. Rank-balanced tree is an abstract class that \textbf{does not} implement methods specific to different kinds of trees such as
\begin{enumerate}
\item \texttt{is\_correct\_node} is used to check whether node and its subtrees satisfy rank rules
\item \texttt{\_insert\_rebalance} rebalances the tree from the given node after insertion
\item \texttt{\_delete\_rebalance} rebalances the tree from the given nodes (deleted node and parent) after deletion
\end{enumerate}
Apart from the abstract methods, the following interface is provided; it is either shared by the specific trees or used by them:
\begin{enumerate}
\item \texttt{\_get\_unwrapped\_graph} that is used to generate DOT format of the tree for purpose of either debugging or sharing
\item \texttt{rank} returns rank of the root of the tree
\item \texttt{is\_correct} calls \texttt{is\_correct\_node} on the root of the tree
\item \texttt{search} is used to look up keys in the tree
\item \texttt{insert} implements generic insertion into the tree followed by a call to tree-specific rebalance function
\item \texttt{delete} is identical to \texttt{insert}, but for the deletion from the tree
\end{enumerate}
Apart from that we have also implemented class for representation of nodes that provides quite rich interface that is utilized during rebalancing and also in generic methods on generic tree.
\section{Testing and validation}
From the beginning we have employed the techniques of property-based testing (done manually for the C\# implementation and using Hypothesis~\cite{hypothesis} for the Python implementation). The elementary requirement for testing and validation of the implemented algorithms was a \textbf{correct} \texttt{is\_correct\_node} method that validates the properties of a specific rank-balanced tree, together with a good set of invariants. The list of chosen invariants follows:
\begin{enumerate}
\item for insertion we have set following invariants
\begin{itemize}
\item Rank rules have not been broken after the insertion.
\item Inserted value can be found in the tree after the insertion.
\item All previously inserted values can still be found in the tree.
\end{itemize}
\item for deletion we have set following invariants
\begin{itemize}
\item Rank rules have not been broken after the deletion.
\item Deleted node can no longer be found in the tree after the deletion.
\item All other values can still be found in the tree.
\end{itemize}
\end{enumerate}
We also admit abuse of property-based testing to find a \textit{minimal} sequence of operations when WAVL tree relaxation manifests. For that purpose we have implemented a \textit{comparator} class that takes two different instances of rank-balanced trees and provides rudimentary \texttt{insert} and \texttt{delete} interface enriched with \texttt{are\_same} (evaluates isomorphism and ranks) and \texttt{are\_similar} (evaluates just isomorphism) methods. While trying to find minimal counter-example, we have also discovered a bug in rebalance after deletion of WAVL tree that caused enforcement of the AVL rank rules.

10
introduction.tex Normal file
View file

@ -0,0 +1,10 @@
\chapter*{Introduction}
\addcontentsline{toc}{chapter}{Introduction}
Data structures have become a regular part of the essential toolbox for problem-solving. In many cases, they also help to improve the existing algorithm's performance, e.g. using a priority queue in Dijkstra's algorithm for the shortest path. This thesis will mainly discuss the implementation of a set (which can also be adjusted to represent a dictionary or map, if you wish).
Currently, the most commonly used implementations of sets use hash tables, but we will talk about another common alternative, implementation via self-balancing search trees. Compared to a hash table, they provide us with \textbf{consistent} time complexity, but at the cost of a requirement for ordering on the elements. The most implemented self-balancing binary tree is a \textit{red-black tree}, as described by Guibas and Sedgewick~\cite{rbtree}. Among other alternatives, we can find (non-binary) \textit{B-tree}~\cite{btree} and \textit{AVL tree}~\cite{knuth1998art}.
This thesis analyses and visualizes the \textit{Weak AVL (WAVL)}~\cite{wavl} tree that has more relaxed conditions than the AVL tree but still provides better balancing than a red-black tree.
We start by reiterating through commonly used search trees, explaining basic ideas behind their self-balancing and bounding the height in the worst-case scenario. Then we state used terminology and explain the rank-balanced tree. Given a rank-balanced tree, we can delve into the details behind the representation of previously shown self-balancing binary search trees using rank and the WAVL rule gives us a new self-balancing binary search tree. For the WAVL, we provide pseudocode and explain operations used for rebalancing, including diagrams. Later on, we will discuss different heuristics that can be used for rebalancing and implementing the visualization of the operations on the WAVL tree.

254
rank_balanced_trees.tex Normal file
View file

@ -0,0 +1,254 @@
\chapter{Rank-Balanced Trees}
In comparison to nodes in binary search trees, nodes in rank-balanced trees contain one more piece of information, and that is \textit{rank}. Each type of tree that can be implemented using this representation, e.g. red-black, 2-3-4, AVL or WAVL, has a specific set of rules that ensure the resulting tree is balanced.
\section{Terminology related to rank-balanced trees}
In the text and pseudocode we adopt these functions or properties~\cite{wavl}:
\begin{itemize}
\item function $r(x)$ or property $x.rank$ that returns rank of a node; in case of $r(x)$ there is a special case: $r(nil) = -1$
\item function $parent(x)$ or property $x.parent$ returns parent of a node; analogously for the left and right children of a node
\item \textit{rank-difference} of \textit{x} is defined as $r(parent(x)) - r(x)$
\item $x$ is an \textit{i-child} if its rank-difference is $i$
\item $x$ is an $(i, j)$ node if its left and right children have $i$ and $j$ rank-differences respectively;
ordering of the children does not matter
\end{itemize}
\section{Rules for the other trees}
As we have mentioned above, it is possible to implement different kinds of self-balancing binary search trees via different rules for ranks.
\subsection{AVL tree}\label{chap:avl-rule}
\textbf{AVL Rule}: Every node is (1, 1) or (1, 2).~\cite{wavl}
In case of the AVL trees rank represents height. Here we can notice a very smart way of using the \textit{(i, j) node} definition. If we go back to the definition and want to be explicit about the nodes that are allowed with the \textit{AVL Rule}, then we get (1, 1), (1, 2) \textbf{or} (2, 1) nodes. However it is possible to find implementations of the AVL tree that allow leaning \textbf{to only one side} as opposed to the original requirements given by Adelson-Velsky and Landis. Forbidding interchangeability of (i, j) with (j, i) nodes would still yield AVL trees that lean to one side.
Meaning of the \textit{AVL Rule} is quite simple, since rank represents height in that case. We can draw analogies using the notation used for the AVL trees, where we mark nodes with a trit (or a sign) or use a balance-factor. We have two cases to discuss:
\begin{itemize}
\item \textbf{(1, 1) node} represents a tree where both of its subtrees have the same height. In this case we are talking about the nodes with balance-factor $0$ (respectively being signed with a $0$).
\item \textbf{(1, 2) node} represents a tree where one of its subtrees has a bigger height. In this case we are talking about the nodes with balance-factor $-1$ or $1$ (respectively being signed with a $-$ or a $+$).
\end{itemize}
Example of the AVL tree that uses ranks instead of signs or balance-factors can be seen in \autoref{fig:ranked:avl}.
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=3+ rank=3}) at (140.6bp,279.0bp) [draw,ellipse] {3, 3};
\node (Node{value=1+ rank=1}) at (103.6bp,192.0bp) [draw,ellipse] {1, 1};
\node (Node{value=7+ rank=2}) at (178.6bp,192.0bp) [draw,ellipse] {7, 2};
\node (Node{value=0+ rank=0}) at (28.597bp,105.0bp) [draw,ellipse] {0, 0};
\node (Node{value=2+ rank=0}) at (103.6bp,105.0bp) [draw,ellipse] {2, 0};
\node (Node{value=5+ rank=1}) at (178.6bp,105.0bp) [draw,ellipse] {5, 1};
\node (Node{value=8+ rank=1}) at (253.6bp,105.0bp) [draw,ellipse] {8, 1};
\node (Node{value=4+ rank=0}) at (103.6bp,18.0bp) [draw,ellipse] {4, 0};
\node (Node{value=6+ rank=0}) at (178.6bp,18.0bp) [draw,ellipse] {6, 0};
\node (Node{value=9+ rank=0}) at (253.6bp,18.0bp) [draw,ellipse] {9, 0};
\draw [->] (Node{value=3+ rank=3}) ..controls (128.03bp,249.14bp) and (120.85bp,232.64bp) .. (Node{value=1+ rank=1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (128.6bp,235.5bp) node {2};
\draw [->] (Node{value=3+ rank=3}) ..controls (153.5bp,249.14bp) and (160.88bp,232.64bp) .. (Node{value=7+ rank=2});
\draw (166.6bp,235.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (78.671bp,162.75bp) and (61.893bp,143.74bp) .. (Node{value=0+ rank=0});
\draw (75.597bp,148.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (103.6bp,162.16bp) and (103.6bp,146.55bp) .. (Node{value=2+ rank=0});
\draw (108.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=2}) ..controls (178.6bp,162.16bp) and (178.6bp,146.55bp) .. (Node{value=5+ rank=1});
\draw (183.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=2}) ..controls (203.52bp,162.75bp) and (220.3bp,143.74bp) .. (Node{value=8+ rank=1});
\draw (224.6bp,148.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (153.67bp,75.75bp) and (136.89bp,56.735bp) .. (Node{value=4+ rank=0});
\draw (149.6bp,61.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (178.6bp,75.163bp) and (178.6bp,59.548bp) .. (Node{value=6+ rank=0});
\draw (183.6bp,61.5bp) node {1};
\draw [->] (Node{value=8+ rank=1}) ..controls (253.6bp,75.163bp) and (253.6bp,59.548bp) .. (Node{value=9+ rank=0});
\draw (258.6bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{Example of the AVL tree using ranks}
\label{fig:ranked:avl}
\end{figure}
\subsection{Red-black tree}\label{chap:rb-rule}
\textbf{Red-Black Rule}: All rank differences are 0 or 1, and no parent of a 0-child is a 0-child.~\cite{wavl}
In case of red-black trees, rank represents number of black nodes on a path from the node to a leaf (excluding the node itself). Based on that we can discuss the \textit{Red-Black Rule} in detail:
\begin{enumerate}
\item \textit{All rank differences are 0 or 1} inductively enforces monotonically increasing (at most by 1) count of black nodes from the leaves. \\
In detail:
\begin{enumerate}
\item In case the \textbf{current node is black}, the rank difference must be 1, since we have one more black node on the path from the parent to the leaves than from the current node.
\item In case the \textbf{current node is red}, the rank difference must be 0, since from the parent the count of black nodes on the path to leaves has not changed.
\item And finally all other differences are invalid, since by adding a node to the beginning of a path to the leaf we can either add red node (0-child) or black node (1-child), i.e. there is one more black node on the path or not which implies the change can be only 0 or 1.
\end{enumerate}
\item \textit{No parent of a 0-child is a 0-child} ensures that there are no two consecutive red nodes, since 0-child node is equivalent to the red node.
\end{enumerate}
Example of the red-black tree that uses ranks instead of colors can be seen in \autoref{fig:ranked:rbt}, red nodes are also colored for convenience.
Majority of the red-black tree implementations color nodes of the tree, following that notation and \textbf{precise} definition of the red-black tree it is quite common to ask the following questions:
\begin{enumerate}
\item \textit{Do we count the node itself if it is black?} \\
If we do not count nodes themselves, we decrease the count of black nodes on every path to the external nodes by $1$.
\item \textit{Do we count the external nodes (leaves that do not hold any value)?} \\
If we do not count external nodes themselves, we decrease the count of black nodes on every path to the external nodes by $1$.
\end{enumerate}
Overall they do not really matter as long as they are used consistently, since they affect the counts globally.
However, it is also possible to color edges instead of the nodes as is presented in \textit{Průvodce labyrintem algoritmů} by \textit{Mareš and Valla}.~\cite{labyrint} In this representation color of the edge represents color of the child node. This representation is much more ``natural'' for the rank-based approach, as can be seen in \autoref{fig:ranked:rbt}, where edges connecting nodes with rank-difference $1$ represent \textit{black edges} and edges connecting nodes with rank-difference $0$ represent \textit{red edges}. It is also apparent that using this representation root of the tree does not hold any color anymore.
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=3+ rank=2}) at (140.6bp,366.0bp) [draw,ellipse] {3, 2};
\node (Node{value=1+ rank=1}) at (103.6bp,279.0bp) [draw,ellipse] {1, 1};
\node (Node{value=5+ rank=1}) at (178.6bp,279.0bp) [draw,ellipse] {5, 1};
\node (Node{value=0+ rank=0}) at (28.597bp,192.0bp) [draw,ellipse] {0, 0};
\node (Node{value=2+ rank=0}) at (103.6bp,192.0bp) [draw,ellipse] {2, 0};
\node (Node{value=4+ rank=0}) at (178.6bp,192.0bp) [draw,ellipse] {4, 0};
\node (Node{value=7+ rank=1}) at (253.6bp,192.0bp) [draw=red,ellipse] {7, 1};
\node (Node{value=6+ rank=0}) at (226.6bp,105.0bp) [draw,ellipse] {6, 0};
\node (Node{value=8+ rank=0}) at (301.6bp,105.0bp) [draw,ellipse] {8, 0};
\node (Node{value=9+ rank=0}) at (301.6bp,18.0bp) [draw=red,ellipse] {9, 0};
\draw [->] (Node{value=3+ rank=2}) ..controls (128.03bp,336.14bp) and (120.85bp,319.64bp) .. (Node{value=1+ rank=1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (129.6bp,322.5bp) node {1};
\draw [->] (Node{value=3+ rank=2}) ..controls (153.5bp,336.14bp) and (160.88bp,319.64bp) .. (Node{value=5+ rank=1});
\draw (167.6bp,322.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (78.671bp,249.75bp) and (61.893bp,230.74bp) .. (Node{value=0+ rank=0});
\draw (75.597bp,235.5bp) node {1};
\draw [->] (Node{value=1+ rank=1}) ..controls (103.6bp,249.16bp) and (103.6bp,233.55bp) .. (Node{value=2+ rank=0});
\draw (108.6bp,235.5bp) node {1};
\draw [->] (Node{value=7+ rank=1}) ..controls (244.49bp,162.33bp) and (239.36bp,146.17bp) .. (Node{value=6+ rank=0});
\draw (246.6bp,148.5bp) node {1};
\draw [->] (Node{value=7+ rank=1}) ..controls (269.82bp,162.26bp) and (279.51bp,145.12bp) .. (Node{value=8+ rank=0});
\draw (284.6bp,148.5bp) node {1};
\draw [->] (Node{value=5+ rank=1}) ..controls (178.6bp,249.16bp) and (178.6bp,233.55bp) .. (Node{value=4+ rank=0});
\draw (183.6bp,235.5bp) node {1};
\draw [red,->] (Node{value=5+ rank=1}) ..controls (203.52bp,249.75bp) and (220.3bp,230.74bp) .. (Node{value=7+ rank=1});
\draw (225.6bp,235.5bp) node {0};
\draw [red,->] (Node{value=8+ rank=0}) ..controls (301.6bp,75.163bp) and (301.6bp,59.548bp) .. (Node{value=9+ rank=0});
\draw (306.6bp,61.5bp) node {0};
%
\end{tikzpicture}
\caption{Example of the red-black tree using ranks}
\label{fig:ranked:rbt}
\end{figure}
\section{Implementation of other balanced trees using rank}
To show that using rank is mostly an implementation detail, we will describe an implementation of the AVL tree using rank.
Implementation of the insertion is trivial, since it is described by \textit{Haeupler et al.} and is used in the WAVL tree. All we need to implement is the deletion from the AVL tree. We will start with a short description of the deletion rebalance as given by \textit{Mareš and Valla} in \textit{Průvodce labyrintem algoritmů}.
When propagating the error, we can encounter 3 cases (we explain them with respect to propagating deletion from the left subtree, propagation from right is mirrored and role of trits $+$ and $-$ swaps)~\cite{labyrint}:
\begin{enumerate}
\item \textit{Node was marked with $-$.} In this case, heights of left and right subtrees are equal now and node is marked with $0$, but propagation must be continued, since the height of the whole subtree has changed.\label{avl:rules:delete:1}
\item \textit{Node was marked with $0$.} In this case, node is marked with $+$ and the height of the subtree has not changed, therefore we can stop the propagation.\label{avl:rules:delete:2}
\item \textit{Node was marked with $+$.} In this case, node would acquire balance-factor of $+2$, which is not allowed. In this situation, let $x$ be the current node marked with $+$ and $y$ be the right child of $x$; we decide based on the mark of $y$ in the following way:\label{avl:rules:delete:3}
\begin{enumerate}
\item $y$ is marked with $+$, then we rotate by $x$ to the left. After that both $x$ and $y$ can be marked with $0$. Height from the point of the parent has changed, so we continue the propagation.\label{avl:rules:delete:3a}
\item $y$ is marked with $0$, then we rotate by $x$ to the left. After the rotation, $x$ can be marked with $+$ and $y$ with $-$. Height of the subtree has not changed, so propagation can be stopped.\label{avl:rules:delete:3b}
\item $y$ is marked with $-$. Let $z$ be the left son of $y$. We double rotate: first by $z$ to the right and then by $x$ to the left. After the double-rotation $x$ can be marked by either $0$ or $-$, $y$ by $0$ or $+$ and $z$ gets $0$. Height of the subtree has changed, therefore we must propagate further.\label{avl:rules:delete:3c}
\end{enumerate}
\end{enumerate}\label{avl:rules:delete}
We have implemented the deletion rebalance by implementing following functions:
\begin{enumerate}
\item \avlDeleteRebalance{} that handles updating the current node and its parent and iteratively calls subroutine handling previously described \textit{one step of a rebalancing}
\item \avlDeleteFixNode{} that handles one adjustment of rebalancing as described above
\item \avlDeleteRotate{} that handles rotation and updating of ranks, if necessary
\end{enumerate}
\begin{algorithm}
\Proc{$\texttt{deleteRebalance}(T, y, parent)$}{
\If{$y = nil \land parent = nil$}{
\Return;
}
\BlankLine
\If{$y = nil$}{
$(y, parent) \gets (parent, parent.parent)$\;
}
\BlankLine
\While{$y \neq nil \land \avlDeleteFixNode(T, y, parent)$}{
$y \gets parent$\;
\eIf{$parent \neq nil$}{
$parent \gets parent.parent$\;
}{
$parent \gets nil$\;
}
}
}
\caption{\texttt{deleteRebalance} algorithm for the AVL tree}\label{algorithm:avl:deleteRebalance}
\end{algorithm}
\texttt{deleteRebalance}, as can be seen in \autoref{algorithm:avl:deleteRebalance}, is quite straightforward. At the beginning we early return in case there is nothing to be rebalanced, which happens when deleting the last node from the tree. Then we handle case when we are given only parent by correctly setting $y$ and $parent$. Following up on that, as long as we have a node to be checked, we call \autoref{algorithm:avl:deleteFixNode} to fix balancing of the current node. Algorithm for fixing node returns $true$ or $false$ depending on the need to propagate the height change further, which is utilized in the condition of the \texttt{while} loop.
\begin{algorithm}
\Proc{$\texttt{deleteFixNode}(T, x, parent)$}{
\uIf(\tcp*[h]{Handles \hyperref[avl:rules:delete:1]{rule 1}}){balance-factor of $x$ is $0$}{
update rank of $x$\;
\Return{$true$}\;
}
\ElseIf(\tcp*[h]{Handles \hyperref[avl:rules:delete:2]{rule 2}}){balance-factor of $x$ is $-1$ or $1$}{
\Return{$false$}\;
}
\BlankLine
$(l, r) \gets (x.left, x.right)$\;
$(rotateL, rotateR) \gets (\texttt{rotateLeft}, \texttt{rotateRight})$\;
\BlankLine
\tcp{Handles \hyperref[avl:rules:delete:3]{rule 3}}
\eIf{balance-factor of $x$ is $2$}{
\Return{$\avlDeleteRotate(T, x, r, 1, rotateL, rotateR)$}\;
}{
\Return{$\avlDeleteRotate(T, x, l, -1, rotateR, rotateL)$}\;
}
}
\caption{\texttt{deleteFixNode} algorithm for the AVL tree}\label{algorithm:avl:deleteFixNode}
\end{algorithm}
\texttt{deleteFixNode} implements the algorithm as described in \hyperref[avl:rules:delete]{the list} with all possible cases above. We start by checking the balance-factor of the given node, in case there is no need to rotate, the rank gets updated if necessary and then we return the information whether there is a need to propagate further or not. In case the node has acquired balance-factor of $2$ we call \autoref{algorithm:avl:deleteRotate} to fix the balancing locally.
There are two operations that are not described using helper functions and they are done in the following way:
\begin{itemize}
\item Balance-factor of a node $x$ is calculated as \[ rank(x.right) - rank(x.left) \]
\item Updating rank of a node $x$ is done by setting node's rank to \[ 1 + \max \{ rank(x.left), rank(x.right) \} \]
\end{itemize}
\begin{algorithm}
\Proc{$\texttt{deleteRotate}(T, x, y, leaning, rotateL, rotateR)$}{
$f \gets $ balance-factor of $y$\;
\BlankLine
\uIf(\tcp*[h]{Handles rules \hyperref[avl:rules:delete:3a]{3a} \& \hyperref[avl:rules:delete:3b]{3b}}){$f = 0 \lor f = leaning$}{
$rotateL(T, x)$\;
}
\Else(\tcp*[h]{Handles \hyperref[avl:rules:delete:3c]{rule 3c}}){
$rotateR(T, y)$\;
$rotateL(T, x)$\;
}
\BlankLine
update ranks of $x$, $y$ and new root of the subtree\;
\BlankLine
\Return{$f \neq 0$}\;
}
\caption{\texttt{deleteRotate} algorithm for the AVL tree}\label{algorithm:avl:deleteRotate}
\end{algorithm}
\newpage
\texttt{deleteRotate} is handling only fixes where the rotations are required. Both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} include comments to highlight which rules are handled. This function is also done generically regardless of the subtree from which the height change is being propagated. This is done by passing in the functions used for rotations (since it is mirrored) and also by passing in the balance-factor required for just one rotation.
In both \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} there is a key difference compared to the AVL tree implementations without ranks. Comparing the \hyperref[avl:rules:delete]{rules for deletion} with algorithms for rank-balanced implementation, it is apparent that during propagation of height change, the balance-factors of immediate nodes are already adjusted, since the information comes from either of its subtrees and it is calculated using ranks of its children that are already adjusted. This fact needs to be reflected in the implementation accordingly, since it shifts the meaning of rules as they are described above and written for the implementations that store the trit in the nodes directly, which is updated manually during rebalancing.

View file

@ -0,0 +1,55 @@
\chapter{Self-Balancing Search Trees}\label{chap:sb-bst}
This chapter will briefly discuss the properties and fundamental ideas behind the most used self-balancing search trees in standard libraries to give an idea about current options and how WAVL fits among them.
\section{Red-black trees}
As mentioned previously, red-black trees are among the most popular implementations in standard libraries. As always, we have a binary search tree, and then each node is given \textit{red} or \textit{black} colour. A red-black tree is kept balanced by enforcing the following set of rules~\cite{rbtree}:
\begin{enumerate}
\item External nodes are black; internal nodes may be red or black.
\item For each internal node, all paths from it to external nodes contain the same number of black nodes.
\item No path from an internal node to an external node contains two red nodes in a row.
\end{enumerate}
Given this knowledge, we can safely deduce the following relation between the height of the red-black tree and nodes stored in it~\cite{cormen2009introduction}:
\[
\log_2{(n + 1)} \leq h \leq 2 \cdot \log_2{(n + 2)} - 2
\]\label{rb-height}
where the lower bound is given by a perfect binary tree and upper bound by the minimal red-black tree.
There are also other variants of the red-black tree that are considered to be simpler for implementation, e.g. left-leaning red-black tree, as described by Sedgewick.
Red-black trees are used to implement sets in C++, Java and C\#.
\section{AVL tree}
The AVL tree is considered to be the oldest self-balancing binary search tree. For clarity, we define the following function:
\[
BalanceFactor(n) := height(right(n)) - height(left(n))
\]
Then we have an AVL tree, if for every node $n$ in the tree the following holds:
\[
BalanceFactor(n) \in \{ -1, 0, 1 \}
\]
In other words, the heights of the left and right subtrees of each node differ by at most 1.
Similarly, we can bound the height of the AVL tree; from the original paper by Adelson-Velsky and Landis, we get:
\[
\left( \log_2{(n + 1)} \leq \right) h < \log_{\varphi}{(n + 1)} < \frac{3}{2} \cdot \log_2{(n + 1)}
\]\label{avl-height}
If we compare the upper bounds for the height of the red-black trees and AVL trees, we can see that AVL rules are more strict than red-black rules, but at the cost of rebalancing. However, in both cases, the rebalancing still takes $\mathcal{O}(\log{n})$ time.
Regarding the implementation of AVL trees, we can see them implemented in the standard library of Agda or Coq.
\section {B-tree}
\textit{To keep or not to keep…}
Used in Rust.

2
summary.tex Normal file
View file

@ -0,0 +1,2 @@
\chapter*{Summary}
\addcontentsline{toc}{chapter}{Summary}

144
thesis.tex Normal file
View file

@ -0,0 +1,144 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% I, the copyright holder of this work, release this work into the
%% public domain. This applies worldwide. In some countries this may
%% not be legally possible; if so: I grant anyone the right to use
%% this work for any purpose, without any conditions, unless such
%% conditions are required by law.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[
printed, %% The `digital` option enables the default options for the
%% digital version of a document. Replace with `printed`
%% to enable the default options for the printed version
%% of a document.
color, %% Uncomment these lines (by removing the %% at the
%% beginning) to use color in the digital version of your
%% document
table, %% The `table` option causes the coloring of tables.
%% Replace with `notable` to restore plain LaTeX tables.
oneside, %% The `twoside` option enables double-sided typesetting.
%% Use at least 120 g/m² paper to prevent show-through.
%% Replace with `oneside` to use one-sided typesetting;
%% use only if you don't have access to a double-sided
%% printer, or if one-sided typesetting is a formal
%% requirement at your faculty.
nolof, %% The `lof` option prints the List of Figures. Replace
%% with `nolof` to hide the List of Figures.
nolot, %% The `lot` option prints the List of Tables. Replace
%% with `nolot` to hide the List of Tables.
%% More options are listed in the user guide at
%% <http://mirrors.ctan.org/macros/latex/contrib/fithesis/guide/mu/fi.pdf>.
]{fithesis3}
%% The following section sets up the locales used in the thesis.
\usepackage[resetfonts]{cmap} %% We need to load the T2A font encoding
\usepackage[T1,T2A]{fontenc} %% to use the Cyrillic fonts with Russian texts.
\usepackage[
main=english, %% By using `czech` or `slovak` as the main locale
%% instead of `english`, you can typeset the thesis
%% in either Czech or Slovak, respectively.
english, czech, slovak %% The additional keys allow
]{babel} %% foreign texts to be typeset as follows:
%%
%% \begin{otherlanguage}{german} ... \end{otherlanguage}
%% \begin{otherlanguage}{russian} ... \end{otherlanguage}
%% \begin{otherlanguage}{czech} ... \end{otherlanguage}
%% \begin{otherlanguage}{slovak} ... \end{otherlanguage}
%%
%% For non-Latin scripts, it may be necessary to load additional
%% fonts:
\usepackage{paratype}
\def\textrussian#1{{\usefont{T2A}{PTSerif-TLF}{m}{rm}#1}}
%%
%% The following section sets up the metadata of the thesis.
\thesissetup{
date = \the\year/\the\month/\the\day,
university = mu,
faculty = fi,
type = bc,
author = Matej Focko,
gender = m,
advisor = {prof. RNDr. Ivana Černá, CSc.},
title = {Rank-Balanced Trees},
TeXtitle = {Rank-Balanced Trees},
keywords = {algorithms, data structures, rank, trees, balanced trees, study material, visualization, ...},
TeXkeywords = {algorithms, data structures, rank, trees, balanced trees, study material, visualization, \ldots},
abstract = {%
In the thesis we demonstrate usage of a rank for implementing balanced binary-search trees
and algorithms related to a specific rank-balanced tree, the weak AVL tree. First part
of the thesis consists of description of the rank-balanced tree followed by a comparison
to other balanced trees that can be implemented using rank, diagrams and pseudo-codes related
to the weak AVL tree. We also present an implementation of the weak AVL tree in Python, tested with a
property-based testing. The final part of the thesis is a web-page that allows performing operations
on the weak AVL tree with animations and step-by-step walk-through of a pseudo-code.
},
thanks = {%
\textit{TBD}
},
bib = bibliography.bib,
%% Uncomment the following line (by removing the %% at the
%% beginning) and replace `assignment.pdf` with the filename
%% of your scanned thesis assignment.
%% assignment = assignment.pdf,
}
\usepackage{makeidx} %% The `makeidx` package contains
\makeindex %% helper commands for index typesetting.
%% These additional packages are used within the document:
\usepackage{paralist} %% Compact list environments
\usepackage{amsmath} %% Mathematics
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{url} %% Hyperlinks
\usepackage{tabularx} %% Tables
\usepackage{tabu}
\usepackage{booktabs}
\usepackage[vlined,longend,linesnumbered]{algorithm2e}
\usepackage{listings} %% Source code highlighting
\lstset{
basicstyle = \ttfamily,
identifierstyle = \color{black},
keywordstyle = \color{blue},
keywordstyle = {[2]\color{cyan}},
keywordstyle = {[3]\color{olive}},
stringstyle = \color{teal},
commentstyle = \itshape\color{magenta},
breaklines = true,
}
\usepackage{floatrow} %% Putting captions above tables
\floatsetup[table]{capposition=top}
\usepackage{hyperref}
\usepackage[x11names, svgnames, rgb]{xcolor}
\usepackage{tikz}
\usetikzlibrary{decorations,arrows,shapes}
\SetKwProg{Fn}{function}{ is}{end}
\SetKwProg{Proc}{procedure}{ is}{end}
\newcommand{\avlDeleteRebalance}{\hyperref[algorithm:avl:deleteRebalance]{\texttt{deleteRebalance}}}
\newcommand{\avlDeleteFixNode}{\hyperref[algorithm:avl:deleteFixNode]{\texttt{deleteFixNode}}}
\newcommand{\avlDeleteRotate}{\hyperref[algorithm:avl:deleteRotate]{\texttt{deleteRotate}}}
\newcommand{\findParentNode}{\hyperref[algorithm:findParentNode]{\texttt{findParentNode}}}
\newcommand{\wavlInsertRebalance}{\hyperref[algorithm:wavl:insertRebalance]{\texttt{insertRebalance}}}
\newcommand{\wavlFixZeroChild}{\hyperref[algorithm:wavl:fix0Child]{\texttt{fix0Child}}}
\newcommand{\wavlDeleteRebalance}{\hyperref[algorithm:wavl:deleteRebalance]{\texttt{deleteRebalance}}}
\newcommand{\wavlBottomUpDelete}{\hyperref[algorithm:wavl:bottomUpDelete]{\texttt{bottomUpDelete}}}
\newcommand{\wavlFixDelete}{\hyperref[algorithm:wavl:fixDelete]{\texttt{fixDelete}}}
\begin{document}
\include{introduction}
\include{self_balancing_search_trees}
\include{rank_balanced_trees}
\include{wavl_trees}
\include{implementation}
\include{visualization}
\include{summary}
\appendix %% Start the appendices.
\chapter{An appendix}
Here you can insert the appendices of your thesis.
\end{document}

1
visualization.tex Normal file
View file

@ -0,0 +1 @@
\chapter{Visualization}

414
wavl_trees.tex Normal file
View file

@ -0,0 +1,414 @@
\chapter{Weak AVL Trees}
\section{Rank rule}
Based on the rank rules for implementing red-black tree (as described in \ref{chap:rb-rule}) and AVL tree (as described in \ref{chap:avl-rule}), \textit{Haeupler et al.} present a new rank rule:
\textbf{Weak AVL Rule}: All rank differences are 1 or 2, and every leaf has rank 0.~\cite{wavl}
Comparing the \textit{Weak AVL Rule} to the \textit{AVL Rule}, we can come to these conclusions:
\begin{itemize}
\item \textit{Every leaf has rank 0} holds with the AVL Rule, since every node is (1, 1) or (1, 2) and the rank of a node represents the height of its tree. The rank of \textit{nil} is defined as $-1$ and the height of a tree rooted at a leaf is $0$, therefore leaves are (1, 1)-nodes.
\item \textit{All rank differences are 1 or 2} differs from the AVL Rule in one specific case, and that is the (2, 2)-node, which is allowed in the WAVL tree, but not in the AVL tree. This difference will be explained more thoroughly later on.
\end{itemize}
\section{Height boundaries}
We have described in \autoref{chap:sb-bst} other common self-balanced binary search trees to be able to draw analogies and explain differences between them. Given the boundaries of height for the red-black and AVL trees, we can safely assume that the AVL tree is stricter with regard to the self-balancing than the red-black tree. Let us show how the WAVL tree fits among them. \textit{Haeupler et al.} present the following bounds~\cite{wavl}:
\[ h \leq k \leq 2h \text{ and } k \leq 2 \log_2{n} \]
In those equations we can see $h$ and $n$ in the same context as we used them to lay boundaries for the AVL and red-black trees, but we can also see a new variable $k$, which represents the rank of the tree.
One of the core differences between AVL and WAVL lies in the rebalancing after deletion. Insertion into the WAVL tree is realized in the same way as it would be in the AVL tree and the benefit of the (2, 2)-node is used during deletion rebalancing.
From the previous 2 statements we can come to 2 conclusions and those are:
\begin{itemize}
\item If we commit only insertions to the WAVL tree, it will always yield a valid AVL tree. In that case it means that the height boundaries are the same as those of the AVL tree (described in \autoref{avl-height}).
\item If we commit deletions too, we can assume the worst-case scenario where \[ h < 2 \log_2{n} \] which also holds for the red-black trees.
\end{itemize}
From the two conclusions we can safely deduce that the WAVL tree is in the worst-case scenario as efficient as the red-black tree and in the best-case scenario as efficient as the AVL tree.
\newpage
\section{Insertion into the weak AVL tree}
Inserting values into the WAVL tree is equivalent to inserting values into a regular binary search tree followed up by rebalancing that ensures the rank rules hold. This part can be clearly seen in \autoref{algorithm:wavl:insert}. We can also see there two early returns: one of them happens during insertion into the empty tree and the other during insertion of a duplicate key, which we do not allow.
\begin{algorithm}
\Proc{$\texttt{insert}(T, key)$}{
$insertedNode \gets Node(key)$\;
\If{$T.root = nil$}{
$T.root \gets insertedNode$\;
\Return\;
}
\BlankLine
$parent \gets \findParentNode(key, T.root)$\;
\If{$parent = nil$}{
\Return\;
}
$insertedNode.parent \gets parent$\;
\BlankLine
\eIf{$key < parent.key$}{
$parent.left \gets insertedNode$\;
}{
$parent.right \gets insertedNode$\;
}
\BlankLine
$\wavlInsertRebalance(T, insertedNode)$\;
}
\caption{Insert operation on binary search tree}\label{algorithm:wavl:insert}
\end{algorithm}
In the \autoref{algorithm:wavl:insert} we have also utilized a helper function that is used to find parent of the newly inserted node and also prevents insertion of duplicate keys within the tree. Pseudocode of that function can be seen in \autoref{algorithm:findParentNode}.
\begin{algorithm}
\Fn{$\texttt{findParentNode}(key, node)$}{
$childNode \gets node$\;
\BlankLine
\While{$childNode \neq nil$}{
$node \gets childNode$\;
\uIf{$key < node.key$}{
$childNode \gets node.left$\;
}
\ElseIf{$node.key < key$}{
$childNode \gets node.right$\;
}
\Else{
\Return{nil}\;
}
}
\BlankLine
\Return{node}\;
}
\caption{Helper function that returns parent for newly inserted node}\label{algorithm:findParentNode}
\end{algorithm}
Rebalancing after insertion in the WAVL tree is equivalent to rebalancing after insertion in the AVL tree. We will start with a short description of the rebalancing within AVL to lay a foundation for analogies and differences compared to the implementation using ranks.
When propagating the error, we can encounter 3 cases (we explain them with respect to propagating insertion from the left subtree, propagation from right is mirrored and role of trits $+$ and $-$ swaps)~\cite{labyrint}:
\begin{enumerate}
\item \textit{Node was marked with $+$.} In this case, heights of left and right subtrees are equal now and node is marked with $0$ and propagation can be stopped.\label{avl:rules:insert:1}
\item \textit{Node was marked with $0$.} In this case, node is marked with $-$, but the height of the tree rooted at the node has changed, which means that we need to propagate the changes further.\label{avl:rules:insert:2}
\item \textit{Node was marked with $-$.} In this case, node would acquire balance-factor of $-2$, which is not allowed. In this situation we decide based on the mark of the node from which we are propagating the insertion in the following way (let $x$ be the node from which the information is being propagated and $z$ the current node marked with $-$):\label{avl:rules:insert:3}
\begin{enumerate}
\item $x$ is marked with $-$, then we rotate by $z$ to the right. After that both $z$ and $x$ can be marked with $0$. Height from the point of the parent has not changed, so we can stop the propagation.\label{avl:rules:insert:3a}
\item $x$ is marked with $+$, then we double rotate: first by $x$ to the left and then by $z$ to the right. Here we need to recalculate the balance-factors for $z$ and $x$, where $z$ gets $-$ or $0$ and $x$ gets $0$ or $+$. Node that was a right child to the $x$ before the double-rotation is now marked with $0$ and propagation can be stopped.\label{avl:rules:insert:3b}
\item $x$ is marked with $0$. This case is trivial, since it cannot happen, because we never propagate the height change from a node that acquired sign $0$.
\end{enumerate}
\end{enumerate}
In the following explanation we have to consider that valid nodes in AVL tree implemented via ranks are (1, 1) and (1, 2) and by the time of evaluating rank-differences of parent, they are already affected by the rebalancing done from the inserted leaf.
Rebalancing of the tree is equivalent to rebalancing of the AVL tree and is executed in the following way:
\begin{algorithm}
\Proc{$\texttt{insertRebalance}(T, node)$}{
\tcp{Handles \hyperref[avl:rules:insert:2]{rule 2}}
\While{$node.parent \neq nil \land (node.parent\, is\, (0, 1)\, or\, (1, 0))$}{
$\texttt{promote}(node.parent)$\;
$node \gets node.parent$\;
}
\BlankLine
\tcp{Handles \hyperref[avl:rules:insert:1]{rule 1}}
\lIf{$node.parent = nil \lor node\, is\, not\, \text{0-child}$}{\Return}
\BlankLine
\tcp{Handles \hyperref[avl:rules:insert:3]{rule 3}}
\eIf{$node = node.parent.left$}{
$\wavlFixZeroChild(T, node, node.right, \texttt{rotateLeft}, \texttt{rotateRight})$\;
}{
$\wavlFixZeroChild(T, node, node.left, \texttt{rotateRight}, \texttt{rotateLeft})$\;
}
\BlankLine
}
\caption{Algorithm containing bottom-up rebalancing after insertion}\label{algorithm:wavl:insertRebalance}
\end{algorithm}
As a first step, which can be seen in \autoref{algorithm:wavl:insertRebalance}, we iteratively check the rank-differences of the parent of the current node. As long as it is a (0, 1) or (1, 0) node, we promote it and propagate further. There is an interesting observation to be made about \textit{how the parent can fulfill such a requirement}. The answer is simple: since we are adding a leaf or are already propagating the change towards the root, we have lowered a rank-difference of the parent, therefore it must have been a (1, 1) node. With respect to the algorithm used in usual implementations of AVL trees, this step corresponds to \hyperref[avl:rules:insert:2]{\textit{rule 2}}. After the promotion the parent becomes a (1, 2) or (2, 1) node, which means that it gets sign $-$ (or $+$ respectively when propagating from the right subtree), which conforms to the usual algorithm.
After this, we might end up in two situations and those are:
\begin{enumerate}
\item Current node is not a 0-child, which means that after propagation and promotions we have gotten to a parent node that is (1, 2) or (2, 1), which refers to the \hyperref[avl:rules:insert:1]{\textit{rule 1}}.
\item Current node is a 0-child, which means that after propagation and promotions we have a node with a parent that is either (0, 2) or (2, 0) node. This case conforms to the \hyperref[avl:rules:insert:3]{\textit{rule 3}} and must be handled further to fix the broken rank rule.
\end{enumerate}
\hyperref[avl:rules:insert:3]{\textit{Rule 3}} is then handled by a helper function that can be seen in \autoref{algorithm:wavl:fix0Child}.
\begin{algorithm}
\Proc{$\texttt{fix0Child}(T, x, y, rotateToLeft, rotateToRight)$}{
$z \gets x.parent$\;
\BlankLine
\uIf(\tcp*[h]{Handles \hyperref[avl:rules:insert:3a]{rule 3a}}){$y = nil \lor y\, is\, \text{2-child}$}{
$rotateToRight(T, z)$\;
\BlankLine
$\texttt{demote}(z)$\;
}
\ElseIf(\tcp*[h]{Handles \hyperref[avl:rules:insert:3b]{rule 3b}}){$y\, is\, \text{1-child}$}{
$rotateToLeft(T, x)$\;
$rotateToRight(T, z)$\;
\BlankLine
$\texttt{promote}(y)$\;
$\texttt{demote}(x)$\;
$\texttt{demote}(z)$\;
}
}
\caption{Generic algorithm for fixing 0-child after insertion}\label{algorithm:wavl:fix0Child}
\end{algorithm}
Here we can see, once again, an interesting pattern. When comparing to the algorithm described above, using the rank representation, we do not need to worry about changing the signs and updating the heights, since by rotating combined with demotion and promotion of the ranks, we are effectively updating the height (represented via rank) of the affected nodes. This observation could be used in \autoref{algorithm:avl:deleteFixNode} and \autoref{algorithm:avl:deleteRotate} where we turned to manual updating of ranks to show the difference.
\section{Deletion from the weak AVL tree}
\begin{algorithm}
\Proc{$\texttt{deleteRebalance}(T, y, parent)$}{
\uIf{$y \text{ is (2, 2)}$}{
$\texttt{demote}(y)$\;
$parent \gets y.parent$\;
}
\ElseIf{$parent \text{ is (2, 2)}$}{
$\texttt{demote}(parent)$\;
$parent \gets parent.parent$\;
}
\BlankLine
\If{$parent = nil$}{
\Return\;
}
$z \gets \text{3-child of } parent$\;
\If{$z \neq nil$}{
$\wavlBottomUpDelete(T, z, parent)$\;
}
}
\caption{Initial phase of algorithm for the rebalance after deletion from the WAVL tree}\label{algorithm:wavl:deleteRebalance}
\end{algorithm}
As described by \textit{Haeupler et al.}, we start the deletion rebalancing by checking for a (2, 2) node. If that is the case, we demote it and continue with the deletion rebalancing via \autoref{algorithm:wavl:bottomUpDelete} if we have created a 3-child by the demotion. Demoting the (2, 2) node is imperative, since it enforces the part of the \textit{Weak AVL Rule} requiring that leaves have rank equal to zero.
For example, consider the tree in \autoref{fig:wavl:twoElements}. Deletion of key 2 from that tree would result in having only key 1 in the tree with rank equal to 1, which would be a (2, 2) node and a leaf at the same time. After the demotion of the remaining key, we acquire the tree as shown in \autoref{fig:wavl:twoElementsAfterDelete}.
In contrast to the \textit{AVL Rule}, WAVL tree allows us to have (2, 2) nodes present. Therefore we can encounter two key differences during deletion rebalancing:
\begin{enumerate}
\item If anywhere during the deletion rebalancing, \textbf{but not} at the start, we encounter a (2, 2) node, we can safely stop the rebalancing process, since the rest of the tree must be correct and we have fixed the errors on the way to the current node from the leaf.
\item Compared to the AVL tree, during deletion rebalancing we need to fix \textbf{3-child} nodes.
\end{enumerate}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=1+ rank=1}) at (28.597bp,105.0bp) [draw,ellipse] {1, 1};
\node (Node{value=2+ rank=0}) at (28.597bp,18.0bp) [draw,ellipse] {2, 0};
\draw [->] (Node{value=1+ rank=1}) ..controls (28.597bp,75.163bp) and (28.597bp,59.548bp) .. (Node{value=2+ rank=0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (33.597bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree containing two elements}
\label{fig:wavl:twoElements}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{value=1+ rank=1}) at (28.597bp,105.0bp) [draw,ellipse] {1, 0};
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
%
\end{tikzpicture}
\caption{\autoref{fig:wavl:twoElements} after deletion of 2}
\label{fig:wavl:twoElementsAfterDelete}
\end{figure}
\begin{algorithm}
\Proc{$\texttt{bottomUpDelete}(T, x, parent)$}{
\If{$x \text{ is not 3-child} \lor parent = nil$}{
\Return\;
}
\BlankLine
$y \gets nil$\;
\eIf{$parent.left = x$}{
$y \gets parent.right$\;
}{
$y \gets parent.left$\;
}
\BlankLine
\While{$parent \neq nil \land x \text{ is 3-child} \land (y \text{ is 2-child or (2, 2)})$}{
\If{$y \text{ is not 2-child}$}{
$\texttt{demote}(y)$\;
}
$\texttt{demote}(parent)$\;
\BlankLine
$x \gets parent$\;
$parent \gets x.parent$\;
\If{$parent = nil$}{
\Return\;
}
\BlankLine
\eIf{$parent.left = x$}{
$y \gets parent.right$\;
}{
$y \gets parent.left$\;
}
}
\BlankLine
\If{$parent \text{ is not (1, 3)}$}{
\Return\;
}
$p \gets parent$\;
\eIf{$parent.left = x$}{
$\wavlFixDelete(T, x, p.right, p, false, \texttt{rotateLeft}, \texttt{rotateRight})$\;
}{
$\wavlFixDelete(T, x, p.left, p, true, \texttt{rotateRight}, \texttt{rotateLeft})$\;
}
}
\caption{Propagation of the broken rank rule after deletion from the WAVL tree}\label{algorithm:wavl:bottomUpDelete}
\end{algorithm}
\begin{algorithm}
\Proc{$\texttt{fixDelete}(T, x, y, z, reversed, rotateL, rotateR)$}{
$v \gets y.left$\;
$w \gets y.right$\;
\If{$reversed$}{
$(v, w) \gets (w, v)$\;
}
\BlankLine
\uIf{$w \text{ is 1-child} \land y.parent \neq nil$}{
$rotateL(T, y.parent)$\;
\BlankLine
$\texttt{promote}(y)$\;
$\texttt{demote}(z)$\;
\BlankLine
\If{$z$ is a leaf}{
$\texttt{demote}(z)$\;
}
}
\ElseIf{$w \text{ is 2-child} \land v.parent \neq nil$}{
$rotateR(T, v.parent)$\;
$rotateL(T, v.parent)$\;
\BlankLine
$\texttt{promote}(v)$\;
$\texttt{promote}(v)$\;
$\texttt{demote}(y)$\;
$\texttt{demote}(z)$\;
$\texttt{demote}(z)$\;
}
}
\caption{Final phase of the deletion rebalance after deletion from the WAVL tree}\label{algorithm:wavl:fixDelete}
\end{algorithm}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{1}) at (87.197bp,192.0bp) [draw,ellipse] {1, 2};
\node (Node{0}) at (31.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{2}) at (106.2bp,105.0bp) [draw,ellipse] {2, 0};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\draw [->] (Node{1}) ..controls (68.373bp,162.43bp) and (56.68bp,144.68bp) .. (Node{0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (68.197bp,148.5bp) node {1};
\draw [->] (Node{1}) ..controls (93.66bp,162.09bp) and (97.18bp,146.34bp) .. (Node{2});
\draw (102.2bp,148.5bp) node {2};
\draw [->] (Node{0}) ..controls (31.197bp,75.163bp) and (31.197bp,59.548bp) .. (Node{-1});
\draw (36.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree with elements inserted in order $(0, 1, 2, -1)$}\label{fig:wavl:deletionA:before}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{1}) at (31.197bp,192.0bp) [draw,ellipse] {1, 2};
\node (Node{0}) at (31.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\draw [->] (Node{1}) ..controls (31.197bp,162.16bp) and (31.197bp,146.55bp) .. (Node{0});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (36.197bp,148.5bp) node {1};
\draw [->] (Node{0}) ..controls (31.197bp,75.163bp) and (31.197bp,59.548bp) .. (Node{-1});
\draw (36.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{WAVL tree from \autoref{fig:wavl:deletionA:before} after deletion of 2, value is replaced by one of its children}\label{fig:wavl:deletionA:replacing}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 1};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw,ellipse] {1, 2};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {1};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (98.197bp,61.5bp) node {-1};
%
\end{tikzpicture}
\caption{Rotation by parent}\label{fig:wavl:deletionA:rotation}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw=blue,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw,ellipse] {1, 2};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {0};
%
\end{tikzpicture}
\caption{Promotion of $y$}\label{fig:wavl:deletionA:promotion}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw=blue,ellipse] {1, 1};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {1};
%
\end{tikzpicture}
\caption{Demotion of $z$}\label{fig:wavl:deletionA:demotion}
\end{figure}
\begin{figure}
\centering
\begin{tikzpicture}[>=latex',line join=bevel,scale=0.75,]
%%
\node (Node{0}) at (70.197bp,105.0bp) [draw,ellipse] {0, 2};
\node (Node{-1}) at (31.197bp,18.0bp) [draw,ellipse] {-1, 0};
\node (Node{1}) at (109.2bp,18.0bp) [draw=blue,ellipse] {1, 0};
\draw [->] (Node{0}) ..controls (57.102bp,75.46bp) and (49.394bp,58.66bp) .. (Node{-1});
\definecolor{strokecol}{rgb}{0.0,0.0,0.0};
\pgfsetstrokecolor{strokecol}
\draw (58.197bp,61.5bp) node {2};
\draw [->] (Node{0}) ..controls (83.292bp,75.46bp) and (91.0bp,58.66bp) .. (Node{1});
\draw (96.197bp,61.5bp) node {2};
%
\end{tikzpicture}
\caption{Second demotion of $z$}\label{fig:wavl:deletionA:secondDemotion}
\end{figure}