blog/algorithms/hash-tables/breaking/index.html

151 lines
41 KiB
HTML
Raw Normal View History

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-algorithms docs-version-current docs-doc-page docs-doc-id-hash-tables/2023-11-28-breaking/breaking" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v3.0.1">
<title data-rh="true">Breaking Hash Table | mf</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-algorithms-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-algorithms-current"><meta data-rh="true" property="og:title" content="Breaking Hash Table | mf"><meta data-rh="true" name="description" content="How to get the linear time complexity in a hash table.
"><meta data-rh="true" property="og:description" content="How to get the linear time complexity in a hash table.
"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/"><link data-rh="true" rel="alternate" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/" hreflang="en"><link data-rh="true" rel="alternate" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://0VXRFPR4QF-dsn.algolia.net" crossorigin="anonymous"><link rel="search" type="application/opensearchdescription+xml" title="mf" href="/opensearch.xml">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="mf RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="mf Atom Feed">
<link rel="alternate" type="application/json" href="/blog/feed.json" title="mf JSON Feed">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css" integrity="sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM" crossorigin="anonymous"><link rel="stylesheet" href="/assets/css/styles.e1ac7597.css">
<script src="/assets/js/runtime~main.81d405e9.js" defer="defer"></script>
<script src="/assets/js/main.3450d297.js" defer="defer"></script>
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><b class="navbar__title text--truncate">mf</b></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Additional FI MU materials</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/algorithms/">Algorithms</a></li><li><a class="dropdown__link" href="/c/">C</a></li><li><a class="dropdown__link" href="/cpp/">C++</a></li></ul></div><a class="navbar__item navbar__link" href="/contributions/">Contributions</a><a class="navbar__item navbar__link" href="/talks/">Talks</a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/blog/">Blog</a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.
issues to occur.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="introduction">Introduction<a href="#introduction" class="hash-link" aria-label="Direct link to Introduction" title="Direct link to Introduction"></a></h2>
<p>Hash tables are very commonly used to represent sets or dictionaries. Even when
you look up solution to some problem that requires set or dictionary, it is more
than likely that you&#x27;ll find something that references usage of the hash table.
You might think it&#x27;s the only possible option<sup><a href="#user-content-fn-1" id="user-content-fnref-1" data-footnote-ref="true" aria-describedby="footnote-label">1</a></sup>, or it&#x27;s the best one<sup><a href="#user-content-fn-2" id="user-content-fnref-2" data-footnote-ref="true" aria-describedby="footnote-label">2</a></sup>.</p>
<p>One of the reasons to prefer hash tables over any other representation is the
fact that they are <strong>supposed</strong> to be faster than the alternatives, but the
truth lies somewhere in between.</p>
<p>One of the other possible implementations of the set is a balanced tree. Majorly
occurring implementations rely on the <em>red-black tree</em>, but you may see also
others like an <em>AVL tree</em><sup><a href="#user-content-fn-3" id="user-content-fnref-3" data-footnote-ref="true" aria-describedby="footnote-label">3</a></sup> or <em>B-tree</em><sup><a href="#user-content-fn-4" id="user-content-fnref-4" data-footnote-ref="true" aria-describedby="footnote-label">4</a></sup>.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="hash-table-v-trees">Hash Table v. Trees<a href="#hash-table-v-trees" class="hash-link" aria-label="Direct link to Hash Table v. Trees" title="Direct link to Hash Table v. Trees"></a></h2>
<p>The most interesting part are the differences between their implementations. Why
should you choose hash table, or why should you choose the tree implementation?
Let&#x27;s compare the differences one by one.</p>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="requirements">Requirements<a href="#requirements" class="hash-link" aria-label="Direct link to Requirements" title="Direct link to Requirements"></a></h3>
<p>We will start with the fundamentals on which the underlying data structures
rely. We can also consider them as <em>requirements</em> that must be met to be able to
use the underlying data structure.</p>
<p>Hash table relies on the <em>hash function</em> that is supposed to distribute the keys
in such way that they&#x27;re evenly spread across the slots where the keys (or
pairs, for dictionary) are stored, but at the same time they&#x27;re somewhat unique,
so no clustering occurs.</p>
<p>Trees depend on the <em>ordering</em> of the elements. They maintain the elements in
a sorted fashion, so for any pair of the elements that are used as keys, you
need to be able to decide which one of them is <em>smaller or equal to</em> the other.</p>
<p>Hash function can be easily created by using the bits that <em>uniquely</em> identify
a unique element. On the other hand, ordering may not be as easy to define.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Example</div><div class="admonitionContent_BuS1"><p>If you are familiar with complex numbers, they are a great example of a key that
does not have ordering (unless you go element-wise for the sake of storing them
in a tree; though the ordering <strong>is not</strong> defined on them).</p><p>Hashing them is much easier though, you can just “combine” the hashes of the
real and imaginary parts of the complex number to get a hash of the complex
number itself.</p></div></div>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="underlying-data-structure">Underlying data structure<a href="#underlying-data-structure" class="hash-link" aria-label="Direct link to Underlying data structure" title="Direct link to Underlying data structure"></a></h3>
<p>The most obvious difference is the <em>core</em> of the idea behind these data
structures. Hash tables rely on data being stored in one continuous piece of
memory (the array) where you can “guess” (by using the hash function) the
location of what you&#x27;re looking for in a constant time and also access that
location in the, said, constant time<sup><a href="#user-content-fn-5" id="user-content-fnref-5" data-footnote-ref="true" aria-describedby="footnote-label">5</a></sup>. In case the hash function is
<em>not good enough</em><sup><a href="#user-content-fn-6" id="user-content-fnref-6" data-footnote-ref="true" aria-describedby="footnote-label">6</a></sup>, you need to go in <em>blind</em>, and if it comes to the worst,
check everything.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tl;dr</div><div class="admonitionContent_BuS1"><ul>
<li>I know where should I look</li>
<li>I can look there instantenously</li>
<li>If my guesses are very wrong, I might need to check everything</li>
</ul></div></div>
<p>On the other hand, tree implementations rely on the self-balancing trees in
which you don&#x27;t get as <em>amazing</em> results as with the hash table, but they&#x27;re
<strong>consistent</strong>. Given that we have a self-balancing tree, the height of the tree
is same for <strong>every</strong> input and therefore checking for any element can take the
same time even in the worst case.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tl;dr</div><div class="admonitionContent_BuS1"><ul>
<li>I don&#x27;t know where to look</li>
<li>I know how to get there</li>
<li>Wherever I look, it takes me about the same time</li>
</ul></div></div>
<p>Let&#x27;s compare side by side:</p>
<table><thead><tr><th style="text-align:right">time complexity</th><th style="text-align:center">hash table</th><th style="text-align:center">tree</th></tr></thead><tbody><tr><td style="text-align:right">expected</td><td style="text-align:center">constant</td><td style="text-align:center">depends on the height</td></tr><tr><td style="text-align:right">worst-case</td><td style="text-align:center">gotta check everything</td><td style="text-align:center">depends on the height</td></tr></tbody></table>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="major-factors-of-hash-tables">Major Factors of Hash Tables<a href="#major-factors-of-hash-tables" class="hash-link" aria-label="Direct link to Major Factors of Hash Tables" title="Direct link to Major Factors of Hash Tables"></a></h2>
<p>Let&#x27;s have a look at the major factors that affect the efficiency and
functioning of a hash table. We have already mentioned the hash function that
plays a crucial role, but there are also different ways how you can implement
a hash table, so we will have a look at those too.</p>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="hash-functions">Hash functions<a href="#hash-functions" class="hash-link" aria-label="Direct link to Hash functions" title="Direct link to Hash functions"></a></h3>
<div class="theme-admonition theme-admonition-info admonition_xJq3 alert alert--info"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</div><div class="admonitionContent_BuS1"><p>We will start with a definition of hash function in a mathematical definition
and type signature in some known language:</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>h</mi><mo>:</mo><mi>T</mi><mo></mo><mi mathvariant="double-struck">N</mi></mrow><annotation encoding="application/x-tex"> h : T \rightarrow \mathbb{N}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em"></span><span class="mord mathnormal">h</span><span class="mspace" style="margin-right:0.2778em"></span><span class="mrel">:</span><span class="mspace" style="margin-right:0.2778em"></span></span><span class="base"><span class="strut" style="height:0.6833em"></span><span class="mord mathnormal" style="margin-right:0.13889em">T</span><span class="mspace" style="margin-right:0.2778em"></span><span class="mrel"></span><span class="mspace" style="margin-right:0.2778em"></span></span><span class="base"><span class="strut" style="height:0.6889em"></span><span class="mord mathbb">N</span></span></span></span></span><p>For a type signature we will just take the declaration from C++<sup><a href="#user-content-fn-7" id="user-content-fnref-7" data-footnote-ref="true" aria-describedby="footnote-label">7</a></sup>:</p><div class="language-cpp codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-cpp codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">std</span><span class="token double-colon punctuation" style="color:#393A34">::</span><span class="token plain">size_t </span><span class="token keyword" style="color:#00009f">operator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">const</span><span class="token plain"> T</span><span class="token operator" style="color:#393A34">&amp;</span><span class="token plain"> key</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">const</span><span class="token punctuation" style="color:#393A34">;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you compare with the mathematical definition, it is very similar, except for
the fact that the memory is not unlimited, so the <em>natural number</em> turned into
an <em>unsigned integer type</em> (on majority of platforms it will be a 64-bit
unsigned integer).</p></div></div>
<p>As we have already touched above, hash function gives “a guess” where to look
for the key (either when doing a look up, or for insertion to guess a suitable
spot for the insertion).</p>
<p>Hash functions are expected to have a so-called <em>avalanche effect</em> which means
that the smallest change to the key should result in a massive change of hash.
Avalanche effect technically guarantees that even when your data are clustered
together, it should lower the amount of conflicts that can occur.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Exercise for the reader</div><div class="admonitionContent_BuS1"><p>Try to give an example of a hash function that is not good at all.</p></div></div>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="implementation-details">Implementation details<a href="#implementation-details" class="hash-link" aria-label="Direct link to Implementation details" title="Direct link to Implementation details"></a></h3>
<p>There are different variations of the hash tables. You&#x27;ve more than likely seen
an implementation that keeps linked lists for buckets. However there are also
other variations that use probing instead.</p>
<p>With regards to the implementation details, we need to mention the fact that
even with the bounded hash (as we could&#x27;ve seen above), you&#x27;re not likely to
have all the buckets for different hashes available. Most common approach to
this is having a smaller set of buckets and modifying the hash to fit within.</p>
<p>One of the most common approaches is to keep lengths of the hash tables in the
powers of 2 which allows bit-masking to take place.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Example</div><div class="admonitionContent_BuS1"><p>Let&#x27;s say we&#x27;re given <code>h = 0xDEADBEEF</code> and we have <code>l = 65536=2^16</code> spots in our
hash table. What can we do here?</p><p>Well, we definitely have a bigger hash than spots available, so we need to
“shrink” it somehow. The most common practice is to take the lower bits of the
hash to represent an index in the table:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">h &amp; (l - 1)</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p><em>Why does this work?</em> Firstly we subtract 1 from the length (indices run from
<code>⟨0 ; l - 1⟩</code>, since table is zero-indexed). Therefore if we do <em>binary and</em> on
any number, we always get a valid index within the table. Let&#x27;s find the index
for our hash:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">0xDEADBEEF &amp; 0xFFFF = 0xBEEF</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></div>
<section data-footnotes="true" class="footnotes"><h2 class="anchor anchorWithStickyNavbar_LWe7 sr-only" id="footnote-label">Footnotes<a href="#footnote-label" class="hash-link" aria-label="Direct link to Footnotes" title="Direct link to Footnotes"></a></h2>
<ol>
<li id="user-content-fn-1">
<p>not true <a href="#user-content-fnref-1" data-footnote-backref="" aria-label="Back to reference 1" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-2">
<p>also not true <a href="#user-content-fnref-2" data-footnote-backref="" aria-label="Back to reference 2" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-3">
<p>actually the first of its kind (the self-balanced trees) <a href="#user-content-fnref-3" data-footnote-backref="" aria-label="Back to reference 3" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-4">
<p>Rust chose to implement this instead of the common choice of the red-black
or AVL tree; main difference lies in the fact that B-trees are not binary
trees <a href="#user-content-fnref-4" data-footnote-backref="" aria-label="Back to reference 4" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-5">
<p>This, of course, does not hold true for the educational implementations of
the hash tables where conflicts are handled by storing the items in the
linked lists. In practice linked lists are not that commonly used for
addressing this issue as it has even worse impact on the efficiency of the
data structure. <a href="#user-content-fnref-5" data-footnote-backref="" aria-label="Back to reference 5" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-6">
<p>My guess is not very good, or it&#x27;s really bad… <a href="#user-content-fnref-6" data-footnote-backref="" aria-label="Back to reference 6" class="data-footnote-backref"></a></p>
</li>
<li id="user-content-fn-7">
<p><a href="https://en.cppreference.com/w/cpp/utility/hash" target="_blank" rel="noopener noreferrer">https://en.cppreference.com/w/cpp/utility/hash</a> <a href="#user-content-fnref-7" data-footnote-backref="" aria-label="Back to reference 7" class="data-footnote-backref"></a></p>
</li>
</ol>
</section></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-tags-row row margin-bottom--sm"><div class="col"><b>Tags:</b><ul class="tags_jXut padding--none margin-left--sm"><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/cpp/">cpp</a></li><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/python/">python</a></li><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/hash-tables/">hash-tables</a></li></ul></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/mfocko/blog/tree/main/algorithms/12-hash-tables/2023-11-28-breaking/index.md" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_vwxv"><span class="theme-last-updated">Last updated<!-- --> on <b><time datetime="2023-11-28T00:00:00.000Z">Nov 28, 2023</time></b></span></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/algorithms/category/hash-tables/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Hash Tables</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/algorithms/hash-tables/breaking/python/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Breaking Python</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#introduction" class="table-of-contents__link toc-highlight">Introduction</a></li><li><a href="#hash-table-v-trees" class="table-of-contents__link toc-highlight">Hash Table v. Trees</a><ul><li><a href="#requirements" class="table-of-contents__link toc-highlight">Requirements</a></li><li><a href="#underlying-data-structure" class="table-of-contents__link toc-highlight">Underlying data structure</a></li></ul></li><li><a href="#major-factors-of-hash-tables" class="table-of-contents__link toc-highlight">Major Factors of Hash Tables</a><ul><li><a href="#hash-functions" class="table-of-contents__link toc-highlight">Hash functions</a></li><li><a href="#implementation-details" class="table-of-contents__link toc-highlight">Implementation details</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Git</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://gitlab.com/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitLab<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://git.mfocko.xyz/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">Gitea (self-hosted)<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-1
</body>
</html>