mirror of
https://github.com/mfocko/blog.git
synced 2024-11-10 08:19:07 +01:00
151 lines
41 KiB
HTML
151 lines
41 KiB
HTML
|
<!doctype html>
|
|||
|
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-algorithms docs-version-current docs-doc-page docs-doc-id-hash-tables/2023-11-28-breaking/breaking" data-has-hydrated="false">
|
|||
|
<head>
|
|||
|
<meta charset="UTF-8">
|
|||
|
<meta name="generator" content="Docusaurus v3.0.0">
|
|||
|
<title data-rh="true">Breaking Hash Table | mf</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-algorithms-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-algorithms-current"><meta data-rh="true" property="og:title" content="Breaking Hash Table | mf"><meta data-rh="true" name="description" content="How to get the linear time complexity in a hash table.
|
|||
|
"><meta data-rh="true" property="og:description" content="How to get the linear time complexity in a hash table.
|
|||
|
"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/"><link data-rh="true" rel="alternate" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/" hreflang="en"><link data-rh="true" rel="alternate" href="https://blog.mfocko.xyz/algorithms/hash-tables/breaking/" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://0VXRFPR4QF-dsn.algolia.net" crossorigin="anonymous"><link rel="search" type="application/opensearchdescription+xml" title="mf" href="/opensearch.xml">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="mf RSS Feed">
|
|||
|
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="mf Atom Feed">
|
|||
|
<link rel="alternate" type="application/json" href="/blog/feed.json" title="mf JSON Feed">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css" integrity="sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM" crossorigin="anonymous"><link rel="stylesheet" href="/assets/css/styles.0f577c26.css">
|
|||
|
<script src="/assets/js/runtime~main.f17742ff.js" defer="defer"></script>
|
|||
|
<script src="/assets/js/main.a809bb25.js" defer="defer"></script>
|
|||
|
</head>
|
|||
|
<body class="navigation-with-keyboard">
|
|||
|
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><b class="navbar__title text--truncate">mf</b></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Additional FI MU materials</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/algorithms/">Algorithms</a></li><li><a class="dropdown__link" href="/c/">C</a></li><li><a class="dropdown__link" href="/cpp/">C++</a></li></ul></div><a class="navbar__item navbar__link" href="/contributions/">Contributions</a><a class="navbar__item navbar__link" href="/talks/">Talks</a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/blog/">Blog</a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP 
toggleButtonDisabled_aARS" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" 
class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.
|
|||
|
issues to occur.</p>
|
|||
|
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="introduction">Introduction<a href="#introduction" class="hash-link" aria-label="Direct link to Introduction" title="Direct link to Introduction"></a></h2>
|
|||
|
<p>Hash tables are very commonly used to represent sets or dictionaries. Even when
|
|||
|
you look up a solution to some problem that requires a set or a dictionary, it is more
|
|||
|
than likely that you'll find something that references usage of the hash table.
|
|||
|
You might think it's the only possible option<sup><a href="#user-content-fn-1" id="user-content-fnref-1" data-footnote-ref="true" aria-describedby="footnote-label">1</a></sup>, or it's the best one<sup><a href="#user-content-fn-2" id="user-content-fnref-2" data-footnote-ref="true" aria-describedby="footnote-label">2</a></sup>.</p>
|
|||
|
<p>One of the reasons to prefer hash tables over any other representation is the
|
|||
|
fact that they are <strong>supposed</strong> to be faster than the alternatives, but the
|
|||
|
truth lies somewhere in between.</p>
|
|||
|
<p>One of the other possible implementations of the set is a balanced tree. Commonly
|
|||
|
occurring implementations rely on the <em>red-black tree</em>, but you may also see
|
|||
|
others like an <em>AVL tree</em><sup><a href="#user-content-fn-3" id="user-content-fnref-3" data-footnote-ref="true" aria-describedby="footnote-label">3</a></sup> or <em>B-tree</em><sup><a href="#user-content-fn-4" id="user-content-fnref-4" data-footnote-ref="true" aria-describedby="footnote-label">4</a></sup>.</p>
|
|||
|
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="hash-table-v-trees">Hash Table v. Trees<a href="#hash-table-v-trees" class="hash-link" aria-label="Direct link to Hash Table v. Trees" title="Direct link to Hash Table v. Trees"></a></h2>
|
|||
|
<p>The most interesting part is the differences between their implementations. Why
|
|||
|
should you choose a hash table, or why should you choose the tree implementation?
|
|||
|
Let's compare the differences one by one.</p>
|
|||
|
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="requirements">Requirements<a href="#requirements" class="hash-link" aria-label="Direct link to Requirements" title="Direct link to Requirements"></a></h3>
|
|||
|
<p>We will start with the fundamentals on which the underlying data structures
|
|||
|
rely. We can also consider them as <em>requirements</em> that must be met to be able to
|
|||
|
use the underlying data structure.</p>
|
|||
|
<p>A hash table relies on the <em>hash function</em> that is supposed to distribute the keys
|
|||
|
in such a way that they're evenly spread across the slots where the keys (or
|
|||
|
pairs, for dictionary) are stored, but at the same time they're somewhat unique,
|
|||
|
so no clustering occurs.</p>
|
|||
|
<p>Trees depend on the <em>ordering</em> of the elements. They maintain the elements in
|
|||
|
a sorted fashion, so for any pair of the elements that are used as keys, you
|
|||
|
need to be able to decide which one of them is <em>smaller or equal to</em> the other.</p>
|
|||
|
<p>A hash function can be easily created by using the bits that <em>uniquely</em> identify
|
|||
|
a unique element. On the other hand, ordering may not be as easy to define.</p>
|
|||
|
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Example</div><div class="admonitionContent_BuS1"><p>If you are familiar with complex numbers, they are a great example of a key that
|
|||
|
does not have ordering (unless you go element-wise for the sake of storing them
|
|||
|
in a tree; though the ordering <strong>is not</strong> defined on them).</p><p>Hashing them is much easier though, you can just “combine” the hashes of the
|
|||
|
real and imaginary parts of the complex number to get a hash of the complex
|
|||
|
number itself.</p></div></div>
|
|||
|
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="underlying-data-structure">Underlying data structure<a href="#underlying-data-structure" class="hash-link" aria-label="Direct link to Underlying data structure" title="Direct link to Underlying data structure"></a></h3>
|
|||
|
<p>The most obvious difference is the <em>core</em> of the idea behind these data
|
|||
|
structures. Hash tables rely on data being stored in one continuous piece of
|
|||
|
memory (the array) where you can “guess” (by using the hash function) the
|
|||
|
location of what you're looking for in a constant time and also access that
|
|||
|
location in said constant time<sup><a href="#user-content-fn-5" id="user-content-fnref-5" data-footnote-ref="true" aria-describedby="footnote-label">5</a></sup>. In case the hash function is
|
|||
|
<em>not good enough</em><sup><a href="#user-content-fn-6" id="user-content-fnref-6" data-footnote-ref="true" aria-describedby="footnote-label">6</a></sup>, you need to go in <em>blind</em>, and if it comes to the worst,
|
|||
|
check everything.</p>
|
|||
|
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tl;dr</div><div class="admonitionContent_BuS1"><ul>
|
|||
|
<li>I know where I should look</li>
|
|||
|
<li>I can look there instantaneously</li>
|
|||
|
<li>If my guesses are very wrong, I might need to check everything</li>
|
|||
|
</ul></div></div>
|
|||
|
<p>On the other hand, tree implementations rely on the self-balancing trees in
|
|||
|
which you don't get as <em>amazing</em> results as with the hash table, but they're
|
|||
|
<strong>consistent</strong>. Given that we have a self-balancing tree, the height of the tree
|
|||
|
is the same for <strong>every</strong> input and therefore checking for any element can take the
|
|||
|
same time even in the worst case.</p>
|
|||
|
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tl;dr</div><div class="admonitionContent_BuS1"><ul>
|
|||
|
<li>I don't know where to look</li>
|
|||
|
<li>I know how to get there</li>
|
|||
|
<li>Wherever I look, it takes me about the same time</li>
|
|||
|
</ul></div></div>
|
|||
|
<p>Let's compare side by side:</p>
|
|||
|
<table><thead><tr><th style="text-align:right">time complexity</th><th style="text-align:center">hash table</th><th style="text-align:center">tree</th></tr></thead><tbody><tr><td style="text-align:right">expected</td><td style="text-align:center">constant</td><td style="text-align:center">depends on the height</td></tr><tr><td style="text-align:right">worst-case</td><td style="text-align:center">gotta check everything</td><td style="text-align:center">depends on the height</td></tr></tbody></table>
|
|||
|
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="major-factors-of-hash-tables">Major Factors of Hash Tables<a href="#major-factors-of-hash-tables" class="hash-link" aria-label="Direct link to Major Factors of Hash Tables" title="Direct link to Major Factors of Hash Tables"></a></h2>
|
|||
|
<p>Let's have a look at the major factors that affect the efficiency and
|
|||
|
functioning of a hash table. We have already mentioned the hash function that
|
|||
|
plays a crucial role, but there are also different ways in which you can implement
|
|||
|
a hash table, so we will have a look at those too.</p>
|
|||
|
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="hash-functions">Hash functions<a href="#hash-functions" class="hash-link" aria-label="Direct link to Hash functions" title="Direct link to Hash functions"></a></h3>
|
|||
|
<div class="theme-admonition theme-admonition-info admonition_xJq3 alert alert--info"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</div><div class="admonitionContent_BuS1"><p>We will start with a definition of a hash function, given both as a mathematical definition
|
|||
|
and type signature in some known language:</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>h</mi><mo>:</mo><mi>T</mi><mo>→</mo><mi mathvariant="double-struck">N</mi></mrow><annotation encoding="application/x-tex"> h : T \rightarrow \mathbb{N}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em"></span><span class="mord mathnormal">h</span><span class="mspace" style="margin-right:0.2778em"></span><span class="mrel">:</span><span class="mspace" style="margin-right:0.2778em"></span></span><span class="base"><span class="strut" style="height:0.6833em"></span><span class="mord mathnormal" style="margin-right:0.13889em">T</span><span class="mspace" style="margin-right:0.2778em"></span><span class="mrel">→</span><span class="mspace" style="margin-right:0.2778em"></span></span><span class="base"><span class="strut" style="height:0.6889em"></span><span class="mord mathbb">N</span></span></span></span></span><p>For a type signature we will just take the declaration from C++<sup><a href="#user-content-fn-7" id="user-content-fnref-7" data-footnote-ref="true" aria-describedby="footnote-label">7</a></sup>:</p><div class="language-cpp codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-cpp codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">std</span><span class="token double-colon punctuation" style="color:#393A34">::</span><span class="token plain">size_t </span><span class="token keyword" style="color:#00009f">operator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token 
punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">const</span><span class="token plain"> T</span><span class="token operator" style="color:#393A34">&</span><span class="token plain"> key</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">const</span><span class="token punctuation" style="color:#393A34">;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you compare with the mathematical definition, it is very similar, except for
|
|||
|
the fact that the memory is not unlimited, so the <em>natural number</em> turned into
|
|||
|
an <em>unsigned integer type</em> (on the majority of platforms it will be a 64-bit
|
|||
|
unsigned integer).</p></div></div>
|
|||
|
<p>As we have already touched on above, a hash function gives “a guess” where to look
|
|||
|
for the key (either when doing a lookup, or for insertion to guess a suitable
|
|||
|
spot for the insertion).</p>
|
|||
|
<p>Hash functions are expected to have a so-called <em>avalanche effect</em> which means
|
|||
|
that the smallest change to the key should result in a massive change of hash.
|
|||
|
Avalanche effect technically guarantees that even when your data are clustered
|
|||
|
together, it should lower the amount of conflicts that can occur.</p>
|
|||
|
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Exercise for the reader</div><div class="admonitionContent_BuS1"><p>Try to give an example of a hash function that is not good at all.</p></div></div>
|
|||
|
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="implementation-details">Implementation details<a href="#implementation-details" class="hash-link" aria-label="Direct link to Implementation details" title="Direct link to Implementation details"></a></h3>
|
|||
|
<p>There are different variations of the hash tables. You've more than likely seen
|
|||
|
an implementation that keeps linked lists for buckets. However, there are also
|
|||
|
other variations that use probing instead.</p>
|
|||
|
<p>With regards to the implementation details, we need to mention the fact that
|
|||
|
even with the bounded hash (as we could've seen above), you're not likely to
|
|||
|
have all the buckets for different hashes available. The most common approach to
|
|||
|
this is having a smaller set of buckets and modifying the hash to fit within.</p>
|
|||
|
<p>One of the most common approaches is to keep lengths of the hash tables in the
|
|||
|
powers of 2 which allows bit-masking to take place.</p>
|
|||
|
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Example</div><div class="admonitionContent_BuS1"><p>Let's say we're given <code>h = 0xDEADBEEF</code> and we have <code>l = 65536=2^16</code> spots in our
|
|||
|
hash table. What can we do here?</p><p>Well, we definitely have a bigger hash than spots available, so we need to
|
|||
|
“shrink” it somehow. The most common practice is to take the lower bits of the
|
|||
|
hash to represent an index in the table:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">h & (l - 1)</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p><em>Why does this work?</em> Firstly we subtract 1 from the length (indices run from
|
|||
|
<code>⟨0 ; l - 1⟩</code>, since the table is zero-indexed). Because <code>l</code> is a power of 2, <code>l - 1</code> has all of its lower bits set. Therefore if we do <em>binary and</em> on
|
|||
|
any number, we always get a valid index within the table. Let's find the index
|
|||
|
for our hash:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">0xDEADBEEF & 0xFFFF = 0xBEEF</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></div>
|
|||
|
<section data-footnotes="true" class="footnotes"><h2 class="anchor anchorWithStickyNavbar_LWe7 sr-only" id="footnote-label">Footnotes<a href="#footnote-label" class="hash-link" aria-label="Direct link to Footnotes" title="Direct link to Footnotes"></a></h2>
|
|||
|
<ol>
|
|||
|
<li id="user-content-fn-1">
|
|||
|
<p>not true <a href="#user-content-fnref-1" data-footnote-backref="" aria-label="Back to reference 1" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-2">
|
|||
|
<p>also not true <a href="#user-content-fnref-2" data-footnote-backref="" aria-label="Back to reference 2" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-3">
|
|||
|
<p>actually the first of its kind (the self-balanced trees) <a href="#user-content-fnref-3" data-footnote-backref="" aria-label="Back to reference 3" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-4">
|
|||
|
<p>Rust chose to implement this instead of the common choice of the red-black
|
|||
|
or AVL tree; main difference lies in the fact that B-trees are not binary
|
|||
|
trees <a href="#user-content-fnref-4" data-footnote-backref="" aria-label="Back to reference 4" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-5">
|
|||
|
<p>This, of course, does not hold true for the educational implementations of
|
|||
|
the hash tables where conflicts are handled by storing the items in the
|
|||
|
linked lists. In practice linked lists are not that commonly used for
|
|||
|
addressing this issue as it has an even worse impact on the efficiency of the
|
|||
|
data structure. <a href="#user-content-fnref-5" data-footnote-backref="" aria-label="Back to reference 5" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-6">
|
|||
|
<p>My guess is not very good, or it's really bad… <a href="#user-content-fnref-6" data-footnote-backref="" aria-label="Back to reference 6" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
<li id="user-content-fn-7">
|
|||
|
<p><a href="https://en.cppreference.com/w/cpp/utility/hash" target="_blank" rel="noopener noreferrer">https://en.cppreference.com/w/cpp/utility/hash</a> <a href="#user-content-fnref-7" data-footnote-backref="" aria-label="Back to reference 7" class="data-footnote-backref">↩</a></p>
|
|||
|
</li>
|
|||
|
</ol>
|
|||
|
</section></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-tags-row row margin-bottom--sm"><div class="col"><b>Tags:</b><ul class="tags_jXut padding--none margin-left--sm"><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/cpp/">cpp</a></li><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/python/">python</a></li><li class="tag_QGVx"><a class="tag_zVej tagRegular_sFm0" href="/algorithms/tags/hash-tables/">hash-tables</a></li></ul></div></div><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/mfocko/blog/tree/main/algorithms/12-hash-tables/2023-11-28-breaking/index.md" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_vwxv"><span class="theme-last-updated">Last updated<!-- --> on <b><time datetime="2023-11-28T00:00:00.000Z">Nov 28, 2023</time></b></span></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/algorithms/category/hash-tables/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Hash Tables</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/algorithms/hash-tables/breaking/python/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Breaking Python</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#introduction" 
class="table-of-contents__link toc-highlight">Introduction</a></li><li><a href="#hash-table-v-trees" class="table-of-contents__link toc-highlight">Hash Table v. Trees</a><ul><li><a href="#requirements" class="table-of-contents__link toc-highlight">Requirements</a></li><li><a href="#underlying-data-structure" class="table-of-contents__link toc-highlight">Underlying data structure</a></li></ul></li><li><a href="#major-factors-of-hash-tables" class="table-of-contents__link toc-highlight">Major Factors of Hash Tables</a><ul><li><a href="#hash-functions" class="table-of-contents__link toc-highlight">Hash functions</a></li><li><a href="#implementation-details" class="table-of-contents__link toc-highlight">Implementation details</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Git</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://gitlab.com/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitLab<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://git.mfocko.xyz/mfocko" target="_blank" rel="noopener noreferrer" class="footer__link-item">Gitea (self-hosted)<svg width="13.5" height="13.5" 
aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-1
|
|||
|
</body>
|
|||
|
</html>
|