2023-11-28 19:40:59 +01:00
<!doctype html>
< html lang = "en" dir = "ltr" class = "docs-wrapper plugin-docs plugin-id-algorithms docs-version-current docs-doc-page docs-doc-id-hash-tables/2023-11-28-breaking/python" data-has-hydrated = "false" >
< head >
< meta charset = "UTF-8" >
2023-12-28 18:55:58 +01:00
< meta name = "generator" content = "Docusaurus v3.0.1" >
2023-11-28 19:40:59 +01:00
< title data-rh = "true" > Breaking Python | mf< / title > < meta data-rh = "true" name = "viewport" content = "width=device-width,initial-scale=1" > < meta data-rh = "true" name = "twitter:card" content = "summary_large_image" > < meta data-rh = "true" property = "og:url" content = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/python/" > < meta data-rh = "true" property = "og:locale" content = "en" > < meta data-rh = "true" name = "docusaurus_locale" content = "en" > < meta data-rh = "true" name = "docsearch:language" content = "en" > < meta data-rh = "true" name = "docusaurus_version" content = "current" > < meta data-rh = "true" name = "docusaurus_tag" content = "docs-algorithms-current" > < meta data-rh = "true" name = "docsearch:version" content = "current" > < meta data-rh = "true" name = "docsearch:docusaurus_tag" content = "docs-algorithms-current" > < meta data-rh = "true" property = "og:title" content = "Breaking Python | mf" > < meta data-rh = "true" name = "description" content = "Actually getting the worst-case time complexity in Python .
">< meta data-rh = "true" property = "og:description" content = "Actually getting the worst-case time complexity in Python .
">< link data-rh = "true" rel = "icon" href = "/img/favicon.ico" > < link data-rh = "true" rel = "canonical" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/python/" > < link data-rh = "true" rel = "alternate" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/python/" hreflang = "en" > < link data-rh = "true" rel = "alternate" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/python/" hreflang = "x-default" > < link data-rh = "true" rel = "preconnect" href = "https://0VXRFPR4QF-dsn.algolia.net" crossorigin = "anonymous" > < link rel = "search" type = "application/opensearchdescription+xml" title = "mf" href = "/opensearch.xml" >
< link rel = "alternate" type = "application/rss+xml" href = "/blog/rss.xml" title = "mf RSS Feed" >
< link rel = "alternate" type = "application/atom+xml" href = "/blog/atom.xml" title = "mf Atom Feed" >
< link rel = "alternate" type = "application/json" href = "/blog/feed.json" title = "mf JSON Feed" >
2023-12-28 18:55:58 +01:00
< link rel = "stylesheet" href = "https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css" integrity = "sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM" crossorigin = "anonymous" > < link rel = "stylesheet" href = "/assets/css/styles.e1ac7597.css" >
< script src = "/assets/js/runtime~main.8dd9984c.js" defer = "defer" > < / script >
< script src = "/assets/js/main.c998cb37.js" defer = "defer" > < / script >
2023-11-28 19:40:59 +01:00
< / head >
< body class = "navigation-with-keyboard" >
2023-11-28 20:27:15 +01:00
< script > ! function ( ) { function t ( t ) { document . documentElement . setAttribute ( "data-theme" , t ) } var e = function ( ) { try { return new URLSearchParams ( window . location . search ) . get ( "docusaurus-theme" ) } catch ( t ) { } } ( ) || function ( ) { try { return localStorage . getItem ( "theme" ) } catch ( t ) { } } ( ) ; t ( null !== e ? e : "light" ) } ( ) , function ( ) { try { const c = new URLSearchParams ( window . location . search ) . entries ( ) ; for ( var [ t , e ] of c ) if ( t . startsWith ( "docusaurus-data-" ) ) { var a = t . replace ( "docusaurus-data-" , "data-" ) ; document . documentElement . setAttribute ( a , e ) } } catch ( t ) { } } ( ) < / script > < div id = "__docusaurus" > < div role = "region" aria-label = "Skip to main content" > < a class = "skipToContent_fXgn" href = "#__docusaurus_skipToContent_fallback" > Skip to main content< / a > < / div > < nav aria-label = "Main" class = "navbar navbar--fixed-top" > < div class = "navbar__inner" > < div class = "navbar__items" > < button aria-label = "Toggle navigation bar" aria-expanded = "false" class = "navbar__toggle clean-btn" type = "button" > < svg width = "30" height = "30" viewBox = "0 0 30 30" aria-hidden = "true" > < path stroke = "currentColor" stroke-linecap = "round" stroke-miterlimit = "10" stroke-width = "2" d = "M4 7h22M4 15h22M4 23h22" > < / path > < / svg > < / button > < a class = "navbar__brand" href = "/" > < b class = "navbar__title text--truncate" > mf< / b > < / a > < div class = "navbar__item dropdown dropdown--hoverable" > < a href = "#" aria-haspopup = "true" aria-expanded = "false" role = "button" class = "navbar__link" > Additional FI MU materials< / a > < ul class = "dropdown__menu" > < li > < a aria-current = "page" class = "dropdown__link dropdown__link--active" href = "/algorithms/" > Algorithms< / a > < / li > < li > < a class = "dropdown__link" href = "/c/" > C< / a > < / li > < li > < a class = "dropdown__link" href = "/cpp/" > C++< / a > 
< / li > < / ul > < / div > < a class = "navbar__item navbar__link" href = "/contributions/" > Contributions< / a > < a class = "navbar__item navbar__link" href = "/talks/" > Talks< / a > < / div > < div class = "navbar__items navbar__items--right" > < a class = "navbar__item navbar__link" href = "/blog/" > Blog< / a > < div class = "toggle_vylO colorModeToggle_DEke" > < button class = "clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type = "button" disabled = "" title = "Switch between dark and light mode (currently light mode)" aria-label = "Switch between dark and light mode (currently light mode)" aria-live = "polite" > < svg viewBox = "0 0 24 24" width = "24" height = "24" class = "lightToggleIcon_pyhR" > < path fill = "currentColor" d = "M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z" > < / path > < / svg > < svg viewBox = "0 0 24 24" width = "24" height = "24" class = "darkToggleIcon_wfgR" > < path fill = "currentColor" d = 
"M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z" > < / path > < / svg > < / button > < / div > < div class = "navbarSearchContainer_Bca1" > < button type = "button" class = "DocSearch DocSearch-Button" aria-label = "Search" > < span class = "DocSearch-Button-Container" > < svg width = "20" height = "20" class = "DocSearch-Search-Icon" viewBox = "0 0 20 20" > < path d = "M14.386 14 . 386l4 . 0877 4 . 0877-4 . 0877-4 . 0877c-2 .
2023-11-28 19:40:59 +01:00
<p>Our language of choice for bringing the worst out of the hash table is <em>Python</em>.</p>
<p>Let's start by talking about the hash function and why we've chosen Python for
this. The hash function for integers in Python is simply the <em>identity</em>, so, as you
might've guessed, there's no avalanche effect. Another thing that helps us is the fact
that integers in Python are technically <code>BigInt</code>s<sup><a href="#user-content-fn-1" id="user-content-fnref-1" data-footnote-ref="true" aria-describedby="footnote-label">1</a></sup>. This allows us to put a bit
more pressure on the hashing function.</p>
<p>From the perspective of the implementation, it is a hash table that uses probing
to resolve conflicts. This also means that it's a contiguous space in memory.
Indexing works like in the provided example above. When the hash table reaches
a <em>breaking point</em> (defined somewhere in the C code), it reallocates the table
and rehashes everything.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tip</div><div class="admonitionContent_BuS1"><p>Resizing and rehashing can reduce the conflicts. This comes from the fact
that the position in the table is determined by the hash and the size of the
table itself.</p></div></div>
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "preparing-the-attack" > Preparing the attack< a href = "#preparing-the-attack" class = "hash-link" aria-label = "Direct link to Preparing the attack" title = "Direct link to Preparing the attack" > < / a > < / h2 >
<p>Knowing the things above, it is not that hard to come up with a way to cause
as many conflicts as possible. Let's go over what we know:</p>
<ol>
<li>We know that integers are hashed to themselves.</li>
<li>We also know that only the lower bits of that hash are used as
indices.</li>
<li>We also know that there's a rehashing on resize that could possibly fix the
conflicts.</li>
</ol>
< p > We will test with different sequences:< / p >
<ol>
<li>ordered one, numbers from 1 to N</li>
<li>ordered one in a reversed order, numbers from N back to 1</li>
<li>numbers that are shifted to the left, so they create conflicts until resize</li>
<li>numbers that are shifted to the left, but resizing helps only in the end</li>
<li>numbers that are shifted to the left, but they won't be taken into account even
after the final resize</li>
</ol>
<p>For each of these sequences, we will insert 10⁷ elements and look each of them
up 10 times in a row.</p>
< p > As a base of our benchmark, we will use a < code > Strategy< / code > class and then for each
strategy we will just implement the sequence of numbers that it uses:< / p >
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > class< / span > < span class = "token plain" > < / span > < span class = "token class-name" > Strategy< / span > < span class = "token punctuation" style = "color:#393A34" > :< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > def< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > __init__< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > self< / span > < span class = "token punctuation" style = "color:#393A34" > ,< / span > < span class = "token plain" > data_structure< / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token builtin" > set< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > :< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > self< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token plain" > _table < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > data_structure< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = 
"token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" style = "display:inline-block" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token decorator annotation punctuation" style = "color:#393A34" > @cached_property< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > def< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > elements< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > self< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > :< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > raise< / span > < span class = "token plain" > NotImplementedError< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token string" style = "color:#e3116c" > " Implement for each strategy" < / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" style = "display:inline-block" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token decorator annotation punctuation" style = "color:#393A34" > 
@property< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > def< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > name< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > self< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > :< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" styl
< h3 class = "anchor anchorWithStickyNavbar_LWe7" id = "sequences" > Sequences< a href = "#sequences" class = "hash-link" aria-label = "Direct link to Sequences" title = "Direct link to Sequences" > < / a > < / h3 >
<p>Let's have a look at how we generate the numbers to be inserted:</p>
< ul >
< li > ordered sequence (ascending)<!-- -->
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > for< / span > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > in< / span > < span class = "token plain" > < / span > < span class = "token builtin" > range< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > N_ELEMENTS< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< / li >
< li > ordered sequence (descending)<!-- -->
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > for< / span > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > in< / span > < span class = "token plain" > < / span > < span class = "token builtin" > reversed< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token builtin" > range< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > N_ELEMENTS< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< / li >
< li > progressive sequence that “heals” on resize<!-- -->
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > < < < / span > < span class = "token plain" > < / span > < span class = "token builtin" > max< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token number" style = "color:#36acaa" > 5< / span > < span class = "token punctuation" style = "color:#393A34" > ,< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token plain" > bit_length< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > for< / span > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > in< / span > < span class = "token plain" > < / span > < span class = "token builtin" > range< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > N_ELEMENTS< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to 
clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< / li >
< li > progressive sequence that “heals” in the end<!-- -->
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > < < < / span > < span class = "token plain" > < / span > < span class = "token builtin" > max< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token number" style = "color:#36acaa" > 5< / span > < span class = "token punctuation" style = "color:#393A34" > ,< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token plain" > bit_length< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > for< / span > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > in< / span > < span class = "token plain" > < / span > < span class = "token builtin" > reversed< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token builtin" > range< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > N_ELEMENTS< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < 
span class = "token punctuation" style = "color:#393A34" > )< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< / li >
< li > conflicts everywhere<!-- -->
< div class = "language-py codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-py codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > < < < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 32< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > for< / span > < span class = "token plain" > x < / span > < span class = "token keyword" style = "color:#00009f" > in< / span > < span class = "token plain" > < / span > < span class = "token builtin" > range< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > N_ELEMENTS< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< / li >
< / ul >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "results" > Results< a href = "#results" class = "hash-link" aria-label = "Direct link to Results" title = "Direct link to Results" > < / a > < / h2 >
<p>Let's have a look at the obtained results after running the code:</p>
< table > < thead > < tr > < th style = "text-align:center" > Technique< / th > < th style = "text-align:right" > Insertion phase< / th > < th style = "text-align:right" > Lookup phase< / th > < / tr > < / thead > < tbody > < tr > < td style = "text-align:center" > ordered sequence (ascending)< / td > < td style = "text-align:right" > < code > 558.60ms< / code > < / td > < td style = "text-align:right" > < code > 3304.26ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > ordered sequence (descending)< / td > < td style = "text-align:right" > < code > 554.08ms< / code > < / td > < td style = "text-align:right" > < code > 3365.84ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > progressive sequence that “heals” on resize< / td > < td style = "text-align:right" > < code > 3781.30ms< / code > < / td > < td style = "text-align:right" > < code > 28565.71ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > progressive sequence that “heals” in the end< / td > < td style = "text-align:right" > < code > 3280.38ms< / code > < / td > < td style = "text-align:right" > < code > 26494.61ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > conflicts everywhere< / td > < td style = "text-align:right" > < code > 4027.54ms< / code > < / td > < td style = "text-align:right" > < code > 29132.92ms< / code > < / td > < / tr > < / tbody > < / table >
<p>You can see a noticeable “jump” in the time after switching to the “progressive”
sequence. The last sequence that has conflicts all the time has the worst time,
even though it's rather comparable with the first progressive sequence with
regard to the insertion phase.</p>
< p > If we were to compare the < em > always conflicting< / em > one with the first one, we can
see that insertion took over 7× longer and lookups almost 9× longer.< / p >
2023-11-28 20:47:31 +01:00
< p > You can have a look at the code < a href = "/files/algorithms/hash-tables/breaking/benchmark.py" target = "_blank" rel = "noopener noreferrer" > here< / a > .< / p >
2023-11-28 19:40:59 +01:00
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "comparing-with-the-tree" > Comparing with the tree< a href = "#comparing-with-the-tree" class = "hash-link" aria-label = "Direct link to Comparing with the tree" title = "Direct link to Comparing with the tree" > < / a > < / h2 >
2023-11-28 20:47:31 +01:00
< div class = "theme-admonition theme-admonition-danger admonition_xJq3 alert alert--danger" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 12 16" > < path fill-rule = "evenodd" d = "M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z" > < / path > < / svg > < / span > danger< / div > < div class = "admonitionContent_BuS1" > < p > Source code can be found < a href = "/files/algorithms/hash-tables/breaking/benchmark.cpp" target = "_blank" rel = "noopener noreferrer" > here< / a > .< / p > < p > < em > Viewer discretion advised.< / em > < / p > < / div > < / div >
2023-11-28 19:40:59 +01:00
<p>Python doesn't have a tree structure for sets/maps implemented, therefore for
a comparison we will run a similar benchmark in C++. By running the same
sequences on both a hash table and a tree (RB-tree) we will obtain the following
results:</p>
< table > < thead > < tr > < th style = "text-align:center" > Technique< / th > < th style = "text-align:right" > Insertion (hash)< / th > < th style = "text-align:right" > Lookup (hash)< / th > < th style = "text-align:right" > Insertion (tree)< / th > < th style = "text-align:right" > Lookup (tree)< / th > < / tr > < / thead > < tbody > < tr > < td style = "text-align:center" > ordered (ascending)< / td > < td style = "text-align:right" > < code > 316ms< / code > < / td > < td style = "text-align:right" > < code > 298ms< / code > < / td > < td style = "text-align:right" > < code > 2098ms< / code > < / td > < td style = "text-align:right" > < code > 5914ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > ordered (descending)< / td > < td style = "text-align:right" > < code > 259ms< / code > < / td > < td style = "text-align:right" > < code > 315ms< / code > < / td > < td style = "text-align:right" > < code > 1958ms< / code > < / td > < td style = "text-align:right" > < code > 14747ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > progressive a)< / td > < td style = "text-align:right" > < code > 1152ms< / code > < / td > < td style = "text-align:right" > < code > 6021ms< / code > < / td > < td style = "text-align:right" > < code > 2581ms< / code > < / td > < td style = "text-align:right" > < code > 16074ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > progressive b)< / td > < td style = "text-align:right" > < code > 1041ms< / code > < / td > < td style = "text-align:right" > < code > 6096ms< / code > < / td > < td style = "text-align:right" > < code > 2770ms< / code > < / td > < td style = "text-align:right" > < code > 15986ms< / code > < / td > < / tr > < tr > < td style = "text-align:center" > conflicts< / td > < td style = "text-align:right" > < code > 964ms< / code > < / td > < td style = "text-align:right" > < code > 1633ms< / code > < / td > < td style = "text-align:right" > < code > 2559ms< 
/ code > < / td > < td style = "text-align:right" > < code > 13285ms< / code > < / td > < / tr > < / tbody > < / table >
< div class = "theme-admonition theme-admonition-note admonition_xJq3 alert alert--secondary" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 14 16" > < path fill-rule = "evenodd" d = "M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z" > < / path > < / svg > < / span > note< / div > < div class = "admonitionContent_BuS1" > < p > We can't forget that implementation details may be involved. The hash function is still
the identity, to my knowledge.< / p > < / div > < / div >
< p > One interesting thing to notice is the fact that the progressive sequences took
the most time in lookups (which is not the same as in Python).< / p >
< p > Now, if we have a look at the tree implementation, we can notice two very
distinctive things:< / p >
< ol >
< li > Tree implementations are not affected by the input, therefore (except for the
first sequence) we can see < strong > very consistent< / strong > times.< / li >
< li > Compared to the hash table the times are much higher and not very ideal.< / li >
< / ol >
< p > The reason for the 2nd point may not be very obvious. From the technical
perspective it makes some sense. Let's dive into it!< / p >
< p > If we take a hash table, it is an array in memory, therefore it is a contiguous
piece of memory. (For more information, I'd suggest looking into the 1st blog
post in the references section below, by < em > Bjarne Stroustrup< / em > .)< / p >
< p > On the other hand, if we take a look at the tree, each node holds some
attributes and pointers to the left and right descendants of itself. Even if we
maintain a reasonable height of the tree (keep the tree balanced), we still need
to follow the pointers which point to the nodes < em > somewhere< / em > on the heap. When
traversing the tree, we get a consistent time complexity, but at the expense of
jumping between the nodes on the heap which takes some time.< / p >
< div class = "theme-admonition theme-admonition-danger admonition_xJq3 alert alert--danger" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 12 16" > < path fill-rule = "evenodd" d = "M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z" > < / path > < / svg > < / span > danger< / div > < div class = "admonitionContent_BuS1" > < p > This is not meant to favor the hash table or to persuade people not
to use the tree representations. There are benefits coming from the respective
data structures, even if the time is not the best.< / p > < p > Overall, if we compare the worst-case time complexities of the tree and the hash
table, the tree representation comes off better.< / p > < / div > < / div >
< div class = "theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 12 16" > < path fill-rule = "evenodd" d = "M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z" > < / path > < / svg > < / span > Challenge< / div > < div class = "admonitionContent_BuS1" > < p > Try to benchmark with a similar approach in Rust. Since Rust uses a
different hash function, it would be best to just override the hash; this
way you can also avoid the hard part of this attack (making up the numbers that
will collide).< / p > < / div > < / div >
< hr >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "references" > References< a href = "#references" class = "hash-link" aria-label = "Direct link to References" title = "Direct link to References" > < / a > < / h2 >
< ol >
< li > Bjarne Stroustrup.
< a href = "https://www.stroustrup.com/bs_faq.html#list" target = "_blank" rel = "noopener noreferrer" > Are lists evil?< / a > < / li >
< / ol >
< section data-footnotes = "true" class = "footnotes" > < h2 class = "anchor anchorWithStickyNavbar_LWe7 sr-only" id = "footnote-label" > Footnotes< a href = "#footnote-label" class = "hash-link" aria-label = "Direct link to Footnotes" title = "Direct link to Footnotes" > < / a > < / h2 >
< ol >
< li id = "user-content-fn-1" >
< p > Arbitrary-sized integers, they can get as big as your memory allows. < a href = "#user-content-fnref-1" data-footnote-backref = "" aria-label = "Back to reference 1" class = "data-footnote-backref" > ↩< / a > < / p >
< / li >
< / ol >
2023-11-28 20:27:15 +01:00
< / section > < / div > < footer class = "theme-doc-footer docusaurus-mt-lg" > < div class = "theme-doc-footer-tags-row row margin-bottom--sm" > < div class = "col" > < b > Tags:< / b > < ul class = "tags_jXut padding--none margin-left--sm" > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/cpp/" > cpp< / a > < / li > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/python/" > python< / a > < / li > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/hash-tables/" > hash-tables< / a > < / li > < / ul > < / div > < / div > < div class = "theme-doc-footer-edit-meta-row row" > < div class = "col" > < a href = "https://github.com/mfocko/blog/tree/main/algorithms/12-hash-tables/2023-11-28-breaking/01-python.md" target = "_blank" rel = "noopener noreferrer" class = "theme-edit-this-page" > < svg fill = "currentColor" height = "20" width = "20" viewBox = "0 0 40 40" class = "iconEdit_Z9Sw" aria-hidden = "true" > < g > < path d = "m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z" > < / path > < / g > < / svg > Edit this page< / a > < / div > < div class = "col lastUpdated_vwxv" > < span class = "theme-last-updated" > Last updated<!-- --> on < b > < time datetime = "2023-11-28T00:00:00.000Z" > Nov 28, 2023< / time > < / b > < / span > < / div > < / div > < / footer > < / article > < nav class = "pagination-nav docusaurus-mt-lg" aria-label = "Docs pages" > < a class = "pagination-nav__link pagination-nav__link--prev" href = "/algorithms/hash-tables/breaking/" > < div class = "pagination-nav__sublabel" > Previous< / div > < div class = "pagination-nav__label" > Breaking Hash Table< / div > < / a > < a class = "pagination-nav__link pagination-nav__link--next" href = "/algorithms/hash-tables/breaking/mitigations/" > < div class = "pagination-nav__sublabel" > Next< / div 
> < div class = "pagination-nav__label" > Possible Mitigations< / div > < / a > < / nav > < / div > < / div > < div class = "col col--3" > < div class = "tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop" > < ul class = "table-of-contents table-of-contents__left-border" > < li > < a href = "#preparing-the-attack" class = "table-of-contents__link toc-highlight" > Preparing the attack< / a > < ul > < li > < a href = "#sequences" class = "table-of-contents__link toc-highlight" > Sequences< / a > < / li > < / ul > < / li > < li > < a href = "#results" class = "table-of-contents__link toc-highlight" > Results< / a > < / li > < li > < a href = "#comparing-with-the-tree" class = "table-of-contents__link toc-highlight" > Comparing with the tree< / a > < / li > < li > < a href = "#references" class = "table-of-contents__link toc-highlight" > References< / a > < / li > < / ul > < / div > < / div > < / div > < / div > < / main > < / div > < / div > < / div > < footer class = "footer footer--dark" > < div class = "container container-fluid" > < div class = "row footer__links" > < div class = "col footer__col" > < div class = "footer__title" > Git< / div > < ul class = "footer__items clean-list" > < li class = "footer__item" > < a href = "https://github.com/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > GitHub< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < / a > < / li > < li class = "footer__item" > < a href = "https://gitlab.com/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > GitLab< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < / a > < / li > < li class = "footer__item" > < a href = "https://git.mfocko.xyz/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > Gitea (self-hosted)< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < / a > < / li > < / ul > < / div > < div class = "col footer__col" > < div class = "footer__title" > Social #1< / div > < ul class = "footer__items clean-list" > < li class = "footer__item" > <
2023-11-28 19:40:59 +01:00
< / body >
< / html >