2023-11-28 19:40:59 +01:00
<!doctype html>
< html lang = "en" dir = "ltr" class = "docs-wrapper plugin-docs plugin-id-algorithms docs-version-current docs-doc-page docs-doc-id-hash-tables/2023-11-28-breaking/mitigations" data-has-hydrated = "false" >
< head >
< meta charset = "UTF-8" >
2023-12-28 18:55:58 +01:00
< meta name = "generator" content = "Docusaurus v3.0.1" >
2023-11-28 19:40:59 +01:00
< title data-rh = "true" > Possible Mitigations | mf< / title > < meta data-rh = "true" name = "viewport" content = "width=device-width,initial-scale=1" > < meta data-rh = "true" name = "twitter:card" content = "summary_large_image" > < meta data-rh = "true" property = "og:url" content = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/mitigations/" > < meta data-rh = "true" property = "og:locale" content = "en" > < meta data-rh = "true" name = "docusaurus_locale" content = "en" > < meta data-rh = "true" name = "docsearch:language" content = "en" > < meta data-rh = "true" name = "docusaurus_version" content = "current" > < meta data-rh = "true" name = "docusaurus_tag" content = "docs-algorithms-current" > < meta data-rh = "true" name = "docsearch:version" content = "current" > < meta data-rh = "true" name = "docsearch:docusaurus_tag" content = "docs-algorithms-current" > < meta data-rh = "true" property = "og:title" content = "Possible Mitigations | mf" > < meta data-rh = "true" name = "description" content = "Talking about the ways how to prevent the attacks on the hash table .
">< meta data-rh = "true" property = "og:description" content = "Talking about the ways how to prevent the attacks on the hash table .
">< link data-rh = "true" rel = "icon" href = "/img/favicon.ico" > < link data-rh = "true" rel = "canonical" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/mitigations/" > < link data-rh = "true" rel = "alternate" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/mitigations/" hreflang = "en" > < link data-rh = "true" rel = "alternate" href = "https://blog.mfocko.xyz/algorithms/hash-tables/breaking/mitigations/" hreflang = "x-default" > < link data-rh = "true" rel = "preconnect" href = "https://0VXRFPR4QF-dsn.algolia.net" crossorigin = "anonymous" > < link rel = "search" type = "application/opensearchdescription+xml" title = "mf" href = "/opensearch.xml" >
< link rel = "alternate" type = "application/rss+xml" href = "/blog/rss.xml" title = "mf RSS Feed" >
< link rel = "alternate" type = "application/atom+xml" href = "/blog/atom.xml" title = "mf Atom Feed" >
< link rel = "alternate" type = "application/json" href = "/blog/feed.json" title = "mf JSON Feed" >
2023-12-28 18:55:58 +01:00
< link rel = "stylesheet" href = "https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css" integrity = "sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM" crossorigin = "anonymous" > < link rel = "stylesheet" href = "/assets/css/styles.e1ac7597.css" >
2024-01-03 15:14:53 +01:00
< script src = "/assets/js/runtime~main.65095f6f.js" defer = "defer" > < / script >
< script src = "/assets/js/main.2a166133.js" defer = "defer" > < / script >
2023-11-28 19:40:59 +01:00
< / head >
< body class = "navigation-with-keyboard" >
2024-01-03 15:14:53 +01:00
< script > ! function ( ) { function t ( t ) { document . documentElement . setAttribute ( "data-theme" , t ) } var e = function ( ) { try { return new URLSearchParams ( window . location . search ) . get ( "docusaurus-theme" ) } catch ( t ) { } } ( ) || function ( ) { try { return localStorage . getItem ( "theme" ) } catch ( t ) { } } ( ) ; t ( null !== e ? e : "light" ) } ( ) , function ( ) { try { const c = new URLSearchParams ( window . location . search ) . entries ( ) ; for ( var [ t , e ] of c ) if ( t . startsWith ( "docusaurus-data-" ) ) { var a = t . replace ( "docusaurus-data-" , "data-" ) ; document . documentElement . setAttribute ( a , e ) } } catch ( t ) { } } ( ) < / script > < div id = "__docusaurus" > < div role = "region" aria-label = "Skip to main content" > < a class = "skipToContent_fXgn" href = "#__docusaurus_skipToContent_fallback" > Skip to main content< / a > < / div > < nav aria-label = "Main" class = "navbar navbar--fixed-top" > < div class = "navbar__inner" > < div class = "navbar__items" > < button aria-label = "Toggle navigation bar" aria-expanded = "false" class = "navbar__toggle clean-btn" type = "button" > < svg width = "30" height = "30" viewBox = "0 0 30 30" aria-hidden = "true" > < path stroke = "currentColor" stroke-linecap = "round" stroke-miterlimit = "10" stroke-width = "2" d = "M4 7h22M4 15h22M4 23h22" > < / path > < / svg > < / button > < a class = "navbar__brand" href = "/" > < b class = "navbar__title text--truncate" > mf< / b > < / a > < div class = "navbar__item dropdown dropdown--hoverable" > < a href = "#" aria-haspopup = "true" aria-expanded = "false" role = "button" class = "navbar__link" > Additional FI MU materials< / a > < ul class = "dropdown__menu" > < li > < a aria-current = "page" class = "dropdown__link dropdown__link--active" href = "/algorithms/" > Algorithms< / a > < / li > < li > < a class = "dropdown__link" href = "/c/" > C< / a > < / li > < li > < a class = "dropdown__link" href = "/cpp/" > C++< / a > 
< / li > < / ul > < / div > < a class = "navbar__item navbar__link" href = "/contributions/" > Contributions< / a > < a class = "navbar__item navbar__link" href = "/talks/" > Talks< / a > < / div > < div class = "navbar__items navbar__items--right" > < a class = "navbar__item navbar__link" href = "/blog/" > Blog< / a > < div class = "toggle_vylO colorModeToggle_DEke" > < button class = "clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type = "button" disabled = "" title = "Switch between dark and light mode (currently light mode)" aria-label = "Switch between dark and light mode (currently light mode)" aria-live = "polite" > < svg viewBox = "0 0 24 24" width = "24" height = "24" class = "lightToggleIcon_pyhR" > < path fill = "currentColor" d = "M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z" > < / path > < / svg > < svg viewBox = "0 0 24 24" width = "24" height = "24" class = "darkToggleIcon_wfgR" > < path fill = "currentColor" d = 
"M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z" > < / path > < / svg > < / button > < / div > < div class = "navbarSearchContainer_Bca1" > < button type = "button" class = "DocSearch DocSearch-Button" aria-label = "Search" > < span class = "DocSearch-Button-Container" > < svg width = "20" height = "20" class = "DocSearch-Search-Icon" viewBox = "0 0 20 20" > < path d = "M14.386 14 . 386l4 . 0877 4 . 0877-4 . 0877-4 . 0877c-2 .
2023-11-28 19:40:59 +01:00
only make it better, we cannot guarantee the ideal time complexity…< / p >
< p > For the sake of simplicity (and to follow an article by < em > Neal Wu< / em > on the same
topic; see the references below), I will use C++ to describe the mitigations.< / p >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "random-seed" > Random seed< a href = "#random-seed" class = "hash-link" aria-label = "Direct link to Random seed" title = "Direct link to Random seed" > < / a > < / h2 >
< p > One way to avoid this kind of attack is to introduce a random
seed to the hash. That way it is not as easy to choose the < em > nasty< / em > numbers.< / p >
< div class = "language-cpp codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-cpp codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > struct< / span > < span class = "token plain" > < / span > < span class = "token class-name" > custom_hash< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > size_t < / span > < span class = "token keyword" style = "color:#00009f" > operator< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > const< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > return< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > +< / span > < span class = "token plain" > < / span > < span class = "token number" style = 
"color:#36acaa" > 7529< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > }< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > }< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z" > < / path > < / svg > < svg viewBox = "0 0 24 24" class = "copyButtonSuccessIcon_LjdS" > < path fill = "currentColor" d = "M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z" > < / path > < / svg > < / span > < / button > < / div > < / div > < / div >
< p > As you may have noticed, this is not very helpful, since it just shifts the
issue by some fixed number. A better option is to use a shift from a random number
generator:< / p >
< div class = "language-cpp codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-cpp codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > struct< / span > < span class = "token plain" > < / span > < span class = "token class-name" > custom_hash< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > size_t < / span > < span class = "token keyword" style = "color:#00009f" > operator< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > const< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > static< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > const< / span > < span class = "token plain" > < / span > < span class = "token keyword" style 
= "color:#00009f" > uint64_t< / span > < span class = "token plain" > FIXED_RANDOM < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > chrono< / span > < span class = "token double-colon punctuation" style = "color:#393A34" > ::< / span > < span class = "token plain" > steady_clock< / span > < span class = "token double-colon punctuation" style = "color:#393A34" > ::< / span > < span class = "token function" style = "color:#d73a49" > now< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token function" style = "color:#d73a49" > time_since_epoch< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token function" style = "color:#d73a49" > count< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > return< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > +< / span > < span class = "token plain" > FIXED_RANDOM< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = 
"token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > }< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > }< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < br > < / span > < / code > < / pre > < div class = "buttonGroup__atx" > < button type = "button" aria-label = "Copy code to clipboard" title = "Copy" class = "clean-btn" > < span class = "copyButtonIcons_eSgA" aria-hidden = "true" > < svg viewBox = "0 0 24 24" class = "copyButtonIcon_y97N" > < path fill = "currentColor" d = "M19,21H8V7H19M19,5H8A2,2 0 0 , 0 6 , 7
< p > In this case the hash is using a high-precision clock to shift the number, which
is much harder to break.< / p >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "better-random-seed" > Better random seed< a href = "#better-random-seed" class = "hash-link" aria-label = "Direct link to Better random seed" title = "Direct link to Better random seed" > < / a > < / h2 >
< p > Building on the previous solution, we can do some < em > bit magic< / em > instead of
just shifting:< / p >
< div class = "language-cpp codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-cpp codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > struct< / span > < span class = "token plain" > < / span > < span class = "token class-name" > custom_hash< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > size_t < / span > < span class = "token keyword" style = "color:#00009f" > operator< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > const< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > static< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > const< / span > < span class = "token plain" > < / span > < span class = "token keyword" style 
= "color:#00009f" > uint64_t< / span > < span class = "token plain" > FIXED_RANDOM < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > chrono< / span > < span class = "token double-colon punctuation" style = "color:#393A34" > ::< / span > < span class = "token plain" > steady_clock< / span > < span class = "token double-colon punctuation" style = "color:#393A34" > ::< / span > < span class = "token function" style = "color:#d73a49" > now< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token function" style = "color:#d73a49" > time_since_epoch< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > .< / span > < span class = "token function" style = "color:#d73a49" > count< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^=< / span > < span class = "token plain" > FIXED_RANDOM< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = 
"color:#00009f" > return< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > > > < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 16< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "tok
< p > This not only shifts the number but also manipulates the underlying bits of the
hash. In this case we're also applying the < code > XOR< / code > operation.< / p >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "adjusting-the-hash-function" > Adjusting the hash function< a href = "#adjusting-the-hash-function" class = "hash-link" aria-label = "Direct link to Adjusting the hash function" title = "Direct link to Adjusting the hash function" > < / a > < / h2 >
< p > Another option is to switch up the hash function.< / p >
< p > For example, Rust uses < a href = "https://en.wikipedia.org/wiki/SipHash" target = "_blank" rel = "noopener noreferrer" > < em > SipHash< / em > < / a > by
default.< / p >
< p > On the other hand, you can usually specify your own hash function; here we will
follow the article by < em > Neal< / em > that uses the so-called < em > < code > splitmix64< / code > < / em > .< / p >
< div class = "language-cpp codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-cpp codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > static< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > splitmix64< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token comment" style = "color:#999988;font-style:italic" > // http://xorshift.di.unimi.it/splitmix64.c< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > +=< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0x9e3779b97f4a7c15< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token 
operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > > > < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 30< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token operator" style = "color:#393A34" > *< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0xbf58476d1ce4e5b9< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > > > < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 27< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = 
"color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token operator" style = "color:#393A34" > *< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0x94d049bb133111eb< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > return< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span
< p > As you can see, this definitely doesn't act as the identity on the integers <!-- --> 😄< / p >
< p > Another example would be the
< a href = "https://github.com/openjdk/jdk/blob/dc256fbc6490f8163adb286dbb7380c10e5e1e06/src/java.base/share/classes/java/util/HashMap.java#L320-L339" target = "_blank" rel = "noopener noreferrer" > < code > HashMap::hash()< / code > < / a >
function in Java:< / p >
< div class = "language-java codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-java codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > /**< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * Computes key.hashCode() and spreads (XORs) higher bits of hash< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * to lower. Because the table uses power-of-two masking, sets of< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * hashes that vary only in bits above the current mask will< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * always collide. (Among known examples are sets of Float keys< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * holding consecutive whole numbers in small tables.) So we< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * apply a transform that spreads the impact of higher bits< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * downward. 
There is a tradeoff between speed, utility, and< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * quality of bit-spreading. Because many common sets of hashes< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * are already reasonably distributed (so don' t benefit from< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * spreading), and because we use trees to handle large sets of< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * collisions in bins, we just XOR some shifted bits in the< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * cheapest possible way to reduce systematic lossage, as well as< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * to incorporate impact of the highest bits that would otherwise< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > * never be used in index calculations because of table bounds.< / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token comment" style = "color:#999988;font-style:italic" > */< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > static< / span > < span class 
= "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > final< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > int< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > hash< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token class-name" > Object< / span > < span class = "token plain" > key< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {
< p > You can notice that they try to include the upper bits of the hash by using
< code > XOR< / code > , which would render our attack from the previous part ineffective.< / p >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "combining-both" > Combining both< a href = "#combining-both" class = "hash-link" aria-label = "Direct link to Combining both" title = "Direct link to Combining both" > < / a > < / h2 >
< p > Can we make it better? Of course! Use multiple mitigations at the same time. In
our case, we will both inject the random value < strong > and< / strong > use the < em > < code > splitmix64< / code > < / em > :< / p >
< div class = "language-cpp codeBlockContainer_Ckt0 theme-code-block" style = "--prism-color:#393A34;--prism-background-color:#f6f8fa" > < div class = "codeBlockContent_biex" > < pre tabindex = "0" class = "prism-code language-cpp codeBlock_bY9V thin-scrollbar" style = "color:#393A34;background-color:#f6f8fa" > < code class = "codeBlockLines_e6Vv" > < span class = "token-line" style = "color:#393A34" > < span class = "token keyword" style = "color:#00009f" > struct< / span > < span class = "token plain" > < / span > < span class = "token class-name" > custom_hash< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > static< / span > < span class = "token plain" > < / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > < / span > < span class = "token function" style = "color:#d73a49" > splitmix64< / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token keyword" style = "color:#00009f" > uint64_t< / span > < span class = "token plain" > x< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > {< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > < / span > < span class = "token comment" style = "color:#999988;font-style:italic" > // http://xorshift.di.unimi.it/splitmix64.c< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class 
= "token operator" style = "color:#393A34" > +=< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0x9e3779b97f4a7c15< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > > > < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 30< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token operator" style = "color:#393A34" > *< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0xbf58476d1ce4e5b9< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br > < / span > < span class = "token-line" style = "color:#393A34" > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > =< / span > < span class = "token plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > ^< / span > < span class = "token 
plain" > < / span > < span class = "token punctuation" style = "color:#393A34" > (< / span > < span class = "token plain" > x < / span > < span class = "token operator" style = "color:#393A34" > > > < / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 27< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token punctuation" style = "color:#393A34" > )< / span > < span class = "token plain" > < / span > < span class = "token operator" style = "color:#393A34" > *< / span > < span class = "token plain" > < / span > < span class = "token number" style = "color:#36acaa" > 0x94d049bb133111eb< / span > < span class = "token punctuation" style = "color:#393A34" > ;< / span > < span class = "token plain" > < / span > < br
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "fallback-for-extreme-cases" > Fallback for extreme cases< a href = "#fallback-for-extreme-cases" class = "hash-link" aria-label = "Direct link to Fallback for extreme cases" title = "Direct link to Fallback for extreme cases" > < / a > < / h2 >
< p > As we have mentioned above, Python resolves the conflicts by probing (it looks
for empty space somewhere else in the table, but it's deterministic about it, so
it's not “< em > oops, this is full, let's go one-by-one and find some spot< / em > ”). In the
case of C++ and Java, they resolve the conflicts using linked lists, as is the
usual textbook depiction of the hash table.< / p >
< p > However, Java does something more intelligent. Once you go over the threshold of
conflicts in one spot, it converts the linked list to an RB-tree that is sorted
by the hash and key respectively.< / p >
< div class = "theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 12 16" > < path fill-rule = "evenodd" d = "M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z" > < / path > < / svg > < / span > tip< / div > < div class = "admonitionContent_BuS1" > < p > You may wonder what sense does it make to define an ordering on the tree by the
hash, if we're dealing with conflicts. Well, there are fewer buckets than the
range of the hash, so if we take lower bits, we can have a conflict even though
the hashes are not the same.< / p > < / div > < / div >
< p > You might have noticed that if we get a < strong > really bad< / strong > hashing function, this is
not very helpful. It is not, < strong > but< / strong > it can help in other cases.< / p >
< div class = "theme-admonition theme-admonition-danger admonition_xJq3 alert alert--danger" > < div class = "admonitionHeading_Gvgb" > < span class = "admonitionIcon_Rf37" > < svg viewBox = "0 0 12 16" > < path fill-rule = "evenodd" d = "M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z" > < / path > < / svg > < / span > danger< / div > < div class = "admonitionContent_BuS1" > < p > As the ordering on the keys of the hash table is not required and may not be
implemented, the tree may be ordered by just the hash.< / p > < / div > < / div >
< hr >
< h2 class = "anchor anchorWithStickyNavbar_LWe7" id = "references" > References< a href = "#references" class = "hash-link" aria-label = "Direct link to References" title = "Direct link to References" > < / a > < / h2 >
< ol >
< li > Neal Wu.
< a href = "https://codeforces.com/blog/entry/62393" target = "_blank" rel = "noopener noreferrer" > Blowing up < code > unordered_map< / code > , and how to stop getting hacked on it< / a > .< / li >
2024-01-03 15:14:53 +01:00
< / ol > < / div > < footer class = "theme-doc-footer docusaurus-mt-lg" > < div class = "theme-doc-footer-tags-row row margin-bottom--sm" > < div class = "col" > < b > Tags:< / b > < ul class = "tags_jXut padding--none margin-left--sm" > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/cpp/" > cpp< / a > < / li > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/python/" > python< / a > < / li > < li class = "tag_QGVx" > < a class = "tag_zVej tagRegular_sFm0" href = "/algorithms/tags/hash-tables/" > hash-tables< / a > < / li > < / ul > < / div > < / div > < div class = "theme-doc-footer-edit-meta-row row" > < div class = "col" > < a href = "https://github.com/mfocko/blog/tree/main/algorithms/12-hash-tables/2023-11-28-breaking/02-mitigations.md" target = "_blank" rel = "noopener noreferrer" class = "theme-edit-this-page" > < svg fill = "currentColor" height = "20" width = "20" viewBox = "0 0 40 40" class = "iconEdit_Z9Sw" aria-hidden = "true" > < g > < path d = "m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z" > < / path > < / g > < / svg > Edit this page< / a > < / div > < div class = "col lastUpdated_vwxv" > < span class = "theme-last-updated" > Last updated<!-- --> on < b > < time datetime = "2023-11-28T00:00:00.000Z" > Nov 28, 2023< / time > < / b > < / span > < / div > < / div > < / footer > < / article > < nav class = "pagination-nav docusaurus-mt-lg" aria-label = "Docs pages" > < a class = "pagination-nav__link pagination-nav__link--prev" href = "/algorithms/hash-tables/breaking/python/" > < div class = "pagination-nav__sublabel" > Previous< / div > < div class = "pagination-nav__label" > Breaking Python< / div > < / a > < / nav > < / div > < / div > < div class = "col col--3" > < div class = "tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop" > < ul class = "table-of-contents 
table-of-contents__left-border" > < li > < a href = "#random-seed" class = "table-of-contents__link toc-highlight" > Random seed< / a > < / li > < li > < a href = "#better-random-seed" class = "table-of-contents__link toc-highlight" > Better random seed< / a > < / li > < li > < a href = "#adjusting-the-hash-function" class = "table-of-contents__link toc-highlight" > Adjusting the hash function< / a > < / li > < li > < a href = "#combining-both" class = "table-of-contents__link toc-highlight" > Combining both< / a > < / li > < li > < a href = "#fallback-for-extreme-cases" class = "table-of-contents__link toc-highlight" > Fallback for extreme cases< / a > < / li > < li > < a href = "#references" class = "table-of-contents__link toc-highlight" > References< / a > < / li > < / ul > < / div > < / div > < / div > < / div > < / main > < / div > < / div > < / div > < footer class = "footer footer--dark" > < div class = "container container-fluid" > < div class = "row footer__links" > < div class = "col footer__col" > < div class = "footer__title" > Git< / div > < ul class = "footer__items clean-list" > < li class = "footer__item" > < a href = "https://github.com/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > GitHub< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < / a > < / li > < li class = "footer__item" > < a href = "https://gitlab.com/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > GitLab< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < 
/ a > < / li > < li class = "footer__item" > < a href = "https://git.mfocko.xyz/mfocko" target = "_blank" rel = "noopener noreferrer" class = "footer__link-item" > Gitea (self-hosted)< svg width = "13.5" height = "13.5" aria-hidden = "true" viewBox = "0 0 24 24" class = "iconExternalLink_nPIU" > < path fill = "currentColor" d = "M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z" > < / path > < / svg > < / a > < / li > < / ul > < / div > < div class = "col footer__col" > < div class = "footer__title" > Social #1< / div > < ul class = "footer__items clean-list" > < li class = "footer__item" > < a href = "https://www.linkedin.com/in/mfocko/" target = "_blank" rel = "noopener noreferrer"
2023-11-28 19:40:59 +01:00
< / body >
< / html >