blog/assets/js/15966941.58ddb6d9.js
github-actions[bot] 0b9bf3d392 deploy: dadb0d51f7
2023-11-28 18:40:59 +00:00

1 line
No EOL
10 KiB
JavaScript

"use strict";(self.webpackChunkfi=self.webpackChunkfi||[]).push([[8326],{16721:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>r,contentTitle:()=>o,default:()=>d,frontMatter:()=>a,metadata:()=>h,toc:()=>l});var s=n(85893),i=n(11151);const a={id:"mitigations",slug:"/hash-tables/breaking/mitigations",title:"Possible Mitigations",description:"Talking about the ways how to prevent the attacks on the hash table.\n",tags:["cpp","python","hash-tables"],last_update:{date:new Date("2023-11-28T00:00:00.000Z")}},o=void 0,h={id:"hash-tables/2023-11-28-breaking/mitigations",title:"Possible Mitigations",description:"Talking about the ways how to prevent the attacks on the hash table.\n",source:"@site/algorithms/12-hash-tables/2023-11-28-breaking/02-mitigations.md",sourceDirName:"12-hash-tables/2023-11-28-breaking",slug:"/hash-tables/breaking/mitigations",permalink:"/algorithms/hash-tables/breaking/mitigations",draft:!1,unlisted:!1,editUrl:"https://github.com/mfocko/blog/tree/main/algorithms/12-hash-tables/2023-11-28-breaking/02-mitigations.md",tags:[{label:"cpp",permalink:"/algorithms/tags/cpp"},{label:"python",permalink:"/algorithms/tags/python"},{label:"hash-tables",permalink:"/algorithms/tags/hash-tables"}],version:"current",lastUpdatedAt:1701129600,formattedLastUpdatedAt:"Nov 28, 2023",sidebarPosition:2,frontMatter:{id:"mitigations",slug:"/hash-tables/breaking/mitigations",title:"Possible Mitigations",description:"Talking about the ways how to prevent the attacks on the hash table.\n",tags:["cpp","python","hash-tables"],last_update:{date:"2023-11-28T00:00:00.000Z"}},sidebar:"autogeneratedBar",previous:{title:"Breaking Python",permalink:"/algorithms/hash-tables/breaking/python"}},r={},l=[{value:"Random seed",id:"random-seed",level:2},{value:"Better random seed",id:"better-random-seed",level:2},{value:"Adjusting the hash function",id:"adjusting-the-hash-function",level:2},{value:"Combining both",id:"combining-both",level:2},{value:"Fallback for extreme cases",id:"fallback-for-extreme-cases",level:2},{value:"References",id:"references",level:2}];function c(e){const t={a:"a",admonition:"admonition",code:"code",em:"em",h2:"h2",hr:"hr",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",...(0,i.a)(),...e.components};return(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(t.p,{children:"There are multiple ways the issues created above can be mitigated. Still we can\nonly make it better, we cannot guarantee the ideal time complexity\u2026"}),"\n",(0,s.jsxs)(t.p,{children:["For the sake of simplicity (and referencing an article by ",(0,s.jsx)(t.em,{children:"Neal Wu"})," on the same\ntopic; in references below) I will use the C++ to describe the mitigations."]}),"\n",(0,s.jsx)(t.h2,{id:"random-seed",children:"Random seed"}),"\n",(0,s.jsxs)(t.p,{children:["One of the options how to avoid this kind of an attack is to introduce a random\nseed to the hash. That way it is not that easy to choose the ",(0,s.jsx)(t.em,{children:"nasty"})," numbers."]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-cpp",children:"struct custom_hash {\n size_t operator()(uint64_t x) const {\n return x + 7529;\n }\n};\n"})}),"\n",(0,s.jsx)(t.p,{children:"As you may have noticed, this is not very helpful, since it just shifts the\nissue by some number. Better option is to use a shift from random number\ngenerator:"}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-cpp",children:"struct custom_hash {\n size_t operator()(uint64_t x) const {\n static const uint64_t FIXED_RANDOM =\n chrono::steady_clock::now().time_since_epoch().count();\n return x + FIXED_RANDOM;\n }\n};\n"})}),"\n",(0,s.jsx)(t.p,{children:"In this case the hash is using a high-precision clock to shift the number, which\nis much harder to break."}),"\n",(0,s.jsx)(t.h2,{id:"better-random-seed",children:"Better random seed"}),"\n",(0,s.jsxs)(t.p,{children:["Building on the previous solution, we can do some ",(0,s.jsx)(t.em,{children:"bit magic"})," instead of the\nshifting:"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-cpp",children:"struct custom_hash {\n size_t operator()(uint64_t x) const {\n static const uint64_t FIXED_RANDOM =\n chrono::steady_clock::now().time_since_epoch().count();\n x ^= FIXED_RANDOM;\n return x ^ (x >> 16);\n }\n};\n"})}),"\n",(0,s.jsxs)(t.p,{children:["This not only shifts the number, it also manipulates the underlying bits of the\nhash. In this case we're also applying the ",(0,s.jsx)(t.code,{children:"XOR"})," operation."]}),"\n",(0,s.jsx)(t.h2,{id:"adjusting-the-hash-function",children:"Adjusting the hash function"}),"\n",(0,s.jsx)(t.p,{children:"Another option is to switch up the hash function."}),"\n",(0,s.jsxs)(t.p,{children:["For example Rust uses ",(0,s.jsx)(t.a,{href:"https://en.wikipedia.org/wiki/SipHash",children:(0,s.jsx)(t.em,{children:"SipHash"})})," by\ndefault."]}),"\n",(0,s.jsxs)(t.p,{children:["On the other hand, you can usually specify your own hash function, here we will\nfollow the article by ",(0,s.jsx)(t.em,{children:"Neal"})," that uses so-called ",(0,s.jsx)(t.em,{children:(0,s.jsx)(t.code,{children:"splitmix64"})}),"."]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-cpp",children:"static uint64_t splitmix64(uint64_t x) {\n // http://xorshift.di.unimi.it/splitmix64.c\n x += 0x9e3779b97f4a7c15;\n x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;\n x = (x ^ (x >> 27)) * 0x94d049bb133111eb;\n return x ^ (x >> 31);\n}\n"})}),"\n",(0,s.jsxs)(t.p,{children:["As you can see, this definitely doesn't do identity on the integers ","\ud83d\ude04"]}),"\n",(0,s.jsxs)(t.p,{children:["Another example would be\n",(0,s.jsx)(t.a,{href:"https://github.com/openjdk/jdk/blob/dc256fbc6490f8163adb286dbb7380c10e5e1e06/src/java.base/share/classes/java/util/HashMap.java#L320-L339",children:(0,s.jsx)(t.code,{children:"HashMap::hash()"})}),"\nfunction in Java:"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-java",children:"/**\n * Computes key.hashCode() and spreads (XORs) higher bits of hash\n * to lower. Because the table uses power-of-two masking, sets of\n * hashes that vary only in bits above the current mask will\n * always collide. (Among known examples are sets of Float keys\n * holding consecutive whole numbers in small tables.) So we\n * apply a transform that spreads the impact of higher bits\n * downward. There is a tradeoff between speed, utility, and\n * quality of bit-spreading. Because many common sets of hashes\n * are already reasonably distributed (so don't benefit from\n * spreading), and because we use trees to handle large sets of\n * collisions in bins, we just XOR some shifted bits in the\n * cheapest possible way to reduce systematic lossage, as well as\n * to incorporate impact of the highest bits that would otherwise\n * never be used in index calculations because of table bounds.\n */\nstatic final int hash(Object key) {\n int h;\n return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);\n}\n"})}),"\n",(0,s.jsxs)(t.p,{children:["You can notice that they try to include the upper bits of the hash by using\n",(0,s.jsx)(t.code,{children:"XOR"}),", this would render our attack in the previous part helpless."]}),"\n",(0,s.jsx)(t.h2,{id:"combining-both",children:"Combining both"}),"\n",(0,s.jsxs)(t.p,{children:["Can we make it better? Of course! Use multiple mitigations at the same time. In\nour case, we will both inject the random value ",(0,s.jsx)(t.strong,{children:"and"})," use the ",(0,s.jsx)(t.em,{children:(0,s.jsx)(t.code,{children:"splitmix64"})}),":"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-cpp",children:"struct custom_hash {\n static uint64_t splitmix64(uint64_t x) {\n // http://xorshift.di.unimi.it/splitmix64.c\n x += 0x9e3779b97f4a7c15;\n x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;\n x = (x ^ (x >> 27)) * 0x94d049bb133111eb;\n return x ^ (x >> 31);\n }\n\n size_t operator()(uint64_t x) const {\n static const uint64_t FIXED_RANDOM =\n chrono::steady_clock::now().time_since_epoch().count();\n return splitmix64(x + FIXED_RANDOM);\n }\n};\n"})}),"\n",(0,s.jsx)(t.h2,{id:"fallback-for-extreme-cases",children:"Fallback for extreme cases"}),"\n",(0,s.jsxs)(t.p,{children:["As we have mentioned above, Python resolves the conflicts by probing (it looks\nfor empty space somewhere else in the table, but it's deterministic about it, so\nit's not \u201c",(0,s.jsx)(t.em,{children:"oops, this is full, let's go one-by-one and find some spot"}),"\u201d). In the\ncase of C++ and Java, they resolve the conflicts by linked lists, as is the\nusual text-book depiction of the hash table."]}),"\n",(0,s.jsx)(t.p,{children:"However Java does something more intelligent. Once you go over the threshold of\nconflicts in one spot, it converts the linked list to an RB-tree that is sorted\nby the hash and key respectively."}),"\n",(0,s.jsx)(t.admonition,{type:"tip",children:(0,s.jsx)(t.p,{children:"You may wonder what sense does it make to define an ordering on the tree by the\nhash, if we're dealing with conflicts. Well, there are less buckets than the\nrange of the hash, so if we take lower bits, we can have a conflict even though\nthe hashes are not the same."})}),"\n",(0,s.jsxs)(t.p,{children:["You might have noticed that if we get a ",(0,s.jsx)(t.strong,{children:"really bad"})," hashing function, this is\nnot very helpful. It is not, ",(0,s.jsx)(t.strong,{children:"but"})," it can help in other cases."]}),"\n",(0,s.jsx)(t.admonition,{type:"danger",children:(0,s.jsx)(t.p,{children:"As the ordering on the keys of the hash table is not required and may not be\nimplemented, the tree may be ordered by just the hash."})}),"\n",(0,s.jsx)(t.hr,{}),"\n",(0,s.jsx)(t.h2,{id:"references",children:"References"}),"\n",(0,s.jsxs)(t.ol,{children:["\n",(0,s.jsxs)(t.li,{children:["Neal Wu.\n",(0,s.jsxs)(t.a,{href:"https://codeforces.com/blog/entry/62393",children:["Blowing up ",(0,s.jsx)(t.code,{children:"unordered_map"}),", and how to stop getting hacked on it"]}),"."]}),"\n"]})]})}function d(e={}){const{wrapper:t}={...(0,i.a)(),...e.components};return t?(0,s.jsx)(t,{...e,children:(0,s.jsx)(c,{...e})}):c(e)}},11151:(e,t,n)=>{n.d(t,{Z:()=>h,a:()=>o});var s=n(67294);const i={},a=s.createContext(i);function o(e){const t=s.useContext(a);return s.useMemo((function(){return"function"==typeof e?e(t):{...t,...e}}),[t,e])}function h(e){let t;return t=e.disableParentContext?"function"==typeof e.components?e.components(i):e.components||i:o(e.components),s.createElement(a.Provider,{value:t},e.children)}}}]);