1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution on unsigned 64-bit integers. 9 // Each different instance of a hash table or data structure should use its own [Seed]. 10 // 11 // The hash functions are not cryptographically secure. 12 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 13 package maphash 14 15 // A Seed is a random value that selects the specific hash function 16 // computed by a [Hash]. If two Hashes use the same Seeds, they 17 // will compute the same hash values for any given input. 18 // If two Hashes use different Seeds, they are very likely to compute 19 // distinct hash values for any given input. 20 // 21 // A Seed must be initialized by calling [MakeSeed]. 22 // The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method. 23 // 24 // Each Seed value is local to a single process and cannot be serialized 25 // or otherwise recreated in a different process. 26 type Seed struct { 27 s uint64 28 } 29 30 // Bytes returns the hash of b with the given seed. 31 // 32 // Bytes is equivalent to, but more convenient and efficient than: 33 // 34 // var h Hash 35 // h.SetSeed(seed) 36 // h.Write(b) 37 // return h.Sum64() 38 func Bytes(seed Seed, b []byte) uint64 { 39 state := seed.s 40 if state == 0 { 41 panic("maphash: use of uninitialized Seed") 42 } 43 44 if len(b) > bufSize { 45 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing 46 for len(b) > bufSize { 47 state = rthash(b[:bufSize], state) 48 b = b[bufSize:] 49 } 50 } 51 return rthash(b, state) 52 } 53 54 // String returns the hash of s with the given seed. 55 // 56 // String is equivalent to, but more convenient and efficient than: 57 // 58 // var h Hash 59 // h.SetSeed(seed) 60 // h.WriteString(s) 61 // return h.Sum64() 62 func String(seed Seed, s string) uint64 { 63 state := seed.s 64 if state == 0 { 65 panic("maphash: use of uninitialized Seed") 66 } 67 for len(s) > bufSize { 68 state = rthashString(s[:bufSize], state) 69 s = s[bufSize:] 70 } 71 return rthashString(s, state) 72 } 73 74 // A Hash computes a seeded hash of a byte sequence. 75 // 76 // The zero Hash is a valid Hash ready to use. 77 // A zero Hash chooses a random seed for itself during 78 // the first call to a Reset, Write, Seed, or Sum64 method. 79 // For control over the seed, use SetSeed. 80 // 81 // The computed hash values depend only on the initial seed and 82 // the sequence of bytes provided to the Hash object, not on the way 83 // in which the bytes are provided. For example, the three sequences 84 // 85 // h.Write([]byte{'f','o','o'}) 86 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 87 // h.WriteString("foo") 88 // 89 // all have the same effect. 90 // 91 // Hashes are intended to be collision-resistant, even for situations 92 // where an adversary controls the byte sequences being hashed. 93 // 94 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 95 // If multiple goroutines must compute the same seeded hash, 96 // each can declare its own Hash and call SetSeed with a common Seed. 97 type Hash struct { 98 _ [0]func() // not comparable 99 seed Seed // initial seed used for this hash 100 state Seed // current hash of all flushed bytes 101 buf [bufSize]byte // unflushed byte buffer 102 n int // number of unflushed bytes 103 } 104 105 // bufSize is the size of the Hash write buffer. 106 // The buffer ensures that writes depend only on the sequence of bytes, 107 // not the sequence of WriteByte/Write/WriteString calls, 108 // by always calling rthash with a full buffer (except for the tail). 109 const bufSize = 128 110 111 // initSeed seeds the hash if necessary. 112 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 113 // Note that this does not include Write/WriteByte/WriteString in the case 114 // where they only add to h.buf. (If they write too much, they call h.flush, 115 // which does call h.initSeed.) 116 func (h *Hash) initSeed() { 117 if h.seed.s == 0 { 118 seed := MakeSeed() 119 h.seed = seed 120 h.state = seed 121 } 122 } 123 124 // WriteByte adds b to the sequence of bytes hashed by h. 125 // It never fails; the error result is for implementing [io.ByteWriter]. 126 func (h *Hash) WriteByte(b byte) error { 127 if h.n == len(h.buf) { 128 h.flush() 129 } 130 h.buf[h.n] = b 131 h.n++ 132 return nil 133 } 134 135 // Write adds b to the sequence of bytes hashed by h. 136 // It always writes all of b and never fails; the count and error result are for implementing [io.Writer]. 137 func (h *Hash) Write(b []byte) (int, error) { 138 size := len(b) 139 // Deal with bytes left over in h.buf. 140 // h.n <= bufSize is always true. 141 // Checking it is ~free and it lets the compiler eliminate a bounds check. 142 if h.n > 0 && h.n <= bufSize { 143 k := copy(h.buf[h.n:], b) 144 h.n += k 145 if h.n < bufSize { 146 // Copied the entirety of b to h.buf. 147 return size, nil 148 } 149 b = b[k:] 150 h.flush() 151 // No need to set h.n = 0 here; it happens just before exit. 152 } 153 // Process as many full buffers as possible, without copying, and calling initSeed only once. 154 if len(b) > bufSize { 155 h.initSeed() 156 for len(b) > bufSize { 157 h.state.s = rthash(b[:bufSize], h.state.s) 158 b = b[bufSize:] 159 } 160 } 161 // Copy the tail. 162 copy(h.buf[:], b) 163 h.n = len(b) 164 return size, nil 165 } 166 167 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 168 // It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter]. 169 func (h *Hash) WriteString(s string) (int, error) { 170 // WriteString mirrors Write. See Write for comments. 171 size := len(s) 172 if h.n > 0 && h.n <= bufSize { 173 k := copy(h.buf[h.n:], s) 174 h.n += k 175 if h.n < bufSize { 176 return size, nil 177 } 178 s = s[k:] 179 h.flush() 180 } 181 if len(s) > bufSize { 182 h.initSeed() 183 for len(s) > bufSize { 184 h.state.s = rthashString(s[:bufSize], h.state.s) 185 s = s[bufSize:] 186 } 187 } 188 copy(h.buf[:], s) 189 h.n = len(s) 190 return size, nil 191 } 192 193 // Seed returns h's seed value. 194 func (h *Hash) Seed() Seed { 195 h.initSeed() 196 return h.seed 197 } 198 199 // SetSeed sets h to use seed, which must have been returned by [MakeSeed] 200 // or by another [Hash.Seed] method. 201 // Two [Hash] objects with the same seed behave identically. 202 // Two [Hash] objects with different seeds will very likely behave differently. 203 // Any bytes added to h before this call will be discarded. 204 func (h *Hash) SetSeed(seed Seed) { 205 if seed.s == 0 { 206 panic("maphash: use of uninitialized Seed") 207 } 208 h.seed = seed 209 h.state = seed 210 h.n = 0 211 } 212 213 // Reset discards all bytes added to h. 214 // (The seed remains the same.) 215 func (h *Hash) Reset() { 216 h.initSeed() 217 h.state = h.seed 218 h.n = 0 219 } 220 221 // precondition: buffer is full. 222 func (h *Hash) flush() { 223 if h.n != len(h.buf) { 224 panic("maphash: flush of partially full buffer") 225 } 226 h.initSeed() 227 h.state.s = rthash(h.buf[:h.n], h.state.s) 228 h.n = 0 229 } 230 231 // Sum64 returns h's current 64-bit value, which depends on 232 // h's seed and the sequence of bytes added to h since the 233 // last call to [Hash.Reset] or [Hash.SetSeed]. 234 // 235 // All bits of the Sum64 result are close to uniformly and 236 // independently distributed, so it can be safely reduced 237 // by using bit masking, shifting, or modular arithmetic. 238 func (h *Hash) Sum64() uint64 { 239 h.initSeed() 240 return rthash(h.buf[:h.n], h.state.s) 241 } 242 243 // MakeSeed returns a new random seed. 244 func MakeSeed() Seed { 245 var s uint64 246 for { 247 s = randUint64() 248 // We use seed 0 to indicate an uninitialized seed/hash, 249 // so keep trying until we get a non-zero seed. 250 if s != 0 { 251 break 252 } 253 } 254 return Seed{s: s} 255 } 256 257 // Sum appends the hash's current 64-bit value to b. 258 // It exists for implementing [hash.Hash]. 259 // For direct calls, it is more efficient to use [Hash.Sum64]. 260 func (h *Hash) Sum(b []byte) []byte { 261 x := h.Sum64() 262 return append(b, 263 byte(x>>0), 264 byte(x>>8), 265 byte(x>>16), 266 byte(x>>24), 267 byte(x>>32), 268 byte(x>>40), 269 byte(x>>48), 270 byte(x>>56)) 271 } 272 273 // Size returns h's hash value size, 8 bytes. 274 func (h *Hash) Size() int { return 8 } 275 276 // BlockSize returns h's block size. 277 func (h *Hash) BlockSize() int { return len(h.buf) } 278