Skip to content

Commit d07523c

Browse files
ICH: Use 128-bit Blake2b hash instead of 64-bit SipHash for incr. comp. fingerprints.
1 parent 40cd1fd commit d07523c

File tree

14 files changed

+460
-38
lines changed

14 files changed

+460
-38
lines changed
+286
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
12+
// An implementation of the Blake2b cryptographic hash function.
13+
// The implementation closely follows: https://tools.ietf.org/html/rfc7693
14+
//
15+
// "BLAKE2 is a cryptographic hash function faster than MD5, SHA-1, SHA-2, and
16+
// SHA-3, yet is at least as secure as the latest standard SHA-3."
17+
// according to their own website :)
18+
//
19+
// Indeed this implementation is two to three times as fast as our SHA-256
20+
// implementation. If you have the luxury of being able to use crates from
21+
// crates.io, you can go there and find still faster implementations.
22+
23+
/// Streaming state of one BLAKE2b computation.
///
/// A context is created by `blake2b_new`, fed with `blake2b_update` and
/// consumed by `blake2b_final`.
pub struct Blake2bCtx {
    /// Input buffer: the bytes of the current 128-byte block that have not
    /// been compressed yet.
    b: [u8; 128],
    /// Chained hash state; the digest is read from here after finalization.
    h: [u64; 8],
    /// 128-bit count of input bytes fed into the compression function
    /// (`t[0]` holds the low 64 bits, `t[1]` the high 64 bits).
    t: [u64; 2],
    /// Number of bytes of `b` that are currently in use.
    c: usize,
    /// Length of the requested digest in bytes.
    outlen: usize,
}
30+
31+
impl ::std::fmt::Debug for Blake2bCtx {
32+
fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> {
33+
write!(fmt, "{:?}", self.h)
34+
}
35+
}
36+
37+
/// The BLAKE2b mixing function G (RFC 7693).
///
/// Mixes the two message words `x` and `y` into the four words of the work
/// vector `v` selected by the indices `a`, `b`, `c` and `d`. All additions
/// wrap around; the rotation distances are 32, 24, 16 and 63 bits.
///
/// Panics if any index is not below 16.
#[inline(always)]
fn b2b_g(v: &mut [u64; 16], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64)
{
    // v[dst] <- v[dst] + v[src] + word   (mod 2^64)
    #[inline(always)]
    fn add_into(v: &mut [u64; 16], dst: usize, src: usize, word: u64) {
        v[dst] = v[dst].wrapping_add(v[src]).wrapping_add(word);
    }

    // v[dst] <- (v[dst] ^ v[src]) >>> bits
    #[inline(always)]
    fn xor_rotate(v: &mut [u64; 16], dst: usize, src: usize, bits: u32) {
        v[dst] = (v[dst] ^ v[src]).rotate_right(bits);
    }

    // First half-round, mixing in `x`.
    add_into(v, a, b, x);
    xor_rotate(v, d, a, 32);
    add_into(v, c, d, 0);
    xor_rotate(v, b, c, 24);

    // Second half-round, mixing in `y`.
    add_into(v, a, b, y);
    xor_rotate(v, d, a, 16);
    add_into(v, c, d, 0);
    xor_rotate(v, b, c, 63);
}
55+
56+
// Initialization vector: the eight 64-bit words that seed the hash state
// `h` of a new context and fill the upper half of the work vector in every
// compression. The values are the ones given in RFC 7693, section 2.6.
const BLAKE2B_IV: [u64; 8] = [
    0x6A09E667F3BCC908,
    0xBB67AE8584CAA73B,
    0x3C6EF372FE94F82B,
    0xA54FF53A5F1D36F1,
    0x510E527FADE682D1,
    0x9B05688C2B3E6C1F,
    0x1F83D9ABFB41BD6B,
    0x5BE0CD19137E2179,
];
63+
64+
fn blake2b_compress(ctx: &mut Blake2bCtx, last: bool) {
65+
66+
const SIGMA: [[usize; 16]; 12] = [
67+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
68+
[14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ],
69+
[11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 ],
70+
[7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 ],
71+
[9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 ],
72+
[2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 ],
73+
[12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 ],
74+
[13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 ],
75+
[6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 ],
76+
[10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 ],
77+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
78+
[14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ]
79+
];
80+
81+
let mut v: [u64; 16] = [
82+
ctx.h[0],
83+
ctx.h[1],
84+
ctx.h[2],
85+
ctx.h[3],
86+
ctx.h[4],
87+
ctx.h[5],
88+
ctx.h[6],
89+
ctx.h[7],
90+
91+
BLAKE2B_IV[0],
92+
BLAKE2B_IV[1],
93+
BLAKE2B_IV[2],
94+
BLAKE2B_IV[3],
95+
BLAKE2B_IV[4],
96+
BLAKE2B_IV[5],
97+
BLAKE2B_IV[6],
98+
BLAKE2B_IV[7],
99+
];
100+
101+
v[12] ^= ctx.t[0]; // low 64 bits of offset
102+
v[13] ^= ctx.t[1]; // high 64 bits
103+
if last {
104+
v[14] = !v[14];
105+
}
106+
107+
{
108+
// Re-interpret the input buffer in the state as u64s
109+
let m: &mut [u64; 16] = unsafe {
110+
let b: &mut [u8; 128] = &mut ctx.b;
111+
::std::mem::transmute(b)
112+
};
113+
114+
// It's OK to modify the buffer in place since this is the last time
115+
// this data will be accessed before it's overwritten
116+
if cfg!(target_endian = "big") {
117+
for word in &mut m[..] {
118+
*word = word.to_be();
119+
}
120+
}
121+
122+
for i in 0 .. 12 {
123+
b2b_g(&mut v, 0, 4, 8, 12, m[SIGMA[i][ 0]], m[SIGMA[i][ 1]]);
124+
b2b_g(&mut v, 1, 5, 9, 13, m[SIGMA[i][ 2]], m[SIGMA[i][ 3]]);
125+
b2b_g(&mut v, 2, 6, 10, 14, m[SIGMA[i][ 4]], m[SIGMA[i][ 5]]);
126+
b2b_g(&mut v, 3, 7, 11, 15, m[SIGMA[i][ 6]], m[SIGMA[i][ 7]]);
127+
b2b_g(&mut v, 0, 5, 10, 15, m[SIGMA[i][ 8]], m[SIGMA[i][ 9]]);
128+
b2b_g(&mut v, 1, 6, 11, 12, m[SIGMA[i][10]], m[SIGMA[i][11]]);
129+
b2b_g(&mut v, 2, 7, 8, 13, m[SIGMA[i][12]], m[SIGMA[i][13]]);
130+
b2b_g(&mut v, 3, 4, 9, 14, m[SIGMA[i][14]], m[SIGMA[i][15]]);
131+
}
132+
}
133+
134+
for i in 0 .. 8 {
135+
ctx.h[i] ^= v[i] ^ v[i + 8];
136+
}
137+
}
138+
139+
pub fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx {
140+
assert!(outlen > 0 && outlen <= 64 && key.len() <= 64);
141+
142+
let mut ctx = Blake2bCtx {
143+
b: [0; 128],
144+
h: BLAKE2B_IV,
145+
t: [0; 2],
146+
c: 0,
147+
outlen: outlen,
148+
};
149+
150+
ctx.h[0] ^= 0x01010000 ^ ((key.len() << 8) as u64) ^ (outlen as u64);
151+
152+
if key.len() > 0 {
153+
blake2b_update(&mut ctx, key);
154+
ctx.c = ctx.b.len();
155+
}
156+
157+
ctx
158+
}
159+
160+
pub fn blake2b_update(ctx: &mut Blake2bCtx, mut data: &[u8])
161+
{
162+
let mut bytes_to_copy = data.len();
163+
let mut space_in_buffer = ctx.b.len() - ctx.c;
164+
165+
while bytes_to_copy > space_in_buffer {
166+
checked_mem_copy(data, &mut ctx.b[ctx.c .. ], space_in_buffer);
167+
168+
ctx.t[0] = ctx.t[0].wrapping_add(ctx.b.len() as u64);
169+
if ctx.t[0] < (ctx.b.len() as u64) {
170+
ctx.t[1] += 1;
171+
}
172+
blake2b_compress(ctx, false);
173+
ctx.c = 0;
174+
175+
data = &data[space_in_buffer .. ];
176+
bytes_to_copy -= space_in_buffer;
177+
space_in_buffer = ctx.b.len();
178+
}
179+
180+
if bytes_to_copy > 0 {
181+
checked_mem_copy(data, &mut ctx.b[ctx.c .. ], bytes_to_copy);
182+
ctx.c += bytes_to_copy;
183+
}
184+
}
185+
186+
pub fn blake2b_final(mut ctx: Blake2bCtx, out: &mut [u8])
187+
{
188+
ctx.t[0] = ctx.t[0].wrapping_add(ctx.c as u64);
189+
if ctx.t[0] < ctx.c as u64 {
190+
ctx.t[1] += 1;
191+
}
192+
193+
while ctx.c < 128 {
194+
ctx.b[ctx.c] = 0;
195+
ctx.c += 1;
196+
}
197+
198+
blake2b_compress(&mut ctx, true);
199+
200+
if cfg!(target_endian = "big") {
201+
// Make sure that the data is in memory in little endian format, as is
202+
// demanded by BLAKE2
203+
for word in &mut ctx.h {
204+
*word = word.to_le();
205+
}
206+
}
207+
208+
checked_mem_copy(&ctx.h, out, ctx.outlen);
209+
}
210+
211+
/// Copies the first `byte_count` bytes of `from` over the first
/// `byte_count` bytes of `to`, ignoring the element types of both slices.
///
/// Panics if either slice is smaller than `byte_count` bytes.
#[inline(always)]
fn checked_mem_copy<T1, T2>(from: &[T1], to: &mut [T2], byte_count: usize) {
    let from_size = ::std::mem::size_of_val(from);
    let to_size = ::std::mem::size_of_val(&*to);
    assert!(from_size >= byte_count);
    assert!(to_size >= byte_count);

    let src = from.as_ptr() as *const u8;
    let dst = to.as_mut_ptr() as *mut u8;

    // SAFETY: both regions were just checked to span at least `byte_count`
    // bytes, and they cannot overlap because `to` is an exclusive borrow.
    // NOTE(review): nothing here checks that the copied bytes form valid
    // values of `T2`; callers in this file only use integer element types.
    unsafe {
        ::std::ptr::copy_nonoverlapping(src, dst, byte_count);
    }
}
223+
224+
pub fn blake2b(out: &mut [u8], key: &[u8], data: &[u8])
225+
{
226+
let mut ctx = blake2b_new(out.len(), key);
227+
blake2b_update(&mut ctx, data);
228+
blake2b_final(ctx, out);
229+
}
230+
231+
/// Fills `out` with a deterministic pseudo-random byte sequence derived
/// from `seed`: a Fibonacci-style recurrence over wrapping `u32`s of which
/// the top byte of every term is emitted.
#[cfg(test)]
fn selftest_seq(out: &mut [u8], seed: u32)
{
    let mut prev: u32 = 0xDEAD4BADu32.wrapping_mul(seed);
    let mut cur: u32 = 1;

    for slot in out.iter_mut() {
        let next = prev.wrapping_add(cur);
        prev = cur;
        cur = next;
        // A `u32` shifted right by 24 already fits into a single byte.
        *slot = (next >> 24) as u8;
    }
}
244+
245+
/// Self-test: hashes a matrix of generated inputs, unkeyed and keyed, for
/// several digest and input lengths, then hashes all resulting digests
/// together and compares that "grand hash" against a known value.
#[test]
fn blake2b_selftest()
{
    // grand hash of hash results
    const BLAKE2B_RES: [u8; 32] = [
        0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD,
        0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56,
        0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73,
        0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75
    ];

    // parameter sets
    const B2B_MD_LEN: [usize; 4] = [20, 32, 48, 64];
    const B2B_IN_LEN: [usize; 6] = [0, 3, 128, 129, 255, 1024];

    let mut data = [0u8; 1024];
    let mut md = [0u8; 64];
    let mut key = [0u8; 64];

    let mut ctx = blake2b_new(32, &[]);

    for &outlen in B2B_MD_LEN.iter() {
        for &inlen in B2B_IN_LEN.iter() {
            // unkeyed hash
            selftest_seq(&mut data[.. inlen], inlen as u32);
            blake2b(&mut md[.. outlen], &[], &data[.. inlen]);
            blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash

            // keyed hash
            selftest_seq(&mut key[.. outlen], outlen as u32);
            blake2b(&mut md[.. outlen], &key[.. outlen], &data[.. inlen]);
            blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash
        }
    }

    // compute and compare the hash of hashes
    blake2b_final(ctx, &mut md[..]);
    for (actual, expected) in md.iter().zip(BLAKE2B_RES.iter()) {
        assert_eq!(*actual, *expected);
    }
}

src/librustc_data_structures/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ extern crate serialize as rustc_serialize; // used by deriving
4242
extern crate libc;
4343

4444
pub mod bitslice;
45+
pub mod blake2b;
4546
pub mod bitvec;
4647
pub mod graph;
4748
pub mod ivar;

src/librustc_incremental/calculate_svh/hasher.rs

+34-8
Original file line numberDiff line numberDiff line change
@@ -9,38 +9,64 @@
99
// except according to those terms.
1010

1111
use std::hash::Hasher;
12-
use std::collections::hash_map::DefaultHasher;
12+
use std::mem;
13+
use rustc_data_structures::blake2b;
14+
use ich::Fingerprint;
1315

1416
#[derive(Debug)]
1517
pub struct IchHasher {
16-
// FIXME: this should use SHA1, not DefaultHasher. DefaultHasher is not
17-
// built to avoid collisions.
18-
state: DefaultHasher,
18+
state: blake2b::Blake2bCtx,
1919
bytes_hashed: u64,
2020
}
2121

2222
impl IchHasher {
2323
pub fn new() -> IchHasher {
2424
IchHasher {
25-
state: DefaultHasher::new(),
25+
state: blake2b::blake2b_new(mem::size_of::<Fingerprint>(), &[]),
2626
bytes_hashed: 0
2727
}
2828
}
2929

3030
pub fn bytes_hashed(&self) -> u64 {
3131
self.bytes_hashed
3232
}
33+
34+
pub fn finish(self) -> Fingerprint {
35+
let mut fingerprint = Fingerprint::zero();
36+
blake2b::blake2b_final(self.state, &mut fingerprint.0);
37+
fingerprint
38+
}
3339
}
3440

3541
impl Hasher for IchHasher {
36-
#[inline]
3742
fn finish(&self) -> u64 {
38-
self.state.finish()
43+
bug!("Use other finish() implementation to get the full 128-bit hash.");
3944
}
4045

4146
#[inline]
4247
fn write(&mut self, bytes: &[u8]) {
43-
self.state.write(bytes);
48+
blake2b::blake2b_update(&mut self.state, bytes);
4449
self.bytes_hashed += bytes.len() as u64;
4550
}
51+
52+
#[inline]
53+
fn write_u16(&mut self, i: u16) {
54+
self.write(&unsafe { mem::transmute::<_, [u8; 2]>(i.to_le()) })
55+
}
56+
57+
#[inline]
58+
fn write_u32(&mut self, i: u32) {
59+
self.write(&unsafe { mem::transmute::<_, [u8; 4]>(i.to_le()) })
60+
}
61+
62+
#[inline]
63+
fn write_u64(&mut self, i: u64) {
64+
self.write(&unsafe { mem::transmute::<_, [u8; 8]>(i.to_le()) })
65+
}
66+
67+
#[inline]
68+
fn write_usize(&mut self, i: usize) {
69+
// always hash as u64, so we don't depend on the size of `usize`
70+
self.write_u64(i as u64);
71+
}
4672
}

0 commit comments

Comments
 (0)