-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Encode hashes as bytes, not varint #110083
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,4 +1,4 @@ | ||||||||||||
use crate::stable_hasher; | ||||||||||||
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult}; | ||||||||||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; | ||||||||||||
use std::hash::{Hash, Hasher}; | ||||||||||||
|
||||||||||||
|
@@ -9,32 +9,47 @@ mod tests; | |||||||||||
#[repr(C)] | ||||||||||||
pub struct Fingerprint(u64, u64); | ||||||||||||
|
||||||||||||
impl Fingerprint { | ||||||||||||
pub const ZERO: Fingerprint = Fingerprint(0, 0); | ||||||||||||
pub trait FingerprintComponent { | ||||||||||||
fn as_u64(&self) -> u64; | ||||||||||||
} | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
pub fn new(_0: u64, _1: u64) -> Fingerprint { | ||||||||||||
Fingerprint(_0, _1) | ||||||||||||
impl FingerprintComponent for Hash64 { | ||||||||||||
fn as_u64(&self) -> u64 { | ||||||||||||
Hash64::as_u64(*self) | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl FingerprintComponent for u64 { | ||||||||||||
fn as_u64(&self) -> u64 { | ||||||||||||
saethlin marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
*self | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl Fingerprint { | ||||||||||||
pub const ZERO: Fingerprint = Fingerprint(0, 0); | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
pub fn from_smaller_hash(hash: u64) -> Fingerprint { | ||||||||||||
Fingerprint(hash, hash) | ||||||||||||
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint | ||||||||||||
where | ||||||||||||
A: FingerprintComponent, | ||||||||||||
B: FingerprintComponent, | ||||||||||||
{ | ||||||||||||
Fingerprint(_0.as_u64(), _1.as_u64()) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not directly store 2 Hash64? Or directly a Hash128? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm trying to do some encapsulation here (it's not really working, probably needs follow-up PRs to improve it). I added some documentation to Users of rust/compiler/rustc_middle/src/dep_graph/dep_node.rs Lines 357 to 361 in fd57c6b
After looking through this a lot I think the So
That seems like 3 different types to me. |
||||||||||||
} | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
pub fn to_smaller_hash(&self) -> u64 { | ||||||||||||
pub fn to_smaller_hash(&self) -> Hash64 { | ||||||||||||
// Even though both halves of the fingerprint are expected to be good | ||||||||||||
// quality hash values, let's still combine the two values because the | ||||||||||||
// Fingerprints in DefPathHash have the StableCrateId portion which is | ||||||||||||
// the same for all DefPathHashes from the same crate. Combining the | ||||||||||||
// two halves makes sure we get a good quality hash in such cases too. | ||||||||||||
self.0.wrapping_mul(3).wrapping_add(self.1) | ||||||||||||
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1)) | ||||||||||||
} | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
pub fn as_value(&self) -> (u64, u64) { | ||||||||||||
(self.0, self.1) | ||||||||||||
pub fn split(&self) -> (Hash64, Hash64) { | ||||||||||||
(Hash64::new(self.0), Hash64::new(self.1)) | ||||||||||||
} | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
|
@@ -131,9 +146,9 @@ impl FingerprintHasher for crate::unhash::Unhasher { | |||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl stable_hasher::StableHasherResult for Fingerprint { | ||||||||||||
impl StableHasherResult for Fingerprint { | ||||||||||||
#[inline] | ||||||||||||
fn finish(hasher: stable_hasher::StableHasher) -> Self { | ||||||||||||
fn finish(hasher: StableHasher) -> Self { | ||||||||||||
let (_0, _1) = hasher.finalize(); | ||||||||||||
Fingerprint(_0, _1) | ||||||||||||
} | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)` | ||
//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8 | ||
//! or 16 bytes of the hash. | ||
//! | ||
//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`. | ||
//! `Hash64` and `Hash128` expose some utility functions to encourage users to not extract the inner | ||
//! hash value as an integer type and accidentally apply varint encoding to it. | ||
//! | ||
//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct | ||
//! and decompose these types into constituent pieces. The point of these types is only to | ||
//! connect the fact that they can only be produced by a `StableHasher` to their | ||
//! `Encode`/`Decode` impls. | ||
|
||
use crate::stable_hasher::{StableHasher, StableHasherResult}; | ||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; | ||
use std::fmt; | ||
use std::ops::BitXorAssign; | ||
|
||
/// A 64-bit hash wrapped around a private `u64` field. | ||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] | ||
pub struct Hash64 { | ||
inner: u64, | ||
} | ||
|
||
impl Hash64 { | ||
/// The hash whose wrapped value is `0`. | ||
pub const ZERO: Hash64 = Hash64 { inner: 0 }; | ||
|
||
/// Wraps a raw `u64` as a `Hash64`; `pub(crate)`, so only this crate can call it. | ||
#[inline] | ||
pub(crate) fn new(n: u64) -> Self { | ||
Self { inner: n } | ||
} | ||
|
||
/// Returns the wrapped `u64` value. | ||
#[inline] | ||
pub fn as_u64(self) -> u64 { | ||
self.inner | ||
} | ||
} | ||
|
||
/// XORs a raw `u64` into the wrapped hash value in place. | ||
impl BitXorAssign<u64> for Hash64 { | ||
fn bitxor_assign(&mut self, rhs: u64) { | ||
saethlin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.inner ^= rhs; | ||
} | ||
} | ||
|
||
/// Encodes the hash as its 8 little-endian bytes, emitted as raw bytes rather than through the | ||
/// varint encoding that a derived impl would apply to a `u64` field. | ||
impl<S: Encoder> Encodable<S> for Hash64 { | ||
#[inline] | ||
fn encode(&self, s: &mut S) { | ||
s.emit_raw_bytes(&self.inner.to_le_bytes()); | ||
} | ||
} | ||
|
||
/// Decodes a hash by reading 8 raw bytes and interpreting them as a little-endian `u64`. | ||
impl<D: Decoder> Decodable<D> for Hash64 { | ||
#[inline] | ||
fn decode(d: &mut D) -> Self { | ||
// The `unwrap` panics if `try_into` cannot convert the bytes that were read. | ||
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) } | ||
} | ||
} | ||
|
||
/// Builds a `Hash64` from the first component (`.0`) of the value returned by | ||
/// `StableHasher::finalize`; the remaining component is not used. | ||
impl StableHasherResult for Hash64 { | ||
#[inline] | ||
fn finish(hasher: StableHasher) -> Self { | ||
Self { inner: hasher.finalize().0 } | ||
} | ||
} | ||
|
||
/// Formats the hash by calling `fmt` on the wrapped `u64`. | ||
impl fmt::Debug for Hash64 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
self.inner.fmt(f) | ||
} | ||
} | ||
|
||
/// Formats the hash in lowercase hexadecimal by forwarding to the wrapped `u64`'s | ||
/// `fmt::LowerHex` impl. | ||
impl fmt::LowerHex for Hash64 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fmt::LowerHex::fmt(&self.inner, f) | ||
} | ||
} | ||
|
||
/// A 128-bit hash wrapped around a private `u128` field. | ||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] | ||
pub struct Hash128 { | ||
inner: u128, | ||
} | ||
|
||
impl Hash128 { | ||
/// Converts to a `Hash64` by casting the wrapped `u128` to `u64`, which keeps only the low | ||
/// 64 bits. | ||
#[inline] | ||
pub fn truncate(self) -> Hash64 { | ||
Hash64 { inner: self.inner as u64 } | ||
} | ||
|
||
/// Adds the wrapped values of `self` and `other`, wrapping around on overflow. | ||
#[inline] | ||
pub fn wrapping_add(self, other: Self) -> Self { | ||
Self { inner: self.inner.wrapping_add(other.inner) } | ||
} | ||
|
||
/// Returns the wrapped `u128` value. | ||
#[inline] | ||
pub fn as_u128(self) -> u128 { | ||
self.inner | ||
} | ||
} | ||
|
||
/// Encodes the hash as its 16 little-endian bytes, emitted as raw bytes rather than through the | ||
/// varint encoding that a derived impl would apply to a `u128` field. | ||
impl<S: Encoder> Encodable<S> for Hash128 { | ||
#[inline] | ||
fn encode(&self, s: &mut S) { | ||
s.emit_raw_bytes(&self.inner.to_le_bytes()); | ||
} | ||
} | ||
|
||
/// Decodes a hash by reading 16 raw bytes and interpreting them as a little-endian `u128`. | ||
impl<D: Decoder> Decodable<D> for Hash128 { | ||
#[inline] | ||
fn decode(d: &mut D) -> Self { | ||
// The `unwrap` panics if `try_into` cannot convert the bytes that were read. | ||
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) } | ||
} | ||
} | ||
|
||
/// Builds a `Hash128` from the pair returned by `StableHasher::finalize`. | ||
impl StableHasherResult for Hash128 { | ||
#[inline] | ||
fn finish(hasher: StableHasher) -> Self { | ||
let (_0, _1) = hasher.finalize(); | ||
// `_0` fills the low 64 bits and `_1` the high 64 bits of the resulting `u128`. | ||
Self { inner: u128::from(_0) | (u128::from(_1) << 64) } | ||
} | ||
} | ||
|
||
/// Formats the hash by calling `fmt` on the wrapped `u128`. | ||
impl fmt::Debug for Hash128 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
self.inner.fmt(f) | ||
} | ||
} | ||
|
||
/// Formats the hash in lowercase hexadecimal by forwarding to the wrapped `u128`'s | ||
/// `fmt::LowerHex` impl. | ||
impl fmt::LowerHex for Hash128 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fmt::LowerHex::fmt(&self.inner, f) | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.