1
Fork 0

Auto merge of #41302 - rkruppe:dec2flt-assoc-consts, r=BurntSushi

Use associated constants in core::num::dec2flt
This commit is contained in:
bors 2017-04-14 19:33:28 +00:00
commit bbdaad0dc8
4 changed files with 99 additions and 152 deletions

View file

@ -70,6 +70,7 @@
#![feature(allow_internal_unstable)] #![feature(allow_internal_unstable)]
#![feature(asm)] #![feature(asm)]
#![feature(associated_type_defaults)] #![feature(associated_type_defaults)]
#![feature(associated_consts)]
#![feature(cfg_target_feature)] #![feature(cfg_target_feature)]
#![feature(cfg_target_has_atomic)] #![feature(cfg_target_has_atomic)]
#![feature(concat_idents)] #![feature(concat_idents)]

View file

@ -106,17 +106,17 @@ mod fpu_precision {
/// a bignum. /// a bignum.
pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> { pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
let num_digits = integral.len() + fractional.len(); let num_digits = integral.len() + fractional.len();
// log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end, // log_10(f64::MAX_SIG) ~ 15.95. We compare the exact value to MAX_SIG near the end,
// this is just a quick, cheap rejection (and also frees the rest of the code from // this is just a quick, cheap rejection (and also frees the rest of the code from
// worrying about underflow). // worrying about underflow).
if num_digits > 16 { if num_digits > 16 {
return None; return None;
} }
if e.abs() >= T::ceil_log5_of_max_sig() as i64 { if e.abs() >= T::CEIL_LOG5_OF_MAX_SIG as i64 {
return None; return None;
} }
let f = num::from_str_unchecked(integral.iter().chain(fractional.iter())); let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
if f > T::max_sig() { if f > T::MAX_SIG {
return None; return None;
} }
@ -154,14 +154,14 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
/// > the best possible approximation that uses p bits of significand.) /// > the best possible approximation that uses p bits of significand.)
pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T { pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
let slop; let slop;
if f <= &Big::from_u64(T::max_sig()) { if f <= &Big::from_u64(T::MAX_SIG) {
// The cases abs(e) < log5(2^N) are in fast_path() // The cases abs(e) < log5(2^N) are in fast_path()
slop = if e >= 0 { 0 } else { 3 }; slop = if e >= 0 { 0 } else { 3 };
} else { } else {
slop = if e >= 0 { 1 } else { 4 }; slop = if e >= 0 { 1 } else { 4 };
} }
let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize(); let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
let exp_p_n = 1 << (P - T::sig_bits() as u32); let exp_p_n = 1 << (P - T::SIG_BITS as u32);
let lowbits: i64 = (z.f % exp_p_n) as i64; let lowbits: i64 = (z.f % exp_p_n) as i64;
// Is the slop large enough to make a difference when // Is the slop large enough to make a difference when
// rounding to n bits? // rounding to n bits?
@ -210,14 +210,14 @@ fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
if d2 < y { if d2 < y {
let mut d2_double = d2; let mut d2_double = d2;
d2_double.mul_pow2(1); d2_double.mul_pow2(1);
if m == T::min_sig() && d_negative && d2_double > y { if m == T::MIN_SIG && d_negative && d2_double > y {
z = prev_float(z); z = prev_float(z);
} else { } else {
return z; return z;
} }
} else if d2 == y { } else if d2 == y {
if m % 2 == 0 { if m % 2 == 0 {
if m == T::min_sig() && d_negative { if m == T::MIN_SIG && d_negative {
z = prev_float(z); z = prev_float(z);
} else { } else {
return z; return z;
@ -303,12 +303,12 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
quick_start::<T>(&mut u, &mut v, &mut k); quick_start::<T>(&mut u, &mut v, &mut k);
let mut rem = Big::from_small(0); let mut rem = Big::from_small(0);
let mut x = Big::from_small(0); let mut x = Big::from_small(0);
let min_sig = Big::from_u64(T::min_sig()); let min_sig = Big::from_u64(T::MIN_SIG);
let max_sig = Big::from_u64(T::max_sig()); let max_sig = Big::from_u64(T::MAX_SIG);
loop { loop {
u.div_rem(&v, &mut x, &mut rem); u.div_rem(&v, &mut x, &mut rem);
if k == T::min_exp_int() { if k == T::MIN_EXP_INT {
// We have to stop at the minimum exponent, if we wait until `k < T::min_exp_int()`, // We have to stop at the minimum exponent, if we wait until `k < T::MIN_EXP_INT`,
// then we'd be off by a factor of two. Unfortunately this means we have to special- // then we'd be off by a factor of two. Unfortunately this means we have to special-
// case normal numbers with the minimum exponent. // case normal numbers with the minimum exponent.
// FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure // FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
@ -318,8 +318,8 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
} }
return underflow(x, v, rem); return underflow(x, v, rem);
} }
if k > T::max_exp_int() { if k > T::MAX_EXP_INT {
return T::infinity2(); return T::INFINITY;
} }
if x < min_sig { if x < min_sig {
u.mul_pow2(1); u.mul_pow2(1);
@ -345,18 +345,18 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
// The target ratio is one where u/v is in an in-range significand. Thus our termination // The target ratio is one where u/v is in an in-range significand. Thus our termination
// condition is log2(u / v) being the significand bits, plus/minus one. // condition is log2(u / v) being the significand bits, plus/minus one.
// FIXME Looking at the second bit could improve the estimate and avoid some more divisions. // FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
let target_ratio = T::sig_bits() as i16; let target_ratio = T::SIG_BITS as i16;
let log2_u = u.bit_length() as i16; let log2_u = u.bit_length() as i16;
let log2_v = v.bit_length() as i16; let log2_v = v.bit_length() as i16;
let mut u_shift: i16 = 0; let mut u_shift: i16 = 0;
let mut v_shift: i16 = 0; let mut v_shift: i16 = 0;
assert!(*k == 0); assert!(*k == 0);
loop { loop {
if *k == T::min_exp_int() { if *k == T::MIN_EXP_INT {
// Underflow or subnormal. Leave it to the main function. // Underflow or subnormal. Leave it to the main function.
break; break;
} }
if *k == T::max_exp_int() { if *k == T::MAX_EXP_INT {
// Overflow. Leave it to the main function. // Overflow. Leave it to the main function.
break; break;
} }
@ -376,7 +376,7 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
} }
fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T { fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
if x < Big::from_u64(T::min_sig()) { if x < Big::from_u64(T::MIN_SIG) {
let q = num::to_u64(&x); let q = num::to_u64(&x);
let z = rawfp::encode_subnormal(q); let z = rawfp::encode_subnormal(q);
return round_by_remainder(v, rem, q, z); return round_by_remainder(v, rem, q, z);
@ -395,9 +395,9 @@ fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
// needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder // needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder
// is zero, we have a half-to-even situation. // is zero, we have a half-to-even situation.
let bits = x.bit_length(); let bits = x.bit_length();
let lsb = bits - T::sig_bits() as usize; let lsb = bits - T::SIG_BITS as usize;
let q = num::get_bits(&x, lsb, bits); let q = num::get_bits(&x, lsb, bits);
let k = T::min_exp_int() + lsb as i16; let k = T::MIN_EXP_INT + lsb as i16;
let z = rawfp::encode_normal(Unpacked::new(q, k)); let z = rawfp::encode_normal(Unpacked::new(q, k));
let q_even = q % 2 == 0; let q_even = q % 2 == 0;
match num::compare_with_half_ulp(&x, lsb) { match num::compare_with_half_ulp(&x, lsb) {

View file

@ -214,11 +214,11 @@ fn dec2flt<T: RawFloat>(s: &str) -> Result<T, ParseFloatError> {
let (sign, s) = extract_sign(s); let (sign, s) = extract_sign(s);
let flt = match parse_decimal(s) { let flt = match parse_decimal(s) {
ParseResult::Valid(decimal) => convert(decimal)?, ParseResult::Valid(decimal) => convert(decimal)?,
ParseResult::ShortcutToInf => T::infinity2(), ParseResult::ShortcutToInf => T::INFINITY,
ParseResult::ShortcutToZero => T::zero2(), ParseResult::ShortcutToZero => T::ZERO,
ParseResult::Invalid => match s { ParseResult::Invalid => match s {
"inf" => T::infinity2(), "inf" => T::INFINITY,
"NaN" => T::nan2(), "NaN" => T::NAN,
_ => { return Err(pfe_invalid()); } _ => { return Err(pfe_invalid()); }
} }
}; };
@ -254,7 +254,7 @@ fn convert<T: RawFloat>(mut decimal: Decimal) -> Result<T, ParseFloatError> {
// FIXME These bounds are rather conservative. A more careful analysis of the failure modes // FIXME These bounds are rather conservative. A more careful analysis of the failure modes
// of Bellerophon could allow using it in more cases for a massive speed up. // of Bellerophon could allow using it in more cases for a massive speed up.
let exponent_in_range = table::MIN_E <= e && e <= table::MAX_E; let exponent_in_range = table::MIN_E <= e && e <= table::MAX_E;
let value_in_range = upper_bound <= T::max_normal_digits() as u64; let value_in_range = upper_bound <= T::MAX_NORMAL_DIGITS as u64;
if exponent_in_range && value_in_range { if exponent_in_range && value_in_range {
Ok(algorithm::bellerophon(&f, e)) Ok(algorithm::bellerophon(&f, e))
} else { } else {
@ -315,17 +315,17 @@ fn bound_intermediate_digits(decimal: &Decimal, e: i64) -> u64 {
fn trivial_cases<T: RawFloat>(decimal: &Decimal) -> Option<T> { fn trivial_cases<T: RawFloat>(decimal: &Decimal) -> Option<T> {
// There were zeros but they were stripped by simplify() // There were zeros but they were stripped by simplify()
if decimal.integral.is_empty() && decimal.fractional.is_empty() { if decimal.integral.is_empty() && decimal.fractional.is_empty() {
return Some(T::zero2()); return Some(T::ZERO);
} }
// This is a crude approximation of ceil(log10(the real value)). We don't need to worry too // This is a crude approximation of ceil(log10(the real value)). We don't need to worry too
// much about overflow here because the input length is tiny (at least compared to 2^64) and // much about overflow here because the input length is tiny (at least compared to 2^64) and
// the parser already handles exponents whose absolute value is greater than 10^18 // the parser already handles exponents whose absolute value is greater than 10^18
// (which is still 10^19 short of 2^64). // (which is still 10^19 short of 2^64).
let max_place = decimal.exp + decimal.integral.len() as i64; let max_place = decimal.exp + decimal.integral.len() as i64;
if max_place > T::inf_cutoff() { if max_place > T::INF_CUTOFF {
return Some(T::infinity2()); return Some(T::INFINITY);
} else if max_place < T::zero_cutoff() { } else if max_place < T::ZERO_CUTOFF {
return Some(T::zero2()); return Some(T::ZERO);
} }
None None
} }

View file

@ -56,24 +56,12 @@ impl Unpacked {
/// ///
/// Should **never ever** be implemented for other types or be used outside the dec2flt module. /// Should **never ever** be implemented for other types or be used outside the dec2flt module.
/// Inherits from `Float` because there is some overlap, but all the reused methods are trivial. /// Inherits from `Float` because there is some overlap, but all the reused methods are trivial.
/// The "methods" (pseudo-constants) with default implementation should not be overridden.
pub trait RawFloat : Float + Copy + Debug + LowerExp pub trait RawFloat : Float + Copy + Debug + LowerExp
+ Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self> + Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self>
{ {
// suffix of "2" because Float::infinity is deprecated const INFINITY: Self;
#[allow(deprecated)] const NAN: Self;
fn infinity2() -> Self { const ZERO: Self;
Float::infinity()
}
// suffix of "2" because Float::nan is deprecated
#[allow(deprecated)]
fn nan2() -> Self {
Float::nan()
}
// suffix of "2" because Float::zero is deprecated
fn zero2() -> Self;
// suffix of "2" because Float::integer_decode is deprecated // suffix of "2" because Float::integer_decode is deprecated
#[allow(deprecated)] #[allow(deprecated)]
@ -94,94 +82,83 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
/// represented, the other code in this module makes sure to never let that happen. /// represented, the other code in this module makes sure to never let that happen.
fn from_int(x: u64) -> Self; fn from_int(x: u64) -> Self;
/// Get the value 10<sup>e</sup> from a pre-computed table. Panics for e >= /// Get the value 10<sup>e</sup> from a pre-computed table.
/// ceil_log5_of_max_sig(). /// Panics for `e >= CEIL_LOG5_OF_MAX_SIG`.
fn short_fast_pow10(e: usize) -> Self; fn short_fast_pow10(e: usize) -> Self;
// FIXME Everything that follows should be associated constants, but taking the value of an
// associated constant from a type parameter does not work (yet?)
// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
// the methods aren't painful enough to rewrite.
/// What the name says. It's easier to hard code than juggling intrinsics and /// What the name says. It's easier to hard code than juggling intrinsics and
/// hoping LLVM constant folds it. /// hoping LLVM constant folds it.
fn ceil_log5_of_max_sig() -> i16; const CEIL_LOG5_OF_MAX_SIG: i16;
/// A conservative bound on the decimal digits of inputs that can't produce overflow or zero or /// A conservative bound on the decimal digits of inputs that can't produce overflow or zero or
/// subnormals. Probably the decimal exponent of the maximum normal value, hence the name. /// subnormals. Probably the decimal exponent of the maximum normal value, hence the name.
fn max_normal_digits() -> usize; const MAX_NORMAL_DIGITS: usize;
/// When the most significant decimal digit has a place value greater than this, the number /// When the most significant decimal digit has a place value greater than this, the number
/// is certainly rounded to infinity. /// is certainly rounded to infinity.
fn inf_cutoff() -> i64; const INF_CUTOFF: i64;
/// When the most significant decimal digit has a place value less than this, the number /// When the most significant decimal digit has a place value less than this, the number
/// is certainly rounded to zero. /// is certainly rounded to zero.
fn zero_cutoff() -> i64; const ZERO_CUTOFF: i64;
/// The number of bits in the exponent. /// The number of bits in the exponent.
fn exp_bits() -> u8; const EXP_BITS: u8;
/// The number of bits in the significand, *including* the hidden bit. /// The number of bits in the significand, *including* the hidden bit.
fn sig_bits() -> u8; const SIG_BITS: u8;
/// The number of bits in the significand, *excluding* the hidden bit. /// The number of bits in the significand, *excluding* the hidden bit.
fn explicit_sig_bits() -> u8 { const EXPLICIT_SIG_BITS: u8;
Self::sig_bits() - 1
}
/// The maximum legal exponent in fractional representation. /// The maximum legal exponent in fractional representation.
fn max_exp() -> i16 { const MAX_EXP: i16;
(1 << (Self::exp_bits() - 1)) - 1
}
/// The minimum legal exponent in fractional representation, excluding subnormals. /// The minimum legal exponent in fractional representation, excluding subnormals.
fn min_exp() -> i16 { const MIN_EXP: i16;
-Self::max_exp() + 1
}
/// `MAX_EXP` for integral representation, i.e., with the shift applied. /// `MAX_EXP` for integral representation, i.e., with the shift applied.
fn max_exp_int() -> i16 { const MAX_EXP_INT: i16;
Self::max_exp() - (Self::sig_bits() as i16 - 1)
}
/// `MAX_EXP` encoded (i.e., with offset bias) /// `MAX_EXP` encoded (i.e., with offset bias)
fn max_encoded_exp() -> i16 { const MAX_ENCODED_EXP: i16;
(1 << Self::exp_bits()) - 1
}
/// `MIN_EXP` for integral representation, i.e., with the shift applied. /// `MIN_EXP` for integral representation, i.e., with the shift applied.
fn min_exp_int() -> i16 { const MIN_EXP_INT: i16;
Self::min_exp() - (Self::sig_bits() as i16 - 1)
}
/// The maximum normalized significand in integral representation. /// The maximum normalized significand in integral representation.
fn max_sig() -> u64 { const MAX_SIG: u64;
(1 << Self::sig_bits()) - 1
}
/// The minimal normalized significand in integral representation. /// The minimal normalized significand in integral representation.
fn min_sig() -> u64 { const MIN_SIG: u64;
1 << (Self::sig_bits() - 1) }
// Mostly a workaround for #34344.
macro_rules! other_constants {
($type: ident) => {
const EXPLICIT_SIG_BITS: u8 = Self::SIG_BITS - 1;
const MAX_EXP: i16 = (1 << (Self::EXP_BITS - 1)) - 1;
const MIN_EXP: i16 = -Self::MAX_EXP + 1;
const MAX_EXP_INT: i16 = Self::MAX_EXP - (Self::SIG_BITS as i16 - 1);
const MAX_ENCODED_EXP: i16 = (1 << Self::EXP_BITS) - 1;
const MIN_EXP_INT: i16 = Self::MIN_EXP - (Self::SIG_BITS as i16 - 1);
const MAX_SIG: u64 = (1 << Self::SIG_BITS) - 1;
const MIN_SIG: u64 = 1 << (Self::SIG_BITS - 1);
const INFINITY: Self = $crate::$type::INFINITY;
const NAN: Self = $crate::$type::NAN;
const ZERO: Self = 0.0;
} }
} }
impl RawFloat for f32 { impl RawFloat for f32 {
fn zero2() -> Self { const SIG_BITS: u8 = 24;
0.0 const EXP_BITS: u8 = 8;
} const CEIL_LOG5_OF_MAX_SIG: i16 = 11;
const MAX_NORMAL_DIGITS: usize = 35;
fn sig_bits() -> u8 { const INF_CUTOFF: i64 = 40;
24 const ZERO_CUTOFF: i64 = -48;
} other_constants!(f32);
fn exp_bits() -> u8 {
8
}
fn ceil_log5_of_max_sig() -> i16 {
11
}
fn transmute(self) -> u64 { fn transmute(self) -> u64 {
let bits: u32 = unsafe { transmute(self) }; let bits: u32 = unsafe { transmute(self) };
@ -207,37 +184,17 @@ impl RawFloat for f32 {
fn short_fast_pow10(e: usize) -> Self { fn short_fast_pow10(e: usize) -> Self {
table::F32_SHORT_POWERS[e] table::F32_SHORT_POWERS[e]
} }
fn max_normal_digits() -> usize {
35
}
fn inf_cutoff() -> i64 {
40
}
fn zero_cutoff() -> i64 {
-48
}
} }
impl RawFloat for f64 { impl RawFloat for f64 {
fn zero2() -> Self { const SIG_BITS: u8 = 53;
0.0 const EXP_BITS: u8 = 11;
} const CEIL_LOG5_OF_MAX_SIG: i16 = 23;
const MAX_NORMAL_DIGITS: usize = 305;
fn sig_bits() -> u8 { const INF_CUTOFF: i64 = 310;
53 const ZERO_CUTOFF: i64 = -326;
} other_constants!(f64);
fn exp_bits() -> u8 {
11
}
fn ceil_log5_of_max_sig() -> i16 {
23
}
fn transmute(self) -> u64 { fn transmute(self) -> u64 {
let bits: u64 = unsafe { transmute(self) }; let bits: u64 = unsafe { transmute(self) };
@ -262,38 +219,27 @@ impl RawFloat for f64 {
fn short_fast_pow10(e: usize) -> Self { fn short_fast_pow10(e: usize) -> Self {
table::F64_SHORT_POWERS[e] table::F64_SHORT_POWERS[e]
} }
fn max_normal_digits() -> usize {
305
}
fn inf_cutoff() -> i64 {
310
}
fn zero_cutoff() -> i64 {
-326
}
} }
/// Convert an Fp to the closest f64. Only handles number that fit into a normalized f64. /// Convert an Fp to the closest machine float type.
/// Does not handle subnormal results.
pub fn fp_to_float<T: RawFloat>(x: Fp) -> T { pub fn fp_to_float<T: RawFloat>(x: Fp) -> T {
let x = x.normalize(); let x = x.normalize();
// x.f is 64 bit, so x.e has a mantissa shift of 63 // x.f is 64 bit, so x.e has a mantissa shift of 63
let e = x.e + 63; let e = x.e + 63;
if e > T::max_exp() { if e > T::MAX_EXP {
panic!("fp_to_float: exponent {} too large", e) panic!("fp_to_float: exponent {} too large", e)
} else if e > T::min_exp() { } else if e > T::MIN_EXP {
encode_normal(round_normal::<T>(x)) encode_normal(round_normal::<T>(x))
} else { } else {
panic!("fp_to_float: exponent {} too small", e) panic!("fp_to_float: exponent {} too small", e)
} }
} }
/// Round the 64-bit significand to 53 bit with half-to-even. Does not handle exponent overflow. /// Round the 64-bit significand to T::SIG_BITS bits with half-to-even.
/// Does not handle exponent overflow.
pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked { pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
let excess = 64 - T::sig_bits() as i16; let excess = 64 - T::SIG_BITS as i16;
let half: u64 = 1 << (excess - 1); let half: u64 = 1 << (excess - 1);
let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1)); let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1));
assert_eq!(q << excess | rem, x.f); assert_eq!(q << excess | rem, x.f);
@ -303,8 +249,8 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
Unpacked::new(q, k) Unpacked::new(q, k)
} else if rem == half && (q % 2) == 0 { } else if rem == half && (q % 2) == 0 {
Unpacked::new(q, k) Unpacked::new(q, k)
} else if q == T::max_sig() { } else if q == T::MAX_SIG {
Unpacked::new(T::min_sig(), k + 1) Unpacked::new(T::MIN_SIG, k + 1)
} else { } else {
Unpacked::new(q + 1, k) Unpacked::new(q + 1, k)
} }
@ -313,22 +259,22 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
/// Inverse of `RawFloat::unpack()` for normalized numbers. /// Inverse of `RawFloat::unpack()` for normalized numbers.
/// Panics if the significand or exponent are not valid for normalized numbers. /// Panics if the significand or exponent are not valid for normalized numbers.
pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T { pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T {
debug_assert!(T::min_sig() <= x.sig && x.sig <= T::max_sig(), debug_assert!(T::MIN_SIG <= x.sig && x.sig <= T::MAX_SIG,
"encode_normal: significand not normalized"); "encode_normal: significand not normalized");
// Remove the hidden bit // Remove the hidden bit
let sig_enc = x.sig & !(1 << T::explicit_sig_bits()); let sig_enc = x.sig & !(1 << T::EXPLICIT_SIG_BITS);
// Adjust the exponent for exponent bias and mantissa shift // Adjust the exponent for exponent bias and mantissa shift
let k_enc = x.k + T::max_exp() + T::explicit_sig_bits() as i16; let k_enc = x.k + T::MAX_EXP + T::EXPLICIT_SIG_BITS as i16;
debug_assert!(k_enc != 0 && k_enc < T::max_encoded_exp(), debug_assert!(k_enc != 0 && k_enc < T::MAX_ENCODED_EXP,
"encode_normal: exponent out of range"); "encode_normal: exponent out of range");
// Leave sign bit at 0 ("+"), our numbers are all positive // Leave sign bit at 0 ("+"), our numbers are all positive
let bits = (k_enc as u64) << T::explicit_sig_bits() | sig_enc; let bits = (k_enc as u64) << T::EXPLICIT_SIG_BITS | sig_enc;
T::from_bits(bits) T::from_bits(bits)
} }
/// Construct the subnormal. A mantissa of 0 is allowed and constructs zero. /// Construct a subnormal. A mantissa of 0 is allowed and constructs zero.
pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T { pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T {
assert!(significand < T::min_sig(), "encode_subnormal: not actually subnormal"); assert!(significand < T::MIN_SIG, "encode_subnormal: not actually subnormal");
// Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits. // Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits.
T::from_bits(significand) T::from_bits(significand)
} }
@ -364,8 +310,8 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
Zero => panic!("prev_float: argument is zero"), Zero => panic!("prev_float: argument is zero"),
Normal => { Normal => {
let Unpacked { sig, k } = x.unpack(); let Unpacked { sig, k } = x.unpack();
if sig == T::min_sig() { if sig == T::MIN_SIG {
encode_normal(Unpacked::new(T::max_sig(), k - 1)) encode_normal(Unpacked::new(T::MAX_SIG, k - 1))
} else { } else {
encode_normal(Unpacked::new(sig - 1, k)) encode_normal(Unpacked::new(sig - 1, k))
} }
@ -380,7 +326,7 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
pub fn next_float<T: RawFloat>(x: T) -> T { pub fn next_float<T: RawFloat>(x: T) -> T {
match x.classify() { match x.classify() {
Nan => panic!("next_float: argument is NaN"), Nan => panic!("next_float: argument is NaN"),
Infinite => T::infinity2(), Infinite => T::INFINITY,
// This seems too good to be true, but it works. // This seems too good to be true, but it works.
// 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa. // 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa.
// In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F. // In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F.