From 81f0b8d9cd408f839a71f1c690a291137eba23b3 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Tue, 13 Sep 2022 21:58:30 +0200 Subject: [PATCH] numbers, yee haw --- mono-fmt-macro/Cargo.toml | 2 +- src/lib.rs | 6 + src/opts.rs | 186 +++++++++++------------ src/std_impl/formatter.rs | 7 - src/std_impl/mod.rs | 310 +++++++++++++++++++++++++++++++++++++- src/std_impl/num.rs | 130 ++-------------- 6 files changed, 414 insertions(+), 227 deletions(-) delete mode 100644 src/std_impl/formatter.rs diff --git a/mono-fmt-macro/Cargo.toml b/mono-fmt-macro/Cargo.toml index 4ae5f4c..a817180 100644 --- a/mono-fmt-macro/Cargo.toml +++ b/mono-fmt-macro/Cargo.toml @@ -12,4 +12,4 @@ proc-macro = true peekmore = "1.0.0" proc-macro2 = "1.0.43" quote = "1.0.21" -syn = { version = "1.0.99" } +syn = { version = "1.0.99", features = ["full"] } diff --git a/src/lib.rs b/src/lib.rs index e0fd5c3..7c981c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -161,6 +161,12 @@ mod tests { let result = format!("test {:?} hello", "uwu"); assert_eq!(result, r#"test "uwu" hello"#); } + + #[test] + fn number() { + let result = format!("a: {}", 32523532u64); + assert_eq!(result, "a: 32523532"); + } } fn f() { diff --git a/src/opts.rs b/src/opts.rs index 36cec26..d54f09e 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -1,31 +1,96 @@ use crate::Formatter; +#[derive(Debug, Clone, Copy)] pub enum Alignment { Left, Center, Right, + Unknown, } -pub trait FmtOpts { - #[doc(hidden)] - type Inner: FmtOpts; +macro_rules! options { + ( + $( + fn $name:ident() -> $ret:ty { + $($default:tt)* + } + )* + ) => { + pub trait FmtOpts { + #[doc(hidden)] + type Inner: FmtOpts; + $( + #[inline] + fn $name() -> $ret { + Self::Inner::$name() + } + )* + } + + impl FmtOpts for () { + type Inner = never::Never; + + $( + #[inline] + fn $name() -> $ret { + $($default)* + } + )* + } + + impl Formatter { + $( + #[inline] + pub fn $name(&self) -> $ret { + O::$name() + } + )* + } + }; +} + +options!( fn alternate() -> bool { - Self::Inner::alternate() + false } fn width() -> Option { - Self::Inner::width() + None } - fn align() -> Option { - Self::Inner::align() + fn align() -> Alignment { + Alignment::Unknown } - fn fill() -> Option { - Self::Inner::fill() + fn fill() -> char { + ' ' } -} + + fn sign_plus() -> bool { + false + } + + fn sign_aware_zero_pad() -> bool { + false + } + + fn sign_minus() -> bool { + false + } + + fn precision() -> Option { + None + } + + fn debug_lower_hex() -> bool { + false + } + + fn debug_upper_hex() -> bool { + false + } +); mod never { use crate::FmtOpts; @@ -45,29 +110,11 @@ mod never { } } -impl FmtOpts for () { - type Inner = never::Never; - - fn alternate() -> bool { - false - } - - fn width() -> Option { - None - } - - fn align() -> Option { - None - } - - fn fill() -> Option { - None - } -} pub struct WithAlternate(pub I); impl FmtOpts for WithAlternate { type Inner = I; + #[inline] fn alternate() -> bool { true } @@ -76,6 +123,7 @@ pub struct WithWidth(pub I); impl FmtOpts for WithWidth { type Inner = I; + #[inline] fn width() -> Option { Some(A) } @@ -84,91 +132,35 @@ pub struct WithLeftAlign(pub I); impl FmtOpts for WithLeftAlign { type Inner = I; - fn align() -> Option { - Some(Alignment::Left) + #[inline] + fn align() -> Alignment { + Alignment::Left } } pub struct WithRightAlign(pub I); impl FmtOpts for WithRightAlign { type Inner = I; - fn align() -> Option { - Some(Alignment::Right) + #[inline] + fn align() -> Alignment { + Alignment::Right } } pub struct WithCenterAlign(pub I); impl FmtOpts for WithCenterAlign { type Inner = I; - fn align() -> Option { - Some(Alignment::Center) + #[inline] + fn align() -> Alignment { + Alignment::Center } } pub struct WithFill(pub I); impl FmtOpts for WithFill { type Inner = I; - fn fill() -> Option { - Some(A) - } -} - -impl Formatter { - pub fn alternate(&self) -> bool { - O::alternate() - } - - pub fn width() -> Option { - O::width() - } - - pub fn align() -> Option { - O::align() - } - - pub fn fill() -> Option { - O::fill() - } - - pub fn with_alternate(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithAlternate(self.opts), - } - } - - pub fn with_width(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithWidth(self.opts), - } - } - - pub fn with_left_align(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithLeftAlign(self.opts), - } - } - - pub fn with_right_align(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithRightAlign(self.opts), - } - } - - pub fn with_center_align(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithCenterAlign(self.opts), - } - } - - pub fn with_fill(self) -> Formatter> { - Formatter { - buf: self.buf, - opts: WithFill(self.opts), - } + #[inline] + fn fill() -> char { + A } } diff --git a/src/std_impl/formatter.rs b/src/std_impl/formatter.rs deleted file mode 100644 index e62e52e..0000000 --- a/src/std_impl/formatter.rs +++ /dev/null @@ -1,7 +0,0 @@ -use crate::{FmtOpts, Formatter, Result, Write}; - -impl Formatter { - pub fn pad_integral(&mut self, is_nonnegative: bool, prefix: &str, buf: &str) -> Result { - todo!() - } -} diff --git a/src/std_impl/mod.rs b/src/std_impl/mod.rs index 4f466ec..0e012aa 100644 --- a/src/std_impl/mod.rs +++ b/src/std_impl/mod.rs @@ -1,4 +1,312 @@ //! Copied modified stuff from core -mod formatter; mod num; + +use crate::{opts::Alignment, Error, FmtOpts, Formatter, Result, Write}; + +mod numfmt { + //! Shared utilities used by both float and integer formatting. + + /// Formatted parts. + #[derive(Copy, Clone, PartialEq, Eq, Debug)] + pub enum Part<'a> { + /// Given number of zero digits. + Zero(usize), + /// A literal number up to 5 digits. + Num(u16), + /// A verbatim copy of given bytes. + Copy(&'a [u8]), + } + + impl<'a> Part<'a> { + /// Returns the exact byte length of given part. + pub fn len(&self) -> usize { + match *self { + Part::Zero(nzeroes) => nzeroes, + Part::Num(v) => { + if v < 1_000 { + if v < 10 { + 1 + } else if v < 100 { + 2 + } else { + 3 + } + } else { + if v < 10_000 { + 4 + } else { + 5 + } + } + } + Part::Copy(buf) => buf.len(), + } + } + + /// Writes a part into the supplied buffer. + /// Returns the number of written bytes, or `None` if the buffer is not enough. + /// (It may still leave partially written bytes in the buffer; do not rely on that.) + pub fn write(&self, out: &mut [u8]) -> Option { + let len = self.len(); + if out.len() >= len { + match *self { + Part::Zero(nzeroes) => { + for c in &mut out[..nzeroes] { + *c = b'0'; + } + } + Part::Num(mut v) => { + for c in out[..len].iter_mut().rev() { + *c = b'0' + (v % 10) as u8; + v /= 10; + } + } + Part::Copy(buf) => { + out[..buf.len()].copy_from_slice(buf); + } + } + Some(len) + } else { + None + } + } + } + + /// Formatted result containing one or more parts. + /// This can be written to the byte buffer or converted to the allocated string. + #[allow(missing_debug_implementations)] + #[derive(Clone)] + pub struct Formatted<'a> { + /// A byte slice representing a sign, either `""`, `"-"` or `"+"`. + pub sign: &'static str, + /// Formatted parts to be rendered after a sign and optional zero padding. + pub parts: &'a [Part<'a>], + } + + impl<'a> Formatted<'a> { + /// Returns the exact byte length of combined formatted result. + pub fn len(&self) -> usize { + let mut len = self.sign.len(); + for part in self.parts { + len += part.len(); + } + len + } + + /// Writes all formatted parts into the supplied buffer. + /// Returns the number of written bytes, or `None` if the buffer is not enough. + /// (It may still leave partially written bytes in the buffer; do not rely on that.) + pub fn write(&self, out: &mut [u8]) -> Option { + if out.len() < self.sign.len() { + return None; + } + out[..self.sign.len()].copy_from_slice(self.sign.as_bytes()); + + let mut written = self.sign.len(); + for part in self.parts { + let len = part.write(&mut out[written..])?; + written += len; + } + Some(written) + } + } +} + +/// Padding after the end of something. Returned by `Formatter::padding`. +#[must_use = "don't forget to write the post padding"] +pub(crate) struct PostPadding { + fill: char, + padding: usize, +} + +impl PostPadding { + fn new(fill: char, padding: usize) -> PostPadding { + PostPadding { fill, padding } + } + + /// Write this post padding. + pub(crate) fn write(self, f: &mut Formatter) -> Result { + for _ in 0..self.padding { + f.buf.write_char(self.fill)?; + } + Ok(()) + } +} + +impl Formatter { + pub fn pad_integral(&mut self, is_nonnegative: bool, prefix: &str, buf: &str) -> Result { + let mut width = buf.len(); + + let mut sign = None; + if !is_nonnegative { + sign = Some('-'); + width += 1; + } else if self.sign_plus() { + sign = Some('+'); + width += 1; + } + + let prefix = if self.alternate() { + width += prefix.chars().count(); + Some(prefix) + } else { + None + }; + + // Writes the sign if it exists, and then the prefix if it was requested + #[inline(never)] + fn write_prefix(f: &mut Formatter, sign: Option, prefix: Option<&str>) -> Result { + if let Some(c) = sign { + f.buf.write_char(c)?; + } + if let Some(prefix) = prefix { + f.buf.write_str(prefix) + } else { + Ok(()) + } + } + + // The `width` field is more of a `min-width` parameter at this point. + match self.width() { + // If there's no minimum length requirements then we can just + // write the bytes. + None => { + write_prefix(self, sign, prefix)?; + self.buf.write_str(buf) + } + // Check if we're over the minimum width, if so then we can also + // just write the bytes. + Some(min) if width >= min => { + write_prefix(self, sign, prefix)?; + self.buf.write_str(buf) + } + // The sign and prefix goes before the padding if the fill character + // is zero + Some(min) if self.sign_aware_zero_pad() => { + write_prefix(self, sign, prefix)?; + let post_padding = self.padding(min - width, Alignment::Right, '0', Alignment::Right)?; + self.buf.write_str(buf)?; + post_padding.write(self)?; + Ok(()) + } + // Otherwise, the sign and prefix goes after the padding + Some(min) => { + let post_padding = self.padding(min - width, Alignment::Right, self.fill(), self.align())?; + write_prefix(self, sign, prefix)?; + self.buf.write_str(buf)?; + post_padding.write(self) + } + } + } + + fn pad_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result { + // WARN(mono-fmt): This was changed heavily, there might be a bug here + if let Some(mut width) = self.width() { + // for the sign-aware zero padding, we render the sign first and + // behave as if we had no sign from the beginning. + let mut formatted = formatted.clone(); + + let mut the_fill = self.fill(); + let mut the_align = self.align(); + if self.sign_aware_zero_pad() { + // a sign always goes first + let sign = formatted.sign; + self.buf.write_str(sign)?; + + // remove the sign from the formatted parts + formatted.sign = ""; + width = width.saturating_sub(sign.len()); + the_fill = '0'; + the_align = Alignment::Right; + } + + // remaining parts go through the ordinary padding process. + let len = formatted.len(); + let ret = if width <= len { + // no padding + self.write_formatted_parts(&formatted) + } else { + let post_padding = self.padding(width - len, the_align, the_fill, the_align)?; + self.write_formatted_parts(&formatted)?; + post_padding.write(self) + }; + ret + } else { + // this is the common case and we take a shortcut + self.write_formatted_parts(formatted) + } + } + + pub(crate) fn padding( + &mut self, + padding: usize, + default: Alignment, + actual_fill: char, + actual_align: Alignment, + ) -> std::result::Result { + // WARN: We might have `self` in an invalid state, don't touch `self` opts + let align = match actual_align { + Alignment::Unknown => default, + _ => actual_align, + }; + + let (pre_pad, post_pad) = match align { + Alignment::Left => (0, padding), + Alignment::Right | Alignment::Unknown => (padding, 0), + Alignment::Center => (padding / 2, (padding + 1) / 2), + }; + + for _ in 0..pre_pad { + self.buf.write_char(actual_fill)?; + } + + Ok(PostPadding::new(actual_fill, post_pad)) + } + + fn write_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result { + fn write_bytes(buf: &mut W, s: &[u8]) -> Result { + // SAFETY: This is used for `numfmt::Part::Num` and `numfmt::Part::Copy`. + // It's safe to use for `numfmt::Part::Num` since every char `c` is between + // `b'0'` and `b'9'`, which means `s` is valid UTF-8. + // It's also probably safe in practice to use for `numfmt::Part::Copy(buf)` + // since `buf` should be plain ASCII, but it's possible for someone to pass + // in a bad value for `buf` into `numfmt::to_shortest_str` since it is a + // public function. + // FIXME: Determine whether this could result in UB. + buf.write_str(unsafe { std::str::from_utf8_unchecked(s) }) + } + + if !formatted.sign.is_empty() { + self.buf.write_str(formatted.sign)?; + } + for part in formatted.parts { + match *part { + numfmt::Part::Zero(mut nzeroes) => { + const ZEROES: &str = // 64 zeroes + "0000000000000000000000000000000000000000000000000000000000000000"; + while nzeroes > ZEROES.len() { + self.buf.write_str(ZEROES)?; + nzeroes -= ZEROES.len(); + } + if nzeroes > 0 { + self.buf.write_str(&ZEROES[..nzeroes])?; + } + } + numfmt::Part::Num(mut v) => { + let mut s = [0; 5]; + let len = part.len(); + for c in s[..len].iter_mut().rev() { + *c = b'0' + (v % 10) as u8; + v /= 10; + } + write_bytes(&mut self.buf, &s[..len])?; + } + numfmt::Part::Copy(buf) => { + write_bytes(&mut self.buf, buf)?; + } + } + } + Ok(()) + } +} diff --git a/src/std_impl/num.rs b/src/std_impl/num.rs index da629cf..3bd1ad7 100644 --- a/src/std_impl/num.rs +++ b/src/std_impl/num.rs @@ -6,116 +6,7 @@ use std::{ ptr, slice, str, }; -use crate::{self as fmt, FmtOpts, Write}; - -mod numfmt { - //! Shared utilities used by both float and integer formatting. - - /// Formatted parts. - #[derive(Copy, Clone, PartialEq, Eq, Debug)] - pub enum Part<'a> { - /// Given number of zero digits. - Zero(usize), - /// A literal number up to 5 digits. - Num(u16), - /// A verbatim copy of given bytes. - Copy(&'a [u8]), - } - - impl<'a> Part<'a> { - /// Returns the exact byte length of given part. - pub fn len(&self) -> usize { - match *self { - Part::Zero(nzeroes) => nzeroes, - Part::Num(v) => { - if v < 1_000 { - if v < 10 { - 1 - } else if v < 100 { - 2 - } else { - 3 - } - } else { - if v < 10_000 { - 4 - } else { - 5 - } - } - } - Part::Copy(buf) => buf.len(), - } - } - - /// Writes a part into the supplied buffer. - /// Returns the number of written bytes, or `None` if the buffer is not enough. - /// (It may still leave partially written bytes in the buffer; do not rely on that.) - pub fn write(&self, out: &mut [u8]) -> Option { - let len = self.len(); - if out.len() >= len { - match *self { - Part::Zero(nzeroes) => { - for c in &mut out[..nzeroes] { - *c = b'0'; - } - } - Part::Num(mut v) => { - for c in out[..len].iter_mut().rev() { - *c = b'0' + (v % 10) as u8; - v /= 10; - } - } - Part::Copy(buf) => { - out[..buf.len()].copy_from_slice(buf); - } - } - Some(len) - } else { - None - } - } - } - - /// Formatted result containing one or more parts. - /// This can be written to the byte buffer or converted to the allocated string. - #[allow(missing_debug_implementations)] - #[derive(Clone)] - pub struct Formatted<'a> { - /// A byte slice representing a sign, either `""`, `"-"` or `"+"`. - pub sign: &'static str, - /// Formatted parts to be rendered after a sign and optional zero padding. - pub parts: &'a [Part<'a>], - } - - impl<'a> Formatted<'a> { - /// Returns the exact byte length of combined formatted result. - pub fn len(&self) -> usize { - let mut len = self.sign.len(); - for part in self.parts { - len += part.len(); - } - len - } - - /// Writes all formatted parts into the supplied buffer. - /// Returns the number of written bytes, or `None` if the buffer is not enough. - /// (It may still leave partially written bytes in the buffer; do not rely on that.) - pub fn write(&self, out: &mut [u8]) -> Option { - if out.len() < self.sign.len() { - return None; - } - out[..self.sign.len()].copy_from_slice(self.sign.as_bytes()); - - let mut written = self.sign.len(); - for part in self.parts { - let len = part.write(&mut out[written..])?; - written += len; - } - Some(written) - } - } -} +use crate::{self as fmt, std_impl::numfmt, FmtOpts, Write}; #[doc(hidden)] trait DisplayInt: @@ -215,10 +106,7 @@ trait GenericRadix: Sized { // SAFETY: The only chars in `buf` are created by `Self::digit` which are assumed to be // valid UTF-8 let buf = unsafe { - str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_ptr(buf), - buf.len(), - )) + str::from_utf8_unchecked(slice::from_raw_parts(buf.as_ptr().cast(), buf.len())) }; f.pad_integral(is_nonnegative, Self::PREFIX, buf) } @@ -323,7 +211,7 @@ macro_rules! impl_Display { // 2^128 is about 3*10^38, so 39 gives an extra byte of space let mut buf = [MaybeUninit::::uninit(); 39]; let mut curr = buf.len() as isize; - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + let buf_ptr = buf.as_mut_ptr().cast::(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); // SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we @@ -451,7 +339,7 @@ macro_rules! impl_Exp { // that `curr >= 0`. let mut buf = [MaybeUninit::::uninit(); 40]; let mut curr = buf.len() as isize; //index for buf - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + let buf_ptr = buf.as_mut_ptr().cast::(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); // decode 2 chars at a time @@ -499,7 +387,7 @@ macro_rules! impl_Exp { // stores 'e' (or 'E') and the up to 2-digit exponent let mut exp_buf = [MaybeUninit::::uninit(); 3]; - let exp_ptr = MaybeUninit::slice_as_mut_ptr(&mut exp_buf); + let exp_ptr = exp_buf.as_mut_ptr().cast::(); // SAFETY: In either case, `exp_buf` is written within bounds and `exp_ptr[..len]` // is contained within `exp_buf` since `len <= 3`. let exp_slice = unsafe { @@ -589,7 +477,7 @@ impl_Exp!(i128, u128 as u128 via to_u128 named exp_u128); /// Helper function for writing a u64 into `buf` going from last to first, with `curr`. fn parse_u64_into(mut n: u64, buf: &mut [MaybeUninit; N], curr: &mut isize) { - let buf_ptr = MaybeUninit::slice_as_mut_ptr(buf); + let buf_ptr = buf.as_mut_ptr().cast::(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); assert!(*curr > 19); @@ -716,7 +604,7 @@ fn fmt_u128( // remaining since it has length 39 unsafe { ptr::write_bytes( - MaybeUninit::slice_as_mut_ptr(&mut buf).offset(target), + buf.as_mut_ptr().cast::().offset(target), b'0', (curr - target) as usize, ); @@ -730,7 +618,7 @@ fn fmt_u128( let target = (buf.len() - 38) as isize; // The raw `buf_ptr` pointer is only valid until `buf` is used the next time, // buf `buf` is not used in this scope so we are good. - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + let buf_ptr = buf.as_mut_ptr().cast::(); // SAFETY: At this point we wrote at most 38 bytes, pad up to that point, // There can only be at most 1 digit remaining. unsafe { @@ -745,7 +633,7 @@ fn fmt_u128( // UTF-8 since `DEC_DIGITS_LUT` is let buf_slice = unsafe { str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_mut_ptr(&mut buf).offset(curr), + buf.as_mut_ptr().cast::().offset(curr), buf.len() - curr as usize, )) };