numbers, yee haw

This commit is contained in:
nora 2022-09-13 21:58:30 +02:00
parent 166b495586
commit 81f0b8d9cd
6 changed files with 414 additions and 227 deletions

View file

@ -12,4 +12,4 @@ proc-macro = true
peekmore = "1.0.0"
proc-macro2 = "1.0.43"
quote = "1.0.21"
syn = { version = "1.0.99" }
syn = { version = "1.0.99", features = ["full"] }

View file

@ -161,6 +161,12 @@ mod tests {
let result = format!("test {:?} hello", "uwu");
assert_eq!(result, r#"test "uwu" hello"#);
}
// Unsigned integers are rendered in plain decimal by `{}`.
#[test]
fn number() {
    let formatted = format!("a: {}", 32523532u64);
    assert_eq!(formatted, "a: 32523532");
}
}
fn f() {

View file

@ -1,31 +1,96 @@
use crate::Formatter;
/// Where the fill characters go relative to the formatted value.
#[derive(Debug, Clone, Copy)]
pub enum Alignment {
/// The value comes first; all fill characters are written after it.
Left,
/// The fill is split around the value; when the amount is odd the extra
/// fill character goes after the value.
Center,
/// All fill characters are written before the value.
Right,
/// No explicit alignment; the padding code substitutes a caller-supplied
/// default for this variant.
Unknown,
}
pub trait FmtOpts {
#[doc(hidden)]
type Inner: FmtOpts;
// Declares every formatting option in one place. For each
// `fn name() -> Ret { default }` entry passed to the macro it generates:
//   * a `FmtOpts` trait method whose provided body forwards to `Self::Inner`,
//   * the same method on the `()` implementation, whose body is the given default,
//   * a `Formatter::name(&self)` getter that returns `O::name()`.
macro_rules! options {
(
$(
fn $name:ident() -> $ret:ty {
$($default:tt)*
}
)*
) => {
pub trait FmtOpts {
#[doc(hidden)]
type Inner: FmtOpts;
$(
// Provided method: ask the wrapped options unless an implementor overrides it.
#[inline]
fn $name() -> $ret {
Self::Inner::$name()
}
)*
}
// `()` overrides every option with the default body supplied at the call site,
// so it never forwards to its `Inner` type.
impl FmtOpts for () {
type Inner = never::Never;
$(
#[inline]
fn $name() -> $ret {
$($default)*
}
)*
}
// Getters on `Formatter` that read the option from its options type `O`.
impl<W, O: FmtOpts> Formatter<W, O> {
$(
#[inline]
pub fn $name(&self) -> $ret {
O::$name()
}
)*
}
};
}
options!(
fn alternate() -> bool {
Self::Inner::alternate()
false
}
fn width() -> Option<usize> {
Self::Inner::width()
None
}
fn align() -> Option<Alignment> {
Self::Inner::align()
fn align() -> Alignment {
Alignment::Unknown
}
fn fill() -> Option<char> {
Self::Inner::fill()
fn fill() -> char {
' '
}
}
fn sign_plus() -> bool {
false
}
fn sign_aware_zero_pad() -> bool {
false
}
fn sign_minus() -> bool {
false
}
fn precision() -> Option<usize> {
None
}
fn debug_lower_hex() -> bool {
false
}
fn debug_upper_hex() -> bool {
false
}
);
mod never {
use crate::FmtOpts;
@ -45,29 +110,11 @@ mod never {
}
}
impl FmtOpts for () {
type Inner = never::Never;
fn alternate() -> bool {
false
}
fn width() -> Option<usize> {
None
}
fn align() -> Option<Alignment> {
None
}
fn fill() -> Option<char> {
None
}
}
pub struct WithAlternate<I>(pub I);
impl<I: FmtOpts> FmtOpts for WithAlternate<I> {
type Inner = I;
#[inline]
fn alternate() -> bool {
true
}
@ -76,6 +123,7 @@ pub struct WithWidth<I, const A: usize>(pub I);
impl<I: FmtOpts, const A: usize> FmtOpts for WithWidth<I, A> {
type Inner = I;
#[inline]
fn width() -> Option<usize> {
Some(A)
}
@ -84,91 +132,35 @@ pub struct WithLeftAlign<I>(pub I);
impl<I: FmtOpts> FmtOpts for WithLeftAlign<I> {
type Inner = I;
fn align() -> Option<Alignment> {
Some(Alignment::Left)
#[inline]
fn align() -> Alignment {
Alignment::Left
}
}
pub struct WithRightAlign<I>(pub I);
impl<I: FmtOpts> FmtOpts for WithRightAlign<I> {
type Inner = I;
fn align() -> Option<Alignment> {
Some(Alignment::Right)
#[inline]
fn align() -> Alignment {
Alignment::Right
}
}
pub struct WithCenterAlign<I>(pub I);
impl<I: FmtOpts> FmtOpts for WithCenterAlign<I> {
type Inner = I;
fn align() -> Option<Alignment> {
Some(Alignment::Center)
#[inline]
fn align() -> Alignment {
Alignment::Center
}
}
pub struct WithFill<I, const A: char>(pub I);
impl<I: FmtOpts, const A: char> FmtOpts for WithFill<I, A> {
type Inner = I;
fn fill() -> Option<char> {
Some(A)
}
}
impl<W, O: FmtOpts> Formatter<W, O> {
pub fn alternate(&self) -> bool {
O::alternate()
}
pub fn width() -> Option<usize> {
O::width()
}
pub fn align() -> Option<Alignment> {
O::align()
}
pub fn fill() -> Option<char> {
O::fill()
}
pub fn with_alternate(self) -> Formatter<W, WithAlternate<O>> {
Formatter {
buf: self.buf,
opts: WithAlternate(self.opts),
}
}
pub fn with_width<const WIDTH: usize>(self) -> Formatter<W, WithWidth<O, WIDTH>> {
Formatter {
buf: self.buf,
opts: WithWidth(self.opts),
}
}
pub fn with_left_align(self) -> Formatter<W, WithLeftAlign<O>> {
Formatter {
buf: self.buf,
opts: WithLeftAlign(self.opts),
}
}
pub fn with_right_align(self) -> Formatter<W, WithRightAlign<O>> {
Formatter {
buf: self.buf,
opts: WithRightAlign(self.opts),
}
}
pub fn with_center_align(self) -> Formatter<W, WithCenterAlign<O>> {
Formatter {
buf: self.buf,
opts: WithCenterAlign(self.opts),
}
}
pub fn with_fill<const FILL: char>(self) -> Formatter<W, WithFill<O, FILL>> {
Formatter {
buf: self.buf,
opts: WithFill(self.opts),
}
#[inline]
fn fill() -> char {
A
}
}

View file

@ -1,7 +0,0 @@
use crate::{FmtOpts, Formatter, Result, Write};
impl<W: Write, O: FmtOpts> Formatter<W, O> {
pub fn pad_integral(&mut self, is_nonnegative: bool, prefix: &str, buf: &str) -> Result {
todo!()
}
}

View file

@ -1,4 +1,312 @@
//! Copied modified stuff from core
mod formatter;
mod num;
use crate::{opts::Alignment, Error, FmtOpts, Formatter, Result, Write};
mod numfmt {
    //! Building blocks shared by the integer and the float formatting code.

    /// One fragment of a formatted number.
    #[derive(Copy, Clone, PartialEq, Eq, Debug)]
    pub enum Part<'a> {
        /// A run of this many `0` digits.
        Zero(usize),
        /// A literal number rendered in decimal (at most 5 digits).
        Num(u16),
        /// Bytes that are copied verbatim.
        Copy(&'a [u8]),
    }

    impl<'a> Part<'a> {
        /// Returns the exact number of bytes this part occupies once written.
        pub fn len(&self) -> usize {
            match *self {
                Part::Zero(count) => count,
                Part::Num(0..=9) => 1,
                Part::Num(10..=99) => 2,
                Part::Num(100..=999) => 3,
                Part::Num(1_000..=9_999) => 4,
                Part::Num(_) => 5,
                Part::Copy(bytes) => bytes.len(),
            }
        }

        /// Renders this part at the start of `out`.
        ///
        /// Returns the number of bytes written, or `None` (leaving `out` untouched)
        /// when `out` is shorter than [`Part::len`].
        pub fn write(&self, out: &mut [u8]) -> Option<usize> {
            let len = self.len();
            if out.len() < len {
                return None;
            }
            // Every variant writes exactly `len` bytes, so work on that prefix only.
            let dest = &mut out[..len];
            match *self {
                Part::Zero(_) => dest.fill(b'0'),
                Part::Num(mut value) => {
                    // Least significant digit first, filling from the back.
                    for slot in dest.iter_mut().rev() {
                        *slot = b'0' + (value % 10) as u8;
                        value /= 10;
                    }
                }
                Part::Copy(bytes) => dest.copy_from_slice(bytes),
            }
            Some(len)
        }
    }

    /// A sign plus a sequence of parts, ready to be written out.
    #[allow(missing_debug_implementations)]
    #[derive(Clone)]
    pub struct Formatted<'a> {
        /// The sign to emit first: `""`, `"-"` or `"+"`.
        pub sign: &'static str,
        /// The parts rendered after the sign (and after any zero padding).
        pub parts: &'a [Part<'a>],
    }

    impl<'a> Formatted<'a> {
        /// Returns the exact byte length of the sign and all parts combined.
        pub fn len(&self) -> usize {
            self.parts
                .iter()
                .fold(self.sign.len(), |total, part| total + part.len())
        }

        /// Renders the sign and every part, back to back, at the start of `out`.
        ///
        /// Returns the number of bytes written, or `None` when `out` is too small.
        /// On `None` the buffer may already hold a partial rendering; do not rely on it.
        pub fn write(&self, out: &mut [u8]) -> Option<usize> {
            let sign = self.sign.as_bytes();
            out.get_mut(..sign.len())?.copy_from_slice(sign);
            self.parts.iter().try_fold(sign.len(), |written, part| {
                part.write(&mut out[written..]).map(|count| written + count)
            })
        }
    }
}
/// Fill characters that still have to be emitted after the padded content.
/// Produced by `Formatter::padding`.
#[must_use = "don't forget to write the post padding"]
pub(crate) struct PostPadding {
    fill: char,
    padding: usize,
}

impl PostPadding {
    /// Records that `padding` copies of `fill` remain to be written.
    fn new(fill: char, padding: usize) -> PostPadding {
        Self { fill, padding }
    }

    /// Emits the pending fill characters into `f`'s buffer, stopping at the
    /// first write error.
    pub(crate) fn write<W: Write, O>(self, f: &mut Formatter<W, O>) -> Result {
        let PostPadding { fill, padding } = self;
        let mut remaining = padding;
        while remaining > 0 {
            f.buf.write_char(fill)?;
            remaining -= 1;
        }
        Ok(())
    }
}
impl<W: Write, O: FmtOpts> Formatter<W, O> {
/// Writes an already-rendered integer, adding sign, prefix and padding.
///
/// * `is_nonnegative` - when `false` a `-` is written; when `true` a `+` is
///   written only if the `sign_plus` option is set.
/// * `prefix` - written after the sign, but only if the `alternate` option is set.
/// * `buf` - the digits themselves.
///
/// The occupied width is the byte length of `buf`, plus one for a sign, plus the
/// number of `char`s in the prefix when it is emitted. Padding is added only when
/// the `width` option exceeds that.
pub fn pad_integral(&mut self, is_nonnegative: bool, prefix: &str, buf: &str) -> Result {
    // Emits the sign if there is one, then the prefix if it was requested.
    #[inline(never)]
    fn emit_sign_and_prefix<W: Write, O>(
        f: &mut Formatter<W, O>,
        sign: Option<char>,
        prefix: Option<&str>,
    ) -> Result {
        if let Some(c) = sign {
            f.buf.write_char(c)?;
        }
        match prefix {
            Some(text) => f.buf.write_str(text),
            None => Ok(()),
        }
    }

    let sign = if !is_nonnegative {
        Some('-')
    } else if self.sign_plus() {
        Some('+')
    } else {
        None
    };
    let prefix = if self.alternate() { Some(prefix) } else { None };
    let occupied =
        buf.len() + usize::from(sign.is_some()) + prefix.map_or(0, |text| text.chars().count());

    // `width` acts as a minimum width: with no width, or with content that
    // already reaches it, everything is written as-is.
    let missing = match self.width() {
        Some(min) if occupied < min => min - occupied,
        _ => {
            emit_sign_and_prefix(self, sign, prefix)?;
            return self.buf.write_str(buf);
        }
    };

    if self.sign_aware_zero_pad() {
        // Zero padding sits between the sign/prefix and the digits.
        emit_sign_and_prefix(self, sign, prefix)?;
        let trailing = self.padding(missing, Alignment::Right, '0', Alignment::Right)?;
        self.buf.write_str(buf)?;
        trailing.write(self)
    } else {
        // Ordinary padding goes around the sign, prefix and digits as a whole.
        let fill = self.fill();
        let align = self.align();
        let trailing = self.padding(missing, Alignment::Right, fill, align)?;
        emit_sign_and_prefix(self, sign, prefix)?;
        self.buf.write_str(buf)?;
        trailing.write(self)
    }
}
/// Writes `formatted`, padded up to the `width` option when one is set.
///
/// With sign-aware zero padding the sign is written first, and the remaining
/// parts are right-aligned with `'0'` within the width that is left.
fn pad_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
    // WARN(mono-fmt): This was changed heavily, there might be a bug here
    let mut width = match self.width() {
        Some(width) => width,
        // The common case: no width, so nothing to pad.
        None => return self.write_formatted_parts(formatted),
    };

    let mut formatted = formatted.clone();
    let configured = (self.fill(), self.align());
    let (fill, align) = if self.sign_aware_zero_pad() {
        // The sign always goes first; afterwards behave as if there had
        // never been one.
        let sign = formatted.sign;
        self.buf.write_str(sign)?;
        formatted.sign = "";
        width = width.saturating_sub(sign.len());
        ('0', Alignment::Right)
    } else {
        configured
    };

    // Whatever is left goes through the ordinary padding process.
    let len = formatted.len();
    if width <= len {
        // Already wide enough.
        self.write_formatted_parts(&formatted)
    } else {
        let trailing = self.padding(width - len, align, fill, align)?;
        self.write_formatted_parts(&formatted)?;
        trailing.write(self)
    }
}
/// Writes the leading share of `padding` fill characters and returns the
/// trailing share as a [`PostPadding`] for the caller to write afterwards.
///
/// `actual_align` decides the split; `default` is used in its place when it is
/// `Alignment::Unknown`. Centering puts the extra character of an odd amount
/// after the content.
pub(crate) fn padding(
    &mut self,
    padding: usize,
    default: Alignment,
    actual_fill: char,
    actual_align: Alignment,
) -> std::result::Result<PostPadding, Error> {
    // WARN: We might have `self` in an invalid state, don't touch `self` opts
    let resolved = if let Alignment::Unknown = actual_align {
        default
    } else {
        actual_align
    };

    let (leading, trailing) = match resolved {
        Alignment::Center => (padding / 2, (padding + 1) / 2),
        Alignment::Left => (0, padding),
        Alignment::Right | Alignment::Unknown => (padding, 0),
    };

    for fill in std::iter::repeat(actual_fill).take(leading) {
        self.buf.write_char(fill)?;
    }

    Ok(PostPadding::new(actual_fill, trailing))
}
/// Writes the sign (when non-empty) followed by every part of `formatted`,
/// without any padding.
fn write_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
    const ZEROES: &str = // 64 zeroes
        "0000000000000000000000000000000000000000000000000000000000000000";

    // Writes raw bytes as a string without validating them.
    fn write_bytes<W: Write>(buf: &mut W, s: &[u8]) -> Result {
        // SAFETY: This is used for `numfmt::Part::Num` and `numfmt::Part::Copy`.
        // It's safe to use for `numfmt::Part::Num` since every char `c` is between
        // `b'0'` and `b'9'`, which means `s` is valid UTF-8.
        // It's also probably safe in practice to use for `numfmt::Part::Copy(buf)`
        // since `buf` should be plain ASCII, but it's possible for someone to pass
        // in a bad value for `buf` into `numfmt::to_shortest_str` since it is a
        // public function.
        // FIXME: Determine whether this could result in UB.
        buf.write_str(unsafe { std::str::from_utf8_unchecked(s) })
    }

    // Writes `count` zero digits, at most `ZEROES.len()` of them per call.
    fn write_zeroes<W: Write>(buf: &mut W, mut count: usize) -> Result {
        while count > ZEROES.len() {
            buf.write_str(ZEROES)?;
            count -= ZEROES.len();
        }
        if count > 0 {
            buf.write_str(&ZEROES[..count])?;
        }
        Ok(())
    }

    match formatted.sign {
        "" => {}
        sign => self.buf.write_str(sign)?,
    }

    for part in formatted.parts {
        match *part {
            numfmt::Part::Zero(count) => write_zeroes(&mut self.buf, count)?,
            numfmt::Part::Num(mut value) => {
                // A `u16` has at most 5 decimal digits; fill them in from the back.
                let mut digits = [0; 5];
                let count = part.len();
                for slot in digits[..count].iter_mut().rev() {
                    *slot = b'0' + (value % 10) as u8;
                    value /= 10;
                }
                write_bytes(&mut self.buf, &digits[..count])?;
            }
            numfmt::Part::Copy(bytes) => write_bytes(&mut self.buf, bytes)?,
        }
    }
    Ok(())
}
}

View file

@ -6,116 +6,7 @@ use std::{
ptr, slice, str,
};
use crate::{self as fmt, FmtOpts, Write};
mod numfmt {
//! Shared utilities used by both float and integer formatting.
/// Formatted parts.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Part<'a> {
/// Given number of zero digits.
Zero(usize),
/// A literal number up to 5 digits.
Num(u16),
/// A verbatim copy of given bytes.
Copy(&'a [u8]),
}
impl<'a> Part<'a> {
/// Returns the exact byte length of given part.
pub fn len(&self) -> usize {
match *self {
Part::Zero(nzeroes) => nzeroes,
Part::Num(v) => {
if v < 1_000 {
if v < 10 {
1
} else if v < 100 {
2
} else {
3
}
} else {
if v < 10_000 {
4
} else {
5
}
}
}
Part::Copy(buf) => buf.len(),
}
}
/// Writes a part into the supplied buffer.
/// Returns the number of written bytes, or `None` if the buffer is not enough.
/// (It may still leave partially written bytes in the buffer; do not rely on that.)
pub fn write(&self, out: &mut [u8]) -> Option<usize> {
let len = self.len();
if out.len() >= len {
match *self {
Part::Zero(nzeroes) => {
for c in &mut out[..nzeroes] {
*c = b'0';
}
}
Part::Num(mut v) => {
for c in out[..len].iter_mut().rev() {
*c = b'0' + (v % 10) as u8;
v /= 10;
}
}
Part::Copy(buf) => {
out[..buf.len()].copy_from_slice(buf);
}
}
Some(len)
} else {
None
}
}
}
/// Formatted result containing one or more parts.
/// This can be written to the byte buffer or converted to the allocated string.
#[allow(missing_debug_implementations)]
#[derive(Clone)]
pub struct Formatted<'a> {
/// A byte slice representing a sign, either `""`, `"-"` or `"+"`.
pub sign: &'static str,
/// Formatted parts to be rendered after a sign and optional zero padding.
pub parts: &'a [Part<'a>],
}
impl<'a> Formatted<'a> {
/// Returns the exact byte length of combined formatted result.
pub fn len(&self) -> usize {
let mut len = self.sign.len();
for part in self.parts {
len += part.len();
}
len
}
/// Writes all formatted parts into the supplied buffer.
/// Returns the number of written bytes, or `None` if the buffer is not enough.
/// (It may still leave partially written bytes in the buffer; do not rely on that.)
pub fn write(&self, out: &mut [u8]) -> Option<usize> {
if out.len() < self.sign.len() {
return None;
}
out[..self.sign.len()].copy_from_slice(self.sign.as_bytes());
let mut written = self.sign.len();
for part in self.parts {
let len = part.write(&mut out[written..])?;
written += len;
}
Some(written)
}
}
}
use crate::{self as fmt, std_impl::numfmt, FmtOpts, Write};
#[doc(hidden)]
trait DisplayInt:
@ -215,10 +106,7 @@ trait GenericRadix: Sized {
// SAFETY: The only chars in `buf` are created by `Self::digit` which are assumed to be
// valid UTF-8
let buf = unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(
MaybeUninit::slice_as_ptr(buf),
buf.len(),
))
str::from_utf8_unchecked(slice::from_raw_parts(buf.as_ptr().cast(), buf.len()))
};
f.pad_integral(is_nonnegative, Self::PREFIX, buf)
}
@ -323,7 +211,7 @@ macro_rules! impl_Display {
// 2^128 is about 3*10^38, so 39 gives an extra byte of space
let mut buf = [MaybeUninit::<u8>::uninit(); 39];
let mut curr = buf.len() as isize;
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
let buf_ptr = buf.as_mut_ptr().cast::<u8>();
let lut_ptr = DEC_DIGITS_LUT.as_ptr();
// SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we
@ -451,7 +339,7 @@ macro_rules! impl_Exp {
// that `curr >= 0`.
let mut buf = [MaybeUninit::<u8>::uninit(); 40];
let mut curr = buf.len() as isize; //index for buf
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
let buf_ptr = buf.as_mut_ptr().cast::<u8>();
let lut_ptr = DEC_DIGITS_LUT.as_ptr();
// decode 2 chars at a time
@ -499,7 +387,7 @@ macro_rules! impl_Exp {
// stores 'e' (or 'E') and the up to 2-digit exponent
let mut exp_buf = [MaybeUninit::<u8>::uninit(); 3];
let exp_ptr = MaybeUninit::slice_as_mut_ptr(&mut exp_buf);
let exp_ptr = exp_buf.as_mut_ptr().cast::<u8>();
// SAFETY: In either case, `exp_buf` is written within bounds and `exp_ptr[..len]`
// is contained within `exp_buf` since `len <= 3`.
let exp_slice = unsafe {
@ -589,7 +477,7 @@ impl_Exp!(i128, u128 as u128 via to_u128 named exp_u128);
/// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
fn parse_u64_into<const N: usize>(mut n: u64, buf: &mut [MaybeUninit<u8>; N], curr: &mut isize) {
let buf_ptr = MaybeUninit::slice_as_mut_ptr(buf);
let buf_ptr = buf.as_mut_ptr().cast::<u8>();
let lut_ptr = DEC_DIGITS_LUT.as_ptr();
assert!(*curr > 19);
@ -716,7 +604,7 @@ fn fmt_u128<W: Write, O: FmtOpts>(
// remaining since it has length 39
unsafe {
ptr::write_bytes(
MaybeUninit::slice_as_mut_ptr(&mut buf).offset(target),
buf.as_mut_ptr().cast::<u8>().offset(target),
b'0',
(curr - target) as usize,
);
@ -730,7 +618,7 @@ fn fmt_u128<W: Write, O: FmtOpts>(
let target = (buf.len() - 38) as isize;
// The raw `buf_ptr` pointer is only valid until `buf` is used the next time,
// buf `buf` is not used in this scope so we are good.
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
let buf_ptr = buf.as_mut_ptr().cast::<u8>();
// SAFETY: At this point we wrote at most 38 bytes, pad up to that point,
// There can only be at most 1 digit remaining.
unsafe {
@ -745,7 +633,7 @@ fn fmt_u128<W: Write, O: FmtOpts>(
// UTF-8 since `DEC_DIGITS_LUT` is
let buf_slice = unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(
MaybeUninit::slice_as_mut_ptr(&mut buf).offset(curr),
buf.as_mut_ptr().cast::<u8>().offset(curr),
buf.len() - curr as usize,
))
};