From 2dc937bba86755ce2aee05306ac3144198ee3cba Mon Sep 17 00:00:00 2001 From: Nicolas Musset Date: Thu, 19 Aug 2021 13:39:25 +0900 Subject: [PATCH 1/4] Refactor using a buffer for reading and a buffer for writing --- Cargo.lock | 18 +++++++++++++++++ Cargo.toml | 1 + src/lib.rs | 58 +++++++++++++++++++++++++++++++++++++---------------- src/main.rs | 36 +++++++++++++++++---------------- 4 files changed, 79 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d728d15..6861b53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "ansi_term" version = "0.11.0" @@ -9,6 +11,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "atty" version = "0.2.14" @@ -242,6 +250,7 @@ version = "1.0.1" dependencies = [ "clap", "criterion", + "utf8-chars", ] [[package]] @@ -519,6 +528,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "utf8-chars" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1348d8face79d019be7cbc0198e36bf93e160ddbfaa7bb54c9592627b9ec841" +dependencies = [ + "arrayvec", +] + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 9247338..11408e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ categories = ["command-line-utilities"] [dependencies] clap = { version= "2.33.3", optional = true } +utf8-chars = "1.0.0" [dev-dependencies] criterion = "0.3" diff --git a/src/lib.rs b/src/lib.rs index 13f0612..9d9de6d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,10 @@ //! //! It does not do anything more than that, which makes it so fast. +use std::io::{BufReader, BufWriter, Write}; +use utf8_chars::BufReadCharsExt; +use std::error::Error; + /// /// Set the indentation used for the formatting. /// @@ -24,16 +28,32 @@ pub enum Indentation<'a> { /// The default indentation is faster than a custom one /// pub fn format_json(json: &str, indentation: Indentation) -> String { - // at least as big as the input to avoid resizing - // this might be too big if the input string is formatted in a weird way, but that's not expected, and it will still be efficient - let mut out = String::with_capacity(json.len()); + let mut reader = BufReader::new(json.as_bytes()); + let mut writer = BufWriter::new(Vec::new()); + + format_json_buffered(&mut reader, &mut writer, indentation).unwrap(); + String::from_utf8(writer.into_inner().unwrap()).unwrap() +} + +/// +/// # Formats a json string +/// +/// The indentation can be set to any value using [Indentation](jsonformat::Indentation) +/// The default value is two spaces +/// The default indentation is faster than a custom one +/// +pub fn format_json_buffered(reader: &mut BufReader, writer: &mut BufWriter, indentation: Indentation) -> Result<(), Box> +where + R: std::io::Read, + W: std::io::Write { let mut escaped = false; let mut in_string = false; let mut indent_level = 0usize; let mut newline_requested = false; // invalidated if next character is ] or } - for char in json.chars() { + for char in reader.chars() { + let char = char?; if in_string { let mut escape_here = false; match char { @@ -49,7 +69,7 @@ pub fn format_json(json: &str, indentation: Indentation) -> String { } _ => {} } - out.push(char); + writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; escaped = escape_here; } else { let mut auto_push = true; @@ -71,14 +91,14 @@ pub fn format_json(json: &str, indentation: Indentation) -> String { indent_level = indent_level.saturating_sub(1); if !newline_requested { // see comment below about newline_requested - out.push('\n'); - indent(&mut out, indent_level, indentation); + writeln!(writer)?; + indent_buffered(writer, indent_level, indentation)?; } } ':' => { auto_push = false; - out.push(char); - out.push(' '); + writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; + writer.write_all(" ".as_bytes())?; } ',' => { request_newline = true; @@ -89,33 +109,37 @@ pub fn format_json(json: &str, indentation: Indentation) -> String { // newline only happens after { [ and , // this means we can safely assume that it being followed up by } or ] // means an empty object/array - out.push('\n'); - indent(&mut out, old_level, indentation); + writeln!(writer)?; + indent_buffered(writer, old_level, indentation)?; } if auto_push { - out.push(char); + writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; } newline_requested = request_newline; } } - out + Ok(()) } -fn indent(buf: &mut String, level: usize, indent_str: Indentation) { +fn indent_buffered(writer: &mut BufWriter, level: usize, indent_str: Indentation) -> Result<(), Box> +where + W: std::io::Write { for _ in 0..level { match indent_str { Indentation::Default => { - buf.push(' '); - buf.push(' '); + writer.write_all(" ".as_bytes())?; + writer.write_all(" ".as_bytes())?; } Indentation::Custom(indent) => { - buf.push_str(indent); + writer.write_all(indent.as_bytes())?; } } } + + Ok(()) } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index 238950f..16ce1b5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ use clap::clap_app; -use jsonformat::{format_json, Indentation}; -use std::fs; -use std::io; -use std::io::Read; +use jsonformat::{Indentation, format_json_buffered}; +use std::error::Error; +use std::fs::File; +use std::io::{BufReader, BufWriter, Read, Write}; -fn main() -> Result<(), io::Error> { +fn main() -> Result<(), Box> { let matches = clap_app!(jsonformat => (version: "1.1") (author: "nilstrieb ") @@ -16,13 +16,13 @@ fn main() -> Result<(), io::Error> { ) .get_matches(); - let str = match matches.value_of("input") { - Some(path) => fs::read_to_string(path)?, + let reader: Box = match matches.value_of("input") { + Some(path) => { + let f = File::open(path)?; + Box::new(BufReader::new(f)) + }, None => { - let mut buf = String::new(); - let stdin = std::io::stdin(); - stdin.lock().read_to_string(&mut buf)?; - buf + Box::new(std::io::stdin()) } }; @@ -41,8 +41,6 @@ fn main() -> Result<(), io::Error> { None => Indentation::Default, }; - let formatted = format_json(&str, indent); - let mut output = matches.value_of("output"); let mut windows_output_default_file: Option = None; @@ -57,12 +55,16 @@ fn main() -> Result<(), io::Error> { output = windows_output_default_file.as_deref().or(output); - match output { + let writer : Box = match output { Some(file) => { - fs::write(file, formatted)?; + Box::new(File::create(file)?) } - None => println!("{}", formatted), - } + None => Box::new(std::io::stdout()), + }; + + let mut reader = BufReader::new(reader); + let mut writer = BufWriter::new(writer); + format_json_buffered(&mut reader, &mut writer, indent)?; Ok(()) } From 9e81e67551702e4ada41d3d0826d9f04eb3af38b Mon Sep 17 00:00:00 2001 From: Nicolas Musset Date: Sat, 21 Aug 2021 22:20:17 +0900 Subject: [PATCH 2/4] Format code with rustfmt --- src/lib.rs | 25 +++++++++++++++++-------- src/main.rs | 17 +++++------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9d9de6d..b2a75a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,9 +3,9 @@ //! //! It does not do anything more than that, which makes it so fast. -use std::io::{BufReader, BufWriter, Write}; -use utf8_chars::BufReadCharsExt; use std::error::Error; +use std::io::{BufReader, BufWriter, Read, Write}; +use utf8_chars::BufReadCharsExt; /// /// Set the indentation used for the formatting. @@ -42,11 +42,15 @@ pub fn format_json(json: &str, indentation: Indentation) -> String { /// The default value is two spaces /// The default indentation is faster than a custom one /// -pub fn format_json_buffered(reader: &mut BufReader, writer: &mut BufWriter, indentation: Indentation) -> Result<(), Box> +pub fn format_json_buffered( + reader: &mut BufReader, + writer: &mut BufWriter, + indentation: Indentation, +) -> Result<(), Box> where - R: std::io::Read, - W: std::io::Write { - + R: Read, + W: Write, +{ let mut escaped = false; let mut in_string = false; let mut indent_level = 0usize; @@ -124,9 +128,14 @@ where Ok(()) } -fn indent_buffered(writer: &mut BufWriter, level: usize, indent_str: Indentation) -> Result<(), Box> +fn indent_buffered( + writer: &mut BufWriter, + level: usize, + indent_str: Indentation, +) -> Result<(), Box> where - W: std::io::Write { + W: std::io::Write, +{ for _ in 0..level { match indent_str { Indentation::Default => { diff --git a/src/main.rs b/src/main.rs index 16ce1b5..788f17c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use clap::clap_app; -use jsonformat::{Indentation, format_json_buffered}; +use jsonformat::{format_json_buffered, Indentation}; use std::error::Error; use std::fs::File; use std::io::{BufReader, BufWriter, Read, Write}; @@ -17,13 +17,8 @@ fn main() -> Result<(), Box> { .get_matches(); let reader: Box = match matches.value_of("input") { - Some(path) => { - let f = File::open(path)?; - Box::new(BufReader::new(f)) - }, - None => { - Box::new(std::io::stdin()) - } + Some(path) => Box::new(File::open(path)?), + None => Box::new(std::io::stdin()), }; let replaced_indent = matches.value_of("indentation").map(|value| { @@ -55,10 +50,8 @@ fn main() -> Result<(), Box> { output = windows_output_default_file.as_deref().or(output); - let writer : Box = match output { - Some(file) => { - Box::new(File::create(file)?) - } + let writer: Box = match output { + Some(file) => Box::new(File::create(file)?), None => Box::new(std::io::stdout()), }; From 2a87a54b8a6122c819c3959569d8b4eec18a9f9a Mon Sep 17 00:00:00 2001 From: Nicolas Musset Date: Sun, 22 Aug 2021 10:55:24 +0900 Subject: [PATCH 3/4] No need to use crate utf8-chars, we can read/write at the byte level --- Cargo.lock | 16 ---------------- Cargo.toml | 1 - src/lib.rs | 38 ++++++++++++++++++-------------------- 3 files changed, 18 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6861b53..8c8ebea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "atty" version = "0.2.14" @@ -250,7 +244,6 @@ version = "1.0.1" dependencies = [ "clap", "criterion", - "utf8-chars", ] [[package]] @@ -528,15 +521,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" -[[package]] -name = "utf8-chars" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1348d8face79d019be7cbc0198e36bf93e160ddbfaa7bb54c9592627b9ec841" -dependencies = [ - "arrayvec", -] - [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 11408e8..9247338 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ categories = ["command-line-utilities"] [dependencies] clap = { version= "2.33.3", optional = true } -utf8-chars = "1.0.0" [dev-dependencies] criterion = "0.3" diff --git a/src/lib.rs b/src/lib.rs index b2a75a1..d30e9bc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,6 @@ use std::error::Error; use std::io::{BufReader, BufWriter, Read, Write}; -use utf8_chars::BufReadCharsExt; /// /// Set the indentation used for the formatting. @@ -56,24 +55,24 @@ where let mut indent_level = 0usize; let mut newline_requested = false; // invalidated if next character is ] or } - for char in reader.chars() { + for char in reader.bytes() { let char = char?; if in_string { let mut escape_here = false; match char { - '"' => { + b'"' => { if !escaped { in_string = false; } } - '\\' => { + b'\\' => { if !escaped { escape_here = true; } } _ => {} } - writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; + writer.write_all(&[char])?; escaped = escape_here; } else { let mut auto_push = true; @@ -81,44 +80,44 @@ where let old_level = indent_level; match char { - '"' => in_string = true, - ' ' | '\n' | '\t' => continue, - '[' => { + b'"' => in_string = true, + b' ' | b'\n' | b'\t' => continue, + b'[' => { indent_level += 1; request_newline = true; } - '{' => { + b'{' => { indent_level += 1; request_newline = true; } - '}' | ']' => { + b'}' | b']' => { indent_level = indent_level.saturating_sub(1); if !newline_requested { // see comment below about newline_requested - writeln!(writer)?; + writer.write_all(&[b'\n'])?; indent_buffered(writer, indent_level, indentation)?; } } - ':' => { + b':' => { auto_push = false; - writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; - writer.write_all(" ".as_bytes())?; + writer.write_all(&[char])?; + writer.write_all(&[b' '])?; } - ',' => { + b',' => { request_newline = true; } _ => {} } - if newline_requested && char != ']' && char != '}' { + if newline_requested && char != b']' && char != b'}' { // newline only happens after { [ and , // this means we can safely assume that it being followed up by } or ] // means an empty object/array - writeln!(writer)?; + writer.write_all(&[b'\n'])?; indent_buffered(writer, old_level, indentation)?; } if auto_push { - writer.write_all(char.encode_utf8(&mut [0; 4]).as_bytes())?; + writer.write_all(&[char])?; } newline_requested = request_newline; @@ -139,8 +138,7 @@ where for _ in 0..level { match indent_str { Indentation::Default => { - writer.write_all(" ".as_bytes())?; - writer.write_all(" ".as_bytes())?; + writer.write_all(b" ")?; } Indentation::Custom(indent) => { writer.write_all(indent.as_bytes())?; From 0d24310cbf8c7c82412d180faaa9a3e80d39f424 Mon Sep 17 00:00:00 2001 From: Nicolas Musset Date: Mon, 23 Aug 2021 14:56:57 +0900 Subject: [PATCH 4/4] On-stack dynamic dispatch to avoid boxing --- src/main.rs | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 788f17c..778395c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,9 +16,17 @@ fn main() -> Result<(), Box> { ) .get_matches(); - let reader: Box = match matches.value_of("input") { - Some(path) => Box::new(File::open(path)?), - None => Box::new(std::io::stdin()), + // Note: on-stack dynamic dispatch + let (mut file, mut stdin); + let reader: &mut dyn Read = match matches.value_of("input") { + Some(path) => { + file = File::open(path)?; + &mut file + } + None => { + stdin = std::io::stdin(); + &mut stdin + } }; let replaced_indent = matches.value_of("indentation").map(|value| { @@ -50,9 +58,17 @@ fn main() -> Result<(), Box> { output = windows_output_default_file.as_deref().or(output); - let writer: Box = match output { - Some(file) => Box::new(File::create(file)?), - None => Box::new(std::io::stdout()), + // Note: on-stack dynamic dispatch + let (mut file, mut stdout); + let writer: &mut dyn Write = match output { + Some(filename) => { + file = File::create(filename)?; + &mut file + }, + None => { + stdout = std::io::stdout(); + &mut stdout + }, }; let mut reader = BufReader::new(reader);