This commit is contained in:
nora 2022-04-28 20:35:33 +02:00
parent 61e1e5d6fb
commit 28c691da5b
10 changed files with 11429 additions and 66 deletions

6
.gitignore vendored
View file

@ -1,9 +1,3 @@
/target /target
.idea .idea
*.iml *.iml
# test data
*.json
# local install script
install.sh

3
.rustfmt.toml Normal file
View file

@ -0,0 +1,3 @@
imports_granularity = "Crate"
newline_style = "Unix"
group_imports = "StdExternalCrate"

2
Cargo.lock generated
View file

@ -238,7 +238,7 @@ dependencies = [
[[package]] [[package]]
name = "jsonformat" name = "jsonformat"
version = "1.2.0" version = "2.0.0"
dependencies = [ dependencies = [
"criterion", "criterion",
] ]

View file

@ -3,7 +3,7 @@ members = [".", "jsonformat-cli"]
[package] [package]
name = "jsonformat" name = "jsonformat"
version = "1.2.0" version = "2.0.0"
authors = ["Nilstrieb <nilstrieb@gmail.com>"] authors = ["Nilstrieb <nilstrieb@gmail.com>"]
edition = "2021" edition = "2021"
license = "MIT" license = "MIT"

View file

@ -2,11 +2,13 @@
`jsonformat` is an extremely fast JSON formatter. `jsonformat` is an extremely fast JSON formatter.
It formats over 60MB of nested JSON in under 0.4s. It formats over 20MB of nested JSON in 60ms.
For the library, look at [docs.rs](https://docs.rs/jsonformat)
## Install ## Install
You need Rust installed on your system You need Rust installed on your system
`cargo install jsonformat` `cargo install jsonformat-cli`
## Usage ## Usage
``` ```
@ -28,13 +30,14 @@ OPTIONS:
Reads from stdin if no file is supplied. Reads from stdin if no file is supplied.
Outputs to stdout if no output file is specified. Outputs to stdout if no output file is specified.
On windows, it writes to a file called `<filename>_f.json`, unless the `--stdout` flag is used or a custom output file is provided. This it to enable drag-and-drop in windows explorer. On Windows, it writes to a file called `<filename>_f.json`, unless the `--stdout` flag is used or a custom output
file is provided. This is to enable drag-and-drop in Windows Explorer.
## Error handling ## Error handling
`jsonformat` does not report malformed json - it can't even fully know whether the json is actually malformed. Malformed json is just formatted kind of incorrectly, with no data lost and no crashes. If you find one, open an issue, `jsonformat` does not report malformed json - it can't even fully know whether the json is actually malformed.
Malformed json is just formatted kind of incorrectly, with no data lost and no crashes. If you find one, open an issue.
## How? ## How?
`jsonformat` does not actually parse the json, it just loops through each character and keeps track of some flags. It then copies these characters to the output buffer, adding and removing whitespace. `jsonformat` does not actually parse the json, it just loops through each character and keeps track of some flags.
It then copies these characters to the output buffer, adding and removing whitespace.
The code is currently a bit chaotic, but it works and is fast, so good enough for now. Maybe it could profit from SIMD in the future, but I have never used it and I don't know whether it would work. Maybe some day...

View file

@ -1,17 +1,32 @@
use criterion::{criterion_group, criterion_main, Criterion};
use jsonformat::{format_json, Indentation};
use std::{fs, io}; use std::{fs, io};
/// You need a json file called massive.json in your project root use criterion::{black_box, criterion_group, criterion_main, Criterion};
fn format_massive_json(file: &str) -> io::Result<String> { use jsonformat::{format, format_reader_writer, Indentation};
Ok(format_json(&file, Indentation::Default))
}
fn criterion_benchmark(c: &mut Criterion) { fn criterion_benchmark(c: &mut Criterion) {
let file = fs::read_to_string("massive.json").expect("massive.json file in project directory"); let file = include_str!("large-file.json");
c.bench_function("Format massive json", |b| { c.bench_function("Format json default settings", |b| {
b.iter(|| format_massive_json(&file)) b.iter(|| {
let json = format(&file, Indentation::Default);
black_box(json);
})
});
c.bench_function("Format json custom indentation", |b| {
b.iter(|| {
let json = format(&file, Indentation::Custom("123456"));
black_box(json);
})
});
c.bench_function("Format json no utf8 validation", |b| {
b.iter(|| {
let mut writer = Vec::with_capacity(file.len() * 2);
format_reader_writer(file.as_bytes(), &mut writer, Indentation::Default).unwrap();
black_box(writer);
})
}); });
} }

11352
benches/large-file.json Normal file

File diff suppressed because one or more lines are too long

View file

@ -15,7 +15,7 @@ categories = ["command-line-utilities"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
jsonformat = { path = "..", version = "1.2.0" } jsonformat = { path = "..", version = "2.0.0" }
clap = "2.33.3" clap = "2.33.3"
[[bin]] [[bin]]

View file

@ -1,8 +1,11 @@
use std::{
error::Error,
fs::File,
io::{BufReader, BufWriter, Read, Write},
};
use clap::clap_app; use clap::clap_app;
use jsonformat::{format_json_buffered, Indentation}; use jsonformat::{format_reader_writer, Indentation};
use std::error::Error;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
let matches = clap_app!(jsonformat => let matches = clap_app!(jsonformat =>
@ -64,16 +67,16 @@ fn main() -> Result<(), Box<dyn Error>> {
Some(filename) => { Some(filename) => {
file = File::create(filename)?; file = File::create(filename)?;
&mut file &mut file
}, }
None => { None => {
stdout = std::io::stdout(); stdout = std::io::stdout();
&mut stdout &mut stdout
}, }
}; };
let mut reader = BufReader::new(reader); let mut reader = BufReader::new(reader);
let mut writer = BufWriter::new(writer); let mut writer = BufWriter::new(writer);
format_json_buffered(&mut reader, &mut writer, indent)?; format_reader_writer(&mut reader, &mut writer, indent)?;
Ok(()) Ok(())
} }

View file

@ -3,10 +3,11 @@
//! //!
//! It does not do anything more than that, which makes it so fast. //! It does not do anything more than that, which makes it so fast.
use std::error::Error; use std::{
use std::io::{BufReader, BufWriter, Read, Write}; io,
io::{Read, Write},
};
///
/// Set the indentation used for the formatting. /// Set the indentation used for the formatting.
/// ///
/// Note: It is *not* recommended to set indentation to anything other than some spaces or some tabs, /// Note: It is *not* recommended to set indentation to anything other than some spaces or some tabs,
@ -19,33 +20,29 @@ pub enum Indentation<'a> {
Custom(&'a str), Custom(&'a str),
} }
///
/// # Formats a json string /// # Formats a json string
/// ///
/// The indentation can be set to any value using [Indentation](jsonformat::Indentation) /// The indentation can be set to any value using [`Indentation`]
/// The default value is two spaces /// The default value is two spaces
/// The default indentation is faster than a custom one /// The default indentation is faster than a custom one
/// pub fn format(json: &str, indentation: Indentation) -> String {
pub fn format_json(json: &str, indentation: Indentation) -> String { let mut reader = json.as_bytes();
let mut reader = BufReader::new(json.as_bytes()); let mut writer = Vec::with_capacity(json.len());
let mut writer = BufWriter::new(Vec::new());
format_json_buffered(&mut reader, &mut writer, indentation).unwrap(); format_reader_writer(&mut reader, &mut writer, indentation).unwrap();
String::from_utf8(writer.into_inner().unwrap()).unwrap() String::from_utf8(writer).unwrap()
} }
///
/// # Formats a json string /// # Formats a json string
/// ///
/// The indentation can be set to any value using [Indentation](jsonformat::Indentation) /// The indentation can be set to any value using [`Indentation`]
/// The default value is two spaces /// The default value is two spaces
/// The default indentation is faster than a custom one /// The default indentation is faster than a custom one
/// pub fn format_reader_writer<R, W>(
pub fn format_json_buffered<R, W>( reader: R,
reader: &mut BufReader<R>, mut writer: W,
writer: &mut BufWriter<W>,
indentation: Indentation, indentation: Indentation,
) -> Result<(), Box<dyn Error>> ) -> io::Result<()>
where where
R: Read, R: Read,
W: Write, W: Write,
@ -95,7 +92,7 @@ where
if !newline_requested { if !newline_requested {
// see comment below about newline_requested // see comment below about newline_requested
writer.write_all(&[b'\n'])?; writer.write_all(&[b'\n'])?;
indent_buffered(writer, indent_level, indentation)?; indent(&mut writer, indent_level, indentation)?;
} }
} }
b':' => { b':' => {
@ -113,7 +110,7 @@ where
// this means we can safely assume that it being followed up by } or ] // this means we can safely assume that it being followed up by } or ]
// means an empty object/array // means an empty object/array
writer.write_all(&[b'\n'])?; writer.write_all(&[b'\n'])?;
indent_buffered(writer, old_level, indentation)?; indent(&mut writer, old_level, indentation)?;
} }
if auto_push { if auto_push {
@ -127,13 +124,9 @@ where
Ok(()) Ok(())
} }
fn indent_buffered<W>( fn indent<W>(writer: &mut W, level: usize, indent_str: Indentation) -> io::Result<()>
writer: &mut BufWriter<W>,
level: usize,
indent_str: Indentation,
) -> Result<(), Box<dyn Error>>
where where
W: std::io::Write, W: Write,
{ {
for _ in 0..level { for _ in 0..level {
match indent_str { match indent_str {
@ -156,27 +149,27 @@ mod test {
#[test] #[test]
fn echoes_primitive() { fn echoes_primitive() {
let json = "1.35"; let json = "1.35";
assert_eq!(json, format_json(json, Indentation::Default)); assert_eq!(json, format(json, Indentation::Default));
} }
#[test] #[test]
fn ignore_whitespace_in_string() { fn ignore_whitespace_in_string() {
let json = "\" hallo \""; let json = "\" hallo \"";
assert_eq!(json, format_json(json, Indentation::Default)); assert_eq!(json, format(json, Indentation::Default));
} }
#[test] #[test]
fn remove_leading_whitespace() { fn remove_leading_whitespace() {
let json = " 0"; let json = " 0";
let expected = "0"; let expected = "0";
assert_eq!(expected, format_json(json, Indentation::Default)); assert_eq!(expected, format(json, Indentation::Default));
} }
#[test] #[test]
fn handle_escaped_strings() { fn handle_escaped_strings() {
let json = " \" hallo \\\" \" "; let json = " \" hallo \\\" \" ";
let expected = "\" hallo \\\" \""; let expected = "\" hallo \\\" \"";
assert_eq!(expected, format_json(json, Indentation::Default)); assert_eq!(expected, format(json, Indentation::Default));
} }
#[test] #[test]
@ -185,7 +178,7 @@ mod test {
let expected = "{ let expected = "{
\"a\": 0 \"a\": 0
}"; }";
assert_eq!(expected, format_json(json, Indentation::Default)); assert_eq!(expected, format(json, Indentation::Default));
} }
#[test] #[test]
@ -196,7 +189,7 @@ mod test {
2, 2,
null null
]"; ]";
assert_eq!(expected, format_json(json, Indentation::Default)); assert_eq!(expected, format(json, Indentation::Default));
} }
#[test] #[test]
@ -212,7 +205,7 @@ mod test {
} }
]"; ]";
assert_eq!(expected, format_json(json, Indentation::Default)); assert_eq!(expected, format(json, Indentation::Default));
} }
#[test] #[test]
@ -227,6 +220,6 @@ mod test {
} }
]"; ]";
assert_eq!(expected, format_json(expected, Indentation::Default)); assert_eq!(expected, format(expected, Indentation::Default));
} }
} }