This commit is contained in:
nora 2022-04-28 20:35:33 +02:00
parent 61e1e5d6fb
commit 28c691da5b
10 changed files with 11429 additions and 66 deletions

6
.gitignore vendored
View file

@ -1,9 +1,3 @@
/target
.idea
*.iml
# test data
*.json
# local install script
install.sh

3
.rustfmt.toml Normal file
View file

@ -0,0 +1,3 @@
imports_granularity = "Crate"
newline_style = "Unix"
group_imports = "StdExternalCrate"

2
Cargo.lock generated
View file

@ -238,7 +238,7 @@ dependencies = [
[[package]]
name = "jsonformat"
version = "1.2.0"
version = "2.0.0"
dependencies = [
"criterion",
]

View file

@ -3,7 +3,7 @@ members = [".", "jsonformat-cli"]
[package]
name = "jsonformat"
version = "1.2.0"
version = "2.0.0"
authors = ["Nilstrieb <nilstrieb@gmail.com>"]
edition = "2021"
license = "MIT"

View file

@ -2,11 +2,13 @@
`jsonformat` is an extremely fast JSON formatter.
It formats over 60MB of nested JSON in under 0.4s.
It formats over 20MB of nested JSON in 60ms.
For the library, look at [docs.rs](https://docs.rs/jsonformat)
## Install
You need Rust installed on your system
`cargo install jsonformat`
`cargo install jsonformat-cli`
## Usage
```
@ -28,13 +30,14 @@ OPTIONS:
Reads from stdin if no file is supplied.
Outputs to stdout if no output file is specified.
On windows, it writes to a file called `<filename>_f.json`, unless the `--stdout` flag is used or a custom output file is provided. This is to enable drag-and-drop in windows explorer.
On Windows, it writes to a file called `<filename>_f.json`, unless the `--stdout` flag is used or a custom output
file is provided. This is to enable drag-and-drop in Windows Explorer.
## Error handling
`jsonformat` does not report malformed json - it can't even fully know whether the json is actually malformed. Malformed json is just formatted kind of incorrectly, with no data lost and no crashes. If you find an input that loses data or causes a crash, please open an issue.
`jsonformat` does not report malformed json - it can't even fully know whether the json is actually malformed.
Malformed json is just formatted kind of incorrectly, with no data lost and no crashes. If you find an input that loses data or causes a crash, please open an issue.
## How?
`jsonformat` does not actually parse the json, it just loops through each character and keeps track of some flags. It then copies these characters to the output buffer, adding and removing whitespace.
The code is currently a bit chaotic, but it works and is fast, so good enough for now. Maybe it could profit from SIMD in the future, but I have never used it and I don't know whether it would work. Maybe some day...
`jsonformat` does not actually parse the json, it just loops through each character and keeps track of some flags.
It then copies these characters to the output buffer, adding and removing whitespace.

View file

@ -1,17 +1,32 @@
use criterion::{criterion_group, criterion_main, Criterion};
use jsonformat::{format_json, Indentation};
use std::{fs, io};
/// You need a json file called massive.json in your project root
fn format_massive_json(file: &str) -> io::Result<String> {
Ok(format_json(&file, Indentation::Default))
}
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use jsonformat::{format, format_reader_writer, Indentation};
fn criterion_benchmark(c: &mut Criterion) {
let file = fs::read_to_string("massive.json").expect("massive.json file in project directory");
let file = include_str!("large-file.json");
c.bench_function("Format massive json", |b| {
b.iter(|| format_massive_json(&file))
c.bench_function("Format json default settings", |b| {
b.iter(|| {
let json = format(&file, Indentation::Default);
black_box(json);
})
});
c.bench_function("Format json custom indentation", |b| {
b.iter(|| {
let json = format(&file, Indentation::Custom("123456"));
black_box(json);
})
});
c.bench_function("Format json no utf8 validation", |b| {
b.iter(|| {
let mut writer = Vec::with_capacity(file.len() * 2);
format_reader_writer(file.as_bytes(), &mut writer, Indentation::Default).unwrap();
black_box(writer);
})
});
}

11352
benches/large-file.json Normal file

File diff suppressed because one or more lines are too long

View file

@ -15,7 +15,7 @@ categories = ["command-line-utilities"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
jsonformat = { path = "..", version = "1.2.0" }
jsonformat = { path = "..", version = "2.0.0" }
clap = "2.33.3"
[[bin]]

View file

@ -1,8 +1,11 @@
use std::{
error::Error,
fs::File,
io::{BufReader, BufWriter, Read, Write},
};
use clap::clap_app;
use jsonformat::{format_json_buffered, Indentation};
use std::error::Error;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use jsonformat::{format_reader_writer, Indentation};
fn main() -> Result<(), Box<dyn Error>> {
let matches = clap_app!(jsonformat =>
@ -64,16 +67,16 @@ fn main() -> Result<(), Box<dyn Error>> {
Some(filename) => {
file = File::create(filename)?;
&mut file
},
}
None => {
stdout = std::io::stdout();
&mut stdout
},
}
};
let mut reader = BufReader::new(reader);
let mut writer = BufWriter::new(writer);
format_json_buffered(&mut reader, &mut writer, indent)?;
format_reader_writer(&mut reader, &mut writer, indent)?;
Ok(())
}

View file

@ -3,10 +3,11 @@
//!
//! It does not do anything more than that, which makes it so fast.
use std::error::Error;
use std::io::{BufReader, BufWriter, Read, Write};
use std::{
io,
io::{Read, Write},
};
///
/// Set the indentation used for the formatting.
///
/// Note: It is *not* recommended to set indentation to anything other than some spaces or some tabs,
@ -19,33 +20,29 @@ pub enum Indentation<'a> {
Custom(&'a str),
}
///
/// # Formats a json string
///
/// The indentation can be set to any value using [Indentation](jsonformat::Indentation)
/// The indentation can be set to any value using [`Indentation`]
/// The default value is two spaces
/// The default indentation is faster than a custom one
///
pub fn format_json(json: &str, indentation: Indentation) -> String {
let mut reader = BufReader::new(json.as_bytes());
let mut writer = BufWriter::new(Vec::new());
pub fn format(json: &str, indentation: Indentation) -> String {
let mut reader = json.as_bytes();
let mut writer = Vec::with_capacity(json.len());
format_json_buffered(&mut reader, &mut writer, indentation).unwrap();
String::from_utf8(writer.into_inner().unwrap()).unwrap()
format_reader_writer(&mut reader, &mut writer, indentation).unwrap();
String::from_utf8(writer).unwrap()
}
///
/// # Formats a json string
///
/// The indentation can be set to any value using [Indentation](jsonformat::Indentation)
/// The indentation can be set to any value using [`Indentation`]
/// The default value is two spaces
/// The default indentation is faster than a custom one
///
pub fn format_json_buffered<R, W>(
reader: &mut BufReader<R>,
writer: &mut BufWriter<W>,
pub fn format_reader_writer<R, W>(
reader: R,
mut writer: W,
indentation: Indentation,
) -> Result<(), Box<dyn Error>>
) -> io::Result<()>
where
R: Read,
W: Write,
@ -95,7 +92,7 @@ where
if !newline_requested {
// see comment below about newline_requested
writer.write_all(&[b'\n'])?;
indent_buffered(writer, indent_level, indentation)?;
indent(&mut writer, indent_level, indentation)?;
}
}
b':' => {
@ -113,7 +110,7 @@ where
// this means we can safely assume that it being followed up by } or ]
// means an empty object/array
writer.write_all(&[b'\n'])?;
indent_buffered(writer, old_level, indentation)?;
indent(&mut writer, old_level, indentation)?;
}
if auto_push {
@ -127,13 +124,9 @@ where
Ok(())
}
fn indent_buffered<W>(
writer: &mut BufWriter<W>,
level: usize,
indent_str: Indentation,
) -> Result<(), Box<dyn Error>>
fn indent<W>(writer: &mut W, level: usize, indent_str: Indentation) -> io::Result<()>
where
W: std::io::Write,
W: Write,
{
for _ in 0..level {
match indent_str {
@ -156,27 +149,27 @@ mod test {
#[test]
fn echoes_primitive() {
let json = "1.35";
assert_eq!(json, format_json(json, Indentation::Default));
assert_eq!(json, format(json, Indentation::Default));
}
#[test]
fn ignore_whitespace_in_string() {
let json = "\" hallo \"";
assert_eq!(json, format_json(json, Indentation::Default));
assert_eq!(json, format(json, Indentation::Default));
}
#[test]
fn remove_leading_whitespace() {
let json = " 0";
let expected = "0";
assert_eq!(expected, format_json(json, Indentation::Default));
assert_eq!(expected, format(json, Indentation::Default));
}
#[test]
fn handle_escaped_strings() {
let json = " \" hallo \\\" \" ";
let expected = "\" hallo \\\" \"";
assert_eq!(expected, format_json(json, Indentation::Default));
assert_eq!(expected, format(json, Indentation::Default));
}
#[test]
@ -185,7 +178,7 @@ mod test {
let expected = "{
\"a\": 0
}";
assert_eq!(expected, format_json(json, Indentation::Default));
assert_eq!(expected, format(json, Indentation::Default));
}
#[test]
@ -196,7 +189,7 @@ mod test {
2,
null
]";
assert_eq!(expected, format_json(json, Indentation::Default));
assert_eq!(expected, format(json, Indentation::Default));
}
#[test]
@ -212,7 +205,7 @@ mod test {
}
]";
assert_eq!(expected, format_json(json, Indentation::Default));
assert_eq!(expected, format(json, Indentation::Default));
}
#[test]
@ -227,6 +220,6 @@ mod test {
}
]";
assert_eq!(expected, format_json(expected, Indentation::Default));
assert_eq!(expected, format(expected, Indentation::Default));
}
}