benchmark

This commit is contained in:
nora 2023-12-02 22:35:12 +01:00
parent d5b8c4ed21
commit 73ced81e4a
3 changed files with 53 additions and 25 deletions

3
.gitignore vendored
View file

@ -1,2 +1,3 @@
input*.txt
target
target
perf.data*

View file

@ -4,9 +4,27 @@ benchmarks:
Ensure that `input.txt` contains many, many copies of the actual input; the actual input by itself is way too small to benchmark.
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless'`
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless' 'target/release/day1 vectorized'`
```
target/release/day1 branchless ran
1.52 ± 0.06 times faster than target/release/day1 zero_alloc
7.74 ± 0.27 times faster than target/release/day1 naive
Benchmark 1: target/release/day1 naive
Time (mean ± σ): 4.735 s ± 0.061 s [User: 4.663 s, System: 0.072 s]
Range (min … max): 4.643 s … 4.798 s 10 runs
Benchmark 2: target/release/day1 zero_alloc
Time (mean ± σ): 880.1 ms ± 10.7 ms [User: 807.9 ms, System: 72.1 ms]
Range (min … max): 858.3 ms … 891.4 ms 10 runs
Benchmark 3: target/release/day1 branchless
Time (mean ± σ): 587.1 ms ± 4.4 ms [User: 515.0 ms, System: 72.1 ms]
Range (min … max): 578.3 ms … 594.1 ms 10 runs
Benchmark 4: target/release/day1 vectorized
Time (mean ± σ): 394.3 ms ± 5.2 ms [User: 322.2 ms, System: 71.9 ms]
Range (min … max): 386.4 ms … 400.0 ms 10 runs
Summary
target/release/day1 vectorized ran
1.49 ± 0.02 times faster than target/release/day1 branchless
2.23 ± 0.04 times faster than target/release/day1 zero_alloc
12.01 ± 0.22 times faster than target/release/day1 naive
```

View file

@ -48,7 +48,7 @@ pub unsafe fn part2(input: &str) {
continue;
}
const fn gorble(s: &[u8]) -> u64 {
fn gorble(s: &[u8]) -> u64 {
let mut bytes = [0; 8];
let mut i = 0;
while i < s.len() {
@ -61,21 +61,22 @@ pub unsafe fn part2(input: &str) {
let mut acc = 0;
acc |= if one >= b'0' && one <= b'9' { one } else { 0 };
let mut vector_result = None;
#[cfg(all(target_arch = "x86_64"))]
if avx2 {
use std::arch::x86_64;
unsafe fn round(input: u64, compare: [u64; 4], then: [u64; 4]) -> x86_64::__m256i {
// YYYYYYYY|AAAAAAAA|XXXXXXXX|BBBBBBBB|
let fives = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
let compare = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
// 000000EE|000000ZZ|000000XX|000000FF|
let then = unsafe { std::mem::transmute::<_, x86_64::__m256i>(then) };
// XXXXXXXX|XXXXXXXX|XXXXXXXX|XXXXXXXX|
let actual = x86_64::_mm256_set1_epi64x(input as i64);
// 00000000|00000000|11111111|00000000|
let mask = x86_64::_mm256_cmpeq_epi64(fives, actual);
let mask = x86_64::_mm256_cmpeq_epi64(compare, actual);
// 00000000|00000000|0000000X|00000000|
let result = x86_64::_mm256_and_si256(then, mask);
// we can also pretend that it's this as only the lowest byte is set in each lane
@ -99,8 +100,8 @@ pub unsafe fn part2(input: &str) {
[b'6' as _, b'2' as _, b'1' as _, 0],
);
let result = x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
let result =
x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
let result = x86_64::_mm256_or_pd(result, std::mem::transmute(threes));
let low = x86_64::_mm256_extractf128_pd(result, 0);
@ -114,29 +115,37 @@ pub unsafe fn part2(input: &str) {
digits[line_idx] = acc | result as u8;
if cfg!(debug_assertions) {
vector_result = Some(acc | result as u8);
}
}
if !avx2 {
if cfg!(debug_assertions) || !avx2 {
macro_rules! check {
($const:ident $len:ident == $str:expr => $value:expr) => {
const $const: u64 = gorble($str);
acc |= (if $len == $const { $value } else { 0 });
($len:ident == $str:expr => $value:expr) => {
acc |= (if $len == gorble($str) { $value } else { 0 });
};
}
check!(EIGHT five == b"eight" => b'8');
check!(SEVEN five == b"seven" => b'7');
check!(THREE five == b"three" => b'3');
check!(FIVE four == b"five" => b'5');
check!(FOUR four == b"four" => b'4');
check!(NINE four == b"nine" => b'9');
check!(five == b"eight" => b'8');
check!(five == b"seven" => b'7');
check!(five == b"three" => b'3');
check!(SIX three == b"six" => b'6');
check!(TWO three == b"two" => b'2');
check!(ONE three == b"one" => b'1');
check!(four == b"five" => b'5');
check!(four == b"four" => b'4');
check!(four == b"nine" => b'9');
check!(three == b"six" => b'6');
check!(three == b"two" => b'2');
check!(three == b"one" => b'1');
digits[line_idx] = acc;
if cfg!(debug_assertions) {
if let Some(vector_result) = vector_result {
assert_eq!(vector_result, acc);
}
}
}
byte_idx += 1;