mirror of
https://github.com/Noratrieb/advent-of-code.git
synced 2026-01-17 02:55:01 +01:00
benchmark
This commit is contained in:
parent
d5b8c4ed21
commit
73ced81e4a
3 changed files with 53 additions and 25 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
||||||
input*.txt
|
input*.txt
|
||||||
target
|
target
|
||||||
|
perf.data*
|
||||||
|
|
@ -4,9 +4,27 @@ benchmarks:
|
||||||
|
|
||||||
Ensure that `input.txt` contains many, many copies of the actual input, because the actual input by itself is way too small.
|
Ensure that `input.txt` contains many, many copies of the actual input, because the actual input by itself is way too small.
|
||||||
|
|
||||||
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless'`
|
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless' 'target/release/day1 vectorized'`
|
||||||
```
|
```
|
||||||
target/release/day1 branchless ran
|
Benchmark 1: target/release/day1 naive
|
||||||
1.52 ± 0.06 times faster than target/release/day1 zero_alloc
|
Time (mean ± σ): 4.735 s ± 0.061 s [User: 4.663 s, System: 0.072 s]
|
||||||
7.74 ± 0.27 times faster than target/release/day1 naive
|
Range (min … max): 4.643 s … 4.798 s 10 runs
|
||||||
|
|
||||||
|
Benchmark 2: target/release/day1 zero_alloc
|
||||||
|
Time (mean ± σ): 880.1 ms ± 10.7 ms [User: 807.9 ms, System: 72.1 ms]
|
||||||
|
Range (min … max): 858.3 ms … 891.4 ms 10 runs
|
||||||
|
|
||||||
|
Benchmark 3: target/release/day1 branchless
|
||||||
|
Time (mean ± σ): 587.1 ms ± 4.4 ms [User: 515.0 ms, System: 72.1 ms]
|
||||||
|
Range (min … max): 578.3 ms … 594.1 ms 10 runs
|
||||||
|
|
||||||
|
Benchmark 4: target/release/day1 vectorized
|
||||||
|
Time (mean ± σ): 394.3 ms ± 5.2 ms [User: 322.2 ms, System: 71.9 ms]
|
||||||
|
Range (min … max): 386.4 ms … 400.0 ms 10 runs
|
||||||
|
|
||||||
|
Summary
|
||||||
|
target/release/day1 vectorized ran
|
||||||
|
1.49 ± 0.02 times faster than target/release/day1 branchless
|
||||||
|
2.23 ± 0.04 times faster than target/release/day1 zero_alloc
|
||||||
|
12.01 ± 0.22 times faster than target/release/day1 naive
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ pub unsafe fn part2(input: &str) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn gorble(s: &[u8]) -> u64 {
|
fn gorble(s: &[u8]) -> u64 {
|
||||||
let mut bytes = [0; 8];
|
let mut bytes = [0; 8];
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < s.len() {
|
while i < s.len() {
|
||||||
|
|
@ -61,21 +61,22 @@ pub unsafe fn part2(input: &str) {
|
||||||
|
|
||||||
let mut acc = 0;
|
let mut acc = 0;
|
||||||
|
|
||||||
|
|
||||||
acc |= if one >= b'0' && one <= b'9' { one } else { 0 };
|
acc |= if one >= b'0' && one <= b'9' { one } else { 0 };
|
||||||
|
|
||||||
|
let mut vector_result = None;
|
||||||
|
|
||||||
#[cfg(all(target_arch = "x86_64"))]
|
#[cfg(all(target_arch = "x86_64"))]
|
||||||
if avx2 {
|
if avx2 {
|
||||||
use std::arch::x86_64;
|
use std::arch::x86_64;
|
||||||
unsafe fn round(input: u64, compare: [u64; 4], then: [u64; 4]) -> x86_64::__m256i {
|
unsafe fn round(input: u64, compare: [u64; 4], then: [u64; 4]) -> x86_64::__m256i {
|
||||||
// YYYYYYYY|AAAAAAAA|XXXXXXXX|BBBBBBBB|
|
// YYYYYYYY|AAAAAAAA|XXXXXXXX|BBBBBBBB|
|
||||||
let fives = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
|
let compare = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
|
||||||
// 000000EE|000000ZZ|000000XX|000000FF|
|
// 000000EE|000000ZZ|000000XX|000000FF|
|
||||||
let then = unsafe { std::mem::transmute::<_, x86_64::__m256i>(then) };
|
let then = unsafe { std::mem::transmute::<_, x86_64::__m256i>(then) };
|
||||||
// XXXXXXXX|XXXXXXXX|XXXXXXXX|XXXXXXXX|
|
// XXXXXXXX|XXXXXXXX|XXXXXXXX|XXXXXXXX|
|
||||||
let actual = x86_64::_mm256_set1_epi64x(input as i64);
|
let actual = x86_64::_mm256_set1_epi64x(input as i64);
|
||||||
// 00000000|00000000|11111111|00000000|
|
// 00000000|00000000|11111111|00000000|
|
||||||
let mask = x86_64::_mm256_cmpeq_epi64(fives, actual);
|
let mask = x86_64::_mm256_cmpeq_epi64(compare, actual);
|
||||||
// 00000000|00000000|0000000X|00000000|
|
// 00000000|00000000|0000000X|00000000|
|
||||||
let result = x86_64::_mm256_and_si256(then, mask);
|
let result = x86_64::_mm256_and_si256(then, mask);
|
||||||
// we can also pretend that it's this as only the lowest byte is set in each lane
|
// we can also pretend that it's this as only the lowest byte is set in each lane
|
||||||
|
|
@ -99,8 +100,8 @@ pub unsafe fn part2(input: &str) {
|
||||||
[b'6' as _, b'2' as _, b'1' as _, 0],
|
[b'6' as _, b'2' as _, b'1' as _, 0],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let result =
|
||||||
let result = x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
|
x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
|
||||||
let result = x86_64::_mm256_or_pd(result, std::mem::transmute(threes));
|
let result = x86_64::_mm256_or_pd(result, std::mem::transmute(threes));
|
||||||
|
|
||||||
let low = x86_64::_mm256_extractf128_pd(result, 0);
|
let low = x86_64::_mm256_extractf128_pd(result, 0);
|
||||||
|
|
@ -114,29 +115,37 @@ pub unsafe fn part2(input: &str) {
|
||||||
|
|
||||||
digits[line_idx] = acc | result as u8;
|
digits[line_idx] = acc | result as u8;
|
||||||
|
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
vector_result = Some(acc | result as u8);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !avx2 {
|
if cfg!(debug_assertions) || !avx2 {
|
||||||
macro_rules! check {
|
macro_rules! check {
|
||||||
($const:ident $len:ident == $str:expr => $value:expr) => {
|
($len:ident == $str:expr => $value:expr) => {
|
||||||
const $const: u64 = gorble($str);
|
acc |= (if $len == gorble($str) { $value } else { 0 });
|
||||||
acc |= (if $len == $const { $value } else { 0 });
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
check!(EIGHT five == b"eight" => b'8');
|
|
||||||
check!(SEVEN five == b"seven" => b'7');
|
|
||||||
check!(THREE five == b"three" => b'3');
|
|
||||||
|
|
||||||
check!(FIVE four == b"five" => b'5');
|
check!(five == b"eight" => b'8');
|
||||||
check!(FOUR four == b"four" => b'4');
|
check!(five == b"seven" => b'7');
|
||||||
check!(NINE four == b"nine" => b'9');
|
check!(five == b"three" => b'3');
|
||||||
|
|
||||||
check!(SIX three == b"six" => b'6');
|
check!(four == b"five" => b'5');
|
||||||
check!(TWO three == b"two" => b'2');
|
check!(four == b"four" => b'4');
|
||||||
check!(ONE three == b"one" => b'1');
|
check!(four == b"nine" => b'9');
|
||||||
|
|
||||||
|
check!(three == b"six" => b'6');
|
||||||
|
check!(three == b"two" => b'2');
|
||||||
|
check!(three == b"one" => b'1');
|
||||||
|
|
||||||
digits[line_idx] = acc;
|
digits[line_idx] = acc;
|
||||||
|
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
if let Some(vector_result) = vector_result {
|
||||||
|
assert_eq!(vector_result, acc);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
byte_idx += 1;
|
byte_idx += 1;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue