mirror of
https://github.com/Noratrieb/advent-of-code.git
synced 2026-01-14 17:45:02 +01:00
benchmark
This commit is contained in:
parent
d5b8c4ed21
commit
73ced81e4a
3 changed files with 53 additions and 25 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
|||
input*.txt
|
||||
target
|
||||
target
|
||||
perf.data*
|
||||
|
|
@ -4,9 +4,27 @@ benchmarks:
|
|||
|
||||
Ensure that `input.txt` contains many, many copies of the actual input; the actual input on its own is way too small to produce meaningful timings.
|
||||
|
||||
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless'`
|
||||
`cargo build --release && hyperfine 'target/release/day1 naive' 'target/release/day1 zero_alloc' 'target/release/day1 branchless' 'target/release/day1 vectorized'`
|
||||
```
|
||||
target/release/day1 branchless ran
|
||||
1.52 ± 0.06 times faster than target/release/day1 zero_alloc
|
||||
7.74 ± 0.27 times faster than target/release/day1 naive
|
||||
Benchmark 1: target/release/day1 naive
|
||||
Time (mean ± σ): 4.735 s ± 0.061 s [User: 4.663 s, System: 0.072 s]
|
||||
Range (min … max): 4.643 s … 4.798 s 10 runs
|
||||
|
||||
Benchmark 2: target/release/day1 zero_alloc
|
||||
Time (mean ± σ): 880.1 ms ± 10.7 ms [User: 807.9 ms, System: 72.1 ms]
|
||||
Range (min … max): 858.3 ms … 891.4 ms 10 runs
|
||||
|
||||
Benchmark 3: target/release/day1 branchless
|
||||
Time (mean ± σ): 587.1 ms ± 4.4 ms [User: 515.0 ms, System: 72.1 ms]
|
||||
Range (min … max): 578.3 ms … 594.1 ms 10 runs
|
||||
|
||||
Benchmark 4: target/release/day1 vectorized
|
||||
Time (mean ± σ): 394.3 ms ± 5.2 ms [User: 322.2 ms, System: 71.9 ms]
|
||||
Range (min … max): 386.4 ms … 400.0 ms 10 runs
|
||||
|
||||
Summary
|
||||
target/release/day1 vectorized ran
|
||||
1.49 ± 0.02 times faster than target/release/day1 branchless
|
||||
2.23 ± 0.04 times faster than target/release/day1 zero_alloc
|
||||
12.01 ± 0.22 times faster than target/release/day1 naive
|
||||
```
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ pub unsafe fn part2(input: &str) {
|
|||
continue;
|
||||
}
|
||||
|
||||
const fn gorble(s: &[u8]) -> u64 {
|
||||
fn gorble(s: &[u8]) -> u64 {
|
||||
let mut bytes = [0; 8];
|
||||
let mut i = 0;
|
||||
while i < s.len() {
|
||||
|
|
@ -61,21 +61,22 @@ pub unsafe fn part2(input: &str) {
|
|||
|
||||
let mut acc = 0;
|
||||
|
||||
|
||||
acc |= if one >= b'0' && one <= b'9' { one } else { 0 };
|
||||
|
||||
let mut vector_result = None;
|
||||
|
||||
#[cfg(all(target_arch = "x86_64"))]
|
||||
if avx2 {
|
||||
use std::arch::x86_64;
|
||||
unsafe fn round(input: u64, compare: [u64; 4], then: [u64; 4]) -> x86_64::__m256i {
|
||||
// YYYYYYYY|AAAAAAAA|XXXXXXXX|BBBBBBBB|
|
||||
let fives = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
|
||||
let compare = unsafe { std::mem::transmute::<_, x86_64::__m256i>(compare) };
|
||||
// 000000EE|000000ZZ|000000XX|000000FF|
|
||||
let then = unsafe { std::mem::transmute::<_, x86_64::__m256i>(then) };
|
||||
// XXXXXXXX|XXXXXXXX|XXXXXXXX|XXXXXXXX|
|
||||
let actual = x86_64::_mm256_set1_epi64x(input as i64);
|
||||
// 00000000|00000000|11111111|00000000|
|
||||
let mask = x86_64::_mm256_cmpeq_epi64(fives, actual);
|
||||
let mask = x86_64::_mm256_cmpeq_epi64(compare, actual);
|
||||
// 00000000|00000000|0000000X|00000000|
|
||||
let result = x86_64::_mm256_and_si256(then, mask);
|
||||
// we can also pretend that it's this as only the lowest byte is set in each lane
|
||||
|
|
@ -99,8 +100,8 @@ pub unsafe fn part2(input: &str) {
|
|||
[b'6' as _, b'2' as _, b'1' as _, 0],
|
||||
);
|
||||
|
||||
|
||||
let result = x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
|
||||
let result =
|
||||
x86_64::_mm256_or_pd(std::mem::transmute(fives), std::mem::transmute(fours));
|
||||
let result = x86_64::_mm256_or_pd(result, std::mem::transmute(threes));
|
||||
|
||||
let low = x86_64::_mm256_extractf128_pd(result, 0);
|
||||
|
|
@ -114,29 +115,37 @@ pub unsafe fn part2(input: &str) {
|
|||
|
||||
digits[line_idx] = acc | result as u8;
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
vector_result = Some(acc | result as u8);
|
||||
}
|
||||
}
|
||||
|
||||
if !avx2 {
|
||||
if cfg!(debug_assertions) || !avx2 {
|
||||
macro_rules! check {
|
||||
($const:ident $len:ident == $str:expr => $value:expr) => {
|
||||
const $const: u64 = gorble($str);
|
||||
acc |= (if $len == $const { $value } else { 0 });
|
||||
($len:ident == $str:expr => $value:expr) => {
|
||||
acc |= (if $len == gorble($str) { $value } else { 0 });
|
||||
};
|
||||
}
|
||||
|
||||
check!(EIGHT five == b"eight" => b'8');
|
||||
check!(SEVEN five == b"seven" => b'7');
|
||||
check!(THREE five == b"three" => b'3');
|
||||
|
||||
check!(FIVE four == b"five" => b'5');
|
||||
check!(FOUR four == b"four" => b'4');
|
||||
check!(NINE four == b"nine" => b'9');
|
||||
check!(five == b"eight" => b'8');
|
||||
check!(five == b"seven" => b'7');
|
||||
check!(five == b"three" => b'3');
|
||||
|
||||
check!(SIX three == b"six" => b'6');
|
||||
check!(TWO three == b"two" => b'2');
|
||||
check!(ONE three == b"one" => b'1');
|
||||
check!(four == b"five" => b'5');
|
||||
check!(four == b"four" => b'4');
|
||||
check!(four == b"nine" => b'9');
|
||||
|
||||
check!(three == b"six" => b'6');
|
||||
check!(three == b"two" => b'2');
|
||||
check!(three == b"one" => b'1');
|
||||
|
||||
digits[line_idx] = acc;
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
if let Some(vector_result) = vector_result {
|
||||
assert_eq!(vector_result, acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
byte_idx += 1;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue