use helper::IteratorExt; use nom::InputIter; #[target_feature(enable = "avx2")] #[cfg(target_arch = "x86_64")] unsafe fn line_match_count_avx2(line: &str) -> usize { let line = line.as_bytes(); fn pack(chunk: &[u8]) -> u16 { ((chunk[1] as u16) << 8) | (chunk[2] as u16) } let colon = line.position(|b| b == b':').unwrap(); let numbers = &line[(colon + 1)..]; let pipe = numbers.position(|b| b == b'|').unwrap(); let winning = &numbers[..(pipe - 1)]; let you_have = &numbers[(pipe + 1)..]; let winning = winning .chunks_exact(3) .map(pack) .collect_array_default::<16>() .unwrap(); let you_have = you_have.chunks_exact(3).map(pack); unsafe { use std::arch::x86_64; let winning = std::mem::transmute::<_, x86_64::__m256i>(winning); you_have .filter(|&have| { let have = x86_64::_mm256_set1_epi16(have as _); let eq = x86_64::_mm256_cmpeq_epi16(winning, have); x86_64::_mm256_movemask_epi8(eq) > 0 }) .count() } } fn line_match_count(line: &str) -> usize { let line = line.as_bytes(); fn pack(chunk: &[u8]) -> u16 { ((chunk[1] as u16) << 8) | (chunk[2] as u16) } let colon = line.position(|b| b == b':').unwrap(); let numbers = &line[(colon + 1)..]; let pipe = numbers.position(|b| b == b'|').unwrap(); let winning = &numbers[..(pipe - 1)]; let you_have = &numbers[(pipe + 1)..]; let winning = winning .chunks_exact(3) .map(pack) .collect_array_default::<16>() .unwrap(); let you_have = you_have.chunks_exact(3).map(pack); you_have .filter(|have| winning.iter().any(|w| w == have)) .count() } pub fn part2(input: &str) -> u64 { let avx2 = std::arch::is_x86_feature_detected!("avx2"); let lines = if avx2 { #[cfg(target_arch = "x86_64")] { input .lines() .map(|line| unsafe { line_match_count_avx2(line) }) .collect::>() } #[cfg(not(target_arch = "x86_64"))] Vec::new() } else { input .lines() .map(|line| line_match_count(line)) .collect::>() }; let mut cache = vec![0; lines.len()]; let mut processed = 0; // By iterating backwards, we ensure the cache is always populated for every subsequent line. for (i, result) in lines.iter().copied().enumerate().rev() { let before = processed; processed += 1; // Every card gives us one card. // Now, let's see how many cards this card will expand to. for expand in (i + 1)..((i + 1) + result) { #[cfg(debug_assertions)] eprintln!( "{} expands to {} which is worth {}", i + 1, expand + 1, cache[expand] ); // Since the value is bigger than i, it must be cached! processed += cache[expand]; } cache[i] = processed - before; } processed }