ndn_sync/
psync.rs

1use std::collections::HashSet;
2
3/// Hash a value to a checksum used to verify pure IBF cells.
4///
5/// Uses a different multiplier than the cell-selection hash so that the
6/// checksum is independent of the cell index computation.
7fn cell_hash(v: u64) -> u64 {
8    v.wrapping_mul(0x517cc1b727220a95)
9        .rotate_right(17)
10        .wrapping_add(0xdeadbeefcafe1234)
11}
12
13/// A single cell in an Invertible Bloom Filter.
14///
15/// A cell is "pure" (contains exactly one logical element) when
16/// `count == ±1` AND `cell_hash(xor_sum) == hash_sum`. This rules out
17/// the common false-positive where several values cancel each other to
18/// produce the same xor_sum as a legitimate element.
19#[derive(Clone, Debug, Default)]
20struct IbfCell {
21    xor_sum: u64,
22    hash_sum: u64,
23    count: i64,
24}
25
26impl IbfCell {
27    fn is_pure(&self) -> bool {
28        (self.count == 1 || self.count == -1) && cell_hash(self.xor_sum) == self.hash_sum
29    }
30}
31
32/// A fixed-width Invertible Bloom Filter over `u64` hash values.
33///
34/// `k` hash functions are simulated by rotating a seed before each cell
35/// selection. For PSync, set elements are hashes of NDN name strings.
36#[derive(Clone, Debug)]
37pub struct Ibf {
38    cells: Vec<IbfCell>,
39    k: usize,
40}
41
42impl Ibf {
43    /// Create an IBF with `size` cells and `k` hash functions.
44    pub fn new(size: usize, k: usize) -> Self {
45        Self {
46            cells: vec![IbfCell::default(); size.max(1)],
47            k: k.max(1),
48        }
49    }
50
51    /// Create an IBF from raw cell data `(xor_sum, hash_sum, count)`.
52    ///
53    /// Uses `k = 3` hash functions (the PSync default). The number of cells
54    /// is inferred from the length of `cells`.
55    pub fn from_cells(cells: Vec<(u64, u64, i64)>) -> Self {
56        let k = 3; // default k
57        Self {
58            cells: cells
59                .into_iter()
60                .map(|(x, h, c)| IbfCell {
61                    xor_sum: x,
62                    hash_sum: h,
63                    count: c,
64                })
65                .collect(),
66            k,
67        }
68    }
69
70    /// Export cells as `(xor_sum, hash_sum, count)` tuples for wire encoding.
71    pub fn cells(&self) -> Vec<(u64, u64, i64)> {
72        self.cells
73            .iter()
74            .map(|c| (c.xor_sum, c.hash_sum, c.count))
75            .collect()
76    }
77
78    fn cell_indices(&self, value: u64) -> Vec<usize> {
79        let n = self.cells.len();
80        (0..self.k)
81            .map(|i| {
82                // splitmix64 finalizer after mixing in the seed index.
83                let mut h = value ^ (i as u64).wrapping_mul(0x9e3779b97f4a7c15);
84                h = (h ^ (h >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
85                h = (h ^ (h >> 27)).wrapping_mul(0x94d049bb133111eb);
86                h = h ^ (h >> 31);
87                h as usize % n
88            })
89            .collect()
90    }
91
92    /// Insert a hash value into the IBF.
93    pub fn insert(&mut self, value: u64) {
94        let ch = cell_hash(value);
95        for idx in self.cell_indices(value) {
96            self.cells[idx].xor_sum ^= value;
97            self.cells[idx].hash_sum ^= ch;
98            self.cells[idx].count += 1;
99        }
100    }
101
102    /// Remove a hash value from the IBF.
103    pub fn remove(&mut self, value: u64) {
104        let ch = cell_hash(value);
105        for idx in self.cell_indices(value) {
106            self.cells[idx].xor_sum ^= value;
107            self.cells[idx].hash_sum ^= ch;
108            self.cells[idx].count -= 1;
109        }
110    }
111
112    /// Subtract `other` from `self` (XOR fields, subtract counts).
113    ///
114    /// The result encodes the symmetric difference of the two sets.
115    pub fn subtract(&self, other: &Ibf) -> Ibf {
116        let mut result = self.clone();
117        for (a, b) in result.cells.iter_mut().zip(&other.cells) {
118            a.xor_sum ^= b.xor_sum;
119            a.hash_sum ^= b.hash_sum;
120            a.count -= b.count;
121        }
122        result
123    }
124
125    /// Attempt to decode the symmetric difference from a subtracted IBF.
126    ///
127    /// Returns `Some((in_self_not_other, in_other_not_self))` on success, or
128    /// `None` if the difference is too large for the IBF to decode.
129    pub fn decode(&self) -> Option<(HashSet<u64>, HashSet<u64>)> {
130        let mut ibf = self.clone();
131        let mut in_a = HashSet::new();
132        let mut in_b = HashSet::new();
133
134        loop {
135            // Find the first verifiably pure cell.
136            let pure_idx = ibf.cells.iter().position(|c| c.is_pure());
137            let Some(idx) = pure_idx else { break };
138
139            let val = ibf.cells[idx].xor_sum;
140            let positive = ibf.cells[idx].count == 1;
141
142            if positive {
143                in_a.insert(val);
144                ibf.remove(val);
145            } else {
146                in_b.insert(val);
147                ibf.insert(val);
148            }
149        }
150
151        // Successful decode: all cells must be empty.
152        if ibf
153            .cells
154            .iter()
155            .all(|c| c.count == 0 && c.xor_sum == 0 && c.hash_sum == 0)
156        {
157            Some((in_a, in_b))
158        } else {
159            None
160        }
161    }
162}
163
164/// Partial Sync (PSync) node.
165///
166/// Maintains a local set of content name hashes and an IBF over that set.
167/// To reconcile with a peer: compute `local_ibf.subtract(peer_ibf)` and
168/// call `decode()` to find which hashes each side is missing.
169pub struct PSyncNode {
170    local_set: HashSet<u64>,
171    ibf_size: usize,
172    ibf_k: usize,
173}
174
175impl PSyncNode {
176    /// Create a node with an IBF of `ibf_size` cells and k=3 hash functions.
177    ///
178    /// `ibf_size` should be significantly larger than the expected set
179    /// difference; 80 cells handles differences up to ~40 elements with k=3.
180    pub fn new(ibf_size: usize) -> Self {
181        Self {
182            local_set: HashSet::new(),
183            ibf_size: ibf_size.max(4),
184            ibf_k: 3,
185        }
186    }
187
188    /// Insert a content hash into the local set.
189    pub fn insert(&mut self, hash: u64) {
190        self.local_set.insert(hash);
191    }
192
193    /// Remove a content hash from the local set.  Returns `true` if present.
194    pub fn remove(&mut self, hash: u64) -> bool {
195        self.local_set.remove(&hash)
196    }
197
198    /// Returns `true` if the local set contains `hash`.
199    pub fn contains(&self, hash: u64) -> bool {
200        self.local_set.contains(&hash)
201    }
202
203    /// Number of hashes in the local set.
204    pub fn len(&self) -> usize {
205        self.local_set.len()
206    }
207
208    /// Returns `true` if the local set is empty.
209    pub fn is_empty(&self) -> bool {
210        self.local_set.is_empty()
211    }
212
213    /// Build an IBF over the current local set.
214    pub fn build_ibf(&self) -> Ibf {
215        let mut ibf = Ibf::new(self.ibf_size, self.ibf_k);
216        for &h in &self.local_set {
217            ibf.insert(h);
218        }
219        ibf
220    }
221
222    /// Reconcile with a peer IBF.
223    ///
224    /// Returns `Some((hashes_we_have_that_peer_lacks, hashes_peer_has_that_we_lack))`
225    /// or `None` if the difference is too large to decode.
226    pub fn reconcile(&self, peer_ibf: &Ibf) -> Option<(HashSet<u64>, HashSet<u64>)> {
227        self.build_ibf().subtract(peer_ibf).decode()
228    }
229}
230
231#[cfg(test)]
232mod tests {
233    use super::*;
234
235    #[test]
236    fn ibf_insert_and_remove_is_identity() {
237        let mut ibf = Ibf::new(16, 3);
238        ibf.insert(42);
239        ibf.remove(42);
240        assert!(
241            ibf.cells
242                .iter()
243                .all(|c| c.count == 0 && c.xor_sum == 0 && c.hash_sum == 0)
244        );
245    }
246
247    #[test]
248    fn psync_contains_after_insert() {
249        let mut node = PSyncNode::new(16);
250        node.insert(100);
251        assert!(node.contains(100));
252        assert_eq!(node.len(), 1);
253    }
254
255    #[test]
256    fn psync_remove() {
257        let mut node = PSyncNode::new(16);
258        node.insert(1);
259        assert!(node.remove(1));
260        assert!(node.is_empty());
261    }
262
263    #[test]
264    fn reconcile_identical_sets_returns_empty_diff() {
265        let mut a = PSyncNode::new(64);
266        let mut b = PSyncNode::new(64);
267        for i in 0..10u64 {
268            a.insert(i);
269            b.insert(i);
270        }
271        let (a_extra, b_extra) = a.reconcile(&b.build_ibf()).unwrap();
272        assert!(a_extra.is_empty());
273        assert!(b_extra.is_empty());
274    }
275
276    #[test]
277    fn reconcile_one_sided_difference() {
278        let mut a = PSyncNode::new(64);
279        let b = PSyncNode::new(64);
280        a.insert(999);
281
282        let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
283        assert!(a_has.contains(&999));
284        assert!(b_has.is_empty());
285    }
286
287    #[test]
288    fn reconcile_disjoint_sets() {
289        let mut a = PSyncNode::new(64);
290        let mut b = PSyncNode::new(64);
291        // Use large distinct values so cell-hash collisions are implausible.
292        a.insert(0x0102030405060708);
293        a.insert(0x1112131415161718);
294        b.insert(0x2122232425262728);
295
296        let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
297        assert!(a_has.contains(&0x0102030405060708));
298        assert!(a_has.contains(&0x1112131415161718));
299        assert!(b_has.contains(&0x2122232425262728));
300    }
301
302    #[test]
303    fn reconcile_both_sides_have_extras() {
304        let mut a = PSyncNode::new(64);
305        let mut b = PSyncNode::new(64);
306        // A-only
307        a.insert(0xAA00000000000001);
308        // B-only
309        b.insert(0xBB00000000000001);
310        b.insert(0xBB00000000000002);
311
312        let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
313        assert_eq!(a_has.len(), 1);
314        assert!(a_has.contains(&0xAA00000000000001));
315        assert_eq!(b_has.len(), 2);
316        assert!(b_has.contains(&0xBB00000000000001));
317        assert!(b_has.contains(&0xBB00000000000002));
318    }
319}