1use std::collections::HashSet;
2
3fn cell_hash(v: u64) -> u64 {
8 v.wrapping_mul(0x517cc1b727220a95)
9 .rotate_right(17)
10 .wrapping_add(0xdeadbeefcafe1234)
11}
12
13#[derive(Clone, Debug, Default)]
20struct IbfCell {
21 xor_sum: u64,
22 hash_sum: u64,
23 count: i64,
24}
25
26impl IbfCell {
27 fn is_pure(&self) -> bool {
28 (self.count == 1 || self.count == -1) && cell_hash(self.xor_sum) == self.hash_sum
29 }
30}
31
32#[derive(Clone, Debug)]
37pub struct Ibf {
38 cells: Vec<IbfCell>,
39 k: usize,
40}
41
42impl Ibf {
43 pub fn new(size: usize, k: usize) -> Self {
45 Self {
46 cells: vec![IbfCell::default(); size.max(1)],
47 k: k.max(1),
48 }
49 }
50
51 pub fn from_cells(cells: Vec<(u64, u64, i64)>) -> Self {
56 let k = 3; Self {
58 cells: cells
59 .into_iter()
60 .map(|(x, h, c)| IbfCell {
61 xor_sum: x,
62 hash_sum: h,
63 count: c,
64 })
65 .collect(),
66 k,
67 }
68 }
69
70 pub fn cells(&self) -> Vec<(u64, u64, i64)> {
72 self.cells
73 .iter()
74 .map(|c| (c.xor_sum, c.hash_sum, c.count))
75 .collect()
76 }
77
78 fn cell_indices(&self, value: u64) -> Vec<usize> {
79 let n = self.cells.len();
80 (0..self.k)
81 .map(|i| {
82 let mut h = value ^ (i as u64).wrapping_mul(0x9e3779b97f4a7c15);
84 h = (h ^ (h >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
85 h = (h ^ (h >> 27)).wrapping_mul(0x94d049bb133111eb);
86 h = h ^ (h >> 31);
87 h as usize % n
88 })
89 .collect()
90 }
91
92 pub fn insert(&mut self, value: u64) {
94 let ch = cell_hash(value);
95 for idx in self.cell_indices(value) {
96 self.cells[idx].xor_sum ^= value;
97 self.cells[idx].hash_sum ^= ch;
98 self.cells[idx].count += 1;
99 }
100 }
101
102 pub fn remove(&mut self, value: u64) {
104 let ch = cell_hash(value);
105 for idx in self.cell_indices(value) {
106 self.cells[idx].xor_sum ^= value;
107 self.cells[idx].hash_sum ^= ch;
108 self.cells[idx].count -= 1;
109 }
110 }
111
112 pub fn subtract(&self, other: &Ibf) -> Ibf {
116 let mut result = self.clone();
117 for (a, b) in result.cells.iter_mut().zip(&other.cells) {
118 a.xor_sum ^= b.xor_sum;
119 a.hash_sum ^= b.hash_sum;
120 a.count -= b.count;
121 }
122 result
123 }
124
125 pub fn decode(&self) -> Option<(HashSet<u64>, HashSet<u64>)> {
130 let mut ibf = self.clone();
131 let mut in_a = HashSet::new();
132 let mut in_b = HashSet::new();
133
134 loop {
135 let pure_idx = ibf.cells.iter().position(|c| c.is_pure());
137 let Some(idx) = pure_idx else { break };
138
139 let val = ibf.cells[idx].xor_sum;
140 let positive = ibf.cells[idx].count == 1;
141
142 if positive {
143 in_a.insert(val);
144 ibf.remove(val);
145 } else {
146 in_b.insert(val);
147 ibf.insert(val);
148 }
149 }
150
151 if ibf
153 .cells
154 .iter()
155 .all(|c| c.count == 0 && c.xor_sum == 0 && c.hash_sum == 0)
156 {
157 Some((in_a, in_b))
158 } else {
159 None
160 }
161 }
162}
163
164pub struct PSyncNode {
170 local_set: HashSet<u64>,
171 ibf_size: usize,
172 ibf_k: usize,
173}
174
175impl PSyncNode {
176 pub fn new(ibf_size: usize) -> Self {
181 Self {
182 local_set: HashSet::new(),
183 ibf_size: ibf_size.max(4),
184 ibf_k: 3,
185 }
186 }
187
188 pub fn insert(&mut self, hash: u64) {
190 self.local_set.insert(hash);
191 }
192
193 pub fn remove(&mut self, hash: u64) -> bool {
195 self.local_set.remove(&hash)
196 }
197
198 pub fn contains(&self, hash: u64) -> bool {
200 self.local_set.contains(&hash)
201 }
202
203 pub fn len(&self) -> usize {
205 self.local_set.len()
206 }
207
208 pub fn is_empty(&self) -> bool {
210 self.local_set.is_empty()
211 }
212
213 pub fn build_ibf(&self) -> Ibf {
215 let mut ibf = Ibf::new(self.ibf_size, self.ibf_k);
216 for &h in &self.local_set {
217 ibf.insert(h);
218 }
219 ibf
220 }
221
222 pub fn reconcile(&self, peer_ibf: &Ibf) -> Option<(HashSet<u64>, HashSet<u64>)> {
227 self.build_ibf().subtract(peer_ibf).decode()
228 }
229}
230
231#[cfg(test)]
232mod tests {
233 use super::*;
234
235 #[test]
236 fn ibf_insert_and_remove_is_identity() {
237 let mut ibf = Ibf::new(16, 3);
238 ibf.insert(42);
239 ibf.remove(42);
240 assert!(
241 ibf.cells
242 .iter()
243 .all(|c| c.count == 0 && c.xor_sum == 0 && c.hash_sum == 0)
244 );
245 }
246
247 #[test]
248 fn psync_contains_after_insert() {
249 let mut node = PSyncNode::new(16);
250 node.insert(100);
251 assert!(node.contains(100));
252 assert_eq!(node.len(), 1);
253 }
254
255 #[test]
256 fn psync_remove() {
257 let mut node = PSyncNode::new(16);
258 node.insert(1);
259 assert!(node.remove(1));
260 assert!(node.is_empty());
261 }
262
263 #[test]
264 fn reconcile_identical_sets_returns_empty_diff() {
265 let mut a = PSyncNode::new(64);
266 let mut b = PSyncNode::new(64);
267 for i in 0..10u64 {
268 a.insert(i);
269 b.insert(i);
270 }
271 let (a_extra, b_extra) = a.reconcile(&b.build_ibf()).unwrap();
272 assert!(a_extra.is_empty());
273 assert!(b_extra.is_empty());
274 }
275
276 #[test]
277 fn reconcile_one_sided_difference() {
278 let mut a = PSyncNode::new(64);
279 let b = PSyncNode::new(64);
280 a.insert(999);
281
282 let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
283 assert!(a_has.contains(&999));
284 assert!(b_has.is_empty());
285 }
286
287 #[test]
288 fn reconcile_disjoint_sets() {
289 let mut a = PSyncNode::new(64);
290 let mut b = PSyncNode::new(64);
291 a.insert(0x0102030405060708);
293 a.insert(0x1112131415161718);
294 b.insert(0x2122232425262728);
295
296 let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
297 assert!(a_has.contains(&0x0102030405060708));
298 assert!(a_has.contains(&0x1112131415161718));
299 assert!(b_has.contains(&0x2122232425262728));
300 }
301
302 #[test]
303 fn reconcile_both_sides_have_extras() {
304 let mut a = PSyncNode::new(64);
305 let mut b = PSyncNode::new(64);
306 a.insert(0xAA00000000000001);
308 b.insert(0xBB00000000000001);
310 b.insert(0xBB00000000000002);
311
312 let (a_has, b_has) = a.reconcile(&b.build_ibf()).unwrap();
313 assert_eq!(a_has.len(), 1);
314 assert!(a_has.contains(&0xAA00000000000001));
315 assert_eq!(b_has.len(), 2);
316 assert!(b_has.contains(&0xBB00000000000001));
317 assert!(b_has.contains(&0xBB00000000000002));
318 }
319}