mmtk/util/rust_util/
rev_group.rs

1//! This module provides an iterator that groups adjacent items with the same key.
2//!
3//! It gives all `Iterator + Clone` iterators a new method: `.revisitable_group_by`. It is similar
4//! to `Itertools::group_by`, but it lets the user know the length of each group before iterating
5//! through the group.  Implementation-wise, it eagerly finds all items with the same key, and
6//! then lets the user traverse the same range of items again using a pre-cloned iterator. This
7//! is why it is named "revisitable" group-by.
8//!
9//! This is useful for the memory mapper to coalesce the `mmap` call for adjacent chunks that have
10//! the same `MapState`.  The memory mapper needs to know the size of each group to compute the
11//! memory range the group of `MapState` covers in order to call `mmap`, and then traverse the
12//! group of `MapState` again to update them.
13//!
14//! The `.revisitable_group_by` method takes a closure for computing the keys of each item.
15//! Adjacent items with the same key will be put into the same group.
16//!
17//! The following example groups adjacent even or odd numbers together.
18//!
19//! ```rs
20//! let nums = [1, 3, 5, 2, 4, 6, 7, 9];
21//! for group in nums.iter().revisitable_group_by(|x| *x % 2) {
22//!     println!("key: {}, len: {}", group.key, group.len);
23//!     for x in group {
24//!         println!("  x: {}", *x);
25//!     }
26//! }
27//! ```
28//!
29//! It should form three groups, `[1, 3, 5]`, `[2, 4, 6]` and `[7, 9]`, with the keys being 1, 0
30//! and 1, respectively.
31//!
32//! It can be used with the `.flatten()` method to make groups across the boundaries of several
33//! iterable items.
34//!
35//! ```rs
36//! let slice_of_slices: &[&[i32]] = &[&[10, 20], &[30, 40, 11, 21], &[31, 12, 22]];
37//! let result = slice_of_slices.iter().copied().flatten().copied()
38//!     .revisitable_group_by(|x| x % 10)
39//!     .map(|group| group.collect::<Vec<_>>())
40//!     .collect::<Vec<_>>();
41//! assert_eq!(
42//!     result,
43//!     vec![vec![10, 20, 30, 40], vec![11, 21, 31], vec![12, 22]],
44//! );
45//! ```
46
47/// This trait provides the `revisitable_group_by` method for all `Iterator` that also implements
48/// `Clone`.
49pub(crate) trait RevisitableGroupByForIterator {
50    type Item;
51    type Iter: Iterator<Item = Self::Item> + Clone;
52
53    /// Group adjacent items by key.  `get_key` is a closure that computes the key.
54    fn revisitable_group_by<K, F>(
55        self,
56        get_key: F,
57    ) -> RevisitableGroupBy<Self::Item, K, Self::Iter, F>
58    where
59        K: PartialEq + Copy,
60        F: FnMut(&Self::Item) -> K;
61}
62
63impl<I: Iterator + Clone> RevisitableGroupByForIterator for I {
64    type Item = <I as Iterator>::Item;
65    type Iter = I;
66
67    fn revisitable_group_by<K, F>(
68        self,
69        get_key: F,
70    ) -> RevisitableGroupBy<Self::Item, K, Self::Iter, F>
71    where
72        K: PartialEq + Copy,
73        F: FnMut(&Self::Item) -> K,
74    {
75        RevisitableGroupBy {
76            iter: self,
77            get_key,
78            next_group_initial: None,
79        }
80    }
81}
82
83/// An iterator through groups of items with the same key.
84pub(crate) struct RevisitableGroupBy<T, K, I, F>
85where
86    K: PartialEq + Copy,
87    I: Iterator<Item = T> + Clone,
88    F: FnMut(&T) -> K,
89{
90    /// The underlying iterator.
91    iter: I,
92    /// The function to get the key.
93    get_key: F,
94    /// Temporarily save the item and key of the next group when peeking.
95    next_group_initial: Option<(T, K)>,
96}
97
98impl<T, K, I, F> Iterator for RevisitableGroupBy<T, K, I, F>
99where
100    K: PartialEq + Copy,
101    I: Iterator<Item = T> + Clone,
102    F: FnMut(&T) -> K,
103{
104    type Item = RevisitableGroup<T, K, I>;
105
106    fn next(&mut self) -> Option<Self::Item> {
107        let (group_head, group_key) = if let Some((head, key)) = self.next_group_initial.take() {
108            // We already peeked the item of the next group the last time `next()` was called.
109            // Count that in.
110            (head, key)
111        } else {
112            // Either we haven't start iterating, yet, or we already exhausted the iter.
113            // Get the next item from the underlying iter.
114            if let Some(item) = self.iter.next() {
115                // The next group has at least one item.
116                // This is the key of the group.
117                let key = (self.get_key)(&item);
118                (item, key)
119            } else {
120                return None;
121            }
122        };
123
124        // If reached here, the group must have at least one item.
125        let mut group_size = 1;
126
127        // Get the rest of the group.
128        let saved_iter = self.iter.clone();
129        loop {
130            if let Some(item) = self.iter.next() {
131                // The next item exists. It either belongs to the current group or not.
132                let key = (self.get_key)(&item);
133                if key == group_key {
134                    // It is in the same group.
135                    group_size += 1;
136                } else {
137                    // It belongs to the next group.  Save the item and the key...
138                    self.next_group_initial = Some((item, key));
139                    // ... and we have a group now.
140                    break;
141                }
142            } else {
143                // No more items. This is the last group.
144                debug_assert!(self.next_group_initial.is_none());
145                break;
146            }
147        }
148
149        Some(RevisitableGroup {
150            key: group_key,
151            len: group_size,
152            head: Some(group_head),
153            iter: saved_iter,
154            remaining: group_size,
155        })
156    }
157}
158
159pub(crate) struct RevisitableGroup<T, K, I>
160where
161    K: PartialEq + Copy,
162    I: Iterator<Item = T>,
163{
164    /// The key of this group.
165    pub key: K,
166    /// The length of this group.
167    pub len: usize,
168    /// The first item. Note that `iter` starts from the second element due to the way we clone it.
169    head: Option<T>,
170    /// The underlying iterator.
171    iter: I,
172    /// The number of items remain to be iterated.
173    remaining: usize,
174}
175
176impl<T, K, I> Iterator for RevisitableGroup<T, K, I>
177where
178    K: PartialEq + Copy,
179    I: Iterator<Item = T>,
180{
181    type Item = T;
182
183    fn next(&mut self) -> Option<Self::Item> {
184        if self.remaining == 0 {
185            None
186        } else {
187            self.remaining -= 1;
188            if let Some(item) = self.head.take() {
189                Some(item)
190            } else {
191                let result = self.iter.next();
192                debug_assert!(result.is_some());
193                result
194            }
195        }
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202
203    #[test]
204    fn test_simple_group_by() {
205        let nums = [1, 3, 5, 2, 4, 6, 7, 9];
206        let grouped = nums
207            .iter()
208            .revisitable_group_by(|x| *x % 2)
209            .map(|group| (group.key, group.len, group.copied().collect::<Vec<_>>()))
210            .collect::<Vec<_>>();
211        assert_eq!(
212            grouped,
213            vec![
214                (1, 3, vec![1, 3, 5]),
215                (0, 3, vec![2, 4, 6]),
216                (1, 2, vec![7, 9]),
217            ]
218        );
219    }
220
221    #[test]
222    #[allow(clippy::never_loop)] // We are testing with empty slices. The panic in the loop body should not run.
223    fn test_empty_outer_slice() {
224        let slice_of_slices: &[&[i32]] = &[];
225        for _group in slice_of_slices
226            .iter()
227            .copied()
228            .flatten()
229            .copied()
230            .revisitable_group_by(|_| 42)
231        {
232            panic!("There is no item!");
233        }
234    }
235
236    #[test]
237    #[allow(clippy::never_loop)] // We are testing with empty slices. The panic in the loop body should not run.
238    fn test_empty_inner_slice() {
239        let slice_of_slices: &[&[i32]] = &[&[], &[], &[]];
240        for _group in slice_of_slices
241            .iter()
242            .copied()
243            .flatten()
244            .copied()
245            .revisitable_group_by(|_| 42)
246        {
247            panic!("There is no item!");
248        }
249    }
250
251    #[test]
252    fn test_single_item() {
253        let slice_of_slices: &[&[i32]] = &[&[1]];
254        for group in slice_of_slices
255            .iter()
256            .copied()
257            .flatten()
258            .copied()
259            .revisitable_group_by(|_| 42)
260        {
261            assert_eq!(group.key, 42);
262        }
263    }
264
265    #[test]
266    fn test_single_slice_multi_item() {
267        let slice_of_slices: &[&[i32]] = &[&[1, 3, 5, 2, 4, 6, 7]];
268        let result = slice_of_slices
269            .iter()
270            .copied()
271            .flatten()
272            .copied()
273            .revisitable_group_by(|x| x % 2)
274            .map(|group| (group.key, group.len, group.collect::<Vec<_>>()))
275            .collect::<Vec<_>>();
276        assert_eq!(
277            result,
278            vec![
279                (1, 3, vec![1, 3, 5]),
280                (0, 3, vec![2, 4, 6]),
281                (1, 1, vec![7])
282            ]
283        );
284    }
285
286    #[test]
287    fn test_multi_slice_multi_item() {
288        let slice_of_slices: &[&[i32]] = &[&[10, 20], &[11, 21, 31], &[12, 22, 32, 42]];
289        let result = slice_of_slices
290            .iter()
291            .copied()
292            .flatten()
293            .copied()
294            .revisitable_group_by(|x| x % 10)
295            .map(|group| (group.key, group.len, group.collect::<Vec<_>>()))
296            .collect::<Vec<_>>();
297        assert_eq!(
298            result,
299            vec![
300                (0, 2, vec![10, 20]),
301                (1, 3, vec![11, 21, 31]),
302                (2, 4, vec![12, 22, 32, 42])
303            ]
304        );
305    }
306
307    #[test]
308    fn test_cross_slice_groups() {
309        let slice_of_slices: &[&[i32]] = &[&[10, 20], &[30, 40, 11, 21], &[31, 12, 22]];
310        let result = slice_of_slices
311            .iter()
312            .copied()
313            .flatten()
314            .copied()
315            .revisitable_group_by(|x| x % 10)
316            .map(|group| (group.key, group.len, group.collect::<Vec<_>>()))
317            .collect::<Vec<_>>();
318        assert_eq!(
319            result,
320            vec![
321                (0, 4, vec![10, 20, 30, 40]),
322                (1, 3, vec![11, 21, 31]),
323                (2, 2, vec![12, 22])
324            ]
325        );
326    }
327
328    #[test]
329    fn test_cross_slice_groups2() {
330        let slice_of_slices: &[&[i32]] = &[&[10, 20, 11], &[21, 31, 41], &[51, 61], &[71, 12, 22]];
331        let result = slice_of_slices
332            .iter()
333            .cloned()
334            .flatten()
335            .copied()
336            .revisitable_group_by(|x| x % 10)
337            .map(|group| (group.key, group.len, group.collect::<Vec<_>>()))
338            .collect::<Vec<_>>();
339        assert_eq!(
340            result,
341            vec![
342                (0, 2, vec![10, 20]),
343                (1, 7, vec![11, 21, 31, 41, 51, 61, 71]),
344                (2, 2, vec![12, 22])
345            ]
346        );
347    }
348
349    #[test]
350    fn test_internal_mutability() {
351        use std::sync::atomic::{AtomicUsize, Ordering};
352        let slab0 = vec![
353            AtomicUsize::new(1),
354            AtomicUsize::new(3),
355            AtomicUsize::new(2),
356        ];
357        let slab1 = vec![
358            AtomicUsize::new(4),
359            AtomicUsize::new(6),
360            AtomicUsize::new(5),
361        ];
362        let slab2 = vec![
363            AtomicUsize::new(7),
364            AtomicUsize::new(9),
365            AtomicUsize::new(10),
366        ];
367
368        // Note: We only take the first two elements from slab2,
369        // because the mmapper sometimes processes part of a slab.
370        let slices: Vec<&[AtomicUsize]> = vec![&slab0[0..3], &slab1[0..3], &slab2[0..2]];
371
372        let mut collected = vec![];
373
374        for group in slices
375            .iter()
376            .copied()
377            .flatten()
378            .revisitable_group_by(|x| x.load(Ordering::SeqCst) % 2)
379        {
380            let mut group_collected = vec![];
381            let key = group.key;
382            for elem in group {
383                let value = elem.load(Ordering::SeqCst);
384                group_collected.push(value);
385
386                let new_value = value * 100 + key;
387                elem.store(new_value, Ordering::SeqCst);
388            }
389
390            collected.push(group_collected);
391        }
392
393        assert_eq!(collected, vec![vec![1, 3], vec![2, 4, 6], vec![5, 7, 9]]);
394
395        let load_all = |slab: Vec<AtomicUsize>| {
396            slab.iter()
397                .map(|x| x.load(Ordering::SeqCst))
398                .collect::<Vec<_>>()
399        };
400
401        assert_eq!(load_all(slab0), vec![101, 301, 200]);
402        assert_eq!(load_all(slab1), vec![400, 600, 501]);
403        assert_eq!(load_all(slab2), vec![701, 901, 10]); // The last item should not be affected.
404    }
405}