// This is a free list allocator based on Microsoft's mimalloc allocator: https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action/

use std::sync::Arc;

use crate::policy::marksweepspace::native_ms::*;
use crate::util::alloc::allocator;
use crate::util::alloc::Allocator;
use crate::util::linear_scan::Region;
use crate::util::Address;
use crate::util::VMThread;
use crate::vm::VMBinding;

use super::allocator::AllocatorContext;

/// A MiMalloc free list allocator
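///
/// The allocator keeps one block list per size class (bin). Each block holds an intrusive
/// free list of equal-sized cells, so in the common case an allocation simply pops a cell
/// off the free list of a block in the matching bin.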
#[repr(C)]
pub struct FreeListAllocator<VM: VMBinding> {
    /// [`VMThread`] associated with this allocator instance
    pub tls: VMThread,
    space: &'static MarkSweepSpace<VM>,
    context: Arc<AllocatorContext<VM>>,
    /// blocks with free space
    pub available_blocks: BlockLists,
    /// blocks with free space for precise stress GC
    /// For precise stress GC, we need to be able to trigger slowpath allocation for
    /// each allocation. To achieve this, we put available blocks in this list instead
    /// of `available_blocks`, so normal fastpath allocation will fail, as it will see
    /// the `available_blocks` lists as empty.
    pub available_blocks_stress: BlockLists,
    /// blocks that are marked, not swept
    pub unswept_blocks: BlockLists,
    /// full blocks
    pub consumed_blocks: BlockLists,
}

impl<VM: VMBinding> Allocator<VM> for FreeListAllocator<VM> {
    fn get_tls(&self) -> VMThread {
        self.tls
    }

    fn get_space(&self) -> &'static dyn crate::policy::space::Space<VM> {
        self.space
    }

    fn get_context(&self) -> &AllocatorContext<VM> {
        &self.context
    }

    // Find a block with free space and allocate from it
    fn alloc(&mut self, size: usize, align: usize, offset: usize) -> Address {
        debug_assert!(
            size <= MAX_BIN_SIZE,
            "Alloc request for {} bytes is too big.",
            size
        );
        debug_assert!(align <= VM::MAX_ALIGNMENT);
        debug_assert!(align >= VM::MIN_ALIGNMENT);

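        // Fastpath: try to find a block with free cells in the thread-local block lists.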
        if let Some(block) = self.find_free_block_local(size, align) {
            let cell = self.block_alloc(block);
            if !cell.is_zero() {
                // We succeeded in the fastpath allocation, so this cannot be a precise stress test (which forces the slowpath)
                debug_assert!(
                    !(*self.context.options.precise_stress
                        && self.context.options.is_stress_test_gc_enabled())
                );

                let res = allocator::align_allocation::<VM>(cell, align, offset);
                // Make sure that the allocation region is within the cell
                #[cfg(debug_assertions)]
                {
                    let cell_size = block.load_block_cell_size();
                    debug_assert!(
                        res + size <= cell + cell_size,
                        "Allocating (size = {}, align = {}, offset = {}) to the cell {} of size {}, but the end of the allocation region {} is beyond the cell end {}",
                        size, align, offset, cell, cell_size, res + size, cell + cell_size
                    );
                }
                return res;
            }
        }

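        // The fastpath failed. Take the slowpath, which may acquire a new block from the space
        // and may trigger a GC.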
        self.alloc_slow(size, align, offset)
    }

    fn alloc_slow_once(&mut self, size: usize, align: usize, offset: usize) -> Address {
        // Try to get a block from the space
        if let Some(block) = self.acquire_global_block(size, align, false) {
            let addr = self.block_alloc(block);
            allocator::align_allocation::<VM>(addr, align, offset)
        } else {
            Address::ZERO
        }
    }

    fn does_thread_local_allocation(&self) -> bool {
        true
    }

    fn get_thread_local_buffer_granularity(&self) -> usize {
        Block::BYTES
    }

    fn alloc_slow_once_precise_stress(
        &mut self,
        size: usize,
        align: usize,
        offset: usize,
        need_poll: bool,
    ) -> Address {
        trace!("allow slow precise stress s={}", size);
        if need_poll {
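            // We need to poll the space (which may trigger a GC) before this allocation;
            // acquiring a global block performs that poll.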
            self.acquire_global_block(0, 0, true);
        }

        // mimic what fastpath allocation does, except that we allocate from available_blocks_stress.
        if let Some(block) = self.find_free_block_stress(size, align) {
            let cell = self.block_alloc(block);
            allocator::align_allocation::<VM>(cell, align, offset)
        } else {
            Address::ZERO
        }
    }

    fn on_mutator_destroy(&mut self) {
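        // Give all locally held blocks back to the space's global abandoned block lists
        // so other mutators can reuse them.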
        let mut global = self.space.get_abandoned_block_lists().lock().unwrap();
        self.abandon_blocks(&mut global);
    }
}

impl<VM: VMBinding> FreeListAllocator<VM> {
    // Create a new free list allocator
    pub(crate) fn new(
        tls: VMThread,
        space: &'static MarkSweepSpace<VM>,
        context: Arc<AllocatorContext<VM>>,
    ) -> Self {
        FreeListAllocator {
            tls,
            space,
            context,
            available_blocks: new_empty_block_lists(),
            available_blocks_stress: new_empty_block_lists(),
            unswept_blocks: new_empty_block_lists(),
            consumed_blocks: new_empty_block_lists(),
        }
    }

    // Find a free cell within a given block
    fn block_alloc(&mut self, block: Block) -> Address {
        let cell = block.load_free_list();
        if cell.is_zero() {
            return cell; // return failed allocation
        }
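        // Free cells in a block form a singly linked list: each free cell stores the address
        // of the next free cell at its beginning. Pop the head cell off the list.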
        let next_cell = unsafe { cell.load::<Address>() };
        // Clear the link
        unsafe { cell.store::<Address>(Address::ZERO) };
        debug_assert!(
            next_cell.is_zero() || block.includes_address(next_cell),
            "next_cell {} is not in {:?}",
            next_cell,
            block
        );
        block.store_free_list(next_cell);

        // Zero the memory right before we return it.
        // If we move the zeroing elsewhere, we need to clear the list link here instead: cell.store::<Address>(Address::ZERO)
        let cell_size = block.load_block_cell_size();
        crate::util::memory::zero(cell, cell_size);

        // Make sure the memory is zeroed. This looks redundant, as we zero the cell right before this check,
        // but we may move the zeroing elsewhere so it can be done at a coarser granularity, in which case this check remains useful.
        #[cfg(debug_assertions)]
        {
            let mut cursor = cell;
            while cursor < cell + cell_size {
                debug_assert_eq!(unsafe { cursor.load::<usize>() }, 0);
                cursor += crate::util::constants::BYTES_IN_ADDRESS;
            }
        }

        cell
    }

    // Find an available block when stress GC is enabled. This includes getting a block from the space.
    fn find_free_block_stress(&mut self, size: usize, align: usize) -> Option<Block> {
        Self::find_free_block_with(
            &mut self.available_blocks_stress,
            &mut self.consumed_blocks,
            size,
            align,
        )
        .or_else(|| self.recycle_local_blocks(size, align, true))
        .or_else(|| self.acquire_global_block(size, align, true))
    }

    // Find an available block from local block lists
    fn find_free_block_local(&mut self, size: usize, align: usize) -> Option<Block> {
        Self::find_free_block_with(
            &mut self.available_blocks,
            &mut self.consumed_blocks,
            size,
            align,
        )
        .or_else(|| self.recycle_local_blocks(size, align, false))
    }

    // Find an available block.
    // This will usually be the first block on the available list. If all the available blocks are found
    // to be full, other lists are searched.
    // This function takes the block lists as parameters -- normal allocation uses self.available_blocks, and precise stress tests use self.available_blocks_stress.
    fn find_free_block_with(
        available_blocks: &mut BlockLists,
        consumed_blocks: &mut BlockLists,
        size: usize,
        align: usize,
    ) -> Option<Block> {
        let bin = mi_bin::<VM>(size, align);
        debug_assert!(bin <= MAX_BIN);

        let available = &mut available_blocks[bin];
        debug_assert!(available.size >= size);

        if !available.is_empty() {
            let mut cursor = available.first;

            while let Some(block) = cursor {
                if block.has_free_cells() {
                    return Some(block);
                }
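                // The block at the head of the list has no free cells. Move it to the
                // consumed list and retry with the next block on the available list.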
                available.pop();
                consumed_blocks.get_mut(bin).unwrap().push(block);

                cursor = available.first;
            }
        }

        debug_assert!(available_blocks[bin].is_empty());
        None
    }

    /// Add a block to the given bin in the available block lists. Depending on which available block list we are using, this
    /// method may add the block to `available_blocks` or `available_blocks_stress`.
    fn add_to_available_blocks(&mut self, bin: usize, block: Block, stress: bool) {
        if stress {
            debug_assert!(*self.context.options.precise_stress);
            self.available_blocks_stress[bin].push(block);
        } else {
            self.available_blocks[bin].push(block);
        }
    }

    /// Tries to recycle local blocks if there are any. This is a no-op for eager-sweeping mark sweep.
    fn recycle_local_blocks(
        &mut self,
        size: usize,
        align: usize,
        _stress_test: bool,
    ) -> Option<Block> {
        if cfg!(feature = "eager_sweeping") {
            // We have swept blocks in the last GC. If we run out of available blocks, there is nothing we can do.
            None
        } else {
            // Get blocks from unswept_blocks and attempt to sweep
            loop {
                let bin = mi_bin::<VM>(size, align);
                debug_assert!(self.available_blocks[bin].is_empty()); // only use this function if there are no blocks available

                if let Some(block) = self.unswept_blocks.get_mut(bin).unwrap().pop() {
                    block.sweep::<VM>();
                    if block.has_free_cells() {
                        // recyclable block
                        self.add_to_available_blocks(
                            bin,
                            block,
                            self.context.options.is_stress_test_gc_enabled(),
                        );
                        return Some(block);
                    } else {
                        // nothing was freed from this block
                        self.consumed_blocks.get_mut(bin).unwrap().push(block);
                    }
                } else {
                    return None;
                }
            }
        }
    }

    /// Get a block from the space.
    fn acquire_global_block(
        &mut self,
        size: usize,
        align: usize,
        stress_test: bool,
    ) -> Option<Block> {
        let bin = mi_bin::<VM>(size, align);
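        // Keep acquiring blocks from the space until we get one with free cells,
        // or the space is exhausted.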
        loop {
            match self.space.acquire_block(self.tls, size, align) {
                crate::policy::marksweepspace::native_ms::BlockAcquireResult::Exhausted => {
                    debug!("Acquire global block: None");
                    // The space is exhausted. The caller may need to trigger a GC.
                    return None;
                }

                crate::policy::marksweepspace::native_ms::BlockAcquireResult::Fresh(block) => {
                    debug!("Acquire global block: Fresh {:?}", block);
                    self.add_to_available_blocks(bin, block, stress_test);
                    self.init_block(block, self.available_blocks[bin].size);

                    return Some(block);
                }

                crate::policy::marksweepspace::native_ms::BlockAcquireResult::AbandonedAvailable(block) => {
                    debug!("Acquire global block: AbandonedAvailable {:?}", block);
                    block.store_tls(self.tls);
                    if block.has_free_cells() {
                        self.add_to_available_blocks(bin, block, stress_test);
                        return Some(block);
                    } else {
                        self.consumed_blocks[bin].push(block);
                    }
                }

                crate::policy::marksweepspace::native_ms::BlockAcquireResult::AbandonedUnswept(block) => {
                    debug!("Acquire global block: AbandonedUnswep {:?}", block);
                    block.store_tls(self.tls);
                    block.sweep::<VM>();
                    if block.has_free_cells() {
                        self.add_to_available_blocks(bin, block, stress_test);
                        return Some(block);
                    } else {
                        self.consumed_blocks[bin].push(block);
                    }
                }
            }
        }
    }

    fn init_block(&self, block: Block, cell_size: usize) {
        debug_assert_ne!(cell_size, 0);
        self.space.record_new_block(block);

        // construct free list
        let block_end = block.start() + Block::BYTES;
        let mut old_cell = unsafe { Address::zero() };
        let mut new_cell = block.start();

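        // Thread the cells of the block into a singly linked free list: each cell stores the
        // address of the previously linked (lower) cell, and the highest cell that fits in the
        // block becomes the head of the list.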
        let final_cell = loop {
            unsafe {
                new_cell.store::<Address>(old_cell);
            }
            old_cell = new_cell;
            new_cell += cell_size;
            if new_cell + cell_size > block_end {
                break old_cell;
            };
        };

        block.store_free_list(final_cell);
        block.store_block_cell_size(cell_size);
        #[cfg(feature = "malloc_native_mimalloc")]
        {
            block.store_local_free_list(Address::ZERO);
            block.store_thread_free_list(Address::ZERO);
        }

        self.store_block_tls(block);
    }

    #[cfg(feature = "malloc_native_mimalloc")]
    fn free(&self, addr: Address) {
        assert!(!addr.is_zero(), "Attempted to free zero address.");

        use crate::util::ObjectReference;
        let block = Block::from_unaligned_address(addr);
        let block_tls = block.load_tls();

        if self.tls == block_tls {
            // same thread that allocated
            let local_free = block.load_local_free_list();
            unsafe {
                addr.store(local_free);
            }
            block.store_local_free_list(addr);
        } else {
            // freed from a different thread than the one that allocated
            unreachable!(
                "tlss don't match freeing from block {}, my tls = {:?}, block tls = {:?}",
                block.start(),
                self.tls,
                block.load_tls()
            );

            // It is unclear whether the following code would be needed to free a cell owned by another thread.
            // It is kept here, commented out, for reference.
            // let mut success = false;
            // while !success {
            //     let thread_free = FreeListAllocator::<VM>::load_thread_free_list(block);
            //     unsafe {
            //         addr.store(thread_free);
            //     }
            //     success = FreeListAllocator::<VM>::cas_thread_free_list(&self, block, thread_free, addr);
            // }
        }

        // unset the VO bit
        // Note: We cannot use `unset_vo_bit_unsafe` because two threads may attempt to free
        // objects at adjacent addresses, and they may share the same byte in the VO bit metadata.
        crate::util::metadata::vo_bit::unset_vo_bit(unsafe {
            ObjectReference::from_raw_address_unchecked(addr)
        })
    }

    fn store_block_tls(&self, block: Block) {
        block.store_tls(self.tls);
    }

    pub(crate) fn prepare(&mut self) {}

    pub(crate) fn release(&mut self) {
        for bin in 0..MI_BIN_FULL {
            let unswept = self.unswept_blocks.get_mut(bin).unwrap();

            // If we do eager sweeping, we should have no unswept blocks.
            debug_assert!(!cfg!(feature = "eager_sweeping") || unswept.is_empty());

            let mut sweep_later = |list: &mut BlockList| {
                list.release_blocks(self.space);

                // For eager sweeping, that's it.  We just release unmarked blocks, and leave marked
                // blocks to be swept later in the `SweepChunk` work packet.

                // For lazy sweeping, we move blocks from available and consumed to unswept.  When
                // an allocator tries to use them, it will sweep the block.
                if cfg!(not(feature = "eager_sweeping")) {
                    unswept.append(list);
                }
            };

            sweep_later(&mut self.available_blocks[bin]);
            sweep_later(&mut self.available_blocks_stress[bin]);
            sweep_later(&mut self.consumed_blocks[bin]);
        }

        // We abandon block lists immediately.  Otherwise, some mutators will hold lots of blocks
        // locally and prevent other mutators from using them.
        {
            let mut global = self.space.get_abandoned_block_lists_in_gc().lock().unwrap();
            self.abandon_blocks(&mut global);
        }

        self.space.release_packet_done();
    }

    fn abandon_blocks(&mut self, global: &mut AbandonedBlockLists) {
        for i in 0..MI_BIN_FULL {
            let available = self.available_blocks.get_mut(i).unwrap();
            if !available.is_empty() {
                global.available[i].append(available);
            }

            let available_stress = self.available_blocks_stress.get_mut(i).unwrap();
            if !available_stress.is_empty() {
                global.available[i].append(available_stress);
            }

            let consumed = self.consumed_blocks.get_mut(i).unwrap();
            if !consumed.is_empty() {
                global.consumed[i].append(consumed);
            }

            let unswept = self.unswept_blocks.get_mut(i).unwrap();
            if !unswept.is_empty() {
                global.unswept[i].append(unswept);
            }
        }
    }
}