mmtk/policy/marksweepspace/malloc_ms/
global.rs

1use super::metadata::*;
2use crate::plan::tracing::{ObjectQueue, OptionObjectQueue};
3use crate::policy::sft::GCWorkerMutRef;
4use crate::policy::sft::SFT;
5use crate::policy::space::CommonSpace;
6use crate::scheduler::GCWorkScheduler;
7use crate::util::heap::chunk_map::Chunk;
8use crate::util::heap::chunk_map::ChunkMap;
9use crate::util::heap::gc_trigger::GCTrigger;
10use crate::util::heap::space_descriptor::SpaceDescriptor;
11use crate::util::heap::PageResource;
12use crate::util::linear_scan::Region;
13use crate::util::malloc::library::{BYTES_IN_MALLOC_PAGE, LOG_BYTES_IN_MALLOC_PAGE};
14use crate::util::malloc::malloc_ms_util::*;
15use crate::util::metadata::side_metadata;
16use crate::util::metadata::side_metadata::{
17    SideMetadataContext, SideMetadataSanity, SideMetadataSpec,
18};
19use crate::util::metadata::MetadataSpec;
20use crate::util::object_enum::ObjectEnumerator;
21use crate::util::opaque_pointer::*;
22use crate::util::Address;
23use crate::util::ObjectReference;
24use crate::util::{conversions, metadata};
25use crate::vm::VMBinding;
26use crate::vm::{ActivePlan, Collection, ObjectModel};
27use crate::{policy::space::Space, util::heap::layout::vm_layout::BYTES_IN_CHUNK};
28#[cfg(debug_assertions)]
29use std::collections::HashMap;
30use std::marker::PhantomData;
31#[cfg(debug_assertions)]
32use std::sync::atomic::AtomicU32;
33use std::sync::atomic::{AtomicUsize, Ordering};
34use std::sync::Arc;
35use std::sync::Mutex;
/// Debug-only switch for allocation bookkeeping.
///
/// When `true`, every piece of memory obtained from malloc is recorded in a hash map, and that
/// record is used to assert that our allocation handling is correct.
#[cfg(debug_assertions)]
const ASSERT_ALLOCATION: bool = false;
40
41/// This space uses malloc to get new memory, and performs mark-sweep for the memory.
42pub struct MallocSpace<VM: VMBinding> {
43    phantom: PhantomData<VM>,
44    active_bytes: AtomicUsize,
45    active_pages: AtomicUsize,
46    metadata: SideMetadataContext,
47    /// Work packet scheduler
48    scheduler: Arc<GCWorkScheduler<VM>>,
49    gc_trigger: Arc<GCTrigger<VM>>,
50    descriptor: SpaceDescriptor,
51    chunk_map: ChunkMap,
52    mmap_metadata_lock: Mutex<()>,
53    // Mapping between allocated address and its size - this is used to check correctness.
54    // Size will be set to zero when the memory is freed.
55    #[cfg(debug_assertions)]
56    active_mem: Mutex<HashMap<Address, usize>>,
57    // The following fields are used for checking correctness of the parallel sweep implementation
58    // as we need to check how many live bytes exist against `active_bytes` when the last sweep
59    // work packet is executed
60    #[cfg(debug_assertions)]
61    pub total_work_packets: AtomicU32,
62    #[cfg(debug_assertions)]
63    pub completed_work_packets: AtomicU32,
64    #[cfg(debug_assertions)]
65    pub work_live_bytes: AtomicUsize,
66}
67
68impl<VM: VMBinding> SFT for MallocSpace<VM> {
69    fn name(&self) -> &'static str {
70        self.get_name()
71    }
72
73    fn is_live(&self, object: ObjectReference) -> bool {
74        is_marked::<VM>(object, Ordering::SeqCst)
75    }
76
77    #[cfg(feature = "object_pinning")]
78    fn pin_object(&self, _object: ObjectReference) -> bool {
79        false
80    }
81
82    #[cfg(feature = "object_pinning")]
83    fn unpin_object(&self, _object: ObjectReference) -> bool {
84        false
85    }
86
87    #[cfg(feature = "object_pinning")]
88    fn is_object_pinned(&self, _object: ObjectReference) -> bool {
89        false
90    }
91
92    fn is_movable(&self) -> bool {
93        false
94    }
95
96    #[cfg(feature = "sanity")]
97    fn is_sane(&self) -> bool {
98        true
99    }
100
101    // For malloc space, we need to further check the VO bit.
102    fn is_in_space(&self, object: ObjectReference) -> bool {
103        self.is_alloced_by_malloc(object)
104    }
105
106    /// For malloc space, we just use the side metadata.
107    #[cfg(feature = "vo_bit")]
108    fn is_mmtk_object(&self, addr: Address) -> Option<ObjectReference> {
109        debug_assert!(!addr.is_zero());
110        // `addr` cannot be mapped by us. It should be mapped by the malloc library.
111        debug_assert!(!addr.is_mapped());
112        self.has_object_alloced_by_malloc(addr)
113    }
114
115    #[cfg(feature = "vo_bit")]
116    fn find_object_from_internal_pointer(
117        &self,
118        ptr: Address,
119        max_search_bytes: usize,
120    ) -> Option<ObjectReference> {
121        crate::util::metadata::vo_bit::find_object_from_internal_pointer::<VM>(
122            ptr,
123            max_search_bytes,
124        )
125    }
126
127    fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize) {
128        trace!("initialize_object_metadata for object {}", object);
129        set_vo_bit(object);
130    }
131
132    fn sft_trace_object(
133        &self,
134        queue: &mut OptionObjectQueue,
135        object: ObjectReference,
136        _worker: GCWorkerMutRef,
137    ) -> ObjectReference {
138        self.trace_object(queue, object)
139    }
140}
141
142impl<VM: VMBinding> Space<VM> for MallocSpace<VM> {
143    fn as_space(&self) -> &dyn Space<VM> {
144        self
145    }
146
147    fn as_sft(&self) -> &(dyn SFT + Sync + 'static) {
148        self
149    }
150
151    fn get_page_resource(&self) -> &dyn PageResource<VM> {
152        unreachable!()
153    }
154
155    fn maybe_get_page_resource_mut(&mut self) -> Option<&mut dyn PageResource<VM>> {
156        None
157    }
158
159    fn common(&self) -> &CommonSpace<VM> {
160        unreachable!()
161    }
162
163    fn get_gc_trigger(&self) -> &GCTrigger<VM> {
164        self.gc_trigger.as_ref()
165    }
166
167    fn initialize_sft(&self, _sft_map: &mut dyn crate::policy::sft_map::SFTMap) {
168        // Do nothing - we will set sft when we get new results from malloc
169    }
170
171    fn release_multiple_pages(&mut self, _start: Address) {
172        unreachable!()
173    }
174
175    // We have assertions in a debug build. We allow this pattern for the release build.
176    #[allow(clippy::let_and_return)]
177    fn in_space(&self, object: ObjectReference) -> bool {
178        let ret = self.is_alloced_by_malloc(object);
179
180        #[cfg(debug_assertions)]
181        if ASSERT_ALLOCATION {
182            let addr = object.to_object_start::<VM>();
183            let active_mem = self.active_mem.lock().unwrap();
184            if ret {
185                // The VO bit tells that the object is in space.
186                debug_assert!(
187                    *active_mem.get(&addr).unwrap() != 0,
188                    "active mem check failed for {} (object {}) - was freed",
189                    addr,
190                    object
191                );
192            } else {
193                // The VO bit tells that the object is not in space. It could never be allocated, or have been freed.
194                debug_assert!(
195                    (!active_mem.contains_key(&addr))
196                        || (active_mem.contains_key(&addr) && *active_mem.get(&addr).unwrap() == 0),
197                    "mem check failed for {} (object {}): allocated = {}, size = {:?}",
198                    addr,
199                    object,
200                    active_mem.contains_key(&addr),
201                    if active_mem.contains_key(&addr) {
202                        active_mem.get(&addr)
203                    } else {
204                        None
205                    }
206                );
207            }
208        }
209        ret
210    }
211
212    fn address_in_space(&self, _start: Address) -> bool {
213        unreachable!("We do not know if an address is in malloc space. Use in_space() to check if an object is in malloc space.")
214    }
215
216    fn get_name(&self) -> &'static str {
217        "MallocSpace"
218    }
219
220    fn estimate_side_meta_pages(&self, data_pages: usize) -> usize {
221        self.metadata.calculate_reserved_pages(data_pages)
222    }
223
224    #[allow(clippy::assertions_on_constants)]
225    fn reserved_pages(&self) -> usize {
226        use crate::util::constants::LOG_BYTES_IN_PAGE;
227        // Assume malloc pages are no smaller than 4K pages. Otherwise the substraction below will fail.
228        debug_assert!(LOG_BYTES_IN_MALLOC_PAGE >= LOG_BYTES_IN_PAGE);
229        let data_pages = self.active_pages.load(Ordering::SeqCst)
230            << (LOG_BYTES_IN_MALLOC_PAGE - LOG_BYTES_IN_PAGE);
231        let meta_pages = self.estimate_side_meta_pages(data_pages);
232        data_pages + meta_pages
233    }
234
235    fn verify_side_metadata_sanity(&self, side_metadata_sanity_checker: &mut SideMetadataSanity) {
236        side_metadata_sanity_checker
237            .verify_metadata_context(std::any::type_name::<Self>(), &self.metadata)
238    }
239
240    fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) {
241        unimplemented!()
242    }
243
244    fn clear_side_log_bits(&self) {
245        unimplemented!()
246    }
247
248    fn set_side_log_bits(&self) {
249        unimplemented!()
250    }
251}
252
253use crate::scheduler::GCWorker;
254use crate::util::copy::CopySemantics;
255
256impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for MallocSpace<VM> {
257    fn trace_object<Q: ObjectQueue, const KIND: crate::policy::gc_work::TraceKind>(
258        &self,
259        queue: &mut Q,
260        object: ObjectReference,
261        _copy: Option<CopySemantics>,
262        _worker: &mut GCWorker<VM>,
263    ) -> ObjectReference {
264        self.trace_object(queue, object)
265    }
266
267    fn may_move_objects<const KIND: crate::policy::gc_work::TraceKind>() -> bool {
268        false
269    }
270}
271
/// Maximum object size for this space. There is actually no limit, hence `usize::MAX`.
#[allow(dead_code)]
pub const MAX_OBJECT_SIZE: usize = usize::MAX;
275
276impl<VM: VMBinding> MallocSpace<VM> {
277    pub fn extend_global_side_metadata_specs(specs: &mut Vec<SideMetadataSpec>) {
278        // MallocSpace needs to use VO bit. If the feature is turned on, the VO bit spec is in the global specs.
279        // Otherwise, we manually add it.
280        if !cfg!(feature = "vo_bit") {
281            specs.push(crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC);
282        }
283    }
284
285    pub fn new(args: crate::policy::space::PlanCreateSpaceArgs<VM>) -> Self {
286        if *args.options.count_live_bytes_in_gc {
287            // The implementation of counting live bytes needs a SpaceDescriptor which we do not have for MallocSpace.
288            // Besides we cannot meaningfully measure the live bytes vs total pages for MallocSpace.
289            panic!("count_live_bytes_in_gc is not supported by MallocSpace");
290        }
291        let descriptor = SpaceDescriptor::create_descriptor();
292        let chunk_map = ChunkMap::new(descriptor.get_index());
293        MallocSpace {
294            phantom: PhantomData,
295            active_bytes: AtomicUsize::new(0),
296            active_pages: AtomicUsize::new(0),
297            metadata: SideMetadataContext {
298                global: args.global_side_metadata_specs.clone(),
299                local: metadata::extract_side_metadata(&[
300                    MetadataSpec::OnSide(ACTIVE_PAGE_METADATA_SPEC),
301                    MetadataSpec::OnSide(OFFSET_MALLOC_METADATA_SPEC),
302                    *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC,
303                ]),
304            },
305            scheduler: args.scheduler.clone(),
306            gc_trigger: args.gc_trigger,
307            descriptor,
308            chunk_map,
309            mmap_metadata_lock: Mutex::new(()),
310            #[cfg(debug_assertions)]
311            active_mem: Mutex::new(HashMap::new()),
312            #[cfg(debug_assertions)]
313            total_work_packets: AtomicU32::new(0),
314            #[cfg(debug_assertions)]
315            completed_work_packets: AtomicU32::new(0),
316            #[cfg(debug_assertions)]
317            work_live_bytes: AtomicUsize::new(0),
318        }
319    }
320
321    /// Set multiple pages, starting from the given address, for the given size, and increase the active page count if we set any page mark in the region.
322    /// This is a thread-safe method, and can be used during mutator phase when mutators may access the same page.
323    /// Performance-wise, this method may impose overhead, as we are doing a compare-exchange for every page in the range.
324    fn set_page_mark(&self, start: Address, size: usize) {
325        // Set first page
326        let mut page = start.align_down(BYTES_IN_MALLOC_PAGE);
327        let mut used_pages = 0;
328
329        // It is important to go to the end of the object, which may span a page boundary
330        while page < start + size {
331            if compare_exchange_set_page_mark(page) {
332                used_pages += 1;
333            }
334
335            page += BYTES_IN_MALLOC_PAGE;
336        }
337
338        if used_pages != 0 {
339            self.active_pages.fetch_add(used_pages, Ordering::SeqCst);
340        }
341    }
342
343    fn set_chunk_mark(&self, start: Address, size: usize) {
344        let mut chunk = start.align_down(BYTES_IN_CHUNK);
345        while chunk < start + size {
346            self.chunk_map
347                .set_allocated(Chunk::from_aligned_address(chunk), true);
348            chunk += BYTES_IN_CHUNK;
349        }
350    }
351
352    /// Unset multiple pages, starting from the given address, for the given size, and decrease the active page count if we unset any page mark in the region
353    ///
354    /// # Safety
355    /// We need to ensure that only one GC thread is accessing the range.
356    unsafe fn unset_page_mark(&self, start: Address, size: usize) {
357        debug_assert!(start.is_aligned_to(BYTES_IN_MALLOC_PAGE));
358        debug_assert!(crate::util::conversions::raw_is_aligned(
359            size,
360            BYTES_IN_MALLOC_PAGE
361        ));
362        let mut page = start;
363        let mut cleared_pages = 0;
364        while page < start + size {
365            if is_page_marked_unsafe(page) {
366                cleared_pages += 1;
367                unset_page_mark_unsafe(page);
368            }
369            page += BYTES_IN_MALLOC_PAGE;
370        }
371
372        if cleared_pages != 0 {
373            self.active_pages.fetch_sub(cleared_pages, Ordering::SeqCst);
374        }
375    }
376
377    pub fn alloc(&self, tls: VMThread, size: usize, align: usize, offset: usize) -> Address {
378        // TODO: Should refactor this and Space.acquire()
379        if self.get_gc_trigger().poll(false, Some(self)) {
380            assert!(VM::VMActivePlan::is_mutator(tls), "Polling in GC worker");
381            VM::VMCollection::block_for_gc(VMMutatorThread(tls));
382            return unsafe { Address::zero() };
383        }
384
385        let (address, is_offset_malloc) = alloc::<VM>(size, align, offset);
386        if !address.is_zero() {
387            let actual_size = get_malloc_usable_size(address, is_offset_malloc);
388
389            if !self.is_meta_space_mapped(address, actual_size) {
390                // Map the metadata space for the associated chunk
391                self.map_metadata_and_update_bound(address, actual_size);
392                // Update SFT
393                assert!(crate::mmtk::SFT_MAP.has_sft_entry(address)); // make sure the address is okay with our SFT map
394                unsafe { crate::mmtk::SFT_MAP.update(self, address, actual_size) };
395            }
396
397            // Set chunk marks for the current object
398            self.set_chunk_mark(address, actual_size);
399
400            // Set page marks for current object
401            self.set_page_mark(address, actual_size);
402            self.active_bytes.fetch_add(actual_size, Ordering::SeqCst);
403
404            if is_offset_malloc {
405                set_offset_malloc_bit(address);
406            }
407
408            #[cfg(debug_assertions)]
409            if ASSERT_ALLOCATION {
410                debug_assert!(actual_size != 0);
411                self.active_mem.lock().unwrap().insert(address, actual_size);
412            }
413        }
414
415        address
416    }
417
418    /// Check if metadata is mapped for a range [addr, addr + size). Metadata is mapped per chunk,
419    /// we will go through all the chunks for [address, address + size), and check if they are mapped.
420    /// If any of the chunks is not mapped, return false. Otherwise return true.
421    fn is_meta_space_mapped(&self, address: Address, size: usize) -> bool {
422        let mut chunk = address.align_down(BYTES_IN_CHUNK);
423        while chunk < address + size {
424            // is the chunk already mapped?
425            if !self.is_meta_space_mapped_for_address(chunk) {
426                return false;
427            }
428            chunk += BYTES_IN_CHUNK;
429        }
430        true
431    }
432
433    /// Check if metadata is mapped for a given address. We check with the chunk map: if the side metadata
434    /// for the chunk map is mapped, and if it is allocated in the chunk map.
435    fn is_meta_space_mapped_for_address(&self, address: Address) -> bool {
436        let is_chunk_map_mapped = |chunk_start: Address| {
437            let chunk_map_max_meta_address =
438                ChunkMap::ALLOC_TABLE.upper_bound_address_for_contiguous();
439            let meta_address =
440                side_metadata::address_to_meta_address(&ChunkMap::ALLOC_TABLE, chunk_start);
441            if meta_address < chunk_map_max_meta_address {
442                meta_address.is_mapped()
443            } else {
444                false
445            }
446        };
447        let chunk_start = address.align_down(BYTES_IN_CHUNK);
448        is_chunk_map_mapped(chunk_start)
449            && self
450                .chunk_map
451                .get(Chunk::from_aligned_address(chunk_start))
452                .is_some()
453    }
454
455    pub fn free(&self, addr: Address) {
456        let offset_malloc_bit = is_offset_malloc(addr);
457        let bytes = get_malloc_usable_size(addr, offset_malloc_bit);
458        self.free_internal(addr, bytes, offset_malloc_bit);
459    }
460
461    // XXX optimize: We pass the bytes in to free as otherwise there were multiple
462    // indirect call instructions in the generated assembly
463    fn free_internal(&self, addr: Address, bytes: usize, offset_malloc_bit: bool) {
464        if offset_malloc_bit {
465            trace!("Free memory {:x}", addr);
466            offset_free(addr);
467            unsafe { unset_offset_malloc_bit_unsafe(addr) };
468        } else {
469            let ptr = addr.to_mut_ptr();
470            trace!("Free memory {:?}", ptr);
471            unsafe {
472                free(ptr);
473            }
474        }
475
476        self.active_bytes.fetch_sub(bytes, Ordering::SeqCst);
477
478        #[cfg(debug_assertions)]
479        if ASSERT_ALLOCATION {
480            self.active_mem.lock().unwrap().insert(addr, 0).unwrap();
481        }
482    }
483
484    pub fn trace_object<Q: ObjectQueue>(
485        &self,
486        queue: &mut Q,
487        object: ObjectReference,
488    ) -> ObjectReference {
489        assert!(
490            self.in_space(object),
491            "Cannot mark an object {} that was not alloced by malloc.",
492            object,
493        );
494
495        if !is_marked::<VM>(object, Ordering::Relaxed) {
496            set_mark_bit::<VM>(object, Ordering::SeqCst);
497            queue.enqueue(object);
498        }
499
500        object
501    }
502
503    fn map_metadata_and_update_bound(&self, addr: Address, size: usize) {
504        // Acquire the lock before
505        let _lock = self.mmap_metadata_lock.lock().unwrap();
506
507        // Mmap metadata for each chunk
508        let map_metadata_space_for_chunk = |start: Address| {
509            debug_assert!(start.is_aligned_to(BYTES_IN_CHUNK));
510            // Attempt to map the local metadata for the policy.
511            // Note that this might fail. For example, we have marked a chunk as active but later we freed all
512            // the objects in it, and unset its chunk bit. However, we do not free its metadata. So for the chunk,
513            // its chunk bit is mapped, but not marked, and all its local metadata is also mapped.
514            let mmap_metadata_result =
515                self.metadata
516                    .try_map_metadata_space(start, BYTES_IN_CHUNK, self.get_name());
517            debug_assert!(
518                mmap_metadata_result.is_ok(),
519                "mmap sidemetadata failed for chunk_start ({})",
520                start
521            );
522            // Set the chunk mark at the end. So if we have chunk mark set, we know we have mapped side metadata
523            // for the chunk.
524            trace!("set chunk mark bit for {}", start);
525            self.chunk_map
526                .set_allocated(Chunk::from_aligned_address(start), true);
527        };
528
529        // Go through each chunk, and map for them.
530        let mut chunk = conversions::chunk_align_down(addr);
531        while chunk < addr + size {
532            map_metadata_space_for_chunk(chunk);
533            chunk += BYTES_IN_CHUNK;
534        }
535    }
536
537    /// Check if a given object was allocated by malloc
538    pub fn is_alloced_by_malloc(&self, object: ObjectReference) -> bool {
539        self.is_meta_space_mapped_for_address(object.to_raw_address())
540            && crate::util::metadata::vo_bit::is_vo_bit_set(object)
541    }
542
543    /// Check if there is an object allocated by malloc at the address.
544    ///
545    /// This function doesn't check if `addr` is aligned.
546    /// If not, it will try to load the VO bit for the address rounded down to the metadata's granularity.
547    #[cfg(feature = "vo_bit")]
548    pub fn has_object_alloced_by_malloc(&self, addr: Address) -> Option<ObjectReference> {
549        if !self.is_meta_space_mapped_for_address(addr) {
550            return None;
551        }
552        crate::util::metadata::vo_bit::is_vo_bit_set_for_addr(addr)
553    }
554
555    pub fn prepare(&mut self, _full_heap: bool) {}
556
557    pub fn release(&mut self) {
558        use crate::scheduler::WorkBucketStage;
559        let space = unsafe { &*(self as *const Self) };
560        let work_packets = self.chunk_map.generate_tasks(|chunk| {
561            Box::new(MSSweepChunk {
562                ms: space,
563                chunk: chunk.start(),
564            })
565        });
566
567        debug!("Generated {} sweep work packets", work_packets.len());
568        #[cfg(debug_assertions)]
569        {
570            self.total_work_packets
571                .store(work_packets.len() as u32, Ordering::SeqCst);
572            self.completed_work_packets.store(0, Ordering::SeqCst);
573            self.work_live_bytes.store(0, Ordering::SeqCst);
574        }
575
576        self.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(work_packets);
577    }
578
579    pub fn end_of_gc(&mut self) {}
580
581    pub fn sweep_chunk(&self, chunk_start: Address) {
582        // Call the relevant sweep function depending on the location of the mark bits
583        match *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC {
584            MetadataSpec::OnSide(local_mark_bit_side_spec) => {
585                self.sweep_chunk_mark_on_side(chunk_start, local_mark_bit_side_spec);
586            }
587            _ => {
588                self.sweep_chunk_mark_in_header(chunk_start);
589            }
590        }
591    }
592
593    /// Given an object in MallocSpace, return its malloc address, whether it is an offset malloc, and malloc size
594    fn get_malloc_addr_size(object: ObjectReference) -> (Address, bool, usize) {
595        let obj_start = object.to_object_start::<VM>();
596        let offset_malloc_bit = is_offset_malloc(obj_start);
597        let bytes = get_malloc_usable_size(obj_start, offset_malloc_bit);
598        (obj_start, offset_malloc_bit, bytes)
599    }
600
601    /// Clean up for an empty chunk
602    fn clean_up_empty_chunk(&self, chunk_start: Address) {
603        // Clear the chunk map
604        self.chunk_map
605            .set_allocated(Chunk::from_aligned_address(chunk_start), false);
606        // Clear the SFT entry
607        unsafe { crate::mmtk::SFT_MAP.clear(chunk_start) };
608        // Clear the page marks - we are the only GC thread that is accessing this chunk
609        unsafe { self.unset_page_mark(chunk_start, BYTES_IN_CHUNK) };
610    }
611
612    /// Sweep an object if it is dead, and unset page marks for empty pages before this object.
613    /// Return true if the object is swept.
614    fn sweep_object(&self, object: ObjectReference, empty_page_start: &mut Address) -> bool {
615        let (obj_start, offset_malloc, bytes) = Self::get_malloc_addr_size(object);
616
617        // We are the only thread that is dealing with the object. We can use non-atomic methods for the metadata.
618        if !unsafe { is_marked_unsafe::<VM>(object) } {
619            // Dead object
620            trace!("Object {} has been allocated but not marked", object);
621
622            // Free object
623            self.free_internal(obj_start, bytes, offset_malloc);
624            trace!("free object {}", object);
625            unsafe { unset_vo_bit_unsafe(object) };
626
627            true
628        } else {
629            // Live object that we have marked
630
631            // Unset marks for free pages and update last_object_end
632            if !empty_page_start.is_zero() {
633                // unset marks for pages since last object
634                let current_page = object
635                    .to_object_start::<VM>()
636                    .align_down(BYTES_IN_MALLOC_PAGE);
637                if current_page > *empty_page_start {
638                    // we are the only GC thread that is accessing this chunk
639                    unsafe {
640                        self.unset_page_mark(*empty_page_start, current_page - *empty_page_start)
641                    };
642                }
643            }
644
645            // Update last_object_end
646            *empty_page_start = (obj_start + bytes).align_up(BYTES_IN_MALLOC_PAGE);
647
648            false
649        }
650    }
651
652    /// Used when each chunk is done. Only called in debug build.
653    #[cfg(debug_assertions)]
654    fn debug_sweep_chunk_done(&self, live_bytes_in_the_chunk: usize) {
655        debug!(
656            "Used bytes after releasing: {}",
657            self.active_bytes.load(Ordering::SeqCst)
658        );
659
660        let completed_packets = self.completed_work_packets.fetch_add(1, Ordering::SeqCst) + 1;
661        self.work_live_bytes
662            .fetch_add(live_bytes_in_the_chunk, Ordering::SeqCst);
663
664        if completed_packets == self.total_work_packets.load(Ordering::Relaxed) {
665            trace!(
666                "work_live_bytes = {}, live_bytes = {}, active_bytes = {}",
667                self.work_live_bytes.load(Ordering::Relaxed),
668                live_bytes_in_the_chunk,
669                self.active_bytes.load(Ordering::Relaxed)
670            );
671            debug_assert_eq!(
672                self.work_live_bytes.load(Ordering::Relaxed),
673                self.active_bytes.load(Ordering::Relaxed)
674            );
675        }
676    }
677
678    /// This function is called when the mark bits sit on the side metadata.
679    /// This has been optimized with the use of bulk loading and bulk zeroing of
680    /// metadata.
681    ///
682    /// This function uses non-atomic accesses to side metadata (although these
683    /// non-atomic accesses should not have race conditions associated with them)
684    /// as well as calls libc functions (`malloc_usable_size()`, `free()`)
685    fn sweep_chunk_mark_on_side(&self, chunk_start: Address, mark_bit_spec: SideMetadataSpec) {
686        // We can do xor on bulk for mark bits and valid object bits. If the result is zero, that means
687        // the objects in it are all alive (both valid object bit and mark bit is set), and we do not
688        // need to do anything for the region. Otherwise, we will sweep each single object in the region.
689        // Note: Enabling this would result in inaccurate page accounting. We disable this by default, and
690        // we will sweep object one by one.
691        const BULK_XOR_ON_MARK_BITS: bool = false;
692
693        if BULK_XOR_ON_MARK_BITS {
694            #[cfg(debug_assertions)]
695            let mut live_bytes = 0;
696
697            debug!("Check active chunk {:?}", chunk_start);
698            let mut address = chunk_start;
699            let chunk_end = chunk_start + BYTES_IN_CHUNK;
700
701            debug_assert!(
702                crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC.log_bytes_in_region
703                    == mark_bit_spec.log_bytes_in_region,
704                "VO-bit and mark-bit metadata have different minimum object sizes!"
705            );
706
707            // For bulk xor'ing 128-bit vectors on architectures with vector instructions
708            // Each bit represents an object of LOG_MIN_OBJ_SIZE size
709            let bulk_load_size: usize = 128
710                * (1 << crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC
711                    .log_bytes_in_region);
712
713            // The start of a possibly empty page. This will be updated during the sweeping, and always points to the next page of last live objects.
714            let mut empty_page_start = Address::ZERO;
715
716            // Scan the chunk by every 'bulk_load_size' region.
717            while address < chunk_end {
718                let alloc_128: u128 = unsafe {
719                    load128(
720                        &crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC,
721                        address,
722                    )
723                };
724                let mark_128: u128 = unsafe { load128(&mark_bit_spec, address) };
725
726                // Check if there are dead objects in the bulk loaded region
727                if alloc_128 ^ mark_128 != 0 {
728                    let end = address + bulk_load_size;
729
730                    // We will do non atomic load on the VO bit, as this is the only thread that access the VO bit for a chunk.
731                    // Linear scan through the bulk load region.
732                    let bulk_load_scan = crate::util::linear_scan::ObjectIterator::<
733                        VM,
734                        MallocObjectSize<VM>,
735                        false,
736                    >::new(address, end);
737                    for object in bulk_load_scan {
738                        self.sweep_object(object, &mut empty_page_start);
739                    }
740                } else {
741                    // TODO we aren't actually accounting for the case where an object is alive and spans
742                    // a page boundary as we don't know what the object sizes are/what is alive in the bulk region
743                    if alloc_128 != 0 {
744                        empty_page_start = address + bulk_load_size;
745                    }
746                }
747
748                // We have processed this bulk load memory. Step to the next.
749                address += bulk_load_size;
750                debug_assert!(address.is_aligned_to(bulk_load_size));
751            }
752
753            // Linear scan through the chunk, and add up all the live object sizes.
754            // We have to do this as a separate pass, as in the above pass, we did not go through all the live objects
755            #[cfg(debug_assertions)]
756            {
757                let chunk_linear_scan = crate::util::linear_scan::ObjectIterator::<
758                    VM,
759                    MallocObjectSize<VM>,
760                    false,
761                >::new(chunk_start, chunk_end);
762                for object in chunk_linear_scan {
763                    let (obj_start, _, bytes) = Self::get_malloc_addr_size(object);
764
765                    if ASSERT_ALLOCATION {
766                        debug_assert!(
767                            self.active_mem.lock().unwrap().contains_key(&obj_start),
768                            "Address {} with VO bit is not in active_mem",
769                            obj_start
770                        );
771                        debug_assert_eq!(
772                            self.active_mem.lock().unwrap().get(&obj_start),
773                            Some(&bytes),
774                            "Address {} size in active_mem does not match the size from malloc_usable_size",
775                            obj_start
776                        );
777                    }
778
779                    debug_assert!(
780                        unsafe { is_marked_unsafe::<VM>(object) },
781                        "Dead object = {} found after sweep",
782                        object
783                    );
784
785                    live_bytes += bytes;
786                }
787            }
788
789            // Clear all the mark bits
790            mark_bit_spec.bzero_metadata(chunk_start, BYTES_IN_CHUNK);
791
792            // If we never updated empty_page_start, the entire chunk is empty.
793            if empty_page_start.is_zero() {
794                self.clean_up_empty_chunk(chunk_start);
795            }
796
797            #[cfg(debug_assertions)]
798            self.debug_sweep_chunk_done(live_bytes);
799        } else {
800            self.sweep_each_object_in_chunk(chunk_start);
801        }
802    }
803
804    /// This sweep function is called when the mark bit sits in the object header
805    ///
806    /// This function uses non-atomic accesses to side metadata (although these
807    /// non-atomic accesses should not have race conditions associated with them)
808    /// as well as calls libc functions (`malloc_usable_size()`, `free()`)
809    fn sweep_chunk_mark_in_header(&self, chunk_start: Address) {
810        self.sweep_each_object_in_chunk(chunk_start)
811    }
812
813    fn sweep_each_object_in_chunk(&self, chunk_start: Address) {
814        #[cfg(debug_assertions)]
815        let mut live_bytes = 0;
816
817        debug!("Check active chunk {:?}", chunk_start);
818
819        // The start of a possibly empty page. This will be updated during the sweeping, and always points to the next page of last live objects.
820        let mut empty_page_start = Address::ZERO;
821
822        let chunk_linear_scan = crate::util::linear_scan::ObjectIterator::<
823            VM,
824            MallocObjectSize<VM>,
825            false,
826        >::new(chunk_start, chunk_start + BYTES_IN_CHUNK);
827
828        for object in chunk_linear_scan {
829            #[cfg(debug_assertions)]
830            if ASSERT_ALLOCATION {
831                let (obj_start, _, bytes) = Self::get_malloc_addr_size(object);
832                debug_assert!(
833                    self.active_mem.lock().unwrap().contains_key(&obj_start),
834                    "Address {} with VO bit is not in active_mem",
835                    obj_start
836                );
837                debug_assert_eq!(
838                    self.active_mem.lock().unwrap().get(&obj_start),
839                    Some(&bytes),
840                    "Address {} size in active_mem does not match the size from malloc_usable_size",
841                    obj_start
842                );
843            }
844
845            let live = !self.sweep_object(object, &mut empty_page_start);
846            if live {
847                // Live object. Unset mark bit.
848                // We should be the only thread that access this chunk, it is okay to use non-atomic store.
849                unsafe { unset_mark_bit::<VM>(object) };
850
851                #[cfg(debug_assertions)]
852                {
853                    // Accumulate live bytes
854                    let (_, _, bytes) = Self::get_malloc_addr_size(object);
855                    live_bytes += bytes;
856                }
857            }
858        }
859
860        // If we never updated empty_page_start, the entire chunk is empty.
861        if empty_page_start.is_zero() {
862            self.clean_up_empty_chunk(chunk_start);
863        } else if empty_page_start < chunk_start + BYTES_IN_CHUNK {
864            // This is for the edge case where we have a live object and then no other live
865            // objects afterwards till the end of the chunk. For example consider chunk
866            // 0x0-0x400000 where only one object at 0x100 is alive. We will unset page bits
867            // for 0x0-0x100 but then not unset it for the pages after 0x100. This checks
868            // if we have empty pages at the end of a chunk that needs to be cleared.
869            unsafe {
870                self.unset_page_mark(
871                    empty_page_start,
872                    chunk_start + BYTES_IN_CHUNK - empty_page_start,
873                )
874            };
875        }
876
877        #[cfg(debug_assertions)]
878        self.debug_sweep_chunk_done(live_bytes);
879    }
880}
881
882struct MallocObjectSize<VM>(PhantomData<VM>);
883impl<VM: VMBinding> crate::util::linear_scan::LinearScanObjectSize for MallocObjectSize<VM> {
884    fn size(object: ObjectReference) -> usize {
885        let (_, _, bytes) = MallocSpace::<VM>::get_malloc_addr_size(object);
886        bytes
887    }
888}
889
890use crate::scheduler::GCWork;
891use crate::MMTK;
892
/// Simple work packet that just sweeps a single chunk.
///
/// When the packet is executed, it calls `sweep_chunk` on `ms` with `chunk`.
pub struct MSSweepChunk<VM: VMBinding> {
    /// The malloc space whose `sweep_chunk` is invoked by this packet.
    ms: &'static MallocSpace<VM>,
    /// Starting address of the chunk to sweep.
    chunk: Address,
}
899
900impl<VM: VMBinding> GCWork<VM> for MSSweepChunk<VM> {
901    fn do_work(&mut self, _worker: &mut GCWorker<VM>, _mmtk: &'static MMTK<VM>) {
902        self.ms.sweep_chunk(self.chunk);
903    }
904}