diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index ebb4a89790602..f499e25b1da7d 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -31,6 +31,7 @@ #include "opto/output.hpp" #include "opto/subnode.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/powerOfTwo.hpp" @@ -157,6 +158,8 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register t1, Label locked; // Finish fast lock unsuccessfully. MUST branch to with flag == NE Label slow_path; + // Finish fast lock unsuccessfully. Sets flag == NE + Label slow_path_set_ne; if (UseObjectMonitorTable) { // Clear cache in case fast locking succeeds or we need to take the slow-path. @@ -222,36 +225,54 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register t1, assert(t1_monitor == t1_mark, "should be the same here"); } else { Label monitor_found; + Label lookup_in_table; + Label found_in_cache; // Load cache address lea(t3_t, Address(rthread, JavaThread::om_cache_oops_offset())); - const int num_unrolled = 2; + const int num_unrolled = OMCache::CAPACITY; for (int i = 0; i < num_unrolled; i++) { - ldr(t1, Address(t3_t)); - cmp(obj, t1); - br(Assembler::EQ, monitor_found); + ldr(t2, Address(t3_t)); + cmp(obj, t2); + br(Assembler::EQ, found_in_cache); increment(t3_t, in_bytes(OMCache::oop_to_oop_difference())); } - Label loop; + b(lookup_in_table); - // Search for obj in cache. - bind(loop); + bind(found_in_cache); + ldr(t1_monitor, Address(t3_t, OMCache::oop_to_monitor_difference())); + b(monitor_found); + + bind(lookup_in_table); + + // Grab hash code + ubfx(t1_mark, t1_mark, markWord::hash_shift, markWord::hash_bits); + + // Get the table and calculate bucket + lea(t3, ExternalAddress(ObjectMonitorTable::current_table_address())); + ldr(t3, Address(t3, 0)); + ldr(t2, Address(t3, ObjectMonitorTable::table_capacity_mask_offset())); + ands(t1_mark, t1_mark, t2); + ldr(t3, Address(t3, ObjectMonitorTable::table_buckets_offset())); - // Check for match. - ldr(t1, Address(t3_t)); - cmp(obj, t1); - br(Assembler::EQ, monitor_found); + // Read monitor from bucket + lsl(t1_mark, t1_mark, 3); + ldr(t1_monitor, Address(t3, t1_mark)); - // Search until null encountered, guaranteed _null_sentinel at end. - increment(t3_t, in_bytes(OMCache::oop_to_oop_difference())); - cbnz(t1, loop); - // Cache Miss, NE set from cmp above, cbnz does not set flags - b(slow_path); + // Check if empty slot, removed slot or tomb stone + cmp(t1_monitor, (unsigned char)2); + br(Assembler::LS, slow_path_set_ne); + + // Check if object matches + ldr(t3, Address(t1_monitor, ObjectMonitor::object_offset())); + BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler(); + bs_asm->try_resolve_weak_handle_in_c2(this, t3, t2, slow_path_set_ne); + cmp(t3, obj); + br(Assembler::NE, slow_path); bind(monitor_found); - ldr(t1_monitor, Address(t3_t, OMCache::oop_to_monitor_difference())); } const Register t2_owner_addr = t2; @@ -286,12 +307,18 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register t1, bind(locked); + Label the_end; #ifdef ASSERT // Check that locked label is reached with Flags == EQ. 
Label flag_correct; br(Assembler::EQ, flag_correct); stop("Fast Lock Flag != EQ"); #endif + b(the_end); + + bind(slow_path_set_ne); + // Set NE + cmp(rthread, zr); bind(slow_path); #ifdef ASSERT @@ -300,6 +327,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register t1, stop("Fast Lock Flag != NE"); bind(flag_correct); #endif + bind(the_end); // C2 uses the value of Flags (NE vs EQ) to determine the continuation. } diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp index 021af3e5698fa..a52b0dc5266ca 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp @@ -441,6 +441,10 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na return opto_reg; } +void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + __ ldr(obj, Address(obj, 0)); +} + #undef __ #define __ _masm-> diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp index fa093a6ef69b0..1f8ada79a3a1c 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp @@ -135,6 +135,7 @@ class BarrierSetAssembler: public CHeapObj { OptoReg::Name opto_reg); OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg); + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path); #endif // COMPILER2 }; diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp index 5d4f0801ec62f..be67c1f2741f4 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -1327,6 +1327,18 @@ void ZStoreBarrierStubC2Aarch64::emit_code(MacroAssembler& masm) { register_stub(this); } +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + __ ldr(obj, Address(obj, 0)); + __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadBeforeMov); + __ movzw(tmp, barrier_Relocation::unpatched); + __ tst(obj, tmp); + __ br(Assembler::NE, slow_path); + __ lsr(obj, obj, ZPointerLoadShift); +} + #undef __ #endif // COMPILER2 diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp index ae2819e78cad2..00bfb35e23b59 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp @@ -191,6 +191,7 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ZLoadBarrierStubC2* stub) const; void generate_c2_store_barrier_stub(MacroAssembler* masm, ZStoreBarrierStubC2* stub) const; + void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path); #endif // COMPILER2 void check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error); diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp index 8712c75711d5c..bef66c5332ca1 100644 --- 
a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp @@ -275,6 +275,10 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na return opto_reg; } +void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + __ ld(obj, 0, obj); +} + #undef __ #define __ _masm-> diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp index 390623f48a184..2082148c1982f 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -81,6 +81,8 @@ class BarrierSetAssembler: public CHeapObj { #ifdef COMPILER2 OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg) const; + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, + Register tmp, Label& slow_path); #endif // COMPILER2 }; diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp index 0aa5858c8e61a..37a4ebb8413d9 100644 --- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp @@ -950,6 +950,21 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm, __ b(*stub->continuation()); } +void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + // Load the oop from the weak handle + __ ld(obj, 0, obj); + + // Check if oop is okay + __ ld(tmp, in_bytes(ZThreadLocalData::mark_bad_mask_offset()), R16_thread); + + // Test reference against bad mask. If mask bad, then we need to fix it up. + __ and_(tmp, obj, tmp); + __ bne(CR0, slow_path); + + // Uncolor oop if okay + __ srdi(obj, obj, ZPointerLoadShift); +} + #undef __ #endif // COMPILER2 diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp index 27203e7b01c97..b92fa3c4b8ee3 100644 --- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -108,6 +108,8 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { void generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const; void generate_c2_store_barrier_stub(MacroAssembler* masm, ZStoreBarrierStubC2* stub) const; + + void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path); #endif // COMPILER2 void store_barrier_fast(MacroAssembler* masm, diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 5649ead2ea8f3..3fc88b834178f 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2756,39 +2756,60 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register addi(owner_addr, mark, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag); mark = noreg; } else { + const Register cache_addr = tmp2; + const Register tmp3_bucket = tmp3; Label monitor_found; - Register cache_addr = tmp2; + Label lookup_in_table; + Label found_in_cache; // Load cache address addi(cache_addr, R16_thread, in_bytes(JavaThread::om_cache_oops_offset())); - const int num_unrolled = 2; + const int num_unrolled = OMCache::CAPACITY; for (int i = 0; i < num_unrolled; i++) { ld(R0, 0, cache_addr); cmpd(CR0, R0, obj); - beq(CR0, monitor_found); + beq(CR0, found_in_cache); addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference())); } - Label loop; + b(lookup_in_table); - // Search for obj in cache. - bind(loop); + bind(found_in_cache); + ld(monitor, in_bytes(OMCache::oop_to_monitor_difference()), cache_addr); + b(monitor_found); + + bind(lookup_in_table); + + // Grab hash code + srdi(mark, mark, markWord::hash_shift); + + // Get the table and calculate bucket + load_const_optimized(tmp3, ObjectMonitorTable::current_table_address(), R0); + ld_ptr(tmp3, 0, tmp3); + ld(tmp2, in_bytes(ObjectMonitorTable::table_capacity_mask_offset()), tmp3); + andr(mark, mark, tmp2); + ld(tmp3, in_bytes(ObjectMonitorTable::table_buckets_offset()), tmp3); - // Check for match. - ld(R0, 0, cache_addr); - cmpd(CR0, R0, obj); - beq(CR0, monitor_found); + // Read monitor from bucket + sldi(mark, mark, LogBytesPerWord); + add(tmp3_bucket, tmp3, mark); - // Search until null encountered, guaranteed _null_sentinel at end. - addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference())); - cmpdi(CR1, R0, 0); - bne(CR1, loop); - // Cache Miss, CR0.NE set from cmp above - b(slow_path); + // Read monitor from bucket + ld_ptr(monitor, 0, tmp3_bucket); + + // Check if empty slot, removed slot or tomb stone + cmpdi(CR0, monitor, 3); + blt(CR0, slow_path); + + // Check if object matches + ld(tmp3, in_bytes(ObjectMonitor::object_offset()), monitor); + BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler(); + bs_asm->try_resolve_weak_handle_in_c2(this, tmp3, tmp2, slow_path); + cmpd(CR0, tmp3, obj); + bne(CR0, slow_path); bind(monitor_found); - ld(monitor, in_bytes(OMCache::oop_to_monitor_difference()), cache_addr); // Compute owner address. 
addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset())); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index abbd7eedbba9b..307882792bb15 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -31,6 +31,7 @@ #include "opto/output.hpp" #include "opto/subnode.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "utilities/globalDefinitions.hpp" #ifdef PRODUCT @@ -123,35 +124,55 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, if (!UseObjectMonitorTable) { assert(tmp1_monitor == tmp1_mark, "should be the same here"); } else { + const Register tmp3_bucket = tmp3; Label monitor_found; + Label lookup_in_table; + Label found_in_cache; // Load cache address la(tmp3_t, Address(xthread, JavaThread::om_cache_oops_offset())); - const int num_unrolled = 2; + const int num_unrolled = OMCache::CAPACITY; for (int i = 0; i < num_unrolled; i++) { ld(tmp1, Address(tmp3_t)); - beq(obj, tmp1, monitor_found); + beq(obj, tmp1, found_in_cache); add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference())); } - Label loop; + j(lookup_in_table); - // Search for obj in cache. - bind(loop); + bind(found_in_cache); + ld(tmp1_monitor, Address(tmp3_t, OMCache::oop_to_monitor_difference())); + j(monitor_found); + + bind(lookup_in_table); + + // Grab hash code + srli(tmp1, tmp1_mark, markWord::hash_shift); - // Check for match. - ld(tmp1, Address(tmp3_t)); - beq(obj, tmp1, monitor_found); + // Get the table and calculate bucket + la(tmp3_t, ExternalAddress(ObjectMonitorTable::current_table_address())); + ld(tmp3_t, Address(tmp3_t)); + ld(tmp2, Address(tmp3_t, ObjectMonitorTable::table_capacity_mask_offset())); + andr(tmp1, tmp1, tmp2); + ld(tmp3_t, Address(tmp3_t, ObjectMonitorTable::table_buckets_offset())); + slli(tmp1, tmp1, LogBytesPerWord); + add(tmp3_bucket, tmp3_t, tmp1); - // Search until null encountered, guaranteed _null_sentinel at end. - add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference())); - bnez(tmp1, loop); - // Cache Miss. Take the slowpath. 
- j(slow_path); + // Read monitor from bucket + ld(tmp1_monitor, Address(tmp3_bucket)); + + // Check if empty slot, removed slot or tomb stone + li(tmp2, 2); + bleu(tmp1_monitor, tmp2, slow_path); + + // Check if object matches + ld(tmp3, Address(tmp1_monitor, ObjectMonitor::object_offset())); + BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler(); + bs_asm->try_resolve_weak_handle_in_c2(this, tmp3, tmp2, slow_path); + bne(tmp3, obj, slow_path); bind(monitor_found); - ld(tmp1_monitor, Address(tmp3_t, OMCache::oop_to_monitor_difference())); } const Register tmp2_owner_addr = tmp2; diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index f5916000890c4..8a8904c10f4ea 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -369,6 +369,10 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na return opto_reg; } +void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + __ ld(obj, Address(obj)); +} + #undef __ #define __ _masm-> diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp index 63a7032bb847a..d66da2888e8de 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -110,6 +110,8 @@ class BarrierSetAssembler: public CHeapObj { #ifdef COMPILER2 OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg); + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, + Register tmp, Label& slow_path); #endif // COMPILER2 }; diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp index 09dea62b6d18f..0fc9a49dbdafa 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -771,6 +771,22 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm, __ j(slow_continuation); } + +void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + // Load the oop from the weak handle + __ ld(obj, Address(obj)); + + // Check if oop is okay + __ ld(tmp, mark_bad_mask_from_thread(xthread)); + + // Test reference against bad mask. If mask bad, then we need to fix it up. 
+ __ andr(tmp, obj, tmp); + __ bnez(tmp, slow_path); + + // Uncolor oop if okay + __ srli(obj, obj, ZPointerLoadShift); +} + #undef __ #endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp index 190d81acd0cca..c9b1367c4c0d7 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp @@ -170,6 +170,10 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ZLoadBarrierStubC2* stub) const; void generate_c2_store_barrier_stub(MacroAssembler* masm, ZStoreBarrierStubC2* stub) const; + void try_resolve_weak_handle_in_c2(MacroAssembler* masm, + Register obj, + Register tmp, + Label& slow_path); #endif // COMPILER2 void check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error); diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp index c6f5a4e119c9a..988aefb207d6d 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp @@ -206,6 +206,10 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na return opto_reg; } +void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) { + __ z_lg(obj, Address(obj)); +} + #undef __ #define __ _masm-> diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp index 3e0b2be487383..cf8519191de3a 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -65,6 +65,8 @@ class BarrierSetAssembler: public CHeapObj { #ifdef COMPILER2 OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg) const; + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, + Register tmp, Label& slow_path); #endif // COMPILER2 static const int OFFSET_TO_PATCHABLE_DATA_INSTRUCTION = 6 + 6 + 6; // iihf(6) + iilf(6) + lg(6) diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index f35e18c7398dd..c2df87c54f7e9 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -6372,39 +6372,56 @@ void MacroAssembler::compiler_fast_lock_object(Register obj, Register box, Regis if (!UseObjectMonitorTable) { assert(tmp1_monitor == mark, "should be the same here"); } else { + const Register cache_addr = tmp2; NearLabel monitor_found; + NearLabel lookup_in_table; + NearLabel found_in_cache; // load cache address - z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset())); + z_la(cache_addr, Address(Z_thread, JavaThread::om_cache_oops_offset())); const int num_unrolled = 2; for (int i = 0; i < num_unrolled; i++) { - z_cg(obj, Address(tmp1)); - z_bre(monitor_found); - add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference())); + z_cg(obj, Address(cache_addr)); + z_bre(found_in_cache); + add2reg(cache_addr, in_bytes(OMCache::oop_to_oop_difference())); } - NearLabel loop; - // Search for obj in cache + z_bru(lookup_in_table); - bind(loop); + bind(found_in_cache); + z_lg(tmp1_monitor, Address(cache_addr, OMCache::oop_to_monitor_difference())); + z_bru(monitor_found); - // check for match. - z_cg(obj, Address(tmp1)); - z_bre(monitor_found); + bind(lookup_in_table); - // search until null encountered, guaranteed _null_sentinel at end. 
- add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference())); - z_cghsi(0, tmp1, 0); - z_brne(loop); // if not EQ to 0, go for another loop + // Grab hash code + z_srlg(mark, mark, markWord::hash_shift); - // we reached to the end, cache miss - z_ltgr(obj, obj); // set CC to NE - z_bru(slow_path); + // Get the table and calculate bucket + load_const_optimized(tmp2, ObjectMonitorTable::current_table_address()); + z_lg(tmp2, Address(tmp2)); + z_lg(Z_R0_scratch, Address(tmp2, ObjectMonitorTable::table_capacity_mask_offset())); + z_ngr(mark, Z_R0_scratch); + z_lg(Z_R0_scratch, Address(tmp2, ObjectMonitorTable::table_buckets_offset())); + + // Read monitor from bucket + z_sllg(mark, mark, LogBytesPerWord); + z_agr(tmp1, Z_R0_scratch); + z_lg(tmp1_monitor, Address(tmp1)); + + // Check if empty slot, removed slot or tomb stone + z_cghi(tmp1_monitor, 3); + z_brl(slow_path); + + // Check if object matches + z_lg(tmp2, Address(tmp1_monitor, ObjectMonitor::object_offset())); + BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler(); + bs_asm->try_resolve_weak_handle_in_c2(this, tmp2, Z_R0_scratch, slow_path); + z_cgr(obj, tmp2); + z_brne(slow_path); - // cache hit bind(monitor_found); - z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference())); } NearLabel monitor_locked; // lock the monitor diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 51b2eff2cfbc3..b13ce1aba85d6 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -33,6 +33,7 @@ #include "opto/opcodes.hpp" #include "opto/subnode.hpp" #include "runtime/globals.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/checkedCast.hpp" @@ -217,7 +218,6 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle // In the case of failure, the node will branch directly to the // FailureLabel - // obj: object to lock // box: on-stack box address -- KILLED // rax: tmp -- KILLED @@ -233,6 +233,8 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, Label locked; // Finish fast lock unsuccessfully. MUST jump with ZF == 0 Label slow_path; + // Finish fast lock unsuccessfully. Sets ZF == 0 for you + Label slow_path_clear_zf; if (UseObjectMonitorTable) { // Clear cache in case fast locking succeeds or we need to take the slow-path. @@ -269,7 +271,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, // Check if recursive. cmpptr(obj, Address(thread, top, Address::times_1, -oopSize)); - jccb(Assembler::equal, push); + jcc(Assembler::equal, push); // Try to lock. Transition lock bits 0b01 => 0b00 movptr(rax_reg, mark); @@ -286,7 +288,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, // After successful lock, push object on lock-stack. movptr(Address(thread, top), obj); addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); - jmpb(locked); + jmp(locked); } { // Handle inflated monitor. 
@@ -302,33 +304,53 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, Label monitor_found; // Load cache address - lea(t, Address(thread, JavaThread::om_cache_oops_offset())); + lea(rax_reg, Address(thread, JavaThread::om_cache_oops_offset())); + + Label found_in_cache; + Label lookup_in_table; - const int num_unrolled = 2; + const int num_unrolled = OMCache::CAPACITY; for (int i = 0; i < num_unrolled; i++) { - cmpptr(obj, Address(t)); - jccb(Assembler::equal, monitor_found); - increment(t, in_bytes(OMCache::oop_to_oop_difference())); + cmpptr(obj, Address(rax_reg)); + jcc(Assembler::equal, found_in_cache); + increment(rax_reg, in_bytes(OMCache::oop_to_oop_difference())); } - Label loop; + jmp(lookup_in_table); + + bind(found_in_cache); + movptr(monitor, Address(rax_reg, OMCache::oop_to_monitor_difference())); + jmp(monitor_found); + + bind(lookup_in_table); + + // Grab hash code + movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + shrq(mark, markWord::hash_shift); + andq(mark, markWord::hash_mask); - // Search for obj in cache. - bind(loop); + // Get the table and calculate bucket + lea(rax_reg, ExternalAddress(ObjectMonitorTable::current_table_address())); + movptr(rax_reg, Address(rax_reg, 0)); + andq(monitor, Address(rax_reg, ObjectMonitorTable::table_capacity_mask_offset())); + movptr(rax_reg, Address(rax_reg, ObjectMonitorTable::table_buckets_offset())); - // Check for match. - cmpptr(obj, Address(t)); - jccb(Assembler::equal, monitor_found); + // Read monitor from bucket + movptr(monitor, Address(rax_reg, mark, Address::times_8)); - // Search until null encountered, guaranteed _null_sentinel at end. - cmpptr(Address(t), 1); - jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0 - increment(t, in_bytes(OMCache::oop_to_oop_difference())); - jmpb(loop); + // Check if empty slot, removed slot or tomb stone + cmpptr(monitor, 2); + jcc(Assembler::belowEqual, slow_path_clear_zf); + + // Check if object matches + movptr(rax_reg, Address(monitor, ObjectMonitor::object_offset())); + BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler(); + bs_asm->try_resolve_weak_handle_in_c2(this, rax_reg, slow_path_clear_zf); + cmpptr(rax_reg, obj); + jcc(Assembler::notEqual, slow_path); // Cache hit. bind(monitor_found); - movptr(monitor, Address(t, OMCache::oop_to_monitor_difference())); } const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address(monitor, ObjectMonitor::recursions_offset() - monitor_tag); @@ -347,11 +369,11 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, xorptr(rax_reg, rax_reg); movptr(box, Address(thread, JavaThread::monitor_owner_id_offset())); lock(); cmpxchgptr(box, owner_address); - jccb(Assembler::equal, monitor_locked); + jcc(Assembler::equal, monitor_locked); // Check if recursive. cmpptr(box, rax_reg); - jccb(Assembler::notEqual, slow_path); + jcc(Assembler::notEqual, slow_path); // Recursive. increment(recursions_address); @@ -363,6 +385,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, // Set ZF = 1 xorl(rax_reg, rax_reg); + Label the_end; #ifdef ASSERT // Check that locked label is reached with ZF set. 
Label zf_correct; @@ -370,6 +393,11 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, jcc(Assembler::zero, zf_correct); jmp(zf_bad_zero); #endif + jmp(the_end); + + bind(slow_path_clear_zf); + // Set ZF = 0 + orl(rax_reg, 1); bind(slow_path); #ifdef ASSERT @@ -380,6 +408,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg, stop("Fast Lock ZF != 1"); bind(zf_correct); #endif + bind(the_end); // C2 uses the value of ZF to determine the continuation. } @@ -487,14 +516,14 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t, cmpl(top, in_bytes(JavaThread::lock_stack_base_offset())); jcc(Assembler::below, check_done); cmpptr(obj, Address(thread, top)); - jccb(Assembler::notEqual, inflated_check_lock_stack); + jcc(Assembler::notEqual, inflated_check_lock_stack); stop("Fast Unlock lock on stack"); bind(check_done); if (UseObjectMonitorTable) { movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); } testptr(mark, markWord::monitor_value); - jccb(Assembler::notZero, inflated); + jcc(Assembler::notZero, inflated); stop("Fast Unlock not monitor"); #endif @@ -519,7 +548,7 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t, // Check if recursive. cmpptr(recursions_address, 0); - jccb(Assembler::notZero, recursive); + jcc(Assembler::notZero, recursive); // Set owner to null. // Release to satisfy the JMM @@ -530,11 +559,11 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t, // Check if the entry_list is empty. cmpptr(entry_list_address, NULL_WORD); - jccb(Assembler::zero, unlocked); // If so we are done. + jcc(Assembler::zero, unlocked); // If so we are done. // Check if there is a successor. cmpptr(succ_address, NULL_WORD); - jccb(Assembler::notZero, unlocked); // If so we are done. + jcc(Assembler::notZero, unlocked); // If so we are done. // Save the monitor pointer in the current thread, so we can try to // reacquire the lock in SharedRuntime::monitor_exit_helper(). 
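All five ports above emit the same lookup shape once the unrolled OMCache probe misses: extract the hash bits from the mark word, mask with the table's capacity mask, read a single bucket, and bail out to the runtime on any sentinel or mismatch. The following C++ sketch mirrors what the emitted code computes; it is illustrative only, with stand-in types, and resolve_weak_handle() stands in for the GC-specific BarrierSetAssembler::try_resolve_weak_handle_in_c2:

    #include <cstddef>
    #include <cstdint>

    // Stand-ins for the HotSpot types the emitted code touches.
    using oop = void*;
    struct ObjectMonitor {
      oop* object_handle;  // stands in for the _object WeakHandle (an oop*)
    };

    struct Table {
      size_t capacity_mask;     // capacity - 1; capacity is a power of two
      ObjectMonitor** buckets;  // located via table_buckets_offset()
    };

    // GC-specific in the real code: a plain load through the handle for most
    // collectors; ZGC additionally tests the bad mask and uncolors the pointer,
    // branching to the slow path when the color check fails. Stubbed as a load.
    static oop resolve_weak_handle(ObjectMonitor* m) {
      return *m->object_handle;
    }

    // Returning nullptr means "take the slow path" (flag == NE / ZF == 0).
    // Note there is no probe loop: exactly one bucket is inspected, and any
    // sentinel value or collision falls back to the runtime.
    static ObjectMonitor* table_fast_lookup(Table* t, oop obj, uintptr_t hash_bits) {
      size_t index = hash_bits & t->capacity_mask;
      ObjectMonitor* m = t->buckets[index];
      if (reinterpret_cast<uintptr_t>(m) <= 2) {
        return nullptr;  // empty (0), tomb stone (1) or removed (2)
      }
      if (resolve_weak_handle(m) != obj) {
        return nullptr;  // hash collision or cleared weak handle
      }
      return m;
    }

The unrolled OMCache probe ahead of this (num_unrolled == OMCache::CAPACITY) keeps its structure; what changed is the miss path, from a probing loop over the cache to this single-bucket table read.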
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp index 09c5d93dbb3a2..b55320e3f3f20 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp @@ -395,6 +395,10 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st); +void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath) { + __ movptr(obj, Address(obj, 0)); +} + #undef __ #define __ _masm-> diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp index fd52379d2e2bc..f0fe258f9147f 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp @@ -109,6 +109,8 @@ class BarrierSetAssembler: public CHeapObj { #ifdef COMPILER2 OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg); + + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath); #endif // COMPILER2 }; diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp index ae93cca8c1991..c87640dce3244 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp @@ -1328,6 +1328,19 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm, __ jmp(slow_continuation); } +void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath) { + // Load the oop from the weak handle + __ movptr(obj, Address(obj, 0)); + + // Check if oop is okay + __ testptr(obj, Address(r15_thread, ZThreadLocalData::mark_bad_mask_offset())); + __ jcc(Assembler::notZero, slowpath); + + // Uncolor oop if okay + __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeShl); + __ shrq(obj, barrier_Relocation::unpatched); +} + #undef __ #endif // COMPILER2 diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp index 8bb653ec5fbaf..21b94613748b4 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp @@ -167,6 +167,8 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ZLoadBarrierStubC2* stub) const; void generate_c2_store_barrier_stub(MacroAssembler* masm, ZStoreBarrierStubC2* stub) const; + + virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath); #endif // COMPILER2 void store_barrier_fast(MacroAssembler* masm, diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index d002edd48cd4f..5e158eaccabc6 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1953,7 +1953,7 @@ const int ObjectAlignmentInBytes = 8; "Mark all threads after a safepoint, and clear on a modify " \ "fence. 
Add cleanliness checks.") \ \ - product(bool, UseObjectMonitorTable, false, DIAGNOSTIC, \ + product(bool, UseObjectMonitorTable, true, DIAGNOSTIC, \ "Use a table to record inflated monitors rather than the first " \ "word of the object.") \ \ diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp index dbc958a71e2b1..e19b6e75b7412 100644 --- a/src/hotspot/share/runtime/lockStack.hpp +++ b/src/hotspot/share/runtime/lockStack.hpp @@ -132,7 +132,7 @@ class LockStack { class OMCache { friend class VMStructs; public: - static constexpr int CAPACITY = 8; + static constexpr int CAPACITY = 2; private: struct OMCacheEntry { diff --git a/src/hotspot/share/runtime/monitorDeflationThread.cpp b/src/hotspot/share/runtime/monitorDeflationThread.cpp index 370a6ab8ebaf0..d8e08a68e52be 100644 --- a/src/hotspot/share/runtime/monitorDeflationThread.cpp +++ b/src/hotspot/share/runtime/monitorDeflationThread.cpp @@ -88,6 +88,8 @@ void MonitorDeflationThread::monitor_deflation_thread_entry(JavaThread* jt, TRAP } } + log_debug(monitorinflation)("Starting deflation cycle"); + (void)ObjectSynchronizer::deflate_idle_monitors(); if (log_is_enabled(Debug, monitorinflation)) { diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 53b64f1e8a54c..c35cdbb65b719 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -218,6 +218,7 @@ class ObjectMonitor : public CHeapObj { static int Knob_SpinLimit; + static ByteSize object_offset() { return byte_offset_of(ObjectMonitor, _object); } static ByteSize metadata_offset() { return byte_offset_of(ObjectMonitor, _metadata); } static ByteSize owner_offset() { return byte_offset_of(ObjectMonitor, _owner); } static ByteSize recursions_offset() { return byte_offset_of(ObjectMonitor, _recursions); } diff --git a/src/hotspot/share/runtime/serviceThread.cpp b/src/hotspot/share/runtime/serviceThread.cpp index 27958885a7f63..9b231e4996267 100644 --- a/src/hotspot/share/runtime/serviceThread.cpp +++ b/src/hotspot/share/runtime/serviceThread.cpp @@ -85,7 +85,6 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { bool cldg_cleanup_work = false; bool jvmti_tagmap_work = false; bool oopmap_cache_work = false; - bool object_monitor_table_work = false; { // Need state transition ThreadBlockInVM so that this thread // will be handled by safepoint correctly when this thread is @@ -112,8 +111,7 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { (oop_handles_to_release = JavaThread::has_oop_handles_to_release()) | (cldg_cleanup_work = ClassLoaderDataGraph::should_clean_metaspaces_and_reset()) | (jvmti_tagmap_work = JvmtiTagMap::has_object_free_events_and_reset()) | - (oopmap_cache_work = OopMapCache::has_cleanup_work()) | - (object_monitor_table_work = ObjectSynchronizer::needs_resize()) + (oopmap_cache_work = OopMapCache::has_cleanup_work()) ) == 0) { // Wait until notified that there is some work to do or timer expires. // Some cleanup requests don't notify the ServiceThread so work needs to be done at periodic intervals. 
@@ -171,10 +169,6 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { if (oopmap_cache_work) { OopMapCache::cleanup(); } - - if (object_monitor_table_work) { - ObjectSynchronizer::resize_table(jt); - } } } diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index fe95320c57466..cf6e46a2097a6 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -1194,13 +1194,10 @@ size_t ObjectSynchronizer::deflate_idle_monitors() { GrowableArray<ObjectMonitor*> delete_list((int)deflated_count); unlinked_count = _in_use_list.unlink_deflated(deflated_count, &delete_list, &safepointer); -#ifdef ASSERT + GrowableArray<ObjectMonitorTable::Table*> table_delete_list; if (UseObjectMonitorTable) { - for (ObjectMonitor* monitor : delete_list) { - assert(!ObjectSynchronizer::contains_monitor(current, monitor), "Should have been removed"); - } + ObjectMonitorTable::rebuild(&table_delete_list); } -#endif log.before_handshake(unlinked_count); @@ -1221,6 +1218,9 @@ size_t ObjectSynchronizer::deflate_idle_monitors() { // Delete the unlinked ObjectMonitors. deleted_count = delete_monitors(&delete_list, &safepointer); + if (UseObjectMonitorTable) { + ObjectMonitorTable::destroy(&table_delete_list); + } assert(unlinked_count == deleted_count, "must be"); } @@ -1472,288 +1472,413 @@ void ObjectSynchronizer::log_in_use_monitor_details(outputStream* out, bool log_ // ----------------------------------------------------------------------------- // ConcurrentHashTable storing links from objects to ObjectMonitors -class ObjectMonitorTable : AllStatic { - struct Config { - using Value = ObjectMonitor*; - static uintx get_hash(Value const& value, bool* is_dead) { - return (uintx)value->hash(); - } - static void* allocate_node(void* context, size_t size, Value const& value) { - ObjectMonitorTable::inc_items_count(); - return AllocateHeap(size, mtObjectMonitor); - }; - static void free_node(void* context, void* memory, Value const& value) { - ObjectMonitorTable::dec_items_count(); - FreeHeap(memory); - } - }; - using ConcurrentTable = ConcurrentHashTable<Config, mtObjectMonitor>; - - static ConcurrentTable* _table; - static volatile size_t _items_count; - static size_t _table_size; - static volatile bool _resize; - - class Lookup : public StackObj { - oop _obj; +ObjectMonitorTable::Table* volatile ObjectMonitorTable::_curr; - public: - explicit Lookup(oop obj) : _obj(obj) {} +class ObjectMonitorTable::Table : public CHeapObj<mtObjectMonitor> { + friend class ObjectMonitorTable; - uintx get_hash() const { - uintx hash = _obj->mark().hash(); - assert(hash != 0, "should have a hash"); - return hash; - } - - bool equals(ObjectMonitor** value) { - assert(*value != nullptr, "must be"); - return (*value)->object_refers_to(_obj); - } + const size_t _capacity_mask; // One less than its power-of-two capacity + Table* volatile _prev; // Set while rehashing + ObjectMonitor* volatile* _buckets; // The payload - bool is_dead(ObjectMonitor** value) { - assert(*value != nullptr, "must be"); - return false; - } - }; + char _padding[DEFAULT_CACHE_LINE_SIZE]; - class LookupMonitor : public StackObj { - ObjectMonitor* _monitor; + volatile size_t _items_count; - public: - explicit LookupMonitor(ObjectMonitor* monitor) : _monitor(monitor) {} + static ObjectMonitor* tomb_stone() { + return (ObjectMonitor*)1; + } - uintx get_hash() const { - return _monitor->hash(); - } + static ObjectMonitor* removed_entry() { + return (ObjectMonitor*)2; + } - bool equals(ObjectMonitor** value) { - return (*value) == _monitor; + // Make sure we leave space for previous versions to relocate too
+ bool try_inc_items_count() { + for (;;) { + size_t population = AtomicAccess::load(&_items_count); + if (should_grow(population)) { + return false; + } + if (AtomicAccess::cmpxchg(&_items_count, population, population + 1, memory_order_relaxed) == population) { + return true; + } } + } - bool is_dead(ObjectMonitor** value) { - assert(*value != nullptr, "must be"); - return (*value)->object_is_dead(); - } - }; + double get_load_factor(size_t count) { + return (double)count / (double)capacity(); + } - static void inc_items_count() { + void inc_items_count() { AtomicAccess::inc(&_items_count, memory_order_relaxed); } - static void dec_items_count() { + void dec_items_count() { AtomicAccess::dec(&_items_count, memory_order_relaxed); } - static double get_load_factor() { - size_t count = AtomicAccess::load(&_items_count); - return (double)count / (double)_table_size; +public: + Table(size_t capacity, Table* prev) + : _capacity_mask(capacity - 1), + _prev(prev), + _buckets(NEW_C_HEAP_ARRAY(ObjectMonitor*, capacity, mtObjectMonitor)), + _items_count(0) + { + for (size_t i = 0; i < capacity; ++i) { + _buckets[i] = nullptr; + } } - static size_t table_size(Thread* current = Thread::current()) { - return ((size_t)1) << _table->get_size_log2(current); + ~Table() { + FREE_C_HEAP_ARRAY(ObjectMonitor*, _buckets); } - static size_t max_log_size() { - // TODO[OMTable]: Evaluate the max size. - // TODO[OMTable]: Need to fix init order to use Universe::heap()->max_capacity(); - // Using MaxHeapSize directly this early may be wrong, and there - // are definitely rounding errors (alignment). - const size_t max_capacity = MaxHeapSize; - const size_t min_object_size = CollectedHeap::min_dummy_object_size() * HeapWordSize; - const size_t max_objects = max_capacity / MAX2(MinObjAlignmentInBytes, checked_cast<size_t>(min_object_size)); - const size_t log_max_objects = log2i_graceful(max_objects); - - return MAX2(MIN2(SIZE_BIG_LOG2, log_max_objects), min_log_size()); + Table* prev() { + return AtomicAccess::load(&_prev); } - static size_t min_log_size() { - // ~= log(AvgMonitorsPerThreadEstimate default) - return 10; + size_t capacity() { + return _capacity_mask + 1; } - template <typename V> - static size_t clamp_log_size(V log_size) { - return MAX2(MIN2(log_size, checked_cast<V>(max_log_size())), checked_cast<V>(min_log_size())); + bool should_grow(size_t population) { + return get_load_factor(population) > GROW_LOAD_FACTOR; } - static size_t initial_log_size() { - const size_t estimate = log2i(MAX2(os::processor_count(), 1)) + log2i(MAX2(AvgMonitorsPerThreadEstimate, size_t(1))); - return clamp_log_size(estimate); + bool should_grow() { + return should_grow(AtomicAccess::load(&_items_count)); } - static size_t grow_hint() { - return ConcurrentTable::DEFAULT_GROW_HINT; - } + ObjectMonitor* get(oop obj, int hash) { + // Acquire tomb stones and relocations in case prev transitioned to null + Table* prev = AtomicAccess::load_acquire(&_prev); + if (prev != nullptr) { + ObjectMonitor* result = prev->get(obj, hash); + if (result != nullptr) { + return result; + } + } - public: - static void create() { - _table = new ConcurrentTable(initial_log_size(), max_log_size(), grow_hint()); - _items_count = 0; - _table_size = table_size(); - _resize = false; - } + const size_t start_index = size_t(hash) & _capacity_mask; + size_t index = start_index; - static void verify_monitor_get_result(oop obj, ObjectMonitor* monitor) { -#ifdef ASSERT - if (SafepointSynchronize::is_at_safepoint()) { - bool
has_monitor = obj->mark().has_monitor(); assert(has_monitor == (monitor != nullptr), "Inconsistency between markWord and ObjectMonitorTable has_monitor: %s monitor: " PTR_FORMAT, BOOL_TO_STR(has_monitor), p2i(monitor)); } -#endif - } - static ObjectMonitor* monitor_get(Thread* current, oop obj) { - ObjectMonitor* result = nullptr; - Lookup lookup_f(obj); - auto found_f = [&](ObjectMonitor** found) { - assert((*found)->object_peek() == obj, "must be"); - result = *found; - }; - _table->get(current, lookup_f, found_f); - verify_monitor_get_result(obj, result); - return result; + for (;;) { + ObjectMonitor* volatile* bucket = _buckets + index; + ObjectMonitor* monitor = AtomicAccess::load(bucket); + + if (monitor == tomb_stone() || monitor == nullptr) { + // Not found + break; + } + + if (monitor != removed_entry() && monitor->object_peek() == obj) { + // Found matching monitor + OrderAccess::acquire(); + return monitor; + } + + index = (index + 1) & _capacity_mask; + if (index == start_index) { + // Not found - wrap around + break; + } + } + + // Rehashing could have started by now, but if a monitor has been inserted in a + // newer table, it was inserted after the get linearization point + return nullptr; + } - static void try_notify_grow() { - if (!_table->is_max_size_reached() && !AtomicAccess::load(&_resize)) { - AtomicAccess::store(&_resize, true); - if (Service_lock->try_lock()) { - Service_lock->notify(); - Service_lock->unlock(); + ObjectMonitor* get_set(oop obj, ObjectMonitor* new_monitor, int hash) { + // Acquire any tomb stones and relocations if prev transitioned to null + Table* prev = AtomicAccess::load_acquire(&_prev); + if (prev != nullptr) { + ObjectMonitor* result = prev->get_set(obj, new_monitor, hash); + if (result != nullptr) { + return result; + } } + } - static bool should_shrink() { - // Not implemented; - return false; - } + const size_t start_index = size_t(hash) & _capacity_mask; + size_t index = start_index; + + for (;;) { + ObjectMonitor* volatile* bucket = _buckets + index; + ObjectMonitor* monitor = AtomicAccess::load(bucket); + + if (monitor == nullptr) { + // Empty slot to install the new monitor + if (try_inc_items_count()) { + // Succeeded in claiming an item + ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release); + if (result == monitor) { + // Success - already incremented + return new_monitor; + } + + // Something else was installed in its place + dec_items_count(); + monitor = result; + } else { + // Out of allowance; leave room for rehashing to succeed + // To avoid concurrent inserts succeeding, place a tomb stone here.
+ ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tomb_stone()); + if (result == monitor) { + // Success; nobody will try to insert here again, except reinsert from rehashing + return nullptr; + } + monitor = result; + } + } - static constexpr double GROW_LOAD_FACTOR = 0.75; + if (monitor == tomb_stone()) { + // Can't insert into this table + return nullptr; + } - static bool should_grow() { - return get_load_factor() > GROW_LOAD_FACTOR && !_table->is_max_size_reached(); + if (monitor != removed_entry() && monitor->object_peek() == obj) { + // Found matching monitor + return monitor; + } + + index = (index + 1) & _capacity_mask; + if (index == start_index) { + // No slot to install in this table + return nullptr; + } + } } - static bool should_resize() { - return should_grow() || should_shrink() || AtomicAccess::load(&_resize); + void remove(oop obj, ObjectMonitor* old_monitor, int hash) { + // Acquire any tomb stones and relocations if prev transitioned to null + Table* prev = AtomicAccess::load_acquire(&_prev); + if (prev != nullptr) { + prev->remove(obj, old_monitor, hash); + } + + const size_t start_index = size_t(hash) & _capacity_mask; + size_t index = start_index; + + for (;;) { + ObjectMonitor* volatile* bucket = _buckets + index; + ObjectMonitor* monitor = AtomicAccess::load(bucket); + + if (monitor == nullptr) { + // Monitor does not exist in this table + return; + } + + if (monitor == old_monitor) { + // Found matching entry; remove it + AtomicAccess::cmpxchg(bucket, monitor, removed_entry()); + return; + } + + index = (index + 1) & _capacity_mask; + if (index == start_index) { + // Not found + return; + } + } } - template - static bool run_task(JavaThread* current, Task& task, const char* task_name, Args&... args) { - if (task.prepare(current)) { - log_trace(monitortable)("Started to %s", task_name); - TraceTime timer(task_name, TRACETIME_LOG(Debug, monitortable, perf)); - while (task.do_task(current, args...)) { - task.pause(current); - { - ThreadBlockInVM tbivm(current); + void reinsert(oop obj, ObjectMonitor* new_monitor) { + int hash = obj->mark().hash(); + + const size_t start_index = size_t(hash) & _capacity_mask; + size_t index = start_index; + + for (;;) { + ObjectMonitor* volatile* bucket = _buckets + index; + ObjectMonitor* monitor = AtomicAccess::load(bucket); + + if (monitor == nullptr) { + // Empty slot to install the new monitor + ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release); + if (result == monitor) { + // Success - unconditionally increment + inc_items_count(); + return; } - task.cont(current); + + // Another monitor was installed + monitor = result; } - task.done(current); - return true; + + if (monitor == tomb_stone()) { + // A concurrent inserter did not get enough allowance in the table + // But reinsert always succeeds - we will take the spot + ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release); + if (result == monitor) { + // Success - unconditionally increment + inc_items_count(); + return; + } + + // Another monitor was installed + monitor = result; + } + + assert(monitor != nullptr, "invariant"); + assert(monitor != tomb_stone(), "invariant"); + assert(monitor == removed_entry() || monitor->object_peek() != obj, "invariant"); + + index = (index + 1) & _capacity_mask; + assert(index != start_index, "should never be full"); } - return false; } - static bool grow(JavaThread* current) { - ConcurrentTable::GrowTask grow_task(_table); - if 
(run_task(current, grow_task, "Grow")) { - _table_size = table_size(current); - log_info(monitortable)("Grown to size: %zu", _table_size); - return true; + void rebuild() { + Table* prev = _prev; + if (prev == nullptr) { + // Base case for recursion - no previous version + return; } - return false; - } + // Finish rebuilding up to prev as target so we can use prev as source + prev->rebuild(); - static bool clean(JavaThread* current) { - ConcurrentTable::BulkDeleteTask clean_task(_table); - auto is_dead = [&](ObjectMonitor** monitor) { - return (*monitor)->object_is_dead(); - }; - auto do_nothing = [&](ObjectMonitor** monitor) {}; - NativeHeapTrimmer::SuspendMark sm("ObjectMonitorTable"); - return run_task(current, clean_task, "Clean", is_dead, do_nothing); - } + JavaThread* current = JavaThread::current(); - static bool resize(JavaThread* current) { - LogTarget(Info, monitortable) lt; - bool success = false; + // Relocate entries from prev now that it has been rebuilt + for (size_t index = 0; index <= prev->_capacity_mask; index++) { + if ((index & 127) == 0) { + // Poll for safepoints to improve time to safepoint + ThreadBlockInVM tbivm(current); + } + + ObjectMonitor* volatile* bucket = prev->_buckets + index; + ObjectMonitor* monitor = AtomicAccess::load(bucket); + + if (monitor == nullptr) { + // Empty slot; put a tomb stone there + ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tomb_stone(), memory_order_relaxed); + if (result == nullptr) { + // Success; move to next entry + continue; + } + + // Concurrent insert; relocate + monitor = result; + } + + if (monitor != tomb_stone() && monitor != removed_entry()) { + // A monitor + oop obj = monitor->object_peek(); + if (!monitor->is_being_async_deflated() && obj != nullptr) { + // Re-insert still live monitor + reinsert(obj, monitor); + } } - if (should_grow()) { - lt.print("Start growing with load factor %f", get_load_factor()); - success = grow(current); - } else { - if (!_table->is_max_size_reached() && AtomicAccess::load(&_resize)) { - lt.print("WARNING: Getting resize hints with load factor %f", get_load_factor()); + } - lt.print("Start cleaning with load factor %f", get_load_factor()); - success = clean(current); } - AtomicAccess::store(&_resize, false); + // Unlink this table, releasing the tomb stones and relocations + AtomicAccess::release_store(&_prev, (Table*)nullptr); + } +}; + +void ObjectMonitorTable::create() { + _curr = new Table(128, nullptr); +} + +ObjectMonitor* ObjectMonitorTable::monitor_get(Thread* current, oop obj) { + const int hash = obj->mark().hash(); + Table* curr = AtomicAccess::load_acquire(&_curr); - return success; + return curr->get(obj, hash); +} + +// Returns a new table to try inserting into +ObjectMonitorTable::Table* ObjectMonitorTable::grow_table(Table* curr) { + Table* new_table = AtomicAccess::load(&_curr); + if (new_table != curr) { + // Table changed; no need to try further + return new_table; } - static ObjectMonitor* monitor_put_get(Thread* current, ObjectMonitor* monitor, oop obj) { - // Enter the monitor into the concurrent hashtable.
- ObjectMonitor* result = monitor; - Lookup lookup_f(obj); - auto found_f = [&](ObjectMonitor** found) { - assert((*found)->object_peek() == obj, "must be"); - result = *found; - }; - bool grow; - _table->insert_get(current, lookup_f, monitor, found_f, &grow); - verify_monitor_get_result(obj, result); - if (grow) { - try_notify_grow(); + new_table = new Table(curr->capacity() << 1, curr); + Table* result = + AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_acq_rel); + if (result == curr) { + // Successfully started rehashing + log_info(monitorinflation)("Growing object monitor table"); + ObjectSynchronizer::request_deflate_idle_monitors(); + return new_table; + } + + // Somebody else started rehashing; restart in new table + delete new_table; + + return result; +} + +ObjectMonitor* ObjectMonitorTable::monitor_put_get(Thread* current, ObjectMonitor* monitor, oop obj) { + const int hash = obj->mark().hash(); + Table* curr = AtomicAccess::load_acquire(&_curr); + + for (;;) { + // Curr is the latest table and is reasonably loaded + ObjectMonitor* result = curr->get_set(obj, monitor, hash); + if (result != nullptr) { + return result; + } + // Table rehashing started; try again in the new table - return result; + curr = grow_table(curr); } +} - static bool remove_monitor_entry(Thread* current, ObjectMonitor* monitor) { - LookupMonitor lookup_f(monitor); - return _table->remove(current, lookup_f); +void ObjectMonitorTable::remove_monitor_entry(Thread* current, ObjectMonitor* monitor) { + oop obj = monitor->object_peek(); + if (obj == nullptr) { + // Defer removal until subsequent rebuilding + return; } + const int hash = obj->mark().hash(); - static bool contains_monitor(Thread* current, ObjectMonitor* monitor) { - LookupMonitor lookup_f(monitor); - bool result = false; - auto found_f = [&](ObjectMonitor** found) { - result = true; - }; - _table->get(current, lookup_f, found_f); - return result; - } - - static void print_on(outputStream* st) { - auto printer = [&] (ObjectMonitor** entry) { - ObjectMonitor* om = *entry; - oop obj = om->object_peek(); - st->print("monitor=" PTR_FORMAT ", ", p2i(om)); - st->print("object=" PTR_FORMAT, p2i(obj)); - assert(obj->mark().hash() == om->hash(), "hash must match"); - st->cr(); - return true; - }; - if (SafepointSynchronize::is_at_safepoint()) { - _table->do_safepoint_scan(printer); - } else { - _table->do_scan(Thread::current(), printer); + Table* curr = AtomicAccess::load_acquire(&_curr); + curr->remove(obj, monitor, hash); +} + +// Before handshake; rehash and unlink tables +void ObjectMonitorTable::rebuild(GrowableArray<Table*>* delete_list) { + Table* new_table; + { + Table* curr = AtomicAccess::load_acquire(&_curr); + new_table = new Table(curr->capacity(), curr); + Table* result = AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_release); + if (result != curr) { + // Somebody else started rehashing first; use their new table + new_table = result; } } -}; -ObjectMonitorTable::ConcurrentTable* ObjectMonitorTable::_table = nullptr; -volatile size_t ObjectMonitorTable::_items_count = 0; -size_t ObjectMonitorTable::_table_size = 0; -volatile bool ObjectMonitorTable::_resize = false; + for (Table* curr = new_table->prev(); curr != nullptr; curr = curr->prev()) { + delete_list->append(curr); + } + + // Rebuild with the new table as target + new_table->rebuild(); +} + +// After handshake; destroy old tables +void ObjectMonitorTable::destroy(GrowableArray<Table*>* delete_list) { + for (ObjectMonitorTable::Table* table : *delete_list) { + delete table; + } +}
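The bucket encoding that both the runtime paths above and the compiled fast paths rely on: live monitors are heap pointers, so the lowest integer values can double as sentinels. A hypothetical helper (not part of the patch) summarizing the rule:

    #include <cstdint>
    struct ObjectMonitor;  // opaque here

    // Bucket states in ObjectMonitorTable::Table:
    //   (ObjectMonitor*)0 - empty; terminates lookups and accepts inserts
    //   (ObjectMonitor*)1 - tomb_stone(); frozen during rehash: inserts must
    //                       move to a newer table, only reinsert() may claim it
    //   (ObjectMonitor*)2 - removed_entry(); lookups skip it and keep probing
    // Real monitors are heap pointers, so one unsigned compare against 2 is
    // what every compiled fast path emits before dereferencing the monitor.
    static bool is_sentinel(const ObjectMonitor* m) {
      return reinterpret_cast<uintptr_t>(m) <= 2;
    }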
+address ObjectMonitorTable::current_table_address() { + return (address)(&_curr); +} + +ByteSize ObjectMonitorTable::table_capacity_mask_offset() { + return byte_offset_of(Table, _capacity_mask); +} + +ByteSize ObjectMonitorTable::table_buckets_offset() { + return byte_offset_of(Table, _buckets); +} ObjectMonitor* ObjectSynchronizer::get_or_insert_monitor_from_table(oop object, JavaThread* current, bool* inserted) { ObjectMonitor* monitor = get_monitor_from_table(current, object); @@ -1833,11 +1958,11 @@ ObjectMonitor* ObjectSynchronizer::add_monitor(JavaThread* current, ObjectMonito return ObjectMonitorTable::monitor_put_get(current, monitor, obj); } -bool ObjectSynchronizer::remove_monitor(Thread* current, ObjectMonitor* monitor, oop obj) { +void ObjectSynchronizer::remove_monitor(Thread* current, ObjectMonitor* monitor, oop obj) { assert(UseObjectMonitorTable, "must be"); assert(monitor->object_peek() == obj, "must be, cleared objects are removed by is_dead"); - return ObjectMonitorTable::remove_monitor_entry(current, monitor); + ObjectMonitorTable::remove_monitor_entry(current, monitor); } void ObjectSynchronizer::deflate_mark_word(oop obj) { @@ -1859,20 +1984,6 @@ void ObjectSynchronizer::create_om_table() { ObjectMonitorTable::create(); } -bool ObjectSynchronizer::needs_resize() { - if (!UseObjectMonitorTable) { - return false; - } - return ObjectMonitorTable::should_resize(); -} - -bool ObjectSynchronizer::resize_table(JavaThread* current) { - if (!UseObjectMonitorTable) { - return true; - } - return ObjectMonitorTable::resize(current); -} - class ObjectSynchronizer::LockStackInflateContendedLocks : private OopClosure { private: oop _contended_oops[LockStack::CAPACITY]; @@ -2580,10 +2691,7 @@ ObjectMonitor* ObjectSynchronizer::inflate_and_enter(oop object, BasicLock* lock void ObjectSynchronizer::deflate_monitor(Thread* current, oop obj, ObjectMonitor* monitor) { if (obj != nullptr) { deflate_mark_word(obj); - } - bool removed = remove_monitor(current, monitor, obj); - if (obj != nullptr) { - assert(removed, "Should have removed the entry if obj was alive"); + remove_monitor(current, monitor, obj); } } @@ -2592,11 +2700,6 @@ ObjectMonitor* ObjectSynchronizer::get_monitor_from_table(Thread* current, oop o return ObjectMonitorTable::monitor_get(current, obj); } -bool ObjectSynchronizer::contains_monitor(Thread* current, ObjectMonitor* monitor) { - assert(UseObjectMonitorTable, "must be"); - return ObjectMonitorTable::contains_monitor(current, monitor); -} - ObjectMonitor* ObjectSynchronizer::read_monitor(markWord mark) { return mark.monitor(); } diff --git a/src/hotspot/share/runtime/synchronizer.hpp b/src/hotspot/share/runtime/synchronizer.hpp index a10e44b309272..bb3888b0a0672 100644 --- a/src/hotspot/share/runtime/synchronizer.hpp +++ b/src/hotspot/share/runtime/synchronizer.hpp @@ -68,6 +68,31 @@ class MonitorList::Iterator { ObjectMonitor* next(); }; +class ObjectMonitorTable : AllStatic { + static constexpr double GROW_LOAD_FACTOR = 0.125; + +public: + class Table; + +private: + static Table* volatile _curr; + static Table* grow_table(Table* curr); + +public: + static void create(); + static ObjectMonitor* monitor_get(Thread* current, oop obj); + static ObjectMonitor* monitor_put_get(Thread* current, ObjectMonitor* monitor, oop obj); + static void rebuild(GrowableArray<Table*>* delete_list); + static void destroy(GrowableArray<Table*>* delete_list); + static void remove_monitor_entry(Thread* current, ObjectMonitor* monitor); + static void monitor_reinsert(Table* from, ObjectMonitor* monitor,
oop obj); + + // Compiler support + static address current_table_address(); + static ByteSize table_capacity_mask_offset(); + static ByteSize table_buckets_offset(); +}; + class ObjectSynchronizer : AllStatic { friend class VMStructs; friend class ObjectMonitorDeflationLogging; @@ -213,7 +238,7 @@ class ObjectSynchronizer : AllStatic { static ObjectMonitor* get_or_insert_monitor(oop object, JavaThread* current, ObjectSynchronizer::InflateCause cause); static ObjectMonitor* add_monitor(JavaThread* current, ObjectMonitor* monitor, oop obj); - static bool remove_monitor(Thread* current, ObjectMonitor* monitor, oop obj); + static void remove_monitor(Thread* current, ObjectMonitor* monitor, oop obj); static void deflate_mark_word(oop object);
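For reference, the rehash hand-off that Table::rebuild() implements, reduced to a self-contained skeleton. This is a sketch under the patch's assumptions: is_live(), object_of() and reinsert() are hypothetical stand-ins for the HotSpot calls, and std::atomic replaces AtomicAccess:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct ObjectMonitor;                 // opaque stand-in
    using oop = void*;

    struct Table {
      size_t capacity_mask;
      std::atomic<ObjectMonitor*>* buckets;
      std::atomic<Table*> prev;           // non-null while draining a predecessor
    };

    static ObjectMonitor* tomb_stone() { return reinterpret_cast<ObjectMonitor*>(1); }
    static bool is_sentinel(ObjectMonitor* m) { return reinterpret_cast<uintptr_t>(m) <= 2; }

    // Hypothetical stand-ins for monitor->is_being_async_deflated(),
    // monitor->object_peek() and Table::reinsert(), which always succeeds
    // in the newest table.
    bool is_live(ObjectMonitor* m);
    oop object_of(ObjectMonitor* m);
    void reinsert(Table* target, oop obj, ObjectMonitor* m);

    // Drain older versions first, then walk the predecessor's buckets: empty
    // slots are fenced with a tomb stone so late inserts are forced into the
    // newer table, and live monitors are reinserted forward. Finally the old
    // chain is unlinked with release semantics.
    void rebuild_sketch(Table* t) {
      Table* prev = t->prev.load(std::memory_order_acquire);
      if (prev == nullptr) {
        return;                           // oldest version: nothing to drain
      }
      rebuild_sketch(prev);
      for (size_t i = 0; i <= prev->capacity_mask; i++) {
        std::atomic<ObjectMonitor*>& bucket = prev->buckets[i];
        ObjectMonitor* m = bucket.load(std::memory_order_relaxed);
        if (m == nullptr) {
          // A losing CAS means a concurrent insert won; relocate that monitor.
          if (bucket.compare_exchange_strong(m, tomb_stone())) {
            continue;
          }
        }
        if (!is_sentinel(m) && is_live(m)) {
          reinsert(t, object_of(m), m);
        }
      }
      t->prev.store(nullptr, std::memory_order_release);  // chain drained
    }

Readers and writers consult prev recursively before touching the current table, so no entry is ever lost during the hand-off; the tomb stones are what make the concurrent inserts and the walk linearize cleanly.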