diff --git a/.ci/jit-debug-test.sh b/.ci/jit-debug-test.sh
new file mode 100755
index 00000000..964efdfc
--- /dev/null
+++ b/.ci/jit-debug-test.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+# JIT Debug Test Script
+# Tests the JIT compiler with ENABLE_JIT_DEBUG=1 to catch issues early. PARALLEL (default: -j<ncpu>) is passed to make unquoted on purpose so it may hold several flags.
+
+set -euo pipefail
+
+PARALLEL="${PARALLEL:--j$(nproc 2> /dev/null || sysctl -n hw.ncpu 2> /dev/null || echo 4)}"
+
+echo "======================================"
+echo "JIT Debug Mode Test"
+echo "======================================"
+
+# Test 1: Standard JIT with debug
+echo ""
+echo "Test 1: Building with ENABLE_JIT_DEBUG=1..."
+make distclean
+make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL
+
+echo ""
+echo "Running basic tests with JIT debug..."
+make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check
+
+# Test 2: JIT with EXT_C=0 and debug (regression test)
+echo ""
+echo "Test 2: Building with ENABLE_EXT_C=0 ENABLE_JIT_DEBUG=1..."
+make distclean
+make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL
+
+echo ""
+echo "Running tests with EXT_C=0 and JIT debug..."
+make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check
+
+# Test 3: JIT with various extension combinations
+echo ""
+echo "Test 3: Testing multiple JIT configurations with debug..."
+for config in \
+    "ENABLE_EXT_A=0" \
+    "ENABLE_EXT_F=0" \
+    "ENABLE_EXT_M=0" \
+    "ENABLE_Zba=0" \
+    "ENABLE_Zbb=0"; do
+    echo ""
+    echo "Testing: $config with JIT debug"
+    make distclean
+    make "$config" ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL
+    make "$config" ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check
+done
+
+echo ""
+echo "======================================"
+echo "All JIT debug tests passed!"
+echo "======================================"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fb64fefb..8fc713d1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -135,6 +135,7 @@ jobs:
       - name: default build using emcc
         if: success()
         run: |
+          make distclean
           make CC=emcc ENABLE_JIT=0 $PARALLEL
 
       - name: default build for system emulation using emcc
@@ -142,7 +143,7 @@ jobs:
         run: |
           make distclean
           make CC=emcc ENABLE_SYSTEM=1 ENABLE_JIT=0 $PARALLEL
-          make distclean ENABLE_SYSTEM=1
+          make distclean
 
       - name: Build with various optimization levels
         if: success()
@@ -249,6 +250,46 @@ jobs:
           make distclean && make ENABLE_UBSAN=1 check $PARALLEL
           make ENABLE_JIT=1 clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check $PARALLEL
 
+      - name: ThreadSanitizer race detection test
+        if: success() || failure()
+        env:
+          CC: ${{ steps.install_cc.outputs.cc }}
+        run: |
+          set -o pipefail
+
+          # TSAN requires ASLR disabled to prevent allocations in shadow memory
+          # Interpreter with FULL4G: Basic race detection across emulation core
+          echo "=== TSAN Test 1/3: Interpreter + FULL4G ==="
+          make distclean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log
+          if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-interpreter.log; then
+            echo "ERROR: Data race detected in interpreter mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-interpreter.log
+            exit 1
+          fi
+          echo "✓ No races detected in interpreter mode"
+
+          # JIT tier-1: Race detection in template-based JIT compilation
+          echo "=== TSAN Test 2/3: JIT Tier-1 ==="
+          make ENABLE_JIT=1 clean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log
+          if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-jit.log; then
+            echo "ERROR: Data race detected in JIT tier-1 mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-jit.log
+            exit 1
+          fi
+          echo "✓ No races detected in JIT tier-1 mode"
+
+          # JIT tier-2 (T2C): Race detection across LLVM compilation thread
+          echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) ==="
+          make ENABLE_JIT=1 clean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log
+          if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-t2c.log; then
+            echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-t2c.log
+            exit 1
+          fi
+          echo "✓ No races detected in JIT tier-2 (T2C) mode"
+
+          echo "=== All TSAN tests passed ==="
+
       - name: boot Linux kernel test
         if: success()
         env:
@@ -301,9 +342,89 @@ jobs:
           githubToken: ${{ github.token }}
           # No 'sudo' is available
           install: |
-            apt update -qq
-            apt install -yqq make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc
-            which wget || echo "WARNING: wget not found after installation"
+            # Retry apt update with exponential backoff for mirror sync issues
+            # dep11 = AppStream metadata (GUI app discovery, non-critical for CLI builds)
+            # Critical files: Packages, Sources, Release, InRelease (binary/source indices)
+            set -o pipefail
+            APT_SUCCESS=0
+            for i in 1 2 3; do
+              echo "=== apt update attempt $i/3 ==="
+              if apt update -qq --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update.log; then
+                APT_EXIT=0
+              else
+                APT_EXIT=$?
+              fi
+
+              # Check if log file was created
+              if [ ! -f /tmp/apt-update.log ]; then
+                echo "ERROR: apt update log file not created"
+                if [ "$i" -lt 3 ]; then
+                  sleep $((i * 30))
+                  continue
+                else
+                  exit 1
+                fi
+              fi
+
+              # Check for critical package index failures (ignore dep11 metadata)
+              # dep11 files like Components-arm64.yml.gz are non-critical (AppStream metadata)
+              # Core package indices (Packages/Sources/Release/InRelease) MUST succeed
+              if grep -q -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log; then
+                # Critical failure detected
+                echo "ERROR: Critical package index files failed to download"
+                grep -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log | head -5
+                if [ "$i" -lt 3 ]; then
+                  delay=$((i * 30))
+                  echo "Retrying in ${delay}s... (attempt $((i + 1))/3)"
+                  sleep "$delay"
+                else
+                  echo "FATAL: Core package indices unavailable after 3 attempts"
+                  cat /tmp/apt-update.log
+                  exit 1
+                fi
+              else
+                # Success: core package indices available (dep11 failures OK)
+                APT_SUCCESS=1
+                if [ "$APT_EXIT" -eq 0 ]; then
+                  echo "✓ apt update succeeded (all package lists available)"
+                else
+                  echo "✓ apt update completed with warnings (exit=$APT_EXIT)"
+                  echo " Core package indices: AVAILABLE"
+                  if grep -q "dep11" /tmp/apt-update.log; then
+                    echo " dep11 metadata: INCOMPLETE (non-critical, GUI app metadata)"
+                  fi
+                fi
+                break
+              fi
+            done
+
+            # Verify we succeeded in at least one attempt
+            if [ "$APT_SUCCESS" -ne 1 ]; then
+              echo "FATAL: apt update failed after all retry attempts"
+              exit 1
+            fi
+
+            # Install packages (exit 0 even if dep11 metadata is incomplete)
+            echo "=== Installing build dependencies ==="
+            apt install -yqq make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc 2>&1 | tee /tmp/apt-install.log || true
+
+            # Verify critical packages were installed successfully
+            echo "=== Verifying critical build tools ==="
+            MISSING_PKGS=""
+            for pkg in make git curl clang bc; do
+              if ! command -v "$pkg" >/dev/null 2>&1; then
+                MISSING_PKGS="$MISSING_PKGS $pkg"
+              fi
+            done
+
+            if [ -n "$MISSING_PKGS" ]; then
+              echo "ERROR: Critical packages failed to install:$MISSING_PKGS"
+              echo "=== apt install log ==="
+              cat /tmp/apt-install.log
+              exit 1
+            fi
+
+            echo "✓ All critical build tools installed successfully"
           # FIXME: gcc build fails on Aarch64/Linux hosts
           env: |
             CC: clang-18
@@ -311,7 +432,15 @@ jobs:
           run: |
             # Verify and install wget if needed (workaround for install step issues)
             if ! command -v wget > /dev/null 2>&1; then
-              apt update -qq && apt install -yqq wget
+              echo "wget not found, attempting to install..."
+              apt update -qq --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update-wget.log || true
+              apt install -yqq wget 2>&1 | tee /tmp/wget-install.log || true
+              if ! command -v wget > /dev/null 2>&1; then
+                echo "ERROR: wget installation failed!"
+                cat /tmp/wget-install.log
+                exit 1
+              fi
+              echo "wget installed successfully"
             fi
             git config --global --add safe.directory ${{ github.workspace }}
             git config --global --add safe.directory ${{ github.workspace }}/src/softfloat
@@ -328,6 +457,33 @@ jobs:
             make ENABLE_JIT=1 clean && make ENABLE_EXT_A=0 ENABLE_JIT=1 check $PARALLEL
             make ENABLE_JIT=1 clean && make ENABLE_EXT_F=0 ENABLE_JIT=1 check $PARALLEL
             make ENABLE_JIT=1 clean && make ENABLE_EXT_C=0 ENABLE_JIT=1 check $PARALLEL
+            # TSAN on ARM64: Fixed memory layout (0x150000000000 for main, 0x151000000000 for JIT)
+            set -o pipefail
+            echo "=== TSAN Test 1/3: Interpreter + FULL4G (ARM64) ==="
+            make distclean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log
+            if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-interpreter.log; then
+              echo "ERROR: Data race detected in interpreter mode!"
+              grep -A 10 "ThreadSanitizer:" tsan-interpreter.log
+              exit 1
+            fi
+            echo "✓ No races detected in interpreter mode"
+            echo "=== TSAN Test 2/3: JIT Tier-1 (ARM64) ==="
+            make ENABLE_JIT=1 clean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log
+            if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-jit.log; then
+              echo "ERROR: Data race detected in JIT tier-1 mode!"
+              grep -A 10 "ThreadSanitizer:" tsan-jit.log
+              exit 1
+            fi
+            echo "✓ No races detected in JIT tier-1 mode"
+            echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) (ARM64) ==="
+            make ENABLE_JIT=1 clean && setarch "$(uname -m)" -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log
+            if grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-t2c.log; then
+              echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!"
+              grep -A 10 "ThreadSanitizer:" tsan-t2c.log
+              exit 1
+            fi
+            echo "✓ No races detected in JIT tier-2 (T2C) mode"
+            echo "=== All TSAN tests passed (ARM64) ==="
 
   macOS-arm64:
     needs: [detect-code-related-file-changes]
@@ -435,6 +591,7 @@ jobs:
       - name: default build using emcc
         if: success()
         run: |
+          make distclean
           make CC=emcc ENABLE_JIT=0 $PARALLEL
 
       - name: default build for system emulation using emcc
@@ -442,7 +599,7 @@ jobs:
         run: |
           make distclean
           make CC=emcc ENABLE_SYSTEM=1 ENABLE_JIT=0 $PARALLEL
-          make distclean ENABLE_SYSTEM=1
+          make distclean
 
       - name: check + tests
         if: success()
@@ -499,6 +656,15 @@ jobs:
             fi
           done
 
+      - name: JIT debug test
+        env:
+          CC: ${{ steps.install_cc.outputs.cc }}
+        run: |
+          # Run JIT tests with debug mode to catch register allocation and cache coherency issues
+          make distclean && make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL
+          make distclean && make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL
+        if: success() || failure()
+
       - name: undefined behavior test
         if: (success() || failure()) && steps.install_cc.outputs.cc == 'clang' # gcc on macOS/arm64 does not support sanitizers
         env:
@@ -507,6 +673,79 @@ jobs:
           make distclean && make ENABLE_UBSAN=1 check $PARALLEL
           make ENABLE_JIT=1 clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check $PARALLEL
 
+      - name: ThreadSanitizer race detection test
+        if: (success() || failure()) && steps.install_cc.outputs.cc == 'clang' # Only clang supports TSAN on macOS
+        env:
+          CC: ${{ steps.install_cc.outputs.cc }}
+        run: |
+          set -o pipefail
+
+          # macOS TSAN: Fixed memory at 0x150000000000 (main) and 0x151000000000 (JIT)
+          # Note: MAP_FIXED only pins these two mappings (it does not disable ASLR); SIP may restrict full ASLR control on GitHub runners
+
+          # Test 1: Interpreter + FULL4G
+          echo "=== TSAN Test 1/3: Interpreter + FULL4G (macOS ARM64) ==="
+          skipped=0 # set to 1 when TSAN could not map its memory, so no false "no races" is reported
+          make distclean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log || {
+            # Check if failure is due to MAP_FIXED restriction vs actual race
+            if grep -qE "MAP_FAILED|unexpected memory mapping|FATAL: ThreadSanitizer" tsan-interpreter.log; then
+              echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped"
+              skipped=1
+            else
+              echo "ERROR: Test execution failed"
+              cat tsan-interpreter.log
+              exit 1
+            fi
+          }
+          if [ -f tsan-interpreter.log ] && grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-interpreter.log; then
+            echo "ERROR: Data race detected in interpreter mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-interpreter.log
+            exit 1
+          fi
+          [ "$skipped" -eq 1 ] || echo "✓ No races detected in interpreter mode"
+
+          # Test 2: JIT tier-1
+          echo "=== TSAN Test 2/3: JIT Tier-1 (macOS ARM64) ==="
+          skipped=0
+          make ENABLE_JIT=1 clean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log || {
+            if grep -qE "MAP_FAILED|unexpected memory mapping|FATAL: ThreadSanitizer" tsan-jit.log; then
+              echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped"
+              skipped=1
+            else
+              echo "ERROR: Test execution failed"
+              cat tsan-jit.log
+              exit 1
+            fi
+          }
+          if [ -f tsan-jit.log ] && grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-jit.log; then
+            echo "ERROR: Data race detected in JIT tier-1 mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-jit.log
+            exit 1
+          fi
+          [ "$skipped" -eq 1 ] || echo "✓ No races detected in JIT tier-1 mode"
+
+          # Test 3: JIT tier-2 (T2C)
+          echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) (macOS ARM64) ==="
+          skipped=0
+          make ENABLE_JIT=1 clean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log || {
+            if grep -qE "MAP_FAILED|unexpected memory mapping|FATAL: ThreadSanitizer" tsan-t2c.log; then
+              echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped"
+              skipped=1
+            else
+              echo "ERROR: Test execution failed"
+              cat tsan-t2c.log
+              exit 1
+            fi
+          }
+          if [ -f tsan-t2c.log ] && grep -qE "ThreadSanitizer: data race|ThreadSanitizer: race on|WARNING: ThreadSanitizer:" tsan-t2c.log; then
+            echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!"
+            grep -A 10 "ThreadSanitizer:" tsan-t2c.log
+            exit 1
+          fi
+          [ "$skipped" -eq 1 ] || echo "✓ No races detected in JIT tier-2 (T2C) mode"
+
+          echo "=== All TSAN tests completed (macOS ARM64) ==="
+
       - name: boot Linux kernel test
         if: success()
         env:
diff --git a/Makefile b/Makefile
index 4481ed3a..2aeada06 100644
--- a/Makefile
+++ b/Makefile
@@ -20,13 +20,6 @@ CFLAGS += -include src/common.h -Isrc/
 
 OBJS_EXT :=
 
-# In the system test suite, the executable is an ELF file (e.g., MMU).
-# However, the Linux kernel emulation includes the Image, DT, and
-# root filesystem (rootfs). Therefore, the test suite needs this
-# flag to load the ELF and differentiate it from the kernel emulation.
-ENABLE_ELF_LOADER ?= 0
-$(call set-feature, ELF_LOADER)
-
 # Enable MOP fusion, easier for ablation study
 ENABLE_MOP_FUSION ?= 1
 $(call set-feature, MOP_FUSION)
@@ -80,6 +73,49 @@ endif
 ENABLE_ARCH_TEST ?= 0
 $(call set-feature, ARCH_TEST)
 
+# In the system test suite, the executable is an ELF file (e.g., MMU).
+# However, the Linux kernel emulation includes the Image, DT, and
+# root filesystem (rootfs). Therefore, the test suite needs this
+# flag to load the ELF and differentiate it from the kernel emulation.
+# User-space emulation (SYSTEM=0) always needs ELF loader, except for architecture tests.
+ifeq ($(ENABLE_SYSTEM), 0)
+    ifneq ($(ENABLE_ARCH_TEST), 1)
+        override ENABLE_ELF_LOADER := 1
+    else
+        ENABLE_ELF_LOADER ?= 0
+    endif
+else
+    ENABLE_ELF_LOADER ?= 0
+endif
+$(call set-feature, ELF_LOADER)
+
+# ThreadSanitizer support
+# TSAN on x86-64 memory layout:
+# Shadow: 0x02a000000000 - 0x7cefffffffff (reserved by TSAN)
+# App: 0x7cf000000000 - 0x7ffffffff000 (usable by application)
+#
+# We use MAP_FIXED to allocate FULL4G's 4GB memory at a fixed address
+# (0x7d0000000000) within TSAN's app range, ensuring compatibility.
+#
+# IMPORTANT: TSAN requires ASLR (Address Space Layout Randomization) to be
+# disabled to prevent system allocations from landing in TSAN's shadow memory.
+# Tests are run with 'setarch $(uname -m) -R' to disable ASLR.
+ENABLE_TSAN ?= 0
+ifeq ("$(ENABLE_TSAN)", "1")
+# SDL (uninstrumented system lib) creates threads TSAN cannot track; LTO interferes with TSAN instrumentation
+override ENABLE_SDL := 0
+override ENABLE_LTO := 0
+CFLAGS += -DTSAN_ENABLED # Signal code to use TSAN-compatible allocations
+# Disable ASLR for TSAN tests to prevent allocations in TSAN shadow memory (setarch is Linux-only; macOS requires different approach: SIP disable)
+ifeq ($(UNAME_S),Linux)
+BIN_WRAPPER = setarch $(shell uname -m) -R
+else
+BIN_WRAPPER =
+endif
+else
+BIN_WRAPPER =
+endif
+
 # Enable link-time optimization (LTO)
 ENABLE_LTO ?= 1
 ifeq ($(call has, LTO), 1)
@@ -280,6 +316,11 @@ ENABLE_JIT ?= 0
 $(call set-feature, JIT)
 ifeq ($(call has, JIT), 1)
     OBJS_EXT += jit.o
+    # JIT debug mode for early issue detection in CI/CD
+    ENABLE_JIT_DEBUG ?= 0
+    ifeq ("$(ENABLE_JIT_DEBUG)", "1")
+        CFLAGS += -DENABLE_JIT_DEBUG=1
+    endif
     ENABLE_T2C ?= 1
     $(call set-feature, T2C)
     ifeq ($(call has, T2C), 1)
@@ -332,6 +373,12 @@ CFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all
 LDFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all
 endif
 
+# ThreadSanitizer flags (ENABLE_TSAN is handled earlier so it can override SDL/LTO before they are evaluated)
+ifeq ("$(ENABLE_TSAN)", "1")
+CFLAGS += -fsanitize=thread -g
+LDFLAGS += -fsanitize=thread
+endif
+
 $(OUT)/emulate.o: CFLAGS += -foptimize-sibling-calls -fomit-frame-pointer -fno-stack-check -fno-stack-protector
 
 # .DEFAULT_GOAL should be set to all since the very first target is not all
@@ -350,7 +397,7 @@ DTB_DEPS := $(BUILD_DTB) $(BUILD_DTB2C)
 endif
 endif
 
-all: config $(DTB_DEPS) $(BUILD_DTB) $(BUILD_DTB2C) $(BIN)
+all: config $(DTB_DEPS) $(BIN)
 
 OBJS := \
     map.o \
@@ -395,7 +442,7 @@ $(OUT):
 
 $(BIN): $(OBJS) $(DEV_OBJS) | $(OUT)
 	$(VECHO) " LD\t$@\n"
-	$(Q)$(CC) -o $@ $(CFLAGS_emcc) $^ $(LDFLAGS)
+	$(Q)$(CC) -o $@ $(CFLAGS_emcc) $(filter-out %.dtb %.h,$^) $(LDFLAGS)
 
 $(CONFIG_FILE): FORCE
 	$(Q)mkdir -p $(OUT)
@@ -445,7 +492,7 @@ define check-test
 $(Q)true; \
 $(PRINTF) "Running $(3) ... "; \
 OUTPUT_FILE="$$(mktemp)"; \
-if (LC_ALL=C $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \
+if (LC_ALL=C $(BIN_WRAPPER) $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \
    [ "$$(cat "$$OUTPUT_FILE" | $(LOG_FILTER) | $(4))" = "$(5)" ]; then \
     $(call notice, [OK]); \
 else \
diff --git a/mk/toolchain.mk b/mk/toolchain.mk
index eef08de3..e576ecf7 100644
--- a/mk/toolchain.mk
+++ b/mk/toolchain.mk
@@ -22,10 +22,10 @@ ifneq ($(shell $(CC) --version | head -n 1 | grep emcc),)
                 $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING))
             endif
         else
-	    $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING))
+            $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING))
         endif
     else
-	$(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING))
+        $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING))
     endif
 
     # see commit 165c1a3 of emscripten
diff --git a/mk/wasm.mk b/mk/wasm.mk
index 78b818cd..a7597ce1 100644
--- a/mk/wasm.mk
+++ b/mk/wasm.mk
@@ -166,3 +166,21 @@ start-web: $(start_web_deps)
 
 .PHONY: check-demo-dir-exist start-web
 endif
+
+# For SYSTEM mode, DTB needs to be built regardless of whether we're using emcc
+# DTB is only built when SYSTEM=1 and ELF_LOADER=0
+ifeq ($(call has, SYSTEM), 1)
+ifeq ($(call has, ELF_LOADER), 0)
+# Add DTB as dependency for compilation stages
+# This is used by mk/system.mk for device object files
+deps_emcc += $(BUILD_DTB) $(BUILD_DTB2C)
+
+# For emcc builds: ensure DTB exists before emcc embeds it.
+# Make BIN directly depend on the DTB files as regular prerequisites.
+# Regular prerequisites get built first but also show up in $^, so the
+# $(BIN) link recipe in the top-level Makefile filters out %.dtb and %.h.
+ifeq ("$(CC_IS_EMCC)", "1")
+$(BIN): $(BUILD_DTB) $(BUILD_DTB2C)
+endif
+endif
+endif
diff --git a/src/emulate.c b/src/emulate.c
index e5e4cddf..67d26d86 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -24,6 +24,7 @@ extern struct target_ops gdbstub_ops;
 #endif
 
 #include "decode.h"
+#include "log.h"
 #include "mpool.h"
#include "riscv.h" #include "riscv_private.h" @@ -304,6 +305,7 @@ static block_t *block_alloc(riscv_t *rv) block->hot2 = false; block->has_loops = false; block->n_invoke = 0; + block->func = NULL; INIT_LIST_HEAD(&block->list); #if RV32_HAS(T2C) block->compiled = false; @@ -1176,22 +1178,32 @@ void rv_step(void *arg) #if RV32_HAS(JIT) #if RV32_HAS(T2C) /* executed through the tier-2 JIT compiler */ - if (block->hot2) { + /* Use acquire semantics to ensure we see func write before using it */ + if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE)) { ((exec_t2c_func_t) block->func)(rv); prev = NULL; continue; } /* check if invoking times of t1 generated code exceed threshold */ - else if (!block->compiled && block->n_invoke >= THRESHOLD) { - block->compiled = true; + else if (!__atomic_load_n(&block->compiled, __ATOMIC_RELAXED) && + __atomic_load_n(&block->n_invoke, __ATOMIC_RELAXED) >= + THRESHOLD) { + __atomic_store_n(&block->compiled, true, __ATOMIC_RELAXED); queue_entry_t *entry = malloc(sizeof(queue_entry_t)); if (unlikely(!entry)) { /* Malloc failed - reset compiled flag to allow retry later */ - block->compiled = false; + __atomic_store_n(&block->compiled, false, __ATOMIC_RELAXED); continue; } - entry->block = block; + /* Store cache key instead of pointer to prevent use-after-free */ +#if RV32_HAS(SYSTEM) + entry->key = + (uint64_t) block->pc_start | ((uint64_t) block->satp << 32); +#else + entry->key = (uint64_t) block->pc_start; +#endif pthread_mutex_lock(&rv->wait_queue_lock); list_add(&entry->list, &rv->wait_queue); + pthread_cond_signal(&rv->wait_queue_cond); pthread_mutex_unlock(&rv->wait_queue_lock); } #endif @@ -1203,7 +1215,11 @@ void rv_step(void *arg) * entry in compiled binary buffer. 
          */
         if (block->hot) {
+#if RV32_HAS(T2C)
+            __atomic_fetch_add(&block->n_invoke, 1, __ATOMIC_RELAXED);
+#else
             block->n_invoke++;
+#endif
             ((exec_block_func_t) state->buf)(
                 rv, (uintptr_t) (state->buf + block->offset));
             prev = NULL;
@@ -1215,10 +1231,20 @@ void rv_step(void *arg)
 #endif
         ) {
             jit_translate(rv, block);
-            ((exec_block_func_t) state->buf)(
-                rv, (uintptr_t) (state->buf + block->offset));
-            prev = NULL;
-            continue;
+            /* Only execute if translation succeeded (block is hot) */
+            if (block->hot) {
+                rv_log_debug("JIT: Executing block pc=0x%08x, offset=%u",
+                             block->pc_start, block->offset);
+                ((exec_block_func_t) state->buf)(
+                    rv, (uintptr_t) (state->buf + block->offset));
+                prev = NULL;
+                continue;
+            }
+            /* Fall through to interpreter if translation failed */
+            rv_log_debug(
+                "JIT: Translation failed for block pc=0x%08x, using "
+                "interpreter",
+                block->pc_start);
         }
         set_reset(&pc_set);
         has_loops = false;
diff --git a/src/io.c b/src/io.c
index 4ff325d3..975013ee 100644
--- a/src/io.c
+++ b/src/io.c
@@ -27,12 +27,47 @@ memory_t *memory_new(uint32_t size)
         return NULL;
     assert(mem);
 #if HAVE_MMAP
+#if defined(TSAN_ENABLED)
+    /* ThreadSanitizer compatibility: Use MAP_FIXED to allocate at a specific
+     * address to avoid conflicts with TSAN's shadow memory.
+     */
+#if defined(__x86_64__)
+    /* x86_64: Allocate within TSAN's app range (0x7cf000000000 - 0x7ffffffff000).
+     *
+     * Fixed address: 0x7d0000000000
+     * Size: up to 4GB (0x100000000)
+     * End: 0x7d0100000000 (well within app range)
+     */
+    void *fixed_addr = (void *) 0x7d0000000000UL;
+#elif defined(__aarch64__)
+    /* ARM64 (Linux and macOS/Apple Silicon): Use higher address range.
+     *
+     * Fixed address: 0x150000000000 (21TB)
+     * Size: up to 4GB (0x100000000)
+     * End: 0x150100000000
+     *
+     * This avoids TSAN's shadow memory and typical process allocations.
+     * On Linux, requires ASLR disabled via: setarch $(uname -m) -R
+     */
+    void *fixed_addr = (void *) 0x150000000000UL;
+#else
+#error "TSAN is only supported on x86_64 and aarch64"
+#endif
+    data_memory_base = mmap(fixed_addr, size, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+    if (data_memory_base == MAP_FAILED) {
+        free(mem);
+        return NULL;
+    }
+#else
+    /* Standard allocation without TSAN */
     data_memory_base = mmap(NULL, size, PROT_READ | PROT_WRITE,
                             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
     if (data_memory_base == MAP_FAILED) {
         free(mem);
         return NULL;
     }
+#endif
 #else
     data_memory_base = malloc(size);
     if (!data_memory_base) {
diff --git a/src/jit.c b/src/jit.c
index a6dfdb70..61ee6ee5 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -42,6 +42,7 @@
 #include "decode.h"
 #include "io.h"
 #include "jit.h"
+#include "log.h"
 #include "riscv.h"
 #include "riscv_private.h"
 #include "utils.h"
@@ -298,6 +299,89 @@ static inline void offset_map_insert(struct jit_state *state, block_t *block)
     __builtin___clear_cache((char *) (addr), (char *) (addr) + (size));
 #endif
 
+/* JIT debug helpers - enable with ENABLE_JIT_DEBUG=1 to detect issues early */
+#ifndef ENABLE_JIT_DEBUG
+#define ENABLE_JIT_DEBUG 0
+#endif
+
+#if ENABLE_JIT_DEBUG
+static void jit_dump_regmap(const char *ctx)
+{
+    rv_log_debug("JIT RegMap [%s]:", ctx);
+    for (int i = 0; i < n_host_regs; i++) {
+        if (register_map[i].vm_reg_idx >= 0) {
+            rv_log_debug(" Host R%d -> VM x%d (dirty=%d)",
+                         register_map[i].reg_idx, register_map[i].vm_reg_idx,
+                         register_map[i].dirty);
+        }
+    }
+}
+
+static void jit_check_regmap_conflict(int vm_reg,
+                                      int host_reg,
+                                      const char *insn)
+{
+    int found_idx = -1;
+    /* Scan every slot: vm_reg must map only to host_reg, and vice versa */
+    for (int i = 0; i < n_host_regs; i++) {
+        if (register_map[i].vm_reg_idx == vm_reg) {
+            if (found_idx >= 0 && found_idx != i) {
+                /* VM register mapped to multiple host registers */
+                rv_log_error(
+                    "JIT RegMap CONFLICT in %s: VM x%d mapped to "
+                    "Host R%d (idx %d) and R%d (idx %d)",
+                    insn, vm_reg, register_map[found_idx].reg_idx, found_idx,
+                    register_map[i].reg_idx, i);
+                jit_dump_regmap("CONFLICT");
+                assert(false);
+            }
+            found_idx = i;
+            /* Verify the found mapping is correct */
+            if (register_map[i].reg_idx != host_reg) {
+                rv_log_error(
+                    "JIT RegMap CONFLICT in %s: VM x%d expected at "
+                    "Host R%d but found at R%d",
+                    insn, vm_reg, host_reg, register_map[i].reg_idx);
+                jit_dump_regmap("CONFLICT");
+                assert(false);
+            }
+        } else if (register_map[i].reg_idx == host_reg &&
+                   register_map[i].vm_reg_idx >= 0) {
+            /* Host register holds different VM register */
+            rv_log_error(
+                "JIT RegMap CONFLICT in %s: Host R%d already holds "
+                "VM x%d, cannot map VM x%d",
+                insn, host_reg, register_map[i].vm_reg_idx, vm_reg);
+            jit_dump_regmap("CONFLICT");
+            assert(false);
+        }
+    }
+}
+
+static void jit_verify_cache_coherency(struct jit_state *state, uint32_t pc)
+    UNUSED;
+static void jit_verify_cache_coherency(struct jit_state *state, uint32_t pc)
+{
+    /* On ARM64, only logs the PC and buffer offset; no actual verification */
+#if defined(__aarch64__)
+    if (state->offset > 0) {
+        rv_log_debug("JIT: Cache coherency check at PC=0x%08x, offset=%u", pc,
+                     state->offset);
+    }
+#endif
+}
+#else
+#define jit_dump_regmap(ctx) \
+    do {                     \
+    } while (0)
+#define jit_check_regmap_conflict(vm_reg, host_reg, insn) \
+    do {                                                  \
+    } while (0)
+#define jit_verify_cache_coherency(state, pc) \
+    do {                                      \
+    } while (0)
+#endif
+
 static bool should_flush = false;
 static void emit_bytes(struct jit_state *state, void *data, uint32_t len)
 {
@@ -593,24 +677,30 @@ static void update_branch_imm(struct jit_state *state,
     assert((imm & 3) == 0);
     uint32_t insn;
     imm >>= 2;
+    rv_log_debug("JIT: Patching branch at offset=%u, imm=%d", offset, imm * 4);
+    /* Read instruction while in execute mode (MAP_JIT requirement) */
     memcpy(&insn, state->buf + offset, sizeof(uint32_t));
     if ((insn & 0xfe000000U) == 0x54000000U /* Conditional branch immediate. */
         || (insn & 0x7e000000U) ==
                0x34000000U) { /* Compare and branch immediate. */
         assert((imm >> 19) == INT64_C(-1) || (imm >> 19) == 0);
+        insn &= ~(0x7ffffU << 5); /* Clear old offset bits */
         insn |= (imm & 0x7ffff) << 5;
     } else if ((insn & 0x7c000000U) == 0x14000000U) {
         /* Unconditional branch immediate. */
         assert((imm >> 26) == INT64_C(-1) || (imm >> 26) == 0);
+        insn &= ~0x03ffffffU; /* Clear old offset bits */
         insn |= (imm & 0x03ffffffU) << 0;
     } else {
         assert(false);
         insn = BAD_OPCODE;
     }
 #if defined(__APPLE__) && defined(__aarch64__)
+    /* Switch to write mode only for writing */
     pthread_jit_write_protect_np(false);
 #endif
     memcpy(state->buf + offset, &insn, sizeof(uint32_t));
+    sys_icache_invalidate(state->buf + offset, sizeof(uint32_t));
 #if defined(__APPLE__) && defined(__aarch64__)
     pthread_jit_write_protect_np(true);
 #endif
@@ -1883,6 +1973,7 @@ static inline int map_vm_reg(struct jit_state *state, int vm_reg_idx)
     save_reg(state, idx);
     unmap_vm_reg(idx);
     set_vm_reg(idx, vm_reg_idx);
+    jit_check_regmap_conflict(vm_reg_idx, target_reg, "map_vm_reg");
     return target_reg;
 }
 
@@ -1926,6 +2017,15 @@ static inline int map_vm_reg_reserved(struct jit_state *state,
     save_reg(state, idx);
     unmap_vm_reg(idx);
     set_vm_reg(idx, vm_reg_idx);
+    jit_check_regmap_conflict(vm_reg_idx, target_reg, "map_vm_reg_reserved");
+    /* Additional check: ensure we didn't allocate the reserved register */
+    if (target_reg == reserved_reg_idx) {
+        rv_log_error(
+            "JIT RegMap ERROR: map_vm_reg_reserved allocated reserved "
+            "register R%d for VM x%d",
+            reserved_reg_idx, vm_reg_idx);
+        assert(false);
+    }
     return target_reg;
 }
 
@@ -2164,9 +2264,12 @@ void clear_hot(block_t *block)
 
 static void code_cache_flush(struct jit_state *state, riscv_t *rv)
 {
+    rv_log_info("JIT: Flushing code cache (n_blocks=%d, n_jumps=%d, offset=%u)",
+                state->n_blocks, state->n_jumps, state->offset);
     should_flush = false;
     state->offset = state->org_size;
     state->n_blocks = 0;
+    state->n_jumps = 0; /* Reset jump count when flushing */
     set_reset(&state->set);
     clear_cache_hot(rv->block_cache, (clear_func_t) clear_hot);
 #if RV32_HAS(T2C)
@@ -2196,6 +2299,7 @@ static void translate(struct jit_state *state, riscv_t *rv, block_t *block)
 
 static void resolve_jumps(struct jit_state *state)
 {
+    rv_log_debug("JIT: Resolving %d jumps", state->n_jumps);
     for (int i = 0; i < state->n_jumps; i++) {
         struct jump jump = state->jumps[i];
         int target_loc;
@@ -2218,6 +2322,10 @@ static void resolve_jumps(struct jit_state *state)
                     (if (jump.target_satp == state->offset_map[i].satp), )
                 {
                     target_loc = state->offset_map[i].offset;
+                    rv_log_debug(
+                        "JIT: Jump %d resolved to block pc=0x%08x, "
+                        "offset=%d",
+                        i, jump.target_pc, target_loc);
                     break;
                 }
             }
@@ -2229,6 +2337,7 @@ static void resolve_jumps(struct jit_state *state)
 
         uint8_t *offset_ptr = &state->buf[jump.offset_loc];
         memcpy(offset_ptr, &rel, sizeof(uint32_t));
+        sys_icache_invalidate(offset_ptr, sizeof(uint32_t));
 #elif defined(__aarch64__)
         int32_t rel = target_loc - jump.offset_loc;
         update_branch_imm(state, jump.offset_loc, rel);
@@ -2308,23 +2417,35 @@ void jit_translate(riscv_t *rv, block_t *block)
             ) {
                 block->offset = state->offset_map[i].offset;
                 block->hot = true;
+                rv_log_debug("JIT: Cache hit for block pc=0x%08x, offset=%u",
+                             block->pc_start, block->offset);
                 return;
             }
         }
         assert(NULL);
         __UNREACHABLE;
     }
+    rv_log_debug("JIT: Starting translation for block pc=0x%08x",
+                 block->pc_start);
 restart:
     memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
     state->n_jumps = 0;
     block->offset = state->offset;
     translate_chained_block(state, rv, block);
     if (unlikely(should_flush)) {
+        /* Mark block as not translated since translation was incomplete */
+        block->hot = false;
+        /* No need to reset block->offset: it is reassigned right after restart */
+        rv_log_debug("JIT: Translation triggered flush for block pc=0x%08x",
+                     block->pc_start);
         code_cache_flush(state, rv);
         goto restart;
     }
     resolve_jumps(state);
     block->hot = true;
+    rv_log_debug(
+        "JIT: Translation completed for block pc=0x%08x, offset=%u, size=%u",
+        block->pc_start, block->offset, state->offset - block->offset);
 }
 
 struct jit_state *jit_state_init(size_t size)
@@ -2336,6 +2457,52 @@ struct jit_state *jit_state_init(size_t size)
     state->offset = 0;
     state->size = size;
 
+#if defined(TSAN_ENABLED)
+    /* ThreadSanitizer compatibility: Allocate JIT code buffer at a fixed
+     * address above the main memory region to avoid conflicts.
+     */
+#if defined(__x86_64__)
+    /* x86_64 memory layout:
+     * Main memory: 0x7d0000000000 - 0x7d0100000000 (4GB for FULL4G)
+     * JIT buffer: 0x7d1000000000 + size
+     *
+     * This keeps both allocations in TSAN's app range (0x7cf000000000 -
+     * 0x7ffffffff000) and prevents overlap with main memory or TSAN shadow.
+     */
+    void *jit_addr = (void *) 0x7d1000000000UL;
+    state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
+                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED
+#if defined(__APPLE__)
+                          | MAP_JIT
+#endif
+                      ,
+                      -1, 0);
+#elif defined(__aarch64__)
+    /* ARM64 memory layout (Linux and macOS/Apple Silicon):
+     * Main memory: 0x150000000000 - 0x150100000000 (4GB for FULL4G)
+     * JIT buffer: 0x151000000000 + size
+     *
+     * Apple Silicon requires MAP_JIT for executable memory. The fixed
+     * address is chosen to avoid TSAN's shadow memory and typical process
+     * allocations. On Linux, requires ASLR disabled via: setarch $(uname -m) -R
+     */
+    void *jit_addr = (void *) 0x151000000000UL;
+    state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
+                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED
+#if defined(__APPLE__)
+                          | MAP_JIT
+#endif
+                      ,
+                      -1, 0);
+#else
+#error "TSAN is only supported on x86_64 and aarch64"
+#endif
+    if (state->buf == MAP_FAILED) {
+        free(state);
+        return NULL;
+    }
+#else
+    /* Standard allocation without TSAN */
     state->buf = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS
 #if defined(__APPLE__)
@@ -2347,8 +2514,7 @@ struct jit_state *jit_state_init(size_t size)
         free(state);
         return NULL;
     }
-    assert(state->buf != MAP_FAILED);
-
+#endif
     state->n_blocks = 0;
     set_reset(&state->set);
     reset_reg();
diff --git a/src/main.c b/src/main.c
index 4c851edd..cae03d10 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,6 +19,42 @@
 #include "riscv.h"
 #include "utils.h"
 
+/* ThreadSanitizer configuration for FULL4G compatibility
+ *
+ * On x86-64, emulated memory is mapped with MAP_FIXED at 0x7d0000000000,
+ * inside TSAN's application memory range (0x7cf000000000 - 0x7ffffffff000).
+ * This avoids conflicts with TSAN's shadow memory and allows race detection
+ * to work with FULL4G's 4GB address space.
+ *
+ * Configuration optimizes for race detection with minimal overhead.
+ */ +/* GCC uses __SANITIZE_THREAD__, clang uses __has_feature(thread_sanitizer) */ +#if defined(__SANITIZE_THREAD__) +const char *__tsan_default_options() +{ + return "halt_on_error=0" /* Continue after errors */ + ":report_bugs=1" /* Report data races */ + ":second_deadlock_stack=1" /* Full deadlock info */ + ":verbosity=0" /* Reduce noise */ + ":memory_limit_mb=0" /* No memory limit */ + ":history_size=7" /* Larger race detection window */ + ":io_sync=0"; /* Don't sync on I/O */ +} +#elif defined(__clang__) +#if __has_feature(thread_sanitizer) +const char *__tsan_default_options() +{ + return "halt_on_error=0" /* Continue after errors */ + ":report_bugs=1" /* Report data races */ + ":second_deadlock_stack=1" /* Full deadlock info */ + ":verbosity=0" /* Reduce noise */ + ":memory_limit_mb=0" /* No memory limit */ + ":history_size=7" /* Larger race detection window */ + ":io_sync=0"; /* Don't sync on I/O */ +} +#endif +#endif + /* enable program trace mode */ #if !RV32_HAS(SYSTEM) || (RV32_HAS(SYSTEM) && RV32_HAS(ELF_LOADER)) static bool opt_trace = false; diff --git a/src/riscv.c b/src/riscv.c index b892cf27..e500aa67 100644 --- a/src/riscv.c +++ b/src/riscv.c @@ -206,19 +206,41 @@ static pthread_t t2c_thread; static void *t2c_runloop(void *arg) { riscv_t *rv = (riscv_t *) arg; + pthread_mutex_lock(&rv->wait_queue_lock); while (!rv->quit) { - if (!list_empty(&rv->wait_queue)) { - queue_entry_t *entry = - list_last_entry(&rv->wait_queue, queue_entry_t, list); - pthread_mutex_lock(&rv->wait_queue_lock); - list_del_init(&entry->list); - pthread_mutex_unlock(&rv->wait_queue_lock); - pthread_mutex_lock(&rv->cache_lock); - t2c_compile(rv, entry->block); - pthread_mutex_unlock(&rv->cache_lock); - free(entry); - } + /* Wait for work or quit signal */ + while (list_empty(&rv->wait_queue) && !rv->quit) + pthread_cond_wait(&rv->wait_queue_cond, &rv->wait_queue_lock); + + if (rv->quit) + break; + + /* Extract work item while holding the lock */ + queue_entry_t *entry = + 
list_last_entry(&rv->wait_queue, queue_entry_t, list); + list_del_init(&entry->list); + pthread_mutex_unlock(&rv->wait_queue_lock); + + /* Perform compilation with cache lock */ + pthread_mutex_lock(&rv->cache_lock); + /* Look up block from cache using the key (might have been evicted) */ + uint32_t pc = (uint32_t) entry->key; + block_t *block = (block_t *) cache_get(rv->block_cache, pc, false); +#if RV32_HAS(SYSTEM) + /* Verify SATP matches (for system mode) */ + uint32_t satp = (uint32_t) (entry->key >> 32); + if (block && block->satp != satp) + block = NULL; +#endif + /* Compile only if block still exists in cache */ + if (block) + t2c_compile(rv, block); + pthread_mutex_unlock(&rv->cache_lock); + free(entry); + + pthread_mutex_lock(&rv->wait_queue_lock); } + pthread_mutex_unlock(&rv->wait_queue_lock); return NULL; } #endif @@ -777,6 +799,7 @@ riscv_t *rv_create(riscv_user_t rv_attr) /* prepare wait queue. */ pthread_mutex_init(&rv->wait_queue_lock, NULL); pthread_mutex_init(&rv->cache_lock, NULL); + pthread_cond_init(&rv->wait_queue_cond, NULL); INIT_LIST_HEAD(&rv->wait_queue); /* activate the background compilation thread. 
*/ pthread_create(&t2c_thread, NULL, t2c_runloop, rv); @@ -910,10 +933,24 @@ void rv_delete(riscv_t *rv) block_map_destroy(rv); #else #if RV32_HAS(T2C) + /* Signal the thread to quit */ + pthread_mutex_lock(&rv->wait_queue_lock); rv->quit = true; + pthread_cond_signal(&rv->wait_queue_cond); + pthread_mutex_unlock(&rv->wait_queue_lock); + pthread_join(t2c_thread, NULL); + + /* Clean up any remaining entries in wait queue */ + queue_entry_t *entry, *safe; + list_for_each_entry_safe (entry, safe, &rv->wait_queue, list) { + list_del(&entry->list); + free(entry); + } + pthread_mutex_destroy(&rv->wait_queue_lock); pthread_mutex_destroy(&rv->cache_lock); + pthread_cond_destroy(&rv->wait_queue_cond); jit_cache_exit(rv->jit_cache); #endif jit_state_exit(rv->jit_state); diff --git a/src/riscv_private.h b/src/riscv_private.h index 12a3bfd0..89165011 100644 --- a/src/riscv_private.h +++ b/src/riscv_private.h @@ -105,7 +105,7 @@ typedef struct block { #if RV32_HAS(JIT) && RV32_HAS(T2C) typedef struct { - block_t *block; + uint64_t key; /**< cache key (PC or PC|SATP) to look up block */ struct list_head list; } queue_entry_t; #endif @@ -197,6 +197,7 @@ struct riscv_internal { #if RV32_HAS(T2C) struct list_head wait_queue; pthread_mutex_t wait_queue_lock, cache_lock; + pthread_cond_t wait_queue_cond; volatile bool quit; /**< Determine the main thread is terminated or not */ #endif void *jit_state; diff --git a/src/rv32_jit.c b/src/rv32_jit.c index 8e084f62..6ea9c8e9 100644 --- a/src/rv32_jit.c +++ b/src/rv32_jit.c @@ -180,7 +180,7 @@ GEN(lb, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -232,7 +232,7 @@ GEN(lh, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = 
map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -284,7 +284,7 @@ GEN(lw, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -336,7 +336,7 @@ GEN(lbu, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -388,7 +388,7 @@ GEN(lhu, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); diff --git a/src/t2c.c b/src/t2c.c index 343b85e6..2115adaf 100644 --- a/src/t2c.c +++ b/src/t2c.c @@ -346,7 +346,9 @@ void t2c_compile(riscv_t *rv, block_t *block) jit_cache_update(rv->jit_cache, key, block->func); - block->hot2 = true; + /* Use release semantics to ensure func write is visible before hot2 is set + */ + __atomic_store_n(&block->hot2, true, __ATOMIC_RELEASE); } struct jit_cache *jit_cache_init()