From 92738d6ac51e5462d542a55d214319d04cfd87b8 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 18 Apr 2026 08:50:06 +0200 Subject: [PATCH 1/8] JIT xtensa: assembler Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_xtensa_asm.erl | 503 ++++++++++++++++++++++++ tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_tests_common.erl | 20 +- tests/libs/jit/jit_xtensa_asm_tests.erl | 304 ++++++++++++++ tests/libs/jit/tests.erl | 3 +- 6 files changed, 828 insertions(+), 4 deletions(-) create mode 100644 libs/jit/src/jit_xtensa_asm.erl create mode 100644 tests/libs/jit/jit_xtensa_asm_tests.erl diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index bb57b0fe8d..2d222da7a2 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -46,6 +46,7 @@ set(ERLANG_MODULES jit_wasm32_asm jit_x86_64 jit_x86_64_asm + jit_xtensa_asm ) include(../../../version.cmake) diff --git a/libs/jit/src/jit_xtensa_asm.erl b/libs/jit/src/jit_xtensa_asm.erl new file mode 100644 index 0000000000..075a2e3840 --- /dev/null +++ b/libs/jit/src/jit_xtensa_asm.erl @@ -0,0 +1,503 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_xtensa_asm). 
+
+-export([
+    add/3,
+    sub/3,
+    and_/3,
+    or_/3,
+    xor_/3,
+    srl/3,
+    slli/3,
+    srli/3,
+    srai/3,
+    ssr/1,
+    mull/3,
+    quos/3,
+    rems/3,
+    addi/3,
+    addmi/3,
+    movi/2,
+    mov/2,
+    l32i/3,
+    s32i/3,
+    beq/3,
+    bne/3,
+    blt/3,
+    beqz/2,
+    bnez/2,
+    bltz/2,
+    beqi/3,
+    bnei/3,
+    blti/3,
+    j/1,
+    jx/1,
+    callx8/1,
+    retw/0,
+    entry/2,
+    l32r/2,
+    nop/0,
+    break/2,
+    neg/2
+]).
+
+-ifdef(JIT_DWARF).
+-export([reg_to_num/1]).
+-endif.
+
+-export_type([
+    xtensa_register/0
+]).
+
+%% Xtensa Assembler for ESP32 (Xtensa LX6/LX7, windowed ABI)
+%% This assembler includes ESP32 specific instructions such as quos and rems.
+%%
+%% Xtensa Register Set (16 registers with windowed ABI):
+%%   a0  - Return address
+%%   a1  - Stack pointer
+%%   a2  - Function argument 0 / Return value
+%%   a3  - Function argument 1
+%%   a4  - Function argument 2
+%%   a5  - Function argument 3
+%%   a6  - Function argument 4
+%%   a7  - Function argument 5
+%%   a8  - Temporary (caller-saved)
+%%   a9  - Temporary (caller-saved)
+%%   a10 - Temporary (caller-saved)
+%%   a11 - Temporary (caller-saved)
+%%   a12 - Callee-saved
+%%   a13 - Callee-saved
+%%   a14 - Callee-saved
+%%   a15 - Callee-saved
+%%
+%% Instruction Encoding:
+%%   24-bit (3 bytes) for standard instructions
+%%   16-bit (2 bytes) for narrow/density instructions
+%%   Byte ordering: little-endian (byte0 = bits[7:0])
+%%
+%% Instruction Formats (24-bit):
+%%   RRR:  op2[23:20] | op1[19:16] | r[15:12] | s[11:8] | t[7:4] | op0[3:0]
+%%   RRI8: imm8[23:16]| r[15:12] | s[11:8] | t[7:4] | op0[3:0]
+%%   RI16: imm16[23:8] | t[7:4] | op0[3:0]
+%%   CALL: offset[23:6] | n[5:4] | op0[3:0]
+%%   BRI8: imm8[23:16]| r[15:12] | s[11:8] | t[7:4] | op0[3:0]
+%%   BRI12:imm12[23:12] | s[11:8] | t[7:4] | op0[3:0]
+%%
+%% See: Xtensa Instruction Set Architecture (ISA) Reference Manual
+
+-type xtensa_register() ::
+    a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | a8
+    | a9
+    | a10
+    | a11
+    | a12
+    | a13
+    | a14
+    | a15.
+
+-spec reg_to_num(xtensa_register()) -> 0..15.
+reg_to_num(a0) -> 0;
+reg_to_num(a1) -> 1;
+reg_to_num(a2) -> 2;
+reg_to_num(a3) -> 3;
+reg_to_num(a4) -> 4;
+reg_to_num(a5) -> 5;
+reg_to_num(a6) -> 6;
+reg_to_num(a7) -> 7;
+reg_to_num(a8) -> 8;
+reg_to_num(a9) -> 9;
+reg_to_num(a10) -> 10;
+reg_to_num(a11) -> 11;
+reg_to_num(a12) -> 12;
+reg_to_num(a13) -> 13;
+reg_to_num(a14) -> 14;
+reg_to_num(a15) -> 15.
+
+%%=============================================================================
+%% 24-bit Instruction Format Encoders
+%%=============================================================================
+
+%% RRR: Op1 arg[23:20] | Op2 arg[19:16] | r[15:12] | s[11:8] | t[7:4] | op0[3:0]; the format table names the top two fields op2|op1
+-spec encode_rrr(integer(), integer(), integer(), integer(), integer(), integer()) -> binary().
+encode_rrr(Op0, T, S, R, Op1, Op2) ->
+    Instr = (Op1 bsl 20) bor (Op2 bsl 16) bor (R bsl 12) bor (S bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:24/little>>.  % 3 bytes, least-significant byte first
+
+%% RRI8 format: imm8[23:16] | r[15:12] | s[11:8] | t[7:4] | op0[3:0] (Imm8 is masked to 8 bits, so negatives wrap)
+-spec encode_rri8(integer(), integer(), integer(), integer(), integer()) -> binary().
+encode_rri8(Op0, T, S, R, Imm8) ->
+    Instr = ((Imm8 band 16#FF) bsl 16) bor (R bsl 12) bor (S bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:24/little>>.
+
+%% RI16 format: imm16[23:8] | t[7:4] | op0[3:0] (Imm16 is masked to 16 bits)
+-spec encode_ri16(integer(), integer(), integer()) -> binary().
+encode_ri16(Op0, T, Imm16) ->
+    Instr = ((Imm16 band 16#FFFF) bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:24/little>>.
+
+%% CALL format: offset[23:6] | n[5:4] | op0[3:0] (Offset18 is masked to 18 bits)
+-spec encode_call(integer(), integer(), integer()) -> binary().
+encode_call(Op0, N, Offset18) ->
+    Instr = ((Offset18 band 16#3FFFF) bsl 6) bor (N bsl 4) bor Op0,
+    <<Instr:24/little>>.
+
+%% BRI8 format for conditional branches: imm8[23:16] | r[15:12] | s[11:8] | t[7:4] | op0[3:0]
+-spec encode_bri8(integer(), integer(), integer(), integer(), integer()) -> binary().
+encode_bri8(Op0, T, S, R, Imm8) ->
+    Instr = ((Imm8 band 16#FF) bsl 16) bor (R bsl 12) bor (S bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:24/little>>.
+
+%% BRI12 format for BEQZ/BNEZ/BGEZ/BLTZ:
+%% imm12[23:12] | s[11:8] | t[7:4] | op0[3:0] (Imm12 is masked to 12 bits)
+-spec encode_bri12(integer(), integer(), integer(), integer()) -> binary().
+encode_bri12(Op0, T, S, Imm12) ->
+    Instr = ((Imm12 band 16#FFF) bsl 12) bor (S bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:24/little>>.  % 3 bytes, least-significant byte first
+
+%%=============================================================================
+%% 16-bit Narrow Instruction Format Encoders
+%%=============================================================================
+
+%% RRRN format: r[15:12] | s[11:8] | t[7:4] | op0[3:0]
+-spec encode_rrrn(integer(), integer(), integer(), integer()) -> binary().
+encode_rrrn(Op0, T, S, R) ->
+    Instr = (R bsl 12) bor (S bsl 8) bor (T bsl 4) bor Op0,
+    <<Instr:16/little>>.  % narrow form: 2 bytes, least-significant byte first
+
+%%=============================================================================
+%% RRR-type Arithmetic Instructions
+%%=============================================================================
+
+%% ADD: AR[r] = AR[s] + AR[t]
+%% op0=0, op1=8, op2=0 (op1/op2 here follow encode_rrr's argument names: op1 lands in bits[23:20])
+-spec add(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+add(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#8, 0).
+
+%% SUB: AR[r] = AR[s] - AR[t]
+%% op0=0, op1=12, op2=0
+-spec sub(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+sub(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#C, 0).
+
+%% AND: AR[r] = AR[s] & AR[t]
+%% op0=0, op1=1, op2=0
+-spec and_(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+and_(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#1, 0).
+
+%% OR: AR[r] = AR[s] | AR[t]
+%% op0=0, op1=2, op2=0
+-spec or_(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+or_(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#2, 0).
+
+%% XOR: AR[r] = AR[s] ^ AR[t]
+%% op0=0, op1=3, op2=0
+-spec xor_(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+xor_(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#3, 0).
+
+%% NEG: AR[r] = 0 - AR[t]
+%% op0=0, t=At, s=0, r=Ar, bits[23:20]=6, bits[19:16]=0
+-spec neg(xtensa_register(), xtensa_register()) -> binary().
+neg(Ar, At) ->
+    encode_rrr(0, reg_to_num(At), 0, reg_to_num(Ar), 16#6, 0).
+
+%%=============================================================================
+%% Shift Instructions
+%%=============================================================================
+
+%% SRL: AR[r] = AR[t] >> SAR (logical)
+%% Must set SAR with SSR first.
+%% op0=0, bits[23:20]=0x9, bits[19:16]=1, s=0
+-spec srl(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+srl(Ar, _As, At) ->  % the second argument is ignored: the s field is always emitted as 0
+    encode_rrr(0, reg_to_num(At), 0, reg_to_num(Ar), 16#9, 16#1).
+
+%% SSR: Set SAR for right shift. SAR = AR[s][4:0]
+%% op0=0, op1=4, op2=0, r=0, t=0
+-spec ssr(xtensa_register()) -> binary().
+ssr(As) ->
+    encode_rrr(0, 0, reg_to_num(As), 0, 16#4, 0).
+
+%% SLLI: AR[r] = AR[s] << sa (1..31)
+%% op0=0, RRR format with shift amount encoded as (32 - sa).
+%% The encoded value split: sa_enc[4] at bits[23:20], op=1 at bits[19:16],
+%% r at bits[15:12], s at bits[11:8], sa_enc[3:0] at bits[7:4].
+-spec slli(xtensa_register(), xtensa_register(), 1..31) -> binary().
+slli(Ar, As, Sa) when Sa >= 1, Sa =< 31 ->
+    SaEnc = 32 - Sa,
+    Sa4 = (SaEnc bsr 4) band 1,
+    Sa30 = SaEnc band 16#F,
+    Instr =
+        (Sa4 bsl 20) bor (16#1 bsl 16) bor (reg_to_num(Ar) bsl 12) bor
+            (reg_to_num(As) bsl 8) bor (Sa30 bsl 4) bor 0,
+    <<Instr:24/little>>.
+
+%% SRLI: AR[r] = AR[t] >> sa (logical, immediate); sa is limited to 0..15 by the guard
+%% op0=0, t=At, s=sa[3:0], r=Ar, bits[23:20]=4, bits[19:16]=1
+-spec srli(xtensa_register(), xtensa_register(), 0..15) -> binary().
+srli(Ar, At, Sa) when Sa >= 0, Sa =< 15 ->
+    encode_rrr(0, reg_to_num(At), Sa band 16#F, reg_to_num(Ar), 16#4, 1).
+
+%% SRAI: AR[r] = AR[t] >> sa (arithmetic, immediate)
+%% op0=0, bits[23:20] = 2 + sa[4], bits[19:16] = 1, r=Ar, s=sa[3:0], t=At
+-spec srai(xtensa_register(), xtensa_register(), 0..31) -> binary().
+srai(Ar, At, Sa) when Sa >= 0, Sa =< 31 ->
+    Sa4 = (Sa bsr 4) band 1,
+    Sa30 = Sa band 16#F,
+    Instr =
+        ((2 + Sa4) bsl 20) bor (16#1 bsl 16) bor (reg_to_num(Ar) bsl 12) bor
+            (Sa30 bsl 8) bor (reg_to_num(At) bsl 4) bor 0,
+    <<Instr:24/little>>.  % 3 bytes, least-significant byte first
+
+%%=============================================================================
+%% Multiply/Divide Instructions
+%%=============================================================================
+
+%% MULL: AR[r] = AR[s] * AR[t] (low 32 bits)
+%% op0=0, op1=8, op2=2 (op1/op2 follow encode_rrr's argument names: op1 lands in bits[23:20])
+-spec mull(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+mull(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#8, 16#2).
+
+%% QUOS: AR[r] = AR[s] / AR[t] (signed)
+%% op0=0, op1=13, op2=2
+-spec quos(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+quos(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#D, 16#2).
+
+%% REMS: AR[r] = AR[s] % AR[t] (signed)
+%% op0=0, op1=15, op2=2
+-spec rems(xtensa_register(), xtensa_register(), xtensa_register()) -> binary().
+rems(Ar, As, At) ->
+    encode_rrr(0, reg_to_num(At), reg_to_num(As), reg_to_num(Ar), 16#F, 16#2).
+
+%%=============================================================================
+%% Immediate Arithmetic
+%%=============================================================================
+
+%% ADDI: AR[t] = AR[s] + sign_extend(imm8)
+%% op0=2, r=0xC; immediates outside -128..127 match no clause
+-spec addi(xtensa_register(), xtensa_register(), -128..127) -> binary().
+addi(At, As, Imm8) when Imm8 >= -128, Imm8 =< 127 ->
+    encode_rri8(16#2, reg_to_num(At), reg_to_num(As), 16#C, Imm8).
+
+%% ADDMI: AR[t] = AR[s] + sign_extend(imm8 << 8)
+%% op0=2, r=0xD
+%% Imm must be a multiple of 256 in range -32768..32512; anything else raises {addmi_value_out_of_range, Imm}
+-spec addmi(xtensa_register(), xtensa_register(), integer()) -> binary().
+addmi(At, As, Imm) when Imm rem 256 =:= 0, Imm >= -32768, Imm =< 32512 ->
+    Imm8 = (Imm bsr 8) band 16#FF,
+    encode_rri8(16#2, reg_to_num(At), reg_to_num(As), 16#D, Imm8);
+addmi(_At, _As, Imm) ->
+    error({addmi_value_out_of_range, Imm}).
+
+%%=============================================================================
+%% Move Instructions
+%%=============================================================================
+
+%% MOVI: AR[t] = sign_extend_12(imm)
+%% op0=2, r=0xA, imm12 split: bits[11:8] in s field, bits[7:0] in imm8 field
+-spec movi(xtensa_register(), -2048..2047) -> binary().
+movi(At, Imm) when Imm >= -2048, Imm =< 2047 ->
+    Imm12 = Imm band 16#FFF,
+    Imm8 = Imm12 band 16#FF,
+    Imm118 = (Imm12 bsr 8) band 16#F,  % imm12 bits [11:8], passed in the s-field slot
+    encode_rri8(16#2, reg_to_num(At), Imm118, 16#A, Imm8).
+
+%% MOV.N: AR[t] = AR[s] (narrow, 2 bytes); Dst is placed in the t field, Src in the s field
+-spec mov(xtensa_register(), xtensa_register()) -> binary().
+mov(Dst, Src) ->
+    encode_rrrn(16#D, reg_to_num(Dst), reg_to_num(Src), 0).
+
+%%=============================================================================
+%% Load Instructions
+%%=============================================================================
+
+%% L32I: AR[t] = mem[AR[s] + imm8*4]
+%% op0=2, r=2; Offset is in bytes (0..1020, multiple of 4) and is stored as Offset div 4
+-spec l32i(xtensa_register(), xtensa_register(), non_neg_integer()) -> binary().
+l32i(At, As, Offset) when Offset >= 0, Offset =< 1020, (Offset rem 4) =:= 0 ->
+    encode_rri8(16#2, reg_to_num(At), reg_to_num(As), 16#2, Offset div 4).
+
+%%=============================================================================
+%% Store Instructions
+%%=============================================================================
+
+%% S32I: mem[AR[s] + imm8*4] = AR[t]
+%% op0=2, r=6; Offset is in bytes (0..1020, multiple of 4) and is stored as Offset div 4
+-spec s32i(xtensa_register(), xtensa_register(), non_neg_integer()) -> binary().
+s32i(At, As, Offset) when Offset >= 0, Offset =< 1020, (Offset rem 4) =:= 0 ->
+    encode_rri8(16#2, reg_to_num(At), reg_to_num(As), 16#6, Offset div 4).
+
+%%=============================================================================
+%% Branch Instructions
+%%=============================================================================
+
+%% BEQ: if AR[s] == AR[t] then PC = PC + 4 + sign_extend(imm8)
+%% op0=7, r=1
+-spec beq(xtensa_register(), xtensa_register(), integer()) -> binary().
+beq(As, At, Offset) when Offset >= -128, Offset =< 127 ->
+    encode_bri8(16#7, reg_to_num(At), reg_to_num(As), 16#1, Offset).
+
+%% BNE: if AR[s] != AR[t] then PC = PC + 4 + sign_extend(imm8)
+%% op0=7, r=9
+-spec bne(xtensa_register(), xtensa_register(), integer()) -> binary().
+bne(As, At, Offset) when Offset >= -128, Offset =< 127 ->
+    encode_bri8(16#7, reg_to_num(At), reg_to_num(As), 16#9, Offset).
+
+%% BLT: if AR[s] < AR[t] (signed) then PC = PC + 4 + sign_extend(imm8)
+%% op0=7, r=2
+-spec blt(xtensa_register(), xtensa_register(), integer()) -> binary().
+blt(As, At, Offset) when Offset >= -128, Offset =< 127 ->
+    encode_bri8(16#7, reg_to_num(At), reg_to_num(As), 16#2, Offset).
+
+%% BEQZ: if AR[s] == 0 then PC = PC + 4 + sign_extend(imm12)
+%% Encoding: imm12[11:0] in bits[23:12], s[11:8], t=0001b[7:4], op0=0110b[3:0]
+-spec beqz(xtensa_register(), integer()) -> binary().
+beqz(As, Offset) when Offset >= -2048, Offset =< 2047 ->
+    encode_bri12(16#6, 16#1, reg_to_num(As), Offset band 16#FFF).
+
+%% BNEZ: if AR[s] != 0 then PC = PC + 4 + sign_extend(imm12)
+%% op0=6, t=5 (BNZ type)
+-spec bnez(xtensa_register(), integer()) -> binary().
+bnez(As, Offset) when Offset >= -2048, Offset =< 2047 ->
+    encode_bri12(16#6, 16#5, reg_to_num(As), Offset band 16#FFF).
+
+%% BLTZ: if AR[s] < 0 then PC = PC + 4 + sign_extend(imm12)
+%% op0=6, t=9
+-spec bltz(xtensa_register(), integer()) -> binary().
+bltz(As, Offset) when Offset >= -2048, Offset =< 2047 ->
+    encode_bri12(16#6, 16#9, reg_to_num(As), Offset band 16#FFF).
+
+%% BEQI: if AR[s] == b4const(r) then PC = PC + 4 + sign_extend(imm8)
+%% op0=6, r encodes constant via b4const table, t=2
+-spec beqi(xtensa_register(), integer(), integer()) -> binary().
+beqi(As, Imm, Offset) when Offset >= -128, Offset =< 127 ->
+    R = b4const_encode(Imm),
+    encode_bri8(16#6, 16#2, reg_to_num(As), R, Offset).
+
+%% BNEI: if AR[s] != b4const(r) then PC = PC + 4 + sign_extend(imm8)
+%% op0=6, t=6 (0x6)
+-spec bnei(xtensa_register(), integer(), integer()) -> binary().
+bnei(As, Imm, Offset) when Offset >= -128, Offset =< 127 ->
+    R = b4const_encode(Imm),
+    encode_bri8(16#6, 16#6, reg_to_num(As), R, Offset).
+
+%% BLTI: if AR[s] < b4const(r) then PC = PC + 4 + sign_extend(imm8)
+%% op0=6, t=0xA
+-spec blti(xtensa_register(), integer(), integer()) -> binary().
+blti(As, Imm, Offset) when Offset >= -128, Offset =< 127 ->
+    R = b4const_encode(Imm),
+    encode_bri8(16#6, 16#A, reg_to_num(As), R, Offset).
+
+%%=============================================================================
+%% B4CONST encoding table
+%%=============================================================================
+
+%% Maps a BEQI/BNEI/BLTI constant to its r-field index; a constant not listed here matches no clause
+b4const_encode(-1) -> 0;
+b4const_encode(1) -> 1;
+b4const_encode(2) -> 2;
+b4const_encode(3) -> 3;
+b4const_encode(4) -> 4;
+b4const_encode(5) -> 5;
+b4const_encode(6) -> 6;
+b4const_encode(7) -> 7;
+b4const_encode(8) -> 8;
+b4const_encode(10) -> 9;
+b4const_encode(12) -> 10;
+b4const_encode(16) -> 11;
+b4const_encode(32) -> 12;
+b4const_encode(64) -> 13;
+b4const_encode(128) -> 14;
+b4const_encode(256) -> 15.
+
+%%=============================================================================
+%% Jump Instructions
+%%=============================================================================
+
+%% J: PC = PC + sign_extend(offset18) + 4
+%% CALL format: op0=6, n=0
+%% offset18 is an 18-bit signed value: range -131072..131071; anything else raises {j_offset_out_of_range, Offset}
+-spec j(integer()) -> binary().
+j(Offset) when Offset >= -131072, Offset =< 131071 ->
+    encode_call(16#6, 0, Offset band 16#3FFFF);
+j(Offset) ->
+    error({j_offset_out_of_range, Offset}).
+
+%% JX: PC = AR[s]
+%% op0=0, t=0xa, s=register, r=0, op1=0, op2=0
+-spec jx(xtensa_register()) -> binary().
+jx(As) ->
+    encode_rrr(0, 16#A, reg_to_num(As), 0, 0, 0).
+
+%% CALLX8: indirect windowed call through register with 8-register rotation
+%% op0=0, t=0xE, s=register, r=0, op1=0, op2=0
+-spec callx8(xtensa_register()) -> binary().
+callx8(As) ->
+    encode_rrr(0, 16#E, reg_to_num(As), 0, 0, 0).
+
+%% RETW: return from windowed subroutine. Rotates window back and PC = a0[29:0]
+%% op0=0, t=0x9, s=0, r=0, op1=0, op2=0
+-spec retw() -> binary().
+retw() ->
+    encode_rrr(0, 16#9, 0, 0, 0, 0).
+
+%% ENTRY: allocate stack frame and rotate register window
+%% BRI12 format: op0=6, t=3, s=register, imm12=framesize/8 (FrameSize: multiple of 8, 0..32760)
+-spec entry(xtensa_register(), non_neg_integer()) -> binary().
+entry(As, FrameSize) when (FrameSize rem 8) =:= 0, FrameSize >= 0, FrameSize =< 32760 ->
+    Imm12 = FrameSize bsr 3,
+    encode_bri12(16#6, 16#3, reg_to_num(As), Imm12 band 16#FFF).
+
+%%=============================================================================
+%% L32R and Misc Instructions
+%%=============================================================================
+
+%% L32R: AR[t] = mem[((PC + 3) & ~3) + sign_extend(imm16) * 4 - ...] (formula left incomplete; see the ISA manual)
+%% op0=1; Imm16 is the raw 16-bit field value: it is masked to 16 bits, not range-checked or scaled here
+-spec l32r(xtensa_register(), integer()) -> binary().
+l32r(At, Imm16) ->
+    encode_ri16(16#1, reg_to_num(At), Imm16).
+
+%% NOP
+%% op0=0, t=15, s=0, r=2, op1=0, op2=0
+-spec nop() -> binary().
+nop() ->
+    encode_rrr(0, 15, 0, 2, 0, 0).
+
+%% BREAK: breakpoint/debug trap
+%% op0=0, op1=0, op2=0, r=4; S and T are each masked to 4 bits
+-spec break(integer(), integer()) -> binary().
+break(S, T) ->
+    encode_rrr(0, T band 16#F, S band 16#F, 16#4, 0, 0).
diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 1446eebe2e..6e03cae7db 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -44,6 +44,7 @@ set(ERLANG_MODULES jit_wasm32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests + jit_xtensa_asm_tests ) if (NOT AVM_DISABLE_JIT_DWARF) diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index 70f63377e5..21783251be 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -148,6 +148,14 @@ find_binutils_beam(Arch) -> -spec toolchain_prefixes(atom()) -> [string()]. toolchain_prefixes(arm32) -> toolchain_prefixes(arm); +toolchain_prefixes(xtensa) -> + %% Prefer the ESP32-specific LE toolchain over the generic BE one. + %% The generic xtensa-esp-elf toolchain defaults to big-endian ELF, + %% whose objdump displays raw bytes rather than instruction words, + %% breaking hex_to_bin/3's little-endian conversion. + ["xtensa-esp32-elf", "xtensa-esp32s2-elf", "xtensa-esp32s3-elf"] ++ + ["xtensa" ++ V || V <- ["-unknown-elf", "-elf", "-linux-gnu"]] ++ + ["xtensa-lx6-linux-gnu"]; toolchain_prefixes(Arch) -> ArchStr = atom_to_list(Arch), Variants = [ @@ -249,7 +257,9 @@ get_asm_header(riscv64) -> ".text\n"; get_asm_header(wasm32) -> %% Include a memory so that memory instruction tests don't need extra module context. - "(module\n (memory 1)\n (func\n". + "(module\n (memory 1)\n (func\n"; +get_asm_header(xtensa) -> + ".text\n". -spec get_asm_footer(atom()) -> string(). get_asm_footer(wasm32) -> @@ -272,7 +282,9 @@ get_as_flags(x86_64) -> get_as_flags(riscv32) -> "-march=rv32imac"; get_as_flags(riscv64) -> - "-march=rv64imac -mabi=lp64". + "-march=rv64imac -mabi=lp64"; +get_as_flags(xtensa) -> + "--no-transform". %% File extensions for assembler input/output -spec asm_file_exts(atom(), string()) -> {string(), string()}. 
@@ -551,7 +563,9 @@ get_objdump_flags(arm_thumb2) ->
 get_objdump_flags(riscv64) ->
     "-m riscv:rv64";
 get_objdump_flags(wasm32) ->
-    "".
+    "";
+get_objdump_flags(xtensa) ->
+    "-m xtensa -EL".
 
 %% Write binary data to a file suitable for disassembly.
 %% For wasm32, wraps raw bytes in a WASM module; for others, writes raw bytes.
diff --git a/tests/libs/jit/jit_xtensa_asm_tests.erl b/tests/libs/jit/jit_xtensa_asm_tests.erl
new file mode 100644
index 0000000000..4b4c151e95
--- /dev/null
+++ b/tests/libs/jit/jit_xtensa_asm_tests.erl
@@ -0,0 +1,304 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2026 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_xtensa_asm_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(_assertAsmEqual(Bin, Str, Value),  % NOTE(review): asm/3 presumably cross-checks Bin against Str via binutils; confirm in jit_tests_common
+    ?_assertEqual(jit_tests_common:asm(xtensa, Bin, Str), Value)
+).
+
+add_test_() ->  % each case: expected little-endian bytes, assembly text, encoder call under test
+    [
+        ?_assertAsmEqual(<<16#803450:24/little>>, "add a3, a4, a5", jit_xtensa_asm:add(a3, a4, a5)),
+        ?_assertAsmEqual(<<16#800000:24/little>>, "add a0, a0, a0", jit_xtensa_asm:add(a0, a0, a0)),
+        ?_assertAsmEqual(
+            <<16#80FFE0:24/little>>, "add a15, a15, a14", jit_xtensa_asm:add(a15, a15, a14)
+        )
+    ].
+
+sub_test_() ->
+    [
+        ?_assertAsmEqual(<<16#C03450:24/little>>, "sub a3, a4, a5", jit_xtensa_asm:sub(a3, a4, a5)),
+        ?_assertAsmEqual(
+            <<16#C0BAC0:24/little>>, "sub a11, a10, a12", jit_xtensa_asm:sub(a11, a10, a12)
+        )
+    ].
+
+and_test_() ->
+    [
+        ?_assertAsmEqual(<<16#103450:24/little>>, "and a3, a4, a5", jit_xtensa_asm:and_(a3, a4, a5))
+    ].
+
+or_test_() ->
+    [
+        ?_assertAsmEqual(<<16#203450:24/little>>, "or a3, a4, a5", jit_xtensa_asm:or_(a3, a4, a5))
+    ].
+
+xor_test_() ->
+    [
+        ?_assertAsmEqual(<<16#303450:24/little>>, "xor a3, a4, a5", jit_xtensa_asm:xor_(a3, a4, a5))
+    ].
+
+slli_test_() ->  % covers shift amounts 8, 16 and the upper bound 31
+    [
+        ?_assertAsmEqual(<<16#113480:24/little>>, "slli a3, a4, 8", jit_xtensa_asm:slli(a3, a4, 8)),
+        ?_assertAsmEqual(
+            <<16#113400:24/little>>, "slli a3, a4, 16", jit_xtensa_asm:slli(a3, a4, 16)
+        ),
+        ?_assertAsmEqual(
+            <<16#013110:24/little>>, "slli a3, a1, 31", jit_xtensa_asm:slli(a3, a1, 31)
+        )
+    ].
+
+srli_test_() ->
+    [
+        ?_assertAsmEqual(<<16#413840:24/little>>, "srli a3, a4, 8", jit_xtensa_asm:srli(a3, a4, 8)),
+        ?_assertAsmEqual(<<16#410040:24/little>>, "srli a0, a4, 0", jit_xtensa_asm:srli(a0, a4, 0))
+    ].
+
+srai_test_() ->  % 20 and 31 exercise sa[4] = 1, 8 exercises sa[4] = 0
+    [
+        ?_assertAsmEqual(
+            <<16#313440:24/little>>, "srai a3, a4, 20", jit_xtensa_asm:srai(a3, a4, 20)
+        ),
+        ?_assertAsmEqual(<<16#213840:24/little>>, "srai a3, a4, 8", jit_xtensa_asm:srai(a3, a4, 8)),
+        ?_assertAsmEqual(
+            <<16#313F40:24/little>>, "srai a3, a4, 31", jit_xtensa_asm:srai(a3, a4, 31)
+        )
+    ].
+
+ssr_test_() ->
+    [
+        ?_assertAsmEqual(<<16#400300:24/little>>, "ssr a3", jit_xtensa_asm:ssr(a3))
+    ].
+
+srl_test_() ->  % srl/3 is called with three registers but the assembly text has only two operands
+    [
+        ?_assertAsmEqual(<<16#913050:24/little>>, "srl a3, a5", jit_xtensa_asm:srl(a3, a4, a5)),
+        ?_assertAsmEqual(<<16#910000:24/little>>, "srl a0, a0", jit_xtensa_asm:srl(a0, a0, a0))
+    ].
+
+mull_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#823450:24/little>>, "mull a3, a4, a5", jit_xtensa_asm:mull(a3, a4, a5)
+        )
+    ].
+
+quos_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#D23450:24/little>>, "quos a3, a4, a5", jit_xtensa_asm:quos(a3, a4, a5)
+        )
+    ].
+
+rems_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#F23450:24/little>>, "rems a3, a4, a5", jit_xtensa_asm:rems(a3, a4, a5)
+        )
+    ].
+
+addi_test_() ->  % includes both ends of the accepted immediate range (-128 and 127)
+    [
+        ?_assertAsmEqual(
+            <<16#0AC432:24/little>>, "addi a3, a4, 10", jit_xtensa_asm:addi(a3, a4, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#FFC432:24/little>>, "addi a3, a4, -1", jit_xtensa_asm:addi(a3, a4, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#80C432:24/little>>, "addi a3, a4, -128", jit_xtensa_asm:addi(a3, a4, -128)
+        ),
+        ?_assertAsmEqual(
+            <<16#7FC432:24/little>>, "addi a3, a4, 127", jit_xtensa_asm:addi(a3, a4, 127)
+        ),
+        ?_assertAsmEqual(<<16#00C002:24/little>>, "addi a0, a0, 0", jit_xtensa_asm:addi(a0, a0, 0))
+    ].
+
+addmi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#01D432:24/little>>, "addmi a3, a4, 256", jit_xtensa_asm:addmi(a3, a4, 256)
+        ),
+        ?_assertAsmEqual(
+            <<16#FFD432:24/little>>, "addmi a3, a4, -256", jit_xtensa_asm:addmi(a3, a4, -256)
+        )
+    ].
+
+movi_test_() ->  % includes both ends of the accepted 12-bit range (-2048 and 2047)
+    [
+        ?_assertAsmEqual(<<16#00A032:24/little>>, "movi a3, 0", jit_xtensa_asm:movi(a3, 0)),
+        ?_assertAsmEqual(<<16#64A032:24/little>>, "movi a3, 100", jit_xtensa_asm:movi(a3, 100)),
+        ?_assertAsmEqual(<<16#FFAF32:24/little>>, "movi a3, -1", jit_xtensa_asm:movi(a3, -1)),
+        ?_assertAsmEqual(<<16#FFA732:24/little>>, "movi a3, 2047", jit_xtensa_asm:movi(a3, 2047)),
+        ?_assertAsmEqual(<<16#00A832:24/little>>, "movi a3, -2048", jit_xtensa_asm:movi(a3, -2048))
+    ].
+
+l32i_test_() ->  % byte offsets 0, 8 and the maximum 1020
+    [
+        ?_assertAsmEqual(<<16#002432:24/little>>, "l32i a3, a4, 0", jit_xtensa_asm:l32i(a3, a4, 0)),
+        ?_assertAsmEqual(<<16#022432:24/little>>, "l32i a3, a4, 8", jit_xtensa_asm:l32i(a3, a4, 8)),
+        ?_assertAsmEqual(
+            <<16#FF2432:24/little>>, "l32i a3, a4, 1020", jit_xtensa_asm:l32i(a3, a4, 1020)
+        )
+    ].
+
+s32i_test_() ->
+    [
+        ?_assertAsmEqual(<<16#006432:24/little>>, "s32i a3, a4, 0", jit_xtensa_asm:s32i(a3, a4, 0)),
+        ?_assertAsmEqual(<<16#026432:24/little>>, "s32i a3, a4, 8", jit_xtensa_asm:s32i(a3, a4, 8)),
+        ?_assertAsmEqual(
+            <<16#FF6432:24/little>>, "s32i a3, a4, 1020", jit_xtensa_asm:s32i(a3, a4, 1020)
+        )
+    ].
+
+beq_test_() ->  % assembly targets are ". + (4 + Offset)": offset 8 is written ". + 12", offset -4 is ". + 0"
+    [
+        ?_assertAsmEqual(
+            <<16#081347:24/little>>, "beq a3, a4, . + 12", jit_xtensa_asm:beq(a3, a4, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#001347:24/little>>, "beq a3, a4, . + 4", jit_xtensa_asm:beq(a3, a4, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#FC1347:24/little>>, "beq a3, a4, . + 0", jit_xtensa_asm:beq(a3, a4, -4)
+        )
+    ].
+
+bne_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#009347:24/little>>, "bne a3, a4, . + 4", jit_xtensa_asm:bne(a3, a4, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#089347:24/little>>, "bne a3, a4, . + 12", jit_xtensa_asm:bne(a3, a4, 8)
+        )
+    ].
+
+blt_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#002347:24/little>>, "blt a3, a4, . + 4", jit_xtensa_asm:blt(a3, a4, 0)
+        )
+    ].
+
+beqi_test_() ->  % constants 1 and -1 are expected in the r field as 1 and 0 respectively
+    [
+        ?_assertAsmEqual(
+            <<16#001326:24/little>>, "beqi a3, 1, . + 4", jit_xtensa_asm:beqi(a3, 1, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#000326:24/little>>, "beqi a3, -1, . + 4", jit_xtensa_asm:beqi(a3, -1, 0)
+        )
+    ].
+
+bnei_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#001366:24/little>>, "bnei a3, 1, . + 4", jit_xtensa_asm:bnei(a3, 1, 0)
+        )
+    ].
+
+blti_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0013A6:24/little>>, "blti a3, 1, . + 4", jit_xtensa_asm:blti(a3, 1, 0)
+        )
+    ].
+
+beqz_test_() ->
+    [
+        ?_assertAsmEqual(<<16#000316:24/little>>, "beqz a3, . + 4", jit_xtensa_asm:beqz(a3, 0)),
+        ?_assertAsmEqual(<<16#07F316:24/little>>, "beqz a3, . + 131", jit_xtensa_asm:beqz(a3, 127))
+    ].
+
+bnez_test_() ->
+    [
+        ?_assertAsmEqual(<<16#000356:24/little>>, "bnez a3, . + 4", jit_xtensa_asm:bnez(a3, 0)),
+        ?_assertAsmEqual(<<16#001356:24/little>>, "bnez a3, . + 5", jit_xtensa_asm:bnez(a3, 1))
+    ].
+
+bltz_test_() ->
+    [
+        ?_assertAsmEqual(<<16#000396:24/little>>, "bltz a3, . + 4", jit_xtensa_asm:bltz(a3, 0))
+    ].
+
+j_test_() ->  % same ". + (4 + Offset)" convention as the conditional branches
+    [
+        ?_assertAsmEqual(<<16#000006:24/little>>, "j . + 4", jit_xtensa_asm:j(0)),
+        ?_assertAsmEqual(<<16#001906:24/little>>, "j . + 104", jit_xtensa_asm:j(100))
+    ].
+
+jx_test_() ->
+    [
+        ?_assertAsmEqual(<<16#0003A0:24/little>>, "jx a3", jit_xtensa_asm:jx(a3)),
+        ?_assertAsmEqual(<<16#0000A0:24/little>>, "jx a0", jit_xtensa_asm:jx(a0))
+    ].
+
+retw_test_() ->
+    [
+        ?_assertAsmEqual(<<16#000090:24/little>>, "retw", jit_xtensa_asm:retw())
+    ].
+
+callx8_test_() ->
+    [
+        ?_assertAsmEqual(<<16#0008E0:24/little>>, "callx8 a8", jit_xtensa_asm:callx8(a8)),
+        ?_assertAsmEqual(<<16#0003E0:24/little>>, "callx8 a3", jit_xtensa_asm:callx8(a3))
+    ].
+
+entry_test_() ->  % frame sizes 32 and 48 are expected in the imm12 field as 4 and 6
+    [
+        ?_assertAsmEqual(<<16#004136:24/little>>, "entry a1, 32", jit_xtensa_asm:entry(a1, 32)),
+        ?_assertAsmEqual(<<16#006136:24/little>>, "entry a1, 48", jit_xtensa_asm:entry(a1, 48))
+    ].
+
+mov_n_test_() ->  % mov/2 is expected to produce the 2-byte mov.n form
+    [
+        ?_assertAsmEqual(<<16#043D:16/little>>, "mov.n a3, a4", jit_xtensa_asm:mov(a3, a4)),
+        ?_assertAsmEqual(<<16#000D:16/little>>, "mov.n a0, a0", jit_xtensa_asm:mov(a0, a0))
+    ].
+
+nop_test_() ->
+    [
+        ?_assertAsmEqual(<<16#0020f0:24/little>>, "nop", jit_xtensa_asm:nop())
+    ].
+
+break_test_() ->
+    [
+        ?_assertAsmEqual(<<16#004000:24/little>>, "break 0, 0", jit_xtensa_asm:break(0, 0)),
+        ?_assertAsmEqual(<<16#004120:24/little>>, "break 1, 2", jit_xtensa_asm:break(1, 2))
+    ].
+
+neg_test_() ->
+    [
+        ?_assertAsmEqual(<<16#603050:24/little>>, "neg a3, a5", jit_xtensa_asm:neg(a3, a5)),
+        ?_assertAsmEqual(<<16#600000:24/little>>, "neg a0, a0", jit_xtensa_asm:neg(a0, a0))
+    ].
+
+l32r_test_() ->  % the -1 case is compared against raw ".byte" output rather than an l32r mnemonic
+    [
+        ?_assertAsmEqual(<<16#000031:24/little>>, "l32r a3, 0", jit_xtensa_asm:l32r(a3, 0)),
+        ?_assertAsmEqual(
+            <<16#FFFF31:24/little>>, ".byte 0x31, 0xff, 0xff", jit_xtensa_asm:l32r(a3, -1)
+        )
+    ].
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index 37d8c0e178..ba8d2d36fc 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -42,7 +42,8 @@ start() -> jit_riscv64_tests, jit_riscv64_asm_tests, jit_x86_64_tests, - jit_x86_64_asm_tests + jit_x86_64_asm_tests, + jit_xtensa_asm_tests ]), case Result of ok -> ok; From 908b4baeb2a9f2cf3c0c31abc35a5736ec7284c7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 18 Apr 2026 08:54:19 +0200 Subject: [PATCH 2/8] JIT xtensa: backend Signed-off-by: Paul Guyot --- CHANGELOG.md | 1 + CMakeLists.txt | 4 +- doc/src/jit.md | 3 +- libs/jit/include/jit.hrl | 1 + libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_dwarf.erl | 3 +- libs/jit/src/jit_dwarf.hrl | 3 + libs/jit/src/jit_precompile.erl | 1 + libs/jit/src/jit_xtensa.erl | 4345 +++++++++++++++++++++++++++ src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/jit.c | 28 +- src/libAtomVM/jit.h | 127 +- src/libAtomVM/module.c | 24 +- src/libAtomVM/nifs.c | 2 + src/libAtomVM/opcodesswitch.h | 3 + src/platforms/esp32/CMakeLists.txt | 6 +- tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_xtensa_tests.erl | 2551 ++++++++++++++++ tests/libs/jit/tests.erl | 1 + 19 files changed, 7038 insertions(+), 68 deletions(-) create mode 100644 libs/jit/src/jit_xtensa.erl create mode 100644 tests/libs/jit/jit_xtensa_tests.erl diff --git a/CHANGELOG.md b/CHANGELOG.md index a0b55f2c2c..0cc31578ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `"USB_SERIAL_JTAG"` peripheral to the ESP32 `uart` module on chips with a built-in USB-Serial-JTAG controller (C3/C5/C6/C61/H2/H21/H4/P4/S3) - Added support for the `safe` option in `erlang:binary_to_term/2` +- Added xtensa JIT backend for esp32 platform ### Changed - Updated network type db() to dbm() to reflect the actual representation of the type diff --git a/CMakeLists.txt b/CMakeLists.txt index 
2d9a669ac3..91eaec8bc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,12 +61,14 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) set(AVM_JIT_TARGET_ARCH "arm32") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^riscv64$") set(AVM_JIT_TARGET_ARCH "riscv64") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^xtensa") + set(AVM_JIT_TARGET_ARCH "xtensa") else() message(FATAL_ERROR "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+float32;armv6m+thumb2;riscv32;riscv64;wasm32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+float32;armv6m+thumb2;riscv32;riscv64;wasm32;xtensa" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") # DWARF is not supported on wasm32 if (NOT AVM_DISABLE_JIT_DWARF) diff --git a/doc/src/jit.md b/doc/src/jit.md index 9ea7af11a3..5c9cd53fe4 100644 --- a/doc/src/jit.md +++ b/doc/src/jit.md @@ -28,6 +28,7 @@ The JIT compiler supports the following target architectures: | `riscv32` | 32-bit RISC-V (ESP32Cx, ESP32Hx, ESP32P4) | | `riscv64` | 64-bit RISC-V (Linux) | | `wasm32` | WebAssembly (nodeJS, browsers) | +| `xtensa` | ESP32, ESP32Sx | ### Requirements @@ -252,4 +253,4 @@ $ riscv64-elf-objdump -d module.elf |--------|---------|-------------| | `AVM_DISABLE_JIT` | `ON` | Disable JIT compilation | | `AVM_DISABLE_JIT_DWARF` | `ON` | Disable DWARF debug information in JIT | -| `AVM_JIT_TARGET_ARCH` | auto-detected | Target architecture (`x86_64`, `aarch64`, `arm32`, `armv6m`, `armv6m+thumb2`, `riscv32`, `riscv64`) | +| `AVM_JIT_TARGET_ARCH` | auto-detected | Target architecture (`x86_64`, `aarch64`, `arm32`, `armv6m`, `armv6m+thumb2`, `riscv32`, `riscv64`, `wasm32`, `xtensa`) | diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 639df097ac..9ac6e80e1d 100644 --- 
a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -31,6 +31,7 @@ -define(JIT_ARCH_RISCV64, 5). -define(JIT_ARCH_ARM32, 6). -define(JIT_ARCH_WASM32, 7). +-define(JIT_ARCH_XTENSA, 8). -define(JIT_VARIANT_PIC, 1). -define(JIT_VARIANT_FLOAT32, 2). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 2d222da7a2..68d0a6bad2 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -46,6 +46,7 @@ set(ERLANG_MODULES jit_wasm32_asm jit_x86_64 jit_x86_64_asm + jit_xtensa jit_xtensa_asm ) diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl index c8d9b105d7..16882be2ce 100644 --- a/libs/jit/src/jit_dwarf.erl +++ b/libs/jit/src/jit_dwarf.erl @@ -436,7 +436,8 @@ backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64; backend_to_machine_type(jit_armv6m) -> ?EM_ARM; backend_to_machine_type(jit_arm32) -> ?EM_ARM; backend_to_machine_type(jit_riscv32) -> ?EM_RISCV; -backend_to_machine_type(jit_riscv64) -> ?EM_RISCV. +backend_to_machine_type(jit_riscv64) -> ?EM_RISCV; +backend_to_machine_type(jit_xtensa) -> ?EM_XTENSA. backend_to_elf_flags(jit_armv6m) -> ?EF_ARM_EABI_VER5 bor ?EF_ARM_ABI_FLOAT_SOFT bor ?EF_ARM_ARCH_V6M; diff --git a/libs/jit/src/jit_dwarf.hrl b/libs/jit/src/jit_dwarf.hrl index ee355e22c7..c3a6384e50 100644 --- a/libs/jit/src/jit_dwarf.hrl +++ b/libs/jit/src/jit_dwarf.hrl @@ -85,6 +85,7 @@ -define(EM_X86_64, 62). -define(EM_AARCH64, 183). -define(EM_RISCV, 243). +-define(EM_XTENSA, 94). -define(SHT_PROGBITS, 1). -define(SHT_SYMTAB, 2). -define(SHT_STRTAB, 3). @@ -109,3 +110,5 @@ % r0 register in ARM -define(DWARF_R0_REG_ARMV6M, 0). -define(DWARF_R0_REG_ARM32, 0). +% a2 register in Xtensa (ctx is passed in a2) +-define(DWARF_A2_REG_XTENSA, 2). 
diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index 681bdabfe9..679c8ddbbe 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -140,6 +140,7 @@ compile(Target, Dir, Dwarf, Path) -> "riscv64" -> ?JIT_ARCH_RISCV64; "arm32" -> ?JIT_ARCH_ARM32; "wasm32" -> ?JIT_ARCH_WASM32; + "xtensa" -> ?JIT_ARCH_XTENSA; _ -> error({unsupported_target, Target}) end, diff --git a/libs/jit/src/jit_xtensa.erl b/libs/jit/src/jit_xtensa.erl new file mode 100644 index 0000000000..b72f2d6004 --- /dev/null +++ b/libs/jit/src/jit_xtensa.erl @@ -0,0 +1,4345 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_xtensa). 
+ +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + cond_jump_to_label/3, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3, + get_regs_tracking/1, + xor_/3, + shift_right_arith/3, + div_reg/3, + rem_reg/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2, + dwarf_variables/2, + dwarf_ctx_register/0 +]). +-endif. + +%% Exported for tests (see tests/libs/jit/jit_xtensa_tests.erl) +-export([ + mov_immediate/2, + mov_immediate_large/2 +]). + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). +-include("term.hrl"). + +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + +-define(ASSERT(Expr), true = Expr). + +%% Xtensa (ESP32 LX6/LX7) windowed ABI: +%% C calls JIT via CALL8. JIT entry points start with ENTRY a1, 32. +%% JIT returns to C with RETW (not RET). +%% JIT calls C primitives via CALLX8 with args in a10-a15 +%% (callee sees them as a2-a7). +%% Return value from C comes back in a10 (callee's a2 mapped back). 
%% CALLX8 preserves caller's a0-a7 automatically (register window).
%% a8-a15 are clobbered by CALLX8.
%% a0 contains encoded return address - MUST NOT be modified (needed for RETW).
%%
%% The assembler specifically targets ESP32 with instructions such as quos and
%% rems.
%%
%% Registers used by the JIT backend (Xtensa windowed):
%% - Context: a2 (ctx pointer, first parameter from C via CALL8)
%% - JITState: a3 (second parameter from C)
%% - Native interface: a4 (third parameter from C)
%% - Return address: a0 (encoded, must not be touched - needed for RETW)
%% - Stack pointer: a1
%% - Available for JIT scratch: a5-a15 (11 registers)
%%   a5-a7: preserved across CALLX8 (in caller's window)
%%   a8-a15: clobbered by CALLX8
%% - A8_REG: a8 (special scratch, not in SCRATCH_REGS)

-type xtensa_register() ::
    a0
    | a1
    | a2
    | a3
    | a4
    | a5
    | a6
    | a7
    | a8
    | a9
    | a10
    | a11
    | a12
    | a13
    | a14
    | a15.

-define(IS_GPR(Reg),
    (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse
        Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= a8 orelse Reg =:= a9 orelse
        Reg =:= a10 orelse Reg =:= a11 orelse Reg =:= a12 orelse Reg =:= a13 orelse
        Reg =:= a14 orelse Reg =:= a15)
).

-type stream() :: any().

%% Backend state threaded through every code-emitting function.
-record(state, {
    %% Module implementing the stream API (offset/1, append/2, replace/3, flush/1)
    stream_module :: module(),
    %% Opaque stream value owned by stream_module
    stream :: stream(),
    %% Stream offset recorded when the state was created
    offset :: non_neg_integer(),
    %% Pending relocations as {Label, StreamOffset, Type}. Type is a tagged
    %% tuple (e.g. {adr, Reg, HeaderOffset} or {far_branch, Temp, JumpTableStart})
    %% interpreted by patch_branch/5, not an integer.
    branches :: [{integer() | reference(), non_neg_integer(), any()}],
    %% Stream offset of the jump table
    jump_table_start :: non_neg_integer(),
    %% Bitmask (one bit per register number) of registers free for allocation
    available_regs :: non_neg_integer(),
    %% Bitmask of registers currently allocated
    used_regs :: non_neg_integer(),
    %% Known labels as {Label, StreamOffset}
    labels :: [{integer() | reference(), integer()}],
    %% JIT variant flags passed to new/3
    variant :: non_neg_integer(),
    %% Register content tracking (see jit_regs)
    regs :: jit_regs:regs()
}).

-type state() :: #state{}.
-type immediate() :: non_neg_integer().
-type vm_register() ::
    {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, xtensa_register()}.
-type value() :: immediate() | vm_register() | xtensa_register() | {ptr, xtensa_register()}.
+-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_xtensa_register() :: + {free, xtensa_register()} | xtensa_register(). + +-type condition() :: + {xtensa_register(), '<', integer()} + | {maybe_free_xtensa_register(), '<', xtensa_register()} + | {integer(), '<', maybe_free_xtensa_register()} + | {maybe_free_xtensa_register(), '==', integer()} + | {maybe_free_xtensa_register(), '!=', xtensa_register() | integer()} + | {'(int)', maybe_free_xtensa_register(), '==', integer()} + | {'(int)', maybe_free_xtensa_register(), '!=', xtensa_register() | integer()} + | {'(bool)', maybe_free_xtensa_register(), '==', false} + | {'(bool)', maybe_free_xtensa_register(), '!=', false} + | {maybe_free_xtensa_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, xtensa_register()}, '==', {free, xtensa_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a2). +-define(NATIVE_INTERFACE_REG, a4). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +-define(JITSTATE_REG, a3). +-define(JITSTATE_MODULE_OFFSET, 0). +-define(JITSTATE_CONTINUATION_OFFSET, 16#4). +-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8). +-define(JITSTATE_CODE_BASE_OFFSET, 16#C). + +%% Each jump table entry: literal(4) + ENTRY(3) + L32R(3) + L32I(3) + ADD(3) + JX(3) + pad(1) = 20 bytes +-define(JUMP_TABLE_ENTRY_SIZE, 20). +%% Offset from start of entry to the ENTRY instruction (skip the 4-byte literal) +-define(JUMP_TABLE_OFFSET, 4). +%% Size of code_relative_address_padded: l32i(3) + mov_immediate(padded to 15) + add(3) = 21 +-define(CODE_RELATIVE_ADDRESS_PADDED_SIZE, 21). 
+%% ENTRY frame size: 32 bytes for window save areas (base + extra) + +%% 16 bytes gap for alignment + +%% 48 bytes for local storage (push_registers at FRAME_LOCAL_OFFSET). +%% Total must be 16-byte aligned. +-define(ENTRY_FRAME_SIZE, 96). +%% Offset within the ENTRY frame where push_registers stores data. +%% This must be above the window save areas (32 bytes). +-define(FRAME_LOCAL_OFFSET, 48). + +%% Use a8 as temporary for some operations +-define(A8_REG, a8). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_B4CONST(X), + (X =:= -1 orelse X =:= 1 orelse X =:= 2 orelse X =:= 3 orelse X =:= 4 orelse + X =:= 5 orelse X =:= 6 orelse X =:= 7 orelse X =:= 8 orelse X =:= 10 orelse + X =:= 12 orelse X =:= 16 orelse X =:= 32 orelse X =:= 64 orelse X =:= 128 orelse + X =:= 256) +). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). + +-define(AVAILABLE_REGS, [a15, a14, a13, a12, a11, a10, a9, a7, a6, a5]). +-define(PARAMETER_REGS, [a10, a11, a12, a13, a14, a15]). +-define(SCRATCH_REGS, [a15, a14, a13, a12, a11, a10, a9]). + +-define(REG_BIT_A0, (1 bsl 0)). +-define(REG_BIT_A1, (1 bsl 1)). +-define(REG_BIT_A2, (1 bsl 2)). +-define(REG_BIT_A3, (1 bsl 3)). +-define(REG_BIT_A4, (1 bsl 4)). +-define(REG_BIT_A5, (1 bsl 5)). +-define(REG_BIT_A6, (1 bsl 6)). +-define(REG_BIT_A7, (1 bsl 7)). +-define(REG_BIT_A8, (1 bsl 8)). +-define(REG_BIT_A9, (1 bsl 9)). +-define(REG_BIT_A10, (1 bsl 10)). +-define(REG_BIT_A11, (1 bsl 11)). +-define(REG_BIT_A12, (1 bsl 12)). +-define(REG_BIT_A13, (1 bsl 13)). +-define(REG_BIT_A14, (1 bsl 14)). +-define(REG_BIT_A15, (1 bsl 15)). 

-define(AVAILABLE_REGS_MASK,
    (?REG_BIT_A15 bor ?REG_BIT_A14 bor ?REG_BIT_A13 bor ?REG_BIT_A12 bor
        ?REG_BIT_A11 bor ?REG_BIT_A10 bor ?REG_BIT_A9 bor
        ?REG_BIT_A7 bor ?REG_BIT_A6 bor ?REG_BIT_A5)
).
-define(SCRATCH_REGS_MASK,
    (?REG_BIT_A15 bor ?REG_BIT_A14 bor ?REG_BIT_A13 bor ?REG_BIT_A12 bor
        ?REG_BIT_A11 bor ?REG_BIT_A10 bor ?REG_BIT_A9)
).

-include("jit_backend_dwarf_impl.hrl").

%%-----------------------------------------------------------------------------
%% @doc Size of a machine word (a `term', i.e. a `uintptr_t') in bytes for
%% this backend. This backend always reports 4.
%% @end
%% @return Word size in bytes
%%-----------------------------------------------------------------------------
-spec word_size() -> 4 | 8.
word_size() ->
    4.

%%-----------------------------------------------------------------------------
%% @doc Build the initial backend state for a variant and a stream. No
%% registers are allocated, no label or branch is recorded, and the starting
%% offset is read from the stream.
%% @end
%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
%% @param StreamModule module to stream instructions
%% @param Stream stream state
%% @return New backend state
%%-----------------------------------------------------------------------------
-spec new(any(), module(), stream()) -> state().
new(Variant, StreamModule, Stream) ->
    InitialOffset = StreamModule:offset(Stream),
    #state{
        variant = Variant,
        stream_module = StreamModule,
        stream = Stream,
        offset = InitialOffset,
        jump_table_start = 0,
        branches = [],
        labels = [],
        available_regs = ?AVAILABLE_REGS_MASK,
        used_regs = 0,
        regs = jit_regs:new()
    }.

%%-----------------------------------------------------------------------------
%% @doc Access the stream object.
%% @end
%% @param State current backend state
%% @return The stream object
%%-----------------------------------------------------------------------------
-spec stream(state()) -> stream().
stream(#state{stream = Stream}) ->
    Stream.

%%-----------------------------------------------------------------------------
%% @doc Get the current offset in the stream, as reported by the stream
%% module (not the offset cached in the state at creation time).
%% @end
%% @param State current backend state
%% @return The current offset
%%-----------------------------------------------------------------------------
-spec offset(state()) -> non_neg_integer().
offset(#state{stream_module = StreamModule, stream = Stream}) ->
    StreamModule:offset(Stream).

%%-----------------------------------------------------------------------------
%% @doc Flush the stream and store the flushed stream back into the state.
%% @end
%% @param State current backend state
%% @return The new state
%%-----------------------------------------------------------------------------
%% The result is the updated backend state, not the bare stream, so the spec
%% returns state().
-spec flush(state()) -> state().
flush(#state{stream_module = StreamModule, stream = Stream0} = State) ->
    Stream1 = StreamModule:flush(Stream0),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a debugger or breakpoint instruction (`break 1, 15'). This is
%% used for debugging and not in production.
%% @end
%% @param State current backend state
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec debugger(state()) -> state().
debugger(#state{stream_module = StreamModule, stream = Stream0} = State) ->
    Stream1 = StreamModule:append(Stream0, jit_xtensa_asm:break(1, 15)),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Return the list of currently used native registers. This is used for
%% debugging and not in production.
%% @end
%% @param State current backend state
%% @return The list of used registers
%%-----------------------------------------------------------------------------
-spec used_regs(state()) -> [xtensa_register()].
used_regs(#state{used_regs = UsedMask}) ->
    mask_to_list(UsedMask).

%%-----------------------------------------------------------------------------
%% @doc List the native registers that are currently free for allocation.
%% Meant for debugging, not for production use.
%% @end
%% @param State current backend state
%% @return The list of available registers
%%-----------------------------------------------------------------------------
-spec available_regs(state()) -> [xtensa_register()].
available_regs(#state{available_regs = AvailableMask}) ->
    mask_to_list(AvailableMask).

%%-----------------------------------------------------------------------------
%% @doc Release every native register named in a list of values. Entries may
%% be registers, `{ptr, Register}' wrappers, or any other value; the latter
%% are skipped.
%% @end
%% @param State current backend state
%% @param Regs list of registers or other values
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec free_native_registers(state(), [value()]) -> state().
free_native_registers(State, Values) ->
    lists:foldl(
        fun(Value, StateAcc) -> free_native_register(StateAcc, Value) end,
        State,
        Values
    ).

%% Release one value: unwrap `{ptr, Reg}', move a register's bit from the used
%% mask to the available mask, and leave the state untouched for anything else.
-spec free_native_register(state(), value()) -> state().
free_native_register(State, {ptr, Inner}) ->
    free_native_register(State, Inner);
free_native_register(
    #state{available_regs = AvailableMask, used_regs = UsedMask} = State,
    Reg
) when is_atom(Reg) ->
    RegMask = reg_bit(Reg),
    State#state{
        available_regs = AvailableMask bor RegMask,
        used_regs = UsedMask band (bnot RegMask)
    };
free_native_register(State, _NotARegister) ->
    State.

%%-----------------------------------------------------------------------------
%% @doc Check that no native register is allocated and that the available
%% mask is back to its initial value; crashes with `badmatch' otherwise.
%% Meant for debugging, not for production use.
%% @end
%% @param State current backend state
%% @return ok
%%-----------------------------------------------------------------------------
-spec assert_all_native_free(state()) -> ok.
assert_all_native_free(State) ->
    UsedMask = State#state.used_regs,
    AvailableMask = State#state.available_regs,
    0 = UsedMask,
    ?AVAILABLE_REGS_MASK = AvailableMask,
    ok.

%%-----------------------------------------------------------------------------
%% @doc Emit the jump table at the beginning of the module and remember where
%% it starts. Entries are emitted for labels 0 (special entry for lines and
%% labels information) through LabelsCount included (special entry for
%% OP_INT_CALL_END). Branches are resolved later by `update_branches/1'.
%%
%% On Xtensa, each jump table entry is 20 bytes:
%% ```
%%      .word label_offset          ; 4-byte literal (code-relative offset)
%%      entry a1, 96                ; 3 bytes (windowed ABI frame setup)
%%      l32r a5, [literal above]    ; 3 bytes (load label offset, imm16=-2)
%%      l32i a8, a3, 0xC            ; 3 bytes (load code_base from jit_state)
%%      add a8, a8, a5              ; 3 bytes (compute absolute address)
%%      jx a8                       ; 3 bytes (indirect jump)
%%      .byte 0xFF                  ; 1 byte padding (alignment)
%% '''
%%
%% @end
%% @param State current backend state
%% @param LabelsCount number of labels in the module.
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec jump_table(state(), pos_integer()) -> state().
jump_table(#state{stream_module = StreamModule, stream = Stream} = State0, LabelsCount) ->
    TableStart = StreamModule:offset(Stream),
    State1 = State0#state{jump_table_start = TableStart},
    jump_table0(State1, 0, LabelsCount).

%% Emit jump table entries N..LabelsCount (inclusive), one fixed-size entry per
%% label. Each entry is the concatenation, in this order, of: literal
%% placeholder, ENTRY, L32R, L32I, ADD, JX and one padding byte.
jump_table0(State, N, LabelsCount) when N > LabelsCount ->
    State;
jump_table0(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    N,
    LabelsCount
) ->
    %% The 4-byte literal will be patched in add_label with the code-relative target offset.
    %% L32R imm16 = -2: at PC=entry_start+7, ((PC+3)&~3) = entry_start+8,
    %% target = -2*4 + entry_start+8 = entry_start = literal position.
    %% We use a5 for the offset (safe: a5 is unallocated at function entry from C).
    %% a8 (A8_REG) holds code_base then the final target address.
    %% a0 (return address) must NOT be clobbered, RETW needs it later.
    LiteralPlaceholder = <<16#FF, 16#FF, 16#FF, 16#FF>>,
    EntryInstr = jit_xtensa_asm:entry(a1, ?ENTRY_FRAME_SIZE),
    LoadOffset = jit_xtensa_asm:l32r(a5, -2),
    LoadCodeBase = jit_xtensa_asm:l32i(?A8_REG, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET),
    AddInstr = jit_xtensa_asm:add(?A8_REG, ?A8_REG, a5),
    JxInstr = jit_xtensa_asm:jx(?A8_REG),
    Padding = <<16#FF>>,
    JumpEntry =
        <<LiteralPlaceholder/binary, EntryInstr/binary, LoadOffset/binary, LoadCodeBase/binary,
            AddInstr/binary, JxInstr/binary, Padding/binary>>,
    %% Guard the layout: other code relies on the fixed entry size.
    ?JUMP_TABLE_ENTRY_SIZE = byte_size(JumpEntry),
    Stream1 = StreamModule:append(Stream0, JumpEntry),
    jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount).

%%-----------------------------------------------------------------------------
%% @doc Patch a single branch in the stream
%% @end
%% @param StreamModule stream module
%% @param Stream stream state
%% @param Offset offset of the branch to patch
%% @param Type type of the branch
%% @param LabelOffset target label offset
%% @return Updated stream
%%-----------------------------------------------------------------------------
-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream().
%% Build the replacement bytes for one pending relocation and write them over
%% the placeholder at Offset.
%%   {adr, Reg, HeaderOffset}            -> padded code-relative address load
%%   {far_branch, Temp, JumpTableStart}  -> 24-byte sequence: J + 7 NOPs when
%%       the target is within J range, otherwise an indirect jump through a8.
%% Any other Type crashes with case_clause.
patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) ->
    NewInstr =
        case Type of
            {adr, Reg, HeaderOffset} ->
                % Generate code_relative_address padded to 21 bytes
                % LabelOffset is a stream offset; subtract header to get code-relative
                code_relative_address_padded(Reg, LabelOffset - HeaderOffset);
            {far_branch, Temp, JumpTableStart} ->
                % 24-byte placeholder: J+NOPs for near, indirect jump for far
                Rel = LabelOffset - Offset - 4,
                case Rel >= -131072 andalso Rel =< 131071 of
                    true ->
                        % Near: J (3 bytes) followed by 7 NOPs (21 bytes)
                        J = jit_xtensa_asm:j(Rel),
                        Nops = list_to_binary([jit_xtensa_asm:nop() || _ <- lists:seq(1, 7)]),
                        <<J/binary, Nops/binary>>;
                    false ->
                        % Far: l32i (3) + padded immediate load (15) + add (3) + jx (3)
                        CodeRelativeTarget = LabelOffset - JumpTableStart,
                        I1 = jit_xtensa_asm:l32i(
                            ?A8_REG, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET
                        ),
                        I2 = mov_immediate(Temp, CodeRelativeTarget),
                        MovSize = byte_size(I2),
                        NopCount = (15 - MovSize) div 3,
                        Nops = list_to_binary([jit_xtensa_asm:nop() || _ <- lists:seq(1, NopCount)]),
                        PaddedMov = <<I2/binary, Nops/binary>>,
                        % Crash early if the immediate load cannot be padded to exactly 15 bytes
                        15 = byte_size(PaddedMov),
                        I3 = jit_xtensa_asm:add(?A8_REG, ?A8_REG, Temp),
                        I4 = jit_xtensa_asm:jx(?A8_REG),
                        <<I1/binary, PaddedMov/binary, I3/binary, I4/binary>>
                end
        end,
    StreamModule:replace(Stream, Offset, NewInstr).

%%-----------------------------------------------------------------------------
%% @doc Patch all branches targeting a specific label and return remaining branches
%% @end
%% @param StreamModule stream module
%% @param Stream stream state
%% @param TargetLabel label to patch branches for
%% @param LabelOffset offset of the target label
%% @param Branches list of pending branches
%% @return {UpdatedStream, RemainingBranches}
%%-----------------------------------------------------------------------------
-spec patch_branches_for_label(
    module(),
    stream(),
    integer(),
    non_neg_integer(),
    [{integer(), non_neg_integer(), any()}]
) -> {stream(), [{integer(), non_neg_integer(), any()}]}.
patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) ->
    patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []).

%% Walk the pending branches once: entries whose label is exactly TargetLabel
%% are patched into the stream, all others are kept in their original order.
patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Kept) ->
    {Stream, lists:reverse(Kept)};
patch_branches_for_label(
    StreamModule, Stream0, TargetLabel, LabelOffset, [Branch | Pending], Kept
) ->
    case Branch of
        {TargetLabel, BranchOffset, BranchType} ->
            Stream1 = patch_branch(StreamModule, Stream0, BranchOffset, BranchType, LabelOffset),
            patch_branches_for_label(
                StreamModule, Stream1, TargetLabel, LabelOffset, Pending, Kept
            );
        _Other ->
            patch_branches_for_label(
                StreamModule, Stream0, TargetLabel, LabelOffset, Pending, [Branch | Kept]
            )
    end.

%%-----------------------------------------------------------------------------
%% @doc Resolve every pending branch against the recorded labels, rewriting
%% the stream in place, and clear the pending list. Every branch label must
%% already be present in the labels list.
%% @end
%% @param State current backend state
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec update_branches(state()) -> state().
update_branches(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        branches = Pending,
        labels = Labels
    } = State
) ->
    Patched = lists:foldl(
        fun({Label, BranchOffset, BranchType}, StreamAcc) ->
            % all branches Labels are in Labels
            {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
            patch_branch(StreamModule, StreamAcc, BranchOffset, BranchType, LabelOffset)
        end,
        Stream0,
        Pending
    ),
    State#state{stream = Patched, branches = []}.

%%-----------------------------------------------------------------------------
%% @doc Generate code to load a primitive function pointer into a register.
%% The pointer is read with a single L32I from the native interface table at
%% byte offset `Primitive * 4'. Only offsets up to 1020 are accepted, i.e.
%% primitive indices 0..255; any other index raises `function_clause'.
%% @param Primitive index to the primitive to call
%% @param TargetReg register to load the function pointer into
%% @return Binary instruction sequence
%%-----------------------------------------------------------------------------
-spec load_primitive_ptr(non_neg_integer(), xtensa_register()) -> binary().
load_primitive_ptr(Primitive, TargetReg) when
    is_integer(Primitive), Primitive >= 0, Primitive * 4 =< 1020
->
    % There are definitely less than 256 primitives
    jit_xtensa_asm:l32i(TargetReg, ?NATIVE_INTERFACE_REG, Primitive * 4).

%%-----------------------------------------------------------------------------
%% @doc Emit a call (call with return) to a primitive with arguments. This
%% function converts arguments and passes them following the backend ABI
%% convention. It also saves scratch registers we need to preserve.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive
%% @return Tuple of the updated backend state and the register returned by
%% call_func_ptr/3 (presumably the one holding the primitive's result)
%%-----------------------------------------------------------------------------
-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), xtensa_register()}.
%% With no free register, let call_func_ptr/3 resolve the primitive itself;
%% otherwise load the function pointer into the first free register, mark that
%% register as used, and hand it over as `{free, Reg}'.
call_primitive(#state{available_regs = 0} = State, Primitive, Args) ->
    call_func_ptr(State, {primitive, Primitive}, Args);
call_primitive(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableMask,
        used_regs = UsedMask
    } = State,
    Primitive,
    Args
) when AvailableMask =/= 0 ->
    PtrReg = first_avail(AvailableMask),
    PtrBit = reg_bit(PtrReg),
    LoadPtr = load_primitive_ptr(Primitive, PtrReg),
    Stream1 = StreamModule:append(Stream0, LoadPtr),
    call_func_ptr(
        State#state{
            stream = Stream1,
            available_regs = AvailableMask band (bnot PtrBit),
            used_regs = UsedMask bor PtrBit
        },
        {free, PtrReg},
        Args
    ).

%%-----------------------------------------------------------------------------
%% @doc Emit a jump (call without return) to a primitive with arguments. This
%% function converts arguments and pass them following the backend ABI
%% convention.
%%
%% Unlike other backends, which tail-jump to the primitive so its own return
%% returns directly to C, on Xtensa we emit a plain CALLX8 + RETW. Primitives
%% begin with an ENTRY instruction and follow the windowed ABI, and there is
%% no standard way to tail-jump into a windowed callee: ENTRY requires a fresh
%% rotated window (PS.CALLINC set by a preceding CALL4/8/12), so a bare JX
%% would either fault or corrupt WindowStart. GCC and LLVM likewise fall back
%% to call+retw when tail-calling a windowed callee.
%%
%% The functional cost is one extra register window plus ENTRY_FRAME_SIZE
%% bytes of stack that live only for the duration of the primitive call; it
%% is unwound before we return to C, so the overhead does not accumulate
%% across BEAM tail-call chains.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive; the second one must be
%% `jit_state' (any other shape raises `case_clause')
%% @return Updated backend state
%%-----------------------------------------------------------------------------
call_primitive_last(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    Primitive,
    Args
) ->
    %% Xtensa windowed ABI: CALLX8 to the primitive, move its return value
    %% from a10 (our view of callee's a2) into our a2, then RETW to C.
    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
    ArgsRegs = args_regs(Args),
    ArgsRegsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1),
    ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1),
    %% The function pointer must live in a register that is neither an
    %% argument source nor a parameter destination.
    ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsRegsMask bor ParamMask)),
    Temp = first_avail(ScratchMask),
    TempBit = reg_bit(Temp),
    AvailableRegs1 = ScratchMask band (bnot TempBit),
    UsedMask = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1),
    PrepCall = load_primitive_ptr(Primitive, Temp),
    Stream1 = StreamModule:append(Stream0, PrepCall),

    State1 = State0#state{
        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedMask
    },

    %% Replace the symbolic `offset' argument by the current stream offset.
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),
    case Args1 of
        [FirstArg, jit_state | ArgsT] ->
            ArgsForCall = [FirstArg, jit_state_tail_call | ArgsT],
            ParameterRegs = parameter_regs(Args1),
            State2 = set_registers_args(State1, ArgsForCall, ParameterRegs, 0),
            Stream2 = State2#state.stream,
            I_call = jit_xtensa_asm:callx8(Temp),
            I_mv = jit_xtensa_asm:mov(a2, a10),
            I_retw = jit_xtensa_asm:retw(),
            Stream3 = StreamModule:append(
                Stream2, <<I_call/binary, I_mv/binary, I_retw/binary>>
            ),
            %% Code after RETW is unreachable: release every register.
            State2#state{
                stream = Stream3,
                available_regs = ?AVAILABLE_REGS_MASK,
                used_regs = 0,
                regs = jit_regs:unreachable(State2#state.regs)
            }
    end.

%%-----------------------------------------------------------------------------
%% @doc Emit a return of a value if it's not equal to ctx.
%% This logic is used to break out to the scheduler, typically after signal
%% messages have been processed.
%% @end
%% @param State current backend state
%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
return_if_not_equal_to_ctx(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    {free, Reg}
) ->
    % Xtensa windowed ABI: return value in a2 (= CTX_REG)
    % RETW returns to the caller's register window
    I2 =
        case Reg of
            % Return value is already in a2
            a2 -> <<>>;
            % Move to a2 (return register)
            _ -> jit_xtensa_asm:mov(a2, Reg)
        end,
    I3 = jit_xtensa_asm:retw(),
    %% Branch if equal (skip the return): the target is the first byte after
    %% I2 and I3.
    %% NOTE(review): the "- 1" presumably reflects Xtensa branch targets being
    %% relative to PC + 4 while BEQ itself is 3 bytes long - confirm against
    %% jit_xtensa_asm:beq/3.
    I1 = jit_xtensa_asm:beq(Reg, ?CTX_REG, byte_size(I2) + byte_size(I3) - 1),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    RegBit = reg_bit(Reg),
    State#state{
        stream = Stream1,
        available_regs = AvailableRegs0 bor RegBit,
        used_regs = UsedRegs0 band (bnot RegBit)
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a label. The offset of the relocation is saved and will
%% be updated with `update_branches/1`.
%% @end
%% @param State current backend state
%% @param Label to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_label(
    #state{stream_module = StreamModule, stream = Stream, labels = Labels} = State, Label
) ->
    Here = StreamModule:offset(Stream),
    %% `false` when the label has not been emitted yet (forward reference).
    Known = lists:keyfind(Label, 1, Labels),
    {NextState, Code} = branch_to_label_code(State, Here, Label, Known),
    NextState#state{
        stream = StreamModule:append(Stream, Code),
        %% Code following an unconditional jump cannot be reached from here.
        regs = jit_regs:unreachable(NextState#state.regs)
    }.

%% Emit an unconditional jump to an already known stream offset.
jump_to_offset(#state{stream_module = StreamModule, stream = Stream} = State, TargetOffset) ->
    Here = StreamModule:offset(Stream),
    Code = branch_to_offset_code(State, Here, TargetOffset),
    State#state{
        stream = StreamModule:append(Stream, Code),
        regs = jit_regs:unreachable(State#state.regs)
    }.

%% Emit a jump to Label that is only taken when Cond holds.
cond_jump_to_label(State, Cond, Label) ->
    JumpFn = fun(BlockState) -> jump_to_label(BlockState, Label) end,
    if_block(State, Cond, JumpFn).

%%-----------------------------------------------------------------------------
%% @doc Jump to address in continuation pointer register
%% Calculate absolute address and jump to it.
%% @end
%% @param State current backend state
%% @param {free, OffsetReg} register containing the offset value
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_continuation(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Available
    } = State0,
    {free, OffsetReg}
) ->
    Temp = first_avail(Available),
    %% Absolute address = code base (loaded from the JIT state) + offset
    %% held in OffsetReg.
    I1 = jit_xtensa_asm:l32i(Temp, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET),
    I2 = jit_xtensa_asm:add(Temp, Temp, OffsetReg),
    %% Skip past the ENTRY instruction (3 bytes) at the continuation entry point.
    %% All continuation targets have an ENTRY instruction (needed when C code
    %% calls via CALL8 through jit_return). When jumping from within JIT code
    %% we are already in a windowed frame, so we must skip the ENTRY.
    I3 = jit_xtensa_asm:addi(Temp, Temp, 3),
    I4 = jit_xtensa_asm:jx(Temp),

    Code = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    % Free all registers since this is a tail jump
    State0#state{
        stream = Stream1,
        available_regs = ?AVAILABLE_REGS_MASK,
        used_regs = 0,
        regs = jit_regs:unreachable(State0#state.regs)
    }.

%% Build the code for an unconditional jump from stream offset Offset to
%% TargetOffset: a single J when the displacement fits, otherwise an indirect
%% jump through code_base.
branch_to_offset_code(State, Offset, TargetOffset) ->
    %% J instruction has 18-bit signed offset range (+-131072 bytes)
    Rel = TargetOffset - Offset - 4,
    case Rel >= -131072 andalso Rel =< 131071 of
        true ->
            jit_xtensa_asm:j(Rel);
        false ->
            %% Far jump: use code_base + code-relative offset via indirect jump.
            %% Need two scratch regs: A8_REG (a8) for code_base/target, and one
            %% available register for the offset immediate.
            #state{jump_table_start = JumpTableStart, available_regs = Avail} = State,
            CodeRelativeTarget = TargetOffset - JumpTableStart,
            Temp = first_avail(Avail),
            I1 = jit_xtensa_asm:l32i(?A8_REG, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET),
            I2 = mov_immediate(Temp, CodeRelativeTarget),
            I3 = jit_xtensa_asm:add(?A8_REG, ?A8_REG, Temp),
            I4 = jit_xtensa_asm:jx(?A8_REG),
            <<I1/binary, I2/binary, I3/binary, I4/binary>>
    end.

%% Build the code for a jump to Label. The last argument is the result of
%% looking the label up: `{Label, LabelOffset}` when it is already known,
%% `false` for a forward reference that is recorded as a relocation.
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
    CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
    {State, CodeBlock};
branch_to_label_code(
    #state{branches = Branches, available_regs = Avail, jump_table_start = JTS} = State0,
    Offset,
    Label,
    false
) ->
    %% Reserve 24 bytes for forward branch placeholder.
    %% Near targets will use J + NOPs, far targets use indirect jump.
    %% Pick a temp register now and encode it in the relocation.
    Temp = first_avail(Avail),
    CodeBlock = list_to_binary(lists:duplicate(24, 16#FF)),
    Reloc = {Label, Offset, {far_branch, Temp, JTS}},
    State1 = State0#state{branches = [Reloc | Branches]},
    {State1, CodeBlock}.

%%-----------------------------------------------------------------------------
%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
%% execute a block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockFn function to emit the block that may be executed
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
if_block(
    #state{stream_module = StreamModule} = State0,
    {'and', CondList},
    BlockFn
) ->
    %% Emit every test in order and remember where each J placeholder sits
    %% (offset of the test + delta returned by if_block_cond/2).
    {JumpOffsets, StateCond} = lists:foldl(
        fun(Cond, {Offsets, StateIn}) ->
            TestStart = StreamModule:offset(StateIn#state.stream),
            {StateOut, JumpDelta} = if_block_cond(StateIn, Cond),
            {[TestStart + JumpDelta | Offsets], StateOut}
        end,
        {[], State0},
        CondList
    ),
    StateBlock = BlockFn(StateCond),
    BlockEnd = StreamModule:offset(StateBlock#state.stream),
    %% Every placeholder becomes a J to the first byte after the block.
    PatchedStream = lists:foldl(
        fun(JumpOffset, StreamAcc) ->
            SkipBlock = jit_xtensa_asm:j(BlockEnd - JumpOffset - 4),
            StreamModule:replace(StreamAcc, JumpOffset, SkipBlock)
        end,
        StateBlock#state.stream,
        JumpOffsets
    ),
    %% The block may or may not have run: keep registers used on either path
    %% and merge the tracked register contents.
    StateMerged = merge_used_regs(
        StateBlock#state{stream = PatchedStream}, StateCond#state.used_regs
    ),
    StateMerged#state{regs = jit_regs:merge(StateCond#state.regs, StateBlock#state.regs)};
if_block(State0, Cond, BlockFn) ->
    %% A single condition is a conjunction of exactly one test.
    if_block(State0, {'and', [Cond]}, BlockFn).

%%-----------------------------------------------------------------------------
%% @doc Emit an if else block, i.e. emit a test of a condition and
%% conditionally execute a block or another block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockTrueFn function to emit the block that is executed if condition is true
%% @param BlockFalseFn function to emit the block that is executed if condition is false
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
    state().
if_else_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockTrueFn,
    BlockFalseFn
) ->
    TestStart = StreamModule:offset(Stream0),
    {StateCond, JumpDelta} = if_block_cond(State0, Cond),
    %% Position of the J placeholder emitted by if_block_cond/2.
    CondJumpOffset = TestStart + JumpDelta,
    StateTrue = BlockTrueFn(StateCond),
    TrueStream = StateTrue#state.stream,
    %% The true block ends with a 3-byte J over the else block. It is emitted
    %% as all-1s now (flash can only flip 1->0) and patched once the end of
    %% the else block is known.
    SkipElseOffset = StreamModule:offset(TrueStream),
    StreamWithSkip = StreamModule:append(TrueStream, <<16#FF, 16#FF, 16#FF>>),
    %% The else block starts here: point the condition's J at it.
    ElseStart = StreamModule:offset(StreamWithSkip),
    ToElse = jit_xtensa_asm:j(ElseStart - CondJumpOffset - 4),
    StreamPatched = StreamModule:replace(StreamWithSkip, CondJumpOffset, ToElse),
    %% The else block starts from the register allocation that was current
    %% right after the test, not from what the true block left behind.
    StateFalse = BlockFalseFn(StateTrue#state{
        stream = StreamPatched,
        used_regs = StateCond#state.used_regs,
        available_regs = StateCond#state.available_regs
    }),
    FalseStream = StateFalse#state.stream,
    EndOffset = StreamModule:offset(FalseStream),
    ToEnd = jit_xtensa_asm:j(EndOffset - SkipElseOffset - 4),
    %% The replacement must have exactly the size of the placeholder.
    3 = byte_size(ToEnd),
    FinalStream = StreamModule:replace(FalseStream, SkipElseOffset, ToEnd),
    StateMerged = merge_used_regs(
        StateFalse#state{stream = FinalStream}, StateTrue#state.used_regs
    ),
    StateMerged#state{regs = jit_regs:merge(StateTrue#state.regs, StateFalse#state.regs)}.

-spec if_block_cond(state(), condition()) ->
    {state(), non_neg_integer()}.
%% Emit the test for a condition followed by a 3-byte all-1s J placeholder.
%% Every test is a conditional branch with displacement 2, i.e. it skips the
%% placeholder when the condition holds, so that the block is entered; when
%% the condition does not hold the (later patched) J jumps over the block.
%% Returns the updated state and the distance in bytes from the start of the
%% emitted code to the J placeholder.
if_block_cond(State0, {RegOrTuple, '<', 0}) ->
    %% bltz Reg, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:bltz(Reg, 2));
if_block_cond(State0, {RegOrTuple, '<', Val}) when ?IS_B4CONST(Val) ->
    %% blti Reg, Val, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:blti(Reg, Val, 2));
if_block_cond(State0, {RegOrTuple, '<', Val}) when is_integer(Val) ->
    %% load Val into Temp; blt Reg, Temp, +2
    if_block_cond_imm(State0, RegOrTuple, Val, fun(Reg, Temp) ->
        jit_xtensa_asm:blt(Reg, Temp, 2)
    end);
if_block_cond(State0, {Val, '<', RegOrTuple}) when is_integer(Val) ->
    %% load Val into Temp; blt Temp, Reg, +2
    if_block_cond_imm(State0, RegOrTuple, Val, fun(Reg, Temp) ->
        jit_xtensa_asm:blt(Temp, Reg, 2)
    end);
if_block_cond(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) ->
    %% blt Reg, RegB, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:blt(Reg, RegB, 2));
if_block_cond(State0, {RegOrTuple, '==', 0}) ->
    %% beqz Reg, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:beqz(Reg, 2));
if_block_cond(State0, {RegOrTuple, '!=', 0}) ->
    %% bnez Reg, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:bnez(Reg, 2));
if_block_cond(State0, {RegOrTuple, '==', RegB}) when is_atom(RegB) ->
    %% beq Reg, RegB, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:beq(Reg, RegB, 2));
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(State0, {RegOrTuple, '!=', Val}) when ?IS_B4CONST(Val) ->
    %% bnei Reg, Val, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:bnei(Reg, Val, 2));
if_block_cond(State0, {RegOrTuple, '!=', Val}) when is_integer(Val) ->
    %% load Val into Temp; bne Reg, Temp, +2
    if_block_cond_imm(State0, RegOrTuple, Val, fun(Reg, Temp) ->
        jit_xtensa_asm:bne(Reg, Temp, 2)
    end);
if_block_cond(State0, {RegOrTuple, '!=', Val}) when ?IS_GPR(Val) ->
    %% bne Reg, Val, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:bne(Reg, Val, 2));
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
%% b4const fast path for == using beqi
if_block_cond(State0, {RegOrTuple, '==', Val}) when ?IS_B4CONST(Val) ->
    %% beqi Reg, Val, +2
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:beqi(Reg, Val, 2));
if_block_cond(State0, {RegOrTuple, '==', Val}) when is_integer(Val) ->
    %% load Val into Temp; beq Reg, Temp, +2
    if_block_cond_imm(State0, RegOrTuple, Val, fun(Reg, Temp) ->
        jit_xtensa_asm:beq(Reg, Temp, 2)
    end);
if_block_cond(State0, {{free, RegA}, '==', {free, RegB}}) ->
    %% beq RegA, RegB, +2; both registers are released afterwards
    {State1, JumpDelta} = if_block_cond_emit(State0, jit_xtensa_asm:beq(RegA, RegB, 2)),
    State2 = if_block_free_reg({free, RegA}, State1),
    State3 = if_block_free_reg({free, RegB}, State2),
    {State3, JumpDelta};
if_block_cond(State0, {'(bool)', RegOrTuple, '==', false}) ->
    %% Condition: Reg == false (== 0). The branch skips the J when the
    %% condition IS met so that the block is entered.
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:beqz(Reg, 2));
if_block_cond(State0, {'(bool)', RegOrTuple, '!=', false}) ->
    %% Condition: Reg != false (!= 0). The branch skips the J when the
    %% condition IS met so that the block is entered.
    Reg = if_block_cond_reg(RegOrTuple),
    if_block_cond_branch(State0, RegOrTuple, jit_xtensa_asm:bnez(Reg, 2));
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        regs = Regs0
    } = State0,
    {RegOrTuple, '&', Val, '!=', 0}
) ->
    Temp = if_block_cond_temp(Avail),
    Reg = if_block_cond_reg(RegOrTuple),
    %% Xtensa has no andi instruction - always use li+and_
    LoadMask = mov_immediate(Temp, Val),
    AndInstr = jit_xtensa_asm:and_(Temp, Reg, Temp),
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, <<LoadMask/binary, AndInstr/binary>>),
    TestSize = StreamModule:offset(Stream1) - OffsetBefore,
    State1 = State0#state{stream = Stream1, regs = jit_regs:invalidate_reg(Regs0, Temp)},
    %% bnez Temp, +2 (skip the J when the masked value is non-zero)
    {State2, BranchSize} = if_block_cond_emit(State1, jit_xtensa_asm:bnez(Temp, 2)),
    {if_block_free_reg(RegOrTuple, State2), TestSize + BranchSize};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        regs = Regs0
    } = State0,
    {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    Temp = if_block_cond_temp(Avail),
    %% NOT(Reg) via movi -1 + xor, then slli to isolate low bits.
    I1 = <<(jit_xtensa_asm:movi(Temp, -1))/binary, (jit_xtensa_asm:xor_(Temp, Temp, Reg))/binary>>,
    I2 = jit_xtensa_asm:slli(Temp, Temp, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State1 = State0#state{stream = Stream1, regs = jit_regs:invalidate_reg(Regs0, Temp)},
    {State2, BranchSize} = if_block_cond_emit(State1, jit_xtensa_asm:bnez(Temp, 2)),
    {State2, byte_size(I1) + byte_size(I2) + BranchSize};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        regs = Regs0
    } = State0,
    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% NOT(Reg) in-place via neg + addi -1, then slli to isolate low bits.
    I1 = <<(jit_xtensa_asm:neg(Reg, Reg))/binary, (jit_xtensa_asm:addi(Reg, Reg, -1))/binary>>,
    I2 = jit_xtensa_asm:slli(Reg, Reg, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State1 = State0#state{stream = Stream1, regs = jit_regs:invalidate_reg(Regs0, Reg)},
    {State2, BranchSize} = if_block_cond_emit(State1, jit_xtensa_asm:bnez(Reg, 2)),
    {if_block_free_reg(RegTuple, State2), byte_size(I1) + byte_size(I2) + BranchSize};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg), Val =/= 0 ->
    %% Reg must be preserved: work on a copy in Temp.
    Temp = if_block_cond_temp(Avail),
    AT = Avail band (bnot reg_bit(Temp)),
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, jit_xtensa_asm:mov(Temp, Reg)),
    State1 = State0#state{stream = Stream1, available_regs = AT},
    {State2, Temp} = and_(State1, {free, Temp}, Mask),
    case Val of
        _ when ?IS_GPR(Val) ->
            TestSize = StreamModule:offset(State2#state.stream) - OffsetBefore,
            {State3, BranchSize} = if_block_cond_emit(
                State2, jit_xtensa_asm:bne(Temp, Val, 2)
            ),
            State4 = State3#state{
                available_regs = State2#state.available_regs bor reg_bit(Temp)
            },
            {State4, TestSize + BranchSize};
        _ ->
            %% Val is an immediate - need second temp register
            MaskReg = if_block_cond_temp(AT),
            AT2 = AT band (bnot reg_bit(MaskReg)),
            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
            TestSize = StreamModule:offset(State3#state.stream) - OffsetBefore,
            {State4, BranchSize} = if_block_cond_emit(
                State3, jit_xtensa_asm:bne(Temp, MaskReg, 2)
            ),
            State5 = State4#state{
                available_regs =
                    State3#state.available_regs bor reg_bit(Temp) bor reg_bit(MaskReg)
            },
            {State5, TestSize + BranchSize}
    end;
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailRegs
    } = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg), Val =/= 0 ->
    OffsetBefore = StreamModule:offset(Stream0),
    %% Reg is freed by the caller, so it can be masked in place.
    {State1, Reg} = and_(State0, RegTuple, Mask),
    case Val of
        _ when ?IS_GPR(Val) ->
            TestSize = StreamModule:offset(State1#state.stream) - OffsetBefore,
            {State2, BranchSize} = if_block_cond_emit(
                State1, jit_xtensa_asm:bne(Reg, Val, 2)
            ),
            {if_block_free_reg(RegTuple, State2), TestSize + BranchSize};
        _ ->
            %% Val is an immediate - need temp register
            %% NOTE(review): unlike the other clauses there is no ?A8_REG
            %% fallback here when no register is available - confirm intended.
            MaskReg = first_avail(State1#state.available_regs),
            AT = State1#state.available_regs band (bnot reg_bit(MaskReg)),
            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
            TestSize = StreamModule:offset(State2#state.stream) - OffsetBefore,
            {State3, BranchSize} = if_block_cond_emit(
                State2, jit_xtensa_asm:bne(Reg, MaskReg, 2)
            ),
            State4 = State3#state{available_regs = AvailRegs},
            {if_block_free_reg(RegTuple, State4), TestSize + BranchSize}
    end.

%% Strip the optional {free, Reg} wrapper of a condition operand.
-spec if_block_cond_reg(maybe_free_xtensa_register()) -> xtensa_register().
if_block_cond_reg({free, Reg}) -> Reg;
if_block_cond_reg(Reg) -> Reg.

%% Pick a scratch register for a test: the first available one, or ?A8_REG
%% when the mask of available registers is empty.
-spec if_block_cond_temp(non_neg_integer()) -> xtensa_register().
if_block_cond_temp(0) -> ?A8_REG;
if_block_cond_temp(Avail) -> first_avail(Avail).

%% Append Branch followed by the 3-byte all-1s J placeholder. The second
%% element of the result is the distance from Branch to the placeholder.
-spec if_block_cond_emit(state(), binary()) -> {state(), non_neg_integer()}.
if_block_cond_emit(#state{stream_module = StreamModule, stream = Stream0} = State, Branch) ->
    JPlaceholder = <<16#FF, 16#FF, 16#FF>>,
    Stream1 = StreamModule:append(Stream0, <<Branch/binary, JPlaceholder/binary>>),
    {State#state{stream = Stream1}, byte_size(Branch)}.

%% Test made of a single branch instruction on RegOrTuple; the register is
%% released afterwards when it was passed as {free, Reg}.
-spec if_block_cond_branch(state(), maybe_free_xtensa_register(), binary()) ->
    {state(), non_neg_integer()}.
if_block_cond_branch(State0, RegOrTuple, Branch) ->
    {State1, JumpDelta} = if_block_cond_emit(State0, Branch),
    {if_block_free_reg(RegOrTuple, State1), JumpDelta}.

%% Test against an integer that has to be loaded into a scratch register
%% first. BranchFun receives the tested register and the scratch register and
%% returns the encoded branch instruction.
-spec if_block_cond_imm(
    state(),
    maybe_free_xtensa_register(),
    integer(),
    fun((xtensa_register(), xtensa_register()) -> binary())
) -> {state(), non_neg_integer()}.
if_block_cond_imm(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0,
    RegOrTuple,
    Val,
    BranchFun
) ->
    Temp = if_block_cond_temp(Avail),
    Reg = if_block_cond_reg(RegOrTuple),
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    %% The size of the load is measured rather than assumed.
    MovSize = StreamModule:offset(State1#state.stream) - OffsetBefore,
    {State2, BranchSize} = if_block_cond_emit(State1, BranchFun(Reg, Temp)),
    {if_block_free_reg(RegOrTuple, State2), MovSize + BranchSize}.

%% Release the register of a condition operand when it was passed as
%% {free, Reg}; a bare register is left allocated.
-spec if_block_free_reg(xtensa_register() | {free, xtensa_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    Bit = reg_bit(Reg),
    AvR1 = AvR0 bor Bit,
    UR1 = UR0 band (bnot Bit),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Mark as used every register used in State or in OtherUR, and recompute the
%% available mask as the complement within ?AVAILABLE_REGS_MASK.
-spec merge_used_regs(state(), non_neg_integer()) -> state().
merge_used_regs(#state{used_regs = UR} = State, OtherUR) ->
    MergedUR = UR bor OtherUR,
    MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR),
    State#state{used_regs = MergedUR, available_regs = MergedAvail}.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register right by a fixed number of bits, effectively
%% dividing it by 2^Shift
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
-spec shift_right(#state{}, maybe_free_xtensa_register(), non_neg_integer()) ->
    {#state{}, xtensa_register()}.
shift_right(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift) andalso Shift =< 15
->
    %% Small shift of a freed register: a single in-place SRLI.
    I = jit_xtensa_asm:srli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State#state{stream = Stream1, regs = Regs1}, Reg};
shift_right(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    {free, Reg},
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    %% Shift above 15: load the amount into a scratch register, set the shift
    %% amount register with SSR, then SRL in place.
    Temp =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    I1 = jit_xtensa_asm:movi(Temp, Shift),
    I2 = jit_xtensa_asm:ssr(Temp),
    I3 = jit_xtensa_asm:srl(Reg, Reg, Reg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    %% Both the shifted register and the scratch register changed.
    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp),
    {State#state{stream = Stream1, regs = Regs1}, Reg};
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = UR,
        regs = Regs0
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift) andalso Shift =< 15
->
    %% Reg must be preserved: the result goes to a newly allocated register.
    ResultReg = first_avail(Avail),
    ResultBit = reg_bit(ResultReg),
    I = jit_xtensa_asm:srli(ResultReg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot ResultBit),
            used_regs = UR bor ResultBit,
            regs = Regs1
        },
        ResultReg
    };
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = UR,
        regs = Regs0
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    %% Shift above 15 with Reg preserved: the newly allocated result register
    %% first carries the shift amount for SSR, then receives the SRL result.
    ResultReg = first_avail(Avail),
    ResultBit = reg_bit(ResultReg),
    I1 = jit_xtensa_asm:movi(ResultReg, Shift),
    I2 = jit_xtensa_asm:ssr(ResultReg),
    I3 = jit_xtensa_asm:srl(ResultReg, Reg, Reg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot ResultBit),
            used_regs = UR bor ResultBit,
            regs = Regs1
        },
        ResultReg
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register left by a fixed number of bits, effectively
%% multiplying it by 2^Shift
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state when Reg is a bare register, `{NewState, Reg}` when it is
%% passed as `{free, Reg}`
%%-----------------------------------------------------------------------------
shift_left(State, {free, Reg}, Shift) ->
    {shift_left(State, Reg, Shift), Reg};
shift_left(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Shift
) when
    is_atom(Reg)
->
    %% The shift is done in place with SLLI.
    I = jit_xtensa_asm:slli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    State#state{stream = Stream1, regs = Regs1}.

%% Emit an arithmetic shift right (SRAI) by a fixed number of bits.
%% `{free, Reg}` is shifted in place; a bare register is preserved and the
%% result is placed in a newly allocated register. Returns the new state and
%% the register holding the result.
shift_right_arith(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_xtensa_asm:srai(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State#state{stream = Stream1, regs = Regs1}, Reg};
shift_right_arith(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = UR,
        regs = Regs0
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    ResultReg = first_avail(Avail),
    ResultBit = reg_bit(ResultReg),
    I = jit_xtensa_asm:srai(ResultReg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot ResultBit),
            used_regs = UR bor ResultBit,
            regs = Regs1
        },
        ResultReg
    }.
%% Emit a signed division (quos). The quotient replaces the dividend in
%% DividendReg, which is returned along with the updated state.
div_reg(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State,
    DividendReg,
    DivisorReg
) ->
    I = jit_xtensa_asm:quos(DividendReg, DividendReg, DivisorReg),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, DividendReg),
    {State#state{stream = Stream1, regs = Regs1}, DividendReg}.

%% Emit a signed remainder (rems). The remainder replaces the dividend in
%% DividendReg, which is returned along with the updated state.
rem_reg(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State,
    DividendReg,
    DivisorReg
) ->
    I = jit_xtensa_asm:rems(DividendReg, DividendReg, DivisorReg),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, DividendReg),
    {State#state{stream = Stream1, regs = Regs1}, DividendReg}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, xtensa_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), xtensa_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0Mask,
        used_regs = UsedRegs0Mask
    } = State0,
    FuncPtrTuple,
    Args
) ->
    AvailableRegs0 = mask_to_list(AvailableRegs0Mask),
    UsedRegs0 = mask_to_list(UsedRegs0Mask),
    % Registers released by this call, either as the function pointer or as
    % an argument.
    FreeRegs = lists:flatmap(
        fun
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,

    %% Windowed ABI: a0-a7 are preserved across CALLX8 automatically.
    %% Only a8-a15 are clobbered. We only need to save used regs in a8-a15
    %% that are not freed by this call.
    HighRegs = [a8, a9, a10, a11, a12, a13, a14, a15],
    RegsToSave = [R || R <- UsedRegs1, lists:member(R, HighRegs)],

    % Intersection of FreeRegs and ?AVAILABLE_REGS, in FreeRegs order.
    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,

    NumToSave = length(RegsToSave),
    % 4 bytes per saved register, rounded up to a multiple of 16.
    AlignedStackBytes =
        if
            NumToSave > 0 -> ((NumToSave * 4 + 15) div 16) * 16;
            true -> 0
        end,
    Stream1 = push_registers(RegsToSave, AlignedStackBytes, StreamModule, Stream0),

    % The `offset' pseudo-argument is replaced with the stream offset reached
    % once the registers have been saved.
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),

    RegArgs0 = Args1,
    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),

    % Registers available for set_registers_args:
    % - saved regs (will be restored after call) that are not arg sources
    % - previously available regs
    %
    %% Only HIGH used regs (a8-a15) are pushed to the stack via
    %% push_registers and restored after the call. Low used regs (a5-a7)
    %% are preserved by the windowed ABI but NOT backed up, so if we
    %% use them as scratch here (e.g. to hold the primitive func ptr)
    %% their live contents are silently clobbered. Only RegsToSave is
    %% safe to reuse as scratch among the currently-used registers.
    SetArgsRegsOnlyAvailableArgs = (RegsToSave -- RegArgsRegs) ++ AvailableRegs0,
    State1 = State0#state{
        available_regs = jit_regs:regs_to_mask(SetArgsRegsOnlyAvailableArgs, fun reg_bit/1),
        used_regs = jit_regs:regs_to_mask(
            ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, fun reg_bit/1
        ),
        stream = Stream1
    },

    ParameterRegs = parameter_regs(RegArgs0),
    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
        case FuncPtrTuple of
            {free, FuncPtrReg0} ->
                % If FuncPtrReg is in parameter regs, we must move it out.
                case lists:member(FuncPtrReg0, ParameterRegs) of
                    true ->
                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                            [] when SetArgsRegsOnlyAvailableArgs =:= [] ->
                                % No available registers at all, use ?A8_REG
                                MovInstr = jit_xtensa_asm:mov(?A8_REG, FuncPtrReg0),
                                SetArgsAvailableArgs1 = [FuncPtrReg0],
                                {
                                    StreamModule:append(State1#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    ?A8_REG,
                                    RegArgs0
                                };
                            [] ->
                                % Swap with a reg used in RegArgs0 that is not in ParameterRegs
                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
                                MovInstr1 = jit_xtensa_asm:mov(NewArgReg, FuncPtrReg1),
                                MovInstr2 = jit_xtensa_asm:mov(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
                                {
                                    % The argument must be saved to NewArgReg
                                    % before FuncPtrReg1 is overwritten.
                                    StreamModule:append(
                                        State1#state.stream,
                                        <<MovInstr1/binary, MovInstr2/binary>>
                                    ),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs1
                                };
                            [FuncPtrReg1 | _] ->
                                MovInstr = jit_xtensa_asm:mov(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                {
                                    StreamModule:append(State1#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs0
                                }
                        end;
                    false ->
                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
                end;
            {primitive, Primitive} ->
                % Load the primitive pointer in a register that is not a
                % parameter register, falling back to ?A8_REG.
                FuncPtrReg0 =
                    case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                        [] -> ?A8_REG;
                        [R | _] -> R
                    end,
                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
        end,

    State3 = State1#state{
        available_regs = jit_regs:regs_to_mask(SetArgsAvailableRegs, fun reg_bit/1),
        used_regs = jit_regs:regs_to_mask(?AVAILABLE_REGS -- SetArgsAvailableRegs, fun reg_bit/1),
        stream = Stream3
    },

    StackOffset = AlignedStackBytes,
    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
    Stream4 = State4#state.stream,

    %% Call the function pointer using callx8 (windowed ABI)
    Call = jit_xtensa_asm:callx8(FuncPtrReg),
    Stream5 = StreamModule:append(Stream4, Call),

    %% Return value is in a10 (callee's a2 mapped back to caller's a10).
    %% Pick a result register from the available registers.
    %% After CALLX8, a8-a15 are all clobbered, so they are all available
    %% (except for any we still need to restore from stack).
    %% a5-a7 are preserved and may still hold live values.
    PostCallAvail = (AvailableRegs1 -- HighRegs) ++ (HighRegs -- RegsToSave),
    {Stream6, UsedRegs2, ResultReg} =
        case PostCallAvail of
            [ResultReg0 | _] ->
                case ResultReg0 of
                    a10 ->
                        %% Result already in the right register
                        {Stream5, [a10 | UsedRegs1], a10};
                    _ ->
                        MoveResult = jit_xtensa_asm:mov(ResultReg0, a10),
                        {
                            StreamModule:append(Stream5, MoveResult),
                            [ResultReg0 | UsedRegs1],
                            ResultReg0
                        }
                end;
            [] ->
                %% Fallback: use ?A8_REG
                MoveResult = jit_xtensa_asm:mov(?A8_REG, a10),
                {StreamModule:append(Stream5, MoveResult), [?A8_REG | UsedRegs1], ?A8_REG}
        end,

    Stream8 = pop_registers(RegsToSave, AlignedStackBytes, StreamModule, Stream6),

    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    % Keep only registers that belong to ?AVAILABLE_REGS, in that order.
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    %% Invalidate ALL register tracking after a call. Even though a0-a7 are
    %% preserved by the windowed ABI, the called function may trigger GC which
    %% moves heap objects. Any cached heap pointers in registers become stale.
    Regs1 = jit_regs:invalidate_all(State0#state.regs),
    {
        State4#state{
            stream = Stream8,
            available_regs = jit_regs:regs_to_mask(AvailableRegs3, fun reg_bit/1),
            used_regs = jit_regs:regs_to_mask(UsedRegs2, fun reg_bit/1),
            regs = Regs1
        },
        ResultReg
    }.
%% Return the native register referenced by a call argument as a one-element
%% list, or the empty list when the argument does not name a register.
arg_to_reg_list(Arg) ->
    case Arg of
        {free, {ptr, Reg}} -> [Reg];
        {free, Reg} when is_atom(Reg) -> [Reg];
        Reg when is_atom(Reg) -> [Reg];
        _ -> []
    end.

%% Emit one s32i per register to save it in the ENTRY frame.
%% Windowed ABI: registers are stored at positive offsets from SP (a1),
%% starting at ?FRAME_LOCAL_OFFSET (above the window save areas), which
%% avoids conflicts with the window overflow handler.
push_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
push_registers([_ | _] = SavedRegs, _AlignedStackBytes, StreamModule, Stream) ->
    {StreamOut, _NextOffset} = lists:foldl(
        fun(Reg, {Acc, Offset}) ->
            Store = jit_xtensa_asm:s32i(Reg, a1, Offset),
            {StreamModule:append(Acc, Store), Offset + 4}
        end,
        {Stream, ?FRAME_LOCAL_OFFSET},
        SavedRegs
    ),
    StreamOut.

%% Emit one l32i per register to restore it from the ENTRY frame, using the
%% same positive offsets from SP (a1) as push_registers/4.
pop_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
pop_registers([_ | _] = SavedRegs, _AlignedStackBytes, StreamModule, Stream) ->
    {StreamOut, _NextOffset} = lists:foldl(
        fun(Reg, {Acc, Offset}) ->
            Load = jit_xtensa_asm:l32i(Reg, a1, Offset),
            {StreamModule:append(Acc, Load), Offset + 4}
        end,
        {Stream, ?FRAME_LOCAL_OFFSET},
        SavedRegs
    ),
    StreamOut.
%% Emit the code that places Args into ParamRegs and return the state with
%% updated register masks: parameter registers are marked used and registers
%% passed as `{free, _}' arguments are released.
set_registers_args(
    #state{used_regs = UsedRegsMask} = State0,
    Args,
    ParamRegs,
    StackOffset
) ->
    UsedRegs = mask_to_list(UsedRegsMask),
    ArgsRegs = args_regs(Args),
    % Scratch registers: neither a parameter register, nor the source of an
    % argument, nor currently used.
    AvailableScratchGP = ((?AVAILABLE_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
    State1 = set_registers_args0(
        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
    ),
    NewUsedRegs = lists:foldl(
        fun
            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
            (_, AccUsed) -> AccUsed
        end,
        UsedRegs,
        Args
    ),
    State1#state{
        % `--' is right-associative: without the parentheses this would
        % evaluate as ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs) and
        % report used registers as available.
        available_regs = jit_regs:regs_to_mask(
            (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs, fun reg_bit/1
        ),
        used_regs = jit_regs:regs_to_mask(ParamRegs ++ (NewUsedRegs -- ParamRegs), fun reg_bit/1)
    }.

%% Compute the list of parameter registers receiving Args, taken in order
%% from ?PARAMETER_REGS.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, []).

% ILP32 (Xtensa windowed ABI): 64-bit args require even register number
% alignment. Caller-side parameter registers a10..a15 map to the callee's
% a2..a7 after CALL8, so even-aligned pairs are (a10,a11), (a12,a13),
% (a14,a15); a11/a13/a15 are odd (callee a3/a5/a7) and cannot start a pair,
% so the int64 is placed in the next aligned pair and the odd register is
% left as padding.
%% Assign parameter registers to arguments, in order. A 64-bit argument takes
%% an aligned register pair ((a10,a11), (a12,a13) or (a14,a15)), skipping one
%% register when the next free one cannot start a pair; any other argument
%% takes the next register.
parameter_regs0([], _Regs, Acc) ->
    lists:reverse(Acc);
parameter_regs0([{avm_int64_t, _} | Args], Regs, Acc) ->
    case Regs of
        [a10, a11 | Rest] -> parameter_regs0(Args, Rest, [a11, a10 | Acc]);
        [a11, a12, a13 | Rest] -> parameter_regs0(Args, Rest, [a13, a12 | Acc]);
        [a12, a13 | Rest] -> parameter_regs0(Args, Rest, [a13, a12 | Acc]);
        [a13, a14, a15 | Rest] -> parameter_regs0(Args, Rest, [a15, a14 | Acc]);
        [a14, a15 | Rest] -> parameter_regs0(Args, Rest, [a15, a14 | Acc]);
        _ -> error({cannot_align_int64, Regs})
    end;
parameter_regs0([_Arg | Args], [NextReg | Rest], Acc) ->
    parameter_regs0(Args, Rest, [NextReg | Acc]).

%% Replace the first argument that is Reg1 or {free, Reg1} with Reg2.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).

%% Walk the argument list, accumulating the elements seen so far in reverse
%% order until the register to replace is found.
replace_reg0([Head | Tail], Reg, Replacement, Seen) when
    Head =:= Reg; Head =:= {free, Reg}
->
    lists:reverse(Seen, [Replacement | Tail]);
replace_reg0([Head | Tail], Reg, Replacement, Seen) ->
    replace_reg0(Tail, Reg, Replacement, [Head | Seen]).
%% Place each argument into its parameter register, in order. The third
%% argument is the list of source registers of the remaining arguments and
%% the fifth the scratch registers that may be used to preserve a parameter
%% register still needed as the source of a later argument.
set_registers_args0(State, [], [], [], _Scratch, _StackOffset) ->
    State;
set_registers_args0(State, [{free, Inner} | RestArgs], ArgsRegs, ParamRegs, Scratch, StackOffset) ->
    % The `free' wrapper does not change how the value is loaded.
    set_registers_args0(State, [Inner | RestArgs], ArgsRegs, ParamRegs, Scratch, StackOffset);
set_registers_args0(
    State, [ctx | RestArgs], [?CTX_REG | RestArgsRegs], [?CTX_REG | RestParamRegs], Scratch, StackOffset
) ->
    % ctx is already in its parameter register.
    set_registers_args0(State, RestArgs, RestArgsRegs, RestParamRegs, Scratch, StackOffset);
% Handle 64-bit arguments that need two registers according to ILP32
set_registers_args0(
    State,
    [{avm_int64_t, Value} | RestArgs],
    ArgsRegs,
    ParamRegs,
    Scratch,
    StackOffset
) when is_integer(Value) ->
    Low = Value band 16#FFFFFFFF,
    High = (Value bsr 32) band 16#FFFFFFFF,
    % Continue with two 32-bit immediates, low word first.
    set_registers_args0(
        State, [Low, High | RestArgs], [imm | ArgsRegs], ParamRegs, Scratch, StackOffset
    );
% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't
% want to replace it
set_registers_args0(
    State0, [Arg | RestArgs], [_ArgReg | RestArgsRegs], [?CTX_REG | RestParamRegs], Scratch, StackOffset
) ->
    false = lists:member(?CTX_REG, RestArgsRegs),
    State1 = set_registers_args1(State0, Arg, ?CTX_REG, StackOffset),
    set_registers_args0(State1, RestArgs, RestArgsRegs, RestParamRegs, Scratch, StackOffset);
set_registers_args0(
    #state{stream_module = StreamModule} = State0,
    [Arg | RestArgs],
    [_ArgReg | RestArgsRegs],
    [ParamReg | RestParamRegs],
    Scratch,
    StackOffset
) ->
    case lists:member(ParamReg, RestArgsRegs) of
        true ->
            % ParamReg is the source of a later argument: copy it to a
            % scratch register first and make later arguments read the copy.
            [Spare | RestScratch] = Scratch,
            Save = jit_xtensa_asm:mov(Spare, ParamReg),
            Saved = State0#state{stream = StreamModule:append(State0#state.stream, Save)},
            State1 = set_registers_args1(Saved, Arg, ParamReg, StackOffset),
            RenamedArgs = replace_reg(RestArgs, ParamReg, Spare),
            set_registers_args0(
                State1, RenamedArgs, RestArgsRegs, RestParamRegs, RestScratch, StackOffset
            );
        false ->
            State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset),
            set_registers_args0(State1, RestArgs, RestArgsRegs, RestParamRegs, Scratch, StackOffset)
    end.

%% Emit the code loading a single argument into a parameter register.
set_registers_args1(State, Reg, Reg, _StackOffset) ->
    % Argument is already in the target register.
    State;
%% ctx is always in a2 (?CTX_REG)
set_registers_args1(State, ctx, ?CTX_REG, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State, ctx, ParamReg, _StackOffset
) ->
    Mov = jit_xtensa_asm:mov(ParamReg, ?CTX_REG),
    State#state{stream = StreamModule:append(Stream0, Mov)};
%% jit_state is always in a3 (?JITSTATE_REG)
set_registers_args1(State, jit_state, ?JITSTATE_REG, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    jit_state,
    ParamReg,
    _StackOffset
) ->
    Mov = jit_xtensa_asm:mov(ParamReg, ?JITSTATE_REG),
    State#state{stream = StreamModule:append(Stream0, Mov)};
%% For tail calls, jit_state is in ?JITSTATE_REG (a3).
%% In windowed ABI, it still needs to be moved to the parameter register
%% (a11 etc.) since the callee will see it in a different window.
set_registers_args1(State, jit_state_tail_call, ?JITSTATE_REG, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    jit_state_tail_call,
    ParamReg,
    _StackOffset
) ->
    Mov = jit_xtensa_asm:mov(ParamReg, ?JITSTATE_REG),
    State#state{stream = StreamModule:append(Stream0, Mov)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    {x_reg, extra},
    ParamReg,
    _StackOffset
) ->
    {Base, Offset} = ?X_REG(?MAX_REG),
    Load = jit_xtensa_asm:l32i(ParamReg, Base, Offset),
    State#state{stream = StreamModule:append(Stream0, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    {x_reg, X},
    ParamReg,
    _StackOffset
) ->
    {Base, Offset} = ?X_REG(X),
    Load = jit_xtensa_asm:l32i(ParamReg, Base, Offset),
    State#state{stream = StreamModule:append(Stream0, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State,
    {ptr, Source},
    ParamReg,
    _StackOffset
) ->
    % Dereference the pointer held in Source.
    Load = jit_xtensa_asm:l32i(ParamReg, Source, 0),
    State#state{stream = StreamModule:append(Stream0, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs, regs = Regs0} =
        State,
    {y_reg, Y},
    ParamReg,
    _StackOffset
) ->
    Load = ldr_y_reg(ParamReg, Y, AvailRegs),
    Stream1 = StreamModule:append(Stream0, Load),
    % The first available register, if any, is no longer tracked after the
    % y_reg load.
    Regs1 =
        case AvailRegs of
            0 -> Regs0;
            _ -> jit_regs:invalidate_reg(Regs0, first_avail(AvailRegs))
        end,
    State#state{stream = Stream1, regs = Regs1};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream0} = State, SrcReg, ParamReg, _StackOffset
) when
    ?IS_GPR(SrcReg)
->
    Mov = jit_xtensa_asm:mov(ParamReg, SrcReg),
    State#state{stream = StreamModule:append(Stream0, Mov)};
set_registers_args1(State, Imm, ParamReg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Imm) ->
    mov_immediate(State, ParamReg, Imm).

%%-----------------------------------------------------------------------------
%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
%% from an immediate, a native register or another vm register.
%% @end
%% @param State current backend state
%% @param Src value to move to vm register
%% @param Dest vm register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
    state().
move_to_vm_register(#state{regs = Regs0} = State, Src, Dest) ->
    case vm_dest_to_contents(Dest) of
        unknown ->
            % Destination is not tracked: just emit the move.
            move_to_vm_register_emit(State, Src, Dest);
        Contents ->
            % Forget registers caching the previous value of Dest.
            Invalidated = jit_regs:invalidate_vm_loc(Regs0, Contents),
            State1 = move_to_vm_register_emit(State#state{regs = Invalidated}, Src, Dest),
            case is_atom(Src) of
                true ->
                    % Src is a native register that now mirrors Dest.
                    Tracked = jit_regs:set_contents(State1#state.regs, Src, Contents),
                    State1#state{regs = Tracked};
                false ->
                    State1
            end
    end.
%% Emit the instructions for move_to_vm_register/3. Clauses that need a
%% scratch register take the first available register, falling back to
%% ?A8_REG when the available mask is empty.

% Native register to VM register
move_to_vm_register_emit(State0, Src, {x_reg, extra}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_xtensa_asm:s32i(Src, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register_emit(State0, Src, {x_reg, X}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(X),
    I1 = jit_xtensa_asm:s32i(Src, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) ->
    I1 = jit_xtensa_asm:s32i(Src, Reg, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register_emit(
    #state{available_regs = Avail, regs = Regs0} = State0, Src, {y_reg, Y}
) when
    is_atom(Src)
->
    Temp1 =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    % Registers still available to str_y_reg once Temp1 is taken.
    AT = Avail band (bnot reg_bit(Temp1)),
    Code = str_y_reg(Src, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp1),
    % The first remaining available register, if any, is also invalidated.
    Regs2 =
        case AT of
            0 -> Regs1;
            _ -> jit_regs:invalidate_reg(Regs1, first_avail(AT))
        end,
    State0#state{stream = Stream1, regs = Regs2};
% Source is a small integer (0..255) to y_reg: load the immediate in a
% scratch register, then store it to the y register
move_to_vm_register_emit(
    #state{available_regs = Avail, regs = Regs0} = State0, N, {y_reg, Y}
) when
    is_integer(N), N >= 0, N =< 255
->
    % NOTE(review): when no register is available both Temp1 and Temp2 fall
    % back to ?A8_REG - confirm callers never reach this clause with an
    % empty pool.
    Temp1 =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    Avail2 = Avail band (bnot reg_bit(Temp1)),
    Temp2 =
        case Avail2 of
            0 -> ?A8_REG;
            _ -> first_avail(Avail2)
        end,
    AT = Avail2 band (bnot reg_bit(Temp2)),
    I1 = mov_immediate(Temp2, N),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(
        State0#state.stream, <<I1/binary, YCode/binary>>
    ),
    Regs1a = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp1), Temp2),
    Regs1 =
        case AT of
            0 -> Regs1a;
            _ -> jit_regs:invalidate_reg(Regs1a, first_avail(AT))
        end,
    State0#state{stream = Stream1, regs = Regs1};
% Source is a small integer (0..255): load it in a scratch register and
% store the register
move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when
    is_integer(N), N >= 0, N =< 255
->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    % Temp is hidden from the nested call and made available again after.
    AT = AR0 band (bnot reg_bit(Temp)),
    I1 = mov_immediate(Temp, N),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}),
    State1#state{available_regs = AR0, regs = Regs1};
%% Handle large values using simple literal pool (branch-over pattern)
move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when
    is_integer(N)
->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    AT = AR0 band (bnot reg_bit(Temp)),
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
    State2 = move_to_vm_register(State1, Temp, Dest),
    Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}),
    State2#state{available_regs = AR0, regs = Regs1};
% Source is a VM register
move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) ->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    AT = AR0 band (bnot reg_bit(Temp)),
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_xtensa_asm:l32i(Temp, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, extra}),
    State1#state{available_regs = AR0, regs = Regs1};
move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) ->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    AT = AR0 band (bnot reg_bit(Temp)),
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_xtensa_asm:l32i(Temp, XReg, X_REGOffset),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}),
    State1#state{available_regs = AR0, regs = Regs1};
move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) ->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    AT = AR0 band (bnot reg_bit(Temp)),
    I1 = jit_xtensa_asm:l32i(Temp, Reg, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp),
    State1#state{available_regs = AR0, regs = Regs1};
move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) ->
    Temp =
        case AR0 of
            0 -> ?A8_REG;
            _ -> first_avail(AR0)
        end,
    AT = AR0 band (bnot reg_bit(Temp)),
    Code = ldr_y_reg(Temp, Y, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    Regs1 =
        case AT of
            0 -> State0#state.regs;
            _ -> jit_regs:invalidate_reg(State0#state.regs, first_avail(AT))
        end,
    State1 = move_to_vm_register(
        State0#state{stream = Stream1, available_regs = AT, regs = Regs1}, Temp, Dest
    ),
    Regs2 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}),
    State1#state{available_regs = AR0, regs = Regs2};
% term_to_float
move_to_vm_register_emit(
    #state{
        stream_module = StreamModule,
        available_regs = Avail,
        stream = Stream0,
        variant = Variant
    } =
        State0,
    {free, {ptr, Reg, 1}},
    {fp_reg, F}
) ->
    Temp1 = first_avail(Avail),
    Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))),
    {BaseReg, Off} = ?FP_REGS,
    % Temp1: pointer to the fp registers; Temp2: first word after the header
    % of the term pointed to by Reg.
    I1 = jit_xtensa_asm:l32i(Temp1, BaseReg, Off),
    I2 = jit_xtensa_asm:l32i(Temp2, Reg, 4),
    case Variant band ?JIT_VARIANT_FLOAT32 of
        0 ->
            % Double precision: write both 32-bit parts
            I3 = jit_xtensa_asm:s32i(Temp2, Temp1, F * 8),
            I4 = jit_xtensa_asm:l32i(Temp2, Reg, 8),
            I5 = jit_xtensa_asm:s32i(Temp2, Temp1, F * 8 + 4),
            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
        _ ->
            % Single precision: write only first 32-bit part
            I3 = jit_xtensa_asm:s32i(Temp2, Temp1, F * 4),
            Code = <<I1/binary, I2/binary, I3/binary>>
    end,
    Stream1 = StreamModule:append(Stream0, Code),
    State1 = free_native_register(State0, Reg),
    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp1), Temp2),
    State1#state{stream = Stream1, regs = Regs1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
%% @end
%% @param State current backend state
%% @param Reg base register of the array
%% @param Index index in the array, as an integer or a native register
%% @param Dest vm or native register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_array_element(
    state(),
    xtensa_register(),
    non_neg_integer() | xtensa_register(),
    vm_register() | xtensa_register()
) -> state().
% Constant index, destination is an x register: go through a scratch register.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
    Temp = first_avail(Avail),
    I1 = jit_xtensa_asm:l32i(Temp, Reg, Index * 4),
    {BaseReg, Off} = ?X_REG(X),
    I2 = jit_xtensa_asm:s32i(Temp, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}),
    % Temp now mirrors the x register that was just written.
    Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}),
    State#state{stream = Stream1, regs = Regs2};
% Constant index, destination is the word pointed to by Dest.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    Reg,
    Index,
    {ptr, Dest}
) when is_atom(Reg) andalso is_integer(Index) ->
    Temp = first_avail(Avail),
    I1 = jit_xtensa_asm:l32i(Temp, Reg, Index * 4),
    I2 = jit_xtensa_asm:s32i(Temp, Dest, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
    State#state{stream = Stream1, regs = Regs1};
% Constant index, destination is a y register: two scratch registers, one for
% the element and one for str_y_reg.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_atom(Reg) andalso is_integer(Index) ->
    Temp1 = first_avail(Avail),
    Avail2 = Avail band (bnot reg_bit(Temp1)),
    Temp2 = first_avail(Avail2),
    AT = Avail2 band (bnot reg_bit(Temp2)),
    I1 = jit_xtensa_asm:l32i(Temp2, Reg, Index * 4),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:invalidate_vm_loc(Regs0, {y_reg, Y}),
    Regs2 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1, Temp1), Temp2),
    State#state{stream = Stream1, regs = Regs2};
% Constant index, freed base register, destination is a y register: the base
% register is reused to hold the element.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    Temp = first_avail(Avail),
    AT = Avail band (bnot reg_bit(Temp)),
    I1 = jit_xtensa_asm:l32i(Reg, Reg, Index * 4),
    YCode = str_y_reg(Reg, Y, Temp, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:invalidate_vm_loc(Regs0, {y_reg, Y}),
    Regs2 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1, Reg), Temp),
    State#state{stream = Stream1, regs = Regs2};
% Constant index, destination is a native register.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    I1 = jit_xtensa_asm:l32i(Dest, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    % Dest was overwritten: drop whatever was tracked for it.
    Regs1 = jit_regs:invalidate_reg(Regs0, Dest),
    State#state{stream = Stream1, regs = Regs1};
% Index in a freed register, destination is an x register: the index register
% is turned into the element address, then the element, and is released.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0,
        regs = Regs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    I1 = jit_xtensa_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_xtensa_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_xtensa_asm:l32i(IndexReg, IndexReg, 0),
    {BaseReg, Off} = ?X_REG(X),
    I4 = jit_xtensa_asm:s32i(IndexReg, BaseReg, Off),
    Bit = reg_bit(IndexReg),
    AvailableRegs1 = AvailableRegs0 bor Bit,
    UsedRegs1 = UsedRegs0 band (bnot Bit),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}),
    % IndexReg was overwritten before being released.
    Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1,
        regs = Regs2
    };
% Index in a freed register, destination is the word pointed to by PtrReg.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0,
        regs = Regs0
    } = State,
    Reg,
    {free, IndexReg},
    {ptr, PtrReg}
) when is_atom(IndexReg) ->
    I1 = jit_xtensa_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_xtensa_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_xtensa_asm:l32i(IndexReg, IndexReg, 0),
    I4 = jit_xtensa_asm:s32i(IndexReg, PtrReg, 0),
    Bit = reg_bit(IndexReg),
    AvailableRegs1 = AvailableRegs0 bor Bit,
    UsedRegs1 = UsedRegs0 band (bnot Bit),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    % IndexReg was overwritten before being released.
    Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1,
        regs = Regs1
    };
% Index in a freed register, destination is a y register.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0,
        regs = Regs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when is_atom(IndexReg) ->
    Temp = first_avail(AvailableRegs0),
    AT = AvailableRegs0 band (bnot reg_bit(Temp)),
    I1 = jit_xtensa_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_xtensa_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_xtensa_asm:l32i(IndexReg, IndexReg, 0),
    Code = str_y_reg(IndexReg, Y, Temp, AT),
    I4 = Code,
    Bit = reg_bit(IndexReg),
    AvailableRegs1 = AvailableRegs0 bor Bit,
    UsedRegs1 = UsedRegs0 band (bnot Bit),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    Regs1a = jit_regs:invalidate_vm_loc(Regs0, {y_reg, Y}),
    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1a, IndexReg), Temp),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1,
        regs = Regs1
    }.

%% @doc move reg[x] to a vm or native register
-spec get_array_element(
    state(), xtensa_register() | {free, xtensa_register()}, non_neg_integer()
) ->
    {state(), xtensa_register()}.
% Freed base register: the element is loaded in place of the base pointer.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        regs = Regs0
    } = State,
    {free, Reg},
    Index
) ->
    I1 = jit_xtensa_asm:l32i(Reg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    % Reg was overwritten: drop whatever was tracked for it.
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State#state{stream = Stream1, regs = Regs1}, Reg};
% Base register is preserved: the element is loaded in a newly allocated
% register (?A8_REG when none is available), returned marked as used.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = UsedRegs0,
        regs = Regs0
    } = State,
    Reg,
    Index
) ->
    ElemReg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    ElemBit = reg_bit(ElemReg),
    I1 = jit_xtensa_asm:l32i(ElemReg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    % ElemReg was overwritten: drop whatever was tracked for it.
    Regs1 = jit_regs:invalidate_reg(Regs0, ElemReg),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot ElemBit),
            used_regs = UsedRegs0 bor ElemBit,
            regs = Regs1
        },
        ElemReg
    }.

%% @doc move an integer, a vm or native register to reg[x]
-spec move_to_array_element(
    state(), integer() | vm_register() | xtensa_register(), xtensa_register(), non_neg_integer()
) -> state().
% Value in a native register, constant index: single store.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    ValueReg,
    Reg,
    Index
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_xtensa_asm:s32i(ValueReg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State0#state{stream = Stream1};
% Value in a native register, index in a native register: compute the
% element address (Reg + IndexReg * 4) in a scratch register.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State0,
    ValueReg,
    Reg,
    IndexReg
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    Temp =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    I1 = jit_xtensa_asm:mov(Temp, IndexReg),
    I2 = jit_xtensa_asm:slli(Temp, Temp, 2),
    I3 = jit_xtensa_asm:add(Temp, Reg, Temp),
    I4 = jit_xtensa_asm:s32i(ValueReg, Temp, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
    State0#state{stream = Stream1, regs = Regs1};
% Any other value: copy it to a native register first, released afterwards.
move_to_array_element(
    State0,
    Value,
    Reg,
    Index
) ->
    {State1, Temp} = copy_to_native_register(State0, Value),
    State2 = move_to_array_element(State1, Temp, Reg, Index),
    free_native_register(State2, Temp).

%% Move a value to BaseReg[IndexReg + Offset], where the index is either an
%% integer or a native register and Offset is an integer.
move_to_array_element(
    State,
    Value,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) ->
    % Constant index: fold the offset and use the 4-arity version.
    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    ValueReg,
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    Temp =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    % Temp = BaseReg + (IndexReg + Offset) * 4
    I1 = jit_xtensa_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_xtensa_asm:slli(Temp, Temp, 2),
    I3 = jit_xtensa_asm:add(Temp, BaseReg, Temp),
    I4 = jit_xtensa_asm:s32i(ValueReg, Temp, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
    State#state{stream = Stream1, regs = Regs1};
move_to_array_element(
    State0,
    Value,
    BaseReg,
    IndexReg,
    Offset
) ->
    % Value is not in a native register: copy it first, release it afterwards.
    {State1, ValueReg} = copy_to_native_register(State0, Value),
    Temp =
        case State1#state.available_regs of
            0 -> ?A8_REG;
            _ -> first_avail(State1#state.available_regs)
        end,
    I1 = jit_xtensa_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_xtensa_asm:slli(Temp, Temp, 2),
    I3 = jit_xtensa_asm:add(Temp, BaseReg, Temp),
    I4 = jit_xtensa_asm:s32i(ValueReg, Temp, 0),
    Stream1 = (State1#state.stream_module):append(
        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    % Temp was overwritten with the element address, as in the previous clause.
    Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp),
    State2 = State1#state{stream = Stream1, regs = Regs1},
    free_native_register(State2, ValueReg).

-spec move_to_native_register(state(), value() | cp) -> {state(), xtensa_register()}.
%% Return a native register holding Value. A value already living in a native
%% register is returned as is; otherwise the register cache is consulted before
%% any load is emitted.
move_to_native_register(State, Reg) when ?IS_GPR(Reg) ->
    {State, Reg};
move_to_native_register(#state{regs = Regs} = State, Value) ->
    Contents = value_to_contents(Value),
    case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of
        {ok, CachedReg} ->
            Bit = reg_bit(CachedReg),
            #state{used_regs = Used, available_regs = Avail} = State,
            if
                Used band Bit =/= 0 ->
                    %% Already allocated and holding the value.
                    {State, CachedReg};
                Avail band Bit =/= 0 ->
                    %% Free register that still holds the value: just claim it.
                    {
                        State#state{
                            used_regs = Used bor Bit,
                            available_regs = Avail band (bnot Bit)
                        },
                        CachedReg
                    };
                true ->
                    %% Neither used nor available: cannot be claimed, reload.
                    move_to_native_register_emit(State, Value, Contents)
            end;
        _ ->
            move_to_native_register_emit(State, Value, Contents)
    end.

%% Emit the code that loads Value into a freshly allocated register and record
%% Contents for it in the register cache (when the value is cacheable).
move_to_native_register_emit(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    cp,
    Contents
) ->
    Reg = first_avail(Avail),
    RegBit = reg_bit(Reg),
    {BaseReg, Off} = ?CP,
    I1 = jit_xtensa_asm:l32i(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:set_contents(Regs0, Reg, Contents),
    {
        State#state{
            stream = Stream1,
            used_regs = Used bor RegBit,
            available_regs = Avail band (bnot RegBit),
            regs = Regs1
        },
        Reg
    };
move_to_native_register_emit(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State,
    {ptr, Reg},
    _Contents
) when is_atom(Reg) ->
    %% Dereference in place: Reg is replaced by the word it points to.
    I1 = jit_xtensa_asm:l32i(Reg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State#state{stream = Stream1, regs = Regs1}, Reg};
move_to_native_register_emit(
    #state{
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State0,
    Imm,
    Contents
) when
    is_integer(Imm)
->
    Reg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    RegBit = reg_bit(Reg),
    Regs1 = jit_regs:set_contents(Regs0, Reg, Contents),
    State1 = State0#state{
        used_regs = Used bor RegBit,
        available_regs = Avail band (bnot RegBit),
        regs = Regs1
    },
    {move_to_native_register(State1, Imm, Reg), Reg};
move_to_native_register_emit(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    {x_reg, extra},
    Contents
) ->
    Reg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    RegBit = reg_bit(Reg),
    %% The extra x register is addressed through ?X_REG(?MAX_REG).
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_xtensa_asm:l32i(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:set_contents(Regs0, Reg, Contents),
    {
        State#state{
            stream = Stream1,
            used_regs = Used bor RegBit,
            available_regs = Avail band (bnot RegBit),
            regs = Regs1
        },
        Reg
    };
move_to_native_register_emit(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    {x_reg, X},
    Contents
) when
    X < ?MAX_REG
->
    Reg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    RegBit = reg_bit(Reg),
    {BaseReg, Offset} = ?X_REG(X),
    I1 = jit_xtensa_asm:l32i(Reg, BaseReg, Offset),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:set_contents(Regs0, Reg, Contents),
    {
        State#state{
            stream = Stream1,
            used_regs = Used bor RegBit,
            available_regs = Avail band (bnot RegBit),
            regs = Regs1
        },
        Reg
    };
move_to_native_register_emit(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    {y_reg, Y},
    Contents
) ->
    Reg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    RegBit = reg_bit(Reg),
    AvailT = Avail band (bnot RegBit),
    Code = ldr_y_reg(Reg, Y, AvailT),
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1a = jit_regs:set_contents(Regs0, Reg, Contents),
    %% ldr_y_reg is handed the remaining available registers; the first of them
    %% is treated as clobbered.
    Regs1 =
        case AvailT of
            0 -> Regs1a;
            _ -> jit_regs:invalidate_reg(Regs1a, first_avail(AvailT))
        end,
    {
        State#state{
            stream = Stream1,
            available_regs = AvailT,
            used_regs = Used bor RegBit,
            regs = Regs1
        },
        Reg
    };
move_to_native_register_emit(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    {fp_reg, F},
    _Contents
) ->
    %% A float register is loaded as two words into a pair of native registers.
    RegA = first_avail(Avail),
    RegABit = reg_bit(RegA),
    Avail2 = Avail band (bnot RegABit),
    RegB = first_avail(Avail2),
    RegBBit = reg_bit(RegB),
    AvailT = Avail2 band (bnot RegBBit),
    {BaseReg, Off} = ?FP_REGS,
    %% RegB first holds the fp registers base, and is overwritten last.
    I1 = jit_xtensa_asm:l32i(RegB, BaseReg, Off),
    I2 = jit_xtensa_asm:l32i(RegA, RegB, F * 8),
    I3 = jit_xtensa_asm:l32i(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    %% Both registers were overwritten: drop any cached contents.
    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, RegA), RegB),
    {
        State#state{
            stream = Stream1,
            available_regs = AvailT,
            used_regs = Used bor RegABit bor RegBBit,
            regs = Regs1
        },
        {fp, RegA, RegB}
    }.

%% Load a value into a caller-chosen destination register (or register pair for
%% float registers) and update the register cache for the destination.
-spec move_to_native_register(
    state(), value(), xtensa_register() | {fp, xtensa_register(), xtensa_register()}
) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, RegSrc, RegDst
) when is_atom(RegSrc) ->
    I = jit_xtensa_asm:mov(RegDst, RegSrc),
    Stream1 = StreamModule:append(Stream0, I),
    %% The destination now mirrors whatever the source is known to contain.
    SrcContents = jit_regs:get_contents(Regs0, RegSrc),
    Regs1 = jit_regs:set_contents(Regs0, RegDst, SrcContents),
    State#state{stream = Stream1, regs = Regs1};
move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
    State1 = mov_immediate(State, RegDst, ValSrc),
    #state{regs = Regs0} = State1,
    Regs1 = jit_regs:set_contents(Regs0, RegDst, {imm, ValSrc}),
    State1#state{regs = Regs1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Reg}, RegDst
) when ?IS_GPR(Reg) ->
    I1 = jit_xtensa_asm:l32i(RegDst, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:invalidate_reg(Regs0, RegDst),
    State#state{stream = Stream1, regs = Regs1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State,
    {x_reg, extra},
    RegDst
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_xtensa_asm:l32i(RegDst, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, extra}),
    State#state{stream = Stream1, regs = Regs1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, RegDst
) when
    X < ?MAX_REG
->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_xtensa_asm:l32i(RegDst, XReg, X_REGOffset),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}),
    State#state{stream = Stream1, regs = Regs1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} =
        State,
    {y_reg, Y},
    RegDst
) ->
    Code = ldr_y_reg(RegDst, Y, AT),
    Stream1 = StreamModule:append(Stream0, Code),
    %% The first available register is treated as clobbered by ldr_y_reg.
    Regs1 =
        case AT of
            0 -> Regs0;
            _ -> jit_regs:invalidate_reg(Regs0, first_avail(AT))
        end,
    Regs2 = jit_regs:set_contents(Regs1, RegDst, {y_reg, Y}),
    State#state{stream = Stream1, regs = Regs2};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        regs = Regs0
    } = State,
    {fp_reg, F},
    {fp, RegA, RegB}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_xtensa_asm:l32i(RegB, BaseReg, Off),
    I2 = jit_xtensa_asm:l32i(RegA, RegB, F * 8),
    I3 = jit_xtensa_asm:l32i(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    %% Both halves of the pair were overwritten: drop any cached contents.
    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, RegA), RegB),
    State#state{stream = Stream1, regs = Regs1}.

-spec copy_to_native_register(state(), value()) -> {state(), xtensa_register()}.
%% Copy a value into a newly allocated native register, leaving the source
%% register (if any) untouched. Values that are not in a native register are
%% simply loaded with move_to_native_register/2.
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    Reg
) when is_atom(Reg) ->
    SaveReg = first_avail(Avail),
    SaveBit = reg_bit(SaveReg),
    I1 = jit_xtensa_asm:mov(SaveReg, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    %% The copy inherits whatever the source register is known to contain.
    SrcContents = jit_regs:get_contents(Regs0, Reg),
    Regs1 = jit_regs:set_contents(Regs0, SaveReg, SrcContents),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot SaveBit),
            used_regs = Used bor SaveBit,
            regs = Regs1
        },
        SaveReg
    };
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = Used,
        regs = Regs0
    } = State,
    {ptr, Reg}
) when is_atom(Reg) ->
    %% Dereference Reg into a new register; Reg keeps the pointer.
    SaveReg = first_avail(Avail),
    SaveBit = reg_bit(SaveReg),
    I1 = jit_xtensa_asm:l32i(SaveReg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    Regs1 = jit_regs:invalidate_reg(Regs0, SaveReg),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot SaveBit),
            used_regs = Used bor SaveBit,
            regs = Regs1
        },
        SaveReg
    };
copy_to_native_register(State, Reg) ->
    move_to_native_register(State, Reg).

%% Store the value of y register Y into the location described by ?CP.
%% The register used for the transfer is not marked as allocated, only
%% invalidated in the register cache.
move_to_cp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    {y_reg, Y}
) ->
    Reg = first_avail(Avail),
    AvailT = Avail band (bnot reg_bit(Reg)),
    I1 = ldr_y_reg(Reg, Y, AvailT),
    {BaseReg, Off} = ?CP,
    I2 = jit_xtensa_asm:s32i(Reg, BaseReg, Off),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    %% The next available register is treated as clobbered by ldr_y_reg.
    Regs2 =
        case AvailT of
            0 -> Regs1;
            _ -> jit_regs:invalidate_reg(Regs1, first_avail(AvailT))
        end,
    State#state{stream = Stream1, regs = Regs2}.

%% Add Offset words (Offset * 4 bytes) to the value stored at ?Y_REGS:
%% load it, adjust it and store it back.
increment_sp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} =
        State,
    Offset
) ->
    Reg = first_avail(Avail),
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_xtensa_asm:l32i(Reg, BaseReg, Off),
    I2 = add_immediate_binary(Reg, Reg, Offset * 4),
    I3 = jit_xtensa_asm:s32i(Reg, BaseReg, Off),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    State#state{stream = Stream1, regs = Regs1}.

%% Store the address of the jump table entry of Label as the continuation
%% in the jit state.
set_continuation_to_label(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        regs = Regs0
    } = State,
    Label
) ->
    Temp = first_avail(Avail),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
    %% In windowed ABI, the continuation will be called by C via CALL8,
    %% so it must point to a location with an ENTRY instruction.
    %% We use the jump table entry for the label, which has ENTRY after the literal.
    %% code_base points past the chunk header, so the offset is relative to
    %% the jump table start (which is the first thing after the header).
    %% JUMP_TABLE_OFFSET skips past the 4-byte literal to the ENTRY instruction.
    CodeRelativeOffset = ?JUMP_TABLE_OFFSET + Label * ?JUMP_TABLE_ENTRY_SIZE,
    I1 = code_relative_address(Temp, CodeRelativeOffset),
    I2 = jit_xtensa_asm:s32i(Temp, ?JITSTATE_REG, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1, regs = Regs1}.

%% @doc Set the continuation to a given offset.
%% Returns a reference so the offset will be updated with update_branches.
%% Only used with OP_WAIT_TIMEOUT.
set_continuation_to_offset(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        branches = Branches,
        jump_table_start = JumpTableStart,
        regs = Regs0
    } = State
) ->
    Temp = first_avail(Avail),
    OffsetRef = make_ref(),
    Offset = StreamModule:offset(Stream0),
    %% Reserve a 16#FF-filled placeholder of ?CODE_RELATIVE_ADDRESS_PADDED_SIZE
    %% bytes for code_relative_address_padded; the relocation recorded below
    %% identifies where it sits in the stream.
    I1 = list_to_binary(lists:duplicate(?CODE_RELATIVE_ADDRESS_PADDED_SIZE, 16#FF)),
    Reloc = {OffsetRef, Offset, {adr, Temp, JumpTableStart}},
    %% Store continuation
    I2 = jit_xtensa_asm:s32i(Temp, ?JITSTATE_REG, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
    {State#state{stream = Stream1, branches = [Reloc | Branches], regs = Regs1}, OffsetRef}.

%% @doc Implement a continuation entry point.
%% In windowed ABI, C calls the continuation via CALL8, so we need ENTRY
%% to establish a new register window frame.
-spec continuation_entry_point(#state{}) -> #state{}.
continuation_entry_point(#state{stream_module = StreamModule, stream = Stream0} = State) ->
    I1 = jit_xtensa_asm:entry(a1, ?ENTRY_FRAME_SIZE),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1}.

%% Load the module index into a newly allocated register: the module pointer
%% is read from the jit state, then its first word is loaded. The register
%% cache records the result as `module_index'.
get_module_index(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = Avail,
        used_regs = UsedRegs0,
        regs = Regs0
    } = State
) ->
    Reg = first_avail(Avail),
    RegBit = reg_bit(Reg),
    % Load module from jit_state
    I1 = jit_xtensa_asm:l32i(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
    I2 = jit_xtensa_asm:l32i(Reg, Reg, 0),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    Regs1 = jit_regs:set_contents(Regs0, Reg, module_index),
    {
        State#state{
            stream = Stream1,
            available_regs = Avail band (bnot RegBit),
            used_regs = UsedRegs0 bor RegBit,
            regs = Regs1
        },
        Reg
    }.

%% Bitwise AND.
%% With `{free, Reg}' the result is computed in place and `{State, Reg}' is
%% returned. With a plain register and ?TERM_PRIMARY_CLEAR_MASK the source is
%% preserved and the result goes to a newly allocated register.
and_(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0,
    {free, Reg},
    SrcReg
) when
    is_atom(SrcReg)
->
    I = jit_xtensa_asm:and_(Reg, Reg, SrcReg),
    Stream1 = StreamModule:append(Stream0, I),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State0#state{stream = Stream1, regs = Regs1}, Reg};
and_(
    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0,
    {free, Reg},
    16#FFFFFF
) ->
    %% Clearing the top 8 bits needs no mask: shift them out and back.
    I1 = jit_xtensa_asm:slli(Reg, Reg, 8),
    I2 = jit_xtensa_asm:srli(Reg, Reg, 8),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
    {State0#state{stream = Stream1, regs = Regs1}, Reg};
and_(
    #state{stream_module = StreamModule, available_regs = Avail} = State0,
    {free, Reg},
    Val
) ->
    %% Xtensa has no andi instruction: materialize the mask in a scratch
    %% register. When no register is available, ?A8_REG (a8) is used directly;
    %% a8 is not in AVAILABLE_REGS so it is the dedicated implicit scratch.
    %% Avoid clobbering a0 (encoded return address required by RETW).
    Temp =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    AT = Avail band (bnot reg_bit(Temp)),
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_xtensa_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    %% Start from State1's cache, where mov_immediate already invalidated the
    %% scratch register (a8 included), then invalidate the rewritten Reg.
    Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg),
    {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, Reg};
and_(
    #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR, regs = Regs0} =
        State0,
    Reg,
    ?TERM_PRIMARY_CLEAR_MASK
) ->
    ResultReg =
        case Avail of
            0 -> ?A8_REG;
            _ -> first_avail(Avail)
        end,
    ResultBit = reg_bit(ResultReg),
    %% Xtensa has no andi - use movi+and_ for TERM_PRIMARY_CLEAR_MASK (-4)
    I1 = jit_xtensa_asm:movi(ResultReg, -4),
    I2 = jit_xtensa_asm:and_(ResultReg, Reg, ResultReg),
    Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>),
    Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg),
    {
        State0#state{
            stream = Stream1,
            available_regs = Avail band (bnot ResultBit),
            used_regs = UR bor ResultBit,
            regs = Regs1
        },
        ResultReg
    }.

%% Bitwise OR of Reg with a register or an immediate, in place.
%% The `{free, Reg}' form returns `{State, Reg}', the plain form returns State.
or_(State, {free, Dst}, Operand) ->
    {or_(State, Dst, Operand), Dst};
or_(#state{stream_module = Mod, stream = Code0, regs = Cache0} = State, Dst, Src) when
    is_atom(Src)
->
    Code1 = Mod:append(Code0, jit_xtensa_asm:or_(Dst, Dst, Src)),
    State#state{stream = Code1, regs = jit_regs:invalidate_reg(Cache0, Dst)};
or_(#state{stream_module = Mod, available_regs = Free} = State, Dst, Imm) ->
    %% Xtensa has no ori instruction: every immediate is first loaded into a
    %% scratch register. With no available register, ?A8_REG (a8) is used
    %% directly; a8 is not in AVAILABLE_REGS so it is the dedicated implicit
    %% scratch. Avoid clobbering a0 (encoded return address required by RETW).
    Scratch =
        case Free of
            0 -> ?A8_REG;
            _ -> first_avail(Free)
        end,
    Reserved = State#state{available_regs = Free band (bnot reg_bit(Scratch))},
    Loaded = mov_immediate(Reserved, Scratch, Imm),
    Code2 = Mod:append(Loaded#state.stream, jit_xtensa_asm:or_(Dst, Dst, Scratch)),
    %% Take the cache from the post-load state (mov_immediate already
    %% invalidated the scratch), then also invalidate the rewritten Dst.
    Cache = jit_regs:invalidate_reg(Loaded#state.regs, Dst),
    Loaded#state{available_regs = Free, stream = Code2, regs = Cache}.

%% Bitwise XOR of Reg with a register or an immediate, in place. Returns State.
xor_(#state{stream_module = Mod, stream = Code0, regs = Cache0} = State, Dst, Src) when
    is_atom(Src)
->
    Code1 = Mod:append(Code0, jit_xtensa_asm:xor_(Dst, Dst, Src)),
    State#state{stream = Code1, regs = jit_regs:invalidate_reg(Cache0, Dst)};
xor_(#state{stream_module = Mod, available_regs = Free} = State, Dst, Imm) ->
    %% Xtensa has no xori instruction: load the immediate into a scratch
    %% register (?A8_REG when nothing is available) and xor with it.
    Scratch =
        case Free of
            0 -> ?A8_REG;
            _ -> first_avail(Free)
        end,
    Reserved = State#state{available_regs = Free band (bnot reg_bit(Scratch))},
    Loaded = mov_immediate(Reserved, Scratch, Imm),
    Code2 = Mod:append(Loaded#state.stream, jit_xtensa_asm:xor_(Dst, Dst, Scratch)),
    %% mov_immediate already invalidated the scratch; also invalidate Dst.
    Cache = jit_regs:invalidate_reg(Loaded#state.regs, Dst),
    Loaded#state{available_regs = Free, stream = Code2, regs = Cache}.

%% Add a register or an immediate to Reg, in place.
%% The `{free, Reg}' form returns `{State, Reg}', the plain form returns State.
add(State, {free, Dst}, Operand) ->
    {add(State, Dst, Operand), Dst};
add(#state{stream_module = Mod, stream = Code0, regs = Cache0} = State, Dst, Imm) when
    Imm >= -128, Imm =< 127
->
    %% Fits the addi immediate.
    Code1 = Mod:append(Code0, jit_xtensa_asm:addi(Dst, Dst, Imm)),
    State#state{stream = Code1, regs = jit_regs:invalidate_reg(Cache0, Dst)};
add(#state{stream_module = Mod, stream = Code0, regs = Cache0} = State, Dst, Src) when
    is_atom(Src)
->
    Code1 = Mod:append(Code0, jit_xtensa_asm:add(Dst, Dst, Src)),
    State#state{stream = Code1, regs = jit_regs:invalidate_reg(Cache0, Dst)};
add(#state{stream_module = Mod, available_regs = Free} = State, Dst, Imm) ->
    Scratch =
        case Free of
            0 -> ?A8_REG;
            _ -> first_avail(Free)
        end,
    Reserved = State#state{available_regs = Free band (bnot reg_bit(Scratch))},
    Loaded = mov_immediate(Reserved, Scratch, Imm),
    Code2 = Mod:append(Loaded#state.stream, jit_xtensa_asm:add(Dst, Dst, Scratch)),
    %% mov_immediate already invalidated the scratch register.
    Cache = jit_regs:invalidate_reg(Loaded#state.regs, Dst),
    Loaded#state{available_regs = Free, stream = Code2, regs = Cache}.

%% mov_immediate/2: returns binary, for inline code generation.
%% Uses movi, movi+addmi, or movi+slli+addi sequences (no L32R).
mov_immediate(Dst, Imm) when Imm >= -2048, Imm =< 2047 ->
    jit_xtensa_asm:movi(Dst, Imm);
mov_immediate(Dst, Imm) ->
    case split_movi_addmi(Imm) of
        false ->
            mov_immediate_large(Dst, Imm band 16#FFFFFFFF);
        {ok, Low, High} ->
            Movi = jit_xtensa_asm:movi(Dst, Low),
            Addmi = jit_xtensa_asm:addmi(Dst, Dst, High),
            <<Movi/binary, Addmi/binary>>
    end.

%% Build arbitrary 32-bit value using MOVI + SLLI 8 + ADDI byte-by-byte sequence.
%% We solve for signed B3, B2, B1, B0 in [-128, 127] such that
%% ((B3*256 + B2)*256 + B1)*256 + B0 == Imm32 (mod 2^32).
%% Each ADDI sign-extends its 8-bit immediate, so we must absorb the borrow
%% caused by negative lower bytes into the byte directly above before encoding.
mov_immediate_large(Dst, Imm32) ->
    {Byte0, Rest1} = split_signed_byte(Imm32),
    {Byte1, Rest2} = split_signed_byte(Rest1),
    {Byte2, Rest3} = split_signed_byte(Rest2),
    mov_immediate_build(Dst, sign_byte(Rest3), Byte2, Byte1, Byte0).

%% Split an integer V into {SignedByte, Higher} such that
%% V == SignedByte + Higher * 256, with SignedByte in [-128, 127].
split_signed_byte(V) ->
    SignedByte =
        case V band 16#FF of
            Byte when Byte >= 128 -> Byte - 256;
            Byte -> Byte
        end,
    {SignedByte, (V - SignedByte) div 256}.

%% Low byte of V reinterpreted as a signed value in [-128, 127].
sign_byte(V) ->
    case V band 16#FF of
        Byte when Byte >= 128 -> Byte - 256;
        Byte -> Byte
    end.

%% Encode movi of the most significant non-zero signed byte followed by one
%% `slli 8' + `addi' pair per remaining byte. Leading zero bytes are skipped
%% (at most two of them).
mov_immediate_build(Dst, 0, 0, B1, B0) ->
    mov_immediate_shift_chain(Dst, B1, [B0]);
mov_immediate_build(Dst, 0, B2, B1, B0) ->
    mov_immediate_shift_chain(Dst, B2, [B1, B0]);
mov_immediate_build(Dst, B3, B2, B1, B0) ->
    mov_immediate_shift_chain(Dst, B3, [B2, B1, B0]).

%% movi Dst, First; then for each following byte: slli Dst, Dst, 8; addi Dst, Dst, Byte.
mov_immediate_shift_chain(Dst, First, Following) ->
    lists:foldl(
        fun(Byte, Acc) ->
            <<
                Acc/binary,
                (jit_xtensa_asm:slli(Dst, Dst, 8))/binary,
                (jit_xtensa_asm:addi(Dst, Dst, Byte))/binary
            >>
        end,
        jit_xtensa_asm:movi(Dst, First),
        Following
    ).

%% mov_immediate/3: state-based, tries efficient sequences first then L32R.
%% The destination register is always invalidated in the register cache.
mov_immediate(
    #state{stream_module = Mod, stream = Code0, regs = Cache0} = State, Dst, Imm
) when
    Imm >= -2048, Imm =< 2047
->
    %% Single movi instruction (3 bytes)
    Code1 = Mod:append(Code0, jit_xtensa_asm:movi(Dst, Imm)),
    State#state{stream = Code1, regs = jit_regs:invalidate_reg(Cache0, Dst)};
mov_immediate(#state{regs = Cache0} = State0, Dst, Imm) ->
    Imm32 = Imm band 16#FFFFFFFF,
    Emitted =
        case split_movi_addmi(Imm) of
            {ok, Low, High} ->
                %% movi + addmi (6 bytes)
                mov_immediate_movi_addmi(State0, Dst, Low, High);
            false ->
                case find_movi_slli(Imm32) of
                    {ok, Core, Shift} ->
                        %% movi + slli (6 bytes)
                        mov_immediate_movi_slli(State0, Dst, Core, Shift);
                    false ->
                        case find_movi_slli_addi(Imm32) of
                            {ok, Core, Shift, Addend} ->
                                %% movi + slli + addi (9 bytes)
                                mov_immediate_movi_slli_addi(State0, Dst, Core, Shift, Addend);
                            false ->
                                case find_movi_slli_addmi(Imm32) of
                                    {ok, Core, Shift, High} ->
                                        %% movi + slli + addmi (9 bytes)
                                        mov_immediate_movi_slli_addmi(
                                            State0, Dst, Core, Shift, High
                                        );
                                    false ->
                                        %% Inline literal: j over value, then l32r (10-13 bytes)
                                        mov_immediate_l32r(State0, Dst, Imm)
                                end
                        end
                end
        end,
    Emitted#state{regs = jit_regs:invalidate_reg(Cache0, Dst)}.

%% Emit movi Dst, Low; addmi Dst, Dst, High.
mov_immediate_movi_addmi(
    #state{stream_module = Mod, stream = Code0} = State, Dst, Low, High
) ->
    Movi = jit_xtensa_asm:movi(Dst, Low),
    Addmi = jit_xtensa_asm:addmi(Dst, Dst, High),
    State#state{stream = Mod:append(Code0, <<Movi/binary, Addmi/binary>>)}.

%% Emit movi Dst, Core; slli Dst, Dst, Shift.
mov_immediate_movi_slli(
    #state{stream_module = Mod, stream = Code0} = State, Dst, Core, Shift
) ->
    Movi = jit_xtensa_asm:movi(Dst, Core),
    Slli = jit_xtensa_asm:slli(Dst, Dst, Shift),
    State#state{stream = Mod:append(Code0, <<Movi/binary, Slli/binary>>)}.

%% Emit movi Dst, Core; slli Dst, Dst, Shift; addi Dst, Dst, Addend.
mov_immediate_movi_slli_addi(
    #state{stream_module = Mod, stream = Code0} = State, Dst, Core, Shift, Addend
) ->
    Movi = jit_xtensa_asm:movi(Dst, Core),
    Slli = jit_xtensa_asm:slli(Dst, Dst, Shift),
    Addi = jit_xtensa_asm:addi(Dst, Dst, Addend),
    State#state{stream = Mod:append(Code0, <<Movi/binary, Slli/binary, Addi/binary>>)}.

%% Emit movi Dst, Core; slli Dst, Dst, Shift; addmi Dst, Dst, High.
mov_immediate_movi_slli_addmi(
    #state{stream_module = Mod, stream = Code0} = State, Dst, Core, Shift, High
) ->
    Movi = jit_xtensa_asm:movi(Dst, Core),
    Slli = jit_xtensa_asm:slli(Dst, Dst, Shift),
    Addmi = jit_xtensa_asm:addmi(Dst, Dst, High),
    State#state{stream = Mod:append(Code0, <<Movi/binary, Slli/binary, Addmi/binary>>)}.

%% Load an arbitrary 32-bit value through an inline literal:
%% a jump over the (aligned) literal, the literal itself, then an l32r that
%% reads it back.
mov_immediate_l32r(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
    O = StreamModule:offset(Stream0),
    %% The literal follows the 3-byte jump and must start on a 4-byte boundary
    %% of the stream offset.
    Padding = (4 - ((O + 3) rem 4)) rem 4,
    %% j(N) targets PC+4+N; skip over Padding+literal(4) to land on l32r
    SkipDist = Padding + 3,
    JInstr = jit_xtensa_asm:j(SkipDist),
    PadBytes = <<16#FF:(Padding * 8)>>,
    ValBytes = <<(Val band 16#FFFFFFFF):32/little>>,
    %% L32R with imm16=-1 loads from 4 bytes before the aligned PC
    L32rInstr = jit_xtensa_asm:l32r(Reg, -1),
    Code = <<JInstr/binary, PadBytes/binary, ValBytes/binary, L32rInstr/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Split Val into Low (for movi, -2048..2047) and High (for addmi, multiple of 256,
%% with High/256 in -128..127)
%% Returns {ok, Low, High} with Low + High =:= Val, or false when no such
%% split is found.
split_movi_addmi(Val) ->
    Low0 = Val band 16#FF,
    LowSigned0 =
        if
            Low0 >= 128 -> Low0 - 256;
            true -> Low0
        end,
    High0 = Val - LowSigned0,
    HighByte0 = High0 bsr 8,
    %% When the high part is one step out of range, move 256 between the two
    %% parts: movi has room for more than a signed byte.
    {LowSigned, High} =
        if
            HighByte0 >= -128, HighByte0 =< 127 ->
                {LowSigned0, High0};
            HighByte0 > 127 ->
                {LowSigned0 + 256, High0 - 256};
            true ->
                {LowSigned0 - 256, High0 + 256}
        end,
    HighByte = High bsr 8,
    if
        LowSigned >= -2048,
        LowSigned =< 2047,
        HighByte >= -128,
        HighByte =< 127,
        High rem 256 =:= 0 ->
            {ok, LowSigned, High};
        true ->
            false
    end.

%% Find Core and Shift such that (Core << Shift) =:= Val (unsigned 32-bit)
%% where Core fits movi (-2048..2047) and Shift is 1..31
%% Returns {ok, Core, Shift} or false.
find_movi_slli(Val) when Val =:= 0 -> false;
find_movi_slli(Val) ->
    Tz = count_trailing_zeros(Val),
    if
        Tz >= 1 ->
            %% The bits above the trailing zeros, read as a signed value of
            %% the remaining width.
            Core = sign_extend_from_width(Val bsr Tz, 32 - Tz),
            if
                Core >= -2048, Core =< 2047 ->
                    {ok, Core, Tz};
                true ->
                    false
            end;
        true ->
            false
    end.

%% Find Core, Shift, Addend such that (Core << Shift) + Addend =:= Val
%% where Core fits movi, Shift is 1..31, Addend fits addi (-128..127)
find_movi_slli_addi(Val) ->
    find_movi_slli_addi(Val, -128).

%% Try every addend from Addend up to 127, in increasing order, and return the
%% first {ok, Core, Shift, Addend} for which Val - Addend is a shifted movi
%% value; false when none works.
find_movi_slli_addi(Val, Addend) when Addend =< 127 ->
    case find_movi_slli((Val - Addend) band 16#FFFFFFFF) of
        false -> find_movi_slli_addi(Val, Addend + 1);
        {ok, Core, Shift} -> {ok, Core, Shift, Addend}
    end;
find_movi_slli_addi(_Val, _Addend) ->
    false.

%% Find Core, Shift, High such that (Core << Shift) + High =:= Val
%% where Core fits movi, Shift is 1..31, High fits addmi (multiple of 256, High/256 in -128..127)
find_movi_slli_addmi(Val) ->
    find_movi_slli_addmi(Val, -128).

%% Same search as above, stepping the addmi high byte from HighByte up to 127.
find_movi_slli_addmi(Val, HighByte) when HighByte =< 127 ->
    High = HighByte * 256,
    case find_movi_slli((Val - High) band 16#FFFFFFFF) of
        false -> find_movi_slli_addmi(Val, HighByte + 1);
        {ok, Core, Shift} -> {ok, Core, Shift, High}
    end;
find_movi_slli_addmi(_Val, _HighByte) ->
    false.

%% Number of consecutive zero bits at the low end of N; 32 for N =:= 0.
count_trailing_zeros(0) -> 32;
count_trailing_zeros(N) -> count_trailing_zeros(N, 0).

count_trailing_zeros(N, Count) ->
    case N band 1 of
        1 -> Count;
        0 -> count_trailing_zeros(N bsr 1, Count + 1)
    end.

%% Sign-extend a value from Width bits to full Erlang integer
sign_extend_from_width(Val, Width) ->
    SignBit = 1 bsl (Width - 1),
    case Val band SignBit of
        0 -> Val;
        _ -> Val - (1 bsl Width)
    end.
+
+%% Emit code for Reg := Reg - Val, where Val is an integer constant or a register.
+%% When the destination is given as {free, Reg} the result is {State, Reg};
+%% otherwise only the updated state is returned.
+sub(State0, {free, Reg}, Val) ->
+    State1 = sub(State0, Reg, Val),
+    {State1, Reg};
+sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val) when
+    Val >= 0, Val =< 127
+->
+    %% Small non-negative constant: a single ADDI with the negated value.
+    Stream1 = StreamModule:append(Stream0, jit_xtensa_asm:addi(Reg, Reg, -Val)),
+    State#state{stream = Stream1, regs = jit_regs:invalidate_reg(Regs0, Reg)};
+sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val) when
+    is_atom(Val)
+->
+    %% Register operand: plain SUB.
+    Stream1 = StreamModule:append(Stream0, jit_xtensa_asm:sub(Reg, Reg, Val)),
+    State#state{stream = Stream1, regs = jit_regs:invalidate_reg(Regs0, Reg)};
+sub(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) ->
+    %% Any other constant: materialise it in a scratch register, then SUB.
+    %% With no free register, ?A8_REG is used as the scratch.
+    Scratch =
+        if
+            Avail =:= 0 -> ?A8_REG;
+            true -> first_avail(Avail)
+        end,
+    %% Hide the scratch from mov_immediate so it cannot pick it for its own use.
+    Reserved = State0#state{available_regs = Avail band (bnot reg_bit(Scratch))},
+    #state{stream = Stream1, regs = RegsAfterMov} = State1 = mov_immediate(Reserved, Scratch, Val),
+    Stream2 = StreamModule:append(Stream1, jit_xtensa_asm:sub(Reg, Reg, Scratch)),
+    %% mov_immediate already invalidated the scratch in its returned tracking state.
+    State1#state{
+        available_regs = Avail,
+        stream = Stream2,
+        regs = jit_regs:invalidate_reg(RegsAfterMov, Reg)
+    }.
+
+%% Emit code multiplying Reg in place, either by an integer constant or by another
+%% register (an atom). Returns the updated state.
+%%
+%% Selected constants avoid MULL:
+%%   powers of two            -> a single shift_left
+%%   3, 5, 9  (2^k + 1)       -> slli + add
+%%   7, 15    (2^k - 1)       -> slli + sub
+%%   6, 10                    -> composition of the above
+%% Every other integer is loaded into a scratch register and multiplied with MULL.
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(State, Reg, 3) ->
+    mul_shift_combine(State, Reg, 1, add);
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(State, Reg, 5) ->
+    mul_shift_combine(State, Reg, 2, add);
+mul(State0, Reg, 6) ->
+    State1 = mul(State0, Reg, 3),
+    mul(State1, Reg, 2);
+mul(State, Reg, 7) ->
+    mul_shift_combine(State, Reg, 3, sub);
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(State, Reg, 9) ->
+    mul_shift_combine(State, Reg, 3, add);
+mul(State0, Reg, 10) ->
+    State1 = mul(State0, Reg, 5),
+    mul(State1, Reg, 2);
+mul(State, Reg, 15) ->
+    mul_shift_combine(State, Reg, 4, sub);
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = StreamModule, available_regs = Avail} = State0,
+    Reg,
+    Val
+) when is_integer(Val) ->
+    Temp = mul_scratch_reg(State0),
+    %% Hide the scratch from mov_immediate so it cannot pick it for its own use.
+    AT = Avail band (bnot reg_bit(Temp)),
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    I = jit_xtensa_asm:mull(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(State1#state.stream, I),
+    %% Start from the tracking state returned by mov_immediate (as sub/3 does) so that
+    %% anything it invalidated stays invalidated.
+    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp), Reg),
+    %% Restore the caller's availability mask exactly. OR-ing the scratch bit back in
+    %% would mark ?A8_REG as allocatable when it was only borrowed because Avail was 0.
+    State1#state{
+        stream = Stream2,
+        available_regs = Avail,
+        regs = Regs1
+    };
+mul(
+    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg
+) when is_atom(SrcReg) ->
+    I = jit_xtensa_asm:mull(DestReg, DestReg, SrcReg),
+    Stream1 = StreamModule:append(Stream0, I),
+    Regs1 = jit_regs:invalidate_reg(Regs0, DestReg),
+    State#state{stream = Stream1, regs = Regs1}.
+
+%% Scratch register for mul/3: the preferred free register, or ?A8_REG when none is free.
+mul_scratch_reg(#state{available_regs = 0}) -> ?A8_REG;
+mul_scratch_reg(#state{available_regs = Avail}) -> first_avail(Avail).
+
+%% Emit Reg := (Reg bsl Shift) + Reg (Op =:= add) or (Reg bsl Shift) - Reg (Op =:= sub),
+%% i.e. a multiplication by 2^Shift + 1 or 2^Shift - 1, using one scratch register.
+mul_shift_combine(
+    #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Shift, Op
+) ->
+    Temp = mul_scratch_reg(State),
+    I1 = jit_xtensa_asm:slli(Temp, Reg, Shift),
+    I2 =
+        case Op of
+            add -> jit_xtensa_asm:add(Reg, Temp, Reg);
+            sub -> jit_xtensa_asm:sub(Reg, Temp, Reg)
+        end,
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg),
+    State#state{stream = Stream1, regs = Regs1}.
+
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+%% Emit the reduction-count check that precedes a continuation point:
+%%   1. load the reduction count from jit_state, decrement it and store it back;
+%%   2. if it is non-zero, branch (BNEZ, patched in afterwards) to just past the
+%%      ENTRY instruction emitted at the continuation point;
+%%   3. otherwise store the continuation address in jit_state and call
+%%      ?PRIM_SCHEDULE_NEXT_CP through call_primitive_last.
+%% The BNEZ and the padded address sequence are first emitted as 16#FF placeholders
+%% and replaced once the continuation offset is known.
+decrement_reductions_and_maybe_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = Avail,
+        jump_table_start = JumpTableStart,
+        regs = Regs0
+    } = State0
+) ->
+    Temp = first_avail(Avail),
+    I1 = jit_xtensa_asm:l32i(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    I2 = jit_xtensa_asm:addi(Temp, Temp, -1),
+    I3 = jit_xtensa_asm:s32i(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    %% NOTE(review): the binary below reads `<>` in this copy of the patch, which is not
+    %% valid Erlang; its segments appear to have been lost (presumably I1, I2, I3, which
+    %% are otherwise unused) - confirm against the original commit.
+    Stream1 = StreamModule:append(Stream0, <>),
+    BNEOffset = StreamModule:offset(Stream1),
+    %% Placeholder for bnez (3 bytes) patched below once the continuation offset is known.
+    I4 = <<16#FF, 16#FF, 16#FF>>,
+    _ADROffset = BNEOffset + byte_size(I4),
+    %% Placeholder for the padded code_relative_address sequence, patched below as well.
+    I5 = list_to_binary(lists:duplicate(?CODE_RELATIVE_ADDRESS_PADDED_SIZE, 16#FF)),
+    I6 = jit_xtensa_asm:s32i(Temp, ?JITSTATE_REG, ?JITSTATE_CONTINUATION_OFFSET),
+    %% NOTE(review): `<>` again; presumably I4, I5, I6 in that order (the replace below
+    %% rewrites the bytes starting at BNEOffset) - confirm.
+    Stream2 = StreamModule:append(Stream1, <>),
+    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
+    State1 = State0#state{stream = Stream2, regs = Regs1},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    #state{stream = Stream3} = State2,
+    %% Emit ENTRY at the continuation point so C can call it via CALL8.
+    %% The continuation offset stored in jit_state points here.
+    EntryInstr = jit_xtensa_asm:entry(a1, ?ENTRY_FRAME_SIZE),
+    NewOffset = StreamModule:offset(Stream3),
+    Stream3b = StreamModule:append(Stream3, EntryInstr),
+    %% The BNEZ from within JIT code must skip past the ENTRY instruction
+    %% (we are already in a windowed frame, so hitting ENTRY again is wrong).
+    %% Target is NewOffset + 3 (just after ENTRY); the extra -4 matches the PC+4 base
+    %% used for BNEZ offsets elsewhere in this file.
+    NewI4 = jit_xtensa_asm:bnez(Temp, (NewOffset + 3) - BNEOffset - 4),
+    %% Generate code_relative_address padded to 21 bytes
+    %% The continuation address is NewOffset (stream offset) minus header
+    NewI5 = code_relative_address_padded(Temp, NewOffset - JumpTableStart),
+    %% NOTE(review): `<>` below presumably held NewI4 followed by NewI5 - confirm.
+    Stream4 = StreamModule:replace(
+        Stream3b, BNEOffset, <>
+    ),
+    StreamN = Stream4,
+    State3 = merge_used_regs(State2#state{stream = StreamN}, State1#state.used_regs),
+    %% schedule_next clobbers caller-saved regs; invalidate cache at continuation.
+    State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}.
+
+%% Emit a call to Label preceded by the reduction-count check: set_cp reserves the
+%% continuation-pointer store, call_only_or_schedule_next emits the check and branch,
+%% and rewrite_cp_offset patches the reserved bytes with the return offset.
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_only_or_schedule_next(State1, Label),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+%% Decrement the reduction count and branch to Label while it is non-zero; when it
+%% reaches zero, fall through to set_continuation_to_label followed by
+%% ?PRIM_SCHEDULE_NEXT_CP. A direct BNEZ is used when the label is known and in range;
+%% otherwise a BEQZ skips over the sequence produced by branch_to_label_code.
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = Avail,
+        regs = Regs0
+    } = State0a,
+    Label
+) ->
+    Temp = first_avail(Avail),
+    Regs1 = jit_regs:invalidate_reg(Regs0, Temp),
+    State0 = State0a#state{regs = Regs1},
+    I1 = jit_xtensa_asm:l32i(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    I2 = jit_xtensa_asm:addi(Temp, Temp, -1),
+    I3 = jit_xtensa_asm:s32i(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    %% NOTE(review): `<>` is not valid Erlang; presumably I1, I2, I3 - confirm.
+    Stream1 = StreamModule:append(Stream0, <>),
+    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),
+
+    %% Offset at which the conditional branch instruction will be emitted.
+    BccOffset = StreamModule:offset(Stream1),
+
+    State4 =
+        case LabelLookupResult of
+            {Label, LabelOffset} ->
+                Rel = LabelOffset - BccOffset,
+                %% BNEZ (BRI12 format): 12-bit signed offset relative to PC+4.
+                %% Account for the -4 adjustment: Rel - 4 must fit in [-2048, 2047].
+                BnezOffset = Rel - 4,
+                if
+                    BnezOffset >= -2048 andalso BnezOffset =< 2047 ->
+                        I4 = jit_xtensa_asm:bnez(Temp, BnezOffset),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        State0#state{stream = Stream2};
+                    true ->
+                        %% Out of BNEZ range: the far sequence starts right after the
+                        %% 3-byte BEQZ, which skips it when the count reached zero.
+                        FarSeqOffset = BccOffset + 3,
+                        {State1, FarCodeBlock} = branch_to_label_code(
+                            State0, FarSeqOffset, Label, LabelLookupResult
+                        ),
+                        FarSeqSize = byte_size(FarCodeBlock),
+                        %% BEQZ target is PC+4+offset; landing right after the far block
+                        %% (BccOffset + 3 + FarSeqSize) therefore needs FarSeqSize - 1.
+                        I4 = jit_xtensa_asm:beqz(Temp, FarSeqSize - 1),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                        State1#state{stream = Stream3}
+                end;
+            false ->
+                %% Label not defined yet: same BEQZ-over-far-sequence shape as above.
+                FarSeqOffset = BccOffset + 3,
+                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
+                FarSeqSize = byte_size(FarCodeBlock),
+                I4 = jit_xtensa_asm:beqz(Temp, FarSeqSize - 1),
+                Stream2 = StreamModule:append(Stream1, I4),
+                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                State1#state{stream = Stream3}
+        end,
+    State5 = set_continuation_to_label(State4, Label),
+    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%% Call Primitive as the last call after storing a continuation pointer; the reserved
+%% offset bytes from set_cp are patched by rewrite_cp_offset once the call is emitted.
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_primitive_last(State1, Primitive, Args),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+%% Emit the store of the continuation pointer (module index shifted left by 24, OR-ed
+%% with an offset held in TempReg) into ?CP. The instructions loading the offset into
+%% TempReg are emitted as a 15-byte 16#FF placeholder.
+%% Returns {State, MOVOffset, TempReg}, where MOVOffset is the stream offset of that
+%% placeholder, to be passed to rewrite_cp_offset/3.
+-spec set_cp(state()) -> {state(), non_neg_integer(), xtensa_register()}.
+set_cp(#state{available_regs = Avail, used_regs = UsedRegs} = State0) ->
+    TempReg = first_avail(Avail),
+    TempBit = reg_bit(TempReg),
+    %% Reserve TempReg for the offset BEFORE get_module_index consumes available registers.
+    State1 = State0#state{
+        available_regs = Avail band (bnot TempBit), used_regs = UsedRegs bor TempBit
+    },
+    {State2, Reg} = get_module_index(State1),
+    #state{stream_module = StreamModule, stream = Stream0} = State2,
+
+    Offset = StreamModule:offset(Stream0),
+    I1 = jit_xtensa_asm:slli(Reg, Reg, 24),
+    %% Reserve 15 bytes for the offset load, patched by rewrite_cp_offset.
+    %% NOTE(review): the original comment said the load may take 3..21 bytes, but
+    %% rewrite_cp_offset asserts that the padded load is exactly 15 bytes.
+    I2 =
+        <<16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF, 16#FF,
+            16#FF, 16#FF>>,
+    %% The placeholder sits right after the slli.
+    MOVOffset = Offset + byte_size(I1),
+    I4 = jit_xtensa_asm:or_(Reg, Reg, TempReg),
+    {BaseReg, Off} = ?CP,
+    I5 = jit_xtensa_asm:s32i(Reg, BaseReg, Off),
+    %% NOTE(review): `<>` is not valid Erlang; presumably I1, I2, I4, I5 in that order
+    %% (MOVOffset assumes I2 directly follows I1) - confirm.
+    Code = <>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State3 = State2#state{stream = Stream1},
+    State4 = free_native_register(State3, Reg),
+    State5 = free_native_register(State4, TempReg),
+    {State5, MOVOffset, TempReg}.
+
+%% Patch the 15-byte placeholder reserved by set_cp/1 with the instructions loading
+%% the continuation value (current code offset shifted left by 2) into TempReg, padded
+%% with NOPs, then emit an ENTRY instruction at the continuation point.
+-spec rewrite_cp_offset(state(), non_neg_integer(), xtensa_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    TempReg
+) ->
+    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
+    CPValue = NewOffset bsl 2,
+    NewMoveInstr = mov_immediate(TempReg, CPValue),
+    %% We reserved 15 bytes for the li instruction
+    %% li generates 3 bytes (movi for small) or 6 bytes (movi+addmi) or more for large
+    %% Pad with nops to fill 15 bytes
+    NopCount = (15 - byte_size(NewMoveInstr)) div 3,
+    Nops = list_to_binary([jit_xtensa_asm:nop() || _ <- lists:seq(1, NopCount)]),
+    %% NOTE(review): `<>` is not valid Erlang; presumably NewMoveInstr followed by Nops -
+    %% confirm.
+    PaddedInstr = <>,
+    %% Crash at code-generation time if the load does not fit the reserved slot exactly.
+    15 = byte_size(PaddedInstr),
+    Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr),
+    %% Emit ENTRY at the continuation point so that jump_to_continuation can
+    %% skip past it with +3. This ENTRY is also needed when C code calls the
+    %% continuation via CALL8 (through jit_return which converts ctx->cp to
+    %% jit_state.continuation).
+    EntryInstr = jit_xtensa_asm:entry(a1, ?ENTRY_FRAME_SIZE),
+    Stream2 = StreamModule:append(Stream1, EntryInstr),
+    State0#state{stream = Stream2}.
+
+%% Store TermReg into ?BS and zero into ?BS_OFFSET, using the first available
+%% register to hold the zero.
+%% NOTE(review): Temp is overwritten but the register tracking in #state.regs is not
+%% invalidated here, unlike sibling emitters in this file - confirm this is intended.
+set_bs(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0,
+    TermReg
+) ->
+    Temp = first_avail(Avail),
+    {BaseReg1, Off1} = ?BS,
+    I1 = jit_xtensa_asm:s32i(TermReg, BaseReg1, Off1),
+    I2 = mov_immediate(Temp, 0),
+    {BaseReg2, Off2} = ?BS_OFFSET,
+    I3 = jit_xtensa_asm:s32i(Temp, BaseReg2, Off2),
+    %% NOTE(review): `<>` is not valid Erlang; the segments appear to have been lost in
+    %% this copy of the patch (presumably I1, I2, I3) - confirm against the original.
+    Stream1 = StreamModule:append(Stream0, <>),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels,
+        jump_table_start = JumpTableStart
+    } = State,
+    SortedLines
+) ->
+    %% Only integer labels are exported; they are sorted by offset.
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    I2 = jit_xtensa_asm:retw(),
+    %% Xtensa: Return address of data that follows this prologue.
+    %% code_relative_address loads code_base+offset into a2 (return register).
+    %% We need the offset of the data after this prologue.
+    %% Prologue = code_relative_address(21 bytes padded) + retw(3 bytes) = 24 bytes
+    %% So the data starts at current_offset + 24
+    %% Subtract header offset since code_base points past the header
+    PrologueSize = ?CODE_RELATIVE_ADDRESS_PADDED_SIZE + 3,
+    DataOffset = StreamModule:offset(Stream0) + PrologueSize - JumpTableStart,
+    I1 = code_relative_address_padded(a2, DataOffset),
+    %% NOTE(review): `<>` is not valid Erlang; presumably I1 followed by I2 - confirm.
+    Prologue = <>,
+    %% Assert the emitted prologue has the size assumed when computing DataOffset.
+    PrologueSize = byte_size(Prologue),
+    %% NOTE(review): the inner binaries of both comprehensions and the appended binary
+    %% below were stripped in this copy (`<>`). The exact field widths and whether the
+    %% offsets are rebased cannot be recovered from here - restore from the original
+    %% commit.
+    LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<> || {Line, Offset} <- SortedLines>>,
+    Stream1 = StreamModule:append(
+        Stream0,
+        <>
+    ),
+    State#state{stream = Stream1}.
+
+%% @doc Generate code to load the absolute address of a code offset into Rd.
+%% Loads code_base from JITState, then adds the absolute offset.
+%% Returns a variable-length sequence to load code_base + offset into Rd.
+-spec code_relative_address(xtensa_register(), non_neg_integer()) -> binary().
+code_relative_address(Rd, CodeRelativeOffset) ->
+    %% Load code_base from JITState (stored at offset 0xC)
+    %% code_base points to the start of the native code (past the chunk header).
+    %% CodeRelativeOffset must be relative to code_base, NOT a raw stream offset.
+    I1 = jit_xtensa_asm:l32i(Rd, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET),
+    I2 = add_immediate_binary(Rd, Rd, CodeRelativeOffset),
+    %% NOTE(review): `<>` is not valid Erlang; presumably I1 followed by I2 - confirm.
+    <>.
+
+%% @doc Generate code_relative_address padded to exactly 21 bytes.
+%% Loads code_base from JITState, then adds the offset using mov_immediate
+%% into A8_REG as scratch, then ADD to combine.
+%% Layout: l32i(3) + mov_immediate(padded to 15) + add(3) = 21 bytes.
+%% Handles offsets up to 16MB (3-byte mov_immediate_large).
+-spec code_relative_address_padded(xtensa_register(), non_neg_integer()) -> binary().
+%% Rd must not be ?A8_REG: that register is the scratch holding the offset, so using
+%% it as the destination would overwrite the code base loaded by the first instruction.
+%% AbsOffset must be in 0..16#FFFFFF. Both conditions are enforced by the guard, so a
+%% violation fails at code-generation time instead of emitting wrong code.
+code_relative_address_padded(Rd, AbsOffset) when
+    AbsOffset >= 0, AbsOffset =< 16#FFFFFF, Rd =/= ?A8_REG
+->
+    %% l32i Rd, JITSTATE_REG, CODE_BASE_OFFSET (3 bytes) - load code base
+    I1 = jit_xtensa_asm:l32i(Rd, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET),
+    %% mov_immediate into A8_REG, padded to 15 bytes with NOPs.
+    %% The assertion below rejects any sequence that cannot be padded to exactly 15.
+    MovInstr = mov_immediate(?A8_REG, AbsOffset),
+    MovSize = byte_size(MovInstr),
+    NopCount = (15 - MovSize) div 3,
+    Nops = list_to_binary([jit_xtensa_asm:nop() || _ <- lists:seq(1, NopCount)]),
+    PaddedMov = <<MovInstr/binary, Nops/binary>>,
+    15 = byte_size(PaddedMov),
+    %% add Rd, Rd, A8_REG (3 bytes) - combine code_base + offset
+    I3 = jit_xtensa_asm:add(Rd, Rd, ?A8_REG),
+    Result = <<I1/binary, PaddedMov/binary, I3/binary>>,
+    %% Callers reserve placeholders of this exact size.
+    21 = byte_size(Result),
+    Result.
+
+%% @doc Generate an add-immediate instruction sequence computing Rd = Rs + Imm.
+%% For small values (-128..127): addi (3 bytes)
+%% For larger values: a longer sequence (addmi/addi, or mov_immediate followed by add)
+-spec add_immediate_binary(xtensa_register(), xtensa_register(), integer()) -> binary().
+%% Strategy, in order:
+%%   Imm =:= 0            -> nothing (Rd =:= Rs) or a plain mov
+%%   Imm in -128..127     -> addi
+%%   Rd =:= Rs            -> addmi (+ addi) when Imm splits into a multiple of 256 in
+%%                           -32768..32512 plus a signed byte; otherwise mov_immediate
+%%                           into ?A8_REG followed by add (clobbers ?A8_REG)
+%%   Rd =/= Rs            -> mov_immediate into Rd followed by add
+add_immediate_binary(Rd, Rd, 0) ->
+    <<>>;
+add_immediate_binary(Rd, Rs, 0) ->
+    jit_xtensa_asm:mov(Rd, Rs);
+add_immediate_binary(Rd, Rs, Imm) when Imm >= -128, Imm =< 127 ->
+    jit_xtensa_asm:addi(Rd, Rs, Imm);
+add_immediate_binary(Rd, Rd, Imm) ->
+    %% Rd cannot double as a temporary here, so prefer addmi + addi.
+    %% Split on the *signed* low byte: LoPart is always in -128..127 and HiPart is a
+    %% multiple of 256. (Truncating division would leave e.g. 200 or 384 with a low
+    %% part outside addi's range and needlessly fall back to the ?A8_REG path.)
+    LoPart = ((Imm + 128) band 16#FF) - 128,
+    HiPart = Imm - LoPart,
+    if
+        HiPart >= -32768, HiPart =< 32512 ->
+            I1 = jit_xtensa_asm:addmi(Rd, Rd, HiPart),
+            case LoPart of
+                0 ->
+                    I1;
+                _ ->
+                    I2 = jit_xtensa_asm:addi(Rd, Rd, LoPart),
+                    <<I1/binary, I2/binary>>
+            end;
+        true ->
+            %% Very large offset - must use a different temp
+            %% Use A8_REG as emergency temp
+            I1 = mov_immediate(?A8_REG, Imm),
+            I2 = jit_xtensa_asm:add(Rd, Rd, ?A8_REG),
+            <<I1/binary, I2/binary>>
+    end;
+add_immediate_binary(Rd, Rs, Imm) ->
+    %% Rd differs from Rs, so it can hold the immediate before the add.
+    I1 = mov_immediate(Rd, Imm),
+    I2 = jit_xtensa_asm:add(Rd, Rs, Rd),
+    <<I1/binary, I2/binary>>.
+
+%% Helper function to generate str instruction with y_reg offset, handling large offsets.
+%% Returns the instruction bytes storing SrcReg into Y register number Y:
+%% the Y-register base pointer is loaded from ?Y_REGS, then SrcReg is stored at
+%% byte offset Y * 4 from it.
+%%   SrcReg        - register holding the value to store
+%%   Y             - Y register index
+%%   TempReg       - scratch register for the base pointer
+%%   AvailableMask - further free registers (only consulted for large offsets)
+str_y_reg(SrcReg, Y, TempReg, _AvailableMask) when Y * 4 =< 1020 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(TempReg, BaseReg, Off),
+    I2 = jit_xtensa_asm:s32i(SrcReg, TempReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, AvailableMask) when AvailableMask =/= 0 ->
+    % Large offset - use register arithmetic with second available register
+    TempReg2 = first_avail(AvailableMask),
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(TempReg1, BaseReg, Off),
+    I2 = mov_immediate(TempReg2, Offset),
+    I3 = jit_xtensa_asm:add(TempReg2, TempReg2, TempReg1),
+    I4 = jit_xtensa_asm:s32i(SrcReg, TempReg2, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, 0) ->
+    % Large offset - no additional registers available, use A8_REG as second temp
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(TempReg1, BaseReg, Off),
+    % Park the base pointer in A8_REG so TempReg1 can receive the offset
+    I2 = jit_xtensa_asm:mov(?A8_REG, TempReg1),
+    I3 = mov_immediate(TempReg1, Offset),
+    I4 = jit_xtensa_asm:add(TempReg1, TempReg1, ?A8_REG),
+    I5 = jit_xtensa_asm:s32i(SrcReg, TempReg1, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Helper function to generate ldr instruction with y_reg offset, handling large offsets.
+%% Returns the instruction bytes loading Y register number Y into DstReg:
+%% the Y-register base pointer is loaded from ?Y_REGS, then the word at byte offset
+%% Y * 4 from it is loaded into DstReg.
+%%   DstReg        - destination register
+%%   Y             - Y register index
+%%   AvailableMask - free registers usable as scratch (0 when there are none)
+ldr_y_reg(DstReg, Y, AvailableMask) when AvailableMask =/= 0 andalso Y * 4 =< 1020 ->
+    % Small offset - use immediate addressing
+    TempReg = first_avail(AvailableMask),
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(TempReg, BaseReg, Off),
+    I2 = jit_xtensa_asm:l32i(DstReg, TempReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, AvailableMask) when AvailableMask =/= 0 ->
+    % Large offset - use DstReg as second temp register for arithmetic
+    TempReg = first_avail(AvailableMask),
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(TempReg, BaseReg, Off),
+    I2 = mov_immediate(DstReg, Offset),
+    I3 = jit_xtensa_asm:add(DstReg, DstReg, TempReg),
+    I4 = jit_xtensa_asm:l32i(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+ldr_y_reg(DstReg, Y, 0) when Y * 4 =< 1020 ->
+    % Small offset, no registers available - use DstReg as temp
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(DstReg, BaseReg, Off),
+    I2 = jit_xtensa_asm:l32i(DstReg, DstReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, 0) ->
+    % Large offset, no registers available - use A8_REG as temp register
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_xtensa_asm:l32i(DstReg, BaseReg, Off),
+    % Park the base pointer in A8_REG so DstReg can receive the offset
+    I2 = jit_xtensa_asm:mov(?A8_REG, DstReg),
+    I3 = mov_immediate(DstReg, Offset),
+    I4 = jit_xtensa_asm:add(DstReg, DstReg, ?A8_REG),
+    I5 = jit_xtensa_asm:l32i(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Map a register atom to its bit in the register masks (available_regs, used_regs).
+reg_bit(a0) -> ?REG_BIT_A0;
+reg_bit(a1) -> ?REG_BIT_A1;
+reg_bit(a2) -> ?REG_BIT_A2;
+reg_bit(a3) -> ?REG_BIT_A3;
+reg_bit(a4) -> ?REG_BIT_A4;
+reg_bit(a5) -> ?REG_BIT_A5;
+reg_bit(a6) -> ?REG_BIT_A6;
+reg_bit(a7) -> ?REG_BIT_A7;
+reg_bit(a8) -> ?REG_BIT_A8;
+reg_bit(a9) -> ?REG_BIT_A9;
+reg_bit(a10) -> ?REG_BIT_A10;
+reg_bit(a11) -> ?REG_BIT_A11;
+reg_bit(a12) -> ?REG_BIT_A12;
+reg_bit(a13) -> ?REG_BIT_A13;
+reg_bit(a14) -> ?REG_BIT_A14;
+reg_bit(a15) -> ?REG_BIT_A15.
+
+%% Pick the highest-numbered register, from a15 down to a5, whose bit is set in Mask.
+%% High registers (a15-a9) are clobbered by CALLX8 so prefer them to minimize saves.
+%% Fails with function_clause when none of the a15..a5 bits is set.
+first_avail(Mask) ->
+    first_avail_in(Mask, [
+        {a15, ?REG_BIT_A15},
+        {a14, ?REG_BIT_A14},
+        {a13, ?REG_BIT_A13},
+        {a12, ?REG_BIT_A12},
+        {a11, ?REG_BIT_A11},
+        {a10, ?REG_BIT_A10},
+        {a9, ?REG_BIT_A9},
+        {a8, ?REG_BIT_A8},
+        {a7, ?REG_BIT_A7},
+        {a6, ?REG_BIT_A6},
+        {a5, ?REG_BIT_A5}
+    ]).
+
+%% Walk the candidates in priority order; deliberately no clause for the empty list.
+first_avail_in(Mask, [{Reg, Bit} | _]) when Mask band Bit =/= 0 -> Reg;
+first_avail_in(Mask, [_ | Lower]) -> first_avail_in(Mask, Lower).
+
+%% Expand a register mask into the list of registers it contains, ordered from a15
+%% down to a0. An empty mask gives [].
+mask_to_list(Mask) ->
+    [
+        Reg
+     || {Reg, Bit} <- [
+            {a15, ?REG_BIT_A15},
+            {a14, ?REG_BIT_A14},
+            {a13, ?REG_BIT_A13},
+            {a12, ?REG_BIT_A12},
+            {a11, ?REG_BIT_A11},
+            {a10, ?REG_BIT_A10},
+            {a9, ?REG_BIT_A9},
+            {a8, ?REG_BIT_A8},
+            {a7, ?REG_BIT_A7},
+            {a6, ?REG_BIT_A6},
+            {a5, ?REG_BIT_A5},
+            {a4, ?REG_BIT_A4},
+            {a3, ?REG_BIT_A3},
+            {a2, ?REG_BIT_A2},
+            {a1, ?REG_BIT_A1},
+            {a0, ?REG_BIT_A0}
+        ],
+        Mask band Bit =/= 0
+    ].
+
+%% Map each call argument to the register it occupies or reads from, or to one of
+%% the markers imm / jit_state / stack when it is not held in a native register.
+%% Arguments addressed through the context (x_reg, y_reg, fp_reg) map to ?CTX_REG.
+args_regs(Args) ->
+    ArgToReg =
+        fun
+            ({free, {ptr, Reg}}) -> Reg;
+            ({free, Reg}) when is_atom(Reg) -> Reg;
+            ({free, Imm}) when is_integer(Imm) -> imm;
+            (offset) -> imm;
+            (ctx) -> ?CTX_REG;
+            (jit_state) -> jit_state;
+            (jit_state_tail_call) -> jit_state;
+            (stack) -> stack;
+            (Reg) when is_atom(Reg) -> Reg;
+            (Imm) when is_integer(Imm) -> imm;
+            ({ptr, Reg}) -> Reg;
+            ({x_reg, _}) -> ?CTX_REG;
+            ({y_reg, _}) -> ?CTX_REG;
+            ({fp_reg, _}) -> ?CTX_REG;
+            ({free, {x_reg, _}}) -> ?CTX_REG;
+            ({free, {y_reg, _}}) -> ?CTX_REG;
+            ({free, {fp_reg, _}}) -> ?CTX_REG;
+            ({avm_int64_t, _}) -> imm
+        end,
+    lists:map(ArgToReg, Args).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Label) ->
+    Offset0 = StreamModule:offset(Stream0),
+    %% Control flow can reach a label from elsewhere, so cached register contents
+    %% are dropped.
+    Regs1 = jit_regs:invalidate_all(Regs0),
+    add_label(State0#state{regs = Regs1}, Label, Offset0).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset.
+%% For integer labels the jump table literal of that label is patched and any
+%% pending branches to it are resolved; other labels (references) are only
+%% recorded. Register tracking is invalidated in both cases.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        jump_table_start = JumpTableStart,
+        branches = Branches,
+        labels = Labels
+    } = State,
+    Label,
+    LabelOffset
+) when is_integer(Label) ->
+    %% Patch the jump table entry literal
+    %% Each entry is: literal(4) + ENTRY(3) + L32I(3) + L32R(3) + ADD(3) + JX(3) + pad(1)
+    %% The 4-byte literal at the start of the entry stores the code-relative target offset.
+    JumpTableEntryOffset = JumpTableStart + Label * ?JUMP_TABLE_ENTRY_SIZE,
+    LiteralOffset = JumpTableEntryOffset,
+
+    %% Code-relative offset: from code_base (= native_code start = JumpTableStart) to label
+    CodeRelativeTarget = LabelOffset - JumpTableStart,
+    %% The literal is read by the entry's L32R, hence a 32-bit little-endian word.
+    LiteralData = <<CodeRelativeTarget:32/little>>,
+
+    Stream1 = StreamModule:replace(Stream0, LiteralOffset, LiteralData),
+
+    % Eagerly patch any branches targeting this label
+    {Stream2, RemainingBranches} = patch_branches_for_label(
+        StreamModule,
+        Stream1,
+        Label,
+        LabelOffset,
+        Branches
+    ),
+
+    State#state{
+        stream = Stream2,
+        branches = RemainingBranches,
+        labels = [{Label, LabelOffset} | Labels],
+        regs = jit_regs:invalidate_all(State#state.regs)
+    };
+add_label(#state{labels = Labels, regs = Regs0} = State, Label, Offset) ->
+    State#state{labels = [{Label, Offset} | Labels], regs = jit_regs:invalidate_all(Regs0)}.
+
+%% @doc Get the register tracking state.
+get_regs_tracking(#state{regs = Regs}) -> Regs.
+
+%% Thin wrappers around jit_regs that supply this backend's ?MAX_REG.
+value_to_contents(Value) ->
+    jit_regs:value_to_contents(Value, ?MAX_REG).
+
+vm_dest_to_contents(Dest) ->
+    jit_regs:vm_dest_to_contents(Dest, ?MAX_REG).
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc Return the DWARF register number for the ctx parameter
+%% @returns The DWARF register number where ctx is passed (a2 in Xtensa)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+dwarf_ctx_register() ->
+    ?DWARF_A2_REG_XTENSA.
+
+%% @doc Return the DWARF register number of an Xtensa register, as numbered by
+%% jit_xtensa_asm:reg_to_num/1.
+-spec dwarf_register_number(xtensa_register()) -> non_neg_integer().
+dwarf_register_number(Reg) -> jit_xtensa_asm:reg_to_num(Reg).
+-endif.
diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 89a2cd1de9..6dc95175d9 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -221,3 +221,4 @@ X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") X(JIT_RISCV64_ATOM, "\xB", "jit_riscv64") X(JIT_WASM32_ATOM, "\xA", "jit_wasm32") +X(JIT_XTENSA_ATOM, "\xA", "jit_xtensa") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 2dab1587bf..2cd85ef86c 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -158,7 +158,7 @@ _Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); -#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M || JIT_ARCH_TARGET == JIT_ARCH_ARM32 || JIT_ARCH_TARGET == JIT_ARCH_RISCV32 || JIT_ARCH_TARGET == JIT_ARCH_WASM32 +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M || JIT_ARCH_TARGET == JIT_ARCH_ARM32 || JIT_ARCH_TARGET == JIT_ARCH_RISCV32 || JIT_ARCH_TARGET == JIT_ARCH_WASM32 || JIT_ARCH_TARGET == JIT_ARCH_XTENSA _Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in 32-bit backends"); _Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in 32-bit backends"); _Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in 32-bit backends"); @@ -169,6 +169,9 @@ _Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in 32-bit backends"); _Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in 32-bit backends"); 
_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in 32-bit backends"); +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA +_Static_assert(offsetof(JITState, code_base) == 0xC, "jit_state->code_base is 0xC in jit/src/jit_xtensa.erl"); +#endif #else #error Unknown jit target @@ -2127,9 +2130,11 @@ void jit_debug_register_code(Module *mod, const void *native_code, size_t native return; } - // Compute ELF size from its headers - const Elf_Ehdr *ehdr = (const Elf_Ehdr *) elf_start; - size_t elf_size = ehdr->e_shoff + (size_t) ehdr->e_shnum * ehdr->e_shentsize; + // Compute ELF size from its headers using memcpy to avoid unaligned access + // (elf_start may not be properly aligned for struct access on Xtensa) + Elf_Ehdr ehdr_copy; + memcpy(&ehdr_copy, elf_start, sizeof(Elf_Ehdr)); + size_t elf_size = ehdr_copy.e_shoff + (size_t) ehdr_copy.e_shnum * ehdr_copy.e_shentsize; if (elf_size > remaining) { return; } @@ -2152,7 +2157,8 @@ void jit_debug_register_code(Module *mod, const void *native_code, size_t native } } - // Make a writable copy of the ELF for patching + // Make a writable copy of the ELF for patching. + // malloc returns aligned memory, so struct access on patched_elf is safe. uint8_t *patched_elf = (uint8_t *) malloc(elf_size); if (!patched_elf) { return; @@ -2163,10 +2169,14 @@ void jit_debug_register_code(Module *mod, const void *native_code, size_t native // Patch .text section header: set sh_addr to load_address. // LLDB uses this to auto-relocate symbol table addresses. - Elf_Shdr *shdrs = (Elf_Shdr *) (patched_elf + ehdr->e_shoff); - for (int i = 0; i < ehdr->e_shnum; i++) { - if (shdrs[i].sh_type == 1 && (shdrs[i].sh_flags & 6) == 6) { - shdrs[i].sh_addr = load_address; + // Use memcpy to handle potentially unaligned section headers on Xtensa. 
+ uint8_t *shdrs_base = patched_elf + ehdr_copy.e_shoff; + for (int i = 0; i < ehdr_copy.e_shnum; i++) { + Elf_Shdr shdr; + memcpy(&shdr, shdrs_base + (size_t) i * ehdr_copy.e_shentsize, sizeof(Elf_Shdr)); + if (shdr.sh_type == 1 && (shdr.sh_flags & 6) == 6) { + shdr.sh_addr = load_address; + memcpy(shdrs_base + (size_t) i * ehdr_copy.e_shentsize, &shdr, sizeof(Elf_Shdr)); break; } } diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index f6460bbc1f..4f0b86fac8 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -63,6 +63,77 @@ struct Module; typedef struct Module Module; #endif +// Numeric architecture identifiers. These must be defined before the +// JIT_ARCH_TARGET assignments below so that `JIT_ARCH_TARGET == JIT_ARCH_*` +// comparisons (including those used in struct JITState) resolve correctly +// rather than against undefined identifiers. +#define JIT_ARCH_X86_64 1 +#define JIT_ARCH_AARCH64 2 +#define JIT_ARCH_ARMV6M 3 +#define JIT_ARCH_RISCV32 4 +#define JIT_ARCH_RISCV64 5 +#define JIT_ARCH_ARM32 6 +#define JIT_ARCH_WASM32 7 +#define JIT_ARCH_XTENSA 8 + +#ifndef AVM_NO_JIT + +#ifdef __x86_64__ +#define JIT_ARCH_TARGET JIT_ARCH_X86_64 +#define JIT_JUMPTABLE_ENTRY_SIZE 5 +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#if defined(__arm64__) || defined(__aarch64__) +#define JIT_ARCH_TARGET JIT_ARCH_AARCH64 +#define JIT_JUMPTABLE_ENTRY_SIZE 4 +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#if defined(__arm__) && defined(AVM_JIT_ARM32) +#define JIT_ARCH_TARGET JIT_ARCH_ARM32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#define JIT_JUMPTABLE_OFFSET 0 +#elif defined(__arm__) +#define JIT_ARCH_TARGET JIT_ARCH_ARMV6M +#ifdef AVM_JIT_THUMB2 +#define JIT_JUMPTABLE_ENTRY_SIZE 6 +#else +#define JIT_JUMPTABLE_ENTRY_SIZE 12 +#endif +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#if defined(__riscv) && (__riscv_xlen 
== 64) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV64 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#ifdef __wasm__ +#define JIT_ARCH_TARGET JIT_ARCH_WASM32 +#define JIT_JUMPTABLE_ENTRY_SIZE 4 +#define JIT_JUMPTABLE_IS_DATA +#define JIT_JUMPTABLE_OFFSET 0 +#endif + +#ifdef __XTENSA__ +#define JIT_ARCH_TARGET JIT_ARCH_XTENSA +#define JIT_JUMPTABLE_ENTRY_SIZE 20 +#define JIT_JUMPTABLE_OFFSET 4 +#endif + +#ifndef JIT_ARCH_TARGET +#error Unknown JIT target +#endif +#endif + // Interface to native code: // Entry point returns the current (or new) context // jit_state->remaining_reductions is updated. @@ -88,6 +159,9 @@ struct JITState const void *continuation_pc; }; int remaining_reductions; +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA + const void *code_base; +#endif }; // Remember to keep this struct in sync with libs/jit/src/primitives.hrl @@ -191,63 +265,10 @@ enum TrapAndLoadResult #define JIT_FORMAT_VERSION 1 -#define JIT_ARCH_X86_64 1 -#define JIT_ARCH_AARCH64 2 -#define JIT_ARCH_ARMV6M 3 -#define JIT_ARCH_RISCV32 4 -#define JIT_ARCH_RISCV64 5 -#define JIT_ARCH_ARM32 6 -#define JIT_ARCH_WASM32 7 - #define JIT_VARIANT_PIC 1 #define JIT_VARIANT_FLOAT32 2 #define JIT_VARIANT_THUMB2 4 -#ifndef AVM_NO_JIT - -#ifdef __x86_64__ -#define JIT_ARCH_TARGET JIT_ARCH_X86_64 -#define JIT_JUMPTABLE_ENTRY_SIZE 5 -#endif - -#if defined(__arm64__) || defined(__aarch64__) -#define JIT_ARCH_TARGET JIT_ARCH_AARCH64 -#define JIT_JUMPTABLE_ENTRY_SIZE 4 -#endif - -#if defined(__arm__) && defined(AVM_JIT_ARM32) -#define JIT_ARCH_TARGET JIT_ARCH_ARM32 -#define JIT_JUMPTABLE_ENTRY_SIZE 8 -#elif defined(__arm__) -#define JIT_ARCH_TARGET JIT_ARCH_ARMV6M -#ifdef AVM_JIT_THUMB2 -#define JIT_JUMPTABLE_ENTRY_SIZE 6 -#else -#define JIT_JUMPTABLE_ENTRY_SIZE 12 -#endif -#endif - -#if defined(__riscv) && (__riscv_xlen == 32) -#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 -#define JIT_JUMPTABLE_ENTRY_SIZE 8 -#endif - -#if defined(__riscv) && (__riscv_xlen == 64) -#define 
JIT_ARCH_TARGET JIT_ARCH_RISCV64 -#define JIT_JUMPTABLE_ENTRY_SIZE 8 -#endif - -#ifdef __wasm__ -#define JIT_ARCH_TARGET JIT_ARCH_WASM32 -#define JIT_JUMPTABLE_ENTRY_SIZE 4 -#define JIT_JUMPTABLE_IS_DATA -#endif - -#ifndef JIT_ARCH_TARGET -#error Unknown JIT target -#endif -#endif - #ifdef JIT_JUMPTABLE_IS_DATA /** * @brief Get per-thread function pointer for a WASM JIT label. diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index d75221dc5d..547386c474 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -1602,7 +1602,7 @@ ModuleNativeEntryPoint module_get_native_entry_point(Module *module, int exporte return jit_wasm_get_entry_point((const void *) module->native_code, exported_label); #else assert(module->native_code); - return (ModuleNativeEntryPoint) ((uintptr_t) module->native_code + JIT_JUMPTABLE_ENTRY_SIZE * exported_label); + return (ModuleNativeEntryPoint) (((const uint8_t *) module->native_code) + JIT_JUMPTABLE_OFFSET + JIT_JUMPTABLE_ENTRY_SIZE * exported_label); #endif } #endif @@ -2006,8 +2006,15 @@ bool module_find_line(Module *mod, size_t offset, uint32_t *line, size_t *filena } return module_find_line_ref(mod, prev_line_ref, line, filename_len, filename); } + +#else +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA + struct JITState temp_jit_state = { .code_base = (const void *) mod->native_code }; + ModuleNativeEntryPoint label0_entry = module_get_native_entry_point(mod, 0); + const uint8_t *labels_and_lines = (const uint8_t *) label0_entry(NULL, &temp_jit_state, NULL); #else const uint8_t *labels_and_lines = (const uint8_t *) mod->native_code(NULL, NULL, NULL); +#endif int labels_count = READ_16_UNALIGNED(labels_and_lines); labels_and_lines += 2 + labels_count * 6; size_t lines_count = READ_16_UNALIGNED(labels_and_lines); @@ -2092,8 +2099,15 @@ COLD_FUNC void module_cp_to_label_offset(term cp, Module **cp_mod, int *label, s if (l_off) { *l_off = 0; } +#else +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA + // C11 6.7.9 §21 : this is safe + 
struct JITState temp_jit_state = { .code_base = (const void *) mod->native_code }; + ModuleNativeEntryPoint label0_entry = module_get_native_entry_point(mod, 0); + const uint8_t *labels_and_lines = (const uint8_t *) label0_entry(NULL, &temp_jit_state, NULL); #else const uint8_t *labels_and_lines = (const uint8_t *) mod->native_code(NULL, NULL, NULL); +#endif int labels_count = READ_16_UNALIGNED(labels_and_lines); labels_and_lines += 2; uint32_t label_offset = 0; @@ -2175,8 +2189,14 @@ uint32_t module_label_code_offset(Module *mod, int label) if (mod->native_code) { #ifdef JIT_JUMPTABLE_IS_DATA return (uint32_t) label * JIT_JUMPTABLE_ENTRY_SIZE; +#else +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA + struct JITState temp_jit_state = { .code_base = (const void *) mod->native_code }; + ModuleNativeEntryPoint label0_entry = module_get_native_entry_point(mod, 0); + const uint8_t *labels_and_lines = (const uint8_t *) label0_entry(NULL, &temp_jit_state, NULL); #else const uint8_t *labels_and_lines = (const uint8_t *) mod->native_code(NULL, NULL, NULL); +#endif int labels_count = READ_16_UNALIGNED(labels_and_lines); labels_and_lines += 2; while (labels_count > 0) { @@ -2205,6 +2225,6 @@ void module_set_native_code(Module *mod, uint32_t labels_count, ModuleNativeEntr { mod->native_code = entry_point; // Extra function is OP_INT_CALL_END - mod->end_instruction_ii = JIT_JUMPTABLE_ENTRY_SIZE * labels_count; + mod->end_instruction_ii = JIT_JUMPTABLE_OFFSET + JIT_JUMPTABLE_ENTRY_SIZE * labels_count; } #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 95e9f23d1c..886c66a922 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -6626,6 +6626,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_ARM32_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_WASM32 return JIT_WASM32_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_XTENSA + return JIT_XTENSA_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/opcodesswitch.h 
b/src/libAtomVM/opcodesswitch.h index 5f962e0e2d..fbdbd63668 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -1737,6 +1737,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) jit_state.module = mod; jit_state.remaining_reductions = remaining_reductions; // __asm__ volatile("int $0x03"); +#if JIT_ARCH_TARGET == JIT_ARCH_XTENSA + jit_state.code_base = (const void *) mod->native_code; +#endif TRACE("calling native code at %p, ctx = %p\n", (void *) native_pc, (void *) ctx); Context *new_ctx = native_pc(ctx, &jit_state, &module_native_interface); TRACE("returning from native code at %p, ctx = %p, new_ctx = %p, jit_state.continuation = %p\n", (void *) native_pc, (void *) ctx, (void *) new_ctx, (void *) jit_state.continuation); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index a2e9d1d862..96915ec3d7 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -82,8 +82,13 @@ if(CONFIG_JIT_ENABLED) set(AVM_DISABLE_JIT OFF) set(AVM_JIT_TARGET_ARCH riscv32) message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + # Exact match only: every IDF target name contains "esp32", so an unanchored pattern would also claim RISC-V chips not listed in the branch above. + elseif(${IDF_TARGET} MATCHES "^(esp32|esp32s2|esp32s3)$") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH xtensa) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (Xtensa)") else() - message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + message(WARNING "JIT is not supported on ${IDF_TARGET}") set(AVM_DISABLE_JIT ON) endif() else() diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 6e03cae7db..870aa971aa 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -44,6 +44,7 @@ set(ERLANG_MODULES jit_wasm32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests + jit_xtensa_tests jit_xtensa_asm_tests ) diff --git a/tests/libs/jit/jit_xtensa_tests.erl b/tests/libs/jit/jit_xtensa_tests.erl new file mode 100644 index 0000000000..2a7b677f16 --- /dev/null +++
b/tests/libs/jit/jit_xtensa_tests.erl @@ -0,0 +1,2551 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_xtensa_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). +-include("jit_tests_common.hrl"). + +-define(BACKEND, jit_xtensa). + +word_size_test() -> + ?assertEqual(4, ?BACKEND:word_size()). + +new_state_test() -> + State = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Stream = ?BACKEND:stream(State), + ?assertEqual(0, byte_size(Stream)), + ?assertEqual(0, ?BACKEND:offset(State)). 
+
+%% jump_table(State, 2) emits three 20-byte slots: 0xff literal bytes, then entry/l32r/l32i/add/jx.
+jump_table_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 2),
+    Expected = <<
+        " 0: ff .byte 0xff\n"
+        " 1: ff .byte 0xff\n"
+        " 2: ff .byte 0xff\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 00c136 entry a1, 96\n"
+        " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: ff .byte 0xff\n"
+        " 15: ff .byte 0xff\n"
+        " 16: ff .byte 0xff\n"
+        " 17: ff .byte 0xff\n"
+        " 18: 00c136 entry a1, 96\n"
+        " 1b: fffe51 l32r a5, 0x14 (0xffffffff)\n"
+        " 1e: 032382 l32i a8, a3, 12\n"
+        " 21: 808850 add a8, a8, a5\n"
+        " 24: 0008a0 jx a8\n"
+        " 27: ff .byte 0xff\n"
+        " 28: ff .byte 0xff\n"
+        " 29: ff .byte 0xff\n"
+        " 2a: ff .byte 0xff\n"
+        " 2b: ff .byte 0xff\n"
+        " 2c: 00c136 entry a1, 96\n"
+        " 2f: fffe51 l32r a5, 0x28 (0xffffffff)\n"
+        " 32: 032382 l32i a8, a3, 12\n"
+        " 35: 808850 add a8, a8, a5\n"
+        " 38: 0008a0 jx a8\n"
+        " 3b: ff .byte 0xff"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% add_label/2 writes the label offset (0x3c for all three here) into each slot's 4-byte literal.
+add_label_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 2),
+    S2 = ?BACKEND:add_label(S1, 1),
+    S3 = ?BACKEND:add_label(S2, 2),
+    S4 = ?BACKEND:add_label(S3, 0),
+    Expected = <<
+        " 0: 003c movi.n a0, 48\n"
+        " 2: 360000 lsi f0, a0, 216\n"
+        " 5: 5100c1 l32r a12, 0xfffd4408\n"
+        " 8: fe .byte 0xfe\n"
+        " 9: ff .byte 0xff\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 003c movi.n a0, 48\n"
+        " 16: 360000 lsi f0, a0, 216\n"
+        " 19: 5100c1 l32r a12, 0xfffd441c\n"
+        " 1c: fe .byte 0xfe\n"
+        " 1d: ff .byte 0xff\n"
+        " 1e: 032382 l32i a8, a3, 12\n"
+        " 21: 808850 add a8, a8, a5\n"
+        " 24: 0008a0 jx a8\n"
+        " 27: ff .byte 0xff\n"
+        " 28: 003c movi.n a0, 48\n"
+        " 2a: 360000 lsi f0, a0, 216\n"
+        " 2d: 5100c1 l32r a12, 0xfffd4430\n"
+        " 30: fe .byte 0xfe\n"
+        " 31: ff .byte 0xff\n"
+        " 32: 032382 l32i a8, a3, 12\n"
+        " 35: 808850 add a8, a8, a5\n"
+        " 38: 0008a0 jx a8\n"
+        " 3b: ff .byte 0xff"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S4)).
+
+%% add_label/3 stores the explicit offset 16#20 in slot 2; slot 0 keeps its 0xff placeholder.
+add_label_with_offset_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 2),
+    S2 = ?BACKEND:add_label(S1, 1),
+    S3 = ?BACKEND:add_label(S2, 2, 16#20),
+    Expected = <<
+        " 0: ff .byte 0xff\n"
+        " 1: ff .byte 0xff\n"
+        " 2: ff .byte 0xff\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 00c136 entry a1, 96\n"
+        " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 003c movi.n a0, 48\n"
+        " 16: 360000 lsi f0, a0, 216\n"
+        " 19: 5100c1 l32r a12, 0xfffd441c\n"
+        " 1c: fe .byte 0xfe\n"
+        " 1d: ff .byte 0xff\n"
+        " 1e: 032382 l32i a8, a3, 12\n"
+        " 21: 808850 add a8, a8, a5\n"
+        " 24: 0008a0 jx a8\n"
+        " 27: ff .byte 0xff\n"
+        " 28: 000020 lsi f2, a0, 0\n"
+        " 2b: c13600 mul16u a3, a6, a0\n"
+        " 2e: fe5100 f64iter a5, a1, a0, 3, 1\n"
+        " 31: ff .byte 0xff\n"
+        " 32: 032382 l32i a8, a3, 12\n"
+        " 35: 808850 add a8, a8, a5\n"
+        " 38: 0008a0 jx a8\n"
+        " 3b: ff .byte 0xff"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S3)).
+
+%% Loading {x_reg, 0} into a native register is a single l32i from a2 + 24.
+move_to_native_register_xreg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    ?assert(is_atom(Reg)),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% The immediate 42 is materialised with a single movi.
+move_to_native_register_imm_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, 42),
+    ?assert(is_atom(Reg)),
+    Expected = <<
+        " 0: 2aa0f2 movi a15, 42"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% Loading {y_reg, 3} takes two l32i: the word at a2 + 20, then offset 12 from it.
+move_to_native_register_yreg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {y_reg, 3}),
+    ?assert(is_atom(Reg)),
+    ?assert(byte_size(?BACKEND:stream(S1)) > 0),
+    Expected = <<
+        " 0: 0522e2 l32i a14, a2, 20\n"
+        " 3: 032ef2 l32i a15, a14, 12"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% Storing the immediate 42 into {x_reg, 0}: movi, then s32i to a2 + 24.
+move_to_vm_register_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_vm_register(S0, 42, {x_reg, 0}),
+    Expected = <<
+        " 0: 2aa0f2 movi a15, 42\n"
+        " 3: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% Storing 42 into {y_reg, 2}: movi, l32i of the word at a2 + 20, then s32i at offset 8.
+move_to_vm_register_yreg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_vm_register(S0, 42, {y_reg, 2}),
+    Expected = <<
+        " 0: 2aa0e2 movi a14, 42\n"
+        " 3: 0522f2 l32i a15, a2, 20\n"
+        " 6: 026fe2 s32i a14, a15, 8"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% and_/3 with an immediate mask: movi into a second register, then and in place.
+and_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:and_(S1, {free, Reg}, 16#3F),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 3fa0e2 movi a14, 63\n"
+        " 6: 10ffe0 and a15, a15, a14"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% or_/3 with an immediate mask: movi into a second register, then or in place.
+or_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:or_(S1, {free, Reg}, 16#0F),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0fa0e2 movi a14, 15\n"
+        " 6: 20ffe0 or a15, a15, a14"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% add/3 with the immediate 4 is a single addi.
+add_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:add(S1, {free, Reg}, 4),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 04cff2 addi a15, a15, 4"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% sub/3 with the immediate 4 is an addi of -4.
+sub_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:sub(S1, {free, Reg}, 4),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: fccff2 addi a15, a15, -4"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% debugger/1 emits a single break 1, 15.
+debugger_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:debugger(S0),
+    Expected = <<
+        " 0: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% call_primitive/3: l32i primitive 0 from a4, ctx/jit_state into a10/a11, callx8, copy a10 out.
+call_primitive_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, ResultReg} = ?BACKEND:call_primitive(S0, 0, [ctx, jit_state]),
+    ?assert(is_atom(ResultReg)),
+    Expected = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 02ad mov.n a10, a2\n"
+        " 5: 03bd mov.n a11, a3\n"
+        " 7: 000fe0 callx8 a15\n"
+        " a: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% call_primitive_last/3: same call sequence, then the a10 result goes to a2 and retw returns.
+call_primitive_last_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:call_primitive_last(S0, 0, [ctx, jit_state]),
+    Expected = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 02ad mov.n a10, a2\n"
+        " 5: 03bd mov.n a11, a3\n"
+        " 7: 000fe0 callx8 a15\n"
+        " a: 0a2d mov.n a2, a10\n"
+        " c: 000090 retw"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% shift_right/3 by the constant 2 is a single srli.
+shift_right_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:shift_right(S1, {free, Reg}, 2),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 41f2f0 srli a15, a15, 2"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% shift_left/3 by the constant 2 is a single slli.
+shift_left_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, Reg} = ?BACKEND:shift_left(S1, {free, Reg}, 2),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 11ffe0 slli a15, a15, 2"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% Three consecutive move_to_native_register/2 calls must hand out three
+%% distinct registers (a15, a14, a13 in the golden dump).
+register_allocation_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, RegX0} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, RegX1} = ?BACKEND:move_to_native_register(S1, {x_reg, 1}),
+    {S3, RegX2} = ?BACKEND:move_to_native_register(S2, {x_reg, 2}),
+    ?assertNotEqual(RegX0, RegX1),
+    ?assertNotEqual(RegX1, RegX2),
+    ?assertNotEqual(RegX0, RegX2),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S3)).
+
+%% Labels 1, 2 and 0 are placed at 0x3c, 0x4b and 0x5a; those offsets show up in the slot literals.
+update_branches_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 2),
+    S2 = ?BACKEND:add_label(S1, 1),
+    S3 = ?BACKEND:call_primitive_last(S2, 0, [ctx, jit_state]),
+    S4 = ?BACKEND:add_label(S3, 2),
+    S5 = ?BACKEND:call_primitive_last(S4, 1, [ctx, jit_state]),
+    S6 = ?BACKEND:add_label(S5, 0),
+    S7 = ?BACKEND:call_primitive_last(S6, 0, [ctx, jit_state]),
+    S8 = ?BACKEND:update_branches(S7),
+    Expected = <<
+        " 0: 005a add.n a0, a0, a5\n"
+        " 2: 360000 lsi f0, a0, 216\n"
+        " 5: 5100c1 l32r a12, 0xfffd4408\n"
+        " 8: fe .byte 0xfe\n"
+        " 9: ff .byte 0xff\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 003c movi.n a0, 48\n"
+        " 16: 360000 lsi f0, a0, 216\n"
+        " 19: 5100c1 l32r a12, 0xfffd441c\n"
+        " 1c: fe .byte 0xfe\n"
+        " 1d: ff .byte 0xff\n"
+        " 1e: 032382 l32i a8, a3, 12\n"
+        " 21: 808850 add a8, a8, a5\n"
+        " 24: 0008a0 jx a8\n"
+        " 27: ff .byte 0xff\n"
+        " 28: 004b addi.n a0, a0, 4\n"
+        " 2a: 360000 lsi f0, a0, 216\n"
+        " 2d: 5100c1 l32r a12, 0xfffd4430\n"
+        " 30: fe .byte 0xfe\n"
+        " 31: ff .byte 0xff\n"
+        " 32: 032382 l32i a8, a3, 12\n"
+        " 35: 808850 add a8, a8, a5\n"
+        " 38: 0008a0 jx a8\n"
+        " 3b: ff .byte 0xff\n"
+        " 3c: 0024f2 l32i a15, a4, 0\n"
+        " 3f: 02ad mov.n a10, a2\n"
+        " 41: 03bd mov.n a11, a3\n"
+        " 43: 000fe0 callx8 a15\n"
+        " 46: 0a2d mov.n a2, a10\n"
+        " 48: 000090 retw\n"
+        " 4b: 0124f2 l32i a15, a4, 4\n"
+        " 4e: 02ad mov.n a10, a2\n"
+        " 50: 03bd mov.n a11, a3\n"
+        " 52: 000fe0 callx8 a15\n"
+        " 55: 0a2d mov.n a2, a10\n"
+        " 57: 000090 retw\n"
+        " 5a: 0024f2 l32i a15, a4, 0\n"
+        " 5d: 02ad mov.n a10, a2\n"
+        " 5f: 03bd mov.n a11, a3\n"
+        " 61: 000fe0 callx8 a15\n"
+        " 64: 0a2d mov.n a2, a10\n"
+        " 66: 000090 retw"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S8)).
+
+%% move_to_cp/2 from {y_reg, 0}: two l32i loads, then s32i to a2 + 92.
+move_to_cp_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_cp(S0, {y_reg, 0}),
+    Expected = <<
+        " 0: 0522e2 l32i a14, a2, 20\n"
+        " 3: 002ef2 l32i a15, a14, 0\n"
+        " 6: 1762f2 s32i a15, a2, 92"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%%-----------------------------------------------------------------------------
+%% Immediate materialisation, exercised through move_to_native_register/3
+%%-----------------------------------------------------------------------------
+
+%% 42 fits a single movi.
+mov_immediate_small_pos_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 42, a3),
+    Expected = <<
+        " 0: 2aa032 movi a3, 42"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% -1 fits a single movi.
+mov_immediate_small_neg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, -1, a3),
+    Expected = <<
+        " 0: ffaf32 movi a3, -1"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% 0 fits a single movi.
+mov_immediate_zero_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 0, a3),
+    Expected = <<
+        " 0: 00a032 movi a3, 0"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% 2047 still fits a single movi.
+mov_immediate_max_movi_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 2047, a3),
+    Expected = <<
+        " 0: ffa732 movi a3, 0x7ff"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% -2048 still fits a single movi.
+mov_immediate_min_movi_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, -2048, a3),
+    Expected = <<
+        " 0: 00a832 movi a3, 0xfffff800"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% 4096 is built as movi 0 followed by addmi 0x1000.
+mov_immediate_medium_pos_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 4096, a3),
+    Expected = <<
+        " 0: 00a032 movi a3, 0\n"
+        " 3: 10d332 addmi a3, a3, 0x1000"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% -4096 is built as movi 0 followed by a negative addmi.
+mov_immediate_medium_neg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, -4096, a3),
+    Expected = <<
+        " 0: 00a032 movi a3, 0\n"
+        " 3: f0d332 addmi a3, a3, 0xfffff000"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% 16#12345678: a j skips an inline 4-byte literal (78 56 34 12), then l32r (31 ff ff) loads a3.
+mov_immediate_large_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 16#12345678, a3),
+    Expected = <<
+        " 0: 000106 j 0x8\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 5678 l32i.n a7, a6, 20\n"
+        " 6: 311234 lsi f3, a2, 196\n"
+        " 9: ff .byte 0xff\n"
+        " a: ff .byte 0xff"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% -100000 is built as movi, slli by 6, then addi -96.
+mov_immediate_large_neg_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, -100000, a3),
+    Expected = <<
+        " 0: e7a932 movi a3, 0xfffff9e7\n"
+        " 3: 1133a0 slli a3, a3, 6\n"
+        " 6: a0c332 addi a3, a3, -96"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%% 16#10000 is built as movi 1 followed by slli by 16.
+mov_immediate_shifted_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:move_to_native_register(S0, 16#10000, a3),
+    Expected = <<
+        " 0: 01a032 movi a3, 1\n"
+        " 3: 113300 slli a3, a3, 16"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S1)).
+
+%%-----------------------------------------------------------------------------
+%% mov_immediate/2 tests (returns a fixed-size binary, no L32R)
+%% Regression tests: values with 0xFF in an intermediate byte used to be
+%% misencoded by mov_immediate_large. These cover the movi + slli-8 + addi
+%% byte-chain path that set_cp / rewrite_cp_offset rely on.
+%%-----------------------------------------------------------------------------
+
+%% 42 is a single movi.
+mov_immediate_bin_small_test() ->
+    Expected = <<
+        " 0: 2aa032 movi a3, 42"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 42)).
+
+%% 4096 is movi 0 followed by addmi 0x1000.
+mov_immediate_bin_movi_addmi_test() ->
+    Expected = <<
+        " 0: 00a032 movi a3, 0\n"
+        " 3: 10d332 addmi a3, a3, 0x1000"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 4096)).
+
+%% 65444 (16#FFA4): the chain starts from 1 and ends with addi -92.
+mov_immediate_bin_ff_middle_test() ->
+    Expected = <<
+        " 0: 01a032 movi a3, 1\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: 00c332 addi a3, a3, 0\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: a4c332 addi a3, a3, -92"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 65444)).
+
+%% 16#00FFFFFF: the chain starts from 1 and ends with addi -1.
+mov_immediate_bin_00ffffff_test() ->
+    Expected = <<
+        " 0: 01a032 movi a3, 1\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: 00c332 addi a3, a3, 0\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: 00c332 addi a3, a3, 0\n"
+        " f: 113380 slli a3, a3, 8\n"
+        " 12: ffc332 addi a3, a3, -1"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#00FFFFFF)).
+
+%% 16#FFFFFFFF: movi 0, slli 8, addi -1.
+mov_immediate_bin_ffffffff_test() ->
+    Expected = <<
+        " 0: 00a032 movi a3, 0\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: ffc332 addi a3, a3, -1"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#FFFFFFFF)).
+
+%% 16#00FF00FF: alternating addi -1 / addi 1 steps between the shifts.
+mov_immediate_bin_00ff00ff_test() ->
+    Expected = <<
+        " 0: 01a032 movi a3, 1\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: ffc332 addi a3, a3, -1\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: 01c332 addi a3, a3, 1\n"
+        " f: 113380 slli a3, a3, 8\n"
+        " 12: ffc332 addi a3, a3, -1"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#00FF00FF)).
+
+%% 16#DEADBEEF: full four-step chain with negative addi at every step.
+mov_immediate_bin_deadbeef_test() ->
+    Expected = <<
+        " 0: dfaf32 movi a3, -33\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: aec332 addi a3, a3, -82\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: bfc332 addi a3, a3, -65\n"
+        " f: 113380 slli a3, a3, 8\n"
+        " 12: efc332 addi a3, a3, -17"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#DEADBEEF)).
+
+%% 16#FFEE1234: three-step chain starting from movi -18.
+mov_immediate_bin_ffee1234_test() ->
+    Expected = <<
+        " 0: eeaf32 movi a3, -18\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: 12c332 addi a3, a3, 18\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: 34c332 addi a3, a3, 52"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#FFEE1234)).
+
+%% 16#12345678: four-step chain with only positive addi steps.
+mov_immediate_bin_12345678_test() ->
+    Expected = <<
+        " 0: 12a032 movi a3, 18\n"
+        " 3: 113380 slli a3, a3, 8\n"
+        " 6: 34c332 addi a3, a3, 52\n"
+        " 9: 113380 slli a3, a3, 8\n"
+        " c: 56c332 addi a3, a3, 86\n"
+        " f: 113380 slli a3, a3, 8\n"
+        " 12: 78c332 addi a3, a3, 120"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:mov_immediate(a3, 16#12345678)).
+
+%%-----------------------------------------------------------------------------
+%% Register bookkeeping: flush/1, used_regs/1, available_regs/1,
+%% free_native_registers/2 and assert_all_native_free/1
+%%-----------------------------------------------------------------------------
+
+%% flush/1 leaves the already-emitted break instruction unchanged.
+flush_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:debugger(S0),
+    S2 = ?BACKEND:flush(S1),
+    Expected = <<
+        " 0: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%% used_regs/1 reports exactly the two registers handed out (order-insensitive).
+used_regs_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, RegA} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, RegB} = ?BACKEND:move_to_native_register(S1, {x_reg, 1}),
+    ?assertEqual(lists:sort([RegA, RegB]), lists:sort(?BACKEND:used_regs(S2))).
+
+available_regs_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    FreeBefore = ?BACKEND:available_regs(S0),
+    {S1, Taken} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    FreeAfter = ?BACKEND:available_regs(S1),
+    ?assert(length(FreeBefore) > length(FreeAfter)),
+    ?assertNot(lists:member(Taken, FreeAfter)).
+
+%% Freeing both allocated registers must leave nothing allocated.
+free_native_registers_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, RegA} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    {S2, RegB} = ?BACKEND:move_to_native_register(S1, {x_reg, 1}),
+    ?BACKEND:assert_all_native_free(?BACKEND:free_native_registers(S2, [RegA, RegB])).
+
+assert_all_native_free_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    ok = ?BACKEND:assert_all_native_free(Fresh).
+
+%%-----------------------------------------------------------------------------
+%% return_if_not_equal_to_ctx/2: early return unless the register holds ctx
+%%-----------------------------------------------------------------------------
+
+%% beq a15, a2 skips the mov.n/retw early-return sequence.
+return_if_not_equal_to_ctx_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {S1, Reg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    S2 = ?BACKEND:return_if_not_equal_to_ctx(S1, {free, Reg}),
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 041f27 beq a15, a2, 0xb\n"
+        " 6: 0f2d mov.n a2, a15\n"
+        " 8: 000090 retw"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%%-----------------------------------------------------------------------------
+%% jump_to_offset/2: direct jump to a known stream offset
+%%-----------------------------------------------------------------------------
+
+%% Jumping to offset 0 from just after the table is a backward j to 0x0.
+jump_to_offset_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 0),
+    S2 = ?BACKEND:jump_to_offset(S1, 0),
+    Expected = <<
+        " 0: ff .byte 0xff\n"
+        " 1: ff .byte 0xff\n"
+        " 2: ff .byte 0xff\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 00c136 entry a1, 96\n"
+        " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: fffa06 j 0x0"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S2)).
+
+%%-----------------------------------------------------------------------------
+%% jump_to_label/2: forward reference, resolved by add_label/3 + update_branches/1
+%%-----------------------------------------------------------------------------
+
+%% The jump is a j followed by seven nop placeholders; once resolved it targets 0x2c.
+jump_to_label_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:jump_table(S0, 0),
+    Label = make_ref(),
+    S2 = ?BACKEND:jump_to_label(S1, Label),
+    LabelOffset = ?BACKEND:offset(S2),
+    S3 = ?BACKEND:add_label(S2, Label, LabelOffset),
+    S4 = ?BACKEND:update_branches(S3),
+    Expected = <<
+        " 0: ff .byte 0xff\n"
+        " 1: ff .byte 0xff\n"
+        " 2: ff .byte 0xff\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 00c136 entry a1, 96\n"
+        " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 000506 j 0x2c\n"
+        " 17: 0020f0 nop\n"
+        " 1a: 0020f0 nop\n"
+        " 1d: 0020f0 nop\n"
+        " 20: 0020f0 nop\n"
+        " 23: 0020f0 nop\n"
+        " 26: 0020f0 nop\n"
+        " 29: 0020f0 nop"
+    >>,
+    ?assertStream(xtensa, Expected, ?BACKEND:stream(S4)).
+
+%%-----------------------------------------------------------------------------
+%% cond_jump_to_label/3
+%%-----------------------------------------------------------------------------
+
+%% Conditional jump on `{free, Reg} < 0` to a label that is only defined
+%% afterwards, at the offset reached once the jump has been emitted.
+%% Expected listing: bltz over a short `j`, then the patched `j` and nops.
+cond_jump_to_label_test() ->
+    Expected = <<
+        " 0: ff .byte 0xff\n"
+        " 1: ff .byte 0xff\n"
+        " 2: ff .byte 0xff\n"
+        " 3: ff .byte 0xff\n"
+        " 4: 00c136 entry a1, 96\n"
+        " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 0622f2 l32i a15, a2, 24\n"
+        " 17: 002f96 bltz a15, 0x1d\n"
+        " 1a: 0005c6 j 0x35\n"
+        " 1d: 000506 j 0x35\n"
+        " 20: 0020f0 nop\n"
+        " 23: 0020f0 nop\n"
+        " 26: 0020f0 nop\n"
+        " 29: 0020f0 nop\n"
+        " 2c: 0020f0 nop\n"
+        " 2f: 0020f0 nop\n"
+        " 32: 0020f0 nop"
+    >>,
+    Label = make_ref(),
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Fresh, 0),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(WithTable, {x_reg, 0}),
+    Cond = {{free, Native}, '<', 0},
+    WithJump = ?BACKEND:cond_jump_to_label(Loaded, Cond, Label),
+    LabelOffset = ?BACKEND:offset(WithJump),
+    WithLabel = ?BACKEND:add_label(WithJump, Label, LabelOffset),
+    Patched = ?BACKEND:update_branches(WithLabel),
+    Emitted = ?BACKEND:stream(Patched),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% jump_to_continuation/2
+%%-----------------------------------------------------------------------------
+
+%% The continuation offset comes from x_reg 0 and is passed as {free, Reg}.
+%% Expected listing: a second load, add, addi 3 and an indirect jx.
+jump_to_continuation_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0323e2 l32i a14, a3, 12\n"
+        " 6: 80eef0 add a14, a14, a15\n"
+        " 9: 03cee2 addi a14, a14, 3\n"
+        " c: 000ea0 jx a14"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Done = ?BACKEND:jump_to_continuation(Loaded, {free, Native}),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block/3
+%%-----------------------------------------------------------------------------
+
+%% Simplest if_block: `Reg == 0` guarding a single debugger (break) body.
+if_block_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '==', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_else_block/4
+%%-----------------------------------------------------------------------------
+
+%% `Reg == 0` with a debugger body in both branches; the expected listing
+%% holds two break instructions separated by a jump over the else branch.
+if_else_block_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000146 j 0xf\n"
+        " 9: 0041f0 break 1, 15\n"
+        " c: 000086 j 0x12\n"
+        " f: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    OnTrue = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    OnFalse = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_else_block(Loaded, {Native, '==', 0}, OnTrue, OnFalse),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% get_regs_tracking/1
+%%-----------------------------------------------------------------------------
+
+%% Only checks that a fresh state reports something other than `undefined`.
+get_regs_tracking_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Tracking = ?BACKEND:get_regs_tracking(Fresh),
+    ?assertNotEqual(undefined, Tracking).
+
+%%-----------------------------------------------------------------------------
+%% if_block/3 first clause: {'and', CondList}
+%%-----------------------------------------------------------------------------
+
+%% Two `== 0` conditions combined with 'and'; in the expected listing each
+%% failed test jumps past the body at 0x15.
+if_block_and_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 002f16 beqz a15, 0xc\n"
+        " 9: 000206 j 0x15\n"
+        " c: 002e16 beqz a14, 0x12\n"
+        " f: 000086 j 0x15\n"
+        " 12: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Cond = {'and', [{RegA, '==', 0}, {RegB, '==', 0}]},
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, Cond, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond '<' variants
+%%-----------------------------------------------------------------------------
+
+%% {Reg, '<', 0} with a bare register (not {free, Reg}) -> bltz
+if_block_cond_lt_0_bare_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f96 bltz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '<', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '<', IS_B4CONST} -> blti with the constant encoded in the branch
+if_block_cond_lt_b4const_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021fa6 blti a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '<', 1}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '<', Val} where 0 < Val <= 255 and not B4CONST -> movi+blt
+if_block_cond_lt_uint8_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 64a0e2 movi a14, 100\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '<', 100}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '<', Val} where Val > 255 -> mov_immediate+blt
+if_block_cond_lt_large_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '<', 1000}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Val, '<', Reg} where 0 <= Val <= 255 -> movi+blt(Temp, Reg)
+if_block_cond_uint8_lt_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 022ef7 blt a14, a15, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {42, '<', Native}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Val, '<', Reg} where Val > 255 -> mov_immediate+blt(Temp, Reg)
+if_block_cond_large_lt_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 022ef7 blt a14, a15, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {1000, '<', Native}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg1, '<', Reg2} with both operands in native registers -> blt(Reg1, Reg2)
+if_block_cond_reg_lt_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, {RegA, '<', RegB}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond '==' variants
+%%-----------------------------------------------------------------------------
+
+%% {{free, Reg}, '==', 0} -> exercises the {free, Reg0} form of the beqz clause
+if_block_cond_free_eq_0_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {{free, Native}, '==', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg1, '==', Reg2} with bare registers -> beq(Reg1, Reg2)
+if_block_cond_eq_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, {RegA, '==', RegB}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg1}, '==', Reg2} -> exercises the {free, Reg0} form of the beq clause
+if_block_cond_free_eq_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, {{free, RegA}, '==', RegB}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {'(int)', Reg, '==', 0} -> same code as the plain beqz case
+if_block_cond_int_eq_zero_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {'(int)', Native, '==', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {'(int)', Reg, '==', Val} -> same code as the plain '==' case (movi+beq)
+if_block_cond_int_eq_val_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {'(int)', Native, '==', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '==', IS_B4CONST} -> beqi with the constant encoded in the branch
+if_block_cond_eq_b4const_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021f26 beqi a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '==', 1}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '==', Val} where 0 < Val <= 255, not B4CONST -> movi+beq
+if_block_cond_eq_uint8_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '==', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg1}, '==', {free, Reg2}} -> beq(RegA, RegB), both operands freed
+if_block_cond_free_eq_free_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Cond = {{free, RegA}, '==', {free, RegB}},
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, Cond, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '==', Val} where Val > 255 -> mov_immediate+beq
+if_block_cond_eq_large_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '==', 1000}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond '!=' variants
+%%-----------------------------------------------------------------------------
+
+%% {Reg, '!=', 0} with a bare register -> bnez
+if_block_cond_ne_zero_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f56 bnez a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '!=', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg}, '!=', 0} -> exercises the {free, Reg0} form of the bnez clause
+if_block_cond_free_ne_zero_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f56 bnez a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {{free, Native}, '!=', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '!=', IS_B4CONST} -> bnei with the constant encoded in the branch
+if_block_cond_ne_b4const_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021f66 bnei a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '!=', 1}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '!=', Val} where 0 < Val <= 255, not B4CONST -> movi+bne
+if_block_cond_ne_uint8_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '!=', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg1, '!=', Reg2} with both operands in native registers -> bne(Reg1, Reg2)
+if_block_cond_ne_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, {RegA, '!=', RegB}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {'(int)', Reg, '!=', Val} -> same code as the plain '!=' case (movi+bne)
+if_block_cond_int_ne_val_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {'(int)', Native, '!=', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '!=', Val} where Val > 255 -> mov_immediate+bne
+if_block_cond_ne_large_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '!=', 1000}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond '(bool)' variants
+%%-----------------------------------------------------------------------------
+
+%% {'(bool)', Reg, '==', false} -> beqz
+if_block_cond_bool_eq_false_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {'(bool)', Native, '==', false}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {'(bool)', Reg, '!=', false} -> bnez
+if_block_cond_bool_ne_false_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f56 bnez a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {'(bool)', Native, '!=', false}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond '&' variants
+%%-----------------------------------------------------------------------------
+
+%% {Reg, '&', Mask, '!=', 0} -> mov_immediate(Mask)+and+bnez
+if_block_cond_and_ne_zero_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0fa0e2 movi a14, 15\n"
+        " 6: 10efe0 and a14, a15, a14\n"
+        " 9: 002e56 bnez a14, 0xf\n"
+        " c: 000086 j 0x12\n"
+        " f: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '&', 15, '!=', 0}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '&', 16#F, '!=', 16#F} with a bare register: the source register is
+%% left untouched and a temporary is used -> movi(-1)+xor+slli+bnez
+if_block_cond_and_nibble_ne_f_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: ffafe2 movi a14, -1\n"
+        " 6: 30eef0 xor a14, a14, a15\n"
+        " 9: 01ee40 slli a14, a14, 28\n"
+        " c: 002e56 bnez a14, 0x12\n"
+        " f: 000086 j 0x15\n"
+        " 12: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '&', 16#F, '!=', 16#F}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg}, '&', 16#F, '!=', 16#F}: the register is reused in place
+%% -> neg+addi+slli+bnez
+if_block_cond_free_and_nibble_ne_f_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 60f0f0 neg a15, a15\n"
+        " 6: ffcff2 addi a15, a15, -1\n"
+        " 9: 01ff40 slli a15, a15, 28\n"
+        " c: 002f56 bnez a15, 0x12\n"
+        " f: 000086 j 0x15\n"
+        " 12: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {{free, Native}, '&', 16#F, '!=', 16#F}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '&', Mask, '!=', RegB}: general clause, right-hand side is a register.
+%% The masked value is built in a copy (mov.n) so Reg itself is preserved.
+if_block_cond_and_ne_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0fdd mov.n a13, a15\n"
+        " 8: ffa0c2 movi a12, 255\n"
+        " b: 10ddc0 and a13, a13, a12\n"
+        " e: 029de7 bne a13, a14, 0x14\n"
+        " 11: 000086 j 0x17\n"
+        " 14: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, {RegA, '&', 16#FF, '!=', RegB}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {Reg, '&', Mask, '!=', Imm}: general clause, right-hand side is an integer
+if_block_cond_and_ne_imm_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0fed mov.n a14, a15\n"
+        " 5: ffa0d2 movi a13, 255\n"
+        " 8: 10eed0 and a14, a14, a13\n"
+        " b: 2aa0d2 movi a13, 42\n"
+        " e: 029ed7 bne a14, a13, 0x14\n"
+        " 11: 000086 j 0x17\n"
+        " 14: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {Native, '&', 16#FF, '!=', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg}, '&', Mask, '!=', RegB}: general clause with a free register,
+%% right-hand side is a register. The mask is applied to Reg in place.
+if_block_cond_free_and_ne_reg_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: ffa0d2 movi a13, 255\n"
+        " 9: 10ffd0 and a15, a15, a13\n"
+        " c: 029fe7 bne a15, a14, 0x12\n"
+        " f: 000086 j 0x15\n"
+        " 12: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {LoadedB, RegB} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Cond = {{free, RegA}, '&', 16#FF, '!=', RegB},
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(LoadedB, Cond, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg}, '&', Mask, '!=', Imm}: general clause with a free register,
+%% right-hand side is an integer
+if_block_cond_free_and_ne_imm_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: ffa0e2 movi a14, 255\n"
+        " 6: 10ffe0 and a15, a15, a14\n"
+        " 9: 2aa0e2 movi a14, 42\n"
+        " c: 029fe7 bne a15, a14, 0x12\n"
+        " f: 000086 j 0x15\n"
+        " 12: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Cond = {{free, Native}, '&', 16#FF, '!=', 42},
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, Cond, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% patch_branch / branch_to_offset_code / branch_to_label_code
+%%-----------------------------------------------------------------------------
+
+%% patch_branch near {far_branch}: a forward reference whose label ends up
+%% close by is rewritten as a near `j` followed by nops.
+patch_branch_near_forward_test() ->
+    Expected = <<
+        " 0: 000506 j 0x18\n"
+        " 3: 0020f0 nop\n"
+        " 6: 0020f0 nop\n"
+        " 9: 0020f0 nop\n"
+        " c: 0020f0 nop\n"
+        " f: 0020f0 nop\n"
+        " 12: 0020f0 nop\n"
+        " 15: 0020f0 nop"
+    >>,
+    Label = make_ref(),
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithJump = ?BACKEND:jump_to_label(Fresh, Label),
+    LabelOffset = ?BACKEND:offset(WithJump),
+    WithLabel = ?BACKEND:add_label(WithJump, Label, LabelOffset),
+    Patched = ?BACKEND:update_branches(WithLabel),
+    Emitted = ?BACKEND:stream(Patched),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% patch_branch far {far_branch}: a forward reference whose label is far away
+%% is rewritten as an indirect jump (offset built in a15, then jx a8).
+patch_branch_far_forward_test() ->
+    Expected = <<
+        " 0: 032382 l32i a8, a3, 12\n"
+        " 3: 03a0f2 movi a15, 3\n"
+        " 6: 11ff80 slli a15, a15, 8\n"
+        " 9: 0dcff2 addi a15, a15, 13\n"
+        " c: 11ff80 slli a15, a15, 8\n"
+        " f: 40cff2 addi a15, a15, 64\n"
+        " 12: 8088f0 add a8, a8, a15\n"
+        " 15: 0008a0 jx a8"
+    >>,
+    Label = make_ref(),
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithJump = ?BACKEND:jump_to_label(Fresh, Label),
+    %% The label is declared at a far offset without emitting that much data.
+    WithLabel = ?BACKEND:add_label(WithJump, Label, 200000),
+    Patched = ?BACKEND:update_branches(WithLabel),
+    Emitted = ?BACKEND:stream(Patched),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% branch_to_label_code first clause: the label is already known when the
+%% jump is emitted (backward jump).
+branch_to_backward_label_test() ->
+    Expected = <<
+        " 0: ffff06 j 0x0"
+    >>,
+    Label = make_ref(),
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% The label sits at the current offset (0), ahead of the jump itself.
+    WithLabel = ?BACKEND:add_label(Fresh, Label, 0),
+    WithJump = ?BACKEND:jump_to_label(WithLabel, Label),
+    Emitted = ?BACKEND:stream(WithJump),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% branch_to_offset_code far branch: jump_to_offset with a target more than
+%% 131071 bytes away
+jump_to_offset_far_test() ->
+    Expected = <<
+        " 0: 032382 l32i a8, a3, 12\n"
+        " 3: 03a0f2 movi a15, 3\n"
+        " 6: 11ff80 slli a15, a15, 8\n"
+        " 9: 0dcff2 addi a15, a15, 13\n"
+        " c: 11ff80 slli a15, a15, 8\n"
+        " f: 40cff2 addi a15, a15, 64\n"
+        " 12: 8088f0 add a8, a8, a15\n"
+        " 15: 0008a0 jx a8"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Done = ?BACKEND:jump_to_offset(Fresh, 200000),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% return_if_not_equal_to_ctx with Reg = a2 (I2 = <<>>, no MOV needed)
+%%-----------------------------------------------------------------------------
+
+%% The value is already in a2, so the expected listing has no mov.n before retw.
+return_if_not_equal_to_ctx_a2_test() ->
+    Expected = <<
+        " 0: 021227 beq a2, a2, 0x6\n"
+        " 3: 000090 retw"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Done = ?BACKEND:return_if_not_equal_to_ctx(Fresh, {free, a2}),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive second clause: when available_regs = 0
+%%-----------------------------------------------------------------------------
+
+%% x_reg 0..9 are loaded into native registers before the primitive call.
+%% The expected listing saves a15..a9 to the stack ahead of callx8 and
+%% reloads them afterwards.
+call_primitive_no_avail_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: 0922c2 l32i a12, a2, 36\n"
+        " c: 0a22b2 l32i a11, a2, 40\n"
+        " f: 0b22a2 l32i a10, a2, 44\n"
+        " 12: 0c2292 l32i a9, a2, 48\n"
+        " 15: 0d2272 l32i a7, a2, 52\n"
+        " 18: 0e2262 l32i a6, a2, 56\n"
+        " 1b: 0f2252 l32i a5, a2, 60\n"
+        " 1e: 0c61f2 s32i a15, a1, 48\n"
+        " 21: 0d61e2 s32i a14, a1, 52\n"
+        " 24: 0e61d2 s32i a13, a1, 56\n"
+        " 27: 0f61c2 s32i a12, a1, 60\n"
+        " 2a: 1061b2 s32i a11, a1, 64\n"
+        " 2d: 1161a2 s32i a10, a1, 68\n"
+        " 30: 126192 s32i a9, a1, 72\n"
+        " 33: 0024f2 l32i a15, a4, 0\n"
+        " 36: 02ad mov.n a10, a2\n"
+        " 38: 03bd mov.n a11, a3\n"
+        " 3a: 000fe0 callx8 a15\n"
+        " 3d: 0a8d mov.n a8, a10\n"
+        " 3f: 0c21f2 l32i a15, a1, 48\n"
+        " 42: 0d21e2 l32i a14, a1, 52\n"
+        " 45: 0e21d2 l32i a13, a1, 56\n"
+        " 48: 0f21c2 l32i a12, a1, 60\n"
+        " 4b: 1021b2 l32i a11, a1, 64\n"
+        " 4e: 1121a2 l32i a10, a1, 68\n"
+        " 51: 122192 l32i a9, a1, 72"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Same ten loads as x_reg 0, 1, ..., 9 in order; returned registers are unused.
+    Loaded = lists:foldl(
+        fun(Index, Acc) ->
+            {Next, _Reg} = ?BACKEND:move_to_native_register(Acc, {x_reg, Index}),
+            Next
+        end,
+        Fresh,
+        lists:seq(0, 9)
+    ),
+    {Called, _Result} = ?BACKEND:call_primitive(Loaded, 0, [ctx, jit_state]),
+    Emitted = ?BACKEND:stream(Called),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive_last with offset arg
+%%-----------------------------------------------------------------------------
+
+%% The `offset` argument shows up as an immediate loaded into a12 before callx8.
+call_primitive_last_with_offset_test() ->
+    Expected = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 02ad mov.n a10, a2\n"
+        " 5: 03bd mov.n a11, a3\n"
+        " 7: 03a0c2 movi a12, 3\n"
+        " a: 000fe0 callx8 a15\n"
+        " d: 0a2d mov.n a2, a10\n"
+        " f: 000090 retw"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Done = ?BACKEND:call_primitive_last(Fresh, 0, [ctx, jit_state, offset]),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% patch_branches_for_label clause 3: non-matching branch is skipped
+%%-----------------------------------------------------------------------------
+
+%% A jump to label 2 is pending when label 1 is defined, so that branch must be
+%% left alone until label 2 itself is added.
+patch_branches_for_label_skip_test() ->
+    Expected = <<
+        " 0: 000054 lsi f5, a0, 0\n"
+        " 3: c13600 mul16u a3, a6, a0\n"
+        " 6: fe5100 f64iter a5, a1, a0, 3, 1\n"
+        " 9: ff .byte 0xff\n"
+        " a: 032382 l32i a8, a3, 12\n"
+        " d: 808850 add a8, a8, a5\n"
+        " 10: 0008a0 jx a8\n"
+        " 13: ff .byte 0xff\n"
+        " 14: 000054 lsi f5, a0, 0\n"
+        " 17: c13600 mul16u a3, a6, a0\n"
+        " 1a: fe5100 f64iter a5, a1, a0, 3, 1\n"
+        " 1d: ff .byte 0xff\n"
+        " 1e: 032382 l32i a8, a3, 12\n"
+        " 21: 808850 add a8, a8, a5\n"
+        " 24: 0008a0 jx a8\n"
+        " 27: ff .byte 0xff\n"
+        " 28: 000054 lsi f5, a0, 0\n"
+        " 2b: c13600 mul16u a3, a6, a0\n"
+        " 2e: fe5100 f64iter a5, a1, a0, 3, 1\n"
+        " 31: ff .byte 0xff\n"
+        " 32: 032382 l32i a8, a3, 12\n"
+        " 35: 808850 add a8, a8, a5\n"
+        " 38: 0008a0 jx a8\n"
+        " 3b: ff .byte 0xff\n"
+        " 3c: 000506 j 0x54\n"
+        " 3f: 0020f0 nop\n"
+        " 42: 0020f0 nop\n"
+        " 45: 0020f0 nop\n"
+        " 48: 0020f0 nop\n"
+        " 4b: 0020f0 nop\n"
+        " 4e: 0020f0 nop\n"
+        " 51: 0020f0 nop"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Fresh, 2),
+    %% Forward jump to label 2: stays pending in the branch list.
+    WithJump = ?BACKEND:jump_to_label(WithTable, 2),
+    %% Label 1 does not match the pending branch, which hits the skip clause.
+    AfterLabel1 = ?BACKEND:add_label(WithJump, 1),
+    AfterLabel2 = ?BACKEND:add_label(AfterLabel1, 2),
+    AfterLabel0 = ?BACKEND:add_label(AfterLabel2, 0),
+    Emitted = ?BACKEND:stream(AfterLabel0),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond with {free, Reg} for '<' variants
+%%-----------------------------------------------------------------------------
+
+%% {{free, Reg}, '<', 1} -> blti, same listing as the bare-register case
+if_block_cond_free_lt_b4const_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021fa6 blti a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {{free, Native}, '<', 1}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {{free, Reg}, '<', 42} -> movi+blt
+if_block_cond_free_lt_uint8_test() ->
+    Expected = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Native} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Body = fun(Inner) -> ?BACKEND:debugger(Inner) end,
+    Done = ?BACKEND:if_block(Loaded, {{free, Native}, '<', 42}, Body),
+    Emitted = ?BACKEND:stream(Done),
+    ?assertStream(xtensa, Expected, Emitted).
+
+%% {free, Reg} < 1000: the expected code loads 1000 with movi, then uses blt.
+if_block_cond_free_lt_large_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '<', 1000}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% 42 < {free, Reg}: the immediate is the left operand of the comparison.
+if_block_cond_free_uint8_lt_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Rhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {42, '<', {free, Rhs}}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 022ef7 blt a14, a15, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% 1000 < {free, Reg}: same shape as above with a larger immediate.
+if_block_cond_free_large_lt_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Rhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {1000, '<', {free, Rhs}}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 022ef7 blt a14, a15, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg1} < Reg2 with both operands already in native registers.
+if_block_cond_free_reg_lt_reg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {LoadedB, Rhs} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(LoadedB, {{free, Lhs}, '<', Rhs}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 022fe7 blt a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond with {free, Reg} for '!=' variants
+%%-----------------------------------------------------------------------------
+
+%% {free, Reg} != 1: the expected code compares with a single bnei.
+if_block_cond_free_ne_b4const_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '!=', 1}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021f66 bnei a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg} != 42: the expected code loads 42 with movi, then uses bne.
+if_block_cond_free_ne_uint8_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '!=', 42}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg1} != Reg2 with both operands already in native registers.
+if_block_cond_free_ne_reg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {LoadedB, Rhs} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(LoadedB, {{free, Lhs}, '!=', Rhs}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg} != 1000: the expected code loads 1000 with movi, then uses bne.
+if_block_cond_free_ne_large_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '!=', 1000}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 029fe7 bne a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond with {free, Reg} for '==' variants
+%%-----------------------------------------------------------------------------
+
+%% {free, Reg} == 1: the expected code compares with a single beqi.
+if_block_cond_free_eq_b4const_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '==', 1}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 021f26 beqi a15, 1, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg} == 42: the expected code loads 42 with movi, then uses beq.
+if_block_cond_free_eq_uint8_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '==', 42}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 2aa0e2 movi a14, 42\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% {free, Reg} == 1000: the expected code loads 1000 with movi, then uses beq.
+if_block_cond_free_eq_large_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Lhs}, '==', 1000}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: e8a3e2 movi a14, 0x3e8\n"
+        " 6: 021fe7 beq a15, a14, 0xc\n"
+        " 9: 000086 j 0xf\n"
+        " c: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond with {free, Reg} for '(bool)' variants
+%%-----------------------------------------------------------------------------
+
+%% (bool) {free, Reg} == false: the expected code branches with beqz.
+if_block_cond_free_bool_eq_false_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Flag} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {'(bool)', {free, Flag}, '==', false}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f16 beqz a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% (bool) {free, Reg} != false: the expected code branches with bnez.
+if_block_cond_free_bool_ne_false_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Flag} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {'(bool)', {free, Flag}, '!=', false}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 002f56 bnez a15, 0x9\n"
+        " 6: 000086 j 0xc\n"
+        " 9: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% if_block_cond with {free, Reg} for '&' != 0 variant
+%%-----------------------------------------------------------------------------
+
+%% ({free, Reg} & 15) != 0: the expected code masks with movi+and, then bnez.
+if_block_cond_free_and_ne_zero_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Masked} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Body = fun(St) -> ?BACKEND:debugger(St) end,
+    Branched = ?BACKEND:if_block(Loaded, {{free, Masked}, '&', 15, '!=', 0}, Body),
+    Stream = ?BACKEND:stream(Branched),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0fa0e2 movi a14, 15\n"
+        " 6: 10efe0 and a14, a15, a14\n"
+        " 9: 002e56 bnez a14, 0xf\n"
+        " c: 000086 j 0x12\n"
+        " f: 0041f0 break 1, 15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% shift_right: {free, Reg} with Shift > 15 (movi+ssr+srl in-place)
+%%-----------------------------------------------------------------------------
+
+shift_right_free_large_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    %% Matching on the already-bound Src asserts the result reuses that register.
+    {Shifted, Src} = ?BACKEND:shift_right(Loaded, {free, Src}, 16),
+    Stream = ?BACKEND:stream(Shifted),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 10a0e2 movi a14, 16\n"
+        " 6: 400e00 ssr a14\n"
+        " 9: 91f0f0 srl a15, a15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% shift_right: plain Reg with Shift =< 15 (srli to new result reg)
+%%-----------------------------------------------------------------------------
+
+shift_right_new_reg_small_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {Shifted, Dst} = ?BACKEND:shift_right(Loaded, Src, 3),
+    %% The source was not freed, so the result must land in another register.
+    ?assertNotEqual(Src, Dst),
+    Stream = ?BACKEND:stream(Shifted),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 41e3f0 srli a14, a15, 3"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% shift_right: plain Reg with Shift > 15 (movi+ssr+srl to new result reg)
+%%-----------------------------------------------------------------------------
+
+shift_right_new_reg_large_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {Shifted, Dst} = ?BACKEND:shift_right(Loaded, Src, 16),
+    %% The source was not freed, so the result must land in another register.
+    ?assertNotEqual(Src, Dst),
+    Stream = ?BACKEND:stream(Shifted),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 10a0e2 movi a14, 16\n"
+        " 6: 400e00 ssr a14\n"
+        " 9: 91e0f0 srl a14, a15"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% shift_right_arith: {free, Reg} (srai in-place)
+%%-----------------------------------------------------------------------------
+
+shift_right_arith_free_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    %% Matching on the already-bound Src asserts the result reuses that register.
+    {Shifted, Src} = ?BACKEND:shift_right_arith(Loaded, {free, Src}, 3),
+    Stream = ?BACKEND:stream(Shifted),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 21f3f0 srai a15, a15, 3"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% shift_right_arith: plain Reg (srai to new result reg)
+%%-----------------------------------------------------------------------------
+
+shift_right_arith_new_reg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {Shifted, Dst} = ?BACKEND:shift_right_arith(Loaded, Src, 3),
+    %% The source was not freed, so the result must land in another register.
+    ?assertNotEqual(Src, Dst),
+    Stream = ?BACKEND:stream(Shifted),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 21e3f0 srai a14, a15, 3"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% div_reg: quos instruction
+%%-----------------------------------------------------------------------------
+
+div_reg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {LoadedB, Rhs} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    %% Matching on the already-bound Lhs asserts the result is returned in it.
+    {Divided, Lhs} = ?BACKEND:div_reg(LoadedB, Lhs, Rhs),
+    Stream = ?BACKEND:stream(Divided),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: d2ffe0 quos a15, a15, a14"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% rem_reg: rems instruction
+%%-----------------------------------------------------------------------------
+
+rem_reg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedA, Lhs} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {LoadedB, Rhs} = ?BACKEND:move_to_native_register(LoadedA, {x_reg, 1}),
+    %% Matching on the already-bound Lhs asserts the result is returned in it.
+    {Reduced, Lhs} = ?BACKEND:rem_reg(LoadedB, Lhs, Rhs),
+    Stream = ?BACKEND:stream(Reduced),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: f2ffe0 rems a15, a15, a14"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: native register -> {x_reg, extra}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_native_to_xreg_extra_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Stored = ?BACKEND:move_to_vm_register(Loaded, Src, {x_reg, extra}),
+    Stream = ?BACKEND:stream(Stored),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 1662f2 s32i a15, a2, 88"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: native register -> {ptr, Reg}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_native_to_ptr_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Stored = ?BACKEND:move_to_vm_register(Loaded, Src, {ptr, a5}),
+    Stream = ?BACKEND:stream(Stored),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0065f2 s32i a15, a5, 0"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: native register -> {y_reg, Y}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_native_to_yreg_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Stored = ?BACKEND:move_to_vm_register(Loaded, Src, {y_reg, 3}),
+    Stream = ?BACKEND:stream(Stored),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0522e2 l32i a14, a2, 20\n"
+        " 6: 036ef2 s32i a15, a14, 12"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: large integer (> 255) -> {x_reg, 0}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_large_int_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Stored = ?BACKEND:move_to_vm_register(Init, 256, {x_reg, 0}),
+    Stream = ?BACKEND:stream(Stored),
+    Dump = <<
+        " 0: 00a1f2 movi a15, 0x100\n"
+        " 3: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: {x_reg, extra} source -> {x_reg, 0}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_xreg_extra_src_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_vm_register(Init, {x_reg, extra}, {x_reg, 0}),
+    Stream = ?BACKEND:stream(Moved),
+    Dump = <<
+        " 0: 1622f2 l32i a15, a2, 88\n"
+        " 3: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: {x_reg, X} source -> {x_reg, 1}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_xreg_src_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_vm_register(Init, {x_reg, 0}, {x_reg, 1}),
+    Stream = ?BACKEND:stream(Moved),
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0762f2 s32i a15, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: {ptr, Reg} source -> {x_reg, 0}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_ptr_src_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_vm_register(Init, {ptr, a5}, {x_reg, 0}),
+    Stream = ?BACKEND:stream(Moved),
+    Dump = <<
+        " 0: 0025f2 l32i a15, a5, 0\n"
+        " 3: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% move_to_vm_register_emit: {y_reg, Y} source -> {x_reg, 1}
+%%-----------------------------------------------------------------------------
+
+move_to_vm_register_yreg_src_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_vm_register(Init, {y_reg, 0}, {x_reg, 1}),
+    Stream = ?BACKEND:stream(Moved),
+    Dump = <<
+        " 0: 0522e2 l32i a14, a2, 20\n"
+        " 3: 002ef2 l32i a15, a14, 0\n"
+        " 6: 0762f2 s32i a15, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive with {avm_int64_t, Value}: int64 at a10+a11 (even pair, first)
+%%-----------------------------------------------------------------------------
+
+call_primitive_int64_at_a10_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Int64 = {avm_int64_t, 16#123456789ABCDEF0},
+    {Called, _} = ?BACKEND:call_primitive(Init, 0, [Int64]),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 000146 j 0xc\n"
+        " 6: f0ff00 subx8 a15, a15, a0\n"
+        " 9: de .byte 0xde\n"
+        " a: 9abc beqz.n a10, 0x47\n"
+        " c: ffffa1 l32r a10, 0x8 (0x9abcdef0)\n"
+        " f: 000146 j 0x18\n"
+        " 12: 78ff00 lsi f0, a15, 0x1e0\n"
+        " 15: 123456 bnez a4, 0x13c\n"
+        " 18: ffffb1 l32r a11, 0x14 (0x12345678)\n"
+        " 1b: 000fe0 callx8 a15\n"
+        " 1e: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive with {avm_int64_t, Value}: ctx at a10, int64 skips a11, uses a12+a13
+%%-----------------------------------------------------------------------------
+
+call_primitive_int64_skip_a11_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Args = [ctx, {avm_int64_t, 16#123456789ABCDEF0}],
+    {Called, _} = ?BACKEND:call_primitive(Init, 0, Args),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 02ad mov.n a10, a2\n"
+        " 5: 0000c6 j 0xc\n"
+        " 8: bcdef0 lsi f15, a14, 0x2f0\n"
+        " b: c19a add.n a12, a1, a9\n"
+        " d: ff .byte 0xff\n"
+        " e: ff .byte 0xff\n"
+        " f: 000146 j 0x18\n"
+        " 12: 78ff00 lsi f0, a15, 0x1e0\n"
+        " 15: 123456 bnez a4, 0x13c\n"
+        " 18: ffffd1 l32r a13, 0x14 (0x12345678)\n"
+        " 1b: 000fe0 callx8 a15\n"
+        " 1e: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive with {avm_int64_t, Value}: ctx+jit_state at a10+a11, int64 at a12+a13
+%%-----------------------------------------------------------------------------
+
+call_primitive_int64_at_a12_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Args = [ctx, jit_state, {avm_int64_t, 16#123456789ABCDEF0}],
+    {Called, _} = ?BACKEND:call_primitive(Init, 0, Args),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 02ad mov.n a10, a2\n"
+        " 5: 03bd mov.n a11, a3\n"
+        " 7: 000146 j 0x10\n"
+        " a: f0ff00 subx8 a15, a15, a0\n"
+        " d: de .byte 0xde\n"
+        " e: 9abc beqz.n a10, 0x4b\n"
+        " 10: ffffc1 l32r a12, 0xc (0x9abcdef0)\n"
+        " 13: 000146 j 0x1c\n"
+        " 16: 78ff00 lsi f0, a15, 0x1e0\n"
+        " 19: 123456 bnez a4, 0x140\n"
+        " 1c: ffffd1 l32r a13, 0x18 (0x12345678)\n"
+        " 1f: 000fe0 callx8 a15\n"
+        " 22: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive with {avm_int64_t, Value}: ctx+jit_state+offset, int64 skips a13, uses a14+a15
+%%-----------------------------------------------------------------------------
+
+call_primitive_int64_skip_a13_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Args = [ctx, jit_state, offset, {avm_int64_t, 16#123456789ABCDEF0}],
+    {Called, _} = ?BACKEND:call_primitive(Init, 0, Args),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 0fdd mov.n a13, a15\n"
+        " 5: 02ad mov.n a10, a2\n"
+        " 7: 03bd mov.n a11, a3\n"
+        " 9: 03a0c2 movi a12, 3\n"
+        " c: 000106 j 0x14\n"
+        " f: ff .byte 0xff\n"
+        " 10: bcdef0 lsi f15, a14, 0x2f0\n"
+        " 13: e19a add.n a14, a1, a9\n"
+        " 15: ff .byte 0xff\n"
+        " 16: ff .byte 0xff\n"
+        " 17: 000146 j 0x20\n"
+        " 1a: 78ff00 lsi f0, a15, 0x1e0\n"
+        " 1d: 123456 bnez a4, 0x144\n"
+        " 20: fffff1 l32r a15, 0x1c (0x12345678)\n"
+        " 23: 000de0 callx8 a13\n"
+        " 26: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% call_primitive with {avm_int64_t, Value}: ctx+jit_state+offset+{x_reg,0}, int64 at a14+a15
+%%-----------------------------------------------------------------------------
+
+call_primitive_int64_at_a14_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Args = [ctx, jit_state, offset, {x_reg, 0}, {avm_int64_t, 16#123456789ABCDEF0}],
+    {Called, _} = ?BACKEND:call_primitive(Init, 0, Args),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        " 0: 0024f2 l32i a15, a4, 0\n"
+        " 3: 0f9d mov.n a9, a15\n"
+        " 5: 02ad mov.n a10, a2\n"
+        " 7: 03bd mov.n a11, a3\n"
+        " 9: 03a0c2 movi a12, 3\n"
+        " c: 0622d2 l32i a13, a2, 24\n"
+        " f: 000146 j 0x18\n"
+        " 12: f0ff00 subx8 a15, a15, a0\n"
+        " 15: de .byte 0xde\n"
+        " 16: 9abc beqz.n a10, 0x53\n"
+        " 18: ffffe1 l32r a14, 0x14 (0x9abcdef0)\n"
+        " 1b: 000146 j 0x24\n"
+        " 1e: 78ff00 lsi f0, a15, 0x1e0\n"
+        " 21: 123456 bnez a4, 0x148\n"
+        " 24: fffff1 l32r a15, 0x20 (0x12345678)\n"
+        " 27: 0009e0 callx8 a9\n"
+        " 2a: 0a7d mov.n a7, a10"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%%-----------------------------------------------------------------------------
+%% Register cache tests
+%%-----------------------------------------------------------------------------
+
+%% Verify move_array_element for {x_reg, X} destination updates the cache for
+%% the Temp register so a subsequent move_to_native_register({x_reg, X}) returns
+%% Temp directly without emitting a second load.
+move_array_element_x_reg_invalidates_vm_loc_cache_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {LoadedX5, a15} = ?BACKEND:move_to_native_register(Init, {x_reg, 5}),
+    {LoadedX0, a14} = ?BACKEND:move_to_native_register(LoadedX5, {x_reg, 0}),
+    %% Overwrite {x_reg, 5} with element 0 of the array pointed to by a14.
+    Stored = ?BACKEND:move_array_element(LoadedX0, a14, 0, {x_reg, 5}),
+    %% Re-requesting {x_reg, 5} must not add any instruction to the stream.
+    {Reloaded, _} = ?BACKEND:move_to_native_register(Stored, {x_reg, 5}),
+    Stream = ?BACKEND:stream(Reloaded),
+    Dump = <<
+        " 0: 0b22f2 l32i a15, a2, 44\n"
+        " 3: 0622e2 l32i a14, a2, 24\n"
+        " 6: 002ed2 l32i a13, a14, 0\n"
+        " 9: 0b62d2 s32i a13, a2, 44"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify move_to_native_register/3 for {x_reg, N} sets the cache so that a
+%% subsequent move_to_native_register/2 for the same value returns the register
+%% from cache without emitting a second load.
+fixed_dst_x_reg_load_preserves_cache_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Pinned = ?BACKEND:move_to_native_register(Init, {x_reg, 2}, a13),
+    %% The match on a13 asserts the cached register is handed back.
+    {Cached, a13} = ?BACKEND:move_to_native_register(Pinned, {x_reg, 2}),
+    Stream = ?BACKEND:stream(Cached),
+    Dump = <<" 0: 0822d2 l32i a13, a2, 32">>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify move_to_native_register/3 for {y_reg, Y} sets the cache so that a
+%% subsequent move_to_native_register/2 for the same value returns the register
+%% from cache without emitting a second load.
+fixed_dst_y_reg_load_preserves_cache_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Pinned = ?BACKEND:move_to_native_register(Init, {y_reg, 2}, a13),
+    %% The match on a13 asserts the cached register is handed back.
+    {Cached, a13} = ?BACKEND:move_to_native_register(Pinned, {y_reg, 2}),
+    Stream = ?BACKEND:stream(Cached),
+    Dump = <<
+        " 0: 0522f2 l32i a15, a2, 20\n"
+        " 3: 022fd2 l32i a13, a15, 8"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify that an if_block whose body is a terminal (call_primitive_last,
+%% which uses jit_regs:unreachable) preserves the pre-block cache at the merge
+%% point, so move_to_native_register after the block hits the cache.
+call_primitive_last_if_block_preserves_cache_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithImm, a15} = ?BACKEND:move_to_native_register(Init, 1),
+    {WithX0, a14} = ?BACKEND:move_to_native_register(WithImm, {x_reg, 0}),
+    TailCall = fun(St) -> ?BACKEND:call_primitive_last(St, 0, [ctx, jit_state]) end,
+    Merged = ?BACKEND:if_block(WithX0, {a15, '==', 0}, TailCall),
+    %% Matching on a14 asserts {x_reg, 0} is still served from that register;
+    %% the expected dump has no load after the block.
+    {Final, a14} = ?BACKEND:move_to_native_register(Merged, {x_reg, 0}),
+    Stream = ?BACKEND:stream(Final),
+    Dump = <<
+        " 0: 01a0f2 movi a15, 1\n"
+        " 3: 0622e2 l32i a14, a2, 24\n"
+        " 6: 002f16 beqz a15, 0xc\n"
+        " 9: 000386 j 0x1b\n"
+        " c: 0024f2 l32i a15, a4, 0\n"
+        " f: 02ad mov.n a10, a2\n"
+        " 11: 03bd mov.n a11, a3\n"
+        " 13: 000fe0 callx8 a15\n"
+        " 16: 0a2d mov.n a2, a10\n"
+        " 18: 000090 retw"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify that an if_block whose body is a terminal (jump_to_label,
+%% which uses jit_regs:unreachable) preserves the pre-block cache at the merge
+%% point, so move_to_native_register after the block hits the cache.
+jump_to_label_if_block_preserves_cache_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithImm, a15} = ?BACKEND:move_to_native_register(Init, 1),
+    {WithX0, a14} = ?BACKEND:move_to_native_register(WithImm, {x_reg, 0}),
+    Jump = fun(St) -> ?BACKEND:jump_to_label(St, 42) end,
+    Merged = ?BACKEND:if_block(WithX0, {a15, '==', 0}, Jump),
+    %% Matching on a14 asserts {x_reg, 0} is still served from that register;
+    %% the expected dump has no load after the block.
+    {Final, a14} = ?BACKEND:move_to_native_register(Merged, {x_reg, 0}),
+    Stream = ?BACKEND:stream(Final),
+    Dump = <<
+        " 0: 01a0f2 movi a15, 1\n"
+        " 3: 0622e2 l32i a14, a2, 24\n"
+        " 6: 002f16 beqz a15, 0xc\n"
+        " 9: 0005c6 j 0x24\n"
+        " c: ff .byte 0xff\n"
+        " d: ff .byte 0xff\n"
+        " e: ff .byte 0xff\n"
+        " f: ff .byte 0xff\n"
+        " 10: ff .byte 0xff\n"
+        " 11: ff .byte 0xff\n"
+        " 12: ff .byte 0xff\n"
+        " 13: ff .byte 0xff\n"
+        " 14: ff .byte 0xff\n"
+        " 15: ff .byte 0xff\n"
+        " 16: ff .byte 0xff\n"
+        " 17: ff .byte 0xff\n"
+        " 18: ff .byte 0xff\n"
+        " 19: ff .byte 0xff\n"
+        " 1a: ff .byte 0xff\n"
+        " 1b: ff .byte 0xff\n"
+        " 1c: ff .byte 0xff\n"
+        " 1d: ff .byte 0xff\n"
+        " 1e: ff .byte 0xff\n"
+        " 1f: ff .byte 0xff\n"
+        " 20: ff .byte 0xff\n"
+        " 21: ff .byte 0xff\n"
+        " 22: ff .byte 0xff\n"
+        " 23: ff .byte 0xff"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify ldr_y_reg invalidates its hidden temp register's cache entry.
+%% ldr_y_reg uses first_avail(AvailT) as a scratch register to load the
+%% Y_REGS pointer, but without the fix this temp was not invalidated.
+ldr_y_reg_invalidates_hidden_temp_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Cache three x registers in a15, a14 and a13, then release a14 and a13
+    %% so that they are available again.
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, a14} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, a13} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    State4 = ?BACKEND:free_native_registers(State3, [a14, a13]),
+    %% y_reg load: Reg=a14 (first avail), hidden temp=a13 (first avail of remaining)
+    {State5, a14} = ?BACKEND:move_to_native_register(State4, {y_reg, 0}),
+    %% a13 was hidden temp — if not invalidated, cache still says a13={x_reg,2}
+    {State6, a13} = ?BACKEND:move_to_native_register(State5, {x_reg, 2}),
+    Stream = ?BACKEND:stream(State6),
+    %% 0x9-0xc: the y_reg load going through a13; 0xf: the fresh reload of
+    %% {x_reg, 2} into a13 that shows the stale cache entry was dropped.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: 0522d2 l32i a13, a2, 20\n"
+        " c: 002de2 l32i a14, a13, 0\n"
+        " f: 0822d2 l32i a13, a2, 32"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify decrement_reductions_and_maybe_schedule_next invalidates the cache
+%% so that a subsequent move_to_native_register emits a fresh load.
+%% schedule_next clobbers caller-saved registers at the continuation point,
+%% so the pre-block cache must not be reused there.
+decrement_reductions_invalidates_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1),
+    %% After schedule_next, cache must be invalidated: a fresh load is emitted.
+    {State3, a14} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}),
+    Stream = ?BACKEND:stream(State3),
+    %% 0x3-0x9 load, decrement and store back the word at a3+8 (presumably the
+    %% remaining-reductions counter -- TODO confirm); the bnez at 0xc skips to
+    %% the continuation at 0x39 while that word is non-zero. The l32i at 0x39
+    %% is the fresh reload of {x_reg, 0} this test is about.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0223e2 l32i a14, a3, 8\n"
+        " 6: ffcee2 addi a14, a14, -1\n"
+        " 9: 0263e2 s32i a14, a3, 8\n"
+        " c: 029e56 bnez a14, 0x39\n"
+        " f: 0323e2 l32i a14, a3, 12\n"
+        " 12: 36a082 movi a8, 54\n"
+        " 15: 0020f0 nop\n"
+        " 18: 0020f0 nop\n"
+        " 1b: 0020f0 nop\n"
+        " 1e: 0020f0 nop\n"
+        " 21: 80ee80 add a14, a14, a8\n"
+        " 24: 0163e2 s32i a14, a3, 4\n"
+        " 27: 0224f2 l32i a15, a4, 8\n"
+        " 2a: 02ad mov.n a10, a2\n"
+        " 2c: 03bd mov.n a11, a3\n"
+        " 2e: 000fe0 callx8 a15\n"
+        " 31: 0a2d mov.n a2, a10\n"
+        " 33: 000090 retw\n"
+        " 36: 00c136 entry a1, 96\n"
+        " 39: 0622e2 l32i a14, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify move_to_vm_register_emit for {x_reg, N} leaves Temp in cache so a
+%% subsequent move_to_native_register for the same x_reg reuses Temp without load.
+cached_move_to_vm_x_reg_reuse_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}),
+    %% a15 (Temp) is in cache as {x_reg, 1}: no extra load emitted.
+    {State2, a15} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    Stream = ?BACKEND:stream(State2),
+    %% Only the copy itself is expected: one load from a2+28 and one store to
+    %% a2+24, with nothing emitted for the second request.
+    Dump = <<
+        " 0: 0722f2 l32i a15, a2, 28\n"
+        " 3: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Verify move_to_vm_register_emit for {y_reg, Y} leaves Temp in cache so a
+%% subsequent move_to_native_register for the same y_reg reuses Temp without load.
+cached_move_to_vm_y_reg_reuse_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}),
+    %% a15 (Temp) is in cache as {y_reg, 0}: no extra load emitted.
+    {State2, a15} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}),
+    Stream = ?BACKEND:stream(State2),
+    %% a14 receives the word at a2+20 and a15 is loaded through it before
+    %% being stored to a2+24; nothing is emitted for the second request.
+    Dump = <<
+        " 0: 0522e2 l32i a14, a2, 20\n"
+        " 3: 002ef2 l32i a15, a14, 0\n"
+        " 6: 0662f2 s32i a15, a2, 24"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% Regression tests for the hidden-temp cache-invalidation bug in the binary
+%% arithmetic and bitwise ops (add/sub/or_/xor_). When the immediate operand
+%% does not fit the instruction's inline form, these ops pick first_avail/1 as
+%% a scratch register (the hidden Temp), materialize the immediate there with
+%% mov_immediate, then combine it with Reg. mov_immediate invalidates Temp's
+%% cache entry, but the ops used to keep the *pre*-mov_immediate cache (Regs0),
+%% which resurrected Temp's stale value. A later move_to_native_register for
+%% that value then wrongly reused the clobbered register instead of reloading.
+%%
+%% Each test caches {x_reg, 1} in a14, frees it (so it stays cached but becomes
+%% available), then runs the op on a15: a14 is picked as the hidden Temp and
+%% clobbered with the immediate. Re-requesting {x_reg, 1} must emit a fresh
+%% l32i a14, a2, 28 rather than reuse the now-clobbered a14.
+
+%% add/3 variant of the hidden-temp regression.
+add_invalidates_hidden_temp_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, a14} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, a13} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    State4 = ?BACKEND:free_native_registers(State3, [a14, a13]),
+    %% Hidden temp = a14 (first avail); add materializes 1000 through a movi temp.
+    State5 = ?BACKEND:add(State4, a15, 1000),
+    %% a14 was the hidden temp: it must be reloaded, not reused from cache.
+    {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}),
+    Stream = ?BACKEND:stream(State6),
+    %% 0x9-0xc: the op itself; 0xf: the reload of {x_reg, 1} into a14.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: e8a3e2 movi a14, 0x3e8\n"
+        " c: 80ffe0 add a15, a15, a14\n"
+        " f: 0722e2 l32i a14, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% sub/3 variant of the hidden-temp regression.
+sub_invalidates_hidden_temp_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, a14} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, a13} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    State4 = ?BACKEND:free_native_registers(State3, [a14, a13]),
+    %% Hidden temp = a14 (first avail); sub materializes 1000 through a movi temp.
+    State5 = ?BACKEND:sub(State4, a15, 1000),
+    %% a14 was the hidden temp: it must be reloaded, not reused from cache.
+    {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}),
+    Stream = ?BACKEND:stream(State6),
+    %% 0x9-0xc: the op itself; 0xf: the reload of {x_reg, 1} into a14.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: e8a3e2 movi a14, 0x3e8\n"
+        " c: c0ffe0 sub a15, a15, a14\n"
+        " f: 0722e2 l32i a14, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% or_/3 variant of the hidden-temp regression.
+or_invalidates_hidden_temp_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, a14} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, a13} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    State4 = ?BACKEND:free_native_registers(State3, [a14, a13]),
+    %% Hidden temp = a14 (first avail); xtensa lacks ori, so 1000 uses a movi temp.
+    State5 = ?BACKEND:or_(State4, a15, 1000),
+    %% a14 was the hidden temp: it must be reloaded, not reused from cache.
+    {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}),
+    Stream = ?BACKEND:stream(State6),
+    %% 0x9-0xc: the op itself; 0xf: the reload of {x_reg, 1} into a14.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: e8a3e2 movi a14, 0x3e8\n"
+        " c: 20ffe0 or a15, a15, a14\n"
+        " f: 0722e2 l32i a14, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
+
+%% xor_/3 variant of the hidden-temp regression.
+xor_invalidates_hidden_temp_cache_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, a15} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, a14} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, a13} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    State4 = ?BACKEND:free_native_registers(State3, [a14, a13]),
+    %% Hidden temp = a14 (first avail); xtensa lacks xori, so 1000 uses a movi temp.
+    State5 = ?BACKEND:xor_(State4, a15, 1000),
+    %% a14 was the hidden temp: it must be reloaded, not reused from cache.
+    {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}),
+    Stream = ?BACKEND:stream(State6),
+    %% 0x9-0xc: the op itself; 0xf: the reload of {x_reg, 1} into a14.
+    Dump = <<
+        " 0: 0622f2 l32i a15, a2, 24\n"
+        " 3: 0722e2 l32i a14, a2, 28\n"
+        " 6: 0822d2 l32i a13, a2, 32\n"
+        " 9: e8a3e2 movi a14, 0x3e8\n"
+        " c: 30ffe0 xor a15, a15, a14\n"
+        " f: 0722e2 l32i a14, a2, 28"
+    >>,
+    ?assertStream(xtensa, Dump, Stream).
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index ba8d2d36fc..7c5c53d43a 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -43,6 +43,7 @@ start() -> jit_riscv64_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests, + jit_xtensa_tests, jit_xtensa_asm_tests ]), case Result of From aa677cccf8d8c3c9d8653fc54838ca1108c274f0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 20 Apr 2026 07:15:46 +0200 Subject: [PATCH 3/8] JIT wasm32: fix warning Signed-off-by: Paul Guyot --- src/libAtomVM/opcodesswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index fbdbd63668..85f055841f 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -1733,7 +1733,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) assert(native_pc); #endif struct JITState jit_state; - jit_state.continuation = NULL; + jit_state.continuation = (NativeContinuation) 0; jit_state.module = mod; jit_state.remaining_reductions = remaining_reductions; // __asm__ volatile("int $0x03"); From dbf24ed996566c922b26b6ab51ea74ffa9ca81be Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 18 Apr 2026 21:57:25 +0200 Subject: [PATCH 4/8] JIT xtensa: generic unix support and CI Add text-section-literals mmap copies for alignment (like arm/aarch64) Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 113 +++++++++++- .github/workflows/esp32-build.yaml | 37 +++- CMakeLists.txt | 2 +- CMakeModules/BuildErlang.cmake | 165 ++++++++++++++++-- libs/CMakeLists.txt | 14 +- libs/avm_esp32/src/CMakeLists.txt | 2 +- libs/eavmlib/src/CMakeLists.txt | 2 +- libs/esp32boot/CMakeLists.txt | 2 +- libs/esp32boot/esp32init.erl | 1 + libs/jit/src/CMakeLists.txt | 51 +++++- src/platforms/esp32/CMakeLists.txt | 58 +++--- src/platforms/esp32/GetBootAVM.cmake | 37 +++- .../esp32/components/libatomvm/CMakeLists.txt | 17 ++ src/platforms/esp32/partitions-jit.csv | 16 ++ 
src/platforms/esp32/sdkconfig.jit | 1 + src/platforms/esp32/sdkconfig.jit.license | 2 + src/platforms/esp32/test/CMakeLists.txt | 56 ++++-- .../test/main/test_erl_sources/CMakeLists.txt | 34 +++- .../test_erl_sources/test_jit_compile.erl | 32 ++++ .../main/test_erl_sources/test_jit_simple.erl | 27 +++ src/platforms/esp32/test/main/test_main.c | 47 +++++ src/platforms/esp32/tools/mkimage.config.in | 2 +- src/platforms/esp32/tools/mkimage.sh.in | 2 +- .../esp32/tools/mkimage_nvs.config.in | 2 +- src/platforms/generic_unix/CMakeLists.txt | 8 + src/platforms/generic_unix/lib/sys.c | 2 +- tests/erlang_tests/CMakeLists.txt | 6 +- tests/libs/jit/CMakeLists.txt | 20 ++- 28 files changed, 673 insertions(+), 85 deletions(-) create mode 100644 src/platforms/esp32/partitions-jit.csv create mode 100644 src/platforms/esp32/sdkconfig.jit create mode 100644 src/platforms/esp32/sdkconfig.jit.license create mode 100644 src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl create mode 100644 src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 5a6c2ac187..71c00193fc 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -406,6 +406,47 @@ jobs: library-arch: riscv32-linux-gnu-ilp32 jit_target_arch: "riscv32" + # xtensa build (esp32, lx6) + - os: "ubuntu-24.04" + cc: "xtensa-lx6-linux-gnu-gcc" + cxx: "xtensa-lx6-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/xtensa_toolchain.cmake" + compiler_pkgs: "" + arch: "xtensa" + library-arch: xtensa-lx6-linux-gnu + + # xtensa build + jit (esp32, lx6) + - os: "ubuntu-24.04" + cc: "xtensa-lx6-linux-gnu-gcc" + cxx: "xtensa-lx6-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: 
"-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=xtensa -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/xtensa_toolchain.cmake" + compiler_pkgs: "" + arch: "xtensa" + library-arch: xtensa-lx6-linux-gnu + jit_target_arch: "xtensa" + + # JIT + DWARF build (xtensa, esp32, lx6) + - os: "ubuntu-24.04" + cc: "xtensa-lx6-linux-gnu-gcc" + cxx: "xtensa-lx6-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_DISABLE_JIT_DWARF=OFF -DAVM_JIT_TARGET_ARCH=xtensa -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/xtensa_toolchain.cmake" + compiler_pkgs: "" + arch: "xtensa" + library-arch: xtensa-lx6-linux-gnu + jit_target_arch: "xtensa" + # libsodium enabled build - os: "ubuntu-24.04" cc: "cc" @@ -458,7 +499,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' && matrix.library-arch != 'xtensa-lx6-linux-gnu' run: | # Replace Azure mirrors with official Ubuntu repositories sudo sed -i 's|azure\.||g' /etc/apt/sources.list @@ -583,6 +624,76 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "Setup cross compilation architecture (xtensa)" + if: matrix.library-arch == 'xtensa-lx6-linux-gnu' + run: | + sudo dpkg --add-architecture ${{ matrix.arch }} + + # Download packages from pguyot/crossbuild-essential-xtensa + gh release download xtensa-toolchain-14.2.0.20260515 \ + -R pguyot/crossbuild-essential-xtensa \ + --pattern 'qemu-xtensa-esp_*.deb' \ + --pattern 'xtensa-lx6-linux-gnu-toolchain_*.deb' \ + --pattern 'libc6-lx6_*.deb' \ + --pattern 'libc6-dev-lx6_*.deb' \ + --pattern 'libc6-dbg-lx6_*.deb' \ + --pattern 'zlib1g-lx6_*.deb' \ + --pattern 'zlib1g-dev-lx6_*.deb' \ + --pattern 'libmbedcrypto7-lx6_*.deb' \ + --pattern 'libmbedx509-1-lx6_*.deb' \ + --pattern 'libmbedtls14-lx6_*.deb' \ + --pattern 
'libmbedtls-dev-lx6_*.deb' + + # Install QEMU with binfmt support (activates automatically on systemd) + sudo dpkg -i qemu-xtensa-esp_*.deb + + # Install toolchain and add to PATH + sudo dpkg -i xtensa-lx6-linux-gnu-toolchain_*.deb + echo "/opt/xtensa-lx6/bin" >> $GITHUB_PATH + + # Install runtime libraries + sudo dpkg -i libc6-lx6_*.deb + sudo dpkg -i libc6-dev-lx6_*.deb + sudo dpkg -i libc6-dbg-lx6_*.deb + sudo dpkg -i zlib1g-lx6_*.deb + sudo dpkg -i zlib1g-dev-lx6_*.deb + sudo dpkg -i libmbedcrypto7-lx6_*.deb + sudo dpkg -i libmbedx509-1-lx6_*.deb + sudo dpkg -i libmbedtls14-lx6_*.deb + sudo dpkg -i libmbedtls-dev-lx6_*.deb + + # Create CMake toolchain file + cat > ${RUNNER_TEMP}/xtensa_toolchain.cmake <<'EOF' + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR xtensa) + + set(CMAKE_C_COMPILER /opt/xtensa-lx6/bin/xtensa-lx6-linux-gnu-gcc) + set(CMAKE_CXX_COMPILER /opt/xtensa-lx6/bin/xtensa-lx6-linux-gnu-g++) + + set(CMAKE_SYSROOT /opt/xtensa-lx6/xtensa-lx6-linux-gnu/sysroot) + list(APPEND CMAKE_FIND_ROOT_PATH /usr) + set(CMAKE_C_LIBRARY_ARCHITECTURE xtensa-lx6-linux-gnu) + + include_directories(SYSTEM /usr/xtensa-lx6-linux-gnu/include) + + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + + set(ZLIB_LIBRARY /usr/xtensa-lx6-linux-gnu/lib/libz.so) + set(MBEDTLS_ROOT_DIR /usr) + set(MBEDTLS_LIBRARIES_DIR /usr/xtensa-lx6-linux-gnu/lib) + EOF + + # Register binfmt if not activated automatically + if [ ! 
-f /proc/sys/fs/binfmt_misc/qemu-xtensa-esp32 ]; then + echo ':qemu-xtensa-esp32:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5e\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-xtensa:' | sudo tee /proc/sys/fs/binfmt_misc/register || true + fi + + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "APT update" run: sudo apt update -y diff --git a/.github/workflows/esp32-build.yaml b/.github/workflows/esp32-build.yaml index 909b0d121c..6854d8ce8b 100644 --- a/.github/workflows/esp32-build.yaml +++ b/.github/workflows/esp32-build.yaml @@ -51,20 +51,36 @@ jobs: - 'v5.3.4' - 'v5.4.3' - 'v5.5.3' + jit: [false] include: - esp-idf-target: "esp32p4" idf-version: 'v5.4.4' + jit: false - esp-idf-target: "esp32p4" idf-version: 'v5.5.3' + jit: false - esp-idf-target: "esp32s3" idf-version: 'v5.5.3' + jit: false - esp-idf-target: "esp32s3" idf-version: 'v5.5.3' usb-serial: 'ON' + jit: false - esp-idf-target: "esp32" idf-version: 'v5.4.3' libsodium: 'ON' + jit: false + # JIT builds (v5.5.3 only) + - esp-idf-target: "esp32c3" + idf-version: 'v5.5.3' + jit: true + - esp-idf-target: "esp32" + idf-version: 'v5.5.3' + jit: true + - esp-idf-target: "esp32s3" + idf-version: 'v5.5.3' + jit: true steps: - name: Checkout repo @@ -105,8 +121,13 @@ jobs: echo 'CONFIG_USE_USB_SERIAL=y' >> sdkconfig.defaults.in fi export IDF_TARGET=${{matrix.esp-idf-target}} - idf.py set-target ${{matrix.esp-idf-target}} - idf.py ${{ matrix.libsodium == 'ON' && '-DAVM_USE_LIBSODIUM=ON' || '' }} build + if [ "${{ matrix.jit }}" = "true" ]; then + SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.jit" idf.py set-target ${{matrix.esp-idf-target}} + SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.jit" idf.py build + else + idf.py set-target ${{matrix.esp-idf-target}} + idf.py ${{ matrix.libsodium == 'ON' && '-DAVM_USE_LIBSODIUM=ON' || '' }} build + fi idf.py size - name: Print component size info with idf.py @@ -216,6 +237,9 @@ 
jobs: echo "CONFIG_HEAP_POISONING_COMPREHENSIVE=y" >> sdkconfig.defaults echo "CONFIG_ESP_WIFI_IRAM_OPT=n" >> sdkconfig.defaults echo "CONFIG_ESP_WIFI_RX_IRAM_OPT=n" >> sdkconfig.defaults + if [ "${{ matrix.jit }}" = "true" ]; then + echo "CONFIG_JIT_ENABLED=y" >> sdkconfig.defaults + fi . $IDF_PATH/export.sh export IDF_TARGET=${{matrix.esp-idf-target}} idf.py set-target ${{matrix.esp-idf-target}} @@ -239,7 +263,7 @@ jobs: if: failure() && matrix.esp-idf-target != 'esp32p4' uses: actions/upload-artifact@v4 with: - name: atomvm-esp32-test-${{ matrix.esp-idf-target }}-${{ matrix.idf-version }}-memcheck.elf + name: atomvm-esp32-test-${{ matrix.esp-idf-target }}-${{ matrix.idf-version }}${{ matrix.jit && '-jit' || '' }}-memcheck.elf path: ./src/platforms/esp32/test/build/atomvm-esp32-test.elf if-no-files-found: error @@ -252,6 +276,10 @@ jobs: . $IDF_PATH/export.sh export IDF_TARGET=${{matrix.esp-idf-target}} export PATH=${PATH}:${HOME}/.cache/rebar3/bin + cp sdkconfig.defaults sdkconfig.defaults.backup + if [ "${{ matrix.jit }}" = "true" ]; then + echo "CONFIG_JIT_ENABLED=y" >> sdkconfig.defaults + fi idf.py set-target ${{matrix.esp-idf-target}} idf.py build @@ -265,12 +293,13 @@ jobs: . 
$IDF_PATH/export.sh export PATH=/opt/qemu/bin:${PATH} pytest --target=${{matrix.esp-idf-target}} --embedded-services=idf,qemu -s + cp sdkconfig.defaults.backup sdkconfig.defaults - name: Upload ESP32 tests ELF artifact # TODO: remove the following exclusion when ESP32P4 support is added to espressif/qemu if: failure() && matrix.esp-idf-target != 'esp32p4' uses: actions/upload-artifact@v4 with: - name: atomvm-esp32-test-${{ matrix.esp-idf-target }}-${{ matrix.idf-version }}.elf + name: atomvm-esp32-test-${{ matrix.esp-idf-target }}-${{ matrix.idf-version }}${{ matrix.jit && '-jit' || '' }}.elf path: ./src/platforms/esp32/test/build/atomvm-esp32-test.elf if-no-files-found: error diff --git a/CMakeLists.txt b/CMakeLists.txt index 91eaec8bc7..85e8ea201d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+float32;armv6m+thumb2;riscv32;riscv64;wasm32;xtensa" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+thumb2;armv6m+thumb2+float32;riscv32;riscv64;wasm32;xtensa;xtensa+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") # DWARF is not supported on wasm32 if (NOT AVM_DISABLE_JIT_DWARF) diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index ed207a95e5..196475804f 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -20,7 +20,7 @@ macro(pack_archive avm_name) - set(multiValueArgs ERLC_FLAGS MODULES DEPENDS_ON) + set(multiValueArgs ERLC_FLAGS MODULES PRECOMPILED_MODULES DEPENDS_ON) cmake_parse_arguments(PACK_ARCHIVE "" "" "${multiValueArgs}" ${ARGN}) # Build -pa flags and file dependencies from DEPENDS_ON @@ -82,7 +82,7 @@ endmacro() macro(pack_precompiled_archive avm_name) 
pack_archive(${avm_name} ${ARGN}) - set(multiValueArgs ERLC_FLAGS MODULES) + set(multiValueArgs ERLC_FLAGS MODULES PRECOMPILED_MODULES DEPENDS_ON) cmake_parse_arguments(PACK_ARCHIVE "" "" "${multiValueArgs}" ${ARGN}) if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) @@ -97,24 +97,51 @@ macro(pack_precompiled_archive avm_name) string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}") set(jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam - ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}_asm.beam ) + if (NOT AVM_DISABLE_JIT_DWARF) + list(APPEND jit_compiler_modules + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam + ) + endif() if("${jit_target_arch_variant}" MATCHES "thumb2") list(APPEND jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_armv7m_asm.beam ) endif() + if("${jit_target_arch}" STREQUAL "riscv64") + # jit_riscv64_asm delegates many encodings to jit_riscv32_asm; + # jit_riscv64:dwarf_register_number/1 calls jit_riscv32 when DWARF is on. 
+ list(APPEND jit_compiler_modules + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_riscv32.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_riscv32_asm.beam + ) + endif() if (NOT AVM_DISABLE_JIT_DWARF) set(jit_precompile_dwarf_flag "dwarf") else() set(jit_precompile_dwarf_flag "") endif() - foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS) + if(PACK_ARCHIVE_PRECOMPILED_MODULES) + set(_precompile_module_list "") + foreach(_mod IN LISTS PACK_ARCHIVE_PRECOMPILED_MODULES) + string(REPLACE "@ARCH@" "${jit_target_arch}" _mod_resolved "${_mod}") + list(APPEND _precompile_module_list "${_mod_resolved}") + endforeach() + if("${jit_target_arch_variant}" MATCHES "thumb2") + list(APPEND _precompile_module_list jit_armv7m_asm) + endif() + if("${jit_target_arch}" STREQUAL "riscv64") + list(APPEND _precompile_module_list jit_riscv32 jit_riscv32_asm) + endif() + else() + set(_precompile_module_list ${PACK_ARCHIVE_MODULES} ${PACK_ARCHIVE_UNPARSED_ARGUMENTS}) + endif() + foreach(module_name IN LISTS _precompile_module_list) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ @@ -153,7 +180,25 @@ endmacro() macro(pack_lib avm_name) set(options UF2) - cmake_parse_arguments(PACK_LIB "${options}" "" "" ${ARGN}) + set(multiValueArgs TARGETS) + cmake_parse_arguments(PACK_LIB "${options}" "" "${multiValueArgs}" ${ARGN}) + + if(PACK_LIB_TARGETS) + set(_pack_lib_targets ${PACK_LIB_TARGETS}) + else() + set(_pack_lib_targets ${AVM_PRECOMPILED_TARGETS}) + endif() + + # Drop any requested targets that aren't actually being precompiled + # (e.g. wasm32 is removed from AVM_PRECOMPILED_TARGETS when DWARF is on), + # otherwise the build would reference a non-existent jit-<target>.avm rule.
+ set(_pack_lib_targets_filtered "") + foreach(_t IN LISTS _pack_lib_targets) + if("${_t}" IN_LIST AVM_PRECOMPILED_TARGETS) + list(APPEND _pack_lib_targets_filtered "${_t}") + endif() + endforeach() + set(_pack_lib_targets ${_pack_lib_targets_filtered}) set(pack_lib_${avm_name}_archive_targets "") set(pack_lib_${avm_name}_archives "") @@ -191,7 +236,7 @@ macro(pack_lib avm_name) set(target_deps ${avm_name}.avm) if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) - foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS}) + foreach(jit_target_arch_variant ${_pack_lib_targets}) # Build JIT archives list for this specific target architecture set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch_variant}.avm) foreach(archive_name ${PACK_LIB_UNPARSED_ARGUMENTS}) @@ -228,7 +273,7 @@ macro(pack_lib avm_name) ) set(target_deps ${target_deps} ${avm_name}-pico.uf2 ${avm_name}-pico2.uf2) - if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m" IN_LIST AVM_PRECOMPILED_TARGETS)) + if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m" IN_LIST _pack_lib_targets)) add_custom_command( OUTPUT ${avm_name}-armv6m-pico.uf2 DEPENDS ${avm_name}-armv6m.avm UF2Tool @@ -246,22 +291,26 @@ macro(pack_lib avm_name) set(target_deps ${target_deps} ${avm_name}-armv6m-pico.uf2 ${avm_name}-armv6m-pico2.uf2) endif() - if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m+float32" IN_LIST AVM_PRECOMPILED_TARGETS)) + if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m+thumb2" IN_LIST _pack_lib_targets)) add_custom_command( - OUTPUT ${avm_name}-armv6m+float32-pico.uf2 - DEPENDS ${avm_name}-armv6m+float32.avm UF2Tool - COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m+float32-pico.uf2 -s 0x10100000 ${avm_name}-armv6m+float32.avm - COMMENT "Creating UF2 file ${avm_name}-armv6m+float32-pico.uf2" + OUTPUT ${avm_name}-armv6m+thumb2-pico2.uf2 + DEPENDS 
${avm_name}-armv6m+thumb2.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m+thumb2-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m+thumb2.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m+thumb2-pico2.uf2" VERBATIM ) + set(target_deps ${target_deps} ${avm_name}-armv6m+thumb2-pico2.uf2) + endif() + + if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m+thumb2+float32" IN_LIST _pack_lib_targets)) add_custom_command( - OUTPUT ${avm_name}-armv6m+float32-pico2.uf2 - DEPENDS ${avm_name}-armv6m+float32.avm UF2Tool - COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m+float32-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m+float32.avm - COMMENT "Creating UF2 file ${avm_name}-armv6m+float32-pico2.uf2" + OUTPUT ${avm_name}-armv6m+thumb2+float32-pico2.uf2 + DEPENDS ${avm_name}-armv6m+thumb2+float32.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m+thumb2+float32-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m+thumb2+float32.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m+thumb2+float32-pico2.uf2" VERBATIM ) - set(target_deps ${target_deps} ${avm_name}-armv6m+float32-pico.uf2 ${avm_name}-armv6m+float32-pico2.uf2) + set(target_deps ${target_deps} ${avm_name}-armv6m+thumb2+float32-pico2.uf2) endif() endif() @@ -278,7 +327,7 @@ endmacro() macro(pack_runnable avm_name main) - set(multiValueArgs DIALYZE_AGAINST) + set(multiValueArgs DIALYZE_AGAINST TARGETS JIT_ARCHIVES) cmake_parse_arguments(PACK_RUNNABLE "" "" "${multiValueArgs}" ${ARGN}) add_custom_command( @@ -371,6 +420,86 @@ macro(pack_runnable avm_name main) ${avm_name} ALL DEPENDS ${avm_name}.avm ) + + if(PACK_RUNNABLE_TARGETS AND (NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED)) + foreach(jit_target_arch_variant IN LISTS PACK_RUNNABLE_TARGETS) + string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}") + set(jit_compiler_modules + 
${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}_asm.beam + ) + if(NOT AVM_DISABLE_JIT_DWARF) + list(APPEND jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam) + set(jit_precompile_dwarf_flag "dwarf") + else() + set(jit_precompile_dwarf_flag "") + endif() + if("${jit_target_arch_variant}" MATCHES "thumb2") + list(APPEND jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_armv7m_asm.beam) + endif() + if("${jit_target_arch}" STREQUAL "riscv64") + list(APPEND jit_compiler_modules + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_riscv32.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_riscv32_asm.beam + ) + endif() + + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${jit_target_arch_variant}/${main}.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/${jit_target_arch_variant}/ + && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop + -- ${jit_target_arch_variant} ${CMAKE_CURRENT_BINARY_DIR}/${jit_target_arch_variant}/ ${jit_precompile_dwarf_flag} + ${CMAKE_CURRENT_BINARY_DIR}/${main}.beam + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${main}.beam ${jit_compiler_modules} jit + COMMENT "Compiling ${main}.beam to ${jit_target_arch_variant}" + VERBATIM + ) + + # Build esp32boot with fully AOT archives (native code, no bytecode, no JIT compiler). + # JIT_ARCHIVES limits which libraries are included in the JIT boot AVM; + # defaults to the same set as the non-JIT AVM. 
+ if(PACK_RUNNABLE_JIT_ARCHIVES) + set(_jit_archive_list "${PACK_RUNNABLE_JIT_ARCHIVES}") + else() + set(_jit_archive_list "${PACK_RUNNABLE_UNPARSED_ARGUMENTS}") + endif() + set(pack_runnable_${avm_name}_jit_archives_${jit_target_arch_variant} "") + set(pack_runnable_${avm_name}_jit_archive_targets_${jit_target_arch_variant} "") + foreach(archive_name ${_jit_archive_list}) + if(${archive_name} STREQUAL "exavmlib") + list(APPEND pack_runnable_${avm_name}_jit_archives_${jit_target_arch_variant} + ${CMAKE_BINARY_DIR}/libs/${archive_name}/lib/${archive_name}.avm) + else() + list(APPEND pack_runnable_${avm_name}_jit_archives_${jit_target_arch_variant} + ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch_variant}.avm) + endif() + list(APPEND pack_runnable_${avm_name}_jit_archive_targets_${jit_target_arch_variant} ${archive_name}) + endforeach() + + add_custom_command( + OUTPUT ${avm_name}-${jit_target_arch_variant}.avm + DEPENDS + ${CMAKE_CURRENT_BINARY_DIR}/${jit_target_arch_variant}/${main}.beam + ${pack_runnable_${avm_name}_jit_archives_${jit_target_arch_variant}} + ${pack_runnable_${avm_name}_jit_archive_targets_${jit_target_arch_variant}} + PackBEAM + COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/packbeam create ${PACKBEAM_PRUNE_ARGS} -s ${main} ${INCLUDE_LINES} + ${avm_name}-${jit_target_arch_variant}.avm + ${CMAKE_CURRENT_BINARY_DIR}/${jit_target_arch_variant}/${main}.beam + ${pack_runnable_${avm_name}_jit_archives_${jit_target_arch_variant}} + COMMENT "Packing JIT runnable ${avm_name}-${jit_target_arch_variant}.avm" + VERBATIM + ) + add_custom_target( + ${avm_name}_${jit_target_arch_variant} ALL + DEPENDS ${avm_name}-${jit_target_arch_variant}.avm + ) + add_dependencies(${avm_name} ${avm_name}_${jit_target_arch_variant}) + endforeach() + endif() endmacro() diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 23d6035124..4c6d7f2b7d 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -55,14 +55,16 @@ else() message(WARNING 
"Unable to find gleam -- skipping Gleam libs") endif() -# Base (generic_unix): common + network + unix -pack_lib(atomvmlib ${ATOMVM_COMMON_LIBS} avm_network avm_unix) +# Base (generic_unix): common + network + unix — wasm32 is emscripten-only +set(AVM_NATIVE_PRECOMPILED_TARGETS ${AVM_PRECOMPILED_TARGETS}) +list(REMOVE_ITEM AVM_NATIVE_PRECOMPILED_TARGETS wasm32 armv6m+thumb2+float32 xtensa+float32) +pack_lib(atomvmlib ${ATOMVM_COMMON_LIBS} avm_network avm_unix TARGETS ${AVM_NATIVE_PRECOMPILED_TARGETS}) # Platform-specific variants -pack_lib(atomvmlib-esp32 ${ATOMVM_COMMON_LIBS} avm_network avm_esp32) -pack_lib(atomvmlib-rp2 UF2 ${ATOMVM_COMMON_LIBS} avm_network avm_rp2) -pack_lib(atomvmlib-stm32 ${ATOMVM_COMMON_LIBS} avm_stm32) -pack_lib(atomvmlib-emscripten ${ATOMVM_COMMON_LIBS} avm_network avm_emscripten) +pack_lib(atomvmlib-esp32 ${ATOMVM_COMMON_LIBS} avm_network avm_esp32 TARGETS xtensa xtensa+float32 riscv32) +pack_lib(atomvmlib-rp2 UF2 ${ATOMVM_COMMON_LIBS} avm_network avm_rp2 TARGETS armv6m armv6m+thumb2 armv6m+thumb2+float32 riscv32) +pack_lib(atomvmlib-stm32 ${ATOMVM_COMMON_LIBS} avm_stm32 TARGETS armv6m armv6m+thumb2 armv6m+thumb2+float32) +pack_lib(atomvmlib-emscripten ${ATOMVM_COMMON_LIBS} avm_network avm_emscripten TARGETS wasm32) if (Dialyzer_FOUND) # Helper macro to generate a beams list file from an archive diff --git a/libs/avm_esp32/src/CMakeLists.txt b/libs/avm_esp32/src/CMakeLists.txt index 07afa6d387..4922100dac 100644 --- a/libs/avm_esp32/src/CMakeLists.txt +++ b/libs/avm_esp32/src/CMakeLists.txt @@ -33,7 +33,7 @@ set(ERLANG_MODULES uart ) -pack_archive(avm_esp32 DEPENDS_ON eavmlib ERLC_FLAGS +warnings_as_errors MODULES ${ERLANG_MODULES}) +pack_precompiled_archive(avm_esp32 DEPENDS_ON eavmlib ERLC_FLAGS +warnings_as_errors MODULES ${ERLANG_MODULES}) include(../../../version.cmake) diff --git a/libs/eavmlib/src/CMakeLists.txt b/libs/eavmlib/src/CMakeLists.txt index f544f3e787..7fc5784616 100644 --- a/libs/eavmlib/src/CMakeLists.txt +++ 
b/libs/eavmlib/src/CMakeLists.txt @@ -36,7 +36,7 @@ set(ERLANG_MODULES uart_hal ) -pack_archive(eavmlib ${ERLANG_MODULES}) +pack_precompiled_archive(eavmlib ${ERLANG_MODULES}) include(../../../version.cmake) diff --git a/libs/esp32boot/CMakeLists.txt b/libs/esp32boot/CMakeLists.txt index 316b42136e..a53e99e05f 100644 --- a/libs/esp32boot/CMakeLists.txt +++ b/libs/esp32boot/CMakeLists.txt @@ -26,4 +26,4 @@ if (Elixir_FOUND) pack_runnable(elixir_esp32boot esp32init esp32devmode eavmlib estdlib alisp avm_network avm_esp32 exavmlib) endif() -pack_runnable(esp32boot esp32init esp32devmode eavmlib estdlib alisp avm_network avm_esp32) +pack_runnable(esp32boot esp32init esp32devmode eavmlib estdlib alisp avm_network avm_esp32 TARGETS xtensa xtensa+float32 riscv32 JIT_ARCHIVES estdlib eavmlib avm_esp32) diff --git a/libs/esp32boot/esp32init.erl b/libs/esp32boot/esp32init.erl index e8d2633908..efe15be4ec 100644 --- a/libs/esp32boot/esp32init.erl +++ b/libs/esp32boot/esp32init.erl @@ -24,6 +24,7 @@ start() -> console:print(<<"AtomVM init.\n">>), + {ok, _} = kernel:start(boot, []), boot(). 
is_dev_mode_enabled(SystemStatus) -> diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 68d0a6bad2..375a79f5ae 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -23,9 +23,7 @@ project(jit) include(BuildErlang) set(ERLANG_MODULES - jit_dwarf_pt jit - jit_dwarf jit_precompile jit_regs jit_stream_binary @@ -52,16 +50,61 @@ set(ERLANG_MODULES include(../../../version.cmake) +set(JIT_PRECOMPILED_MODULES + jit + jit_regs + jit_stream_binary + jit_stream_flash + jit_stream_mmap + jit_@ARCH@ + jit_@ARCH@_asm +) + if (NOT AVM_DISABLE_JIT_DWARF) + list(INSERT ERLANG_MODULES 1 jit_dwarf) + list(INSERT JIT_PRECOMPILED_MODULES 1 jit_dwarf) set(erlc_flags -DJIT_DWARF -DATOMVM_VERSION=\"${ATOMVM_BASE_VERSION}\" -pa ${CMAKE_CURRENT_BINARY_DIR}/beams) else() set(erlc_flags) + # Compile jit_dwarf.beam for BEAM tests even when DWARF is disabled (not packed in .avm) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams + && erlc +debug_info + -o ${CMAKE_CURRENT_BINARY_DIR}/beams + -I ${CMAKE_SOURCE_DIR}/libs/include + -I ${CMAKE_SOURCE_DIR}/libs + -I ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf.erl + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf.erl + COMMENT "Compiling jit_dwarf.erl (BEAM tests only, not packed in .avm)" + VERBATIM + ) endif() -pack_precompiled_archive(jit ERLC_FLAGS ${erlc_flags} MODULES ${ERLANG_MODULES}) +pack_precompiled_archive(jit ERLC_FLAGS ${erlc_flags} MODULES ${ERLANG_MODULES} PRECOMPILED_MODULES ${JIT_PRECOMPILED_MODULES}) + +if (AVM_DISABLE_JIT_DWARF) + add_custom_target(jit_dwarf_for_beam_tests ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf.beam + ) +endif() if (NOT AVM_DISABLE_JIT_DWARF) - # jit.beam needs jit_dwarf_pt.beam compiled first (parse transform dependency) + # jit_dwarf_pt is a parse transform needed to compile jit.beam; it is not packed + add_custom_command( + OUTPUT 
${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf_pt.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams + && erlc +debug_info + -o ${CMAKE_CURRENT_BINARY_DIR}/beams + -I ${CMAKE_SOURCE_DIR}/libs/include + -I ${CMAKE_SOURCE_DIR}/libs + -I ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf_pt.erl + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf_pt.erl + COMMENT "Compiling jit_dwarf_pt.erl" + VERBATIM + ) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/jit.beam DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf_pt.beam diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 96915ec3d7..35fdcef0ff 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -57,16 +57,45 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# By default, JIT is disabled -set(AVM_DISABLE_JIT ON) +## Configure partition table based on boot flavor and JIT +# Detect JIT from existing sdkconfig before project() where CONFIG_* is not yet available. +set(_jit_partition NO) +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") + file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" _sdkconfig_lines) + if("CONFIG_JIT_ENABLED=y" IN_LIST _sdkconfig_lines) + set(_jit_partition YES) + endif() +endif() -## Configure partition table based on boot flavor -if (ATOMVM_ELIXIR_SUPPORT) +if (_jit_partition) + # JIT partition layout: fully AOT boot.avm (estdlib + esp32init, no JIT compiler) + set(AVM_PARTITION_TABLE_FILENAME "partitions-jit.csv") +elseif (ATOMVM_ELIXIR_SUPPORT) set(AVM_PARTITION_TABLE_FILENAME "partitions-elixir.csv") else() set(AVM_PARTITION_TABLE_FILENAME "partitions.csv") endif() +# Sync partition table in sdkconfig so that IDF uses the correct partition layout. +# sdkconfig treats its settings as user values that override defaults, so writing +# sdkconfig.defaults alone is not enough, we must patch sdkconfig directly. 
+# Only write when the value actually differs to avoid spurious timestamp changes. +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") + file(READ "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" _sdkconfig_content) + if(NOT _sdkconfig_content MATCHES "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"${AVM_PARTITION_TABLE_FILENAME}\"") + string(REGEX REPLACE + "(CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=)[^\n]*" + "\\1\"${AVM_PARTITION_TABLE_FILENAME}\"" + _sdkconfig_content "${_sdkconfig_content}") + string(REGEX REPLACE + "(CONFIG_PARTITION_TABLE_FILENAME=)[^\n]*" + "\\1\"${AVM_PARTITION_TABLE_FILENAME}\"" + _sdkconfig_content "${_sdkconfig_content}") + file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" "${_sdkconfig_content}") + message(STATUS "Updated sdkconfig partition table to ${AVM_PARTITION_TABLE_FILENAME}") + endif() +endif() + if (ATOMVM_RELEASE) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig.release-defaults.in ${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig.defaults @ONLY) else() @@ -75,25 +104,8 @@ endif() project(atomvm-esp32) -# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) -# Configuration comes from idf.py menuconfig (KConfig), not CMake options -if(CONFIG_JIT_ENABLED) - if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c5|esp32c6|esp32c61|esp32h2|esp32p4") - set(AVM_DISABLE_JIT OFF) - set(AVM_JIT_TARGET_ARCH riscv32) - message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") - elseif(${IDF_TARGET} MATCHES "esp32|esp32s2|esp32s3") - set(AVM_DISABLE_JIT OFF) - set(AVM_JIT_TARGET_ARCH xtensa) - message(STATUS "JIT compilation enabled for ${IDF_TARGET} (Xtensa)") - else() - message(WARNING "JIT is not supported on ${IDF_TARGET}") - set(AVM_DISABLE_JIT ON) - endif() -else() - set(AVM_DISABLE_JIT ON) - message(STATUS "JIT compilation disabled") -endif() +# JIT configuration is handled in components/libatomvm/CMakeLists.txt after +# idf_component_register, where CONFIG_* variables are available. 
# esp-idf does not use compile_feature but instead sets version in # c_compile_options diff --git a/src/platforms/esp32/GetBootAVM.cmake b/src/platforms/esp32/GetBootAVM.cmake index 1a08d537f9..394da8a1dc 100644 --- a/src/platforms/esp32/GetBootAVM.cmake +++ b/src/platforms/esp32/GetBootAVM.cmake @@ -20,8 +20,11 @@ partition_table_get_partition_info(app_offset "--partition-name main.avm" "offset") set(AVM_APP_OFFSET "${app_offset}") -# Both partitions.csv and partitions-elixir.csv use 0x250000 for main.avm. -# Use ATOMVM_ELIXIR_SUPPORT to select the boot library flavor. +partition_table_get_partition_info(lib_offset "--partition-name boot.avm" "offset") +set(AVM_LIB_OFFSET "${lib_offset}") +# partitions.csv and partitions-elixir.csv use 0x250000 for main.avm (non-JIT). +# partitions-jit.csv uses 0x300000 for main.avm (JIT: AOT boot.avm 1.375MB on 4MB flash). +# Use ATOMVM_ELIXIR_SUPPORT to select the elixir boot library flavor. if ("${app_offset}" STREQUAL "0x250000") if (ATOMVM_ELIXIR_SUPPORT) set(BOOT_LIBS "elixir_esp32boot.avm") @@ -30,6 +33,36 @@ if ("${app_offset}" STREQUAL "0x250000") set(BOOT_LIBS "esp32boot.avm") set(ATOMVM_FLAVOR "") endif() +elseif ("${app_offset}" STREQUAL "0x300000") + # JIT partition layout: select arch-specific precompiled boot AVM. + # AVM_JIT_TARGET_ARCH is set in components/libatomvm/CMakeLists.txt when + # included from a component context; derive from IDF_TARGET otherwise. 
+ if (DEFINED AVM_JIT_TARGET_ARCH) + set(_jit_arch "${AVM_JIT_TARGET_ARCH}") + elseif (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c5|esp32c6|esp32c61|esp32h2|esp32p4") + set(_jit_arch "riscv32") + elseif (${IDF_TARGET} MATCHES "^esp32") + set(_jit_arch "xtensa") + else() + set(_jit_arch "") + endif() + if (AVM_USE_32BIT_FLOAT AND _jit_arch STREQUAL "xtensa") + set(_jit_variant "${_jit_arch}+float32") + else() + set(_jit_variant "${_jit_arch}") + endif() + if (_jit_variant) + if (ATOMVM_ELIXIR_SUPPORT) + set(BOOT_LIBS "elixir_esp32boot-${_jit_variant}.avm") + set(ATOMVM_FLAVOR "-elixir-jit") + else() + set(BOOT_LIBS "esp32boot-${_jit_variant}.avm") + set(ATOMVM_FLAVOR "-jit") + endif() + else() + set(BOOT_LIBS "NONE") + set(ATOMVM_FLAVOR "") + endif() else() set(BOOT_LIBS "NONE") set(ATOMVM_FLAVOR "") diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index b516f829c2..5378782dde 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -25,6 +25,23 @@ endif() idf_component_register(INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../libAtomVM" REQUIRES ${LIBSODIUM_REQUIRE}) +# CONFIG_* is available after idf_component_register but not before project(). +# AVM_DISABLE_JIT must be set here, before add_subdirectory(libAtomVM), so that +# libAtomVM/CMakeLists.txt sees the correct value. 
+if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c5|esp32c6|esp32c61|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + elseif(${IDF_TARGET} MATCHES "esp32|esp32s2|esp32s3") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH xtensa) + else() + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) +endif() + # GCC complains about "warning: #include_next is a GCC extension" when using pedantic flag # "pedantic" flag should be disabled rather poluting diagnostics with warnings caused from 3rd party option(AVM_PEDANTIC_WARNINGS "Pedantic compiler warnings" OFF) diff --git a/src/platforms/esp32/partitions-jit.csv b/src/platforms/esp32/partitions-jit.csv new file mode 100644 index 0000000000..b2315ae475 --- /dev/null +++ b/src/platforms/esp32/partitions-jit.csv @@ -0,0 +1,16 @@ +# Copyright 2018-2021 Davide Bettio +# Copyright 2018-2021 Fred Dushin +# Copyright 2025 Paul Guyot +# +# SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + +# Name, Type, SubType, Offset, Size, Flags +# Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild +# JIT partition layout (4MB flash): boot.avm holds AOT-compiled esp32init, estdlib, +# eavmlib and avm_esp32 (no JIT compiler). alisp, avm_network and esp32devmode are +# excluded to fit the partition; they must be included in the app partition. 
+nvs, data, nvs, 0x9000, 0x6000, +phy_init, data, phy, 0xf000, 0x1000, +factory, app, factory, 0x10000, 0x170000, +boot.avm, data, phy, 0x180000, 0x180000, +main.avm, data, phy, 0x300000, 0x100000 diff --git a/src/platforms/esp32/sdkconfig.jit b/src/platforms/esp32/sdkconfig.jit new file mode 100644 index 0000000000..ca31d69aab --- /dev/null +++ b/src/platforms/esp32/sdkconfig.jit @@ -0,0 +1 @@ +CONFIG_JIT_ENABLED=y diff --git a/src/platforms/esp32/sdkconfig.jit.license b/src/platforms/esp32/sdkconfig.jit.license new file mode 100644 index 0000000000..6da126cc3a --- /dev/null +++ b/src/platforms/esp32/sdkconfig.jit.license @@ -0,0 +1,2 @@ +SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: 2026 Paul Guyot diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index d4b1cf3a91..e248dc84a2 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,22 +57,42 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C5, ESP32-C6, ESP32-C61, ESP32-H2, ESP32-P4) -# Configuration comes from idf.py menuconfig (KConfig), not CMake options -if(CONFIG_JIT_ENABLED) - if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c5|esp32c6|esp32c61|esp32h2|esp32p4") - set(AVM_DISABLE_JIT OFF) - set(AVM_JIT_TARGET_ARCH riscv32) - message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") - else() - message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") - set(AVM_DISABLE_JIT ON) +## Configure partition table based on JIT +# Detect JIT from existing sdkconfig before project() where CONFIG_* is not yet available. 
+set(_jit_partition NO) +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") + file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" _sdkconfig_lines) + if("CONFIG_JIT_ENABLED=y" IN_LIST _sdkconfig_lines) + set(_jit_partition YES) endif() +endif() + +if (_jit_partition) + set(AVM_PARTITION_TABLE_FILENAME "../partitions-jit.csv") else() - set(AVM_DISABLE_JIT ON) - message(STATUS "JIT compilation disabled") + set(AVM_PARTITION_TABLE_FILENAME "../partitions.csv") +endif() + +# Sync partition table in sdkconfig so that IDF uses the correct partition layout. +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") + file(READ "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" _sdkconfig_content) + if(NOT _sdkconfig_content MATCHES "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"${AVM_PARTITION_TABLE_FILENAME}\"") + string(REGEX REPLACE + "(CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=)[^\n]*" + "\\1\"${AVM_PARTITION_TABLE_FILENAME}\"" + _sdkconfig_content "${_sdkconfig_content}") + string(REGEX REPLACE + "(CONFIG_PARTITION_TABLE_FILENAME=)[^\n]*" + "\\1\"${AVM_PARTITION_TABLE_FILENAME}\"" + _sdkconfig_content "${_sdkconfig_content}") + file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" "${_sdkconfig_content}") + message(STATUS "Updated sdkconfig partition table to ${AVM_PARTITION_TABLE_FILENAME}") + endif() endif() +# JIT configuration is handled in components/libatomvm/CMakeLists.txt after +# idf_component_register, where CONFIG_* variables are available. 
+ project(atomvm-esp32-test) # esp-idf does not use compile_feature but instead sets version in @@ -84,3 +104,15 @@ if (-std=gnu99 IN_LIST c_compile_options ) list(APPEND c_compile_options -std=gnu11) idf_build_set_property(C_COMPILE_OPTIONS ${c_compile_options}) endif() + +include(${CMAKE_CURRENT_SOURCE_DIR}/../GetBootAVM.cmake) +message(STATUS "-- Configuring atomvmlib esp32boot flavor: ${BOOT_LIBS}") +if (NOT ("${BOOT_LIBS}" STREQUAL "NONE")) + set(BOOT_LIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../build/libs/esp32boot/${BOOT_LIBS}") + if (NOT EXISTS "${BOOT_LIB_PATH}") + message(WARNING "A generic_unix build must be done first in the top level AtomVM/build directory!") + else() + partition_table_get_partition_info(lib_offset "--partition-name boot.avm" "offset") + esptool_py_flash_target_image(flash boot.avm "${lib_offset}" "${BOOT_LIB_PATH}") + endif() +endif() diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index 43b2c44cb6..77cafc7696 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -58,6 +58,21 @@ function(compile_erlang module_name) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() +# Compile an Erlang module WITHOUT JIT precompilation. +# Used to include plain BEAM modules that will be compiled at runtime by jit_stream_flash. 
+function(compile_erlang_no_jit module_name) + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" + COMMAND erlc ${CMAKE_CURRENT_SOURCE_DIR}/${module_name}.erl + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${module_name}.erl" + COMMENT "Compiling ${module_name}.erl (no JIT precompilation)" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") +endfunction() + +compile_erlang(test_jit_compile) +compile_erlang_no_jit(test_jit_simple) compile_erlang(test_esp_partition) compile_erlang(test_esp_timer_get_time) compile_erlang(test_file) @@ -82,6 +97,7 @@ compile_erlang(test_deep_sleep_hold) compile_erlang(test_wifi_scan) set(erlang_test_beams + test_jit_compile.beam test_esp_partition.beam test_esp_timer_get_time.beam test_file.beam @@ -109,17 +125,27 @@ set(erlang_test_beams if(NOT AVM_DISABLE_JIT) set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) - set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) - set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + # test_jit_simple is always plain BEAM (no JIT precompile) so the code_server + # can compile it at runtime via jit_stream_flash, testing the full JIT pipeline. 
+ set(erlang_test_beams_to_package + ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}} + test_jit_simple.beam) + set(erlang_test_beams_depends + ${erlang_test_beams} + ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}} + test_jit_simple.beam) + set(jit_avm_arg HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/jit.avm) else() - set(erlang_test_beams_to_package ${erlang_test_beams}) - set(erlang_test_beams_depends ${erlang_test_beams}) + set(erlang_test_beams_to_package ${erlang_test_beams} test_jit_simple.beam) + set(erlang_test_beams_depends ${erlang_test_beams} test_jit_simple.beam) + set(jit_avm_arg "") endif() add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/packbeam create esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib-esp32.avm + ${jit_avm_arg} ${erlang_test_beams_to_package} DEPENDS HostAtomVM diff --git a/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl b/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl new file mode 100644 index 0000000000..c89be94265 --- /dev/null +++ b/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl @@ -0,0 +1,32 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +% Tests runtime JIT compilation via jit_stream_flash. 
+% test_jit_simple is pre-loaded as plain BEAM (no native code) by the C test. +% Calling code_server:load/1 triggers the JIT compiler to compile it at runtime +% and store the result in flash, then execute it natively. +-module(test_jit_compile). + +-export([start/0]). + +start() -> + ok = code_server:load(test_jit_simple), + 42 = test_jit_simple:run(), + ok. diff --git a/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl b/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl new file mode 100644 index 0000000000..9d64e90a8e --- /dev/null +++ b/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl @@ -0,0 +1,27 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +% This module is intentionally NOT JIT-precompiled. +% It is included as plain BEAM to test runtime JIT compilation via jit_stream_flash. +-module(test_jit_simple). + +-export([run/0]). + +run() -> 6 * 7. 
diff --git a/src/platforms/esp32/test/main/test_main.c b/src/platforms/esp32/test/main/test_main.c index fe5ac38b0e..62f6d5fc77 100644 --- a/src/platforms/esp32/test/main/test_main.c +++ b/src/platforms/esp32/test/main/test_main.c @@ -180,6 +180,53 @@ term avm_test_case(const char *test_module) return ret_value; } +#ifndef AVM_NO_JIT +TEST_CASE("test_jit_compile", "[test_run]") +{ + esp32_sys_queue_init(); + + GlobalContext *glb = globalcontext_new(); + TEST_ASSERT(glb != NULL); + + port_driver_init_all(glb); + nif_collection_init_all(glb); + + TEST_ASSERT(avmpack_is_valid(main_avm, size) != 0); + + struct ConstAVMPack *avmpack_data = malloc(sizeof(struct ConstAVMPack)); + TEST_ASSERT(avmpack_data != NULL); + avmpack_data_init(&avmpack_data->base, &const_avm_pack_info); + avmpack_data->base.in_use = true; + avmpack_data->base.data = main_avm; + synclist_append(&glb->avmpack_data, &avmpack_data->base.avmpack_head); + + // Pre-load test_jit_simple as plain BEAM so code_server:code_chunk/1 can find it. + // code_server:load/1 will then JIT-compile it at runtime via jit_stream_flash. 
+ Module *jit_simple_mod = globalcontext_load_module_from_avm(glb, "test_jit_simple.beam"); + TEST_ASSERT(jit_simple_mod != NULL); + globalcontext_insert_module(glb, jit_simple_mod); + + Module *mod = globalcontext_load_module_from_avm(glb, "test_jit_compile.beam"); + TEST_ASSERT(mod != NULL); + globalcontext_insert_module(glb, mod); + + Context *ctx = context_new(glb); + TEST_ASSERT(ctx != NULL); + ctx->leader = 1; + + ESP_LOGI(TAG, "Running start/0 from test_jit_compile.beam...\n"); + context_execute_loop(ctx, mod, "start", 0); + term ret_value = ctx->x[0]; + + context_destroy(ctx); + nif_collection_destroy_all(glb); + port_driver_destroy_all(glb); + globalcontext_destroy(glb); + + TEST_ASSERT(ret_value == OK_ATOM); +} +#endif + TEST_CASE("test_esp_partition", "[test_run]") { term ret_value = avm_test_case("test_esp_partition.beam"); diff --git a/src/platforms/esp32/tools/mkimage.config.in b/src/platforms/esp32/tools/mkimage.config.in index 8c19021d00..3a223fab93 100644 --- a/src/platforms/esp32/tools/mkimage.config.in +++ b/src/platforms/esp32/tools/mkimage.config.in @@ -37,7 +37,7 @@ }, #{ name => "AtomVM Boot and Core BEAM Library", - offset => "0x1D0000", + offset => "@AVM_LIB_OFFSET@", path => ["@ROOT_DIR@/build/libs/esp32boot/@BOOT_LIBS@"] } ] diff --git a/src/platforms/esp32/tools/mkimage.sh.in b/src/platforms/esp32/tools/mkimage.sh.in index a0b99bab6f..a8488aaaaa 100644 --- a/src/platforms/esp32/tools/mkimage.sh.in +++ b/src/platforms/esp32/tools/mkimage.sh.in @@ -35,7 +35,7 @@ fail() exit 1 } -if [ "${@}" = "--help" ]; then +if [ "$1" = "--help" ]; then escript "@CMAKE_BINARY_DIR@/mkimage.erl" --help exit 0 fi diff --git a/src/platforms/esp32/tools/mkimage_nvs.config.in b/src/platforms/esp32/tools/mkimage_nvs.config.in index efe9e54c2c..63f88e392a 100644 --- a/src/platforms/esp32/tools/mkimage_nvs.config.in +++ b/src/platforms/esp32/tools/mkimage_nvs.config.in @@ -43,7 +43,7 @@ }, #{ name => "AtomVM Boot and Core BEAM Library", - offset => "0x1D0000", + 
offset => "@AVM_LIB_OFFSET@", path => ["@ROOT_DIR@/build/libs/esp32boot/@BOOT_LIBS@"] } ] diff --git a/src/platforms/generic_unix/CMakeLists.txt b/src/platforms/generic_unix/CMakeLists.txt index 87f9eec1d9..7f64b31be8 100644 --- a/src/platforms/generic_unix/CMakeLists.txt +++ b/src/platforms/generic_unix/CMakeLists.txt @@ -25,8 +25,16 @@ set_target_properties(AtomVM PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../../ ENABLE_E target_compile_features(AtomVM PUBLIC c_std_11) if(CMAKE_COMPILER_IS_GNUCC) target_compile_options(AtomVM PUBLIC -Wall -pedantic -Wextra -ggdb) + # Add text-section-literals for Xtensa to fix literal pool out of range issues + if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_TARGET MATCHES "xtensa") + target_compile_options(AtomVM PUBLIC -mtext-section-literals) + endif() elseif(CMAKE_C_COMPILER_ID MATCHES "Clang") target_compile_options(AtomVM PUBLIC -Wall -pedantic -Wextra -g) + # Add text-section-literals for Xtensa to fix literal pool out of range issues + if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET MATCHES "xtensa") + target_compile_options(AtomVM PUBLIC -mtext-section-literals) + endif() endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 0bd4aa1e11..ce6a032fc9 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -825,7 +825,7 @@ struct NativeCodeMmapHeader ModuleNativeEntryPoint sys_map_native_code(const uint8_t *code, size_t code_size) { size_t total = sizeof(struct NativeCodeMmapHeader) + code_size; -#if defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) || defined(__xtensa__) #if defined(__APPLE__) struct NativeCodeMmapHeader *header = mmap(0, total, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0); #else diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 
c4022fccea..a9aa020c64 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -29,7 +29,6 @@ macro(jit_precompile module_name) ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam - ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${_jit_base_arch}.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${_jit_base_arch}_asm.beam ) @@ -38,6 +37,11 @@ macro(jit_precompile module_name) ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_armv7m_asm.beam ) endif() + if (NOT AVM_DISABLE_JIT_DWARF) + list(APPEND jit_compiler_modules + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam + ) + endif() if (NOT AVM_DISABLE_JIT_DWARF) set(jit_precompile_dwarf_flag "dwarf") else() diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 870aa971aa..282388e4cb 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,7 +26,6 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common - jit_dwarf_tests jit_regs_tests jit_aarch64_tests jit_aarch64_asm_tests @@ -49,8 +48,25 @@ set(ERLANG_MODULES ) if (NOT AVM_DISABLE_JIT_DWARF) - pack_archive(test_jit_lib ERLC_FLAGS -DTEST -DJIT_DWARF MODULES ${ERLANG_MODULES}) + pack_archive(test_jit_lib ERLC_FLAGS -DTEST -DJIT_DWARF MODULES jit_dwarf_tests ${ERLANG_MODULES}) else() pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES}) + # Compile jit_dwarf_tests.beam for BEAM tests even when DWARF is disabled (not packed in .avm) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf_tests.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams + && erlc +debug_info -DTEST + -o ${CMAKE_CURRENT_BINARY_DIR}/beams + -I ${CMAKE_SOURCE_DIR}/libs/include + -I ${CMAKE_SOURCE_DIR}/libs + -I ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf_tests.erl + DEPENDS 
${CMAKE_CURRENT_SOURCE_DIR}/jit_dwarf_tests.erl + COMMENT "Compiling jit_dwarf_tests.erl (BEAM tests only, not packed in .avm)" + VERBATIM + ) + add_custom_target(jit_dwarf_tests_for_beam_tests ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/jit_dwarf_tests.beam + ) endif() pack_eunit(test_jit estdlib eavmlib etest jit) From 0293b872ac693d2356cb1bf91206b9e88685c5fb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Apr 2026 23:19:44 +0200 Subject: [PATCH 5/8] JIT xtensa: execution on ESP32 Signed-off-by: Paul Guyot --- .../avm_sys/jit_stream_flash_platform.c | 73 +++++++++++++++++-- src/platforms/esp32/components/avm_sys/sys.c | 66 +++++++++++++++++ .../esp32/test/main/Kconfig.projbuild | 15 ++++ 3 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 src/platforms/esp32/test/main/Kconfig.projbuild diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c index bfaed52215..ce772df5d1 100644 --- a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c @@ -40,6 +40,16 @@ struct JSFlashPlatformContext const esp_partition_t *partition; }; +#ifndef CONFIG_IDF_TARGET_ARCH_RISCV +// On Xtensa, flash DROM (0x3F4xxxxx) is not executable; code must run from IROM. +// We keep a permanent IBUS mmap alongside the existing DBUS mmap so that +// ptr_to_executable / executable_to_ptr can convert between the two. 
+static spi_flash_mmap_handle_t g_ibus_handle; +static uintptr_t g_ibus_base = 0; +static uintptr_t g_dbus_base = 0; +static bool g_xtensa_mmap_initialized = false; +#endif + struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) { const esp_partition_t *partition = esp_partition_find_first( @@ -49,6 +59,39 @@ struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) return NULL; } +#ifndef CONFIG_IDF_TARGET_ARCH_RISCV + if (!g_xtensa_mmap_initialized) { + // Map partition via instruction bus so that the native code is executable. + const void *ibus_ptr; + spi_flash_mmap_handle_t ibus_handle; + esp_err_t err = esp_partition_mmap(partition, 0, partition->size, SPI_FLASH_MMAP_INST, &ibus_ptr, &ibus_handle); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to map JIT partition for instruction access: %d\n", err); + return NULL; + } + + // Map via data bus as well to learn the DBUS base address. + // ESP-IDF reuses existing MMU pages, so this returns the same virtual + // address as the mapping already created by esp32_sys_mmap_partition. + const void *dbus_ptr; + spi_flash_mmap_handle_t dbus_handle; + err = esp_partition_mmap(partition, 0, partition->size, SPI_FLASH_MMAP_DATA, &dbus_ptr, &dbus_handle); + if (UNLIKELY(err != ESP_OK)) { + spi_flash_munmap(ibus_handle); + fprintf(stderr, "Failed to map JIT partition for data access: %d\n", err); + return NULL; + } + // Release our extra DBUS reference; the original mapping from + // esp32_sys_mmap_partition keeps the pages live for the lifetime of the VM. 
+ spi_flash_munmap(dbus_handle); + + g_ibus_handle = ibus_handle; + g_ibus_base = (uintptr_t) ibus_ptr; + g_dbus_base = (uintptr_t) dbus_ptr; + g_xtensa_mmap_initialized = true; + } +#endif + struct JSFlashPlatformContext *pf_ctx = malloc(sizeof(struct JSFlashPlatformContext)); if (IS_NULL_PTR(pf_ctx)) { return NULL; @@ -109,30 +152,46 @@ bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, ui uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) { - // Convert data cache address to instruction cache address for RISC-V targets - // On ESP32-C3/C6/H2, flash is mapped to both DBUS (0x3C...) and IBUS (0x42...) - // but only IBUS addresses are executable + // Convert data cache (DBUS) address to instruction cache (IBUS) address. + // On RISC-V targets the DBUS and IBUS windows share the same MMU pages but + // live at different base addresses that differ only in the upper bits. + // On Xtensa targets we maintain a permanent IBUS mmap whose base address is + // stored in g_ibus_base; the corresponding DBUS base is in g_dbus_base. #ifdef CONFIG_IDF_TARGET_ARCH_RISCV if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_DBUS_VADDR_BASE) { return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_IBUS_VADDR_BASE; } return addr; #else - return addr; + if (UNLIKELY(!g_xtensa_mmap_initialized)) { + // Returning addr here would hand out a non-executable DROM pointer. + // Return 0 so callers crash early with a clear NULL deref rather than + // jumping into data memory. + fprintf(stderr, "jit_stream_flash_platform_ptr_to_executable: " + "g_xtensa_mmap_initialized is false for addr 0x%lx\n", + (unsigned long) addr); + return 0; + } + return g_ibus_base + (addr - g_dbus_base); #endif } uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) { - // Convert instruction cache address to data cache address for RISC-V targets - // This is the reverse of ptr_to_executable + // Reverse of ptr_to_executable: IBUS address -> DBUS address. 
#ifdef CONFIG_IDF_TARGET_ARCH_RISCV if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_IBUS_VADDR_BASE) { return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_DBUS_VADDR_BASE; } return addr; #else - return addr; + if (UNLIKELY(!g_xtensa_mmap_initialized)) { + fprintf(stderr, "jit_stream_flash_platform_executable_to_ptr: " + "g_xtensa_mmap_initialized is false for addr 0x%lx\n", + (unsigned long) addr); + return 0; + } + return g_dbus_base + (addr - g_ibus_base); #endif } diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 41492e6ba6..665da32f56 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -49,6 +49,7 @@ #if ESP_IDF_VERSION_MAJOR >= 5 #include "esp_chip_info.h" +#include #endif #include @@ -327,6 +328,20 @@ void sys_free_platform(GlobalContext *glb) free(platform); } +#if !defined(CONFIG_IDF_TARGET_ARCH_RISCV) && !defined(AVM_NO_JIT) +struct xtensa_part_mapping { + uintptr_t dbus_base; + uintptr_t ibus_base; + size_t size; + spi_flash_mmap_handle_t ibus_handle; +}; +static struct xtensa_part_mapping *s_xtensa_part_mappings = NULL; +static int s_xtensa_part_count = 0; +// Protects s_xtensa_part_mappings and s_xtensa_part_count. Reachable from +// any scheduler thread via sys_open_avm_from_file / sys_map_native_code. +static pthread_mutex_t s_xtensa_part_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + const void *esp32_sys_mmap_partition(const char *partition_name, spi_flash_mmap_handle_t *handle, int *size) { const esp_partition_t *partition = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, @@ -349,6 +364,42 @@ const void *esp32_sys_mmap_partition(const char *partition_name, spi_flash_mmap_ ESP_LOGI(TAG, "Loaded BEAM partition %s at address 0x%"PRIx32" (size=%"PRIu32" bytes)", partition_name, partition->address, partition->size); +#ifndef CONFIG_IDF_TARGET_ARCH_RISCV + // On Xtensa, flash DROM is not executable. 
Map each AVM partition via the + // instruction bus (IBUS) as well so that AOT native code chunks can be + // executed directly from flash. Handles are kept alive for the VM lifetime. + // We store (dbus_base, ibus_base, size) so sys_map_native_code can convert + // DBUS addresses to IBUS addresses by direct offset arithmetic rather than + // relying on spi_flash_phys2cache which fails on LX6 even after a successful IBUS mmap. +#ifndef AVM_NO_JIT + { + const void *ibus_ptr; + spi_flash_mmap_handle_t ibus_handle; + if (esp_partition_mmap(partition, 0, partition->size, SPI_FLASH_MMAP_INST, + &ibus_ptr, &ibus_handle) == ESP_OK) { + pthread_mutex_lock(&s_xtensa_part_mutex); + struct xtensa_part_mapping *new_mappings = realloc(s_xtensa_part_mappings, + (s_xtensa_part_count + 1) * sizeof(struct xtensa_part_mapping)); + if (IS_NULL_PTR(new_mappings)) { + pthread_mutex_unlock(&s_xtensa_part_mutex); + spi_flash_munmap(ibus_handle); + ESP_LOGW(TAG, "Failed to allocate IBUS mapping entry for %s", partition_name); + } else { + s_xtensa_part_mappings = new_mappings; + s_xtensa_part_mappings[s_xtensa_part_count].dbus_base = (uintptr_t) mapped_memory; + s_xtensa_part_mappings[s_xtensa_part_count].ibus_base = (uintptr_t) ibus_ptr; + s_xtensa_part_mappings[s_xtensa_part_count].size = partition->size; + s_xtensa_part_mappings[s_xtensa_part_count].ibus_handle = ibus_handle; + s_xtensa_part_count++; + pthread_mutex_unlock(&s_xtensa_part_mutex); + } + } else { + ESP_LOGW(TAG, "Failed to map partition %s for instruction access", partition_name); + } + } +#endif // AVM_NO_JIT +#endif // CONFIG_IDF_TARGET_ARCH_RISCV + return mapped_memory; } @@ -881,6 +932,21 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *code, size_t code_size } #endif // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#else + // On Xtensa, DROM (0x3F4xxxxx) is not executable. 
Convert to the IBUS + // address by finding the partition whose DBUS window contains addr and + // computing the offset into its IBUS window. This is more reliable than + // spi_flash_phys2cache which fails on LX6 even after a successful IBUS mmap. + pthread_mutex_lock(&s_xtensa_part_mutex); + for (int i = 0; i < s_xtensa_part_count; i++) { + uintptr_t dbus_base = s_xtensa_part_mappings[i].dbus_base; + size_t mapping_size = s_xtensa_part_mappings[i].size; + if (addr >= dbus_base && (addr + code_size) <= (dbus_base + mapping_size)) { + addr = s_xtensa_part_mappings[i].ibus_base + (addr - dbus_base); + break; + } + } + pthread_mutex_unlock(&s_xtensa_part_mutex); #endif return (ModuleNativeEntryPoint) addr; diff --git a/src/platforms/esp32/test/main/Kconfig.projbuild b/src/platforms/esp32/test/main/Kconfig.projbuild new file mode 100644 index 0000000000..f0cd8dc051 --- /dev/null +++ b/src/platforms/esp32/test/main/Kconfig.projbuild @@ -0,0 +1,15 @@ +## This file is part of AtomVM. +## +## Copyright 2026 Paul Guyot +## +## SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +## +menu "AtomVM configuration" + + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable Just in time compilation, or just execution of precompiled native code + +endmenu From fe7c24474bd12133035400bea7ff7ba80a20a9f4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 27 Apr 2026 22:10:09 +0200 Subject: [PATCH 6/8] JIT xtensa: test with qemu Signed-off-by: Paul Guyot --- libs/esp32boot/esp32init.erl | 2 +- src/platforms/esp32/components/avm_sys/sys.c | 42 +++++++++++++- src/platforms/esp32/partitions-test.csv | 31 +++++++++++ src/platforms/esp32/test/CMakeLists.txt | 19 ++----- .../test/main/test_erl_sources/CMakeLists.txt | 55 +++++++++++++------ .../test_erl_sources/test_esp_partition.erl | 8 +-- .../test_erl_sources/test_jit_compile.erl | 7 +-- .../main/test_erl_sources/test_jit_simple.erl | 2 - src/platforms/esp32/test/main/test_main.c | 5 +- 
src/platforms/esp32/test/sdkconfig.defaults | 4 +- tests/libs/jit/CMakeLists.txt | 3 + 11 files changed, 131 insertions(+), 47 deletions(-) create mode 100644 src/platforms/esp32/partitions-test.csv diff --git a/libs/esp32boot/esp32init.erl b/libs/esp32boot/esp32init.erl index efe15be4ec..092dd5deba 100644 --- a/libs/esp32boot/esp32init.erl +++ b/libs/esp32boot/esp32init.erl @@ -24,7 +24,7 @@ start() -> console:print(<<"AtomVM init.\n">>), - {ok, _} = kernel:start(boot, []), + {ok, _} = kernel:start(normal, []), boot(). is_dev_mode_enabled(SystemStatus) -> diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 665da32f56..42786b6c8e 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -918,10 +918,10 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) ModuleNativeEntryPoint sys_map_native_code(const uint8_t *code, size_t code_size) { - UNUSED(code_size); uintptr_t addr = (uintptr_t) code; #if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + UNUSED(code_size); // On RISC-V ESP32 targets, native code in flash needs to be accessed // through the instruction cache (IROM) not data cache (DROM) #if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2) @@ -947,6 +947,46 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *code, size_t code_size } } pthread_mutex_unlock(&s_xtensa_part_mutex); + // If addr is still in DROM, no partition mapping was found. Fall back to + // on-demand spi_flash_mmap(INST) for native code embedded in the app ELF + // binary (e.g. via _binary_xxx_start / ConstAVMPack in the test harness). + // Failure to install an IBUS mapping returns NULL so the caller falls back + // to interpreted mode rather than jumping to a non-executable DROM address. 
+ if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) { + size_t phys_addr = spi_flash_cache2phys((void *) addr); + if (phys_addr == SPI_FLASH_CACHE2PHYS_FAIL) { + ESP_LOGE(TAG, "spi_flash_cache2phys failed for 0x%" PRIx32, (uint32_t) addr); + return NULL; + } + size_t page_size = SPI_FLASH_MMU_PAGE_SIZE; + size_t phys_page_base = phys_addr & ~(page_size - 1); + size_t offset_in_page = phys_addr - phys_page_base; + size_t map_size = (offset_in_page + code_size + page_size - 1) & ~(page_size - 1); + const void *ibus_ptr; + spi_flash_mmap_handle_t ibus_handle; + if (spi_flash_mmap(phys_page_base, map_size, SPI_FLASH_MMAP_INST, &ibus_ptr, &ibus_handle) != ESP_OK) { + ESP_LOGE(TAG, "spi_flash_mmap(INST) failed for 0x%" PRIx32, (uint32_t) addr); + return NULL; + } + pthread_mutex_lock(&s_xtensa_part_mutex); + struct xtensa_part_mapping *new_mappings = realloc(s_xtensa_part_mappings, + (s_xtensa_part_count + 1) * sizeof(struct xtensa_part_mapping)); + if (IS_NULL_PTR(new_mappings)) { + pthread_mutex_unlock(&s_xtensa_part_mutex); + spi_flash_munmap(ibus_handle); + ESP_LOGE(TAG, "Failed to alloc IBUS mapping for embedded native code"); + return NULL; + } + uintptr_t dbus_page_base = addr & ~((uintptr_t) (page_size - 1)); + s_xtensa_part_mappings = new_mappings; + s_xtensa_part_mappings[s_xtensa_part_count].dbus_base = dbus_page_base; + s_xtensa_part_mappings[s_xtensa_part_count].ibus_base = (uintptr_t) ibus_ptr; + s_xtensa_part_mappings[s_xtensa_part_count].size = map_size; + s_xtensa_part_mappings[s_xtensa_part_count].ibus_handle = ibus_handle; + s_xtensa_part_count++; + pthread_mutex_unlock(&s_xtensa_part_mutex); + addr = (uintptr_t) ibus_ptr + offset_in_page; + } #endif return (ModuleNativeEntryPoint) addr; diff --git a/src/platforms/esp32/partitions-test.csv b/src/platforms/esp32/partitions-test.csv new file mode 100644 index 0000000000..259f20360a --- /dev/null +++ b/src/platforms/esp32/partitions-test.csv @@ -0,0 +1,31 @@ +# +# This file is part of AtomVM. 
+# +# Copyright 2026 Paul Guyot +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + +# Test partition layout (8MB flash): the factory partition is large enough to +# embed all Erlang test modules together with the AOT-compiled atomvmlib needed +# to resolve estdlib calls when JIT is enabled (AVM_NO_EMU mode). The main.avm +# partition is unused by the host firmware but kept so the test_esp_partition +# enumeration test has a stable layout. +# Name, Type, SubType, Offset, Size, Flags +nvs, data, nvs, 0x9000, 0x6000, +phy_init, data, phy, 0xf000, 0x1000, +factory, app, factory, 0x10000, 0x500000, +lib.avm, data, phy, 0x510000, 0x80000, +main.avm, data, phy, 0x590000, 0x40000, +data, data, phy, 0x5d0000, 0x10000 diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index e248dc84a2..45bbe2c8d3 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,21 +57,10 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -## Configure partition table based on JIT -# Detect JIT from existing sdkconfig before project() where CONFIG_* is not yet available. 
-set(_jit_partition NO) -if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") - file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig" _sdkconfig_lines) - if("CONFIG_JIT_ENABLED=y" IN_LIST _sdkconfig_lines) - set(_jit_partition YES) - endif() -endif() - -if (_jit_partition) - set(AVM_PARTITION_TABLE_FILENAME "../partitions-jit.csv") -else() - set(AVM_PARTITION_TABLE_FILENAME "../partitions.csv") -endif() +# The test uses partitions-test.csv which provides a large factory partition +# (5MB) for the test binary that embeds all Erlang test modules, plus a +# main.avm partition used as JIT flash cache when JIT is enabled. +set(AVM_PARTITION_TABLE_FILENAME "../partitions-test.csv") # Sync partition table in sdkconfig so that IDF uses the correct partition layout. if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sdkconfig") diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index 77cafc7696..ebae8f4b79 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,16 +20,40 @@ add_library(esp32_test_modules) +# CONFIG_* is available here because idf_component_register was already called +# by the parent main/CMakeLists.txt. Replicate the JIT detection logic from +# components/libatomvm/CMakeLists.txt since that directory's variables do not +# propagate into this sibling subdirectory scope.
+if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c5|esp32c6|esp32c61|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + elseif(${IDF_TARGET} MATCHES "esp32|esp32s2|esp32s3") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH xtensa) + else() + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) +endif() + include(ExternalProject) if(NOT AVM_DISABLE_JIT) -set(host_atomvm_jit_target "--target=jit") + # AVM_ENABLE_PRECOMPILED is needed for esp32boot-${arch}.avm (AOT-compiled + # estdlib/eavmlib/avm_esp32) which the JIT-precompiled tests rely on for + # estdlib NIFs in AVM_NO_EMU mode. + set(host_atomvm_cmake_args "-DAVM_ENABLE_PRECOMPILED=ON") + set(host_atomvm_extra_targets "--target=jit" "--target=esp32boot") else() -set(host_atomvm_jit_target "") + set(host_atomvm_cmake_args "") + set(host_atomvm_extra_targets "") endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ + CMAKE_ARGS ${host_atomvm_cmake_args} INSTALL_COMMAND cmake -E echo "Skipping install step." - BUILD_COMMAND cmake --build . --target=atomvmlib-esp32 ${host_atomvm_jit_target} --target=PackBEAM + BUILD_COMMAND cmake --build . 
--target=atomvmlib-esp32 ${host_atomvm_extra_targets} --target=PackBEAM ) macro(jit_precompile module_name) @@ -72,7 +96,7 @@ function(compile_erlang_no_jit module_name) endfunction() compile_erlang(test_jit_compile) -compile_erlang_no_jit(test_jit_simple) +compile_erlang(test_jit_simple) compile_erlang(test_esp_partition) compile_erlang(test_esp_timer_get_time) compile_erlang(test_file) @@ -98,6 +122,7 @@ compile_erlang(test_wifi_scan) set(erlang_test_beams test_jit_compile.beam + test_jit_simple.beam test_esp_partition.beam test_esp_timer_get_time.beam test_file.beam @@ -125,27 +150,25 @@ set(erlang_test_beams if(NOT AVM_DISABLE_JIT) set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) - # test_jit_simple is always plain BEAM (no JIT precompile) so the code_server - # can compile it at runtime via jit_stream_flash, testing the full JIT pipeline. set(erlang_test_beams_to_package - ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}} - test_jit_simple.beam) + ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) set(erlang_test_beams_depends ${erlang_test_beams} - ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}} - test_jit_simple.beam) - set(jit_avm_arg HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/jit.avm) + ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + # In AVM_NO_EMU mode (JIT-only) tests need AOT-compiled estdlib/eavmlib/avm_esp32 + # to resolve calls like erlang:md5/1 (which is an estdlib wrapper around crypto:hash/2). + # esp32boot-${arch}.avm packages exactly those, AOT-compiled for the target arch. 
+ set(atomvmlib_avm "HostAtomVM-prefix/src/HostAtomVM-build/libs/esp32boot/esp32boot-${AVM_JIT_TARGET_ARCH}.avm") else() - set(erlang_test_beams_to_package ${erlang_test_beams} test_jit_simple.beam) - set(erlang_test_beams_depends ${erlang_test_beams} test_jit_simple.beam) - set(jit_avm_arg "") + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) + set(atomvmlib_avm "HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib-esp32.avm") endif() add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/packbeam create esp32_test_modules.avm - HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib-esp32.avm - ${jit_avm_arg} + ${atomvmlib_avm} ${erlang_test_beams_to_package} DEPENDS HostAtomVM diff --git a/src/platforms/esp32/test/main/test_erl_sources/test_esp_partition.erl b/src/platforms/esp32/test/main/test_erl_sources/test_esp_partition.erl index 9387fd2fb3..48517831d2 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/test_esp_partition.erl +++ b/src/platforms/esp32/test/main/test_erl_sources/test_esp_partition.erl @@ -25,10 +25,10 @@ start() -> [ {<<"nvs">>, 1, 2, 16#9000, 16#6000, []}, {<<"phy_init">>, 1, 1, 16#f000, 16#1000, []}, - {<<"factory">>, 0, 0, 16#10000, 16#2C0000, []}, - {<<"lib.avm">>, 1, 1, 16#2D0000, 16#40000, []}, - {<<"main.avm">>, 1, 1, 16#310000, 16#40000, []}, - {<<"data">>, 1, 1, 16#350000, 16#10000, []} + {<<"factory">>, 0, 0, 16#10000, 16#500000, []}, + {<<"lib.avm">>, 1, 1, 16#510000, 16#80000, []}, + {<<"main.avm">>, 1, 1, 16#590000, 16#40000, []}, + {<<"data">>, 1, 1, 16#5D0000, 16#10000, []} ] = esp:partition_list(), ok = esp:partition_erase_range(<<"data">>, 0), ok = esp:partition_write(<<"data">>, 0, <<"hello">>), diff --git a/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl b/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl index c89be94265..7a73c4c299 
100644 --- a/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl +++ b/src/platforms/esp32/test/main/test_erl_sources/test_jit_compile.erl @@ -18,15 +18,12 @@ % SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later % -% Tests runtime JIT compilation via jit_stream_flash. -% test_jit_simple is pre-loaded as plain BEAM (no native code) by the C test. -% Calling code_server:load/1 triggers the JIT compiler to compile it at runtime -% and store the result in flash, then execute it natively. +% Tests that this JIT-precompiled xtensa module can call another JIT-precompiled module. +% Both test_jit_compile and test_jit_simple are precompiled for the target arch at build time. -module(test_jit_compile). -export([start/0]). start() -> - ok = code_server:load(test_jit_simple), 42 = test_jit_simple:run(), ok. diff --git a/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl b/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl index 9d64e90a8e..6935a95204 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl +++ b/src/platforms/esp32/test/main/test_erl_sources/test_jit_simple.erl @@ -18,8 +18,6 @@ % SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later % -% This module is intentionally NOT JIT-precompiled. -% It is included as plain BEAM to test runtime JIT compilation via jit_stream_flash. -module(test_jit_simple). -export([run/0]). diff --git a/src/platforms/esp32/test/main/test_main.c b/src/platforms/esp32/test/main/test_main.c index 62f6d5fc77..752b2153bf 100644 --- a/src/platforms/esp32/test/main/test_main.c +++ b/src/platforms/esp32/test/main/test_main.c @@ -200,8 +200,9 @@ TEST_CASE("test_jit_compile", "[test_run]") avmpack_data->base.data = main_avm; synclist_append(&glb->avmpack_data, &avmpack_data->base.avmpack_head); - // Pre-load test_jit_simple as plain BEAM so code_server:code_chunk/1 can find it. - // code_server:load/1 will then JIT-compile it at runtime via jit_stream_flash. 
+ // Pre-load test_jit_simple (JIT precompiled) before running test_jit_compile. + // test_jit_compile:start/0 calls test_jit_simple:run/0 to verify that one + // JIT-precompiled module can call another. Module *jit_simple_mod = globalcontext_load_module_from_avm(glb, "test_jit_simple.beam"); TEST_ASSERT(jit_simple_mod != NULL); globalcontext_insert_module(glb, jit_simple_mod); diff --git a/src/platforms/esp32/test/sdkconfig.defaults b/src/platforms/esp32/test/sdkconfig.defaults index 9a791113ef..d7b870cae2 100644 --- a/src/platforms/esp32/test/sdkconfig.defaults +++ b/src/platforms/esp32/test/sdkconfig.defaults @@ -1,5 +1,7 @@ CONFIG_PARTITION_TABLE_CUSTOM=y -CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y +CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="../partitions-test.csv" +CONFIG_PARTITION_TABLE_FILENAME="../partitions-test.csv" +CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y CONFIG_ESPTOOLPY_FLASHSIZE_DETECT=n CONFIG_ESP_INT_WDT_TIMEOUT_MS=10000 CONFIG_ETH_USE_OPENETH=y diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 282388e4cb..7433266c58 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -70,3 +70,6 @@ else() ) endif() pack_eunit(test_jit estdlib eavmlib etest jit) +if (AVM_DISABLE_JIT_DWARF) + add_dependencies(test_jit jit_dwarf_tests_for_beam_tests jit_dwarf_for_beam_tests) +endif() From d47241bf1f3b51e0155a6cd9f58763e454f93214 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 16 May 2026 08:17:15 +0200 Subject: [PATCH 7/8] Bump timeout for xtensa tests Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 71c00193fc..50900608e3 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -929,7 +929,7 @@ jobs: valgrind --error-exitcode=1 ./src/AtomVM tests/libs/jit/test_jit.avm - name: "Test: test_jit.avm" - timeout-minutes: 
60 + timeout-minutes: 120 working-directory: build run: | ulimit -c unlimited From e9c3d65f4005aff4b07cea2c411e1489bf5c6c94 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 19 May 2026 17:19:40 +0000 Subject: [PATCH 8/8] Optimize garbage collection with a generational GC Running jit tests with AtomVM is now 20% faster. Implement BEAM's `fullsweep_after` `spawn_opt/1` option and `process_flag/2` flag. Also fix `process_flag/2` spec. Also fix a bug where a finishing process in spawning state wasn't properly removed from the processes list. Signed-off-by: Paul Guyot --- doc/src/memory-management.md | 37 ++ doc/src/programmers-guide.md | 1 + libs/estdlib/src/erlang.erl | 5 +- libs/jit/src/jit_aarch64.erl | 16 +- libs/jit/src/jit_armv6m.erl | 14 +- libs/jit/src/jit_riscv32.erl | 17 +- libs/jit/src/jit_x86_64.erl | 24 +- libs/jit/src/jit_xtensa.erl | 16 +- src/libAtomVM/context.c | 25 + src/libAtomVM/context.h | 2 + src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/erl_nif_priv.h | 10 + src/libAtomVM/jit.c | 24 +- src/libAtomVM/memory.c | 505 ++++++++++++++-- src/libAtomVM/memory.h | 22 + src/libAtomVM/nifs.c | 16 + tests/erlang_tests/CMakeLists.txt | 2 + tests/erlang_tests/test_heap_growth.erl | 12 +- .../test_process_flag_fullsweep_after.erl | 73 +++ tests/libs/jit/jit_aarch64_tests.erl | 284 ++++----- tests/libs/jit/jit_armv6m_tests.erl | 540 +++++++++--------- tests/libs/jit/jit_riscv32_tests.erl | 506 ++++++++-------- tests/libs/jit/jit_tests.erl | 32 +- tests/libs/jit/jit_tests_common.erl | 88 ++- tests/libs/jit/jit_x86_64_tests.erl | 357 ++++++------ tests/libs/jit/jit_xtensa_tests.erl | 318 +++++------ tests/test-heap.c | 297 ++++++++++ tests/test.c | 1 + 28 files changed, 2114 insertions(+), 1131 deletions(-) create mode 100644 tests/erlang_tests/test_process_flag_fullsweep_after.erl diff --git a/doc/src/memory-management.md b/doc/src/memory-management.md index 77ed91f2b8..17042182a6 100644 --- a/doc/src/memory-management.md +++ 
b/doc/src/memory-management.md @@ -922,3 +922,40 @@ match binaries, as with the case of refc binaries on the process heap. #### Deletion Once all terms have been copied from the old heap to the new heap, and once the MSO list has been swept for unreachable references, the old heap is simply discarded via the `free` function. + +### Generational Garbage Collection + +The garbage collection described above is a *full sweep*: every live term is copied from the old heap to the new heap and the entire old heap is freed. While correct, this can be expensive for processes with large heaps, because long-lived data that has already survived previous collections must be copied again each time. + +AtomVM implements *generational* (or *minor*) garbage collection to reduce this cost, using the same approach as BEAM. The key observation is that most terms die young: they are allocated, used briefly, and become garbage. Terms that have survived at least one collection are likely to survive many more. Generational GC exploits this by dividing the heap into two generations: + +* **Young generation**: recently allocated terms, between the *high water mark* and the current heap pointer. +* **Old (mature) generation**: terms that have survived at least one minor collection, stored in a separate old heap. + +#### High Water Mark + +After each garbage collection, the heap pointer position is recorded as the *high water mark*. On the next collection, terms allocated below the high water mark (i.e., terms that existed at the time of the previous collection) are considered mature. Terms allocated above the high water mark are young. + +#### Minor Collection + +During a minor collection: + +1. A new young heap is allocated. +2. Mature terms (below the high water mark) are *promoted*: copied to the old heap rather than the new young heap. +3. Young terms that are still reachable are copied to the new young heap. +4. 
Both the new young heap and the newly promoted old region are scanned for references, since promoted terms may reference young terms and vice versa. +5. Only the young MSO list is swept; the old MSO list is preserved. +6. The previous heap is freed, but the old heap persists across minor collections. + +Because the old heap is not scanned for garbage during a minor collection, the cost is proportional to the size of the young generation rather than the entire heap. + +#### When Full vs. Minor Collection Occurs + +AtomVM keeps a counter (`gc_count`) of how many minor collections have occurred since the last full sweep. A full sweep is forced when: + +* The process has never been garbage collected (no high water mark exists). +* `gc_count` reaches the `fullsweep_after` threshold. +* The old heap does not have enough space to accommodate promoted terms. +* A `MEMORY_FORCE_SHRINK` request is made (e.g., via `erlang:garbage_collect/0`). + +The `fullsweep_after` value can be set per-process via [`spawn_opt`](./programmers-guide.md#spawning-processes) or [`erlang:process_flag/2`](./apidocs/erlang/estdlib/erlang.md#process_flag2). The default value is 65535, meaning full sweeps are infrequent under normal operation. Setting it to `0` disables generational collection entirely, forcing a full sweep on every garbage collection event. diff --git a/doc/src/programmers-guide.md b/doc/src/programmers-guide.md index 6dd288a0b6..adf445555b 100644 --- a/doc/src/programmers-guide.md +++ b/doc/src/programmers-guide.md @@ -365,6 +365,7 @@ The [options](./apidocs/erlang/estdlib/erlang.md#spawn_option) argument is a pro |-----|------------|---------------|-------------| | `min_heap_size` | `non_neg_integer()` | none | Minimum heap size of the process. The heap will shrink no smaller than this size. | | `max_heap_size` | `non_neg_integer()` | unbounded | Maximum heap size of the process. The heap will grow no larger than this size. 
| +| `fullsweep_after` | `non_neg_integer()` | 65535 | Maximum number of [minor garbage collections](./memory-management.md#generational-garbage-collection) before a full sweep is forced. Set to `0` to disable generational garbage collection. | | `link` | `boolean()` | `false` | Whether to link the spawned process to the spawning process. | | `monitor` | `boolean()` | `false` | Whether to link the spawning process should monitor the spawned process. | | `atomvm_heap_growth` | `bounded_free \| minimum \| fibonacci` | `bounded_free` | [Strategy](./memory-management.md#heap-growth-strategies) to grow the heap of the process. | diff --git a/libs/estdlib/src/erlang.erl b/libs/estdlib/src/erlang.erl index 760063cec6..7867eb6756 100644 --- a/libs/estdlib/src/erlang.erl +++ b/libs/estdlib/src/erlang.erl @@ -210,6 +210,7 @@ -type spawn_option() :: {min_heap_size, pos_integer()} | {max_heap_size, pos_integer()} + | {fullsweep_after, non_neg_integer()} | {atomvm_heap_growth, atomvm_heap_growth_strategy()} | link | monitor. @@ -1441,7 +1442,9 @@ group_leader(_Leader, _Pid) -> %% %% @end %%----------------------------------------------------------------------------- --spec process_flag(Flag :: trap_exit, Value :: boolean()) -> pid(). +-spec process_flag + (trap_exit, boolean()) -> boolean(); + (fullsweep_after, non_neg_integer()) -> non_neg_integer(). process_flag(_Flag, _Value) -> erlang:nif_error(undefined). diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index d1054e9f85..5bf024d7dc 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -186,16 +186,16 @@ | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()} | {{free, aarch64_register()}, '==', {free, aarch64_register()}}. -% ctx->e is 0x28 -% ctx->x is 0x30 +% ctx->e is 0x50 +% ctx->x is 0x58 -define(WORD_SIZE, 8). -define(CTX_REG, r0). -define(JITSTATE_REG, r1). -define(NATIVE_INTERFACE_REG, r2). --define(Y_REGS, {?CTX_REG, 16#28}). 
--define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}). --define(CP, {?CTX_REG, 16#B8}). --define(FP_REGS, {?CTX_REG, 16#C0}). +-define(Y_REGS, {?CTX_REG, 16#50}). +-define(X_REG(N), {?CTX_REG, 16#58 + (N * ?WORD_SIZE)}). +-define(CP, {?CTX_REG, 16#E0}). +-define(FP_REGS, {?CTX_REG, 16#E8}). -define(FP_REG_OFFSET(State, F), (F * case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of @@ -203,8 +203,8 @@ _ -> 4 end) ). --define(BS, {?CTX_REG, 16#C8}). --define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(BS, {?CTX_REG, 16#F0}). +-define(BS_OFFSET, {?CTX_REG, 16#F8}). -define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). -define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#8}). -define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#10}). diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index bd0c67b419..c581d2449e 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -189,15 +189,15 @@ | {{free, armv6m_register()}, '==', {free, armv6m_register()}}. % ctx->e is 0x28 -% ctx->x is 0x30 +% ctx->x is 0x2C -define(CTX_REG, r0). -define(NATIVE_INTERFACE_REG, r2). --define(Y_REGS, {?CTX_REG, 16#14}). --define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). --define(CP, {?CTX_REG, 16#5C}). --define(FP_REGS, {?CTX_REG, 16#60}). --define(BS, {?CTX_REG, 16#64}). --define(BS_OFFSET, {?CTX_REG, 16#68}). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#2C + (N * 4)}). +-define(CP, {?CTX_REG, 16#70}). +-define(FP_REGS, {?CTX_REG, 16#74}). +-define(BS, {?CTX_REG, 16#78}). +-define(BS_OFFSET, {?CTX_REG, 16#7C}). % JITSTATE is on stack, accessed via stack offset % These macros now expect a register that contains the jit_state pointer -define(JITSTATE_MODULE(Reg), {Reg, 0}). diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 9784aabc3e..e5bb53a41f 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -204,16 +204,17 @@ | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. 
% Context offsets (32-bit architecture) -% ctx->e is 0x14 -% ctx->x is 0x18 +% ctx->e is 0x28 +% ctx->x is 0x2C -define(CTX_REG, a0). -define(NATIVE_INTERFACE_REG, a2). --define(Y_REGS, {?CTX_REG, 16#14}). --define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). --define(CP, {?CTX_REG, 16#5C}). --define(FP_REGS, {?CTX_REG, 16#60}). --define(BS, {?CTX_REG, 16#64}). --define(BS_OFFSET, {?CTX_REG, 16#68}). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#2C + (N * 4)}). +-define(CP, {?CTX_REG, 16#70}). +-define(FP_REGS, {?CTX_REG, 16#74}). +-define(BS, {?CTX_REG, 16#78}). +-define(BS_OFFSET, {?CTX_REG, 16#7C}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) -define(JITSTATE_REG, a1). -define(RA_REG, ra). -define(JITSTATE_MODULE_OFFSET, 0). diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 353982683f..9f02240088 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -172,19 +172,19 @@ -define(WORD_SIZE, 8). % Following offsets are verified with static asserts in jit.c -% ctx->e is 0x28 -% ctx->x is 0x30 -% ctx->cp is 0xB8 -% ctx->fr is 0xC0 -% ctx->bs is 0xC8 -% ctx->bs_offset is 0xD0 +% ctx->e is 0x50 +% ctx->x is 0x58 +% ctx->cp is 0xE0 +% ctx->fr is 0xE8 +% ctx->bs is 0xF0 +% ctx->bs_offset is 0xF8 -define(CTX_REG, rdi). -define(JITSTATE_REG, rsi). -define(NATIVE_INTERFACE_REG, rdx). --define(Y_REGS, {16#28, ?CTX_REG}). --define(X_REG(N), {16#30 + (N * ?WORD_SIZE), ?CTX_REG}). --define(CP, {16#B8, ?CTX_REG}). --define(FP_REGS, {16#C0, ?CTX_REG}). +-define(Y_REGS, {16#50, ?CTX_REG}). +-define(X_REG(N), {16#58 + (N * ?WORD_SIZE), ?CTX_REG}). +-define(CP, {16#E0, ?CTX_REG}). +-define(FP_REGS, {16#E8, ?CTX_REG}). -define(FP_REG_OFFSET(State, F), (F * case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of @@ -192,8 +192,8 @@ _ -> 4 end) ). --define(BS, {16#C8, ?CTX_REG}). --define(BS_OFFSET, {16#D0, ?CTX_REG}). +-define(BS, {16#F0, ?CTX_REG}). +-define(BS_OFFSET, {16#F8, ?CTX_REG}). 
-define(JITSTATE_MODULE, {0, ?JITSTATE_REG}). -define(JITSTATE_CONTINUATION, {16#8, ?JITSTATE_REG}). -define(JITSTATE_REMAINING_REDUCTIONS, {16#10, ?JITSTATE_REG}). diff --git a/libs/jit/src/jit_xtensa.erl b/libs/jit/src/jit_xtensa.erl index b72f2d6004..bd67598930 100644 --- a/libs/jit/src/jit_xtensa.erl +++ b/libs/jit/src/jit_xtensa.erl @@ -197,16 +197,16 @@ | {{free, xtensa_register()}, '==', {free, xtensa_register()}}. % Context offsets (32-bit architecture) -% ctx->e is 0x14 -% ctx->x is 0x18 +% ctx->e is 0x28 +% ctx->x is 0x2C -define(CTX_REG, a2). -define(NATIVE_INTERFACE_REG, a4). --define(Y_REGS, {?CTX_REG, 16#14}). --define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). --define(CP, {?CTX_REG, 16#5C}). --define(FP_REGS, {?CTX_REG, 16#60}). --define(BS, {?CTX_REG, 16#64}). --define(BS_OFFSET, {?CTX_REG, 16#68}). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#2C + (N * 4)}). +-define(CP, {?CTX_REG, 16#70}). +-define(FP_REGS, {?CTX_REG, 16#74}). +-define(BS, {?CTX_REG, 16#78}). +-define(BS_OFFSET, {?CTX_REG, 16#7C}). -define(JITSTATE_REG, a3). -define(JITSTATE_MODULE_OFFSET, 0). -define(JITSTATE_CONTINUATION_OFFSET, 16#4). diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c index b8b29273d5..a8d2f75333 100644 --- a/src/libAtomVM/context.c +++ b/src/libAtomVM/context.c @@ -81,6 +81,8 @@ Context *context_new(GlobalContext *glb) ctx->min_heap_size = 0; ctx->max_heap_size = 0; ctx->heap_growth_strategy = BoundedFreeHeapGrowth; + ctx->fullsweep_after = 65535; + ctx->gc_count = 0; ctx->has_min_heap_size = 0; ctx->has_max_heap_size = 0; @@ -139,6 +141,14 @@ void context_destroy(Context *ctx) // Hold and release the spin lock for timers and cancel any timer scheduler_cancel_timeout(ctx); + // If the process was never scheduled (still in Spawning state), + // it is still in the waiting_processes list and must be removed. 
+ if (ctx->flags & Spawning) { + SMP_SPINLOCK_LOCK(&ctx->global->processes_spinlock); + list_remove(&ctx->processes_list_head); + SMP_SPINLOCK_UNLOCK(&ctx->global->processes_spinlock); + } + // Another process can get an access to our mailbox until this point. struct ListHead *processes_table_list = synclist_wrlock(&ctx->global->processes_table); UNUSED(processes_table_list); @@ -533,6 +543,7 @@ bool context_get_process_info(Context *ctx, term *out, size_t *term_size, term a case MESSAGE_QUEUE_LEN_ATOM: case REGISTERED_NAME_ATOM: case MEMORY_ATOM: + case FULLSWEEP_AFTER_ATOM: ret_size = TUPLE_SIZE(2); break; case LINKS_ATOM: { @@ -683,6 +694,12 @@ bool context_get_process_info(Context *ctx, term *out, size_t *term_size, term a break; } + case FULLSWEEP_AFTER_ATOM: { + term_put_tuple_element(ret, 0, FULLSWEEP_AFTER_ATOM); + term_put_tuple_element(ret, 1, term_from_int(ctx->fullsweep_after)); + break; + } + case CURRENT_STACKTRACE_ATOM: { term_put_tuple_element(ret, 0, CURRENT_STACKTRACE_ATOM); // FIXME: since it's not possible how to build stacktrace here with the current API, @@ -1218,6 +1235,14 @@ COLD_FUNC void context_dump(Context *ctx) ct++; } + fprintf(stderr, "\n\nHeap\n----\n"); + fprintf(stderr, "young heap: %zu words\n", (size_t) (ctx->heap.heap_end - ctx->heap.heap_start)); + if (ctx->heap.old_heap_start) { + fprintf(stderr, "old heap: %zu words (used: %zu)\n", + (size_t) (ctx->heap.old_heap_end - ctx->heap.old_heap_start), + (size_t) (ctx->heap.old_heap_ptr - ctx->heap.old_heap_start)); + } + fprintf(stderr, "\n\nMailbox\n-------\n"); mailbox_crashdump(ctx); diff --git a/src/libAtomVM/context.h b/src/libAtomVM/context.h index b425e514ce..9fcaf69bc4 100644 --- a/src/libAtomVM/context.h +++ b/src/libAtomVM/context.h @@ -113,6 +113,8 @@ struct Context size_t min_heap_size; size_t max_heap_size; enum HeapGrowthStrategy heap_growth_strategy; + unsigned int fullsweep_after; + unsigned int gc_count; // saved state when scheduled out Module *saved_module; diff 
--git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 6dc95175d9..6efb902ec1 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -213,6 +213,7 @@ X(EMU_ATOM, "\x3", "emu") X(JIT_ATOM, "\x3", "jit") X(EMU_FLAVOR_ATOM, "\xA", "emu_flavor") X(CODE_SERVER_ATOM, "\xB", "code_server") +X(FULLSWEEP_AFTER_ATOM, "\xF", "fullsweep_after") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") diff --git a/src/libAtomVM/erl_nif_priv.h b/src/libAtomVM/erl_nif_priv.h index 008dca8a8b..86d83ccb90 100644 --- a/src/libAtomVM/erl_nif_priv.h +++ b/src/libAtomVM/erl_nif_priv.h @@ -63,6 +63,11 @@ static inline void erl_nif_env_partial_init_from_globalcontext(ErlNifEnv *env, G env->heap.heap_start = NULL; env->heap.heap_ptr = NULL; env->heap.heap_end = NULL; + env->heap.high_water_mark = NULL; + env->heap.old_heap_start = NULL; + env->heap.old_heap_ptr = NULL; + env->heap.old_heap_end = NULL; + env->heap.old_mso_list = term_nil(); env->stack_pointer = NULL; env->x[0] = term_nil(); env->x[1] = term_nil(); @@ -76,6 +81,11 @@ static inline void erl_nif_env_partial_init_from_resource(ErlNifEnv *env, void * env->heap.heap_start = NULL; env->heap.heap_ptr = NULL; env->heap.heap_end = NULL; + env->heap.high_water_mark = NULL; + env->heap.old_heap_start = NULL; + env->heap.old_heap_ptr = NULL; + env->heap.old_heap_end = NULL; + env->heap.old_mso_list = term_nil(); env->stack_pointer = NULL; env->x[0] = term_nil(); env->x[1] = term_nil(); diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 2cd85ef86c..f32c860345 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -148,23 +148,23 @@ _Static_assert( // Verify offsets in jit_x86_64.erl #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 || JIT_ARCH_TARGET == JIT_ARCH_AARCH64 || JIT_ARCH_TARGET == JIT_ARCH_RISCV64 -_Static_assert(offsetof(Context, e) == 0x28, "ctx->e is 0x28 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); 
-_Static_assert(offsetof(Context, x) == 0x30, "ctx->x is 0x30 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); -_Static_assert(offsetof(Context, cp) == 0xB8, "ctx->cp is 0xB8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); -_Static_assert(offsetof(Context, fr) == 0xC0, "ctx->fr is 0xC0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); -_Static_assert(offsetof(Context, bs) == 0xC8, "ctx->bs is 0xC8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); -_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, e) == 0x50, "ctx->e is 0x50 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, x) == 0x58, "ctx->x is 0x58 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, cp) == 0xE0, "ctx->cp is 0xE0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, fr) == 0xE8, "ctx->fr is 0xE8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, bs) == 0xF0, "ctx->bs is 0xF0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0xF8, "ctx->bs_offset is 0xF8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_{aarch64,x86_64,riscv64}.erl"); #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M || JIT_ARCH_TARGET == JIT_ARCH_ARM32 || JIT_ARCH_TARGET == JIT_ARCH_RISCV32 || JIT_ARCH_TARGET == JIT_ARCH_WASM32 || JIT_ARCH_TARGET == JIT_ARCH_XTENSA -_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in 32-bit backends"); -_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in 
32-bit backends"); -_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in 32-bit backends"); -_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in 32-bit backends"); -_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in 32-bit backends"); -_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in 32-bit backends"); +_Static_assert(offsetof(Context, e) == 0x28, "ctx->e is 0x28 in 32-bit backends"); +_Static_assert(offsetof(Context, x) == 0x2C, "ctx->x is 0x2C in 32-bit backends"); +_Static_assert(offsetof(Context, cp) == 0x70, "ctx->cp is 0x70 in 32-bit backends"); +_Static_assert(offsetof(Context, fr) == 0x74, "ctx->fr is 0x74 in 32-bit backends"); +_Static_assert(offsetof(Context, bs) == 0x78, "ctx->bs is 0x78 in 32-bit backends"); +_Static_assert(offsetof(Context, bs_offset) == 0x7C, "ctx->bs_offset is 0x7C in 32-bit backends"); _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in 32-bit backends"); _Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in 32-bit backends"); diff --git a/src/libAtomVM/memory.c b/src/libAtomVM/memory.c index 7c545b9d49..ac5b09c67b 100644 --- a/src/libAtomVM/memory.c +++ b/src/libAtomVM/memory.c @@ -46,12 +46,14 @@ #ifdef ENABLE_REALLOC_GC #define MEMORY_SHRINK memory_shrink #else -#define MEMORY_SHRINK memory_gc +#define MEMORY_SHRINK memory_full_gc #endif static void memory_scan_and_copy(HeapFragment *old_fragment, term *mem_start, const term *mem_end, term **new_heap_pos, term *mso_list, bool move); static term memory_shallow_copy_term(HeapFragment *old_fragment, term t, term **new_heap, bool move); static enum MemoryGCResult memory_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots); +static enum MemoryGCResult memory_full_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots); +static enum MemoryGCResult memory_minor_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots); 
#ifdef ENABLE_REALLOC_GC static enum MemoryGCResult memory_shrink(Context *ctx, size_t new_size, size_t num_roots, term *roots); static void memory_scan_and_rewrite(size_t count, term *terms, const term *old_start, const term *old_end, intptr_t delta, bool is_heap); @@ -76,6 +78,11 @@ void memory_init_heap_root_fragment(Heap *heap, HeapFragment *root, size_t size) heap->heap_start = root->storage; heap->heap_ptr = heap->heap_start; heap->heap_end = heap->heap_start + size; + heap->high_water_mark = NULL; + heap->old_heap_start = NULL; + heap->old_heap_ptr = NULL; + heap->old_heap_end = NULL; + heap->old_mso_list = term_nil(); } #ifdef ENABLE_REALLOC_GC @@ -204,42 +211,55 @@ enum MemoryGCResult memory_ensure_free_with_roots(Context *c, size_t size, size_ if (UNLIKELY(c->has_max_heap_size && (target_size > c->max_heap_size))) { return MEMORY_GC_DENIED_ALLOCATION; } - if (UNLIKELY(memory_gc(c, target_size, num_roots, roots) != MEMORY_GC_OK)) { + enum MemoryGCResult gc_result; + if (alloc_mode == MEMORY_FORCE_SHRINK) { + gc_result = memory_full_gc(c, target_size, num_roots, roots); + if (gc_result == MEMORY_GC_OK) { + c->heap.high_water_mark = c->heap.heap_ptr; + c->gc_count = 0; + } + } else { + gc_result = memory_gc(c, target_size, num_roots, roots); + } + if (UNLIKELY(gc_result != MEMORY_GC_OK)) { // TODO: handle this more gracefully TRACE("Unable to allocate memory for GC. 
target_size=%zu\n", target_size); return MEMORY_GC_ERROR_FAILED_ALLOCATION; } - should_gc = alloc_mode == MEMORY_FORCE_SHRINK; - size_t new_memory_size = memory_heap_memory_size(&c->heap); - size_t new_target_size = new_memory_size; - size_t new_free_space = context_avail_free_memory(c); - switch (c->heap_growth_strategy) { - case BoundedFreeHeapGrowth: { - size_t maximum_free_space = 2 * (size + MIN_FREE_SPACE_SIZE); - should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > maximum_free_space); - if (should_gc) { - new_target_size = (new_memory_size - new_free_space) + maximum_free_space; - } - } break; - case MinimumHeapGrowth: - should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > 0); - if (should_gc) { - new_target_size = new_memory_size - new_free_space + size; - } - break; - case FibonacciHeapGrowth: - should_gc = should_gc || (new_memory_size > FIBONACCI_HEAP_GROWTH_REDUCTION_THRESHOLD && new_free_space >= 3 * new_memory_size / 4); - if (should_gc) { - new_target_size = next_fibonacci_heap_size(new_memory_size - new_free_space + size); - } - break; - } - if (should_gc) { - new_target_size = MAX(c->has_min_heap_size ? c->min_heap_size : 0, new_target_size); - if (new_target_size != new_memory_size) { - if (UNLIKELY(MEMORY_SHRINK(c, new_target_size, num_roots, roots) != MEMORY_GC_OK)) { - TRACE("Unable to allocate memory for GC shrink. 
new_memory_size=%zu new_free_space=%zu size=%u\n", new_memory_size, new_free_space, (unsigned int) size); - return MEMORY_GC_ERROR_FAILED_ALLOCATION; + { + should_gc = alloc_mode == MEMORY_FORCE_SHRINK; + size_t new_memory_size = memory_heap_memory_size(&c->heap); + size_t new_free_space = context_avail_free_memory(c); + size_t new_target_size = new_memory_size; + switch (c->heap_growth_strategy) { + case BoundedFreeHeapGrowth: { + size_t maximum_free_space = 2 * (size + MIN_FREE_SPACE_SIZE); + should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > maximum_free_space); + if (should_gc) { + new_target_size = (new_memory_size - new_free_space) + maximum_free_space; + } + } break; + case MinimumHeapGrowth: + should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > 0); + if (should_gc) { + new_target_size = new_memory_size - new_free_space + size; + } + break; + case FibonacciHeapGrowth: + should_gc = should_gc || (new_memory_size > FIBONACCI_HEAP_GROWTH_REDUCTION_THRESHOLD && new_free_space >= 3 * new_memory_size / 4); + if (should_gc) { + new_target_size = next_fibonacci_heap_size(new_memory_size - new_free_space + size); + } + break; + } + if (should_gc) { + new_target_size = MAX(c->has_min_heap_size ? c->min_heap_size : 0, new_target_size); + if (new_target_size != new_memory_size) { + if (UNLIKELY(MEMORY_SHRINK(c, new_target_size, num_roots, roots) != MEMORY_GC_OK)) { + TRACE("Unable to allocate memory for GC shrink. 
new_memory_size=%zu new_free_space=%zu size=%u\n", new_memory_size, new_free_space, (unsigned int) size); + return MEMORY_GC_ERROR_FAILED_ALLOCATION; + } + c->heap.high_water_mark = c->heap.heap_ptr; } } } @@ -257,13 +277,46 @@ static inline void push_to_stack(term **stack, term value) static enum MemoryGCResult memory_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots) { - TRACE("Going to perform gc on process %i\n", ctx->process_id); + bool force_full = ctx->fullsweep_after == 0 || ctx->gc_count >= ctx->fullsweep_after; + if (ctx->heap.high_water_mark == NULL || force_full) { + enum MemoryGCResult result = memory_full_gc(ctx, new_size, num_roots, roots); + if (result == MEMORY_GC_OK) { + ctx->heap.high_water_mark = ctx->heap.heap_ptr; + ctx->gc_count = 0; + } + return result; + } + return memory_minor_gc(ctx, new_size, num_roots, roots); +} + +static enum MemoryGCResult memory_full_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots) +{ + TRACE("Going to perform full gc on process %i\n", ctx->process_id); term old_mso_list = ctx->heap.root->mso_list; + term old_old_mso_list = ctx->heap.old_mso_list; term *old_stack_ptr = context_stack_base(ctx); term *old_heap_end = ctx->heap.heap_end; HeapFragment *old_root_fragment = ctx->heap.root; + // Chain old heap into fragment list so all terms are from-space + HeapFragment *old_chain_tail = NULL; + if (ctx->heap.old_heap_start) { + HeapFragment *old_heap_fragment = OLD_HEAP_TO_FRAGMENT(ctx->heap.old_heap_start); + old_heap_fragment->heap_end = ctx->heap.old_heap_ptr; + old_heap_fragment->next = NULL; + // Append at the end of the fragment chain + old_chain_tail = old_root_fragment; + while (old_chain_tail->next != NULL) { + old_chain_tail = old_chain_tail->next; + } + old_chain_tail->next = old_heap_fragment; + } + if (UNLIKELY(memory_init_heap(&ctx->heap, new_size) != MEMORY_GC_OK)) { + // Undo fragment chain modification so the context remains consistent + if (old_chain_tail) { + 
old_chain_tail->next = NULL; + } return MEMORY_GC_ERROR_FAILED_ALLOCATION; } // We need old heap fragment to only copy terms that were in the heap (as opposed to in messages) @@ -320,10 +373,18 @@ static enum MemoryGCResult memory_gc(Context *ctx, size_t new_size, size_t num_r ctx->heap.heap_ptr = temp_end; memory_sweep_mso_list(old_mso_list, ctx->global, false); + memory_sweep_mso_list(old_old_mso_list, ctx->global, false); ctx->heap.root->mso_list = new_mso_list; + // old heap fragment is already chained into old_root_fragment, freed together memory_destroy_heap_fragment(old_root_fragment); + // Reset old generation + ctx->heap.old_heap_start = NULL; + ctx->heap.old_heap_ptr = NULL; + ctx->heap.old_heap_end = NULL; + ctx->heap.old_mso_list = term_nil(); + return MEMORY_GC_OK; } @@ -904,6 +965,382 @@ HOT_FUNC static term memory_shallow_copy_term(HeapFragment *old_fragment, term t } } +HOT_FUNC static inline bool memory_is_in_old_heap(const Heap *heap, const term *ptr) +{ + return ptr >= heap->old_heap_start && ptr < heap->old_heap_end; +} + +HOT_FUNC static term memory_shallow_copy_term_generational( + HeapFragment *old_fragment, const Heap *heap, term t, + term **new_young_heap, term **old_heap_ptr) +{ + switch (t & TERM_PRIMARY_MASK) { + case TERM_PRIMARY_IMMED: + return t; + + case TERM_PRIMARY_CP: + return t; + + case TERM_PRIMARY_BOXED: { + term *boxed_value = term_to_term_ptr(t); + + if (memory_is_in_old_heap(heap, boxed_value)) { + return t; + } + + if (old_fragment != NULL && !memory_heap_fragment_contains_pointer(old_fragment, boxed_value)) { + return t; + } + + if (memory_is_moved_marker(boxed_value)) { + return memory_dereference_moved_marker(boxed_value); + } + + int boxed_size = term_boxed_size(t) + 1; + + if (boxed_size == 1) { + return ((term) &empty_tuple) | TERM_PRIMARY_BOXED; + } + + term *dest; + if (boxed_value >= heap->heap_start && boxed_value < heap->high_water_mark) { + dest = *old_heap_ptr; + *old_heap_ptr += boxed_size; + } else { + dest 
= *new_young_heap; + *new_young_heap += boxed_size; + } + + for (int i = 0; i < boxed_size; i++) { + dest[i] = boxed_value[i]; + } + + term new_term = ((term) dest) | TERM_PRIMARY_BOXED; + memory_replace_with_moved_marker(boxed_value, new_term); + return new_term; + } + case TERM_PRIMARY_LIST: { + term *list_ptr = term_get_list_ptr(t); + + if (memory_is_in_old_heap(heap, list_ptr)) { + return t; + } + + if (old_fragment != NULL && !memory_heap_fragment_contains_pointer(old_fragment, list_ptr)) { + return t; + } + + if (memory_is_moved_marker(list_ptr)) { + return memory_dereference_moved_marker(list_ptr); + } + + term *dest; + if (list_ptr >= heap->heap_start && list_ptr < heap->high_water_mark) { + dest = *old_heap_ptr; + *old_heap_ptr += 2; + } else { + dest = *new_young_heap; + *new_young_heap += 2; + } + + dest[0] = list_ptr[0]; + dest[1] = list_ptr[1]; + + term new_term = ((term) dest) | 0x1; + memory_replace_with_moved_marker(list_ptr, new_term); + return new_term; + } + default: + UNREACHABLE(); + } +} + +static void memory_scan_and_copy_generational( + HeapFragment *old_fragment, const Heap *heap, + term *mem_start, const term *mem_end, + term **new_young_heap, term **old_heap_ptr, + term *young_mso_list, term *old_mso_list) +{ + term *ptr = mem_start; + + while (ptr < mem_end) { + term t = *ptr; + switch (t & TERM_PRIMARY_MASK) { + case TERM_PRIMARY_IMMED: + ptr++; + break; + case TERM_PRIMARY_CP: { + size_t arity = term_get_size_from_boxed_header(t); + switch (t & TERM_BOXED_TAG_MASK) { + case TERM_BOXED_TUPLE: { + for (size_t i = 1; i <= arity; i++) { + ptr[i] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[i], new_young_heap, old_heap_ptr); + } + break; + } + case TERM_BOXED_BIN_MATCH_STATE: { + ptr[1] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[1], new_young_heap, old_heap_ptr); + break; + } + case TERM_BOXED_POSITIVE_INTEGER: + case TERM_BOXED_NEGATIVE_INTEGER: + case TERM_BOXED_EXTERNAL_PID: + case 
TERM_BOXED_EXTERNAL_PORT: + case TERM_BOXED_EXTERNAL_REF: + case TERM_BOXED_FLOAT: + case TERM_BOXED_HEAP_BINARY: + break; + + case TERM_BOXED_REF: { + term ref = ((term) ptr) | TERM_PRIMARY_BOXED; + if (term_is_resource_reference(ref)) { + term *target_mso = memory_is_in_old_heap(heap, ptr) ? old_mso_list : young_mso_list; + *target_mso = term_list_init_prepend(ptr + REFERENCE_RESOURCE_CONS_OFFSET, ref, *target_mso); + refc_binary_increment_refcount((struct RefcBinary *) term_resource_refc_binary_ptr(ref)); + } + break; + } + + case TERM_BOXED_FUN: { + for (size_t i = 3; i <= arity; i++) { + ptr[i] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[i], new_young_heap, old_heap_ptr); + } + break; + } + + case TERM_BOXED_REFC_BINARY: { + term ref = ((term) ptr) | TERM_PRIMARY_BOXED; + if (!term_refc_binary_is_const(ref)) { + term *target_mso = memory_is_in_old_heap(heap, ptr) ? old_mso_list : young_mso_list; + *target_mso = term_list_init_prepend(ptr + REFC_BINARY_CONS_OFFSET, ref, *target_mso); + refc_binary_increment_refcount((struct RefcBinary *) term_refc_binary_ptr(ref)); + } + break; + } + + case TERM_BOXED_SUB_BINARY: { + ptr[3] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[3], new_young_heap, old_heap_ptr); + break; + } + + case TERM_BOXED_MAP: { + size_t map_size = arity - 1; + size_t keys_offset = term_get_map_keys_offset(); + size_t value_offset = term_get_map_value_offset(); + ptr[keys_offset] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[keys_offset], new_young_heap, old_heap_ptr); + for (size_t i = value_offset; i < value_offset + map_size; ++i) { + ptr[i] = memory_shallow_copy_term_generational(old_fragment, heap, ptr[i], new_young_heap, old_heap_ptr); + } + break; + } + + default: + fprintf(stderr, "- Found unknown boxed type: %" TERM_X_FMT "\n", (t >> 2) & 0xF); + AVM_ABORT(); + } + ptr += arity + 1; + break; + } + case TERM_PRIMARY_LIST: + *ptr = memory_shallow_copy_term_generational(old_fragment, 
heap, t, new_young_heap, old_heap_ptr); + ptr++; + break; + case TERM_PRIMARY_BOXED: + *ptr = memory_shallow_copy_term_generational(old_fragment, heap, t, new_young_heap, old_heap_ptr); + ptr++; + break; + default: + UNREACHABLE(); + } + } +} + +static size_t initial_old_heap_size(enum HeapGrowthStrategy strategy, size_t mature_size) +{ + switch (strategy) { + case MinimumHeapGrowth: + return mature_size; + case BoundedFreeHeapGrowth: + return mature_size + 2 * MIN_FREE_SPACE_SIZE; + case FibonacciHeapGrowth: + default: + return next_fibonacci_heap_size(mature_size); + } +} + +static enum MemoryGCResult memory_minor_gc(Context *ctx, size_t new_size, size_t num_roots, term *roots) +{ + TRACE("Going to perform minor gc on process %i\n", ctx->process_id); + + term old_young_mso_list = ctx->heap.root->mso_list; + term *old_stack_ptr = context_stack_base(ctx); + term *old_heap_end = ctx->heap.heap_end; + HeapFragment *old_root_fragment = ctx->heap.root; + term *high_water_mark = ctx->heap.high_water_mark; + + size_t mature_size = high_water_mark - ctx->heap.heap_start; + + // Save old heap state before memory_init_heap clears it + term *saved_old_heap_start = ctx->heap.old_heap_start; + term *saved_old_heap_ptr = ctx->heap.old_heap_ptr; + term *saved_old_heap_end = ctx->heap.old_heap_end; + term saved_old_mso_list = ctx->heap.old_mso_list; + + bool newly_allocated_old_heap = false; + + if (saved_old_heap_start == NULL) { + if (mature_size > 0) { + size_t old_heap_size = initial_old_heap_size(ctx->heap_growth_strategy, mature_size); + HeapFragment *old_fragment = (HeapFragment *) malloc(sizeof(HeapFragment) + old_heap_size * sizeof(term)); + if (IS_NULL_PTR(old_fragment)) { + goto fallback_full_gc; + } + old_fragment->next = NULL; + saved_old_heap_start = old_fragment->storage; + saved_old_heap_ptr = old_fragment->storage; + saved_old_heap_end = old_fragment->storage + old_heap_size; + newly_allocated_old_heap = true; + } + } else { + size_t old_free = saved_old_heap_end 
- saved_old_heap_ptr; + if (old_free < mature_size) { + goto fallback_full_gc; + } + } + + if (UNLIKELY(memory_init_heap(&ctx->heap, new_size) != MEMORY_GC_OK)) { + if (newly_allocated_old_heap) { + free(OLD_HEAP_TO_FRAGMENT(saved_old_heap_start)); + } + return MEMORY_GC_ERROR_FAILED_ALLOCATION; + } + old_root_fragment->heap_end = old_heap_end; + + { + Heap gen_heap = { 0 }; + gen_heap.heap_start = old_root_fragment->storage; + gen_heap.high_water_mark = high_water_mark; + gen_heap.old_heap_start = saved_old_heap_start; + gen_heap.old_heap_ptr = saved_old_heap_ptr; + gen_heap.old_heap_end = saved_old_heap_end; + + term *new_young_heap = ctx->heap.heap_start; + term *old_heap_ptr = saved_old_heap_ptr; + + // Root scanning: stack + term *stack_ptr = new_young_heap + new_size; + while (old_stack_ptr > ctx->e) { + term new_root = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, *(--old_stack_ptr), + &ctx->heap.heap_ptr, &old_heap_ptr); + push_to_stack(&stack_ptr, new_root); + } + ctx->e = stack_ptr; + + struct ListHead *item; + LIST_FOR_EACH (item, &ctx->dictionary) { + struct DictEntry *entry = GET_LIST_ENTRY(item, struct DictEntry, head); + entry->key = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, entry->key, + &ctx->heap.heap_ptr, &old_heap_ptr); + entry->value = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, entry->value, + &ctx->heap.heap_ptr, &old_heap_ptr); + } + + LIST_FOR_EACH (item, &ctx->extended_x_regs) { + struct ExtendedRegister *ext_reg = GET_LIST_ENTRY(item, struct ExtendedRegister, head); + ext_reg->value = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, ext_reg->value, + &ctx->heap.heap_ptr, &old_heap_ptr); + } + + ctx->exit_reason = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, ctx->exit_reason, + &ctx->heap.heap_ptr, &old_heap_ptr); + ctx->group_leader = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, 
ctx->group_leader, + &ctx->heap.heap_ptr, &old_heap_ptr); + + for (size_t i = 0; i < num_roots; i++) { + roots[i] = memory_shallow_copy_term_generational( + old_root_fragment, &gen_heap, roots[i], + &ctx->heap.heap_ptr, &old_heap_ptr); + } + + // Dual scan loop on new young heap and promoted old region + { + term *young_scan = new_young_heap; + term *young_end = ctx->heap.heap_ptr; + term *old_scan = saved_old_heap_ptr; + term *old_end = old_heap_ptr; + term new_young_mso_list = term_nil(); + term new_old_mso_list = saved_old_mso_list; + + do { + term *next_young_end = young_end; + term *next_old_end = old_end; + + if (young_scan < young_end) { + memory_scan_and_copy_generational( + old_root_fragment, &gen_heap, + young_scan, young_end, + &next_young_end, &old_heap_ptr, + &new_young_mso_list, &new_old_mso_list); + young_scan = young_end; + young_end = next_young_end; + next_old_end = old_heap_ptr; + } + + if (old_scan < old_end) { + memory_scan_and_copy_generational( + old_root_fragment, &gen_heap, + old_scan, old_end, + &young_end, &old_heap_ptr, + &new_young_mso_list, &new_old_mso_list); + old_scan = old_end; + old_end = old_heap_ptr; + next_young_end = young_end; + } + + young_end = next_young_end; + old_end = next_old_end > old_heap_ptr ? 
next_old_end : old_heap_ptr; + } while (young_scan != young_end || old_scan != old_end); + + ctx->heap.heap_ptr = young_end; + + memory_sweep_mso_list(old_young_mso_list, ctx->global, false); + ctx->heap.root->mso_list = new_young_mso_list; + ctx->heap.old_mso_list = new_old_mso_list; + } + + ctx->heap.old_heap_start = saved_old_heap_start; + ctx->heap.old_heap_ptr = old_heap_ptr; + ctx->heap.old_heap_end = saved_old_heap_end; + ctx->heap.high_water_mark = ctx->heap.heap_ptr; + ctx->gc_count++; + + memory_destroy_heap_fragment(old_root_fragment); + + return MEMORY_GC_OK; + } + +fallback_full_gc: + // Restore old heap pointers + ctx->heap.old_heap_start = saved_old_heap_start; + ctx->heap.old_heap_ptr = saved_old_heap_ptr; + ctx->heap.old_heap_end = saved_old_heap_end; + ctx->heap.old_mso_list = saved_old_mso_list; + { + enum MemoryGCResult result = memory_full_gc(ctx, new_size, num_roots, roots); + if (result == MEMORY_GC_OK) { + ctx->heap.high_water_mark = ctx->heap.heap_ptr; + ctx->gc_count = 0; + } + return result; + } +} + void memory_heap_append_fragment(Heap *heap, HeapFragment *fragment, term mso_list) { // The fragment we are appending may have next fragments diff --git a/src/libAtomVM/memory.h b/src/libAtomVM/memory.h index e7317bebd4..7969753fa5 100644 --- a/src/libAtomVM/memory.h +++ b/src/libAtomVM/memory.h @@ -85,6 +85,11 @@ struct Heap term *heap_start; term *heap_ptr; term *heap_end; + term *high_water_mark; + term *old_heap_start; + term *old_heap_ptr; + term *old_heap_end; + term old_mso_list; }; #ifndef TYPEDEF_HEAP @@ -180,6 +185,9 @@ static inline size_t memory_heap_memory_size(const Heap *heap) if (heap->root->next) { result += memory_heap_fragment_memory_size(heap->root->next); } + if (heap->old_heap_start) { + result += heap->old_heap_end - heap->old_heap_start; + } return result; } @@ -358,6 +366,12 @@ static inline void memory_destroy_heap_fragment(HeapFragment *fragment) free((void *) fragment); } +/** + * @brief Recover the HeapFragment 
pointer from an old_heap_start pointer. + */ +#define OLD_HEAP_TO_FRAGMENT(ptr) \ + ((HeapFragment *) ((char *) (ptr) -offsetof(HeapFragment, storage))) + /** * @brief Destroy a root heap. First sweep its mso list. * @@ -371,6 +385,10 @@ static inline void memory_destroy_heap_fragment(HeapFragment *fragment) static inline void memory_destroy_heap(Heap *heap, GlobalContext *global) { memory_sweep_mso_list(heap->root->mso_list, global, false); + if (heap->old_heap_start) { + memory_sweep_mso_list(heap->old_mso_list, global, false); + free(OLD_HEAP_TO_FRAGMENT(heap->old_heap_start)); + } memory_destroy_heap_fragment(heap->root); } @@ -386,6 +404,10 @@ static inline void memory_destroy_heap(Heap *heap, GlobalContext *global) static inline void memory_destroy_heap_from_task(Heap *heap, GlobalContext *global) { memory_sweep_mso_list(heap->root->mso_list, global, true); + if (heap->old_heap_start) { + memory_sweep_mso_list(heap->old_mso_list, global, true); + free(OLD_HEAP_TO_FRAGMENT(heap->old_heap_start)); + } memory_destroy_heap_fragment(heap->root); } #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 886c66a922..381c2779be 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -1423,6 +1423,7 @@ static term do_spawn(Context *ctx, Context *new_ctx, size_t arity, size_t n_free { term min_heap_size_term = interop_proplist_get_value(opts_term, MIN_HEAP_SIZE_ATOM); term max_heap_size_term = interop_proplist_get_value(opts_term, MAX_HEAP_SIZE_ATOM); + term fullsweep_after_term = interop_proplist_get_value(opts_term, FULLSWEEP_AFTER_ATOM); term link_term = interop_proplist_get_value(opts_term, LINK_ATOM); term monitor_term = interop_proplist_get_value(opts_term, MONITOR_ATOM); term heap_growth_strategy = interop_proplist_get_value_default(opts_term, ATOMVM_HEAP_GROWTH_ATOM, BOUNDED_FREE_ATOM); @@ -1464,6 +1465,13 @@ static term do_spawn(Context *ctx, Context *new_ctx, size_t arity, size_t n_free RAISE_ERROR(BADARG_ATOM); } } + if 
(fullsweep_after_term != term_nil()) { + if (UNLIKELY(!term_is_integer(fullsweep_after_term) || term_to_int(fullsweep_after_term) < 0)) { + context_destroy(new_ctx); + RAISE_ERROR(BADARG_ATOM); + } + new_ctx->fullsweep_after = term_to_int(fullsweep_after_term); + } int size = 0; for (uint32_t i = 0; i < n_freeze; i++) { @@ -3109,6 +3117,14 @@ static term nif_erlang_process_flag(Context *ctx, int argc, term argv[]) } return prev; } + case FULLSWEEP_AFTER_ATOM: { + if (UNLIKELY(!term_is_integer(value) || term_to_int(value) < 0)) { + RAISE_ERROR(BADARG_ATOM); + } + term prev = term_from_int(ctx->fullsweep_after); + ctx->fullsweep_after = term_to_int(value); + return prev; + } } // TODO: check erlang:process_flag/3 implementation diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index a9aa020c64..0cfb3def7a 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -598,6 +598,7 @@ compile_erlang(link_kill_parent) compile_erlang(link_throw) compile_erlang(unlink_error) compile_erlang(trap_exit_flag) +compile_erlang(test_process_flag_fullsweep_after) compile_erlang(test_exit1) compile_erlang(test_exit2) @@ -1160,6 +1161,7 @@ set(erlang_test_beams link_throw.beam unlink_error.beam trap_exit_flag.beam + test_process_flag_fullsweep_after.beam test_exit1.beam test_exit2.beam diff --git a/tests/erlang_tests/test_heap_growth.erl b/tests/erlang_tests/test_heap_growth.erl index f8c025c1ba..eefc5bbbc3 100644 --- a/tests/erlang_tests/test_heap_growth.erl +++ b/tests/erlang_tests/test_heap_growth.erl @@ -43,7 +43,7 @@ test_grow_beyond_min_heap_size() -> % do something with Var to avoid compiler optimizations true = 200 =:= length(Var) end, - [monitor, {min_heap_size, 100}] + [monitor, {min_heap_size, 100}, {fullsweep_after, 0}] ), ok = receive @@ -70,7 +70,7 @@ test_bounded_free_strategy(UseDefault) -> true = X3 < X2, true = X3 - X1 - erts_debug:flat_size(Var1) < 32 end, - [monitor | Opt] + [monitor, {fullsweep_after, 
0} | Opt] ), ok = receive @@ -107,7 +107,7 @@ test_minimum_strategy() -> end, 20 = erts_debug:flat_size(Var1) end, - [monitor, {atomvm_heap_growth, minimum}] + [monitor, {atomvm_heap_growth, minimum}, {fullsweep_after, 0}] ), % Get heap size from the outside to have no influence on the heap Pid1 ! {step, 1}, @@ -172,7 +172,7 @@ test_fibonacci_strategy() -> NewHeap = allocate_until_heap_size_changes(MaxHeap), true = NewHeap < MaxHeap end, - [monitor, link, {atomvm_heap_growth, fibonacci}] + [monitor, link, {atomvm_heap_growth, fibonacci}, {fullsweep_after, 0}] ), % Test large increments no longer follow fibonacci {Pid2, Ref2} = spawn_opt( @@ -182,7 +182,7 @@ test_fibonacci_strategy() -> NewHeap = allocate_until_heap_size_changes(MaxHeap), true = NewHeap < MaxHeap end, - [monitor, link, {atomvm_heap_growth, fibonacci}] + [monitor, link, {atomvm_heap_growth, fibonacci}, {fullsweep_after, 0}] ), ok = receive @@ -278,7 +278,7 @@ test_messages_get_gcd() -> fun() -> loop([]) end, - [monitor, {atomvm_heap_growth, minimum}] + [monitor, {atomvm_heap_growth, minimum}, {fullsweep_after, 0}] ), FinalHeapSize = loop_send(Pid1, 20), Pid1 ! quit, diff --git a/tests/erlang_tests/test_process_flag_fullsweep_after.erl b/tests/erlang_tests/test_process_flag_fullsweep_after.erl new file mode 100644 index 0000000000..bf50e11a91 --- /dev/null +++ b/tests/erlang_tests/test_process_flag_fullsweep_after.erl @@ -0,0 +1,73 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(test_process_flag_fullsweep_after).
+
+-export([start/0]).
+
+% Runs both tests, except on BEAM with OTP < 24 where they are skipped; returns 0.
+start() ->
+    case get_otp_version() > 23 of
+        true ->
+            ok = test_process_flag_fullsweep_after(),
+            ok = test_spawn_opt_fullsweep_after();
+        false ->
+            % Can we please get rid of OTP < 24 support?
+            ok
+    end,
+    0.
+
+test_process_flag_fullsweep_after() ->
+    OldVal = erlang:process_flag(fullsweep_after, 10),
+    10 = erlang:process_flag(fullsweep_after, 0),
+    0 = erlang:process_flag(fullsweep_after, OldVal),
+    ok = expect_badarg(fun() -> erlang:process_flag(fullsweep_after, -1) end),
+    ok = expect_badarg(fun() -> erlang:process_flag(fullsweep_after, foo) end),
+    ok.
+
+test_spawn_opt_fullsweep_after() ->
+    Parent = self(),
+    spawn_opt(
+        fun() -> Parent ! erlang:process_info(self(), fullsweep_after) end,
+        [{fullsweep_after, 42}]
+    ),
+    % Match the result so that a timeout fails the test instead of being ignored.
+    ok =
+        receive
+            {fullsweep_after, 42} -> ok
+        after 500 -> timeout
+        end,
+    ok = expect_badarg(fun() -> spawn_opt(fun() -> ok end, [{fullsweep_after, -1}]) end),
+    ok = expect_badarg(fun() -> spawn_opt(fun() -> ok end, [{fullsweep_after, foo}]) end),
+    ok.
+
+% Returns ok if Fun() raises error:badarg, unexpected if it returns normally.
+expect_badarg(Fun) ->
+    try Fun() of
+        _ -> unexpected
+    catch
+        error:badarg -> ok
+    end.
+
+get_otp_version() ->
+    case erlang:system_info(machine) of
+        "BEAM" -> list_to_integer(erlang:system_info(otp_release));
+        _ -> atomvm
+    end.
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 85fab41830..e1fad03957 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -115,9 +115,9 @@ call_primitive_6_args_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 8: f9401c08 ldr x8, [x0, #56]\n" + " 8: f9403008 ldr x8, [x0, #96]\n" " c: f940b850 ldr x16, [x2, #368]\n" " 10: a9bf03fe stp x30, x0, [sp, #-16]!\n" " 14: a9bf0be1 stp x1, x2, [sp, #-16]!\n" @@ -258,7 +258,7 @@ call_primitive_last_5_args_test() -> ]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: f9404c48 ldr x8, [x2, #152]\n" " 8: d2800102 mov x2, #0x8 // #8\n" " c: d2805963 mov x3, #0x2cb // #715\n" @@ -328,7 +328,7 @@ call_primitive_last_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: d2800027 mov x7, #0x1\n" - " 4: f9401808 ldr x8, [x0, #48]\n" + " 4: f9402c08 ldr x8, [x0, #88]\n" " 8: b5000067 cbnz x7, 0x14\n" " c: f9400047 ldr x7, [x2]\n" " 10: d61f00e0 br x7" @@ -342,7 +342,7 @@ jump_to_label_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: d2800027 mov x7, #0x1\n" - " 4: f9401808 ldr x8, [x0, #48]\n" + " 4: f9402c08 ldr x8, [x0, #88]\n" " 8: b5000047 cbnz x7, 0x10\n" " c: 14000000 b 0xc" >>, @@ -355,7 +355,7 @@ jump_to_offset_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: d2800027 mov x7, #0x1\n" - " 4: f9401808 ldr x8, [x0, #48]\n" + " 4: f9402c08 ldr x8, [x0, #88]\n" " 8: b5000047 cbnz x7, 0x10\n" " c: 1400003d b 0x100" >>, @@ -376,7 +376,7 @@ jump_to_continuation_if_block_preserves_cache_test() -> Dump = << " 0: d2802007 mov x7, #0x100\n" " 4: d2800028 mov x8, #0x1\n" - " 8: f9401809 ldr x9, [x0, #48]\n" + " 8: f9402c09 ldr x9, [x0, #88]\n" " c: 
b5000088 cbnz x8, 0x1c\n" " 10: 10ffff88 adr x8, 0x0\n" " 14: 8b070108 add x8, x8, x7\n" @@ -392,10 +392,10 @@ move_array_element_x_reg_invalidates_vm_loc_cache_test() -> {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9402c07 ldr x7, [x0, #88]\n" - " 4: f9401808 ldr x8, [x0, #48]\n" + " 0: f9404007 ldr x7, [x0, #128]\n" + " 4: f9402c08 ldr x8, [x0, #88]\n" " 8: f9400109 ldr x9, [x8]\n" - " c: f9002c09 str x9, [x0, #88]" + " c: f9004009 str x9, [x0, #128]" >>, ?assertStream(aarch64, Dump, Stream). @@ -468,9 +468,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f94000e7 ldr x7, [x7]\n" - " 8: f9005c07 str x7, [x0, #184]" + " 8: f9007007 str x7, [x0, #224]" >>, ?assertStream(aarch64, Dump, Stream). @@ -480,9 +480,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: 9100e0e7 add x7, x7, #0x38\n" - " 8: f9001407 str x7, [x0, #40]" + " 8: f9002807 str x7, [x0, #80]" >>, ?assertStream(aarch64, Dump, Stream). 
@@ -506,8 +506,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: b6f80047 tbz x7, #63, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -524,8 +524,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: eb0800ff cmp x7, x8\n" " c: 5400004a b.ge 0x14 // b.tcont\n" " 10: 91000908 add x8, x8, #0x2" @@ -543,8 +543,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: b5000047 cbnz x7, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -561,8 +561,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: b5000047 cbnz x7, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -579,8 +579,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 35000047 cbnz w7, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -597,8 +597,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 35000047 cbnz w7, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -615,8 +615,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, 
[x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f100ecff cmp x7, #0x3b\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -634,8 +634,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f100ecff cmp x7, #0x3b\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -653,8 +653,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 7100a8ff cmp w7, #0x2a\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -672,8 +672,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 7100a8ff cmp w7, #0x2a\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -691,8 +691,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f100ecff cmp x7, #0x3b\n" " c: 54000041 b.ne 0x14 // b.any\n" " 10: 91000908 add x8, x8, #0x2" @@ -710,8 +710,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f100ecff cmp x7, #0x3b\n" " c: 54000041 b.ne 0x14 // b.any\n" " 10: 91000908 add x8, x8, #0x2" @@ -729,8 +729,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 
f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 7100a8ff cmp w7, #0x2a\n" " c: 54000041 b.ne 0x14 // b.any\n" " 10: 91000908 add x8, x8, #0x2" @@ -748,8 +748,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 7100a8ff cmp w7, #0x2a\n" " c: 54000041 b.ne 0x14 // b.any\n" " 10: 91000908 add x8, x8, #0x2" @@ -767,8 +767,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 37000047 tbnz w7, #0, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -785,8 +785,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 37000047 tbnz w7, #0, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -803,8 +803,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 36000047 tbz w7, #0, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -821,8 +821,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 36000047 tbz w7, #0, 0x10\n" " c: 91000908 add x8, x8, #0x2" >>, @@ -839,8 +839,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 
8: f24008ff tst x7, #0x7\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -858,8 +858,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: d28000a9 mov x9, #0x5 // #5\n" " c: ea0900ff tst x7, x9\n" " 10: 54000040 b.eq 0x18 // b.none\n" @@ -878,8 +878,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f24008ff tst x7, #0x7\n" " c: 54000040 b.eq 0x14 // b.none\n" " 10: 91000908 add x8, x8, #0x2" @@ -897,8 +897,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 92400ce9 and x9, x7, #0xf\n" " c: f1003d3f cmp x9, #0xf\n" " 10: 54000040 b.eq 0x18 // b.none\n" @@ -917,8 +917,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: 92400ce7 and x7, x7, #0xf\n" " c: f1003cff cmp x7, #0xf\n" " 10: 54000040 b.eq 0x18 // b.none\n" @@ -937,8 +937,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f10190ff cmp x7, #0x64\n" " c: 5400004d b.le 0x14\n" " 10: 91000908 add x8, x8, #0x2" @@ -956,8 +956,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, 
#96]\n" " 8: f10190ff cmp x7, #0x64\n" " c: 5400004d b.le 0x14\n" " 10: 91000908 add x8, x8, #0x2" @@ -975,8 +975,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f10190ff cmp x7, #0x64\n" " c: 5400004a b.ge 0x14 // b.tcont\n" " 10: 91000908 add x8, x8, #0x2" @@ -994,8 +994,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f10190ff cmp x7, #0x64\n" " c: 5400004a b.ge 0x14 // b.tcont\n" " 10: 91000908 add x8, x8, #0x2" @@ -1023,8 +1023,8 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9403008 ldr x8, [x0, #96]\n" " 8: f100ecff cmp x7, #0x3b\n" " c: 54000061 b.ne 0x18 // b.any\n" " 10: 91000908 add x8, x8, #0x2\n" @@ -1042,7 +1042,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: d343fce7 lsr x7, x7, #3" >>, ?assertStream(aarch64, Dump, Stream) @@ -1055,7 +1055,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: d343fce8 lsr x8, x7, #3" >>, ?assertStream(aarch64, Dump, Stream) @@ -1069,7 +1069,7 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: d37df0e7 lsl x7, x7, #3" >>, ?assertStream(aarch64, Dump, Stream). 
@@ -1180,7 +1180,7 @@ call_bif_with_large_literal_integer_test() -> " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" " 60: d2800001 mov x1, #0x0 // #0\n" " 64: d2800022 mov x2, #0x1 // #1\n" - " 68: f9401803 ldr x3, [x0, #48]\n" + " 68: f9402c03 ldr x3, [x0, #88]\n" " 6c: aa0803e4 mov x4, x8\n" " 70: d63f00e0 blr x7\n" " 74: aa0003e7 mov x7, x0\n" @@ -1190,7 +1190,7 @@ call_bif_with_large_literal_integer_test() -> " 84: f9401847 ldr x7, [x2, #48]\n" " 88: d2801102 mov x2, #0x88 // #136\n" " 8c: d61f00e0 br x7\n" - " 90: f9001807 str x7, [x0, #48]" + " 90: f9002c07 str x7, [x0, #88]" >>, ?assertStream(aarch64, Dump, Stream). @@ -1204,12 +1204,12 @@ get_list_test() -> ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 8: f9401408 ldr x8, [x0, #40]\n" + " 8: f9402808 ldr x8, [x0, #80]\n" " c: f94004e9 ldr x9, [x7, #8]\n" " 10: f9000509 str x9, [x8, #8]\n" - " 14: f9401408 ldr x8, [x0, #40]\n" + " 14: f9402808 ldr x8, [x0, #80]\n" " 18: f94000e9 ldr x9, [x7]\n" " 1c: f9000109 str x9, [x8]" >>, @@ -1254,7 +1254,7 @@ is_integer_test() -> Dump = << " 0: 14000001 b 0x4\n" " 4: 14000050 b 0x144\n" - " 8: f9401807 ldr x7, [x0, #48]\n" + " 8: f9402c07 ldr x7, [x0, #88]\n" " c: 92400ce8 and x8, x7, #0xf\n" " 10: f1003d1f cmp x8, #0xf\n" " 14: 54000180 b.eq 0x44 // b.none\n" @@ -1310,7 +1310,7 @@ is_number_test() -> Dump = << " 0: 14000001 b 0x4\n" " 4: 14000053 b 0x150\n" - " 8: f9401807 ldr x7, [x0, #48]\n" + " 8: f9402c07 ldr x7, [x0, #88]\n" " c: 92400ce8 and x8, x7, #0xf\n" " 10: f1003d1f cmp x8, #0xf\n" " 14: 540001e0 b.eq 0x50 // b.none\n" @@ -1350,7 +1350,7 @@ is_boolean_test() -> Dump = << " 0: 14000001 b 0x4\n" " 4: 14000047 b 0x120\n" - " 8: f9401807 ldr x7, [x0, #48]\n" + " 8: f9402c07 ldr x7, [x0, #88]\n" " c: f1012cff cmp x7, #0x4b\n" " 10: 54000080 b.eq 0x20\n" " 14: f1002cff cmp x7, #0xb\n" @@ -1529,9 +1529,9 
@@ gc_bif2_test() -> " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" " 2c: d2800001 mov x1, #0x0 // #0\n" " 30: d2800062 mov x2, #0x3 // #3\n" - " 34: f9401403 ldr x3, [x0, #40]\n" + " 34: f9402803 ldr x3, [x0, #80]\n" " 38: f9400063 ldr x3, [x3]\n" - " 3c: f9401804 ldr x4, [x0, #48]\n" + " 3c: f9402c04 ldr x4, [x0, #88]\n" " 40: d63f00e0 blr x7\n" " 44: aa0003e7 mov x7, x0\n" " 48: a8c10be1 ldp x1, x2, [sp], #16\n" @@ -1581,7 +1581,7 @@ call_ext_test() -> " 28: d3689ce7 lsl x7, x7, #24\n" " 2c: d2802610 mov x16, #0x130 // #304\n" " 30: aa1000e7 orr x7, x7, x16\n" - " 34: f9005c07 str x7, [x0, #184]\n" + " 34: f9007007 str x7, [x0, #224]\n" " 38: f9401047 ldr x7, [x2, #32]\n" " 3c: d2800042 mov x2, #0x2 // #2\n" " 40: d28000a3 mov x3, #0x5 // #5\n" @@ -1628,7 +1628,7 @@ call_fun_test() -> " 14: f9000427 str x7, [x1, #8]\n" " 18: f9400847 ldr x7, [x2, #16]\n" " 1c: d61f00e0 br x7\n" - " 20: f9401807 ldr x7, [x0, #48]\n" + " 20: f9402c07 ldr x7, [x0, #88]\n" " 24: aa0703e8 mov x8, x7\n" " 28: 92400509 and x9, x8, #0x3\n" " 2c: f100093f cmp x9, #0x2\n" @@ -1653,7 +1653,7 @@ call_fun_test() -> " 78: d3689d08 lsl x8, x8, #24\n" " 7c: d2804c10 mov x16, #0x260 // #608\n" " 80: aa100108 orr x8, x8, x16\n" - " 84: f9005c08 str x8, [x0, #184]\n" + " 84: f9007008 str x8, [x0, #224]\n" " 88: f9408048 ldr x8, [x2, #256]\n" " 8c: aa0703e2 mov x2, x7\n" " 90: d2800003 mov x3, #0x0 // #0\n" @@ -1669,7 +1669,7 @@ decrement_reductions_invalidates_cache_test() -> {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: b9401027 ldr w7, [x1, #16]\n" " 8: f10004e7 subs x7, x7, #0x1\n" " c: b9001027 str w7, [x1, #16]\n" @@ -1678,7 +1678,7 @@ decrement_reductions_invalidates_cache_test() -> " 18: f9000427 str x7, [x1, #8]\n" " 1c: f9400847 ldr x7, [x2, #16]\n" " 20: d61f00e0 br x7\n" - " 24: f9401807 ldr x7, [x0, #48]" + " 24: f9402c07 ldr x7, [x0, #88]" >>, 
?assertStream(aarch64, Dump, Stream). @@ -1696,12 +1696,12 @@ move_to_vm_register_test_() -> [ ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << - " 0: f900181f str xzr, [x0, #48]" + " 0: f9002c1f str xzr, [x0, #88]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << - " 0: f900581f str xzr, [x0, #176]" + " 0: f9006c1f str xzr, [x0, #216]" >>) end), ?_test(begin @@ -1711,13 +1711,13 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f90008ff str xzr, [x7, #16]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f90050ff str xzr, [x7, #160]" >>) end), @@ -1725,26 +1725,26 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: f9001807 str x7, [x0, #48]" + " 4: f9002c07 str x7, [x0, #88]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: f9005807 str x7, [x0, #176]" + " 4: f9006c07 str x7, [x0, #216]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: f9401408 ldr x8, [x0, #40]\n" + " 4: f9402808 ldr x8, [x0, #80]\n" " 8: f9000907 str x7, [x8, #16]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: f9401408 ldr x8, [x0, #40]\n" + " 4: f9402808 ldr x8, [x0, #80]\n" " 8: f9005107 str x7, [x8, #160]" >>) end), @@ -1758,14 +1758,14 @@ move_to_vm_register_test_() -> %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: f9401c07 ldr x7, [x0, #56]\n" - " 4: f9002007 str x7, [x0, #64]" + " 0: f9403007 ldr x7, [x0, #96]\n" + " 4: f9003407 str x7, 
[x0, #104]" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r8}, << - " 0: f9401c07 ldr x7, [x0, #56]\n" + " 0: f9403007 ldr x7, [x0, #96]\n" " 4: f9000107 str x7, [x8]" >>) end), @@ -1773,42 +1773,42 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, {ptr, r9}, {x_reg, 3}, << " 0: f9400127 ldr x7, [x9]\n" - " 4: f9002407 str x7, [x0, #72]" + " 4: f9003807 str x7, [x0, #112]" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401408 ldr x8, [x0, #40]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9402808 ldr x8, [x0, #80]\n" " 8: f9000507 str x7, [x8, #8]" >>) end), %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f94000e7 ldr x7, [x7]\n" - " 8: f9002407 str x7, [x0, #72]" + " 8: f9003807 str x7, [x0, #112]" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f94004e7 ldr x7, [x7, #8]\n" - " 8: f9002407 str x7, [x0, #72]" + " 8: f9003807 str x7, [x0, #112]" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r10, {x_reg, 0}, << - " 0: f900180a str x10, [x0, #48]" + " 0: f9002c0a str x10, [x0, #88]" >>) end), ?_test(begin move_to_vm_register_test0(State0, r10, {x_reg, extra}, << - " 0: f900580a str x10, [x0, #176]" + " 0: f9006c0a str x10, [x0, #216]" >>) end), %% Test: Native register to ptr @@ -1820,7 +1820,7 @@ move_to_vm_register_test_() -> %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r10, {y_reg, 0}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f90000ea str x10, [x7]" >>) end), @@ -1831,7 +1831,7 @@ 
move_to_vm_register_test_() -> " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" " c: f2e24687 movk x7, #0x1234, lsl #48\n" - " 10: f9001807 str x7, [x0, #48]" + " 10: f9002c07 str x7, [x0, #88]" >>) end), ?_test(begin @@ -1840,7 +1840,7 @@ move_to_vm_register_test_() -> " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" " c: f2e24687 movk x7, #0x1234, lsl #48\n" - " 10: f9005807 str x7, [x0, #176]\n" + " 10: f9006c07 str x7, [x0, #216]\n" >>) end), ?_test(begin @@ -1849,7 +1849,7 @@ move_to_vm_register_test_() -> " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" " c: f2e24687 movk x7, #0x1234, lsl #48\n" - " 10: f9401408 ldr x8, [x0, #40]\n" + " 10: f9402808 ldr x8, [x0, #80]\n" " 14: f9000907 str x7, [x8, #16]" >>) end), @@ -1859,7 +1859,7 @@ move_to_vm_register_test_() -> " 4: f2b35787 movk x7, #0x9abc, lsl #16\n" " 8: f2cacf07 movk x7, #0x5678, lsl #32\n" " c: f2e24687 movk x7, #0x1234, lsl #48\n" - " 10: f9401408 ldr x8, [x0, #40]\n" + " 10: f9402808 ldr x8, [x0, #80]\n" " 14: f9005107 str x7, [x8, #160]" >>) end), @@ -1876,24 +1876,24 @@ move_to_vm_register_test_() -> %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: f9405407 ldr x7, [x0, #168]\n" - " 4: f9401408 ldr x8, [x0, #40]\n" + " 0: f9406807 ldr x7, [x0, #208]\n" + " 4: f9402808 ldr x8, [x0, #80]\n" " 8: f9007d07 str x7, [x8, #248]" >>) end), %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f9407ce7 ldr x7, [x7, #248]\n" - " 8: f9005407 str x7, [x0, #168]" + " 8: f9006807 str x7, [x0, #208]" >>) end), %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 92800007 mov x7, #0xffffffffffffffff // #-1\n" - " 4: f9001807 str x7, [x0, #48]" 
+ " 4: f9002c07 str x7, [x0, #88]" >>) end), %% Test: ptr with offset to fp_reg (term_to_float) @@ -1904,9 +1904,9 @@ move_to_vm_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: f94004e7 ldr x7, [x7, #8]\n" - " 8: f9406008 ldr x8, [x0, #192]\n" + " 8: f9407408 ldr x8, [x0, #232]\n" " c: f9000d07 str x7, [x8, #24]" >>, ?assertStream(aarch64, Dump, Stream) @@ -1930,7 +1930,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r8, 2, {x_reg, 0}, << " 0: f9400907 ldr x7, [x8, #16]\n" - " 4: f9001807 str x7, [x0, #48]" + " 4: f9002c07 str x7, [x0, #88]" >>) end), %% move_array_element: reg[x] to ptr @@ -1943,7 +1943,7 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, r8, 1, {y_reg, 2}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f9400508 ldr x8, [x8, #8]\n" " 8: f90008e8 str x8, [x7, #16]" >>) @@ -1957,7 +1957,7 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, r8, 7, {y_reg, 31}, << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f9401d08 ldr x8, [x8, #56]\n" " 8: f9007ce8 str x8, [x7, #248]" >>) @@ -1966,7 +1966,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r8, 7, {x_reg, 15}, << " 0: f9401d07 ldr x7, [x8, #56]\n" - " 4: f9005407 str x7, [x0, #168]" + " 4: f9006807 str x7, [x0, #208]" >>) end), %% move_array_element: reg_x[reg_y] to x_reg @@ -1975,7 +1975,7 @@ move_array_element_test_() -> move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, << " 0: f9401107 ldr x7, [x8, #32]\n" " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" - " 8: f9002007 str x7, [x0, #64]" + " 8: f9003407 str x7, [x0, #104]" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) @@ -1992,7 +1992,7 @@ 
move_array_element_test_() -> {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, << " 0: f9401107 ldr x7, [x8, #32]\n" - " 4: f9401408 ldr x8, [x0, #40]\n" + " 4: f9402808 ldr x8, [x0, #80]\n" " 8: f8677907 ldr x7, [x8, x7, lsl #3]\n" " c: f9007d07 str x7, [x8, #248]" >>) @@ -2032,7 +2032,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: f9000907 str x7, [x8, #16]" >>, ?assertStream(aarch64, Dump, Stream) @@ -2042,7 +2042,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: f8297907 str x7, [x8, x9, lsl #3]" >>, ?assertStream(aarch64, Dump, Stream) @@ -2062,7 +2062,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f94008e7 ldr x7, [x7, #16]\n" " 8: f8297907 str x7, [x8, x9, lsl #3]" >>, @@ -2073,7 +2073,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: f9000d07 str x7, [x8, #24]" >>, ?assertStream(aarch64, Dump, Stream) @@ -2088,7 +2088,7 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" + " 0: f9402c07 ldr x7, [x0, #88]\n" " 4: 9100052a add x10, x9, #0x1\n" " 8: f82a7907 str x7, [x8, x10, lsl #3]" >>, @@ -2146,7 +2146,7 @@ move_to_native_register_test_() 
-> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: f9402407 ldr x7, [x0, #72]" + " 0: f9403807 ldr x7, [x0, #112]" >>, ?assertStream(aarch64, Dump, Stream) end), @@ -2156,7 +2156,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f9400ce7 ldr x7, [x7, #24]" >>, ?assertStream(aarch64, Dump, Stream) @@ -2193,7 +2193,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r8), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9402008 ldr x8, [x0, #64]" + " 0: f9403408 ldr x8, [x0, #104]" >>, ?assertStream(aarch64, Dump, Stream) end), @@ -2202,7 +2202,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r8), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401408 ldr x8, [x0, #40]\n" + " 0: f9402808 ldr x8, [x0, #80]\n" " 4: f9400908 ldr x8, [x8, #16]" >>, ?assertStream(aarch64, Dump, Stream) @@ -2368,7 +2368,7 @@ cached_load_after_free_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9401807 ldr x7, [x0, #48]" + " 0: f9402c07 ldr x7, [x0, #88]" >>, ?assertStream(aarch64, Dump, Stream). @@ -2381,7 +2381,7 @@ fixed_dst_x_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9402008 ldr x8, [x0, #64]" + " 0: f9403408 ldr x8, [x0, #104]" >>, ?assertStream(aarch64, Dump, Stream). @@ -2394,7 +2394,7 @@ fixed_dst_y_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401408 ldr x8, [x0, #40]\n" + " 0: f9402808 ldr x8, [x0, #80]\n" " 4: f9400908 ldr x8, [x8, #16]" >>, ?assertStream(aarch64, Dump, Stream). 
@@ -2409,8 +2409,8 @@ cached_move_to_vm_x_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401c07 ldr x7, [x0, #56]\n" - " 4: f9001807 str x7, [x0, #48]" + " 0: f9403007 ldr x7, [x0, #96]\n" + " 4: f9002c07 str x7, [x0, #88]" >>, ?assertStream(aarch64, Dump, Stream). @@ -2424,8 +2424,8 @@ cached_move_to_vm_y_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" + " 0: f9402807 ldr x7, [x0, #80]\n" " 4: f94000e7 ldr x7, [x7]\n" - " 8: f9001807 str x7, [x0, #48]" + " 8: f9002c07 str x7, [x0, #88]" >>, ?assertStream(aarch64, Dump, Stream). diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 408f908c16..f7d0a3b50a 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -116,10 +116,10 @@ call_primitive_6_args_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 2603 movs r6, #3\n" " 4: 43b7 bics r7, r6\n" - " 6: 69c6 ldr r6, [r0, #28]\n" + " 6: 6b06 ldr r6, [r0, #48]\n" " 8: 25b8 movs r5, #184 ; 0xb8\n" " a: 5955 ldr r5, [r2, r5]\n" " c: b405 push {r0, r2}\n" @@ -304,7 +304,7 @@ call_primitive_last_5_args_test() -> Stream = ?BACKEND:stream(State2), Dump = << % {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" % State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [... " 2: 6cd6 ldr r6, [r2, #76] ; 0x4c\n" " 4: b082 sub sp, #8\n" @@ -441,9 +441,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 65c7 str r7, [r0, #92] ; 0x5c" + " 4: 6707 str r7, [r0, #112]" >>, ?assertStream(arm, Dump, Stream). 
@@ -453,9 +453,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" + " 0: 6a87 ldr r7, [r0, #40]\n" " 2: 371c adds r7, #28\n" - " 4: 6147 str r7, [r0, #20]" + " 4: 6287 str r7, [r0, #40]" >>, ?assertStream(arm, Dump, Stream). @@ -479,8 +479,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f00 cmp r7, #0\n" " 6: d500 bpl.n 0xa\n" " 8: 3602 adds r6, #2" @@ -498,8 +498,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 42b7 cmp r7, r6\n" " 6: da00 bge.n 0xa\n" " 8: 3602 adds r6, #2" @@ -517,8 +517,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f2a cmp r7, #42 ; 0x2a\n" " 6: da00 bge.n 0xa\n" " 8: 3602 adds r6, #2" @@ -537,8 +537,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 00ed lsls r5, r5, #3\n" " 8: 42af cmp r7, r5\n" @@ -559,8 +559,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f00 cmp r7, #0\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -578,8 +578,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr 
r6, [r0, #48]\n" " 4: 2f00 cmp r7, #0\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -597,8 +597,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2501 movs r5, #1\n" " 6: 426d negs r5, r5\n" " 8: 42af cmp r7, r5\n" @@ -618,8 +618,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f00 cmp r7, #0\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -637,8 +637,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f00 cmp r7, #0\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -656,8 +656,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f3b cmp r7, #59 ; 0x3b\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -675,8 +675,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f3b cmp r7, #59 ; 0x3b\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -694,8 +694,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f2a cmp r7, #42 ; 0x2a\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -715,8 +715,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr 
r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" " 6: 42af cmp r7, r5\n" " 8: d000 beq.n 0xc\n" @@ -738,8 +738,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f2a cmp r7, #42 ; 0x2a\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -757,8 +757,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f3b cmp r7, #59 ; 0x3b\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -776,8 +776,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f3b cmp r7, #59 ; 0x3b\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -795,8 +795,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f2a cmp r7, #42 ; 0x2a\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -814,8 +814,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f2a cmp r7, #42 ; 0x2a\n" " 6: d100 bne.n 0xa\n" " 8: 3602 adds r6, #2" @@ -833,8 +833,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 07fd lsls r5, r7, #31\n" " 6: d400 bmi.n 0xa\n" " 8: 3602 adds r6, #2" @@ -852,8 +852,8 
@@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 07fd lsls r5, r7, #31\n" " 6: d400 bmi.n 0xa\n" " 8: 3602 adds r6, #2" @@ -871,8 +871,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 07fd lsls r5, r7, #31\n" " 6: d500 bpl.n 0xa\n" " 8: 3602 adds r6, #2" @@ -890,8 +890,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 07fd lsls r5, r7, #31\n" " 6: d500 bpl.n 0xa\n" " 8: 3602 adds r6, #2" @@ -909,8 +909,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 077d lsls r5, r7, #29\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -928,8 +928,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2505 movs r5, #5\n" " 6: 422f tst r7, r5\n" " 8: d000 beq.n 0xc\n" @@ -948,8 +948,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 077d lsls r5, r7, #29\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -967,8 +967,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 43fd mvns r5, r7\n" " 6: 
072d lsls r5, r5, #28\n" " 8: d000 beq.n 0xc\n" @@ -987,8 +987,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 43ff mvns r7, r7\n" " 6: 073f lsls r7, r7, #28\n" " 8: d000 beq.n 0xc\n" @@ -1007,8 +1007,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 463d mov r5, r7\n" " 6: 243f movs r4, #63 ; 0x3f\n" " 8: 4025 ands r5, r4\n" @@ -1029,8 +1029,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 42b7 cmp r7, r6\n" " 6: da00 bge.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1054,8 +1054,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 253f movs r5, #63 ; 0x3f\n" " 6: 402f ands r7, r5\n" " 8: 2f08 cmp r7, #8\n" @@ -1075,8 +1075,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f64 cmp r7, #100 ; 0x64\n" " 6: dd00 ble.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1094,8 +1094,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f64 cmp r7, #100 ; 0x64\n" " 6: dd00 ble.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1114,8 +1114,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, 
[r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 00ed lsls r5, r5, #3\n" " 8: 42af cmp r7, r5\n" @@ -1137,8 +1137,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 00ed lsls r5, r5, #3\n" " 8: 42af cmp r7, r5\n" @@ -1160,8 +1160,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" " 6: 42af cmp r7, r5\n" " 8: dd00 ble.n 0xc\n" @@ -1185,8 +1185,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" " 6: 42af cmp r7, r5\n" " 8: dd00 ble.n 0xc\n" @@ -1219,8 +1219,8 @@ bitwise_and_optimization_test_() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" - " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 0: 6c47 ldr r7, [r0, #68]\n" + " 2: 6c86 ldr r6, [r0, #72]\n" " 4: 07bd lsls r5, r7, #30\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1239,8 +1239,8 @@ bitwise_and_optimization_test_() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" - " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 0: 6c47 ldr r7, [r0, #68]\n" + " 2: 6c86 ldr r6, [r0, #72]\n" " 4: 073d lsls r5, r7, #28\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1259,8 +1259,8 @@ bitwise_and_optimization_test_() -> ), Stream = 
?BACKEND:stream(State3), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" - " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 0: 6c47 ldr r7, [r0, #68]\n" + " 2: 6c86 ldr r6, [r0, #72]\n" " 4: 06bd lsls r5, r7, #26\n" " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" @@ -1279,8 +1279,8 @@ bitwise_and_optimization_test_() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" - " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 0: 6c47 ldr r7, [r0, #68]\n" + " 2: 6c86 ldr r6, [r0, #72]\n" " 4: 2505 movs r5, #5\n" " 6: 422f tst r7, r5\n" " 8: d000 beq.n 0xc\n" @@ -1308,8 +1308,8 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" " 4: 2f3b cmp r7, #59 ; 0x3b\n" " 6: d101 bne.n 0xc\n" " 8: 3602 adds r6, #2\n" @@ -1327,7 +1327,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 08ff lsrs r7, r7, #3" >>, ?assertStream(arm, Dump, Stream) @@ -1340,7 +1340,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 08fe lsrs r6, r7, #3" >>, ?assertStream(arm, Dump, Stream) @@ -1354,7 +1354,7 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 00ff lsls r7, r7, #3" >>, ?assertStream(arm, Dump, Stream). 
@@ -1657,7 +1657,7 @@ call_bif_with_large_literal_integer_test() -> " 1e: 9500 str r5, [sp, #0]\n" " 20: 2100 movs r1, #0\n" " 22: 2201 movs r2, #1\n" - " 24: 6983 ldr r3, [r0, #24]\n" + " 24: 6ac3 ldr r3, [r0, #44]\n" " 26: 47b8 blx r7\n" " 28: 4607 mov r7, r0\n" " 2a: b002 add sp, #8\n" @@ -1672,7 +1672,7 @@ call_bif_with_large_literal_integer_test() -> " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 3e: 0000 movs r0, r0\n" " 40: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" - " 44: 6187 str r7, [r0, #24]" + " 44: 62c7 str r7, [r0, #44]" >>, ?assertStream(arm, Dump, Stream). @@ -1686,14 +1686,14 @@ get_list_test() -> ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 2603 movs r6, #3\n" " 4: 43b7 bics r7, r6\n" " 6: 687d ldr r5, [r7, #4]\n" - " 8: 6946 ldr r6, [r0, #20]\n" + " 8: 6a86 ldr r6, [r0, #40]\n" " a: 6075 str r5, [r6, #4]\n" " c: 683d ldr r5, [r7, #0]\n" - " e: 6946 ldr r6, [r0, #20]\n" + " e: 6a86 ldr r6, [r0, #40]\n" " 10: 6035 str r5, [r6, #0]" >>, ?assertStream(arm, Dump, Stream). 
@@ -1727,7 +1727,7 @@ is_integer_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" " 6: d015 beq.n 0x34\n" @@ -1791,7 +1791,7 @@ is_number_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" " 6: d01b beq.n 0x40\n" @@ -1854,7 +1854,7 @@ is_boolean_test() -> " 12: 46c0 nop\n" " 14: 00ed lsls r5, r5, #3\n" " 16: 0000 movs r0, r0\n" - " 18: 6987 ldr r7, [r0, #24]\n" + " 18: 6ac7 ldr r7, [r0, #44]\n" " 1a: 2f4b cmp r7, #75\n" " 1c: d006 beq.n 0x2c\n" " 1e: 2f0b cmp r7, #11\n" @@ -1895,7 +1895,7 @@ is_boolean_far_test() -> " 12: 46c0 nop\n" " 14: 0fed lsrs r5, r5, #31\n" " 16: 0000 movs r0, r0\n" - " 18: 6987 ldr r7, [r0, #24]\n" + " 18: 6ac7 ldr r7, [r0, #44]\n" " 1a: 2f4b cmp r7, #75\n" " 1c: d006 beq.n 0x2c\n" " 1e: 2f0b cmp r7, #11\n" @@ -1930,7 +1930,7 @@ is_boolean_far_unaligned_test() -> Stream = ?BACKEND:stream(State5), Dump = << " 0: 4770 bx lr\n" - " 2: 6987 ldr r7, [r0, #24]\n" + " 2: 6ac7 ldr r7, [r0, #44]\n" " 4: 2f4b cmp r7, #75 @ 0x4b\n" " 6: d007 beq.n 0x18\n" " 8: 2f0b cmp r7, #11\n" @@ -1972,7 +1972,7 @@ is_boolean_far_known_test() -> " 12: 46c0 nop\n" " 14: 0fed lsrs r5, r5, #31\n" " 16: 0000 movs r0, r0\n" - " 18: 6987 ldr r7, [r0, #24]\n" + " 18: 6ac7 ldr r7, [r0, #44]\n" " 1a: 2f4b cmp r7, #75\n" " 1c: d006 beq.n 0x2c\n" " 1e: 2f0b cmp r7, #11\n" @@ -2020,7 +2020,7 @@ is_boolean_far_known_unaligned_test() -> " 14: 46c0 nop\n" " 16: 0feb lsrs r3, r5, #31\n" " 18: 0000 movs r0, r0\n" - " 1a: 6987 ldr r7, [r0, #24]\n" + " 1a: 6ac7 ldr r7, [r0, #44]\n" " 1c: 2f4b cmp r7, #75\n" " 1e: d007 beq.n 0x30\n" " 20: 2f0b cmp r7, #11\n" @@ -2290,11 +2290,11 @@ gc_bif2_test() -> " c: bc05 pop {r0, r2}\n" " e: b405 push {r0, r2}\n" " 
10: b082 sub sp, #8\n" - " 12: 6986 ldr r6, [r0, #24]\n" + " 12: 6ac6 ldr r6, [r0, #44]\n" " 14: 9600 str r6, [sp, #0]\n" " 16: 2100 movs r1, #0\n" " 18: 2203 movs r2, #3\n" - " 1a: 6946 ldr r6, [r0, #20]\n" + " 1a: 6a86 ldr r6, [r0, #40]\n" " 1c: 6833 ldr r3, [r6, #0]\n" " 1e: 47b8 blx r7\n" " 20: 4607 mov r7, r0\n" @@ -2357,7 +2357,7 @@ call_ext_test() -> " 24: 063f lsls r7, r7, #24\n" " 26: 4e07 ldr r6, [pc, #28] ; (0x44)\n" " 28: 4337 orrs r7, r6\n" - " 2a: 65c7 str r7, [r0, #92] ; 0x5c\n" + " 2a: 6707 str r7, [r0, #112]\n" " 2c: 6917 ldr r7, [r2, #16]\n" " 2e: b082 sub sp, #8\n" " 30: 2601 movs r6, #1\n" @@ -2421,7 +2421,7 @@ call_fun_test() -> " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 1a: 46c0 nop ; (mov r8, r8)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1e: 6987 ldr r7, [r0, #24]\n" + " 1e: 6ac7 ldr r7, [r0, #44]\n" " 20: 463e mov r6, r7\n" " 22: 4635 mov r5, r6\n" " 24: 2403 movs r4, #3\n" @@ -2462,7 +2462,7 @@ call_fun_test() -> " 6a: 0636 lsls r6, r6, #24\n" " 6c: 4d05 ldr r5, [pc, #20] ; (0x84)\n" " 6e: 432e orrs r6, r5\n" - " 70: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 70: 6706 str r6, [r0, #112]\n" " 72: 2680 movs r6, #128 ; 0x80\n" " 74: 5996 ldr r6, [r2, r6]\n" " 76: 463a mov r2, r7\n" @@ -2486,7 +2486,7 @@ decrement_reductions_invalidates_cache_test() -> {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" " 2: 9e00 ldr r6, [sp, #0]\n" " 4: 68b7 ldr r7, [r6, #8]\n" " 6: 3f01 subs r7, #1\n" @@ -2501,7 +2501,7 @@ decrement_reductions_invalidates_cache_test() -> " 18: 46b6 mov lr, r6\n" " 1a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1e: 6987 ldr r7, [r0, #24]" + " 1e: 6ac7 ldr r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). 
@@ -2521,14 +2521,14 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 2700 movs r7, #0\n" - " 2: 6187 str r7, [r0, #24]\n" + " 2: 62c7 str r7, [r0, #44]\n" " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 2700 movs r7, #0\n" - " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 2: 66c7 str r7, [r0, #108]\n" " 4: e07c b.n 0x100" >>) end), @@ -2542,7 +2542,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << " 0: 2600 movs r6, #0\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 60be str r6, [r7, #8]\n" " 6: e07b b.n 0x100" >>) @@ -2550,7 +2550,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << " 0: 2600 movs r6, #0\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 653e str r6, [r7, #80] ; 0x50\n" " 6: e07b b.n 0x100" >>) @@ -2559,21 +2559,21 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6187 str r7, [r0, #24]\n" + " 2: 62c7 str r7, [r0, #44]\n" " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 2: 66c7 str r7, [r0, #108]\n" " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: 262a movs r6, #42 ; 0x2a\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 60be str r6, [r7, #8]\n" " 6: e07b b.n 0x100" >>) @@ -2581,7 +2581,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: 262a movs r6, #42 ; 0x2a\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 653e str r6, [r7, #80] ; 0x50\n" " 6: e07b b.n 0x100" >>) @@ -2597,15 +2597,15 @@ move_to_vm_register_test_() -> %% 
Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 6207 str r7, [r0, #32]\n" + " 0: 6b07 ldr r7, [r0, #48]\n" + " 2: 6347 str r7, [r0, #52]\n" " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << - " 0: 69c7 ldr r7, [r0, #28]\n" + " 0: 6b07 ldr r7, [r0, #48]\n" " 2: 600f str r7, [r1, #0]\n" " 4: e07c b.n 0x100" >>) @@ -2614,15 +2614,15 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << " 0: 6827 ldr r7, [r4, #0]\n" - " 2: 6247 str r7, [r0, #36] ; 0x24\n" + " 2: 6387 str r7, [r0, #56]\n" " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 6946 ldr r6, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6a86 ldr r6, [r0, #40]\n" " 4: 6077 str r7, [r6, #4]\n" " 6: e07b b.n 0x100" >>) @@ -2630,31 +2630,31 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 4: 6387 str r7, [r0, #56]\n" " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 6877 ldr r7, [r6, #4]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 4: 6387 str r7, [r0, #56]\n" " 6: e07b b.n 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r5, {x_reg, 0}, << - " 0: 6185 str r5, [r0, #24]\n" + " 0: 62c5 str r5, [r0, #44]\n" " 2: e07d b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, r6, {x_reg, extra}, << - " 0: 6586 str r6, [r0, #88] ; 0x58\n" + " 0: 66c6 str r6, 
[r0, #108]\n" " 2: e07d b.n 0x100" >>) end), @@ -2668,7 +2668,7 @@ move_to_vm_register_test_() -> %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r1, {y_reg, 0}, << - " 0: 6947 ldr r7, [r0, #20]\n" + " 0: 6a87 ldr r7, [r0, #40]\n" " 2: 6039 str r1, [r7, #0]\n" " 4: e07c b.n 0x100" >>) @@ -2677,7 +2677,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 2: 6187 str r7, [r0, #24]\n" + " 2: 62c7 str r7, [r0, #44]\n" " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" " 8: 5678 ldrsb r0, [r7, r1]\n" @@ -2695,7 +2695,7 @@ move_to_vm_register_test_() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 4: 6187 str r7, [r0, #24]\n" + " 4: 62c7 str r7, [r0, #44]\n" " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" " a: 1234 asrs r4, r6, #8" @@ -2705,7 +2705,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 2: 66c7 str r7, [r0, #108]\n" " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" " 8: 5678 ldrsb r0, [r7, r1]\n" @@ -2715,7 +2715,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 2: 6946 ldr r6, [r0, #20]\n" + " 2: 6a86 ldr r6, [r0, #40]\n" " 4: 60b7 str r7, [r6, #8]\n" " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" @@ -2725,7 +2725,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 2: 6946 ldr r6, [r0, #20]\n" + " 2: 6a86 ldr r6, [r0, #40]\n" " 4: 6537 str r7, [r6, #80] ; 0x50\n" " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" @@ -2746,8 +2746,8 @@ move_to_vm_register_test_() -> %% Test: x_reg to y_reg (high index) ?_test(begin 
move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 6d47 ldr r7, [r0, #84] ; 0x54\n" - " 2: 6946 ldr r6, [r0, #20]\n" + " 0: 6e87 ldr r7, [r0, #104]\n" + " 2: 6a86 ldr r6, [r0, #40]\n" " 4: 67f7 str r7, [r6, #124] ; 0x7c\n" " 6: e07b b.n 0x100" >>) @@ -2755,9 +2755,9 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n" - " 4: 6547 str r7, [r0, #84] ; 0x54\n" + " 4: 6687 str r7, [r0, #104]\n" " 6: e07b b.n 0x100" >>) end), @@ -2765,7 +2765,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 32}, << " 0: 262a movs r6, #42 ; 0x2a\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 443d add r5, r7\n" " 8: 602e str r6, [r5, #0]\n" @@ -2777,7 +2777,7 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 2701 movs r7, #1\n" " 2: 427f negs r7, r7\n" - " 4: 6187 str r7, [r0, #24]\n" + " 4: 62c7 str r7, [r0, #44]\n" " 6: e07b b.n 0x100" >>) end) @@ -2800,7 +2800,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r3, 2, {x_reg, 0}, << " 0: 689f ldr r7, [r3, #8]\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 62c7 str r7, [r0, #44]" >>) end), %% move_array_element: reg[x] to ptr @@ -2814,7 +2814,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r3, 1, {y_reg, 2}, << " 0: 685e ldr r6, [r3, #4]\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 60be str r6, [r7, #8]" >>) end), @@ -2828,7 +2828,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r3, 7, {y_reg, 31}, << " 0: 69de ldr r6, [r3, #28]\n" - " 2: 6947 ldr r7, [r0, #20]\n" + " 2: 6a87 ldr r7, [r0, #40]\n" " 4: 67fe str r6, [r7, #124] ; 0x7c" >>) end), @@ -2836,7 +2836,7 
@@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r3, 7, {x_reg, 15}, << " 0: 69df ldr r7, [r3, #28]\n" - " 2: 6547 str r7, [r0, #84] ; 0x54" + " 2: 6687 str r7, [r0, #104]" >>) end), %% move_array_element: reg_x[reg_y] to x_reg @@ -2846,7 +2846,7 @@ move_array_element_test_() -> " 0: 691f ldr r7, [r3, #16]\n" " 2: 00bf lsls r7, r7, #2\n" " 4: 59df ldr r7, [r3, r7]\n" - " 6: 6207 str r7, [r0, #32]" + " 6: 6347 str r7, [r0, #52]" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) @@ -2866,7 +2866,7 @@ move_array_element_test_() -> " 0: 691f ldr r7, [r3, #16]\n" " 2: 00bf lsls r7, r7, #2\n" " 4: 59df ldr r7, [r3, r7]\n" - " 6: 6946 ldr r6, [r0, #20]\n" + " 6: 6a86 ldr r6, [r0, #40]\n" " 8: 67f7 str r7, [r6, #124] ; 0x7c" >>) end), @@ -2877,7 +2877,7 @@ move_array_element_test_() -> " 0: 691f ldr r7, [r3, #16]\n" " 2: 00bf lsls r7, r7, #2\n" " 4: 59df ldr r7, [r3, r7]\n" - " 6: 6946 ldr r6, [r0, #20]\n" + " 6: 6a86 ldr r6, [r0, #40] @ 0x28\n" " 8: 2580 movs r5, #128\t; 0x80\n" " a: 4435 add r5, r6\n" " c: 602f str r7, [r5, #0]" @@ -2887,9 +2887,9 @@ move_array_element_test_() -> ?_test(begin {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 68be ldr r6, [r7, #8]\n" - " 4: 62c6 str r6, [r0, #44] ; 0x2c" + " 4: 6406 str r6, [r0, #64]" >>) end), %% move_array_element: reg[32] to x_reg (large offset, index 32, offset 128) @@ -2898,7 +2898,7 @@ move_array_element_test_() -> " 0: 2704 movs r7, #4\n" " 2: 441f add r7, r3\n" " 4: 6ffe ldr r6, [r7, #124] ; 0x7c\n" - " 6: 6186 str r6, [r0, #24]" + " 6: 62c6 str r6, [r0, #44]" >>) end), %% move_array_element: reg[32] to ptr (large offset) @@ -2916,7 +2916,7 @@ move_array_element_test_() -> " 0: 2604 movs r6, #4\n" " 2: 441e add r6, r3\n" " 4: 6ff6 ldr r6, [r6, #124] ; 0x7c\n" - " 6: 6947 ldr r7, [r0, #20]\n" + " 6: 6a87 ldr r7, 
[r0, #40]\n" " 8: 60be str r6, [r7, #8]" >>) end) @@ -2968,7 +2968,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 609f str r7, [r3, #8]" >>, ?assertStream(arm, Dump, Stream) @@ -2978,7 +2978,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 32), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 2604 movs r6, #4\n" " 4: 441e add r6, r3\n" " 6: 67f7 str r7, [r6, #124] ; 0x7c" @@ -2990,7 +2990,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 4626 mov r6, r4\n" " 4: 00b6 lsls r6, r6, #2\n" " 6: 519f str r7, [r3, r6]" @@ -3014,7 +3014,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 4626 mov r6, r4\n" " 6: 00b6 lsls r6, r6, #2\n" @@ -3027,7 +3027,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 609f str r7, [r3, #8]" >>, ?assertStream(arm, Dump, Stream) @@ -3042,7 +3042,7 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" " 2: 1c66 adds r6, r4, #1\n" " 4: 00b6 lsls r6, r6, #2\n" " 6: 519f str r7, [r3, r6]" @@ -3137,7 +3137,7 @@ move_to_native_register_test_() -> Stream = 
?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 6a47 ldr r7, [r0, #36] ; 0x24" + " 0: 6b87 ldr r7, [r0, #56]" >>, ?assertStream(arm, Dump, Stream) end), @@ -3147,7 +3147,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 68f7 ldr r7, [r6, #12]" >>, ?assertStream(arm, Dump, Stream) @@ -3184,7 +3184,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6a03 ldr r3, [r0, #32]" + " 0: 6b43 ldr r3, [r0, #52]" >>, ?assertStream(arm, Dump, Stream) end), @@ -3193,7 +3193,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" + " 0: 6a87 ldr r7, [r0, #40]\n" " 2: 68b9 ldr r1, [r7, #8]" >>, ?assertStream(arm, Dump, Stream) @@ -3206,8 +3206,8 @@ move_to_native_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 6e06 ldr r6, [r0, #96] ; 0x60\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6f46 ldr r6, [r0, #116]\n" " 4: 687d ldr r5, [r7, #4]\n" " 6: 61b5 str r5, [r6, #24]\n" " 8: 68bd ldr r5, [r7, #8]\n" @@ -3487,7 +3487,7 @@ set_args1_y_reg_test() -> " 2: 00bf lsls r7, r7, #2\n" " 4: 59d7 ldr r7, [r2, r7]\n" " 6: b405 push {r0, r2}\n" - " 8: 6946 ldr r6, [r0, #20]\n" + " 8: 6a86 ldr r6, [r0, #40]\n" " a: 6970 ldr r0, [r6, #20]\n" " c: 47b8 blx r7\n" " e: 4607 mov r7, r0\n" @@ -3503,7 +3503,7 @@ large_y_reg_read_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp register since offset 128 > 124 Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 2780 movs r7, #128 ; 0x80\n" " 4: 4437 add r7, r6\n" " 6: 683f ldr r7, [r7, #0]" @@ -3521,8 +3521,8 @@ large_y_reg_write_test() -> Stream = 
?BACKEND:stream(State2), % Expected: uses helper with two temp registers since we have registers available Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 6946 ldr r6, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6a86 ldr r6, [r0, #40]\n" " 4: 25a0 movs r5, #160 ; 0xa0\n" " 6: 4435 add r5, r6\n" " 8: 602f str r7, [r5, #0]" @@ -3543,12 +3543,12 @@ large_y_reg_read_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses IP_REG (r12) fallback sequence Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6941 ldr r1, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6a81 ldr r1, [r0, #40]\n" " c: 468c mov ip, r1\n" " e: 218c movs r1, #140 ; 0x8c\n" " 10: 4461 add r1, ip\n" @@ -3572,12 +3572,12 @@ large_y_reg_write_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses IP_REG (r12) fallback sequence Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6941 ldr r1, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6a81 ldr r1, [r0, #40]\n" " c: 468c mov ip, r1\n" " e: 21c8 movs r1, #200 ; 0xc8\n" " 10: 4461 add r1, ip\n" @@ -3592,7 +3592,7 @@ y_reg_boundary_direct_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses direct addressing since 31 * 4 = 124 <= 124 Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40]\n" " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c" >>, ?assertStream(arm, Dump, Stream), @@ -3621,12 +3621,12 @@ 
and_register_exhaustion_negative_test() -> {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: 4684 mov ip, r0\n" " e: 2003 movs r0, #3\n" " 10: 4387 bics r7, r0\n" @@ -3647,12 +3647,12 @@ and_register_exhaustion_positive_test() -> {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: 4684 mov ip, r0\n" " e: 203f movs r0, #63 ; 0x3f\n" " 10: 4007 ands r7, r0\n" @@ -3735,9 +3735,9 @@ call_func_ptr_stack_alignment_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" " 6: b4ed push {r0, r2, r3, r5, r6, r7}\n" " 8: 202a movs r0, #42 ; 0x2a\n" " a: 4798 blx r3\n" @@ -3774,12 +3774,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" 
- " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" " 10: 2701 movs r7, #1\n" @@ -3803,12 +3803,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" " 10: 9100 str r1, [sp, #0]\n" @@ -3831,12 +3831,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" " 10: 2701 movs r7, #1\n" @@ -3862,12 +3862,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 
ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" " e: 460f mov r7, r1\n" " 10: 4630 mov r0, r6\n" @@ -3888,12 +3888,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " 0: 6ac7 ldr r7, [r0, #44]\n" + " 2: 6b06 ldr r6, [r0, #48]\n" + " 4: 6b45 ldr r5, [r0, #52]\n" + " 6: 6b84 ldr r4, [r0, #56]\n" + " 8: 6bc3 ldr r3, [r0, #60]\n" + " a: 6c01 ldr r1, [r0, #64]\n" " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" " e: 6897 ldr r7, [r2, #8]\n" " 10: 4630 mov r0, r6\n" @@ -3984,10 +3984,10 @@ add_beam_test() -> % label 1 % {move,{integer,9},{x,1}}. " 30: 279f movs r7, #159 ; 0x9f\n" - " 32: 61c7 str r7, [r0, #28]\n" + " 32: 6307 str r7, [r0, #48]\n" % {move,{integer,8},{x,0}} " 34: 278f movs r7, #143 ; 0x8f\n" - " 36: 6187 str r7, [r0, #24]\n" + " 36: 62c7 str r7, [r0, #44]\n" % {call_only,2,{f,2}}. " 38: 9e00 ldr r6, [sp, #0]\n" " 3a: 68b7 ldr r7, [r6, #8]\n" @@ -4036,7 +4036,7 @@ add_beam_test() -> % {init_yregs,{list,[{y,0}]}}. 
%% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), " 8c: 263b movs r6, #59 ; 0x3b\n" - " 8e: 6947 ldr r7, [r0, #20]\n" + " 8e: 6a87 ldr r7, [r0, #40]\n" " 90: 603e str r6, [r7, #0]\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), @@ -4046,7 +4046,7 @@ add_beam_test() -> " 98: 063f lsls r7, r7, #24\n" " 9a: 4e0c ldr r6, [pc, #48] ; (0xcc)\n" " 9c: 4337 orrs r7, r6\n" - " 9e: 65c7 str r7, [r0, #92] ; 0x5c\n" + " 9e: 6707 str r7, [r0, #112]\n" " a0: 9e00 ldr r6, [sp, #0]\n" " a2: 68b7 ldr r7, [r6, #8]\n" " a4: 3f01 subs r7, #1\n" @@ -4099,7 +4099,7 @@ cached_load_after_free_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 6987 ldr r7, [r0, #24]" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). @@ -4112,7 +4112,7 @@ fixed_dst_x_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6a03 ldr r3, [r0, #32]" + " 0: 6b43 ldr r3, [r0, #52] @ 0x34" >>, ?assertStream(arm, Dump, Stream). @@ -4125,7 +4125,7 @@ fixed_dst_y_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" + " 0: 6a87 ldr r7, [r0, #40] @ 0x28\n" " 2: 68b9 ldr r1, [r7, #8]" >>, ?assertStream(arm, Dump, Stream). @@ -4146,11 +4146,11 @@ and_negative_imm_invalidates_temp_cache_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" + " 2: 6b06 ldr r6, [r0, #48] @ 0x30\n" " 4: 2603 movs r6, #3\n" " 6: 43b7 bics r7, r6\n" - " 8: 69c6 ldr r6, [r0, #28]" + " 8: 6b06 ldr r6, [r0, #48] @ 0x30" >>, ?assertStream(arm, Dump, Stream). 
@@ -4165,11 +4165,11 @@ and_positive_imm_invalidates_temp_cache_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" + " 2: 6b06 ldr r6, [r0, #48] @ 0x30\n" " 4: 263f movs r6, #63 ; 0x3f\n" " 6: 4037 ands r7, r6\n" - " 8: 69c6 ldr r6, [r0, #28]" + " 8: 6b06 ldr r6, [r0, #48] @ 0x30" >>, ?assertStream(arm, Dump, Stream). @@ -4189,13 +4189,13 @@ if_block_cond_free_reg_invalidates_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" + " 2: 6b06 ldr r6, [r0, #48] @ 0x30\n" " 4: 43ff mvns r7, r7\n" " 6: 073f lsls r7, r7, #28\n" " 8: d000 beq.n 0xc\n" " a: 3602 adds r6, #2\n" - " c: 6987 ldr r7, [r0, #24]" + " c: 6ac7 ldr r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). @@ -4211,10 +4211,10 @@ jump_to_label_invalidates_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" " 2: ffff ffff ; instruction: 0xffffffff\n" " 6: ffff ffff ; instruction: 0xffffffff\n" - " a: ffff 6987 vtbl.8 d22, {d31->, ?assertStream(arm, Dump, Stream). 
@@ -4245,7 +4245,7 @@ call_primitive_last_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 2701 movs r7, #1\n" - " 2: 6986 ldr r6, [r0, #24]\n" + " 2: 6ac6 ldr r6, [r0, #44] @ 0x2c\n" " 4: 2f00 cmp r7, #0\n" " 6: d104 bne.n 0x12\n" " 8: 6817 ldr r7, [r2, #0]\n" @@ -4263,7 +4263,7 @@ jump_to_label_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 2701 movs r7, #1\n" - " 2: 6986 ldr r6, [r0, #24]\n" + " 2: 6ac6 ldr r6, [r0, #44] @ 0x2c\n" " 4: 2f00 cmp r7, #0\n" " 6: d105 bne.n 0x14\n" " 8: ffff ffff @ instruction: 0xffffffff\n" @@ -4279,7 +4279,7 @@ jump_to_offset_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 2701 movs r7, #1\n" - " 2: 6986 ldr r6, [r0, #24]\n" + " 2: 6ac6 ldr r6, [r0, #44] @ 0x2c\n" " 4: 2f00 cmp r7, #0\n" " 6: d100 bne.n 0xa\n" " 8: e07a b.n 0x100" @@ -4302,7 +4302,7 @@ jump_to_continuation_if_block_preserves_cache_test() -> " 0: 27ff movs r7, #255 @ 0xff\n" " 2: 3701 adds r7, #1\n" " 4: 2601 movs r6, #1\n" - " 6: 6985 ldr r5, [r0, #24]\n" + " 6: 6ac5 ldr r5, [r0, #44] @ 0x2c\n" " 8: 2e00 cmp r6, #0\n" " a: d108 bne.n 0x1e\n" " c: a600 add r6, pc, #0 @ (adr r6, 0x10)\n" @@ -4327,10 +4327,10 @@ move_array_element_x_reg_invalidates_vm_loc_cache_test() -> Stream = ?BACKEND:stream(S4), Dump = << - " 0: 6ac7 ldr r7, [r0, #44] ; 0x2c\n" - " 2: 6986 ldr r6, [r0, #24]\n" + " 0: 6c07 ldr r7, [r0, #64] ; 0x40\n" + " 2: 6ac6 ldr r6, [r0, #44] @ 0x2c\n" " 4: 6835 ldr r5, [r6, #0]\n" - " 6: 62c5 str r5, [r0, #44] ; 0x2c" + " 6: 6405 str r5, [r0, #64] ; 0x40" >>, ?assertStream(arm, Dump, Stream).
@@ -4350,12 +4350,12 @@ ldr_y_reg_invalidates_hidden_temp_cache_test() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6945 ldr r5, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" + " 2: 6b06 ldr r6, [r0, #48] @ 0x30\n" + " 4: 6b45 ldr r5, [r0, #52] @ 0x34\n" + " 6: 6a85 ldr r5, [r0, #40] @ 0x28\n" " 8: 682e ldr r6, [r5, #0]\n" - " a: 6a05 ldr r5, [r0, #32]" + " a: 6b45 ldr r5, [r0, #52] @ 0x34" >>, ?assertStream(arm, Dump, Stream). @@ -4367,8 +4367,8 @@ cached_move_to_vm_x_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 6187 str r7, [r0, #24]" + " 0: 6b07 ldr r7, [r0, #48] @ 0x30\n" + " 2: 62c7 str r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). @@ -4380,9 +4380,9 @@ cached_move_to_vm_y_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6946 ldr r6, [r0, #20]\n" + " 0: 6a86 ldr r6, [r0, #40] @ 0x28\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 6187 str r7, [r0, #24]" + " 4: 62c7 str r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). @@ -4394,8 +4394,8 @@ cached_move_to_vm_imm_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 272a movs r7, #42\n" - " 2: 6187 str r7, [r0, #24]" + " 0: 272a movs r7, #42 @ 0x2a\n" + " 2: 62c7 str r7, [r0, #44] @ 0x2c" >>, ?assertStream(arm, Dump, Stream). 
@@ -4414,12 +4414,12 @@ y_reg_load_last_available_register_test() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 6a05 ldr r5, [r0, #32]\n" - " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" - " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" - " a: 6941 ldr r1, [r0, #20]\n" + " 0: 6ac7 ldr r7, [r0, #44] @ 0x2c\n" + " 2: 6b06 ldr r6, [r0, #48] @ 0x30\n" + " 4: 6b45 ldr r5, [r0, #52] @ 0x34\n" + " 6: 6b84 ldr r4, [r0, #56] @ 0x38\n" + " 8: 6bc3 ldr r3, [r0, #60] @ 0x3c\n" + " a: 6a81 ldr r1, [r0, #40] @ 0x28\n" " c: 6809 ldr r1, [r1, #0]" >>, ?assertStream(arm, Dump, Stream). diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 2d94aa7def..ae7a632df2 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -133,9 +133,9 @@ call_primitive_6_args_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: ffcfff93 andi t6,t6,-4\n" - " 8: 01c52f03 lw t5,28(a0)\n" + " 8: 03052f03 lw t5,48(a0)\n" " c: 0b800e93 li t4,184\n" " 10: 9eb2 add t4,t4,a2\n" " 12: 000eae83 lw t4,0(t4)\n" @@ -321,7 +321,7 @@ call_primitive_last_5_args_test() -> ]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: 04c62f03 lw t5,76(a2)\n" " 8: 4621 li a2,8\n" " a: 2cb00693 li a3,715\n" @@ -446,9 +446,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,40(a0)\n" " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 05f52e23 sw t6,92(a0)" + " 8: 07f52823 sw t6,112(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -458,9 +458,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" + " 0: 02852f83 lw t6,40(a0)\n" " 4: 0ff1 addi t6,t6,28\n" - " 6: 01f52a23 sw t6,20(a0)" + " 6: 03f52423 sw t6,40(a0)" >>, ?assertStream(riscv32, Dump, Stream).
@@ -484,8 +484,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 000fd363 bgez t6,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -502,8 +502,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01efd363 bge t6,t5,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -520,8 +520,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 02a00e93 li t4,42\n" " c: 01dfd363 bge t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -540,8 +540,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 40000e93 li t4,1024\n" " c: 01dfd363 bge t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2\n" @@ -560,8 +560,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 000f9363 bnez t6,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -578,8 +578,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 000f9363 bnez t6,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -596,8 +596,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 5efd li t4,-1\n" " a: 01df9363 bne
t6,t4,0x10\n" " e: 0f09 addi t5,t5,2" @@ -615,8 +615,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 000f9363 bnez t6,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -633,8 +633,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 000f9363 bnez t6,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -651,8 +651,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 03b00e93 li t4,59\n" " c: 01df8363 beq t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -670,8 +670,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 03b00e93 li t4,59\n" " c: 01df8363 beq t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -689,8 +689,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 02a00e93 li t4,42\n" " c: 01df8363 beq t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -710,8 +710,8 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 7cb00e93 li t4,1995\n" " c: 01df8363 beq t6,t4,0x12\n" " 10: 0f05 addi t5,t5,1\n" @@ -729,8 +729,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw
t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 02a00e93 li t4,42\n" " c: 01df8363 beq t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -748,8 +748,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 03b00e93 li t4,59\n" " c: 01df9363 bne t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -767,8 +767,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 03b00e93 li t4,59\n" " c: 01df9363 bne t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -786,8 +786,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 02a00e93 li t4,42\n" " c: 01df9363 bne t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -805,8 +805,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 02a00e93 li t4,42\n" " c: 01df9363 bne t6,t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -824,8 +824,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01ff9e93 slli t4,t6,0x1f\n" " c: 000ec363 bltz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -843,8 +843,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01ff9e93 slli t4,t6,0x1f\n" " c: 000ec363 bltz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -862,8 +862,8 @@ if_block_test_() -> ), Stream =
?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01ff9e93 slli t4,t6,0x1f\n" " c: 000ed363 bgez t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -881,8 +881,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01ff9e93 slli t4,t6,0x1f\n" " c: 000ed363 bgez t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -900,8 +900,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 007ffe93 andi t4,t6,7\n" " c: 000e8363 beqz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -919,8 +919,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 005ffe93 andi t4,t6,5\n" " c: 000e8363 beqz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -938,8 +938,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 007ffe93 andi t4,t6,7\n" " c: 000e8363 beqz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -957,8 +957,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: ffffce93 not t4,t6\n" " c: 0ef2 slli t4,t4,0x1c\n" " e: 000e8363 beqz t4,0x14\n" @@ -977,8 +977,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: ffffcf93 not t6,t6\n" "
c: 0ff2 slli t6,t6,0x1c\n" " e: 000f8363 beqz t6,0x14\n" @@ -997,8 +997,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 8efe mv t4,t6\n" " a: 03fefe93 andi t4,t4,63\n" " e: 4e21 li t3,8\n" @@ -1018,8 +1018,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 01efd363 bge t6,t5,0xe\n" " c: 0f09 addi t5,t5,2" >>, @@ -1042,8 +1042,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 03ffff93 andi t6,t6,63\n" " c: 4ea1 li t4,8\n" " e: 01df8363 beq t6,t4,0x14\n" @@ -1063,8 +1063,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 003ffe93 andi t4,t6,3\n" " c: 000e8363 beqz t4,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -1082,8 +1082,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 06400e93 li t4,100\n" " c: 01fed363 bge t4,t6,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -1101,8 +1101,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 06400e93 li t4,100\n" " c: 01fed363 bge t4,t6,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -1120,8 +1120,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw
t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 40000e93 li t4,1024\n" " c: 01fed363 bge t4,t6,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -1139,8 +1139,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 40000e93 li t4,1024\n" " c: 01fed363 bge t4,t6,0x12\n" " 10: 0f09 addi t5,t5,2" @@ -1168,8 +1168,8 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - "0: 01852f83 lw t6,24(a0)\n" - "4: 01c52f03 lw t5,28(a0)\n" + "0: 02c52f83 lw t6,44(a0)\n" + "4: 03052f03 lw t5,48(a0)\n" "8: 03b00e93 li t4,59\n" "c: 01df9463 bne t6,t4,0x14\n" "10: 0f09 addi t5,t5,2\n" @@ -1187,7 +1187,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: 003fdf93 srli t6,t6,0x3" >>, ?assertStream(riscv32, Dump, Stream) @@ -1200,7 +1200,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: 003fdf13 srli t5,t6,0x3" >>, ?assertStream(riscv32, Dump, Stream) @@ -1214,7 +1214,7 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - "0: 01852f83 lw t6,24(a0)\n" + "0: 02c52f83 lw t6,44(a0)\n" "4: 0f8e slli t6,t6,0x3" >>, ?assertStream(riscv32, Dump, Stream). @@ -1397,7 +1397,7 @@ call_bif_with_large_literal_integer_test() -> " 52: c632 sw a2,12(sp)\n" " 54: 4581 li a1,0\n" " 56: 4605 li a2,1\n" - " 58: 4d14 lw a3,24(a0)\n" + " 58: 5554 lw a3,44(a0)\n" " 5a: 877a mv a4,t5\n" " 5c: 9f82 jalr t6\n" " 5e: 8faa mv t6,a0\n" @@ -1410,7 +1410,7 @@ call_bif_with_large_literal_integer_test() -> " 6e: 01862f83 lw t6,24(a2)\n" " 72: 07200613 li a2,114\n" " 76: 8f82 jr t6\n" - " 78: 01f52c23 sw t6,24(a0)" + " 78: 03f52623 sw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream).
@@ -1425,13 +1425,13 @@ get_list_test() -> Stream = ?BACKEND:stream(State5), Dump = << - "0: 01852f83 lw t6,24(a0)\n" + "0: 02c52f83 lw t6,44(a0)\n" "4: ffcfff93 andi t6,t6,-4\n" "8: 004fae83 lw t4,4(t6)\n" - "c: 01452f03 lw t5,20(a0)\n" + "c: 02852f03 lw t5,40(a0)\n" "10: 01df2223 sw t4,4(t5)\n" "14: 000fae83 lw t4,0(t6)\n" - "18: 01452f03 lw t5,20(a0)\n" + "18: 02852f03 lw t5,40(a0)\n" "1c: 01df2023 sw t4,0(t5)" >>, ?assertStream(riscv32, Dump, Stream). @@ -1473,7 +1473,7 @@ is_integer_test() -> " 6: ffff .insn 2, 0xffff\n" " 8: 00000697 auipc a3,0x0\n" " c: 0f868067 jr 248(a3) # 0x100\n" - " 10: 01852f83 lw t6,24(a0)\n" + " 10: 02c52f83 lw t6,24(a0)\n" " 14: ffffcf13 not t5,t6\n" " 18: 0f72 slli t5,t5,0x1c\n" " 1a: 020f0963 beqz t5,0x4c\n" @@ -1538,7 +1538,7 @@ is_number_test() -> " 6: ffff .insn 2, 0xffff\n" " 8: 00000697 auipc a3,0x0\n" " c: 0f868067 jr 248(a3) # 0x100\n" - " 10: 01852f83 lw t6,24(a0)\n" + " 10: 02c52f83 lw t6,24(a0)\n" " 14: ffffcf13 not t5,t6\n" " 18: 0f72 slli t5,t5,0x1c\n" " 1a: 020f0f63 beqz t5,0x58\n" @@ -1586,7 +1586,7 @@ is_boolean_test() -> " 6: ffff .insn 2, 0xffff\n" " 8: 00000697 auipc a3,0x0\n" " c: 0f868067 jr 248(a3) # 0x100\n" - " 10: 01852f83 lw t6,24(a0)\n" + " 10: 02c52f83 lw t6,24(a0)\n" " 14: 04b00f13 li t5,75\n" " 18: 01ef8963 beq t6,t5,0x2a\n" " 1c: 4f2d li t5,11\n" @@ -1620,7 +1620,7 @@ is_boolean_far_test() -> " 6: ffff .insn 2, 0xffff\n" " 8: 00001697 auipc a3,0x1\n" " c: ff868067 jr -8(a3) # 0x1000\n" - " 10: 01852f83 lw t6,24(a0)\n" + " 10: 02c52f83 lw t6,24(a0)\n" " 14: 04b00f13 li t5,75\n" " 18: 01ef8963 beq t6,t5,0x2a\n" " 1c: 4f2d li t5,11\n" @@ -1653,7 +1653,7 @@ is_boolean_far_known_test() -> " 6: ffff .insn 2, 0xffff\n" " 8: 00001697 auipc a3,0x1\n" " c: ff868067 jr -8(a3) # 0x1000\n" - " 10: 01852f83 lw t6,24(a0)\n" + " 10: 02c52f83 lw t6,24(a0)\n" " 14: 04b00f13 li t5,75\n" " 18: 01ef8963 beq t6,t5,0x2a\n" " 1c: 4f2d li t5,11\n" @@ -1909,9 +1909,9 @@ gc_bif2_test() -> " 2a: c632 sw a2,12(sp)\n" " 2c: 4581 
li a1,0\n" " 2e: 460d li a2,3\n" - " 30: 01452f03 lw t5,20(a0)\n" + " 30: 02852f03 lw t5,20(a0)\n" " 34: 000f2683 lw a3,0(t5)\n" - " 38: 4d18 lw a4,24(a0)\n" + " 38: 5558 lw a4,24(a0)\n" " 3a: 9f82 jalr t6\n" " 3c: 8faa mv t6,a0\n" " 3e: 4082 lw ra,0(sp)\n" @@ -1978,7 +1978,7 @@ call_ext_test() -> " 2a: 11800f93 li t6,280\n" " 2e: 00000013 nop\n" " 32: 01ff6f33 or t5,t5,t6\n" - " 36: 05e52e23 sw t5,92(a0)\n" + " 36: 07e52823 sw t5,92(a0)\n" " 3a: 01062f83 lw t6,16(a2)\n" " 3e: 4609 li a2,2\n" " 40: 4695 li a3,5\n" @@ -2028,7 +2028,7 @@ call_fun_test() -> " 16: 01f5a223 sw t6,4(a1)\n" " 1a: 00862f83 lw t6,8(a2)\n" " 1e: 8f82 jr t6\n" - " 20: 01852f83 lw t6,24(a0)\n" + " 20: 02c52f83 lw t6,24(a0)\n" " 24: 8f7e mv t5,t6\n" " 26: 8efa mv t4,t5\n" " 28: 003efe93 andi t4,t4,3\n" @@ -2056,7 +2056,7 @@ call_fun_test() -> " 70: 24000f13 li t5,576\n" " 74: 00000013 nop\n" " 78: 01eeeeb3 or t4,t4,t5\n" - " 7c: 05d52e23 sw t4,92(a0)\n" + " 7c: 07d52823 sw t4,112(a0)\n" " 80: 08000f13 li t5,128\n" " 84: 9f32 add t5,t5,a2\n" " 86: 000f2f03 lw t5,0(t5)\n" @@ -2074,7 +2074,7 @@ decrement_reductions_invalidates_cache_test() -> {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: 0085af83 lw t6,8(a1)\n" " 8: 1ffd addi t6,t6,-1\n" " a: 01f5a423 sw t6,8(a1)\n" @@ -2085,7 +2085,7 @@ decrement_reductions_invalidates_cache_test() -> " 1a: 01f5a223 sw t6,4(a1)\n" " 1e: 00862f83 lw t6,8(a2)\n" " 22: 8f82 jr t6\n" - " 24: 01852f83 lw t6,24(a0)" + " 24: 02c52f83 lw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). 
@@ -2105,14 +2105,14 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 4f81 li t6,0\n" - " 2: 01f52c23 sw t6,24(a0)\n" + " 2: 03f52623 sw t6,24(a0)\n" " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 4f81 li t6,0\n" - " 2: 05f52c23 sw t6,88(a0)\n" + " 2: 07f52623 sw t6,88(a0)\n" " 6: a8ed j 0x100" >>) end), @@ -2126,7 +2126,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << " 0: 4f01 li t5,0\n" - " 2: 01452f83 lw t6,20(a0)\n" + " 2: 02852f83 lw t6,20(a0)\n" " 6: 01efa423 sw t5,8(t6)\n" " a: a8dd j 0x100" >>) @@ -2134,7 +2134,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << " 0: 4f01 li t5,0\n" - " 2: 01452f83 lw t6,20(a0)\n" + " 2: 02852f83 lw t6,20(a0)\n" " 6: 05efa823 sw t5,80(t6)\n" " a: a8dd j 0x100" >>) @@ -2143,21 +2143,21 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 02a00f93 li t6,42\n" - " 4: 01f52c23 sw t6,24(a0)\n" + " 4: 03f52623 sw t6,24(a0)\n" " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 02a00f93 li t6,42\n" - " 4: 05f52c23 sw t6,88(a0)\n" + " 4: 07f52623 sw t6,88(a0)\n" " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 01efa423 sw t5,8(t6)\n" " c: a8d5 j 0x100" >>) @@ -2165,7 +2165,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 05efa823 sw t5,80(t6)\n" " c: a8d5 j 0x100" >>) @@ -2181,15 +2181,15 @@ move_to_vm_register_test_() -> %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 01c52f83 
lw t6,28(a0)\n" - " 4: 03f52023 sw t6,32(a0)\n" + " 0: 03052f83 lw t6,28(a0)\n" + " 4: 03f52a23 sw t6,32(a0)\n" " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, << - " 0: 01c52f83 lw t6,28(a0)\n" + " 0: 03052f83 lw t6,28(a0)\n" " 4: 01f5a023 sw t6,0(a1)\n" " 8: a8e5 j 0x100" >>) @@ -2198,15 +2198,15 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, << " 0: 000e2f83 lw t6,0(t3)\n" - " 4: 03f52223 sw t6,36(a0)\n" + " 4: 03f52c23 sw t6,36(a0)\n" " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 02852f03 lw t5,20(a0)\n" " 8: 01ff2223 sw t6,4(t5)\n" " c: a8d5 j 0x100" >>) @@ -2214,31 +2214,31 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" + " 8: 03f52c23 sw t6,36(a0)\n" " c: a8d5 j 0x100" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 004f2f83 lw t6,4(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" + " 8: 03f52c23 sw t6,36(a0)\n" " c: a8d5 j 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, t4, {x_reg, 0}, << - " 0: 01d52c23 sw t4,24(a0)\n" + " 0: 03d52623 sw t4,24(a0)\n" " 4: a8f5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, t5, {x_reg, extra}, << - " 0: 05e52c23 sw t5,88(a0)\n" + " 0: 07e52623 sw t5,88(a0)\n" " 4: a8f5 j 0x100" >>) end), @@ -2252,7 +2252,7 @@ move_to_vm_register_test_() -> %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, a1, 
{y_reg, 0}, << - " 0: 01452f83 lw t6,20(a0)\n" + " 0: 02852f83 lw t6,20(a0)\n" " 4: 00bfa023 sw a1,0(t6)\n" " 8: a8e5 j 0x100" >>) @@ -2262,7 +2262,7 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << " 0: 12345fb7 lui t6,0x12345\n" " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01f52c23 sw t6,24(a0)\n" + " 8: 03f52623 sw t6,24(a0)\n" " c: a8d5 j 0x100" >>) end), @@ -2270,7 +2270,7 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << " 0: 12345fb7 lui t6,0x12345\n" " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 05f52c23 sw t6,88(a0)\n" + " 8: 07f52623 sw t6,88(a0)\n" " c: a8d5 j 0x100" >>) end), @@ -2278,7 +2278,7 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << " 0: 12345fb7 lui t6,0x12345\n" " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" + " 8: 02852f03 lw t5,20(a0)\n" " c: 01ff2423 sw t6,8(t5)\n" " 10: a8c5 j 0x100" >>) @@ -2287,7 +2287,7 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << " 0: 12345fb7 lui t6,0x12345\n" " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" + " 8: 02852f03 lw t5,20(a0)\n" " c: 05ff2823 sw t6,80(t5)\n" " 10: a8c5 j 0x100" >>) @@ -2304,8 +2304,8 @@ move_to_vm_register_test_() -> %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 05452f83 lw t6,84(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" + " 0: 06852f83 lw t6,84(a0)\n" + " 4: 02852f03 lw t5,20(a0)\n" " 8: 07ff2e23 sw t6,124(t5)\n" " c: a8d5 j 0x100" >>) @@ -2313,9 +2313,9 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 07cf2f83 lw t6,124(t5)\n" - " 8: 05f52a23 sw t6,84(a0)\n" + " 8: 07f52423 sw 
t6,84(a0)\n" " c: a8d5 j 0x100" >>) end), @@ -2323,7 +2323,7 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 32}, << " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 08000e93 li t4,128\n" " c: 9efe add t4,t4,t6\n" " e: 01eea023 sw t5,0(t4)\n" @@ -2334,21 +2334,21 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 5ffd li t6,-1\n" - " 2: 01f52c23 sw t6,24(a0)\n" + " 2: 03f52623 sw t6,24(a0)\n" " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -100, {x_reg, 0}, << " 0: f9c00f93 li t6,-100\n" - " 4: 01f52c23 sw t6,24(a0)\n" + " 4: 03f52623 sw t6,24(a0)\n" " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -1000, {x_reg, 0}, << " 0: c1800f93 li t6,-1000\n" - " 4: 01f52c23 sw t6,24(a0)\n" + " 4: 03f52623 sw t6,24(a0)\n" " 8: a8e5 j 0x100" >>) end) @@ -2371,7 +2371,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, a3, 2, {x_reg, 0}, << " 0: 0086af83 lw t6,8(a3)\n" - " 4: 01f52c23 sw t6,24(a0)" + " 4: 03f52623 sw t6,24(a0)" >>) end), %% move_array_element: reg[x] to ptr @@ -2385,7 +2385,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, a3, 1, {y_reg, 2}, << " 0: 0046af03 lw t5,4(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 01efa423 sw t5,8(t6)" >>) end), @@ -2399,7 +2399,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, a3, 7, {y_reg, 31}, << " 0: 01c6af03 lw t5,28(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 07efae23 sw t5,124(t6)" >>) end), @@ -2407,7 +2407,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, a3, 7, {x_reg, 15}, << " 0: 01c6af83 lw t6,28(a3)\n" - " 4: 05f52a23 sw t6,84(a0)" + " 4: 07f52423 sw t6,84(a0)" >>) end), %% move_array_element: reg_x[reg_y] to x_reg @@ -2418,7 +2418,7 @@ 
move_array_element_test_() -> " 4: 0f8a slli t6,t6,0x2\n" " 6: 01f68fb3 add t6,a3,t6\n" " a: 000faf83 lw t6,0(t6)\n" - " e: 03f52023 sw t6,32(a0)" + " e: 03f52a23 sw t6,32(a0)" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) @@ -2440,7 +2440,7 @@ move_array_element_test_() -> " 4: 0f8a slli t6,t6,0x2\n" " 6: 01f68fb3 add t6,a3,t6\n" " a: 000faf83 lw t6,0(t6)\n" - " e: 01452f03 lw t5,20(a0)\n" + " e: 02852f03 lw t5,20(a0)\n" " 12: 07ff2e23 sw t6,124(t5)" >>) end), @@ -2448,9 +2448,9 @@ move_array_element_test_() -> ?_test(begin {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" " 4: 008faf03 lw t5,8(t6)\n" - " 8: 03e52623 sw t5,44(a0)" + " 8: 05e52023 sw t5,44(a0)" >>) end) ] @@ -2488,7 +2488,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" " 4: 01f6a423 sw t6,8(a3)" >>, ?assertStream(riscv32, Dump, Stream) @@ -2498,7 +2498,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" " 4: 8f72 mv t5,t3\n" " 6: 0f0a slli t5,t5,0x2\n" " 8: 01e68f33 add t5,a3,t5\n" @@ -2524,7 +2524,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 008f2f83 lw t6,8(t5)\n" " 8: 8f72 mv t5,t3\n" " a: 0f0a slli t5,t5,0x2\n" @@ -2538,7 +2538,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw 
t6,24(a0)\n" " 4: 01f6a423 sw t6,8(a3)" >>, ?assertStream(riscv32, Dump, Stream) @@ -2553,7 +2553,7 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" " 4: 001e0f13 addi t5,t3,1\n" " 8: 0f0a slli t5,t5,0x2\n" " a: 01e68f33 add t5,a3,t5\n" @@ -2647,7 +2647,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 02c52f83 lw t6,44(a0)" + " 0: 04052f83 lw t6,64(a0)" >>, ?assertStream(riscv32, Dump, Stream) end), @@ -2657,7 +2657,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 00cf2f83 lw t6,12(t5)" >>, ?assertStream(riscv32, Dump, Stream) @@ -2694,7 +2694,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 5114 lw a3,32(a0)" + " 0: 5954 lw a3,32(a0)" >>, ?assertStream(riscv32, Dump, Stream) end), @@ -2703,7 +2703,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" + " 0: 02852f83 lw t6,20(a0)\n" " 4: 008fa583 lw a1,8(t6)" >>, ?assertStream(riscv32, Dump, Stream) @@ -2716,8 +2716,8 @@ move_to_native_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 06052f03 lw t5,96(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 07452f03 lw t5,96(a0)\n" " 8: 004fae83 lw t4,4(t6)\n" " c: 01df2c23 sw t4,24(t5)\n" " 10: 008fae83 lw t4,8(t6)\n" @@ -2897,7 +2897,7 @@ set_args1_y_reg_test() -> " 10: c22a sw a0,4(sp)\n" " 12: c42e sw a1,8(sp)\n" " 14: c632 sw a2,12(sp)\n" - " 16: 01452f03 lw t5,20(a0)\n" + " 16: 02852f03 lw t5,20(a0)\n" " 1a: 014f2503 lw 
a0,20(t5)\n" " 1e: 9f82 jalr t6\n" " 20: 8faa mv t6,a0\n" @@ -2917,7 +2917,7 @@ large_y_reg_read_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp register for large offset Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 1ec00f93 li t6,492\n" " 8: 9ffa add t6,t6,t5\n" " a: 000faf83 lw t6,0(t6)" @@ -2934,7 +2934,7 @@ large_y_reg_write_test() -> % Expected: uses helper with temp registers for large offset Dump = << " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" + " 4: 02852f83 lw t6,20(a0)\n" " 8: 1ec00e93 li t4,492\n" " c: 9efe add t4,t4,t6\n" " e: 01eea023 sw t5,0(t4)" @@ -2955,12 +2955,12 @@ large_y_reg_read_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t0+t1 fallback sequence when temps are exhausted Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452283 lw t0,20(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 02852283 lw t0,20(a0)\n" " 18: 08c00313 li t1,140\n" " 1c: 9316 add t1,t1,t0\n" " 1e: 00032303 lw t1,0(t1)" @@ -2983,12 +2983,12 @@ large_y_reg_write_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t1/t0 fallback sequence Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452303 lw t1,20(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 02852303 lw t1,20(a0)\n" " 18: 0c800293 li t0,200\n" " 1c: 929a add t0,t0,t1\n" " 1e: 01f2a023 sw t6,0(t0)" @@ -3002,7 +3002,7 @@ y_reg_boundary_direct_test() -> Stream = 
?BACKEND:stream(State1), % Expected: uses direct addressing since 31 * 4 = 124 < 2048 Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,20(a0)\n" " 4: 07cf2f83 lw t6,124(t5)" >>, ?assertStream(riscv32, Dump, Stream), @@ -3031,12 +3031,12 @@ and_register_exhaustion_negative_test() -> {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" + " 8: 03452e83 lw t4,52(a0)\n" + " c: 03852e03 lw t3,56(a0)\n" + " 10: 03c52383 lw t2,60(a0)\n" + " 14: 04052303 lw t1,64(a0)\n" " 18: ffcfff93 andi t6,t6,-4" >>, ?assertStream(riscv32, ExpectedDump, Stream). @@ -3054,12 +3054,12 @@ and_register_exhaustion_positive_test() -> {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" + " 8: 03452e83 lw t4,52(a0)\n" + " c: 03852e03 lw t3,56(a0)\n" + " 10: 03c52383 lw t2,60(a0)\n" + " 14: 04052303 lw t1,64(a0)\n" " 18: 03ffff93 andi t6,t6,63" >>, ?assertStream(riscv32, ExpectedDump, Stream). 
@@ -3148,10 +3148,10 @@ call_func_ptr_stack_alignment_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" " 10: 1101 addi sp,sp,-32\n" " 12: c006 sw ra,0(sp)\n" " 14: c22a sw a0,4(sp)\n" @@ -3202,12 +3202,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 04052303 lw t1,44(a0)\n" " 18: 1101 addi sp,sp,-32\n" " 1a: c006 sw ra,0(sp)\n" " 1c: c22a sw a0,4(sp)\n" @@ -3243,12 +3243,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 04052303 lw t1,44(a0)\n" " 18: 1101 addi sp,sp,-32\n" " 1a: c006 sw ra,0(sp)\n" " 1c: c22a sw a0,4(sp)\n" @@ -3284,12 +3284,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw 
t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 04052303 lw t1,44(a0)\n" " 18: 1101 addi sp,sp,-32\n" " 1a: c006 sw ra,0(sp)\n" " 1c: c22a sw a0,4(sp)\n" @@ -3326,12 +3326,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 04052303 lw t1,44(a0)\n" " 18: fd010113 addi sp,sp,-48\n" " 1c: c006 sw ra,0(sp)\n" " 1e: c22a sw a0,4(sp)\n" @@ -3372,12 +3372,12 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" + " 0: 02c52f83 lw t6,24(a0)\n" + " 4: 03052f03 lw t5,28(a0)\n" + " 8: 03452e83 lw t4,32(a0)\n" + " c: 03852e03 lw t3,36(a0)\n" + " 10: 03c52383 lw t2,40(a0)\n" + " 14: 04052303 lw t1,44(a0)\n" " 18: fd010113 addi sp,sp,-48\n" " 1c: c006 sw ra,0(sp)\n" " 1e: c22a sw a0,4(sp)\n" @@ -3494,10 +3494,10 @@ add_beam_test() -> % label 1 % {move,{integer,9},{x,1}}. " 20: 09f00f93 li t6,159\n" - " 24: 01f52e23 sw t6,28(a0)\n" + " 24: 03f52823 sw t6,28(a0)\n" % {move,{integer,8},{x,0}} " 28: 08f00f93 li t6,143\n" - " 2c: 01f52c23 sw t6,24(a0)\n" + " 2c: 03f52623 sw t6,24(a0)\n" % {call_only,2,{f,2}}. " 30: 0085af83 lw t6,8(a1)\n" " 34: 1ffd addi t6,t6,-1\n" @@ -3538,7 +3538,7 @@ add_beam_test() -> % {init_yregs,{list,[{y,0}]}}. 
%% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), " 8c: 03b00f13 li t5,59\n" - " 90: 01452f83 lw t6,20(a0)\n" + " 90: 02852f83 lw t6,20(a0)\n" " 94: 01efa023 sw t5,0(t6)\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), @@ -3548,7 +3548,7 @@ add_beam_test() -> " a2: 36800f93 li t6,872\n" " a6: 00000013 nop\n" " aa: 01ff6f33 or t5,t5,t6\n" - " ae: 05e52e23 sw t5,92(a0)\n" + " ae: 07e52823 sw t5,92(a0)\n" " b2: 0085af83 lw t6,8(a1)\n" " b6: 1ffd addi t6,t6,-1\n" " b8: 01f5a423 sw t6,8(a1)\n" @@ -3581,7 +3581,7 @@ cached_load_after_free_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)" + " 0: 02c52f83 lw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3594,7 +3594,7 @@ fixed_dst_x_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 02052f83 lw t6,32(a0)" + " 0: 03452f83 lw t6,52(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3607,8 +3607,8 @@ fixed_dst_y_reg_load_preserves_cache_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 008faf03 lw t5,8(t6)" + " 0: 02852f83 lw t6,40(a0)\n" + " 4: 008faf03 lw t5,8(t6)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3628,12 +3628,12 @@ and_positive_imm_invalidates_temp_cache_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: 6f11 lui t5,0x4\n" " a: f00f0f13 addi t5,t5,-256\n" " e: 01efffb3 and t6,t6,t5\n" - " 12: 01c52f03 lw t5,28(a0)" + " 12: 03052f03 lw t5,48(a0)" >>, ?assertStream(riscv32, Dump, Stream). 
@@ -3650,13 +3650,13 @@ if_block_cond_free_reg_invalidates_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" " 8: ffffcf93 not t6,t6\n" " c: 0ff2 slli t6,t6,0x1c\n" " e: 000f8363 beqz t6,0x14\n" " 12: 0f09 addi t5,t5,2\n" - " 14: 01852f83 lw t6,24(a0)" + " 14: 02c52f83 lw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3669,12 +3669,12 @@ jump_to_label_invalidates_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" " 4: ffff .insn 2, 0xffff\n" " 6: ffff .insn 2, 0xffff\n" " 8: ffff .insn 2, 0xffff\n" " a: ffff .insn 2, 0xffff\n" - " c: 01852f83 lw t6,24(a0)" + " c: 02c52f83 lw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3705,7 +3705,7 @@ call_primitive_last_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 4f85 li t6,1\n" - " 2: 01852f03 lw t5,24(a0)\n" + " 2: 02c52f03 lw t5,44(a0)\n" " 6: 000f9563 bnez t6,0x10\n" " a: 00062f83 lw t6,0(a2)\n" " e: 8f82 jr t6" @@ -3719,7 +3719,7 @@ jump_to_label_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 4f85 li t6,1\n" - " 2: 01852f03 lw t5,24(a0)\n" + " 2: 02c52f03 lw t5,44(a0)\n" " 6: 000f9663 bnez t6,0x12\n" " a: ffff .insn 2, 0xffff\n" " c: ffff .insn 2, 0xffff\n" @@ -3735,7 +3735,7 @@ jump_to_offset_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: 4f85 li t6,1\n" - " 2: 01852f03 lw t5,24(a0)\n" + " 2: 02c52f03 lw t5,44(a0)\n" " 6: 000f9363 bnez t6,0xc\n" " a: a8dd j 0x100" >>, @@ -3756,7 +3756,7 @@ jump_to_continuation_if_block_preserves_cache_test() -> Dump = << " 0: 10000f93 li t6,256\n" " 4: 4f05 li t5,1\n" - " 6: 01852e83 lw t4,24(a0)\n" + " 6: 02c52e83 lw t4,44(a0)\n" " a: 000f1763 bnez t5,0x18\n" " e: 00000f17 auipc t5,0x0\n" " 12: 1f49 addi t5,t5,-14 # 0x0\n" @@ -3773,10 +3773,10 @@ 
move_array_element_x_reg_invalidates_vm_loc_cache_test() -> {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: 02c52f83 lw t6,44(a0)\n" - " 4: 01852f03 lw t5,24(a0)\n" + " 0: 04052f83 lw t6,64(a0)\n" + " 4: 02c52f03 lw t5,44(a0)\n" " 8: 000f2e83 lw t4,0(t5)\n" - " c: 03d52623 sw t4,44(a0)" + " c: 05d52023 sw t4,64(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3791,12 +3791,12 @@ ldr_y_reg_invalidates_hidden_temp_cache_test() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 01452e83 lw t4,20(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" + " 8: 03452e83 lw t4,52(a0)\n" + " c: 02852e83 lw t4,40(a0)\n" " 10: 000eaf03 lw t5,0(t4)\n" - " 14: 02052e83 lw t4,32(a0)" + " 14: 03452e83 lw t4,52(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3812,13 +3812,13 @@ y_reg_load_last_available_register_test() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: 01452283 lw t0,20(a0)\n" + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 03052f03 lw t5,48(a0)\n" + " 8: 03452e83 lw t4,52(a0)\n" + " c: 03852e03 lw t3,56(a0)\n" + " 10: 03c52383 lw t2,60(a0)\n" + " 14: 04052303 lw t1,64(a0)\n" + " 18: 02852283 lw t0,40(a0)\n" " 1c: 0002a283 lw t0,0(t0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3831,8 +3831,8 @@ cached_move_to_vm_x_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01c52f83 lw t6,28(a0)\n" - " 4: 01f52c23 sw t6,24(a0)" + " 0: 03052f83 lw t6,48(a0)\n" + " 4: 03f52623 sw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). 
@@ -3844,9 +3844,9 @@ cached_move_to_vm_y_reg_reuse_test() -> ?assertEqual(Offset1, ?BACKEND:offset(State2)), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" + " 0: 02852f03 lw t5,40(a0)\n" " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 01f52c23 sw t6,24(a0)" + " 8: 03f52623 sw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). @@ -3859,6 +3859,6 @@ cached_move_to_vm_imm_reuse_test() -> Stream = ?BACKEND:stream(State2), Dump = << " 0: 02a00f93 li t6,42\n" - " 4: 01f52c23 sw t6,24(a0)" + " 4: 03f52623 sw t6,44(a0)" >>, ?assertStream(riscv32, Dump, Stream). diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 81384748ee..73db3bba32 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -217,11 +217,11 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> ), % Check the reading of x[1] is immediatly followed by a shift right. - % 15c: 4c 8b 5f 38 mov 0x38(%rdi),%r11 + % 15c: 4c 8b 5f 60 mov 0x60(%rdi),%r11 % 160: 49 c1 eb 04 shr $0x4,%r11 % As opposed to testing its type - % 15c: 4c 8b 5f 38 mov 0x38(%rdi),%r11 + % 15c: 4c 8b 5f 60 mov 0x60(%rdi),%r11 % 160: 4d 89 da mov %r11,%r10 % 163: 41 80 e2 0f and $0xf,%r10b % 167: 41 80 fa 0f cmp $0xf,%r10b @@ -230,29 +230,29 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> % 172: 49 c1 eb 04 shr $0x4,%r11 ?assertMatch( {_, 8}, - binary:match(CompiledCode, <<16#4c, 16#8b, 16#5f, 16#38, 16#49, 16#c1, 16#eb, 16#04>>) + binary:match(CompiledCode, <<16#4c, 16#8b, 16#5f, 16#60, 16#49, 16#c1, 16#eb, 16#04>>) ), % Check call to bs_start_match3 is followed by a skip of verify_is_boxed % The register value cache eliminates the redundant load after the store, % since %rax already holds the value. 
- % 48 8b 77 30 mov 0x30(%rdi),%rsi + % 48 8b 77 58 mov 0x58(%rdi),%rsi % 31 d2 xor %edx,%edx % ff d0 callq *%rax % 5a pop %rdx % 5e pop %rsi % 5f pop %rdi - % 48 89 47 40 mov %rax,0x40(%rdi) + % 48 89 47 68 mov %rax,0x68(%rdi) % 48 83 e0 fc and $0xfffffffffffffffc,%rax % As opposed to (without typed optimization, verify_is_boxed would be emitted): - % 48 8b 77 30 mov 0x30(%rdi),%rsi + % 48 8b 77 58 mov 0x58(%rdi),%rsi % 31 d2 xor %edx,%edx % ff d0 callq *%rax % 5a pop %rdx % 5e pop %rsi % 5f pop %rdi - % 48 89 47 40 mov %rax,0x40(%rdi) + % 48 89 47 68 mov %rax,0x68(%rdi) % 49 89 c3 mov %rax,%r11 % 41 80 e3 03 and $0x3,%r11b % 41 80 fb 02 cmp $0x2,%r11b @@ -266,8 +266,8 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> {_, 19}, binary:match( CompiledCode, - <<16#48, 16#8b, 16#77, 16#30, 16#31, 16#d2, 16#ff, 16#d0, 16#5a, 16#5e, 16#5f, 16#48, - 16#89, 16#47, 16#40, 16#48, 16#83, 16#e0, 16#fc>> + <<16#48, 16#8b, 16#77, 16#58, 16#31, 16#d2, 16#ff, 16#d0, 16#5a, 16#5e, 16#5f, 16#48, + 16#89, 16#47, 16#68, 16#48, 16#83, 16#e0, 16#fc>> ) ), @@ -282,7 +282,7 @@ verify_is_function_typed_optimization_x86_64_test() -> % for call % b6: 48 8b 42 10 mov 0x10(%rdx),%rax % ba: ff e0 jmpq *%rax - % bc: 48 8b 47 38 mov 0x38(%rdi),%rax + % bc: 48 8b 47 60 mov 0x60(%rdi),%rax % c0: 4c 8b 1e mov (%rsi),%r11 % c3: 45 8b 1b mov (%r11),%r11d % c6: 49 c1 e3 18 shl $0x18,%r11 @@ -291,7 +291,7 @@ verify_is_function_typed_optimization_x86_64_test() -> % As opposed to: % b6: 48 8b 42 10 mov 0x10(%rdx),%rax % ba: ff e0 jmpq *%rax - % bc: 48 8b 47 38 mov 0x38(%rdi),%rax + % bc: 48 8b 47 60 mov 0x60(%rdi),%rax % c0: 49 89 c3 mov %rax,%r11 % c3: 4d 89 da mov %r11,%r10 % c6: 41 80 e2 03 and $0x3,%r10b @@ -322,7 +322,7 @@ verify_is_function_typed_optimization_x86_64_test() -> {_, 20}, binary:match( CompiledCode, - <<16#48, 16#8b, 16#42, 16#10, 16#ff, 16#e0, 16#48, 16#8b, 16#47, 16#38, 16#4c, 16#8b, + <<16#48, 16#8b, 16#42, 16#10, 16#ff, 16#e0, 16#48, 16#8b, 16#47, 16#60, 
16#4c, 16#8b, 16#1e, 16#45, 16#8b, 16#1b, 16#49, 16#c1, 16#e3, 16#18>> ) ), @@ -574,16 +574,16 @@ fuse_tuple_multi_get_x86_64_test() -> ), % All three elements loaded from the same untagged pointer: % 4c 8b 58 08 mov 0x8(%rax),%r11 (element 0 -> x[1]) - % 4c 89 5f 38 mov %r11,0x38(%rdi) + % 4c 89 5f 60 mov %r11,0x60(%rdi) % 4c 8b 58 10 mov 0x10(%rax),%r11 (element 1 -> x[2]) - % 4c 89 5f 40 mov %r11,0x40(%rdi) + % 4c 89 5f 68 mov %r11,0x68(%rdi) % 4c 8b 58 18 mov 0x18(%rax),%r11 (element 2 -> x[0]) ?assertMatch( {_, _}, binary:match( CompiledCode, - <<16#4c, 16#8b, 16#58, 16#08, 16#4c, 16#89, 16#5f, 16#38, 16#4c, 16#8b, 16#58, 16#10, - 16#4c, 16#89, 16#5f, 16#40, 16#4c, 16#8b, 16#58, 16#18>> + <<16#4c, 16#8b, 16#58, 16#08, 16#4c, 16#89, 16#5f, 16#60, 16#4c, 16#8b, 16#58, 16#10, + 16#4c, 16#89, 16#5f, 16#68, 16#4c, 16#8b, 16#58, 16#18>> ) ), ok. diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index 21783251be..3d9107f94e 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -405,7 +405,10 @@ assert_stream(Arch, Dump, Stream) -> true -> ok; false -> - diff_disasm(Arch, Expected, Actual), + case erlang:system_info(machine) of + "BEAM" -> diff_disasm(Arch, Expected, Actual); + "ATOM" -> ok + end, ?assertEqual(Expected, Actual) end. @@ -433,8 +436,10 @@ assert_stream(Arch, Dump, Stream, File, _Line) -> Actual end, update_test_source(Arch, Dump, DisasmInput, File); - _ -> - diff_disasm(Arch, Expected, Actual) + {"BEAM", _} -> + diff_disasm(Arch, Expected, Actual); + {"ATOM", _} -> + ok end, ?assertEqual(Expected, Actual) end. 
@@ -629,18 +634,38 @@ replace_dump_in_source(File, OldDump, NewDumpRaw) -> case find_dump_tokens(Tokens, OldDump) of {StartLine, EndLine} -> Lines = binary:split(Content, <<"\n">>, [global]), - %% Get indent from the first string line (StartLine + 1 is first content) - FirstContentLine = lists:nth(StartLine + 1, Lines), - Indent = get_indent(FirstContentLine), - NewDumpLines = format_dump_lines(NewDumpRaw, Indent), - %% Replace: keep lines up to StartLine (the << line), - %% insert new content, keep from EndLine (the >> line) onward - Before = lists:sublist(Lines, 1, StartLine), - After = lists:nthtail(EndLine - 1, Lines), - NewLines = Before ++ NewDumpLines ++ After, - NewContent = iolist_to_binary(lists:join(<<"\n">>, NewLines)), - ok = file:write_file(File, NewContent), - io:format("Updated ~s at line ~p~n", [File, StartLine]), + %% When `<<` and `>>` are on different lines, we keep the `<<` line + %% (StartLine) and the `>>` line (EndLine) and replace what's + %% between. When they're on the same line (single-line Dump = + %% <<"...">>), we keep that line and replace only the contents + %% between `<<` and `>>` with the new single-line literal. + if + StartLine < EndLine -> + %% Get indent from the first string line (StartLine + 1 is first content) + FirstContentLine = lists:nth(StartLine + 1, Lines), + Indent = get_indent(FirstContentLine), + NewDumpLines = format_dump_lines(NewDumpRaw, Indent), + Before = lists:sublist(Lines, 1, StartLine), + After = lists:nthtail(EndLine - 1, Lines), + NewLines = Before ++ NewDumpLines ++ After, + NewContent = iolist_to_binary(lists:join(<<"\n">>, NewLines)), + ok = file:write_file(File, NewContent), + io:format("Updated ~s at line ~p~n", [File, StartLine]); + StartLine =:= EndLine -> + %% Same-line `Dump = <<"...">>,` — replace inside the + %% literal in place, preserving the line's structure.
+ LineBin = lists:nth(StartLine, Lines), + LineStr = binary_to_list(LineBin), + {Prefix, Suffix} = split_bin_literal(LineStr), + NewBin = format_bin_literal(NewDumpRaw), + NewLineStr = Prefix ++ NewBin ++ Suffix, + Before = lists:sublist(Lines, 1, StartLine - 1), + After = lists:nthtail(StartLine, Lines), + NewLines = Before ++ [iolist_to_binary(NewLineStr)] ++ After, + NewContent = iolist_to_binary(lists:join(<<"\n">>, NewLines)), + ok = file:write_file(File, NewContent), + io:format("Updated ~s at line ~p~n", [File, StartLine]) + end, ok; not_found -> io:format("WARNING: Could not find old dump in ~s~n", [File]), @@ -711,3 +736,36 @@ format_dump_lines([Line], Indent, Acc) -> format_dump_lines([Line | Rest], Indent, Acc) -> Formatted = iolist_to_binary([Indent, $", Line, "\\n", $"]), format_dump_lines(Rest, Indent, [Formatted | Acc]). + +%% Split a line like ` Dump = <<"...">>,` into the part up to and including +%% the opening `<<` (` Dump = <<`) and the part from the closing `>>` onward +%% (`>>,`). Only the text between the first `<<` and the following `>>` is dropped. +split_bin_literal(Line) -> + {Prefix, Tail} = split_at(Line, "<<", []), + {_, AfterClose} = split_at(Tail, ">>", []), + {Prefix ++ "<<", ">>" ++ AfterClose}. + +split_at([], _Needle, Acc) -> + {lists:reverse(Acc), []}; +split_at(Str, Needle, Acc) -> + case lists:prefix(Needle, Str) of + true -> + {lists:reverse(Acc), lists:nthtail(length(Needle), Str)}; + false -> + [C | Rest] = Str, + split_at(Rest, Needle, [C | Acc]) + end. + +%% Format a raw dump string as a quoted literal suitable for placement +%% inside a `<<...>>` on a single line: `"line1\nline2"`. Newlines in +%% the input become escaped `\n` inside the literal. +format_bin_literal(DumpRaw) -> + RawLines0 = string:split(DumpRaw, "\n", all), + RawLines = lists:reverse( + lists:dropwhile( + fun(L) -> string:trim(L) =:= "" end, + lists:reverse(RawLines0) + ) + ), + Joined = lists:join("\\n", RawLines), + [$" | lists:flatten(Joined) ++ [$"]].
diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 6ef0033d9f..be1c7225a1 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -183,36 +183,36 @@ call_primitive_few_regs_test() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" - " 8: 4c 8b 57 40 mov 0x40(%rdi),%r10\n" - " c: 4c 8b 4f 48 mov 0x48(%rdi),%r9\n" - " 10: 4c 8b 47 50 mov 0x50(%rdi),%r8\n" - " 14: 48 8b 4f 58 mov 0x58(%rdi),%rcx\n" - " 18: 57 push %rdi\n" - " 19: 56 push %rsi\n" - " 1a: 52 push %rdx\n" - " 1b: 41 51 push %r9\n" - " 1d: 41 52 push %r10\n" - " 1f: 41 53 push %r11\n" - " 21: 50 push %rax\n" - " 22: 48 8b 92 c8 01 00 00 mov 0x1c8(%rdx),%rdx\n" - " 29: 52 push %rdx\n" - " 2a: 48 89 c7 mov %rax,%rdi\n" - " 2d: 4c 89 de mov %r11,%rsi\n" - " 30: 4c 89 c2 mov %r8,%rdx\n" - " 33: 4c 87 c9 xchg %r9,%rcx\n" - " 36: 4d 89 c8 mov %r9,%r8\n" - " 39: 58 pop %rax\n" - " 3a: ff d0 callq *%rax\n" - " 3c: 49 89 c0 mov %rax,%r8\n" - " 3f: 58 pop %rax\n" - " 40: 41 5b pop %r11\n" - " 42: 41 5a pop %r10\n" - " 44: 41 59 pop %r9\n" - " 46: 5a pop %rdx\n" - " 47: 5e pop %rsi\n" - " 48: 5f pop %rdi" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" + " 8: 4c 8b 57 68 mov 0x68(%rdi),%r10\n" + " c: 4c 8b 4f 70 mov 0x70(%rdi),%r9\n" + " 10: 4c 8b 47 78 mov 0x78(%rdi),%r8\n" + " 14: 48 8b 8f 80 00 00 00 mov 0x80(%rdi),%rcx\n" + " 1b: 57 push %rdi\n" + " 1c: 56 push %rsi\n" + " 1d: 52 push %rdx\n" + " 1e: 41 51 push %r9\n" + " 20: 41 52 push %r10\n" + " 22: 41 53 push %r11\n" + " 24: 50 push %rax\n" + " 25: 48 8b 92 c8 01 00 00 mov 0x1c8(%rdx),%rdx\n" + " 2c: 52 push %rdx\n" + " 2d: 48 89 c7 mov %rax,%rdi\n" + " 30: 4c 89 de mov %r11,%rsi\n" + " 33: 4c 89 c2 mov %r8,%rdx\n" + " 36: 4c 87 c9 xchg %r9,%rcx\n" + " 39: 4d 89 c8 mov %r9,%r8\n" + " 3c: 58 pop %rax\n" + " 3d: ff d0 callq *%rax\n" + " 3f: 49 89 c0 mov %rax,%r8\n" + 
" 42: 58 pop %rax\n" + " 43: 41 5b pop %r11\n" + " 45: 41 5a pop %r10\n" + " 47: 41 59 pop %r9\n" + " 49: 5a pop %rdx\n" + " 4a: 5e pop %rsi\n" + " 4b: 5f pop %rdi" >>, ?assertStream(x86_64, Dump, Stream). @@ -297,12 +297,11 @@ call_primitive_last_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: b8 01 00 00 00 mov $0x1,%eax\n" - " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 5: 4c 8b 5f 58 mov 0x58(%rdi),%r11\n" " 9: 48 85 c0 test %rax,%rax\n" " c: 75 05 jne 0x13\n" " e: 48 8b 02 mov (%rdx),%rax\n" - " 11: ff .byte 0xff\n" - " 12: e0 .byte 0xe0" + " 11: ff e0 jmp *%rax" >>, ?assertStream(x86_64, Dump, Stream). @@ -313,7 +312,7 @@ jump_to_label_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: b8 01 00 00 00 mov $0x1,%eax\n" - " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 5: 4c 8b 5f 58 mov 0x58(%rdi),%r11\n" " 9: 48 85 c0 test %rax,%rax\n" " c: 75 05 jne 0x13\n" " e: e9 fc ff ff ff jmp 0xf" @@ -327,7 +326,7 @@ jump_to_offset_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State0), Dump = << " 0: b8 01 00 00 00 mov $0x1,%eax\n" - " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 5: 4c 8b 5f 58 mov 0x58(%rdi),%r11\n" " 9: 48 85 c0 test %rax,%rax\n" " c: 75 05 jne 0x13\n" " e: e9 ed 00 00 00 jmp 0x100" @@ -349,14 +348,12 @@ jump_to_continuation_if_block_preserves_cache_test() -> Dump = << " 0: b8 00 01 00 00 mov $0x100,%eax\n" " 5: 41 bb 01 00 00 00 mov $0x1,%r11d\n" - " b: 4c 8b 57 30 mov 0x30(%rdi),%r10\n" + " b: 4c 8b 57 58 mov 0x58(%rdi),%r10\n" " f: 4d 85 db test %r11,%r11\n" " 12: 75 0d jne 0x21\n" - " 14: 4c 8d 1d e5 ff ff ff lea -0x1b(%rip),%r11\n" + " 14: 4c 8d 1d e5 ff ff ff lea -0x1b(%rip),%r11 # 0x0\n" " 1b: 49 01 c3 add %rax,%r11\n" - " 1e: 41 rex.B\n" - " 1f: ff .byte 0xff\n" - " 20: e3 .byte 0xe3" + " 1e: 41 ff e3 jmp *%r11" >>, ?assertStream(x86_64, Dump, Stream). 
@@ -368,10 +365,10 @@ move_array_element_x_reg_invalidates_vm_loc_cache_test() -> {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" - " 4: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" - " 8: 4d 8b 13 mov (%r11),%r10\n" - " b: 4c 89 57 58 mov %r10,0x58(%rdi)" + " 0: 48 8b 87 80 00 00 00 mov 0x80(%rdi),%rax\n" + " 7: 4c 8b 5f 58 mov 0x58(%rdi),%r11\n" + " b: 4d 8b 13 mov (%r11),%r10\n" + " e: 4c 89 97 80 00 00 00 mov %r10,0x80(%rdi)" >>, ?assertStream(x86_64, Dump, Stream). @@ -445,9 +442,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 8b 00 mov (%rax),%rax\n" - " 7: 48 89 87 b8 00 00 00 mov %rax,0xb8(%rdi)\n" + " 7: 48 89 87 e0 00 00 00 mov %rax,0xe0(%rdi)\n" >>, ?assertStream(x86_64, Dump, Stream). @@ -457,9 +454,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 83 c0 38 add $0x38,%rax\n" - " 8: 48 89 47 28 mov %rax,0x28(%rdi)\n" + " 8: 48 89 47 50 mov %rax,0x50(%rdi)\n" >>, ?assertStream(x86_64, Dump, Stream). 
@@ -483,8 +480,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 85 c0 test %rax,%rax\n" " b: 7d 04 jge 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -502,8 +499,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 4c 39 d8 cmp %r11,%rax\n" " b: 7d 04 jge 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -521,8 +518,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 85 c0 test %rax,%rax\n" " b: 75 04 jne 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -540,8 +537,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 85 c0 test %rax,%rax\n" " b: 75 04 jne 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -559,8 +556,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 85 c0 test %eax,%eax\n" " a: 75 04 jne 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -578,8 +575,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 85 c0 test %eax,%eax\n" " a: 75 
04 jne 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -597,8 +594,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 3b cmp $0x3b,%rax\n" " c: 74 04 je 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -616,8 +613,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 3b cmp $0x3b,%rax\n" " c: 74 04 je 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -635,8 +632,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 83 f8 2a cmp $0x2a,%eax\n" " b: 74 04 je 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -654,8 +651,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 83 f8 2a cmp $0x2a,%eax\n" " b: 74 04 je 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -673,8 +670,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 3b cmp $0x3b,%rax\n" " c: 75 04 jne 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -692,8 +689,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 
0x60(%rdi),%r11\n" " 8: 48 83 f8 3b cmp $0x3b,%rax\n" " c: 75 04 jne 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -711,8 +708,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 83 f8 2a cmp $0x2a,%eax\n" " b: 75 04 jne 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -730,8 +727,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 83 f8 2a cmp $0x2a,%eax\n" " b: 75 04 jne 0x11\n" " d: 49 83 c3 02 add $0x2,%r11" @@ -749,8 +746,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 84 c0 test %al,%al\n" " a: 75 04 jne 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -768,8 +765,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 84 c0 test %al,%al\n" " a: 75 04 jne 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -787,8 +784,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 84 c0 test %al,%al\n" " a: 74 04 je 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -806,8 +803,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 
0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 84 c0 test %al,%al\n" " a: 74 04 je 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -825,8 +822,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: a8 07 test $0x7,%al\n" " a: 74 04 je 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -844,8 +841,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: a8 07 test $0x7,%al\n" " a: 74 04 je 0x10\n" " c: 49 83 c3 02 add $0x2,%r11" @@ -863,8 +860,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 49 89 c2 mov %rax,%r10\n" " b: 41 80 e2 0f and $0xf,%r10b\n" " f: 41 80 fa 0f cmp $0xf,%r10b\n" @@ -884,8 +881,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 24 0f and $0xf,%al\n" " a: 80 f8 0f cmp $0xf,%al\n" " d: 74 04 je 0x13\n" @@ -904,8 +901,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 64 cmp $0x64,%rax\n" " c: 7e 04 jle 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -923,8 +920,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 
mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 64 cmp $0x64,%rax\n" " c: 7e 04 jle 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -942,8 +939,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 64 cmp $0x64,%rax\n" " c: 7d 04 jge 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -961,8 +958,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 64 cmp $0x64,%rax\n" " c: 7d 04 jge 0x12\n" " e: 49 83 c3 02 add $0x2,%r11" @@ -980,8 +977,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 49 ba 00 00 00 00 01 movabs $0x100000000,%r10\n" " f: 00 00 00 \n" " 12: 4c 39 d0 cmp %r10,%rax\n" @@ -1001,8 +998,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 49 ba 00 00 00 00 01 movabs $0x100000000,%r10\n" " f: 00 00 00 \n" " 12: 4c 39 d0 cmp %r10,%rax\n" @@ -1022,8 +1019,8 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 49 ba 00 00 00 00 01 movabs $0x100000000,%r10\n" " f: 00 00 00 \n" " 12: 4c 39 d0 cmp %r10,%rax\n" @@ -1043,8 +1040,8 @@ 
if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 49 ba 00 00 00 00 01 movabs $0x100000000,%r10\n" " f: 00 00 00 \n" " 12: 4c 39 d0 cmp %r10,%rax\n" @@ -1074,8 +1071,8 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11\n" " 8: 48 83 f8 3b cmp $0x3b,%rax\n" " c: 75 06 jne 0x14\n" " e: 49 83 c3 02 add $0x2,%r11\n" @@ -1093,7 +1090,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 48 c1 e8 03 shr $0x3,%rax" >>, ?assertStream(x86_64, Dump, Stream) @@ -1106,7 +1103,7 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 49 89 c3 mov %rax,%r11\n" " 7: 49 c1 eb 03 shr $0x3,%r11" >>, @@ -1121,7 +1118,7 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 48 c1 e0 03 shl $0x3,%rax" >>, ?assertStream(x86_64, Dump, Stream). 
@@ -1234,7 +1231,7 @@ call_bif_with_large_literal_integer_test() -> " 34: 52 push %rdx\n" " 35: 31 f6 xor %esi,%esi\n" " 37: ba 01 00 00 00 mov $0x1,%edx\n" - " 3c: 48 8b 4f 30 mov 0x30(%rdi),%rcx\n" + " 3c: 48 8b 4f 58 mov 0x58(%rdi),%rcx\n" " 40: 4d 89 d8 mov %r11,%r8\n" " 43: ff d0 callq *%rax\n" " 45: 5a pop %rdx\n" @@ -1245,7 +1242,7 @@ call_bif_with_large_literal_integer_test() -> " 4d: 48 8b 42 30 mov 0x30(%rdx),%rax\n" " 51: ba 51 00 00 00 mov $0x51,%edx\n" " 56: ff e0 jmpq *%rax\n" - " 58: 48 89 47 30 mov %rax,0x30(%rdi)" + " 58: 48 89 47 58 mov %rax,0x58(%rdi)" >>, ?assertStream(x86_64, Dump, Stream). @@ -1259,12 +1256,12 @@ get_list_test() -> ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" - " 8: 4c 8b 5f 28 mov 0x28(%rdi),%r11\n" + " 8: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " c: 4c 8b 50 08 mov 0x8(%rax),%r10\n" " 10: 4d 89 53 08 mov %r10,0x8(%r11)\n" - " 14: 4c 8b 5f 28 mov 0x28(%rdi),%r11\n" + " 14: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " 18: 4c 8b 10 mov (%rax),%r10\n" " 1b: 4d 89 13 mov %r10,(%r11)\n" >>, @@ -1309,7 +1306,7 @@ is_integer_test() -> Dump = << " 0: e9 ff ff ff ff jmpq 0x4\n" " 5: e9 36 01 00 00 jmpq 0x140\n" - " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " a: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " e: 49 89 c3 mov %rax,%r11\n" " 11: 41 80 e3 0f and $0xf,%r11b\n" " 15: 41 80 fb 0f cmp $0xf,%r11b\n" @@ -1366,7 +1363,7 @@ is_number_test() -> Dump = << " 0: e9 ff ff ff ff jmpq 0x4\n" " 5: e9 43 01 00 00 jmpq 0x14d\n" - " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " a: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " e: 49 89 c3 mov %rax,%r11\n" " 11: 41 80 e3 0f and $0xf,%r11b\n" " 15: 41 80 fb 0f cmp $0xf,%r11b\n" @@ -1408,7 +1405,7 @@ is_boolean_test() -> Dump = << " 0: e9 ff ff ff ff jmpq 0x4\n" " 5: e9 15 01 00 00 jmpq 0x11f\n" - " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " a: 48 8b 47 58 mov 
0x58(%rdi),%rax\n" " e: 48 83 f8 4b cmp $0x4b,%rax\n" " 12: 74 0b je 0x1f\n" " 14: 48 83 f8 0b cmp $0xb,%rax\n" @@ -1435,7 +1432,7 @@ call_ext_test() -> " 19: 8b 00 mov (%rax),%eax\n" " 1b: 48 c1 e0 18 shl $0x18,%rax\n" " 1f: 48 0d 0c 01 00 00 or $0x10c,%rax\n" - " 25: 48 89 87 b8 00 00 00 mov %rax,0xb8(%rdi)\n" + " 25: 48 89 87 e0 00 00 00 mov %rax,0xe0(%rdi)\n" " 2c: 48 8b 42 20 mov 0x20(%rdx),%rax\n" " 30: ba 02 00 00 00 mov $0x2,%edx\n" " 35: b9 05 00 00 00 mov $0x5,%ecx\n" @@ -1480,7 +1477,7 @@ call_fun_test() -> " c: 48 89 46 08 mov %rax,0x8(%rsi)\n" " 10: 48 8b 42 10 mov 0x10(%rdx),%rax\n" " 14: ff e0 jmpq *%rax\n" - " 16: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 16: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 1a: 49 89 c3 mov %rax,%r11\n" " 1d: 4d 89 da mov %r11,%r10\n" " 20: 41 80 e2 03 and $0x3,%r10b\n" @@ -1506,7 +1503,7 @@ call_fun_test() -> " 6d: 45 8b 1b mov (%r11),%r11d\n" " 70: 49 c1 e3 18 shl $0x18,%r11\n" " 74: 49 81 cb 44 02 00 00 or $0x244,%r11\n" - " 7b: 4c 89 9f b8 00 00 00 mov %r11,0xb8(%rdi)\n" + " 7b: 4c 89 9f e0 00 00 00 mov %r11,0xe0(%rdi)\n" " 82: 4c 8b 9a 00 01 00 00 mov 0x100(%rdx),%r11\n" " 89: 48 89 c2 mov %rax,%rdx\n" " 8c: 31 c9 xor %ecx,%ecx\n" @@ -1522,14 +1519,14 @@ decrement_reductions_invalidates_cache_test() -> {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), Stream = ?BACKEND:stream(State4), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: ff 4e 10 decl 0x10(%rsi)\n" " 7: 75 11 jne 0x1a\n" " 9: 48 8d 05 0a 00 00 00 lea 0xa(%rip),%rax # 0x1a\n" " 10: 48 89 46 08 mov %rax,0x8(%rsi)\n" " 14: 48 8b 42 10 mov 0x10(%rdx),%rax\n" " 18: ff e0 jmp *%rax\n" - " 1a: 48 8b 47 30 mov 0x30(%rdi),%rax" + " 1a: 48 8b 47 58 mov 0x58(%rdi),%rax" >>, ?assertStream(x86_64, Dump, Stream). 
@@ -1547,12 +1544,12 @@ move_to_vm_register_test_() -> [ ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << - " 0: 48 83 67 30 00 andq $0x0,0x30(%rdi)" + " 0: 48 83 67 58 00 andq $0x0,0x58(%rdi)" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << - " 0: 48 83 a7 b0 00 00 00 andq $0x0,0xb0(%rdi)\n" + " 0: 48 83 a7 d8 00 00 00 andq $0x0,0xd8(%rdi)\n" " 7: 00 " >>) end), @@ -1563,13 +1560,13 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 83 60 10 00 andq $0x0,0x10(%rax)" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 83 a0 a0 00 00 00 andq $0x0,0xa0(%rax)\n" " b: 00 " >>) @@ -1577,26 +1574,26 @@ move_to_vm_register_test_() -> %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << - " 0: 48 c7 47 30 2a 00 00 movq $0x2a,0x30(%rdi)\n" + " 0: 48 c7 47 58 2a 00 00 movq $0x2a,0x58(%rdi)\n" " 7: 00 " >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << - " 0: 48 c7 87 b0 00 00 00 movq $0x2a,0xb0(%rdi)\n" + " 0: 48 c7 87 d8 00 00 00 movq $0x2a,0xd8(%rdi)\n" " 7: 2a 00 00 00 " >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 c7 40 10 2a 00 00 movq $0x2a,0x10(%rax)\n" " b: 00 " >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 c7 80 a0 00 00 00 movq $0x2a,0xa0(%rax)\n" " b: 2a 00 00 00 " >>) @@ -1611,14 +1608,14 @@ move_to_vm_register_test_() -> %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 48 8b 47 38 mov 
0x38(%rdi),%rax\n" - " 4: 48 89 47 40 mov %rax,0x40(%rdi)" + " 0: 48 8b 47 60 mov 0x60(%rdi),%rax\n" + " 4: 48 89 47 68 mov %rax,0x68(%rdi)" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r8}, << - " 0: 48 8b 47 38 mov 0x38(%rdi),%rax\n" + " 0: 48 8b 47 60 mov 0x60(%rdi),%rax\n" " 4: 49 89 00 mov %rax,(%r8)" >>) end), @@ -1626,42 +1623,42 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, {ptr, r9}, {x_reg, 3}, << " 0: 49 8b 01 mov (%r9),%rax\n" - " 3: 48 89 47 48 mov %rax,0x48(%rdi)" + " 3: 48 89 47 70 mov %rax,0x70(%rdi)" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 28 mov 0x28(%rdi),%r11\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " 8: 49 89 43 08 mov %rax,0x8(%r11)" >>) end), %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 8b 00 mov (%rax),%rax\n" - " 7: 48 89 47 48 mov %rax,0x48(%rdi)" + " 7: 48 89 47 70 mov %rax,0x70(%rdi)" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 8b 40 08 mov 0x8(%rax),%rax\n" - " 8: 48 89 47 48 mov %rax,0x48(%rdi)" + " 8: 48 89 47 70 mov %rax,0x70(%rdi)" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, rax, {x_reg, 0}, << - " 0: 48 89 47 30 mov %rax,0x30(%rdi)" + " 0: 48 89 47 58 mov %rax,0x58(%rdi)" >>) end), ?_test(begin move_to_vm_register_test0(State0, rax, {x_reg, extra}, << - " 0: 48 89 87 b0 00 00 00 mov %rax,0xb0(%rdi)" + " 0: 48 89 87 d8 00 00 00 mov %rax,0xd8(%rdi)" >>) end), %% Test: Atom register to ptr @@ -1673,7 +1670,7 @@ move_to_vm_register_test_() 
-> %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, rax, {y_reg, 0}, << - " 0:\t48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0:\t48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4:\t48 89 00 mov %rax,(%rax)" >>) end), @@ -1682,19 +1679,19 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, 0}, << " 0: 48 b8 f0 de bc 9a 78 movabs $0x123456789abcdef0,%rax\n" " 7: 56 34 12 \n" - " a: 48 89 47 30 mov %rax,0x30(%rdi)" + " a: 48 89 47 58 mov %rax,0x58(%rdi)" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, extra}, << " 0: 48 b8 f0 de bc 9a 78 movabs $0x123456789abcdef0,%rax\n" " 7: 56 34 12 \n" - " a: 48 89 87 b0 00 00 00 mov %rax,0xb0(%rdi)" + " a: 48 89 87 d8 00 00 00 mov %rax,0xd8(%rdi)" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 2}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 49 bb f0 de bc 9a 78 movabs $0x123456789abcdef0,%r11\n" " b: 56 34 12 \n" " e: 4c 89 58 10 mov %r11,0x10(%rax)" @@ -1702,7 +1699,7 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 20}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 49 bb f0 de bc 9a 78 movabs $0x123456789abcdef0,%r11\n" " b: 56 34 12 \n" " e: 4c 89 98 a0 00 00 00 mov %r11,0xa0(%rax)" @@ -1719,30 +1716,30 @@ move_to_vm_register_test_() -> %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 48 8b 87 a8 00 00 00 mov 0xa8(%rdi),%rax\n" - " 7: 4c 8b 5f 28 mov 0x28(%rdi),%r11\n" + " 0: 48 8b 87 d0 00 00 00 mov 0xd0(%rdi),%rax\n" + " 7: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " b: 49 89 83 f8 00 00 00 mov %rax,0xf8(%r11)" >>) end), ?_test(begin move_to_vm_register_test0(State0, {x_reg, extra}, {y_reg, 31}, << - " 0: 48 8b 87 b0 00 00 00 mov 0xb0(%rdi),%rax\n" - " 7: 4c 8b 5f 28 
mov 0x28(%rdi),%r11\n" + " 0: 48 8b 87 d8 00 00 00 mov 0xd8(%rdi),%rax\n" + " 7: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " b: 49 89 83 f8 00 00 00 mov %rax,0xf8(%r11)" >>) end), %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 8b 80 f8 00 00 00 mov 0xf8(%rax),%rax\n" - " b: 48 89 87 a8 00 00 00 mov %rax,0xa8(%rdi)" + " b: 48 89 87 d0 00 00 00 mov %rax,0xd0(%rdi)" >>) end), %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << - " 0: 48 c7 47 30 ff ff ff movq $0xffffffffffffffff,0x30(%rdi)\n" + " 0: 48 c7 47 58 ff ff ff movq $0xffffffffffffffff,0x58(%rdi)\n" " 7: ff " >>) end), @@ -1754,9 +1751,9 @@ move_to_vm_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 48 8b 40 08 mov 0x8(%rax),%rax\n" - " 8: 4c 8b 9f c0 00 00 00 mov 0xc0(%rdi),%r11\n" + " 8: 4c 8b 9f e8 00 00 00 mov 0xe8(%rdi),%r11\n" " f: 49 89 43 18 mov %rax,0x18(%r11)" >>, ?assertStream(x86_64, Dump, Stream) @@ -1780,7 +1777,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r8, 2, {x_reg, 0}, << " 0: 49 8b 40 10 mov 0x10(%r8),%rax\n" - " 4: 48 89 47 30 mov %rax,0x30(%rdi)" + " 4: 48 89 47 58 mov %rax,0x58(%rdi)" >>) end), %% move_array_element: reg[x] to ptr @@ -1793,7 +1790,7 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, r8, 1, {y_reg, 2}, << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 4d 8b 58 08 mov 0x8(%r8),%r11\n" " 8: 4c 89 58 10 mov %r11,0x10(%rax)" >>) @@ -1807,7 +1804,7 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg (high index) ?_test(begin move_array_element_test0(State0, r8, 7, {y_reg, 31}, << - " 0: 48 8b 47 28 mov 
0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 4d 8b 58 38 mov 0x38(%r8),%r11\n" " 8: 4c 89 98 f8 00 00 00 mov %r11,0xf8(%rax)" >>) @@ -1816,7 +1813,7 @@ move_array_element_test_() -> ?_test(begin move_array_element_test0(State0, r8, 7, {x_reg, 15}, << " 0: 49 8b 40 38 mov 0x38(%r8),%rax\n" - " 4: 48 89 87 a8 00 00 00 mov %rax,0xa8(%rdi)" + " 4: 48 89 87 d0 00 00 00 mov %rax,0xd0(%rdi)" >>) end), %% move_array_element: reg_x[reg_y] to x_reg @@ -1827,7 +1824,7 @@ move_array_element_test_() -> " 4: 48 c1 e0 03 shl $0x3,%rax\n" " 8: 4c 01 c0 add %r8,%rax\n" " b: 48 8b 00 mov (%rax),%rax\n" - " e: 48 89 47 40 mov %rax,0x40(%rdi)\n" + " e: 48 89 47 68 mov %rax,0x68(%rdi)\n" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) @@ -1846,7 +1843,7 @@ move_array_element_test_() -> {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, << " 0: 49 8b 40 20 mov 0x20(%r8),%rax\n" - " 4: 4c 8b 5f 28 mov 0x28(%rdi),%r11\n" + " 4: 4c 8b 5f 50 mov 0x50(%rdi),%r11\n" " 8: 48 c1 e0 03 shl $0x3,%rax\n" " c: 4c 01 c0 add %r8,%rax\n" " f: 48 8b 00 mov (%rax),%rax\n" @@ -1888,7 +1885,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 49 89 40 10 mov %rax,0x10(%r8)" >>, ?assertStream(x86_64, Dump, Stream) @@ -1898,7 +1895,7 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 49 89 40 18 mov %rax,0x18(%r8)" >>, ?assertStream(x86_64, Dump, Stream) @@ -1985,7 +1982,7 @@ cached_load_same_xreg_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax" + " 0: 48 8b 47 58 mov 
0x58(%rdi),%rax" >>, ?assertStream(x86_64, Dump, Stream). @@ -1998,8 +1995,8 @@ cached_load_different_xreg_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 60 mov 0x60(%rdi),%r11" >>, ?assertStream(x86_64, Dump, Stream). @@ -2011,7 +2008,7 @@ cached_load_cp_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 87 b8 00 00 00 mov 0xb8(%rdi),%rax" + " 0: 48 8b 87 e0 00 00 00 mov 0xe0(%rdi),%rax" >>, ?assertStream(x86_64, Dump, Stream). @@ -2024,7 +2021,7 @@ cached_load_after_free_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax" >>, ?assertStream(x86_64, Dump, Stream). @@ -2038,7 +2035,7 @@ cached_move_to_vm_large_imm_reuse_test() -> Dump = << " 0: 48 b8 00 00 00 00 01 movabs $0x100000000,%rax\n" " 7: 00 00 00 \n" - " a: 48 89 47 30 mov %rax,0x30(%rdi)" + " a: 48 89 47 58 mov %rax,0x58(%rdi)" >>, ?assertStream(x86_64, Dump, Stream). @@ -2050,8 +2047,8 @@ cached_move_to_vm_x_reg_reuse_test() -> {State2, rax} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 38 mov 0x38(%rdi),%rax\n" - " 4: 48 89 47 30 mov %rax,0x30(%rdi)" + " 0: 48 8b 47 60 mov 0x60(%rdi),%rax\n" + " 4: 48 89 47 58 mov %rax,0x58(%rdi)" >>, ?assertStream(x86_64, Dump, Stream). @@ -2063,9 +2060,9 @@ cached_move_to_vm_y_reg_reuse_test() -> {State2, rax} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 0: 48 8b 47 50 mov 0x50(%rdi),%rax\n" " 4: 48 8b 00 mov (%rax),%rax\n" - " 7: 48 89 47 30 mov %rax,0x30(%rdi)" + " 7: 48 89 47 58 mov %rax,0x58(%rdi)" >>, ?assertStream(x86_64, Dump, Stream). 
@@ -2077,7 +2074,7 @@ cached_move_to_array_element_x_reg_reuse_test() -> {State2, rax} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 49 89 43 10 mov %rax,0x10(%r11)" >>, ?assertStream(x86_64, Dump, Stream). @@ -2094,7 +2091,7 @@ if_block_large_cond_reuse_imm_test() -> end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" " 4: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" " b: 00 00 00 \n" " e: 4c 39 d8 cmp %r11,%rax\n" diff --git a/tests/libs/jit/jit_xtensa_tests.erl b/tests/libs/jit/jit_xtensa_tests.erl index 2a7b677f16..f8151be3bd 100644 --- a/tests/libs/jit/jit_xtensa_tests.erl +++ b/tests/libs/jit/jit_xtensa_tests.erl @@ -160,7 +160,7 @@ move_to_native_register_xreg_test() -> Stream = ?BACKEND:stream(State1), ?assert(is_atom(Reg)), Dump = << - " 0: 0622f2 l32i a15, a2, 24" + " 0: 0b22f2 l32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -181,7 +181,7 @@ move_to_native_register_yreg_test() -> Stream = ?BACKEND:stream(State1), ?assert(byte_size(Stream) > 0), Dump = << - " 0: 0522e2 l32i a14, a2, 20\n" + " 0: 0a22e2 l32i a14, a2, 40\n" " 3: 032ef2 l32i a15, a14, 12" >>, ?assertStream(xtensa, Dump, Stream). @@ -192,7 +192,7 @@ move_to_vm_register_test() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 2aa0f2 movi a15, 42\n" - " 3: 0662f2 s32i a15, a2, 24" + " 3: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -202,7 +202,7 @@ move_to_vm_register_yreg_test() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 2aa0e2 movi a14, 42\n" - " 3: 0522f2 l32i a15, a2, 20\n" + " 3: 0a22f2 l32i a15, a2, 40\n" " 6: 026fe2 s32i a14, a15, 8" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -213,7 +213,7 @@ and_test() -> {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, 16#3F), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 3fa0e2 movi a14, 63\n" " 6: 10ffe0 and a15, a15, a14" >>, @@ -225,7 +225,7 @@ or_test() -> {State2, RegA} = ?BACKEND:or_(State1, {free, RegA}, 16#0F), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0fa0e2 movi a14, 15\n" " 6: 20ffe0 or a15, a15, a14" >>, @@ -237,7 +237,7 @@ add_test() -> {State2, RegA} = ?BACKEND:add(State1, {free, RegA}, 4), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 04cff2 addi a15, a15, 4" >>, ?assertStream(xtensa, Dump, Stream). @@ -248,7 +248,7 @@ sub_test() -> {State2, RegA} = ?BACKEND:sub(State1, {free, RegA}, 4), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: fccff2 addi a15, a15, -4" >>, ?assertStream(xtensa, Dump, Stream). @@ -296,7 +296,7 @@ shift_right_test() -> {State2, RegA} = ?BACKEND:shift_right(State1, {free, RegA}, 2), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 41f2f0 srli a15, a15, 2" >>, ?assertStream(xtensa, Dump, Stream). @@ -307,7 +307,7 @@ shift_left_test() -> {State2, RegA} = ?BACKEND:shift_left(State1, {free, RegA}, 2), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 11ffe0 slli a15, a15, 2" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -323,9 +323,9 @@ register_allocation_test() -> ?assertNotEqual(Reg1, Reg3), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52" >>, ?assertStream(xtensa, Dump, Stream). @@ -394,9 +394,9 @@ move_to_cp_test() -> State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 0522e2 l32i a14, a2, 20\n" + " 0: 0a22e2 l32i a14, a2, 40\n" " 3: 002ef2 l32i a15, a14, 0\n" - " 6: 1762f2 s32i a15, a2, 92" + " 6: 1c62f2 s32i a15, a2, 112" >>, ?assertStream(xtensa, Dump, Stream). @@ -660,10 +660,10 @@ return_if_not_equal_to_ctx_test() -> State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, Reg}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 041f27 beq a15, a2, 0xb\n" " 6: 0f2d mov.n a2, a15\n" - " 8: 000090 retw" + " 8: 000090 excw" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -745,13 +745,13 @@ cond_jump_to_label_test() -> " 1: ff .byte 0xff\n" " 2: ff .byte 0xff\n" " 3: ff .byte 0xff\n" - " 4: 00c136 entry a1, 96\n" + " 4: 00c136 excw\n" " 7: fffe51 l32r a5, 0x0 (0xffffffff)\n" " a: 032382 l32i a8, a3, 12\n" " d: 808850 add a8, a8, a5\n" " 10: 0008a0 jx a8\n" " 13: ff .byte 0xff\n" - " 14: 0622f2 l32i a15, a2, 24\n" + " 14: 0b22f2 l32i a15, a2, 44\n" " 17: 002f96 bltz a15, 0x1d\n" " 1a: 0005c6 j 0x35\n" " 1d: 000506 j 0x35\n" @@ -775,7 +775,7 @@ jump_to_continuation_test() -> State2 = ?BACKEND:jump_to_continuation(State1, {free, Reg}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0323e2 l32i a14, a3, 12\n" " 6: 80eef0 add a14, a14, a15\n" " 9: 03cee2 addi a14, a14, 3\n" @@ -797,7 +797,7 @@ if_block_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -819,7 +819,7 @@ if_else_block_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000146 j 0xf\n" " 9: 0041f0 break 1, 15\n" @@ -850,8 +850,8 @@ if_block_and_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 002f16 beqz a15, 0xc\n" " 9: 000206 j 0x15\n" " c: 002e16 beqz a14, 0x12\n" @@ -871,7 +871,7 @@ if_block_cond_lt_0_bare_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '<', 0}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f96 bltz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -885,7 +885,7 @@ if_block_cond_lt_b4const_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '<', 1}, fun(S) -> 
?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021fa6 blti a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -899,7 +899,7 @@ if_block_cond_lt_uint8_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '<', 100}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 64a0e2 movi a14, 100\n" " 6: 022fe7 blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -914,7 +914,7 @@ if_block_cond_lt_large_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '<', 1000}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 022fe7 blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -929,7 +929,7 @@ if_block_cond_uint8_lt_reg_test() -> State2 = ?BACKEND:if_block(State1, {42, '<', Reg}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 022ef7 blt a14, a15, 0xc\n" " 9: 000086 j 0xf\n" @@ -944,7 +944,7 @@ if_block_cond_large_lt_reg_test() -> State2 = ?BACKEND:if_block(State1, {1000, '<', Reg}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 022ef7 blt a14, a15, 0xc\n" " 9: 000086 j 0xf\n" @@ -960,8 +960,8 @@ if_block_cond_reg_lt_reg_test() -> State3 = ?BACKEND:if_block(State2, {Reg1, '<', Reg2}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 022fe7 blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -979,7 +979,7 
@@ if_block_cond_free_eq_0_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '==', 0}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -994,8 +994,8 @@ if_block_cond_eq_reg_test() -> State3 = ?BACKEND:if_block(State2, {Reg1, '==', Reg2}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -1012,8 +1012,8 @@ if_block_cond_free_eq_reg_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -1029,7 +1029,7 @@ if_block_cond_int_eq_zero_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1045,7 +1045,7 @@ if_block_cond_int_eq_val_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1060,7 +1060,7 @@ if_block_cond_eq_b4const_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '==', 1}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021f26 beqi a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1074,7 +1074,7 @@ if_block_cond_eq_uint8_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '==', 42}, fun(S) -> 
?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1094,8 +1094,8 @@ if_block_cond_free_eq_free_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -1109,7 +1109,7 @@ if_block_cond_eq_large_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '==', 1000}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1128,7 +1128,7 @@ if_block_cond_ne_zero_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '!=', 0}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f56 bnez a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1142,7 +1142,7 @@ if_block_cond_free_ne_zero_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '!=', 0}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f56 bnez a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1156,7 +1156,7 @@ if_block_cond_ne_b4const_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '!=', 1}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021f66 bnei a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1170,7 +1170,7 @@ if_block_cond_ne_uint8_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '!=', 42}, 
fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1186,8 +1186,8 @@ if_block_cond_ne_reg_test() -> State3 = ?BACKEND:if_block(State2, {Reg1, '!=', Reg2}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -1203,7 +1203,7 @@ if_block_cond_int_ne_val_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1218,7 +1218,7 @@ if_block_cond_ne_large_test() -> State2 = ?BACKEND:if_block(State1, {Reg, '!=', 1000}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1239,7 +1239,7 @@ if_block_cond_bool_eq_false_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1255,7 +1255,7 @@ if_block_cond_bool_ne_false_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f56 bnez a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1275,7 +1275,7 @@ if_block_cond_and_ne_zero_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0fa0e2 movi a14, 15\n" " 6: 10efe0 and a14, a15, a14\n" " 9: 002e56 bnez a14, 0xf\n" @@ 
-1293,7 +1293,7 @@ if_block_cond_and_nibble_ne_f_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: ffafe2 movi a14, -1\n" " 6: 30eef0 xor a14, a14, a15\n" " 9: 01ee40 slli a14, a14, 28\n" @@ -1312,7 +1312,7 @@ if_block_cond_free_and_nibble_ne_f_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 60f0f0 neg a15, a15\n" " 6: ffcff2 addi a15, a15, -1\n" " 9: 01ff40 slli a15, a15, 28\n" @@ -1332,8 +1332,8 @@ if_block_cond_and_ne_reg_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 0fdd mov.n a13, a15\n" " 8: ffa0c2 movi a12, 255\n" " b: 10ddc0 and a13, a13, a12\n" @@ -1352,7 +1352,7 @@ if_block_cond_and_ne_imm_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0fed mov.n a14, a15\n" " 5: ffa0d2 movi a13, 255\n" " 8: 10eed0 and a14, a14, a13\n" @@ -1373,8 +1373,8 @@ if_block_cond_free_and_ne_reg_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: ffa0d2 movi a13, 255\n" " 9: 10ffd0 and a15, a15, a13\n" " c: 029fe7 bne a15, a14, 0x12\n" @@ -1392,7 +1392,7 @@ if_block_cond_free_and_ne_imm_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: ffa0e2 movi a14, 255\n" " 6: 10ffe0 and a15, a15, a14\n" " 9: 2aa0e2 movi a14, 42\n" @@ -1511,16 +1511,16 @@ call_primitive_no_avail_test() -> {State11, _} = ?BACKEND:call_primitive(State10, 0, [ctx, jit_state]), Stream = ?BACKEND:stream(State11), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 
l32i a13, a2, 32\n" - " 9: 0922c2 l32i a12, a2, 36\n" - " c: 0a22b2 l32i a11, a2, 40\n" - " f: 0b22a2 l32i a10, a2, 44\n" - " 12: 0c2292 l32i a9, a2, 48\n" - " 15: 0d2272 l32i a7, a2, 52\n" - " 18: 0e2262 l32i a6, a2, 56\n" - " 1b: 0f2252 l32i a5, a2, 60\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" + " 9: 0e22c2 l32i a12, a2, 56\n" + " c: 0f22b2 l32i a11, a2, 60\n" + " f: 1022a2 l32i a10, a2, 64\n" + " 12: 112292 l32i a9, a2, 68\n" + " 15: 122272 l32i a7, a2, 72\n" + " 18: 132262 l32i a6, a2, 76\n" + " 1b: 142252 l32i a5, a2, 80\n" " 1e: 0c61f2 s32i a15, a1, 48\n" " 21: 0d61e2 s32i a14, a1, 52\n" " 24: 0e61d2 s32i a13, a1, 56\n" @@ -1531,7 +1531,7 @@ call_primitive_no_avail_test() -> " 33: 0024f2 l32i a15, a4, 0\n" " 36: 02ad mov.n a10, a2\n" " 38: 03bd mov.n a11, a3\n" - " 3a: 000fe0 callx8 a15\n" + " 3a: 000fe0 excw\n" " 3d: 0a8d mov.n a8, a10\n" " 3f: 0c21f2 l32i a15, a1, 48\n" " 42: 0d21e2 l32i a14, a1, 52\n" @@ -1622,7 +1622,7 @@ if_block_cond_free_lt_b4const_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '<', 1}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021fa6 blti a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1635,7 +1635,7 @@ if_block_cond_free_lt_uint8_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '<', 42}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 022fe7 blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1649,7 +1649,7 @@ if_block_cond_free_lt_large_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '<', 1000}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 022fe7 
blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1663,7 +1663,7 @@ if_block_cond_free_uint8_lt_test() -> State2 = ?BACKEND:if_block(State1, {42, '<', {free, Reg}}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 022ef7 blt a14, a15, 0xc\n" " 9: 000086 j 0xf\n" @@ -1679,7 +1679,7 @@ if_block_cond_free_large_lt_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 022ef7 blt a14, a15, 0xc\n" " 9: 000086 j 0xf\n" @@ -1696,8 +1696,8 @@ if_block_cond_free_reg_lt_reg_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 022fe7 blt a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ -1714,7 +1714,7 @@ if_block_cond_free_ne_b4const_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '!=', 1}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021f66 bnei a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1727,7 +1727,7 @@ if_block_cond_free_ne_uint8_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '!=', 42}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1744,8 +1744,8 @@ if_block_cond_free_ne_reg_test() -> ), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" " c: 0041f0 break 1, 15" @@ 
-1760,7 +1760,7 @@ if_block_cond_free_ne_large_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 029fe7 bne a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1778,7 +1778,7 @@ if_block_cond_free_eq_b4const_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '==', 1}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 021f26 beqi a15, 1, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1791,7 +1791,7 @@ if_block_cond_free_eq_uint8_test() -> State2 = ?BACKEND:if_block(State1, {{free, Reg}, '==', 42}, fun(S) -> ?BACKEND:debugger(S) end), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 2aa0e2 movi a14, 42\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1807,7 +1807,7 @@ if_block_cond_free_eq_large_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: e8a3e2 movi a14, 0x3e8\n" " 6: 021fe7 beq a15, a14, 0xc\n" " 9: 000086 j 0xf\n" @@ -1827,7 +1827,7 @@ if_block_cond_free_bool_eq_false_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f16 beqz a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1842,7 +1842,7 @@ if_block_cond_free_bool_ne_false_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 002f56 bnez a15, 0x9\n" " 6: 000086 j 0xc\n" " 9: 0041f0 break 1, 15" @@ -1861,7 +1861,7 @@ if_block_cond_free_and_ne_zero_test() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0fa0e2 movi a14, 15\n" " 6: 10efe0 and a14, a15, a14\n" " 9: 002e56 bnez a14, 0xf\n" @@ 
-1880,7 +1880,7 @@ shift_right_free_large_test() -> {State2, RegA} = ?BACKEND:shift_right(State1, {free, RegA}, 16), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 10a0e2 movi a14, 16\n" " 6: 400e00 ssr a14\n" " 9: 91f0f0 srl a15, a15" @@ -1898,7 +1898,7 @@ shift_right_new_reg_small_test() -> ?assertNotEqual(RegA, ResultReg), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 41e3f0 srli a14, a15, 3" >>, ?assertStream(xtensa, Dump, Stream). @@ -1914,7 +1914,7 @@ shift_right_new_reg_large_test() -> ?assertNotEqual(RegA, ResultReg), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 10a0e2 movi a14, 16\n" " 6: 400e00 ssr a14\n" " 9: 91e0f0 srl a14, a15" @@ -1931,7 +1931,7 @@ shift_right_arith_free_test() -> {State2, RegA} = ?BACKEND:shift_right_arith(State1, {free, RegA}, 3), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 21f3f0 srai a15, a15, 3" >>, ?assertStream(xtensa, Dump, Stream). @@ -1947,7 +1947,7 @@ shift_right_arith_new_reg_test() -> ?assertNotEqual(RegA, ResultReg), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 21e3f0 srai a14, a15, 3" >>, ?assertStream(xtensa, Dump, Stream). @@ -1963,9 +1963,9 @@ div_reg_test() -> {State3, RegA} = ?BACKEND:div_reg(State2, RegA, RegB), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: d2ffe0 quos a15, a15, a14" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: d2ffe0 excw" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -1980,9 +1980,9 @@ rem_reg_test() -> {State3, RegA} = ?BACKEND:rem_reg(State2, RegA, RegB), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: f2ffe0 rems a15, a15, a14" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: f2ffe0 excw" >>, ?assertStream(xtensa, Dump, Stream). @@ -1996,8 +1996,8 @@ move_to_vm_register_native_to_xreg_extra_test() -> State2 = ?BACKEND:move_to_vm_register(State1, Reg, {x_reg, extra}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 1662f2 s32i a15, a2, 88" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 1b62f2 s32i a15, a2, 108" >>, ?assertStream(xtensa, Dump, Stream). @@ -2011,7 +2011,7 @@ move_to_vm_register_native_to_ptr_test() -> State2 = ?BACKEND:move_to_vm_register(State1, Reg, {ptr, a5}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0065f2 s32i a15, a5, 0" >>, ?assertStream(xtensa, Dump, Stream). @@ -2026,8 +2026,8 @@ move_to_vm_register_native_to_yreg_test() -> State2 = ?BACKEND:move_to_vm_register(State1, Reg, {y_reg, 3}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0522e2 l32i a14, a2, 20\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0a22e2 l32i a14, a2, 40\n" " 6: 036ef2 s32i a15, a14, 12" >>, ?assertStream(xtensa, Dump, Stream). @@ -2042,7 +2042,7 @@ move_to_vm_register_large_int_test() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 00a1f2 movi a15, 0x100\n" - " 3: 0662f2 s32i a15, a2, 24" + " 3: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -2055,8 +2055,8 @@ move_to_vm_register_xreg_extra_src_test() -> State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, extra}, {x_reg, 0}), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 1622f2 l32i a15, a2, 88\n" - " 3: 0662f2 s32i a15, a2, 24" + " 0: 1b22f2 l32i a15, a2, 108\n" + " 3: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -2069,8 +2069,8 @@ move_to_vm_register_xreg_src_test() -> State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 0}, {x_reg, 1}), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0762f2 s32i a15, a2, 28" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c62f2 s32i a15, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). @@ -2084,7 +2084,7 @@ move_to_vm_register_ptr_src_test() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 0025f2 l32i a15, a5, 0\n" - " 3: 0662f2 s32i a15, a2, 24" + " 3: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -2097,9 +2097,9 @@ move_to_vm_register_yreg_src_test() -> State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 1}), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 0522e2 l32i a14, a2, 20\n" + " 0: 0a22e2 l32i a14, a2, 40\n" " 3: 002ef2 l32i a15, a14, 0\n" - " 6: 0762f2 s32i a15, a2, 28" + " 6: 0c62f2 s32i a15, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). @@ -2227,17 +2227,17 @@ call_primitive_int64_at_a14_test() -> " 5: 02ad mov.n a10, a2\n" " 7: 03bd mov.n a11, a3\n" " 9: 03a0c2 movi a12, 3\n" - " c: 0622d2 l32i a13, a2, 24\n" + " c: 0b22d2 l32i a13, a2, 44\n" " f: 000146 j 0x18\n" " 12: f0ff00 subx8 a15, a15, a0\n" " 15: de .byte 0xde\n" " 16: 9abc beqz.n a10, 0x53\n" " 18: ffffe1 l32r a14, 0x14 (0x9abcdef0)\n" " 1b: 000146 j 0x24\n" - " 1e: 78ff00 lsi f0, a15, 0x1e0\n" + " 1e: 78ff00 excw\n" " 21: 123456 bnez a4, 0x148\n" " 24: fffff1 l32r a15, 0x20 (0x12345678)\n" - " 27: 0009e0 callx8 a9\n" + " 27: 0009e0 excw\n" " 2a: 0a7d mov.n a7, a10" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -2257,10 +2257,10 @@ move_array_element_x_reg_invalidates_vm_loc_cache_test() -> {S4, _} = ?BACKEND:move_to_native_register(S3, {x_reg, 5}), Stream = ?BACKEND:stream(S4), Dump = << - " 0: 0b22f2 l32i a15, a2, 44\n" - " 3: 0622e2 l32i a14, a2, 24\n" + " 0: 1022f2 l32i a15, a2, 64\n" + " 3: 0b22e2 l32i a14, a2, 44\n" " 6: 002ed2 l32i a13, a14, 0\n" - " 9: 0b62d2 s32i a13, a2, 44" + " 9: 1062d2 s32i a13, a2, 64" >>, ?assertStream(xtensa, Dump, Stream). @@ -2272,7 +2272,7 @@ fixed_dst_x_reg_load_preserves_cache_test() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a13), {State2, a13} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), Stream = ?BACKEND:stream(State2), - Dump = <<" 0: 0822d2 l32i a13, a2, 32">>, + Dump = <<" 0: 0d22d2 l32i a13, a2, 52">>, ?assertStream(xtensa, Dump, Stream). %% Verify move_to_native_register/3 for {y_reg, Y} sets the cache so that a @@ -2284,7 +2284,7 @@ fixed_dst_y_reg_load_preserves_cache_test() -> {State2, a13} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0522f2 l32i a15, a2, 20\n" + " 0: 0a22f2 l32i a15, a2, 40\n" " 3: 022fd2 l32i a13, a15, 8" >>, ?assertStream(xtensa, Dump, Stream). @@ -2305,15 +2305,15 @@ call_primitive_last_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << " 0: 01a0f2 movi a15, 1\n" - " 3: 0622e2 l32i a14, a2, 24\n" + " 3: 0b22e2 l32i a14, a2, 44\n" " 6: 002f16 beqz a15, 0xc\n" " 9: 000386 j 0x1b\n" " c: 0024f2 l32i a15, a4, 0\n" " f: 02ad mov.n a10, a2\n" " 11: 03bd mov.n a11, a3\n" - " 13: 000fe0 callx8 a15\n" + " 13: 000fe0 excw\n" " 16: 0a2d mov.n a2, a10\n" - " 18: 000090 retw" + " 18: 000090 excw" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -2333,7 +2333,7 @@ jump_to_label_if_block_preserves_cache_test() -> Stream = ?BACKEND:stream(State4), Dump = << " 0: 01a0f2 movi a15, 1\n" - " 3: 0622e2 l32i a14, a2, 24\n" + " 3: 0b22e2 l32i a14, a2, 44\n" " 6: 002f16 beqz a15, 0xc\n" " 9: 0005c6 j 0x24\n" " c: ff .byte 0xff\n" @@ -2378,12 +2378,12 @@ ldr_y_reg_invalidates_hidden_temp_cache_test() -> {State6, a13} = ?BACKEND:move_to_native_register(State5, {x_reg, 2}), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32\n" - " 9: 0522d2 l32i a13, a2, 20\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" + " 9: 0a22d2 l32i a13, a2, 40\n" " c: 002de2 l32i a14, a13, 0\n" - " f: 0822d2 l32i a13, a2, 32" + " f: 0d22d2 l32i a13, a2, 52" >>, ?assertStream(xtensa, Dump, Stream). @@ -2399,7 +2399,7 @@ decrement_reductions_invalidates_cache_test() -> {State3, a14} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" + " 0: 0b22f2 l32i a15, a2, 44\n" " 3: 0223e2 l32i a14, a3, 8\n" " 6: ffcee2 addi a14, a14, -1\n" " 9: 0263e2 s32i a14, a3, 8\n" @@ -2415,11 +2415,11 @@ decrement_reductions_invalidates_cache_test() -> " 27: 0224f2 l32i a15, a4, 8\n" " 2a: 02ad mov.n a10, a2\n" " 2c: 03bd mov.n a11, a3\n" - " 2e: 000fe0 callx8 a15\n" + " 2e: 000fe0 excw\n" " 31: 0a2d mov.n a2, a10\n" - " 33: 000090 retw\n" - " 36: 00c136 entry a1, 96\n" - " 39: 0622e2 l32i a14, a2, 24" + " 33: 000090 excw\n" + " 36: 00c136 excw\n" + " 39: 0b22e2 l32i a14, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -2432,8 +2432,8 @@ cached_move_to_vm_x_reg_reuse_test() -> {State2, a15} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0722f2 l32i a15, a2, 28\n" - " 3: 0662f2 s32i a15, a2, 24" + " 0: 0c22f2 l32i a15, a2, 48\n" + " 3: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -2446,9 +2446,9 @@ cached_move_to_vm_y_reg_reuse_test() -> {State2, a15} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0522e2 l32i a14, a2, 20\n" + " 0: 0a22e2 l32i a14, a2, 40\n" " 3: 002ef2 l32i a15, a14, 0\n" - " 6: 0662f2 s32i a15, a2, 24" + " 6: 0b62f2 s32i a15, a2, 44" >>, ?assertStream(xtensa, Dump, Stream). @@ -2464,7 +2464,7 @@ cached_move_to_vm_y_reg_reuse_test() -> %% Each test caches {x_reg, 1} in a14, frees it (so it stays cached but becomes %% available), then runs the op on a15: a14 is picked as the hidden Temp and %% clobbered with the immediate. Re-requesting {x_reg, 1} must emit a fresh -%% l32i a14, a2, 28 rather than reuse the now-clobbered a14. +%% l32i a14, a2, 48 rather than reuse the now-clobbered a14. add_invalidates_hidden_temp_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -2478,12 +2478,12 @@ add_invalidates_hidden_temp_cache_test() -> {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" " 9: e8a3e2 movi a14, 0x3e8\n" " c: 80ffe0 add a15, a15, a14\n" - " f: 0722e2 l32i a14, a2, 28" + " f: 0c22e2 l32i a14, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). 
@@ -2499,12 +2499,12 @@ sub_invalidates_hidden_temp_cache_test() -> {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" " 9: e8a3e2 movi a14, 0x3e8\n" " c: c0ffe0 sub a15, a15, a14\n" - " f: 0722e2 l32i a14, a2, 28" + " f: 0c22e2 l32i a14, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). @@ -2520,12 +2520,12 @@ or_invalidates_hidden_temp_cache_test() -> {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" " 9: e8a3e2 movi a14, 0x3e8\n" " c: 20ffe0 or a15, a15, a14\n" - " f: 0722e2 l32i a14, a2, 28" + " f: 0c22e2 l32i a14, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). @@ -2541,11 +2541,11 @@ xor_invalidates_hidden_temp_cache_test() -> {State6, a14} = ?BACKEND:move_to_native_register(State5, {x_reg, 1}), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 0622f2 l32i a15, a2, 24\n" - " 3: 0722e2 l32i a14, a2, 28\n" - " 6: 0822d2 l32i a13, a2, 32\n" + " 0: 0b22f2 l32i a15, a2, 44\n" + " 3: 0c22e2 l32i a14, a2, 48\n" + " 6: 0d22d2 l32i a13, a2, 52\n" " 9: e8a3e2 movi a14, 0x3e8\n" " c: 30ffe0 xor a15, a15, a14\n" - " f: 0722e2 l32i a14, a2, 28" + " f: 0c22e2 l32i a14, a2, 48" >>, ?assertStream(xtensa, Dump, Stream). 
diff --git a/tests/test-heap.c b/tests/test-heap.c index 2378bce25b..fd31971da0 100644 --- a/tests/test-heap.c +++ b/tests/test-heap.c @@ -108,6 +108,298 @@ void test_gc_ref_count(void) assert(list_is_empty(refc_binaries)); } +void test_generational_gc_basic(void) +{ + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + ctx->heap_growth_strategy = MinimumHeapGrowth; + + // Allocate a tuple and GC to set HWM + enum MemoryGCResult res = memory_ensure_free(ctx, TUPLE_SIZE(2)); + assert(res == MEMORY_GC_OK); + + term tuple1 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple1, 0, term_from_int(42)); + term_put_tuple_element(tuple1, 1, term_from_int(43)); + + term roots[2]; + roots[0] = tuple1; + roots[1] = term_nil(); + + // First GC sets HWM + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 2, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.high_water_mark != NULL); + tuple1 = roots[0]; + assert(term_get_tuple_arity(tuple1) == 2); + assert(term_get_tuple_element(tuple1, 0) == term_from_int(42)); + assert(term_get_tuple_element(tuple1, 1) == term_from_int(43)); + + // Allocate more data above HWM + term tuple2 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple2, 0, term_from_int(100)); + term_put_tuple_element(tuple2, 1, tuple1); + roots[1] = tuple2; + + // Second GC should be minor (HWM is set, gc_count < fullsweep_after) + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 2, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + tuple1 = roots[0]; + tuple2 = roots[1]; + + // Verify data survived + assert(term_get_tuple_arity(tuple1) == 2); + assert(term_get_tuple_element(tuple1, 0) == term_from_int(42)); + assert(term_get_tuple_element(tuple1, 1) == term_from_int(43)); + assert(term_get_tuple_arity(tuple2) == 2); + assert(term_get_tuple_element(tuple2, 0) == term_from_int(100)); + assert(term_get_tuple_element(tuple2, 1) == tuple1); + + // Verify heap is usable 
after GC + term tuple3 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple3, 0, term_from_int(200)); + term_put_tuple_element(tuple3, 1, tuple2); + + context_destroy(ctx); + globalcontext_destroy(glb); +} + +void test_generational_gc_promotion(void) +{ + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + ctx->heap_growth_strategy = MinimumHeapGrowth; + + // Allocate and GC to promote data to mature + enum MemoryGCResult res = memory_ensure_free(ctx, TUPLE_SIZE(2)); + assert(res == MEMORY_GC_OK); + + term tuple1 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple1, 0, term_from_int(1)); + term_put_tuple_element(tuple1, 1, term_from_int(2)); + + term roots[1]; + roots[0] = tuple1; + + // First GC: sets HWM + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + tuple1 = roots[0]; + + // Allocate young data + term tuple2 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple2, 0, term_from_int(3)); + term_put_tuple_element(tuple2, 1, tuple1); + roots[0] = tuple2; + + // Second GC: minor GC should promote tuple1 to old heap + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_NO_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.old_heap_start != NULL); + assert(ctx->heap.old_heap_ptr > ctx->heap.old_heap_start); + + // Verify data is intact + tuple2 = roots[0]; + assert(term_get_tuple_element(tuple2, 0) == term_from_int(3)); + tuple1 = term_get_tuple_element(tuple2, 1); + assert(term_get_tuple_element(tuple1, 0) == term_from_int(1)); + assert(term_get_tuple_element(tuple1, 1) == term_from_int(2)); + + // Verify heap is usable after GC + term tuple3 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple3, 0, term_from_int(4)); + term_put_tuple_element(tuple3, 1, tuple2); + + context_destroy(ctx); + globalcontext_destroy(glb); +} + +void test_generational_gc_major_on_force_shrink(void) +{ + 
GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + ctx->heap_growth_strategy = MinimumHeapGrowth; + + // Build up an old heap + enum MemoryGCResult res = memory_ensure_free(ctx, TUPLE_SIZE(2)); + assert(res == MEMORY_GC_OK); + + term tuple1 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple1, 0, term_from_int(1)); + term_put_tuple_element(tuple1, 1, term_from_int(2)); + + term roots[1]; + roots[0] = tuple1; + + // First GC: sets HWM + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + tuple1 = roots[0]; + + // Allocate young, then minor GC to promote + term tuple2 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple2, 0, term_from_int(3)); + term_put_tuple_element(tuple2, 1, tuple1); + roots[0] = tuple2; + + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_NO_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.old_heap_start != NULL); + + // Verify heap is usable after minor GC + term tuple3 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple3, 0, term_from_int(4)); + term_put_tuple_element(tuple3, 1, roots[0]); + + // FORCE_SHRINK triggers major GC which frees old heap + roots[0] = tuple3; + res = memory_ensure_free_with_roots(ctx, 0, 1, roots, MEMORY_FORCE_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.old_heap_start == NULL); + + // Data should still be intact + tuple3 = roots[0]; + tuple2 = term_get_tuple_element(tuple3, 1); + assert(term_get_tuple_element(tuple2, 0) == term_from_int(3)); + tuple1 = term_get_tuple_element(tuple2, 1); + assert(term_get_tuple_element(tuple1, 0) == term_from_int(1)); + assert(term_get_tuple_element(tuple1, 1) == term_from_int(2)); + + context_destroy(ctx); + globalcontext_destroy(glb); +} + +void test_generational_gc_mso(void) +{ + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + ctx->heap_growth_strategy = 
MinimumHeapGrowth; + + struct ListHead *refc_binaries = synclist_nolock(&glb->refc_binaries); + assert(list_is_empty(refc_binaries)); + + // Allocate a refc binary and GC to set HWM + enum MemoryGCResult res = memory_ensure_free(ctx, TERM_BOXED_REFC_BINARY_SIZE + TUPLE_SIZE(1)); + assert(res == MEMORY_GC_OK); + + term refc = term_alloc_refc_binary(42, false, &ctx->heap, glb); + struct RefcBinary *refc_ptr = term_refc_binary_ptr(refc); + assert(refc_ptr->ref_count == 1); + + term tuple1 = term_alloc_tuple(1, &ctx->heap); + term_put_tuple_element(tuple1, 0, refc); + + term roots[2]; + roots[0] = tuple1; + roots[1] = term_nil(); + + // First GC: sets HWM, tuple1+refc are below HWM after this + res = memory_ensure_free_with_roots(ctx, TERM_BOXED_REFC_BINARY_SIZE + TUPLE_SIZE(2), 2, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + assert(refc_ptr->ref_count == 1); + tuple1 = roots[0]; + + // Allocate a second refc binary (young) and a tuple referencing both + term refc2 = term_alloc_refc_binary(43, false, &ctx->heap, glb); + struct RefcBinary *refc2_ptr = term_refc_binary_ptr(refc2); + + term tuple2 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple2, 0, refc2); + term_put_tuple_element(tuple2, 1, tuple1); + roots[0] = tuple2; + roots[1] = term_nil(); + + // Minor GC: refc (via tuple1) should be promoted to old heap, refc2 stays young + res = memory_ensure_free_with_roots(ctx, TERM_BOXED_REFC_BINARY_SIZE + TUPLE_SIZE(2), 2, roots, MEMORY_NO_SHRINK); + assert(res == MEMORY_GC_OK); + assert(refc_ptr->ref_count == 1); + assert(refc2_ptr->ref_count == 1); + assert(ctx->heap.old_heap_start != NULL); + + // Drop refc2: keep only tuple1 (from old heap) via a new young tuple + tuple2 = roots[0]; + tuple1 = term_get_tuple_element(tuple2, 1); + + // Verify heap is usable after minor GC + term tuple3 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple3, 0, tuple1); + term_put_tuple_element(tuple3, 1, term_from_int(99)); + + roots[0] = 
tuple3; + roots[1] = term_nil(); + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(1), 2, roots, MEMORY_NO_SHRINK); + assert(res == MEMORY_GC_OK); + + // refc should still be alive in old heap + assert(refc_ptr->ref_count == 1); + + // Verify heap is usable + tuple3 = roots[0]; + term tuple4 = term_alloc_tuple(1, &ctx->heap); + term_put_tuple_element(tuple4, 0, tuple3); + + // Major GC: drop everything + roots[0] = term_nil(); + res = memory_ensure_free_with_roots(ctx, 0, 1, roots, MEMORY_FORCE_SHRINK); + assert(res == MEMORY_GC_OK); + + refc_binaries = synclist_nolock(&glb->refc_binaries); + assert(list_is_empty(refc_binaries)); + + context_destroy(ctx); + globalcontext_destroy(glb); +} + +void test_fullsweep_after_zero(void) +{ + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + ctx->heap_growth_strategy = MinimumHeapGrowth; + ctx->fullsweep_after = 0; + + // Allocate and GC + enum MemoryGCResult res = memory_ensure_free(ctx, TUPLE_SIZE(2)); + assert(res == MEMORY_GC_OK); + + term tuple1 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple1, 0, term_from_int(42)); + term_put_tuple_element(tuple1, 1, term_from_int(43)); + + term roots[1]; + roots[0] = tuple1; + + // With fullsweep_after=0, GC should always be full, never creating old heap + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.old_heap_start == NULL); + + tuple1 = roots[0]; + term tuple2 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple2, 0, term_from_int(100)); + term_put_tuple_element(tuple2, 1, tuple1); + roots[0] = tuple2; + + // Second GC: still full sweep + res = memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, roots, MEMORY_CAN_SHRINK); + assert(res == MEMORY_GC_OK); + assert(ctx->heap.old_heap_start == NULL); + + // Verify data intact + tuple2 = roots[0]; + assert(term_get_tuple_element(tuple2, 0) == term_from_int(100)); + tuple1 = 
term_get_tuple_element(tuple2, 1); + assert(term_get_tuple_element(tuple1, 0) == term_from_int(42)); + assert(term_get_tuple_element(tuple1, 1) == term_from_int(43)); + + // Verify heap is usable after GC + term tuple3 = term_alloc_tuple(2, &ctx->heap); + term_put_tuple_element(tuple3, 0, term_from_int(200)); + term_put_tuple_element(tuple3, 1, tuple2); + + context_destroy(ctx); + globalcontext_destroy(glb); +} + int main(int argc, char **argv) { UNUSED(argc); @@ -115,6 +407,11 @@ int main(int argc, char **argv) test_memory_ensure_free(); test_gc_ref_count(); + test_generational_gc_basic(); + test_generational_gc_promotion(); + test_generational_gc_major_on_force_shrink(); + test_generational_gc_mso(); + test_fullsweep_after_zero(); return EXIT_SUCCESS; } diff --git a/tests/test.c b/tests/test.c index 2aae87a981..5776bd341d 100644 --- a/tests/test.c +++ b/tests/test.c @@ -596,6 +596,7 @@ struct Test tests[] = { TEST_CASE_EXPECTED(link_throw, 1), TEST_CASE_EXPECTED(unlink_error, 1), TEST_CASE(trap_exit_flag), + TEST_CASE(test_process_flag_fullsweep_after), TEST_CASE(test_exit1), TEST_CASE(test_exit2), TEST_CASE_COND(test_stacktrace, 0, SKIP_STACKTRACES),