Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 171 additions & 0 deletions executor/programs/asm/test_compressed.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
.attribute 5, "rv64i2p1_m2p0_c2p0"
.option rvc
.globl main
# Self-checking RV64C test. Each check computes a difference that is zero when the
# compressed instruction behaved correctly, and ORs it into the error accumulator
# s0 (x8). The program exits with a0 = s0, so a0 == 0 iff every check passed.
#
# The CA-format register-register ops (c.and/c.or/c.xor/c.sub) only address
# x8..x15, so working values live in s0,s1,a0..a5. c.mv and c.add (CR format)
# accept any register except x0, which is why c.mv can read sp further down.
# Entry point. s0 collects the OR of every check's difference and is only
# copied to a0 at the very end, so any non-zero difference survives to exit.
main:
li s0, 0 # error accumulator

# --- C.LI, C.ADDI, C.MV, C.SUB ---
# Build 15 as 10 + 5, then subtract an independently loaded 15.
c.li a0, 10
c.addi a0, 5 # a0 = 15
c.li a1, 15 # expected value
c.mv a2, a0 # work on a copy so a0 is left untouched
c.sub a2, a1 # a2 = 0
c.or s0, a2 # fold the difference into the accumulator

# --- C.ADD ---
c.li a0, 7
c.li a1, 3
c.add a0, a1 # a0 = 10
c.li a1, 10 # expected value
c.mv a2, a0
c.sub a2, a1 # a2 = 0 when c.add produced 10
c.or s0, a2

# --- C.AND / C.OR / C.XOR ---
# a0 = 12 and a1 = 10 stay fixed as the operands of all three checks. Each
# check first copies a0 into a2 because the compressed ops overwrite their
# destination register.
c.li a0, 12
c.li a1, 10
c.mv a2, a0
c.and a2, a1 # 12 & 10 = 8
c.li a3, 8 # expected value
c.mv a4, a2
c.sub a4, a3 # a4 = result - expected
c.or s0, a4

c.mv a2, a0
c.or a2, a1 # 12 | 10 = 14
c.li a3, 14 # expected value
c.mv a4, a2
c.sub a4, a3 # a4 = result - expected
c.or s0, a4

c.mv a2, a0
c.xor a2, a1 # 12 ^ 10 = 6
c.li a3, 6 # expected value
c.mv a4, a2
c.sub a4, a3 # a4 = result - expected
c.or s0, a4

# --- C.ANDI / C.SLLI / C.SRLI / C.SRAI ---
# Each check loads the operand, applies the immediate op in place, and then
# subtracts the expected value so the difference can be ORed into s0.
c.li a0, 15
c.andi a0, 10 # 15 & 10 = 10
c.li a1, 10 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

c.li a0, 1
c.slli a0, 4 # 1 << 4 = 16
c.li a1, 16 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

c.li a0, 28
c.srli a0, 2 # 28 >> 2 = 7
c.li a1, 7 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

# A positive operand shifts the same way logically and arithmetically, so the
# check above cannot tell c.srli from c.srai. Repeat with a negative operand:
# a logical shift must fill with zeros. The shift amount 60 also needs the
# sixth shamt bit, which is only usable on RV64.
li a0, -16
c.srli a0, 60 # 0xFFFFFFFFFFFFFFF0 >> 60 = 0xF (an arithmetic shift would give -1)
c.li a1, 15 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

li a0, -16
c.srai a0, 2 # -16 >> 2 = -4 (arithmetic)
li a1, -4 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

# --- C.LUI ---
# c.lui places its immediate in bits 17:12, so an immediate of 1 yields 0x1000.
c.lui a0, 1 # a0 = 0x1000
li a1, 0x1000 # expected value
c.mv a2, a0
c.sub a2, a1 # a2 = 0 when c.lui produced 0x1000
c.or s0, a2

# --- C.SWSP / C.LWSP and C.SDSP / C.LDSP ---
# Store a value relative to sp, load it back, and compare with the constant.
# NOTE(review): assumes address 0x2000 is writable in the executor's memory
# model - confirm against the VM.
li sp, 0x2000
li a0, 1234
c.swsp a0, 0(sp) # 32-bit store at sp + 0
c.lwsp a1, 0(sp) # a1 = 1234
c.mv a2, a1
li a3, 1234 # expected value
c.sub a2, a3
c.or s0, a2

# 0x123456789 needs more than 32 bits, so a store or load that only moved the
# low word would not reproduce it.
li a0, 0x123456789
c.sdsp a0, 8(sp) # 64-bit store at sp + 8, clear of the word stored above
c.ldsp a1, 8(sp) # full 64-bit round trip
c.mv a2, a1
li a3, 0x123456789 # expected value
c.sub a2, a3
c.or s0, a2

# --- C.SW / C.LW and C.SD / C.LD (base in x8..x15) ---
# Same round-trip idea as the sp-relative checks, but with the base address in
# a3, because these forms can only name x8..x15 for both base and data.
# NOTE(review): assumes address 0x3000 is writable in the executor's memory
# model - confirm against the VM.
li a3, 0x3000
c.li a0, 30
c.sw a0, 0(a3) # 32-bit store at a3 + 0
c.lw a1, 0(a3) # a1 = 30
c.mv a2, a1
c.li a4, 30 # expected value
c.sub a2, a4
c.or s0, a2

# The constant must have bits set above bit 31. A value that fits in 32 bits
# would survive even if c.sd/c.ld were wrongly executed as 32-bit accesses,
# so the check would not prove a full doubleword transfer.
li a0, 0x0123456789abcdef
c.sd a0, 8(a3) # 64-bit store at a3 + 8, clear of the word stored above
c.ld a1, 8(a3) # 64-bit round trip
c.mv a2, a1
li a4, 0x0123456789abcdef # expected value
c.sub a2, a4
c.or s0, a2

# --- C.ADDI4SPN / C.ADDI16SP ---
# Reset sp to a known value so both results can be compared with constants.
li sp, 0x4000
c.addi4spn a0, sp, 8 # a0 = sp + 8 = 0x4008
li a1, 0x4008 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

c.addi16sp sp, 16 # sp = 0x4010
li a1, 0x4010 # expected value
c.mv a2, sp # c.mv is not limited to x8..x15, so it can read sp directly
c.sub a2, a1
c.or s0, a2

# --- branches: C.BEQZ / C.BNEZ / C.J (with a straddling region) ---
# Every branch below is expected to be taken. The two instructions after each
# branch only run when it falls through, and they set bit 0 of s0.
# NOTE(review): "straddling region" is not explained here - presumably it means
# instructions that end up crossing a 4-byte word boundary; confirm.
c.li a0, 0
c.beqz a0, beqz_ok # taken
c.li s1, 1
c.or s0, s1 # only reached on failure
beqz_ok:
c.li a0, 5
c.bnez a0, bnez_ok # taken
c.li s1, 1
c.or s0, s1 # only reached on failure
bnez_ok:
c.j after_j # unconditional, must skip the two lines below
c.li s1, 1
c.or s0, s1 # only reached on failure
after_j:

# --- call/return: jal (4-byte) + C.JR (return) ---
# a0 is cleared before the call, so finding 1 in it afterwards shows that func
# ran and that its c.jr returned to the instruction after the jal.
c.li a0, 0
jal ra, func # func sets a0 = 1
c.li a1, 1 # expected value
c.mv a2, a0
c.sub a2, a1
c.or s0, a2

# --- exit: a0 = s0 (0 on success) ---
c.mv a0, s0 # exit status = accumulated error bits
li a7, 93 # NOTE(review): presumably the exit syscall number - confirm against the executor
ecall

# Callee for the call/return check: sets a0 = 1 and returns through ra.
func:
c.li a0, 1
c.jr ra # return to the address jal saved in ra
15 changes: 12 additions & 3 deletions executor/src/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,9 @@ impl ProgramHeader {
pub struct Segment {
/// Base virtual address for this segment
pub base_addr: u64,
/// The 32-bit instruction words in this segment
/// The raw 4-byte little-endian words of this segment. For executable
/// segments these may encode a mix of 2-byte (compressed) and 4-byte
/// instructions when reinterpreted as a halfword stream.
pub values: Vec<u32>,
/// Whether this segment is executable (has PF_X flag)
pub is_executable: bool,
Expand All @@ -230,6 +232,11 @@ pub struct Elf {

pub(crate) const WORD_SIZE: u64 = 4;

/// Minimum alignment required of the ELF entry point and of the `p_vaddr` of
/// each `PT_LOAD` segment. The RV64C "C" extension allows instructions on
/// 2-byte boundaries; the base ISA only ever produces 4-byte-aligned addresses,
/// so 2 is the safe floor.
///
/// NOTE(review): the check is applied to every loadable segment, not only to
/// executable ones - confirm that is intended for data segments.
const INSTRUCTION_ALIGN: u64 = 2;

#[derive(Debug, thiserror::Error)]
pub enum ElfError {
#[error("Not a 64-bit ELF")]
Expand Down Expand Up @@ -274,7 +281,9 @@ impl Elf {
return Err(ElfError::NotExecutable);
}
let entry_point: u64 = elf_program.ehdr.e_entry;
if !entry_point.is_multiple_of(WORD_SIZE) {
// Instructions only need 2-byte alignment with the "C" (compressed)
// extension; without it everything is 4-byte aligned anyway.
if !entry_point.is_multiple_of(INSTRUCTION_ALIGN) {
return Err(ElfError::InvalidEntryPoint);
}
let phdrs = elf_program.phdrs;
Expand All @@ -287,7 +296,7 @@ impl Elf {
.iter()
.filter(|program_header| program_header.p_type == PT_LOAD)
{
if !program_header.p_vaddr.is_multiple_of(WORD_SIZE) {
if !program_header.p_vaddr.is_multiple_of(INSTRUCTION_ALIGN) {
return Err(ElfError::UnalignedVAddr);
}
let mut values = Vec::new();
Expand Down
2 changes: 1 addition & 1 deletion executor/src/flamegraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl FlamegraphGenerator {
// Update call stack based on instruction type
let instruction = instructions
.get(log.current_pc)
.copied()
.map(|decoded| decoded.instr)
.ok_or(FlamegraphError::InstructionNotFound)?;
self.update_stack(log, instruction);
}
Expand Down
167 changes: 167 additions & 0 deletions executor/src/tests/decompress_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//! Tests for RV64C (compressed instruction) decompression.

use crate::vm::instruction::decoding::{ArithOp, Instruction, InstructionError, LoadStoreWidth};
use crate::vm::instruction::decompress::{decompress, instr_len};

/// Checks `instr_len` on one sample halfword per low-two-bit pattern: `00`,
/// `01` and `10` are expected to give 2, `11` is expected to give 4.
#[test]
fn instr_len_distinguishes_compressed_from_full() {
    let cases = [
        (0x0000, 2), // low bits 00
        (0x4515, 2), // low bits 01 (c.li)
        (0x8082, 2), // low bits 10 (c.jr ra)
        (0x0033, 4), // low bits 11 (a base instruction)
    ];
    for (halfword, expected_len) in cases {
        assert_eq!(instr_len(halfword), expected_len);
    }
}

/// Decompressing the all-zero halfword must fail with `IllegalCompressed`
/// carrying that same encoding.
#[test]
fn all_zero_halfword_is_illegal() {
    let outcome = decompress(0x0000);
    assert!(matches!(
        outcome,
        Err(InstructionError::IllegalCompressed(0x0000))
    ));
}

/// `c.li` must decode to an add-immediate whose source register is x0.
#[test]
fn c_li_expands_to_addi_from_x0() {
    // 0x4515 encodes c.li x10, 5
    let fields = match decompress(0x4515).unwrap() {
        Instruction::ArithImm {
            dst,
            src,
            imm,
            op: ArithOp::Add,
        } => (dst, src, imm),
        other => panic!("expected ArithImm, got {other:?}"),
    };
    assert_eq!(fields, (10, 0, 5));
}

/// Decodes `c.li x10, -1`, whose 6-bit immediate has every bit set, to
/// exercise the sign-extension path.
#[test]
fn c_li_sign_extends_negative_immediate() {
    // Encoding of c.li x10, -1, assembled field by field.
    let funct3: u16 = 0b010 << 13;
    let imm_sign: u16 = 1 << 12; // imm[5]
    let rd: u16 = 10 << 7;
    let imm_low: u16 = 0b11111 << 2; // imm[4:0]
    let quadrant: u16 = 0b01;
    let enc = funct3 | imm_sign | rd | imm_low | quadrant;

    let fields = match decompress(enc).unwrap() {
        Instruction::ArithImm {
            dst,
            src,
            imm,
            op: ArithOp::Add,
        } => (dst, src, imm),
        other => panic!("expected ArithImm, got {other:?}"),
    };
    assert_eq!(fields, (10, 0, -1));
}

/// `c.addi4spn` must decode to an add-immediate that reads the stack pointer
/// (x2).
#[test]
fn c_addi4spn_expands_to_addi_from_sp() {
    // 0x0040 encodes c.addi4spn x8, x2, 4
    let fields = match decompress(0x0040).unwrap() {
        Instruction::ArithImm {
            dst,
            src,
            imm,
            op: ArithOp::Add,
        } => (dst, src, imm),
        other => panic!("expected ArithImm, got {other:?}"),
    };
    assert_eq!(fields, (8, 2, 4));
}

/// `c.mv` and `c.add` both decode to a register-register add; they differ only
/// in the first source register (x0 for `c.mv`, the destination for `c.add`).
#[test]
fn c_mv_and_c_add() {
    // c.mv x10, x11 -> add x10, x0, x11
    let mv_regs = match decompress(0x852E).unwrap() {
        Instruction::Arith {
            dst,
            src1,
            src2,
            op: ArithOp::Add,
        } => (dst, src1, src2),
        other => panic!("expected Arith (c.mv), got {other:?}"),
    };
    assert_eq!(mv_regs, (10, 0, 11));

    // c.add x10, x11 -> add x10, x10, x11
    let add_regs = match decompress(0x952E).unwrap() {
        Instruction::Arith {
            dst,
            src1,
            src2,
            op: ArithOp::Add,
        } => (dst, src1, src2),
        other => panic!("expected Arith (c.add), got {other:?}"),
    };
    assert_eq!(add_regs, (10, 10, 11));
}

/// `c.jr` and `c.jalr` both decode to a zero-offset JALR; only the link
/// register differs (x0 for `c.jr`, x1 for `c.jalr`).
#[test]
fn c_jr_and_c_jalr() {
    // c.jr ra (0x8082) -> jalr x0, 0(x1)
    let jr_fields = match decompress(0x8082).unwrap() {
        Instruction::JumpAndLinkRegister { base, dst, offset } => (base, dst, offset),
        other => panic!("expected JALR (c.jr), got {other:?}"),
    };
    assert_eq!(jr_fields, (1, 0, 0));

    // c.jalr ra (0x9082) -> jalr x1, 0(x1)
    let jalr_fields = match decompress(0x9082).unwrap() {
        Instruction::JumpAndLinkRegister { base, dst, offset } => (base, dst, offset),
        other => panic!("expected JALR (c.jalr), got {other:?}"),
    };
    assert_eq!(jalr_fields, (1, 1, 0));
}

/// The `c.ebreak` encoding (0x9002) must decode to `Instruction::EcallEbreak`.
#[test]
fn c_ebreak_expands_to_ecall_ebreak() {
    let decoded = decompress(0x9002);
    assert!(matches!(decoded, Ok(Instruction::EcallEbreak)));
}

/// `c.sub` must decode to the register-register form (`Arith`), never to
/// `ArithImm`.
#[test]
fn c_sub_expands_to_register_sub_not_imm() {
    // c.sub x8, x9 -> sub x8, x8, x9
    // rd' = x8 is field value 0 and the C.SUB selector bits are 00, so neither
    // contributes any set bits to the encoding.
    let funct3: u16 = 0b100 << 13;
    let funct2: u16 = 0b11 << 10;
    let rs2_field: u16 = 1 << 2; // rs2' = x9 (field 1)
    let quadrant: u16 = 0b01;
    let enc = funct3 | funct2 | rs2_field | quadrant;

    let regs = match decompress(enc).unwrap() {
        Instruction::Arith {
            dst,
            src1,
            src2,
            op: ArithOp::Sub,
        } => (dst, src1, src2),
        other => panic!("expected Arith Sub, got {other:?}"),
    };
    assert_eq!(regs, (8, 8, 9));
}

/// `c.lwsp` with a destination of x0 is a reserved encoding and must be
/// rejected with `ReservedCompressed`.
#[test]
fn c_lwsp_x0_is_reserved() {
    // funct3 = 010 in quadrant 10, with the rd field left at 0.
    let funct3: u16 = 0b010 << 13;
    let quadrant: u16 = 0b10;
    let outcome = decompress(funct3 | quadrant);
    assert!(matches!(
        outcome,
        Err(InstructionError::ReservedCompressed(_))
    ));
}

/// `c.lwsp` must decode to a word-width load whose base register is the stack
/// pointer (x2).
#[test]
fn c_lwsp_loads_from_sp() {
    // c.lwsp x10, 0(x2)
    let funct3: u16 = 0b010 << 13;
    let rd: u16 = 10 << 7;
    let quadrant: u16 = 0b10;
    let enc = funct3 | rd | quadrant;

    let fields = match decompress(enc).unwrap() {
        Instruction::Load {
            dst,
            offset,
            base,
            width: LoadStoreWidth::Word,
        } => (dst, offset, base),
        other => panic!("expected Load (c.lwsp), got {other:?}"),
    };
    assert_eq!(fields, (10, 0, 2));
}

/// Floating-point compressed encodings are not supported: the `c.fld` slot
/// must be reported as `IllegalCompressed` instead of decoding to an integer
/// instruction.
#[test]
fn float_compressed_is_excluded() {
    // c.fld lives in quadrant 00 with funct3 = 001; all other bits are zero.
    let enc: u16 = 0b001 << 13;
    let outcome = decompress(enc);
    assert!(matches!(
        outcome,
        Err(InstructionError::IllegalCompressed(_))
    ));
}
Loading
Loading