From f1dd722da50b66344ea896e071dc19a953e5e30d Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 21 Dec 2025 14:34:44 +0300 Subject: [PATCH 001/141] init interpeter --- virtual_machine/interpreter.c | 270 ++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 virtual_machine/interpreter.c diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c new file mode 100644 index 000000000..2cd39aeb1 --- /dev/null +++ b/virtual_machine/interpreter.c @@ -0,0 +1,270 @@ +#include +#include +#include +#include + +typedef enum { + OP_BINOP_ADD = 0x01, + OP_BINOP_SUB = 0x02, + OP_BINOP_MUL = 0x03, + OP_BINOP_DIV = 0x04, + OP_BINOP_MOD = 0x05, + OP_CONST = 0x10, + OP_END = 0x16, + OP_DROP = 0x18, + OP_DUP = 0x19, + OP_SWAP = 0x1A, + OP_LD = 0x20, + OP_ST = 0x40, + OP_BEGIN = 0x52, + OP_BEGIN_CLOSURE = 0x53, + OP_LINE = 0x5A, + OP_READ = 0x70, + OP_WRITE = 0x71, + OP_STOP = 0xFF, +} opcode_t; + +typedef struct { + const uint8_t *code; + uint8_t *code_buf; // non-const pointer for freeing + int code_size; + int entry_point; + int globals_count; +} bytecode; + +static inline int read_i32(const uint8_t data[], int offset) { + return data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | + (data[offset + 3] << 24); +} + +#define HEADER_SIZE 12 +#define PUB_ENTRY_SIZE 8 + +static int find_entry_point(const uint8_t *data, int pubs_offset, int num_pubs, + const uint8_t *string_table, const char *name) { + for (int i = 0; i < num_pubs; i++) { + int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; + int name_offset = read_i32(data, entry_offset); + char *f_name = (char *)(string_table + name_offset); + int address = read_i32(data, entry_offset + 4); + if (strcmp(f_name, name) == 0) { + return address; + } + } + return -1; +} + +bytecode *load_bytecode(const char *filename) { + FILE *f = fopen(filename, "rb"); + if (!f) { + perror("fopen"); + return NULL; + } + + fseek(f, 0, SEEK_END); + long size = ftell(f); + rewind(f); + + 
uint8_t *data = malloc(size); + + if (!data) { + fclose(f); + return NULL; + } + + if (fread(data, 1, size, f) != size) { + perror("fread"); + fclose(f); + free(data); + return NULL; + } + fclose(f); + + int st_size = read_i32(data, 0); + int globals_count = read_i32(data, 4); + int num_pubs = read_i32(data, 8); + int num_imports = read_i32(data, 12); + int num_ext_fixups = read_i32(data, 16); + + int pubs_offset = HEADER_SIZE; + int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; + int code_offset = st_offset + st_size; + int code_size = size - code_offset; + + uint8_t *string_table = data + st_offset; + int main_entry_point = + find_entry_point(data, pubs_offset, num_pubs, string_table, "main"); + + bytecode *bc = malloc(sizeof(bytecode)); + bc->code = malloc(code_size); + memcpy(bc->code, data + code_offset, code_size); + bc->code_size = code_size; + bc->entry_point = main_entry_point; + bc->globals_count = globals_count; + + free(data); + return bc; +} + +#define STACK_SIZE 1024 +static int stack[STACK_SIZE]; +static int sp = 0; + +static void push(int val) { + if (sp >= STACK_SIZE) { + fprintf(stderr, "Stack overflow\n"); + exit(1); + } + stack[sp++] = val; +} + +static int pop(void) { + if (sp <= 0) { + fprintf(stderr, "Cannot pop from an empty stack"); + exit(1); + } + return stack[--sp]; +} + +static int peek(void) { + if (sp <= 0) { + fprintf(stderr, "Cannot peek from an empty stack"); + exit(1); + } + return stack[sp - 1]; +} + +void run(bytecode *bc) { + int *globals = malloc(sizeof(int) * bc->globals_count); + int ip = bc->entry_point; + + while (ip < bc->code_size) { + uint8_t opcode = bc->code[ip++]; + int l = opcode & 0xF; + + switch (opcode) { + case OP_CONST: { + int n = read_i32(bc->code, ip); + ip += 4; + push(n); + break; + } + case OP_BINOP_ADD: + case OP_BINOP_SUB: + case OP_BINOP_MUL: + case OP_BINOP_DIV: + case OP_BINOP_MOD: { + int y = pop(); + int x = pop(); + int result; + switch (l) { + case 1: + result = x + y; + break; + case 2: + 
result = x - y; + break; + case 3: + result = x * y; + break; + case 4: + if (y == 0) { + fprintf(stderr, "Division by zero\n"); + goto end; + } + result = x / y; + break; + case 5: + if (y == 0) { + fprintf(stderr, "Division by zero\n"); + goto end; + } + result = x % y; + break; + } + push(result); + break; + } + case OP_LD: { + int idx = read_i32(bc->code, ip); + ip += 4; + push(globals[idx]); + break; + } + case OP_ST: { + int idx = read_i32(bc->code, ip); + ip += 4; + int val = pop(); + globals[idx] = val; + push(val); + break; + } + case OP_DROP: + pop(); + break; + case OP_DUP: { + int x = peek(); + push(x); + break; + } + case OP_SWAP: { + int y = pop(); + int x = pop(); + push(y); + push(x); + break; + } + case OP_BEGIN: + case OP_BEGIN_CLOSURE: + // TODO: skip for now + ip += 8; + break; + case OP_READ: { + int x; + // TODO: scanf ? + if (scanf("%d", &x) != 1) { + fprintf(stderr, "Failed to read\n"); + goto end; + } + push(x); + break; + } + case OP_WRITE: { + int x = pop(); + printf("%d\n", x); + push(x); + break; + } + case OP_END: + case OP_STOP: + goto end; + case OP_LINE: + ip += 4; + break; + default: + fprintf(stderr, "Not yet supported opcode 0x%02X at ip=%d\n", opcode, ip); + goto end; + } + } + +end: + free(globals); +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + bytecode *bc = load_bytecode(argv[1]); + if (!bc) { + return 1; + } + + run(bc); + + free(bc->code); + free(bc); + return 0; +} From dfeb137b81ec9dfd6d277a6991d4477ad387d751 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 22 Dec 2025 16:14:31 +0300 Subject: [PATCH 002/141] add tests --- virtual_machine/dune | 42 +++++++++++++++++++++++++++++++++++++ virtual_machine/test001.bc | Bin 0 -> 89 bytes virtual_machine/test001.t | 2 ++ virtual_machine/test002.bc | Bin 0 -> 104 bytes virtual_machine/test002.t | 2 ++ virtual_machine/test003.bc | Bin 0 -> 118 bytes virtual_machine/test003.t | 4 ++++ 
virtual_machine/test004.bc | Bin 0 -> 134 bytes virtual_machine/test004.t | 2 ++ virtual_machine/test005.bc | Bin 0 -> 134 bytes virtual_machine/test005.t | 2 ++ 11 files changed, 54 insertions(+) create mode 100644 virtual_machine/dune create mode 100644 virtual_machine/test001.bc create mode 100644 virtual_machine/test001.t create mode 100644 virtual_machine/test002.bc create mode 100644 virtual_machine/test002.t create mode 100644 virtual_machine/test003.bc create mode 100644 virtual_machine/test003.t create mode 100644 virtual_machine/test004.bc create mode 100644 virtual_machine/test004.t create mode 100644 virtual_machine/test005.bc create mode 100644 virtual_machine/test005.t diff --git a/virtual_machine/dune b/virtual_machine/dune new file mode 100644 index 000000000..387a87a9e --- /dev/null +++ b/virtual_machine/dune @@ -0,0 +1,42 @@ +(rule + (target interpreter.exe) + (deps + (:main interpreter.c) + mac-specific-flags.txt) + (mode + (promote (until-clean))) + (action + (run + gcc + %{read-lines:mac-specific-flags.txt} + -g + %{main} + -o + %{target}))) + +(rule + (target mac-specific-flags.txt) + (enabled_if + (= %{system} "linux")) + (action + (write-file %{target} ""))) + +(rule + (target mac-specific-flags.txt) + (enabled_if + (= %{ocaml-config:system} macosx)) + (action + (write-file %{target} "-Wl,-no_pie"))) + +(cram (deps interpreter.exe)) + +(cram (applies_to test001) + (deps test001.bc ../regression/test001.input)) +(cram (applies_to test002) + (deps test002.bc ../regression/test002.input)) +(cram (applies_to test003) + (deps test003.bc ../regression/test003.input)) +(cram (applies_to test004) + (deps test004.bc ../regression/test004.input)) +(cram (applies_to test005) + (deps test005.bc ../regression/test005.input)) \ No newline at end of file diff --git a/virtual_machine/test001.bc b/virtual_machine/test001.bc new file mode 100644 index 0000000000000000000000000000000000000000..1cb023faee1429980f7149f488b1654192614ce9 GIT binary patch literal 
89 zcmZQ&U|?VdVn!f_f!xH*Jcb}9D7PRAB;O_M5d~r^ JfS841{{j6-2B-i4 literal 0 HcmV?d00001 diff --git a/virtual_machine/test001.t b/virtual_machine/test001.t new file mode 100644 index 000000000..7be09d4ce --- /dev/null +++ b/virtual_machine/test001.t @@ -0,0 +1,2 @@ + $ ./interpreter.exe test001.bc < ../regression/test001.input + 90 diff --git a/virtual_machine/test002.bc b/virtual_machine/test002.bc new file mode 100644 index 0000000000000000000000000000000000000000..1e07fb1410d9a563c53c08b5f58d7952c7af0de4 GIT binary patch literal 104 zcmZQ&U|?VdVn!f_f!xH*Jcb}9C^rfuT@VFQ-~bYmh++Yf1r8tqi6~YesQ_Yv31$b7 TE{P~MkN^-fD1i8kg<}5!Yf}f& literal 0 HcmV?d00001 diff --git a/virtual_machine/test002.t b/virtual_machine/test002.t new file mode 100644 index 000000000..fe404eb32 --- /dev/null +++ b/virtual_machine/test002.t @@ -0,0 +1,2 @@ + $ ./interpreter.exe test002.bc < ../regression/test002.input + 41 diff --git a/virtual_machine/test003.bc b/virtual_machine/test003.bc new file mode 100644 index 0000000000000000000000000000000000000000..1df9712121e0e91aaf36188f71e56382a63657f4 GIT binary patch literal 118 zcmZQ&U|?VZVn!f_f!xH*Jcb~UCNu literal 0 HcmV?d00001 diff --git a/virtual_machine/test003.t b/virtual_machine/test003.t new file mode 100644 index 000000000..be60f925b --- /dev/null +++ b/virtual_machine/test003.t @@ -0,0 +1,4 @@ + $ ./interpreter.exe test003.bc < ../regression/test003.input + 7 + 3 + 1 diff --git a/virtual_machine/test004.bc b/virtual_machine/test004.bc new file mode 100644 index 0000000000000000000000000000000000000000..c66dd7ce0fff664b37857981220fd5c02bc9c86a GIT binary patch literal 134 zcmZQ&U|?VdVn!f_f!xH*Jcb}9C^rfuT@VFQ-~bYmh++Yf1r8tqi6~YesQ| Date: Tue, 23 Dec 2025 16:21:23 +0300 Subject: [PATCH 003/141] remove unused code buffer --- virtual_machine/interpreter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 2cd39aeb1..15e8ad027 100644 --- a/virtual_machine/interpreter.c +++ 
b/virtual_machine/interpreter.c @@ -26,7 +26,6 @@ typedef enum { typedef struct { const uint8_t *code; - uint8_t *code_buf; // non-const pointer for freeing int code_size; int entry_point; int globals_count; From 5e8c53932e3ba979c749ef784bdae4f3909db207 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 25 Dec 2025 10:35:07 +0300 Subject: [PATCH 004/141] add calls, stack frames --- virtual_machine/interpreter.c | 182 ++++++++++++++++++++++++++-------- 1 file changed, 139 insertions(+), 43 deletions(-) diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 15e8ad027..6e955ca40 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -11,17 +11,23 @@ typedef enum { OP_BINOP_MOD = 0x05, OP_CONST = 0x10, OP_END = 0x16, + OP_RET = 0x17, OP_DROP = 0x18, OP_DUP = 0x19, OP_SWAP = 0x1A, OP_LD = 0x20, + OP_LD_LOC = 0x21, + OP_LD_ARG = 0x22, OP_ST = 0x40, + OP_ST_LOC = 0x41, + OP_ST_ARG = 0x42, OP_BEGIN = 0x52, OP_BEGIN_CLOSURE = 0x53, + OP_CALL = 0x56, OP_LINE = 0x5A, OP_READ = 0x70, OP_WRITE = 0x71, - OP_STOP = 0xFF, + OP_HALT = 0xFF, } opcode_t; typedef struct { @@ -106,46 +112,71 @@ bytecode *load_bytecode(const char *filename) { } #define STACK_SIZE 1024 -static int stack[STACK_SIZE]; -static int sp = 0; -static void push(int val) { - if (sp >= STACK_SIZE) { - fprintf(stderr, "Stack overflow\n"); - exit(1); - } - stack[sp++] = val; +static inline void push(int **sp, int val) { + *(*sp)++ = val; } -static int pop(void) { - if (sp <= 0) { - fprintf(stderr, "Cannot pop from an empty stack"); - exit(1); - } - return stack[--sp]; +static inline int pop(int **sp) { + return *--(*sp); } -static int peek(void) { - if (sp <= 0) { - fprintf(stderr, "Cannot peek from an empty stack"); - exit(1); - } - return stack[sp - 1]; +static inline int peek(int **sp) { + return *(*sp - 1); +} + +typedef struct frame { + struct frame *parent; + int return_ip; + int n_args; + int n_locals; + int locals[]; +} frame; + +static frame *current_frame 
= NULL; +static int return_ip = -1; + +static frame *frame_create(frame *parent, int ret_ip, int n_args, int n_locals) { + int total_locals = n_args + n_locals; + frame *f = malloc(sizeof(frame) + total_locals * sizeof(int)); + f->parent = parent; + f->return_ip = ret_ip; + f->n_args = n_args; + f->n_locals = n_locals; + memset(f->locals, 0, total_locals * sizeof(int)); + return f; +} + +static int *frame_local(frame *f, int idx) { + return &f->locals[idx]; +} + +static int *frame_arg(frame *f, int idx) { + return &f->locals[f->n_locals + idx]; +} + +static void frame_drop(frame *f) { + free(f); } void run(bytecode *bc) { + int stack[STACK_SIZE]; + int *sp = stack; int *globals = malloc(sizeof(int) * bc->globals_count); int ip = bc->entry_point; + int pending_args = 0; while (ip < bc->code_size) { uint8_t opcode = bc->code[ip++]; int l = opcode & 0xF; + printf("ip=%d opcode=0x%02X\n", ip, opcode); + switch (opcode) { case OP_CONST: { int n = read_i32(bc->code, ip); ip += 4; - push(n); + push(&sp, n); break; } case OP_BINOP_ADD: @@ -153,8 +184,8 @@ void run(bytecode *bc) { case OP_BINOP_MUL: case OP_BINOP_DIV: case OP_BINOP_MOD: { - int y = pop(); - int x = pop(); + int y = pop(&sp); + int x = pop(&sp); int result; switch (l) { case 1: @@ -181,43 +212,104 @@ void run(bytecode *bc) { result = x % y; break; } - push(result); + push(&sp, result); break; } case OP_LD: { int idx = read_i32(bc->code, ip); ip += 4; - push(globals[idx]); + push(&sp, globals[idx]); + break; + } + case OP_LD_LOC: { + int idx = read_i32(bc->code, ip); + ip += 4; + push(&sp, *frame_local(current_frame, idx)); + break; + } + case OP_LD_ARG: { + int idx = read_i32(bc->code, ip); + ip += 4; + push(&sp, *frame_arg(current_frame, idx)); break; } case OP_ST: { int idx = read_i32(bc->code, ip); ip += 4; - int val = pop(); + int val = pop(&sp); globals[idx] = val; - push(val); + push(&sp, val); break; } - case OP_DROP: - pop(); + case OP_ST_LOC: { + int idx = read_i32(bc->code, ip); + ip += 4; + int 
val = pop(&sp); + *frame_local(current_frame, idx) = val; + push(&sp, val); break; - case OP_DUP: { - int x = peek(); - push(x); + } + case OP_ST_ARG: { + int idx = read_i32(bc->code, ip); + ip += 4; + int val = pop(&sp); + *frame_arg(current_frame, idx) = val; + push(&sp, val); break; } + case OP_DROP: + pop(&sp); + break; + case OP_DUP: + push(&sp, peek(&sp)); + break; case OP_SWAP: { - int y = pop(); - int x = pop(); - push(y); - push(x); + int y = pop(&sp); + int x = pop(&sp); + push(&sp, y); + push(&sp, x); + break; + } + case OP_BEGIN: { + int n_args = read_i32(bc->code, ip); + ip += 4; + int n_locals = read_i32(bc->code, ip); + ip += 4; + frame *new_frame = frame_create(current_frame, return_ip, n_args, n_locals); + for (int i = n_args - 1; i >= 0; i--) { + *frame_arg(new_frame, i) = pop(&sp); + } + current_frame = new_frame; break; } - case OP_BEGIN: case OP_BEGIN_CLOSURE: // TODO: skip for now ip += 8; break; + case OP_CALL: { + int addr = read_i32(bc->code, ip); + ip += 4; + // discarding n_args + ip += 4; + return_ip = ip; + ip = addr; + break; + } + case OP_RET: + case OP_END: { + if (current_frame == NULL) { + goto end; + } + int ret_ip = current_frame->return_ip; + frame *parent = current_frame->parent; + frame_drop(current_frame); + current_frame = parent; + if (ret_ip < 0) { + goto end; + } + ip = ret_ip; + break; + } case OP_READ: { int x; // TODO: scanf ? 
@@ -225,17 +317,16 @@ void run(bytecode *bc) { fprintf(stderr, "Failed to read\n"); goto end; } - push(x); + push(&sp, x); break; } case OP_WRITE: { - int x = pop(); + int x = pop(&sp); printf("%d\n", x); - push(x); + push(&sp, x); break; } - case OP_END: - case OP_STOP: + case OP_HALT: goto end; case OP_LINE: ip += 4; @@ -247,6 +338,11 @@ void run(bytecode *bc) { } end: + while (current_frame) { + frame *parent = current_frame->parent; + frame_drop(current_frame); + current_frame = parent; + } free(globals); } From 241a9c42378a9c3bcf39c4722beddfabf1137acd Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 25 Dec 2025 11:20:34 +0300 Subject: [PATCH 005/141] project structure, build with make --- virtual_machine/Makefile | 23 +++++++ virtual_machine/bytecode.c | 85 +++++++++++++++++++++++++ virtual_machine/bytecode.h | 18 ++++++ virtual_machine/interpreter.c | 116 ++-------------------------------- virtual_machine/opcodes.h | 31 +++++++++ 5 files changed, 161 insertions(+), 112 deletions(-) create mode 100644 virtual_machine/Makefile create mode 100644 virtual_machine/bytecode.c create mode 100644 virtual_machine/bytecode.h create mode 100644 virtual_machine/opcodes.h diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile new file mode 100644 index 000000000..1c871baee --- /dev/null +++ b/virtual_machine/Makefile @@ -0,0 +1,23 @@ +CC = gcc +CFLAGS = -Wall -Wextra -std=c99 -O2 -g + +TARGET = interpreter +SOURCES = interpreter.c bytecode.c +HEADERS = opcodes.h bytecode.h +OBJECTS = $(SOURCES:.c=.o) + +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) -o $@ $^ + +%.o: %.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $@ $< + +clean: + rm -f $(OBJECTS) $(TARGET) + +.PHONY: all clean distclean + +interpreter.o: interpreter.c bytecode.h opcodes.h +bytecode.o: bytecode.c bytecode.h diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c new file mode 100644 index 000000000..fb3387edc --- /dev/null +++ b/virtual_machine/bytecode.c @@ -0,0 +1,85 @@ +#include 
"bytecode.h" +#include +#include +#include + +int read_i32(const uint8_t data[], int offset) { + return data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | + (data[offset + 3] << 24); +} + +#define HEADER_SIZE 12 +#define PUB_ENTRY_SIZE 8 + +static int find_entry_point(const uint8_t *data, int pubs_offset, int num_pubs, + const uint8_t *string_table, const char *name) { + for (int i = 0; i < num_pubs; i++) { + int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; + int name_offset = read_i32(data, entry_offset); + char *f_name = (char *)(string_table + name_offset); + int address = read_i32(data, entry_offset + 4); + if (strcmp(f_name, name) == 0) { + return address; + } + } + return -1; +} + +bytecode *load_bytecode(const char *filename) { + FILE *f = fopen(filename, "rb"); + if (!f) { + perror("fopen"); + return NULL; + } + + fseek(f, 0, SEEK_END); + long size = ftell(f); + rewind(f); + + uint8_t *data = malloc(size); + + if (!data) { + fclose(f); + return NULL; + } + + if (fread(data, 1, size, f) != size) { + perror("fread"); + fclose(f); + free(data); + return NULL; + } + fclose(f); + + int st_size = read_i32(data, 0); + int globals_count = read_i32(data, 4); + int num_pubs = read_i32(data, 8); + int num_imports = read_i32(data, 12); + int num_ext_fixups = read_i32(data, 16); + + int pubs_offset = HEADER_SIZE; + int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; + int code_offset = st_offset + st_size; + int code_size = size - code_offset; + + uint8_t *string_table = data + st_offset; + int main_entry_point = + find_entry_point(data, pubs_offset, num_pubs, string_table, "main"); + + bytecode *bc = malloc(sizeof(bytecode)); + bc->code = malloc(code_size); + memcpy((void *)bc->code, data + code_offset, code_size); + bc->code_size = code_size; + bc->entry_point = main_entry_point; + bc->globals_count = globals_count; + + free(data); + return bc; +} + +void free_bytecode(bytecode *bc) { + if (bc) { + free((void *)bc->code); + free(bc); + } +} 
diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h new file mode 100644 index 000000000..5a8b6debb --- /dev/null +++ b/virtual_machine/bytecode.h @@ -0,0 +1,18 @@ +#ifndef BYTECODE_H +#define BYTECODE_H + +#include + +typedef struct { + const uint8_t *code; + int code_size; + int entry_point; + int globals_count; +} bytecode; + + +int read_i32(const uint8_t data[], int offset); +bytecode *load_bytecode(const char *filename); +void free_bytecode(bytecode *bc); + +#endif diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 6e955ca40..6438b4c32 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -1,115 +1,8 @@ -#include #include #include #include - -typedef enum { - OP_BINOP_ADD = 0x01, - OP_BINOP_SUB = 0x02, - OP_BINOP_MUL = 0x03, - OP_BINOP_DIV = 0x04, - OP_BINOP_MOD = 0x05, - OP_CONST = 0x10, - OP_END = 0x16, - OP_RET = 0x17, - OP_DROP = 0x18, - OP_DUP = 0x19, - OP_SWAP = 0x1A, - OP_LD = 0x20, - OP_LD_LOC = 0x21, - OP_LD_ARG = 0x22, - OP_ST = 0x40, - OP_ST_LOC = 0x41, - OP_ST_ARG = 0x42, - OP_BEGIN = 0x52, - OP_BEGIN_CLOSURE = 0x53, - OP_CALL = 0x56, - OP_LINE = 0x5A, - OP_READ = 0x70, - OP_WRITE = 0x71, - OP_HALT = 0xFF, -} opcode_t; - -typedef struct { - const uint8_t *code; - int code_size; - int entry_point; - int globals_count; -} bytecode; - -static inline int read_i32(const uint8_t data[], int offset) { - return data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | - (data[offset + 3] << 24); -} - -#define HEADER_SIZE 12 -#define PUB_ENTRY_SIZE 8 - -static int find_entry_point(const uint8_t *data, int pubs_offset, int num_pubs, - const uint8_t *string_table, const char *name) { - for (int i = 0; i < num_pubs; i++) { - int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; - int name_offset = read_i32(data, entry_offset); - char *f_name = (char *)(string_table + name_offset); - int address = read_i32(data, entry_offset + 4); - if (strcmp(f_name, name) == 0) { - return 
address; - } - } - return -1; -} - -bytecode *load_bytecode(const char *filename) { - FILE *f = fopen(filename, "rb"); - if (!f) { - perror("fopen"); - return NULL; - } - - fseek(f, 0, SEEK_END); - long size = ftell(f); - rewind(f); - - uint8_t *data = malloc(size); - - if (!data) { - fclose(f); - return NULL; - } - - if (fread(data, 1, size, f) != size) { - perror("fread"); - fclose(f); - free(data); - return NULL; - } - fclose(f); - - int st_size = read_i32(data, 0); - int globals_count = read_i32(data, 4); - int num_pubs = read_i32(data, 8); - int num_imports = read_i32(data, 12); - int num_ext_fixups = read_i32(data, 16); - - int pubs_offset = HEADER_SIZE; - int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; - int code_offset = st_offset + st_size; - int code_size = size - code_offset; - - uint8_t *string_table = data + st_offset; - int main_entry_point = - find_entry_point(data, pubs_offset, num_pubs, string_table, "main"); - - bytecode *bc = malloc(sizeof(bytecode)); - bc->code = malloc(code_size); - memcpy(bc->code, data + code_offset, code_size); - bc->code_size = code_size; - bc->entry_point = main_entry_point; - bc->globals_count = globals_count; - - free(data); - return bc; -} +#include "bytecode.h" +#include "opcodes.h" #define STACK_SIZE 1024 @@ -358,8 +251,7 @@ int main(int argc, char *argv[]) { } run(bc); - - free(bc->code); - free(bc); + + free_bytecode(bc); return 0; } diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h new file mode 100644 index 000000000..88ffb37c0 --- /dev/null +++ b/virtual_machine/opcodes.h @@ -0,0 +1,31 @@ +#ifndef OPCODES_H +#define OPCODES_H + +typedef enum { + OP_BINOP_ADD = 0x01, + OP_BINOP_SUB = 0x02, + OP_BINOP_MUL = 0x03, + OP_BINOP_DIV = 0x04, + OP_BINOP_MOD = 0x05, + OP_CONST = 0x10, + OP_END = 0x16, + OP_RET = 0x17, + OP_DROP = 0x18, + OP_DUP = 0x19, + OP_SWAP = 0x1A, + OP_LD = 0x20, + OP_LD_LOC = 0x21, + OP_LD_ARG = 0x22, + OP_ST = 0x40, + OP_ST_LOC = 0x41, + OP_ST_ARG = 0x42, + OP_BEGIN = 
0x52, + OP_BEGIN_CLOSURE = 0x53, + OP_CALL = 0x56, + OP_LINE = 0x5A, + OP_READ = 0x70, + OP_WRITE = 0x71, + OP_HALT = 0xFF, +} opcode_t; + +#endif From 3804ecddad6c45f70f49f13b90ebc8a6a1b7222f Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 25 Dec 2025 11:24:07 +0300 Subject: [PATCH 006/141] test with `regressin_check.sh` --- virtual_machine/Makefile | 2 +- virtual_machine/regression_check.sh | 30 ++++++++++++++++++++++++++++ virtual_machine/test001.bc | Bin 89 -> 0 bytes virtual_machine/test001.t | 2 -- virtual_machine/test002.bc | Bin 104 -> 0 bytes virtual_machine/test002.t | 2 -- virtual_machine/test003.bc | Bin 118 -> 0 bytes virtual_machine/test003.t | 4 ---- virtual_machine/test004.bc | Bin 134 -> 0 bytes virtual_machine/test004.t | 2 -- virtual_machine/test005.bc | Bin 134 -> 0 bytes virtual_machine/test005.t | 2 -- 12 files changed, 31 insertions(+), 13 deletions(-) create mode 100755 virtual_machine/regression_check.sh delete mode 100644 virtual_machine/test001.bc delete mode 100644 virtual_machine/test001.t delete mode 100644 virtual_machine/test002.bc delete mode 100644 virtual_machine/test002.t delete mode 100644 virtual_machine/test003.bc delete mode 100644 virtual_machine/test003.t delete mode 100644 virtual_machine/test004.bc delete mode 100644 virtual_machine/test004.t delete mode 100644 virtual_machine/test005.bc delete mode 100644 virtual_machine/test005.t diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 1c871baee..6f7ca3487 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -1,7 +1,7 @@ CC = gcc CFLAGS = -Wall -Wextra -std=c99 -O2 -g -TARGET = interpreter +TARGET = interpreter.exe SOURCES = interpreter.c bytecode.c HEADERS = opcodes.h bytecode.h OBJECTS = $(SOURCES:.c=.o) diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh new file mode 100755 index 000000000..7835cb8a6 --- /dev/null +++ b/virtual_machine/regression_check.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# 
credit: ProgramSnail + +make build + +prefix="../regression/" +suffix=".lama" + +compiler=../_build/default/src/Driver.exe + +echo "Used compiler path:" +echo $compiler + +for test in ../regression/*.lama; do + echo $test + $compiler -b $test > /dev/null + test_path="${test%.*}" + test_file="${test_path##*/}" + echo $test_path: $test_file + cat $test_path.input | ./interpreter.exe $test_file.bc > test.log 2>&1 + sed -E '1d;s/^[[:space:]]*(>[[:space:]]*)*//' $test_path.t > test_orig.log + diff test.log test_orig.log + + rm $test_file.bc + rm test.log test_orig.log + echo "done" +done + +rm *.o diff --git a/virtual_machine/test001.bc b/virtual_machine/test001.bc deleted file mode 100644 index 1cb023faee1429980f7149f488b1654192614ce9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 89 zcmZQ&U|?VdVn!f_f!xH*Jcb}9D7PRAB;O_M5d~r^ JfS841{{j6-2B-i4 diff --git a/virtual_machine/test001.t b/virtual_machine/test001.t deleted file mode 100644 index 7be09d4ce..000000000 --- a/virtual_machine/test001.t +++ /dev/null @@ -1,2 +0,0 @@ - $ ./interpreter.exe test001.bc < ../regression/test001.input - 90 diff --git a/virtual_machine/test002.bc b/virtual_machine/test002.bc deleted file mode 100644 index 1e07fb1410d9a563c53c08b5f58d7952c7af0de4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 104 zcmZQ&U|?VdVn!f_f!xH*Jcb}9C^rfuT@VFQ-~bYmh++Yf1r8tqi6~YesQ_Yv31$b7 TE{P~MkN^-fD1i8kg<}5!Yf}f& diff --git a/virtual_machine/test002.t b/virtual_machine/test002.t deleted file mode 100644 index fe404eb32..000000000 --- a/virtual_machine/test002.t +++ /dev/null @@ -1,2 +0,0 @@ - $ ./interpreter.exe test002.bc < ../regression/test002.input - 41 diff --git a/virtual_machine/test003.bc b/virtual_machine/test003.bc deleted file mode 100644 index 1df9712121e0e91aaf36188f71e56382a63657f4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 118 zcmZQ&U|?VZVn!f_f!xH*Jcb~UCNu diff --git 
a/virtual_machine/test003.t b/virtual_machine/test003.t deleted file mode 100644 index be60f925b..000000000 --- a/virtual_machine/test003.t +++ /dev/null @@ -1,4 +0,0 @@ - $ ./interpreter.exe test003.bc < ../regression/test003.input - 7 - 3 - 1 diff --git a/virtual_machine/test004.bc b/virtual_machine/test004.bc deleted file mode 100644 index c66dd7ce0fff664b37857981220fd5c02bc9c86a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 134 zcmZQ&U|?VdVn!f_f!xH*Jcb}9C^rfuT@VFQ-~bYmh++Yf1r8tqi6~YesQ| Date: Thu, 25 Dec 2025 23:12:43 +0300 Subject: [PATCH 007/141] separate stack --- virtual_machine/interpreter.c | 70 +++++++++++++---------------------- virtual_machine/stack.c | 43 +++++++++++++++++++++ virtual_machine/stack.h | 20 ++++++++++ 3 files changed, 89 insertions(+), 44 deletions(-) create mode 100644 virtual_machine/stack.c create mode 100644 virtual_machine/stack.h diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 6438b4c32..9901a0603 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -3,20 +3,7 @@ #include #include "bytecode.h" #include "opcodes.h" - -#define STACK_SIZE 1024 - -static inline void push(int **sp, int val) { - *(*sp)++ = val; -} - -static inline int pop(int **sp) { - return *--(*sp); -} - -static inline int peek(int **sp) { - return *(*sp - 1); -} +#include "stack.h" typedef struct frame { struct frame *parent; @@ -53,23 +40,22 @@ static void frame_drop(frame *f) { } void run(bytecode *bc) { - int stack[STACK_SIZE]; - int *sp = stack; + stack_t stack; + stack_init(&stack); int *globals = malloc(sizeof(int) * bc->globals_count); int ip = bc->entry_point; - int pending_args = 0; while (ip < bc->code_size) { uint8_t opcode = bc->code[ip++]; int l = opcode & 0xF; - printf("ip=%d opcode=0x%02X\n", ip, opcode); + // printf("ip=%d opcode=0x%02X\n", ip, opcode); switch (opcode) { case OP_CONST: { int n = read_i32(bc->code, ip); ip += 4; - push(&sp, n); + 
stack_push(&stack, n); break; } case OP_BINOP_ADD: @@ -77,8 +63,8 @@ void run(bytecode *bc) { case OP_BINOP_MUL: case OP_BINOP_DIV: case OP_BINOP_MOD: { - int y = pop(&sp); - int x = pop(&sp); + int y = stack_pop(&stack); + int x = stack_pop(&stack); int result; switch (l) { case 1: @@ -105,64 +91,60 @@ void run(bytecode *bc) { result = x % y; break; } - push(&sp, result); + stack_push(&stack, result); break; } case OP_LD: { int idx = read_i32(bc->code, ip); ip += 4; - push(&sp, globals[idx]); + stack_push(&stack, globals[idx]); break; } case OP_LD_LOC: { int idx = read_i32(bc->code, ip); ip += 4; - push(&sp, *frame_local(current_frame, idx)); + stack_push(&stack, *frame_local(current_frame, idx)); break; } case OP_LD_ARG: { int idx = read_i32(bc->code, ip); ip += 4; - push(&sp, *frame_arg(current_frame, idx)); + stack_push(&stack, *frame_arg(current_frame, idx)); break; } case OP_ST: { int idx = read_i32(bc->code, ip); ip += 4; - int val = pop(&sp); + int val = stack_pop(&stack); globals[idx] = val; - push(&sp, val); + stack_push(&stack, val); break; } case OP_ST_LOC: { int idx = read_i32(bc->code, ip); ip += 4; - int val = pop(&sp); + int val = stack_pop(&stack); *frame_local(current_frame, idx) = val; - push(&sp, val); + stack_push(&stack, val); break; } case OP_ST_ARG: { int idx = read_i32(bc->code, ip); ip += 4; - int val = pop(&sp); + int val = stack_pop(&stack); *frame_arg(current_frame, idx) = val; - push(&sp, val); + stack_push(&stack, val); break; } case OP_DROP: - pop(&sp); + stack_pop(&stack); break; case OP_DUP: - push(&sp, peek(&sp)); + stack_dup(&stack); break; - case OP_SWAP: { - int y = pop(&sp); - int x = pop(&sp); - push(&sp, y); - push(&sp, x); + case OP_SWAP: + stack_swap(&stack); break; - } case OP_BEGIN: { int n_args = read_i32(bc->code, ip); ip += 4; @@ -170,7 +152,7 @@ void run(bytecode *bc) { ip += 4; frame *new_frame = frame_create(current_frame, return_ip, n_args, n_locals); for (int i = n_args - 1; i >= 0; i--) { - *frame_arg(new_frame, 
i) = pop(&sp); + *frame_arg(new_frame, i) = stack_pop(&stack); } current_frame = new_frame; break; @@ -210,13 +192,13 @@ void run(bytecode *bc) { fprintf(stderr, "Failed to read\n"); goto end; } - push(&sp, x); + stack_push(&stack, x); break; } case OP_WRITE: { - int x = pop(&sp); + int x = stack_pop(&stack); printf("%d\n", x); - push(&sp, x); + stack_push(&stack, x); break; } case OP_HALT: @@ -251,7 +233,7 @@ int main(int argc, char *argv[]) { } run(bc); - + free_bytecode(bc); return 0; } diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c new file mode 100644 index 000000000..7cfd1816b --- /dev/null +++ b/virtual_machine/stack.c @@ -0,0 +1,43 @@ +#include "stack.h" +#include +#include + +void stack_init(stack_t *s) { + s->sp = s->data; +} + +void stack_push(stack_t *s, int val) { + if (s->sp >= s->data + STACK_SIZE) { + fprintf(stderr, "Stack overflow\n"); + exit(1); + } + *s->sp++ = val; +} + +int stack_pop(stack_t *s) { + if (s->sp <= s->data) { + fprintf(stderr, "Cannot pop from an empty stack"); + exit(1); + } + return *--s->sp; +} + +int stack_peek(const stack_t *s) { + if (s->sp <= s->data) { + fprintf(stderr, "Cannot peek from an empty stack"); + exit(1); + } + return *(s->sp - 1); +} + +void stack_dup(stack_t *s) { + int top = stack_peek(s); + stack_push(s, top); +} + +void stack_swap(stack_t *s) { + int y = stack_pop(s); + int x = stack_pop(s); + stack_push(s, y); + stack_push(s, x); +} diff --git a/virtual_machine/stack.h b/virtual_machine/stack.h new file mode 100644 index 000000000..d4639c176 --- /dev/null +++ b/virtual_machine/stack.h @@ -0,0 +1,20 @@ +#ifndef STACK_H +#define STACK_H + +#include + +#define STACK_SIZE 1024 + +typedef struct { + int data[STACK_SIZE]; + int *sp; +} stack_t; + +void stack_init(stack_t *s); +void stack_push(stack_t *s, int val); +int stack_pop(stack_t *s); +int stack_peek(const stack_t *s); +void stack_dup(stack_t *s); +void stack_swap(stack_t *s); + +#endif From a463b2d6c1c4b6fa7c267aef7e6fe724a8ae99b3 Mon 
Sep 17 00:00:00 2001 From: ancavar Date: Fri, 26 Dec 2025 03:15:20 +0300 Subject: [PATCH 008/141] change stack frame organization --- virtual_machine/Makefile | 8 ++- virtual_machine/call_stack.c | 50 +++++++++++++++ virtual_machine/call_stack.h | 33 ++++++++++ virtual_machine/interpreter.c | 112 ++++++++++++++++------------------ 4 files changed, 141 insertions(+), 62 deletions(-) create mode 100644 virtual_machine/call_stack.c create mode 100644 virtual_machine/call_stack.h diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 6f7ca3487..e3bb51347 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -2,8 +2,8 @@ CC = gcc CFLAGS = -Wall -Wextra -std=c99 -O2 -g TARGET = interpreter.exe -SOURCES = interpreter.c bytecode.c -HEADERS = opcodes.h bytecode.h +SOURCES = interpreter.c bytecode.c stack.c call_stack.c +HEADERS = opcodes.h bytecode.h stack.h call_stack.h OBJECTS = $(SOURCES:.c=.o) all: $(TARGET) @@ -19,5 +19,7 @@ clean: .PHONY: all clean distclean -interpreter.o: interpreter.c bytecode.h opcodes.h +interpreter.o: interpreter.c bytecode.h opcodes.h stack.h call_stack.h bytecode.o: bytecode.c bytecode.h +stack.o: stack.c stack.h +call_stack.o: call_stack.c call_stack.h diff --git a/virtual_machine/call_stack.c b/virtual_machine/call_stack.c new file mode 100644 index 000000000..8abfb91d5 --- /dev/null +++ b/virtual_machine/call_stack.c @@ -0,0 +1,50 @@ +#include "call_stack.h" +#include +#include +#include + +void call_stack_init(call_stack_t *cs) { + cs->top = 0; + memset(cs->frames, 0, sizeof(cs->frames)); +} + +void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, + int n_locals) { + if (cs->top >= MAX_CALL_DEPTH) { + fprintf(stderr, "Call stack overflow\n"); + exit(1); + } + + call_frame_t *frame = &cs->frames[cs->top++]; + frame->return_ip = return_ip; + frame->base = base; + frame->n_args = n_args; + frame->n_locals = n_locals; +} + +call_frame_t call_stack_pop(call_stack_t *cs) { + if (cs->top <= 
0) { + fprintf(stderr, "Cannot pop from an empty call stack\n"); + exit(1); + } + + return cs->frames[--cs->top]; +} + +call_frame_t call_stack_peek(call_stack_t *cs) { + if (cs->top <= 0) { + fprintf(stderr, "Cannot peek from an empty call stack\n"); + exit(1); + } + + return cs->frames[cs->top - 1]; +} + +call_frame_t *call_stack_current(call_stack_t *cs) { + if (cs->top <= 0) { + return NULL; + } + return &cs->frames[cs->top - 1]; +} + +int call_stack_is_empty(call_stack_t *cs) { return cs->top == 0; } diff --git a/virtual_machine/call_stack.h b/virtual_machine/call_stack.h new file mode 100644 index 000000000..a081fcd84 --- /dev/null +++ b/virtual_machine/call_stack.h @@ -0,0 +1,33 @@ +#ifndef CALL_STACK_H +#define CALL_STACK_H + +#include + +#define MAX_CALL_DEPTH 1024 + +typedef struct { + int return_ip; + int base; + int n_args; + int n_locals; +} call_frame_t; + +typedef struct { + call_frame_t frames[MAX_CALL_DEPTH]; + int top; +} call_stack_t; + +void call_stack_init(call_stack_t *cs); + +void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, + int n_locals); + +call_frame_t call_stack_pop(call_stack_t *cs); + +call_frame_t call_stack_peek(call_stack_t *cs); + +call_frame_t *call_stack_current(call_stack_t *cs); + +int call_stack_is_empty(call_stack_t *cs); + +#endif diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 9901a0603..ec4129df8 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -1,49 +1,29 @@ -#include -#include -#include #include "bytecode.h" +#include "call_stack.h" #include "opcodes.h" #include "stack.h" +#include +#include +#include -typedef struct frame { - struct frame *parent; - int return_ip; - int n_args; - int n_locals; - int locals[]; -} frame; - -static frame *current_frame = NULL; -static int return_ip = -1; - -static frame *frame_create(frame *parent, int ret_ip, int n_args, int n_locals) { - int total_locals = n_args + n_locals; - frame *f = 
malloc(sizeof(frame) + total_locals * sizeof(int)); - f->parent = parent; - f->return_ip = ret_ip; - f->n_args = n_args; - f->n_locals = n_locals; - memset(f->locals, 0, total_locals * sizeof(int)); - return f; -} - -static int *frame_local(frame *f, int idx) { - return &f->locals[idx]; -} - -static int *frame_arg(frame *f, int idx) { - return &f->locals[f->n_locals + idx]; +static inline int *get_local(stack_t *stack, call_frame_t *frame, int idx) { + return &stack->data[frame->base + frame->n_args + idx]; } -static void frame_drop(frame *f) { - free(f); +static inline int *get_arg(stack_t *stack, call_frame_t *frame, int idx) { + return &stack->data[frame->base + idx]; } void run(bytecode *bc) { stack_t stack; + call_stack_t call_stack; stack_init(&stack); + call_stack_init(&call_stack); + int *globals = malloc(sizeof(int) * bc->globals_count); + int ip = bc->entry_point; + int return_ip = -1; while (ip < bc->code_size) { uint8_t opcode = bc->code[ip++]; @@ -103,13 +83,15 @@ void run(bytecode *bc) { case OP_LD_LOC: { int idx = read_i32(bc->code, ip); ip += 4; - stack_push(&stack, *frame_local(current_frame, idx)); + call_frame_t *frame = call_stack_current(&call_stack); + stack_push(&stack, *get_local(&stack, frame, idx)); break; } case OP_LD_ARG: { int idx = read_i32(bc->code, ip); ip += 4; - stack_push(&stack, *frame_arg(current_frame, idx)); + call_frame_t *frame = call_stack_current(&call_stack); + stack_push(&stack, *get_arg(&stack, frame, idx)); break; } case OP_ST: { @@ -123,16 +105,18 @@ void run(bytecode *bc) { case OP_ST_LOC: { int idx = read_i32(bc->code, ip); ip += 4; + call_frame_t *frame = call_stack_current(&call_stack); int val = stack_pop(&stack); - *frame_local(current_frame, idx) = val; + *get_local(&stack, frame, idx) = val; stack_push(&stack, val); break; } case OP_ST_ARG: { int idx = read_i32(bc->code, ip); ip += 4; + call_frame_t *frame = call_stack_current(&call_stack); int val = stack_pop(&stack); - *frame_arg(current_frame, idx) = val; + 
*get_arg(&stack, frame, idx) = val; stack_push(&stack, val); break; } @@ -150,11 +134,15 @@ void run(bytecode *bc) { ip += 4; int n_locals = read_i32(bc->code, ip); ip += 4; - frame *new_frame = frame_create(current_frame, return_ip, n_args, n_locals); - for (int i = n_args - 1; i >= 0; i--) { - *frame_arg(new_frame, i) = stack_pop(&stack); + + int base = (stack.sp - stack.data) - n_args; + + // space for locals + for (int i = 0; i < n_locals; i++) { + stack_push(&stack, 0); } - current_frame = new_frame; + + call_stack_push(&call_stack, return_ip, base, n_args, n_locals); break; } case OP_BEGIN_CLOSURE: @@ -172,22 +160,33 @@ void run(bytecode *bc) { } case OP_RET: case OP_END: { - if (current_frame == NULL) { - goto end; - } - int ret_ip = current_frame->return_ip; - frame *parent = current_frame->parent; - frame_drop(current_frame); - current_frame = parent; - if (ret_ip < 0) { - goto end; - } - ip = ret_ip; - break; + if (call_stack_is_empty(&call_stack)) { + goto end; + } + call_frame_t frame = call_stack_pop(&call_stack); + + int current_top = stack.sp - stack.data; + int returns_start = frame.base + frame.n_args + frame.n_locals; + int n_returns = current_top - returns_start; + + if (n_returns <= 0) { + n_returns = 0; + } else { + for (int i = 0; i < n_returns; i++) { + stack.data[frame.base + i] = stack.data[returns_start + i]; + } + } + + stack.sp = stack.data + frame.base + n_returns; + if (frame.return_ip < 0) { + goto end; + } + ip = frame.return_ip; + break; } + case OP_READ: { int x; - // TODO: scanf ? 
if (scanf("%d", &x) != 1) { fprintf(stderr, "Failed to read\n"); goto end; @@ -213,11 +212,6 @@ void run(bytecode *bc) { } end: - while (current_frame) { - frame *parent = current_frame->parent; - frame_drop(current_frame); - current_frame = parent; - } free(globals); } From 7e03eabd4ba2f6a257d8a191162d307e45f699b6 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 26 Dec 2025 06:46:04 +0300 Subject: [PATCH 009/141] add jmp, cjmp, comprasion opcodes --- virtual_machine/interpreter.c | 56 +++++++++++++++++++++++++++++++---- virtual_machine/opcodes.h | 9 ++++++ 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index ec4129df8..fad3e276b 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -4,7 +4,6 @@ #include "stack.h" #include #include -#include static inline int *get_local(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base + frame->n_args + idx]; @@ -29,7 +28,7 @@ void run(bytecode *bc) { uint8_t opcode = bc->code[ip++]; int l = opcode & 0xF; - // printf("ip=%d opcode=0x%02X\n", ip, opcode); + // printf("ip=0x%08X opcode=0x%02X\n", ip-1, opcode); switch (opcode) { case OP_CONST: { @@ -42,7 +41,13 @@ void run(bytecode *bc) { case OP_BINOP_SUB: case OP_BINOP_MUL: case OP_BINOP_DIV: - case OP_BINOP_MOD: { + case OP_BINOP_MOD: + case OP_BINOP_EQ: + case OP_BINOP_NE: + case OP_BINOP_LT: + case OP_BINOP_LE: + case OP_BINOP_GT: + case OP_BINOP_GE: { int y = stack_pop(&stack); int x = stack_pop(&stack); int result; @@ -70,10 +75,51 @@ void run(bytecode *bc) { } result = x % y; break; + case 6: + result = x < y; + break; + case 7: + result = x <= y; + break; + case 8: + result = x > y; + break; + case 9: + result = x >= y; + break; + case 10: + result = x == y; + break; + case 11: + result = x != y; + break; } stack_push(&stack, result); break; } + case OP_JMP: { + int addr = read_i32(bc->code, ip); + ip = addr; + break; + } + case OP_CJMP_Z: 
{ + int addr = read_i32(bc->code, ip); + ip += 4; + int val = stack_pop(&stack); + if (val == 0) { + ip = addr; + } + break; + } + case OP_CJMP_NZ: { + int addr = read_i32(bc->code, ip); + ip += 4; + int val = stack_pop(&stack); + if (val != 0) { + ip = addr; + } + break; + } case OP_LD: { int idx = read_i32(bc->code, ip); ip += 4; @@ -145,7 +191,7 @@ void run(bytecode *bc) { call_stack_push(&call_stack, return_ip, base, n_args, n_locals); break; } - case OP_BEGIN_CLOSURE: + case OP_BEGIN_CLOSURE: // TODO: skip for now ip += 8; break; @@ -206,7 +252,7 @@ void run(bytecode *bc) { ip += 4; break; default: - fprintf(stderr, "Not yet supported opcode 0x%02X at ip=%d\n", opcode, ip); + fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08x\n", opcode, ip-1); goto end; } } diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 88ffb37c0..c5df04a1c 100644 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -7,7 +7,14 @@ typedef enum { OP_BINOP_MUL = 0x03, OP_BINOP_DIV = 0x04, OP_BINOP_MOD = 0x05, + OP_BINOP_LT = 0x06, + OP_BINOP_LE = 0x07, + OP_BINOP_GT = 0x08, + OP_BINOP_GE = 0x09, + OP_BINOP_EQ = 0x0A, + OP_BINOP_NE = 0x0B, OP_CONST = 0x10, + OP_JMP = 0x15, OP_END = 0x16, OP_RET = 0x17, OP_DROP = 0x18, @@ -19,6 +26,8 @@ typedef enum { OP_ST = 0x40, OP_ST_LOC = 0x41, OP_ST_ARG = 0x42, + OP_CJMP_Z = 0x50, + OP_CJMP_NZ = 0x51, OP_BEGIN = 0x52, OP_BEGIN_CLOSURE = 0x53, OP_CALL = 0x56, From 3366130b0b26ff29bb64a81a144bdbfead303cc3 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sat, 27 Dec 2025 03:59:41 +0300 Subject: [PATCH 010/141] statically link runtime functions --- virtual_machine/Makefile | 6 +- virtual_machine/interpreter.c | 103 +++++++++++++++------------- virtual_machine/regression_check.sh | 4 +- virtual_machine/stack.c | 12 ++-- virtual_machine/stack.h | 11 +-- 5 files changed, 74 insertions(+), 62 deletions(-) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index e3bb51347..808a3ca5f 100644 --- 
a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -2,8 +2,8 @@ CC = gcc CFLAGS = -Wall -Wextra -std=c99 -O2 -g TARGET = interpreter.exe -SOURCES = interpreter.c bytecode.c stack.c call_stack.c -HEADERS = opcodes.h bytecode.h stack.h call_stack.h +SOURCES = interpreter.c bytecode.c stack.c call_stack.c ../runtime/runtime.c ../runtime/gc.c +HEADERS = opcodes.h bytecode.h stack.h call_stack.h ../runtime/runtime.h ../runtime/gc.h OBJECTS = $(SOURCES:.c=.o) all: $(TARGET) @@ -23,3 +23,5 @@ interpreter.o: interpreter.c bytecode.h opcodes.h stack.h call_stack.h bytecode.o: bytecode.c bytecode.h stack.o: stack.c stack.h call_stack.o: call_stack.c call_stack.h +runtime.o: ../runtime/runtime.c ../runtime/runtime.h +gc.o: ..runtime/runtime/gc.h ../runtime/gc.h diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index fad3e276b..a2c25b846 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -4,12 +4,27 @@ #include "stack.h" #include #include +#include "../runtime/runtime_common.h" -static inline int *get_local(stack_t *stack, call_frame_t *frame, int idx) { +extern aint Lread(void); +extern aint Lwrite(aint n); +extern aint Ls__Infix_43(void *p, void *q); +extern aint Ls__Infix_45(void *p, void *q); +extern aint Ls__Infix_42(void *p, void *q); +extern aint Ls__Infix_47(void *p, void *q); +extern aint Ls__Infix_37(void *p, void *q); +extern aint Ls__Infix_60(void *p, void *q); +extern aint Ls__Infix_6061(void *p, void *q); +extern aint Ls__Infix_62(void *p, void *q); +extern aint Ls__Infix_6261(void *p, void *q); +extern aint Ls__Infix_6161(void *p, void *q); +extern aint Ls__Infix_3361(void *p, void *q); + +static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base + frame->n_args + idx]; } -static inline int *get_arg(stack_t *stack, call_frame_t *frame, int idx) { +static inline aint *get_arg(stack_t *stack, call_frame_t *frame, int idx) { return 
&stack->data[frame->base + idx]; } @@ -19,7 +34,7 @@ void run(bytecode *bc) { stack_init(&stack); call_stack_init(&call_stack); - int *globals = malloc(sizeof(int) * bc->globals_count); + aint *globals = malloc(sizeof(aint) * bc->globals_count); int ip = bc->entry_point; int return_ip = -1; @@ -34,7 +49,7 @@ void run(bytecode *bc) { case OP_CONST: { int n = read_i32(bc->code, ip); ip += 4; - stack_push(&stack, n); + stack_push(&stack, BOX(n)); break; } case OP_BINOP_ADD: @@ -48,50 +63,50 @@ void run(bytecode *bc) { case OP_BINOP_LE: case OP_BINOP_GT: case OP_BINOP_GE: { - int y = stack_pop(&stack); - int x = stack_pop(&stack); - int result; + aint y = stack_pop(&stack); + aint x = stack_pop(&stack); + aint result; switch (l) { - case 1: - result = x + y; + case 1: // + + result = Ls__Infix_43((void*)x, (void*)y); break; - case 2: - result = x - y; + case 2: // - + result = Ls__Infix_45((void*)x, (void*)y); break; - case 3: - result = x * y; + case 3: // * + result = Ls__Infix_42((void*)x, (void*)y); break; - case 4: - if (y == 0) { + case 4: // / + if (UNBOX(y) == 0) { fprintf(stderr, "Division by zero\n"); goto end; } - result = x / y; + result = Ls__Infix_47((void*)x, (void*)y); break; - case 5: - if (y == 0) { + case 5: // % + if (UNBOX(y) == 0) { fprintf(stderr, "Division by zero\n"); goto end; } - result = x % y; + result = Ls__Infix_37((void*)x, (void*)y); break; - case 6: - result = x < y; + case 6: // < + result = Ls__Infix_60((void*)x, (void*)y); break; - case 7: - result = x <= y; + case 7: // <= + result = Ls__Infix_6061((void*)x, (void*)y); break; - case 8: - result = x > y; + case 8: // > + result = Ls__Infix_62((void*)x, (void*)y); break; - case 9: - result = x >= y; + case 9: // >= + result = Ls__Infix_6261((void*)x, (void*)y); break; - case 10: - result = x == y; + case 10: // == + result = Ls__Infix_6161((void*)x, (void*)y); break; - case 11: - result = x != y; + case 11: // != + result = Ls__Infix_3361((void*)x, (void*)y); break; } 
stack_push(&stack, result); @@ -105,8 +120,8 @@ void run(bytecode *bc) { case OP_CJMP_Z: { int addr = read_i32(bc->code, ip); ip += 4; - int val = stack_pop(&stack); - if (val == 0) { + aint val = stack_pop(&stack); + if (UNBOX(val) == 0) { ip = addr; } break; @@ -114,8 +129,8 @@ void run(bytecode *bc) { case OP_CJMP_NZ: { int addr = read_i32(bc->code, ip); ip += 4; - int val = stack_pop(&stack); - if (val != 0) { + aint val = stack_pop(&stack); + if (UNBOX(val) != 0) { ip = addr; } break; @@ -143,7 +158,7 @@ void run(bytecode *bc) { case OP_ST: { int idx = read_i32(bc->code, ip); ip += 4; - int val = stack_pop(&stack); + aint val = stack_pop(&stack); globals[idx] = val; stack_push(&stack, val); break; @@ -152,7 +167,7 @@ void run(bytecode *bc) { int idx = read_i32(bc->code, ip); ip += 4; call_frame_t *frame = call_stack_current(&call_stack); - int val = stack_pop(&stack); + aint val = stack_pop(&stack); *get_local(&stack, frame, idx) = val; stack_push(&stack, val); break; @@ -161,7 +176,7 @@ void run(bytecode *bc) { int idx = read_i32(bc->code, ip); ip += 4; call_frame_t *frame = call_stack_current(&call_stack); - int val = stack_pop(&stack); + aint val = stack_pop(&stack); *get_arg(&stack, frame, idx) = val; stack_push(&stack, val); break; @@ -232,18 +247,12 @@ void run(bytecode *bc) { } case OP_READ: { - int x; - if (scanf("%d", &x) != 1) { - fprintf(stderr, "Failed to read\n"); - goto end; - } - stack_push(&stack, x); + stack_push(&stack, Lread()); break; } case OP_WRITE: { - int x = stack_pop(&stack); - printf("%d\n", x); - stack_push(&stack, x); + aint val = stack_pop(&stack); + stack_push(&stack, Lwrite(val)); break; } case OP_HALT: diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh index 7835cb8a6..b28aa736e 100755 --- a/virtual_machine/regression_check.sh +++ b/virtual_machine/regression_check.sh @@ -19,8 +19,8 @@ for test in ../regression/*.lama; do test_file="${test_path##*/}" echo $test_path: $test_file cat 
$test_path.input | ./interpreter.exe $test_file.bc > test.log 2>&1 - sed -E '1d;s/^[[:space:]]*(>[[:space:]]*)*//' $test_path.t > test_orig.log - diff test.log test_orig.log + sed -E '1d;s/^//' $test_path.t > test_orig.log + diff -w test.log test_orig.log rm $test_file.bc rm test.log test_orig.log diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c index 7cfd1816b..3e52d2f66 100644 --- a/virtual_machine/stack.c +++ b/virtual_machine/stack.c @@ -6,7 +6,7 @@ void stack_init(stack_t *s) { s->sp = s->data; } -void stack_push(stack_t *s, int val) { +void stack_push(stack_t *s, aint val) { if (s->sp >= s->data + STACK_SIZE) { fprintf(stderr, "Stack overflow\n"); exit(1); @@ -14,7 +14,7 @@ void stack_push(stack_t *s, int val) { *s->sp++ = val; } -int stack_pop(stack_t *s) { +aint stack_pop(stack_t *s) { if (s->sp <= s->data) { fprintf(stderr, "Cannot pop from an empty stack"); exit(1); @@ -22,7 +22,7 @@ int stack_pop(stack_t *s) { return *--s->sp; } -int stack_peek(const stack_t *s) { +aint stack_peek(const stack_t *s) { if (s->sp <= s->data) { fprintf(stderr, "Cannot peek from an empty stack"); exit(1); @@ -31,13 +31,13 @@ int stack_peek(const stack_t *s) { } void stack_dup(stack_t *s) { - int top = stack_peek(s); + aint top = stack_peek(s); stack_push(s, top); } void stack_swap(stack_t *s) { - int y = stack_pop(s); - int x = stack_pop(s); + aint y = stack_pop(s); + aint x = stack_pop(s); stack_push(s, y); stack_push(s, x); } diff --git a/virtual_machine/stack.h b/virtual_machine/stack.h index d4639c176..f2c8ebef3 100644 --- a/virtual_machine/stack.h +++ b/virtual_machine/stack.h @@ -2,18 +2,19 @@ #define STACK_H #include +#include "../runtime/runtime_common.h" #define STACK_SIZE 1024 typedef struct { - int data[STACK_SIZE]; - int *sp; + aint data[STACK_SIZE]; + aint *sp; } stack_t; void stack_init(stack_t *s); -void stack_push(stack_t *s, int val); -int stack_pop(stack_t *s); -int stack_peek(const stack_t *s); +void stack_push(stack_t *s, aint val); +aint 
stack_pop(stack_t *s); +aint stack_peek(const stack_t *s); void stack_dup(stack_t *s); void stack_swap(stack_t *s); From 05660851187269f132ce2ac7baf606d66109d352 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 28 Dec 2025 12:51:56 +0300 Subject: [PATCH 011/141] first gc, arrays --- virtual_machine/interpreter.c | 131 +++++++++++++++++++++++++--------- virtual_machine/opcodes.h | 20 ++++-- 2 files changed, 109 insertions(+), 42 deletions(-) diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index a2c25b846..306694630 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -4,8 +4,13 @@ #include "stack.h" #include #include +#include "../runtime/gc.h" #include "../runtime/runtime_common.h" +extern size_t __gc_stack_top, __gc_stack_bottom; +extern void __gc_init(void); +extern void __init(void); + extern aint Lread(void); extern aint Lwrite(aint n); extern aint Ls__Infix_43(void *p, void *q); @@ -19,6 +24,13 @@ extern aint Ls__Infix_62(void *p, void *q); extern aint Ls__Infix_6261(void *p, void *q); extern aint Ls__Infix_6161(void *p, void *q); extern aint Ls__Infix_3361(void *p, void *q); +extern aint Ls__Infix_3838(void *p, void *q); +extern aint Ls__Infix_3333(void *p, void *q); + +extern aint Llength(void *p); +extern void *Barray(aint *args, aint bn); +extern void *Belem(void *p, aint i); +extern void *Bsta(void *x, aint i, void *v); static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base + frame->n_args + idx]; @@ -34,6 +46,11 @@ void run(bytecode *bc) { stack_init(&stack); call_stack_init(&call_stack); + __init(); + + __gc_stack_top = (size_t)&stack.data[0]; + __gc_stack_bottom = (size_t)&stack.data[STACK_SIZE]; + aint *globals = malloc(sizeof(aint) * bc->globals_count); int ip = bc->entry_point; @@ -56,57 +73,65 @@ void run(bytecode *bc) { case OP_BINOP_SUB: case OP_BINOP_MUL: case OP_BINOP_DIV: - case OP_BINOP_MOD: + case OP_BINOP_MOD: case OP_BINOP_EQ: 
case OP_BINOP_NE: case OP_BINOP_LT: case OP_BINOP_LE: case OP_BINOP_GT: - case OP_BINOP_GE: { + case OP_BINOP_GE: + case OP_BINOP_AND: + case OP_BINOP_OR: { aint y = stack_pop(&stack); aint x = stack_pop(&stack); aint result; switch (l) { case 1: // + - result = Ls__Infix_43((void*)x, (void*)y); + result = Ls__Infix_43((void *)x, (void *)y); break; case 2: // - - result = Ls__Infix_45((void*)x, (void*)y); + result = Ls__Infix_45((void *)x, (void *)y); break; case 3: // * - result = Ls__Infix_42((void*)x, (void*)y); + result = Ls__Infix_42((void *)x, (void *)y); break; case 4: // / if (UNBOX(y) == 0) { fprintf(stderr, "Division by zero\n"); goto end; } - result = Ls__Infix_47((void*)x, (void*)y); + result = Ls__Infix_47((void *)x, (void *)y); break; case 5: // % if (UNBOX(y) == 0) { fprintf(stderr, "Division by zero\n"); goto end; } - result = Ls__Infix_37((void*)x, (void*)y); + result = Ls__Infix_37((void *)x, (void *)y); break; case 6: // < - result = Ls__Infix_60((void*)x, (void*)y); + result = Ls__Infix_60((void *)x, (void *)y); break; case 7: // <= - result = Ls__Infix_6061((void*)x, (void*)y); + result = Ls__Infix_6061((void *)x, (void *)y); break; case 8: // > - result = Ls__Infix_62((void*)x, (void*)y); + result = Ls__Infix_62((void *)x, (void *)y); break; case 9: // >= - result = Ls__Infix_6261((void*)x, (void*)y); + result = Ls__Infix_6261((void *)x, (void *)y); break; case 10: // == - result = Ls__Infix_6161((void*)x, (void*)y); + result = Ls__Infix_6161((void *)x, (void *)y); break; - case 11: // != - result = Ls__Infix_3361((void*)x, (void*)y); + case 11: // != + result = Ls__Infix_3361((void *)x, (void *)y); + break; + case 12: // && + result = Ls__Infix_3838((void *)x, (void *)y); + break; + case 13: // !! 
+ result = Ls__Infix_3333((void *)x, (void *)y); break; } stack_push(&stack, result); @@ -206,7 +231,7 @@ void run(bytecode *bc) { call_stack_push(&call_stack, return_ip, base, n_args, n_locals); break; } - case OP_BEGIN_CLOSURE: + case OP_BEGIN_CLOSURE: // TODO: skip for now ip += 8; break; @@ -221,29 +246,29 @@ void run(bytecode *bc) { } case OP_RET: case OP_END: { - if (call_stack_is_empty(&call_stack)) { - goto end; - } - call_frame_t frame = call_stack_pop(&call_stack); + if (call_stack_is_empty(&call_stack)) { + goto end; + } + call_frame_t frame = call_stack_pop(&call_stack); - int current_top = stack.sp - stack.data; - int returns_start = frame.base + frame.n_args + frame.n_locals; - int n_returns = current_top - returns_start; + int current_top = stack.sp - stack.data; + int returns_start = frame.base + frame.n_args + frame.n_locals; + int n_returns = current_top - returns_start; - if (n_returns <= 0) { - n_returns = 0; - } else { - for (int i = 0; i < n_returns; i++) { - stack.data[frame.base + i] = stack.data[returns_start + i]; - } + if (n_returns <= 0) { + n_returns = 0; + } else { + for (int i = 0; i < n_returns; i++) { + stack.data[frame.base + i] = stack.data[returns_start + i]; } + } - stack.sp = stack.data + frame.base + n_returns; - if (frame.return_ip < 0) { - goto end; - } - ip = frame.return_ip; - break; + stack.sp = stack.data + frame.base + n_returns; + if (frame.return_ip < 0) { + goto end; + } + ip = frame.return_ip; + break; } case OP_READ: { @@ -255,13 +280,49 @@ void run(bytecode *bc) { stack_push(&stack, Lwrite(val)); break; } + case OP_ELEM: { + // [index, array] -> [element] + aint idx = stack_pop(&stack); + aint arr = stack_pop(&stack); + void *elem = Belem((void *)arr, idx); + stack_push(&stack, (aint)elem); + break; + } + case OP_STA: { + // TODO: support string (two operands) + aint val = stack_pop(&stack); + aint idx = stack_pop(&stack); + aint arr = stack_pop(&stack); + Bsta((void *)arr, idx, (void *)val); + stack_push(&stack, 
val); + break; + } + case OP_LENGTH: { + aint val = stack_pop(&stack); + aint len = Llength((void *)val); + stack_push(&stack, len); + break; + } + case OP_BARRAY: { + int n = read_i32(bc->code, ip); + ip += 4; + aint *args = malloc(n * sizeof(aint)); + for (int i = n - 1; i >= 0; i--) { + args[i] = stack_pop(&stack); + } + void *arr = Barray(args, BOX(n)); + free(args); + stack_push(&stack, (aint)arr); + break; + } case OP_HALT: goto end; case OP_LINE: ip += 4; break; default: - fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08x\n", opcode, ip-1); + fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08x\n", opcode, + ip - 1); goto end; } } diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index c5df04a1c..60aff4dcb 100644 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -7,26 +7,30 @@ typedef enum { OP_BINOP_MUL = 0x03, OP_BINOP_DIV = 0x04, OP_BINOP_MOD = 0x05, - OP_BINOP_LT = 0x06, - OP_BINOP_LE = 0x07, - OP_BINOP_GT = 0x08, - OP_BINOP_GE = 0x09, - OP_BINOP_EQ = 0x0A, - OP_BINOP_NE = 0x0B, + OP_BINOP_LT = 0x06, + OP_BINOP_LE = 0x07, + OP_BINOP_GT = 0x08, + OP_BINOP_GE = 0x09, + OP_BINOP_EQ = 0x0A, + OP_BINOP_NE = 0x0B, + OP_BINOP_AND = 0x0C, + OP_BINOP_OR = 0x0D, OP_CONST = 0x10, + OP_STA = 0x14, OP_JMP = 0x15, OP_END = 0x16, OP_RET = 0x17, OP_DROP = 0x18, OP_DUP = 0x19, OP_SWAP = 0x1A, + OP_ELEM = 0x1B, OP_LD = 0x20, OP_LD_LOC = 0x21, OP_LD_ARG = 0x22, OP_ST = 0x40, OP_ST_LOC = 0x41, OP_ST_ARG = 0x42, - OP_CJMP_Z = 0x50, + OP_CJMP_Z = 0x50, OP_CJMP_NZ = 0x51, OP_BEGIN = 0x52, OP_BEGIN_CLOSURE = 0x53, @@ -34,6 +38,8 @@ typedef enum { OP_LINE = 0x5A, OP_READ = 0x70, OP_WRITE = 0x71, + OP_LENGTH = 0x72, + OP_BARRAY = 0x74, OP_HALT = 0xFF, } opcode_t; From 1b35dd98cb881172aed1215fc6cb5ad288cef5ab Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 29 Dec 2025 05:39:21 +0300 Subject: [PATCH 012/141] store global vars on the stack for gc, change makefile --- virtual_machine/Makefile | 14 +++++++++----- 
virtual_machine/interpreter.c | 20 +++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 808a3ca5f..e8886b3d3 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -2,15 +2,21 @@ CC = gcc CFLAGS = -Wall -Wextra -std=c99 -O2 -g TARGET = interpreter.exe -SOURCES = interpreter.c bytecode.c stack.c call_stack.c ../runtime/runtime.c ../runtime/gc.c -HEADERS = opcodes.h bytecode.h stack.h call_stack.h ../runtime/runtime.h ../runtime/gc.h +SOURCES = interpreter.c bytecode.c stack.c call_stack.c +HEADERS = opcodes.h bytecode.h stack.h call_stack.h OBJECTS = $(SOURCES:.c=.o) +RUNTIME_DIR = ../runtime +RUNTIME_LIB = $(RUNTIME_DIR)/runtime.a + all: $(TARGET) -$(TARGET): $(OBJECTS) +$(TARGET): $(OBJECTS) $(RUNTIME_LIB) $(CC) -o $@ $^ +$(RUNTIME_LIB): + $(MAKE) -C $(RUNTIME_DIR) + %.o: %.c $(HEADERS) $(CC) $(CFLAGS) -c -o $@ $< @@ -23,5 +29,3 @@ interpreter.o: interpreter.c bytecode.h opcodes.h stack.h call_stack.h bytecode.o: bytecode.c bytecode.h stack.o: stack.c stack.h call_stack.o: call_stack.c call_stack.h -runtime.o: ../runtime/runtime.c ../runtime/runtime.h -gc.o: ..runtime/runtime/gc.h ../runtime/gc.h diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 306694630..588c8507d 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -7,9 +7,12 @@ #include "../runtime/gc.h" #include "../runtime/runtime_common.h" +void *__start_custom_data; +void *__stop_custom_data; + extern size_t __gc_stack_top, __gc_stack_bottom; extern void __gc_init(void); -extern void __init(void); +extern void set_stack(size_t stack_top, size_t stack_bottom); extern aint Lread(void); extern aint Lwrite(aint n); @@ -46,12 +49,16 @@ void run(bytecode *bc) { stack_init(&stack); call_stack_init(&call_stack); - __init(); - - __gc_stack_top = (size_t)&stack.data[0]; - __gc_stack_bottom = (size_t)&stack.data[STACK_SIZE]; + __gc_init(); - aint 
*globals = malloc(sizeof(aint) * bc->globals_count); + set_stack((size_t)*stack.sp, (size_t)&stack.data[0]); + + aint *globals = stack.data; + // space for globals + // TODO: might not be the place to store globals + for (int i = 0; i < bc->globals_count; i++) { + stack_push(&stack, 0); + } int ip = bc->entry_point; int return_ip = -1; @@ -328,7 +335,6 @@ void run(bytecode *bc) { } end: - free(globals); } int main(int argc, char *argv[]) { From 10ee89bfb9aa32bb2b5aa8f8e61674bb6007d122 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 29 Dec 2025 13:43:57 +0300 Subject: [PATCH 013/141] add strings, change stack for gc --- virtual_machine/bytecode.c | 11 +++++++ virtual_machine/bytecode.h | 4 ++- virtual_machine/interpreter.c | 54 +++++++++++++++++++++++------------ virtual_machine/opcodes.h | 4 ++- virtual_machine/stack.c | 27 ++++++++++++------ 5 files changed, 71 insertions(+), 29 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index fb3387edc..2dbdd98b0 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -72,6 +72,15 @@ bytecode *load_bytecode(const char *filename) { bc->code_size = code_size; bc->entry_point = main_entry_point; bc->globals_count = globals_count; + bc->public_symbols_count = num_pubs; + bc->public_symbols = malloc(num_pubs * sizeof(int)); + for (int i = 0; i < num_pubs; i++) { + int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; + bc->public_symbols[i] = read_i32(data, entry_offset + 4); + } + + bc->string_table = malloc(st_size); + memcpy((void *)bc->string_table, string_table, st_size); free(data); return bc; @@ -80,6 +89,8 @@ bytecode *load_bytecode(const char *filename) { void free_bytecode(bytecode *bc) { if (bc) { free((void *)bc->code); + free((void *)bc->string_table); + free(bc->public_symbols); free(bc); } } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 5a8b6debb..61cee7739 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ 
-8,9 +8,11 @@ typedef struct { int code_size; int entry_point; int globals_count; + int *public_symbols; + int public_symbols_count; + const char *string_table; } bytecode; - int read_i32(const uint8_t data[], int offset); bytecode *load_bytecode(const char *filename); void free_bytecode(bytecode *bc); diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 588c8507d..ac1473e01 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -1,18 +1,17 @@ +#include "../runtime/gc.h" +#include "../runtime/runtime_common.h" #include "bytecode.h" #include "call_stack.h" #include "opcodes.h" #include "stack.h" #include #include -#include "../runtime/gc.h" -#include "../runtime/runtime_common.h" +#include void *__start_custom_data; void *__stop_custom_data; -extern size_t __gc_stack_top, __gc_stack_bottom; -extern void __gc_init(void); -extern void set_stack(size_t stack_top, size_t stack_bottom); +extern void __init(void); extern aint Lread(void); extern aint Lwrite(aint n); @@ -31,16 +30,18 @@ extern aint Ls__Infix_3838(void *p, void *q); extern aint Ls__Infix_3333(void *p, void *q); extern aint Llength(void *p); +extern void *Lstring(aint *args); extern void *Barray(aint *args, aint bn); +extern void *Bstring(aint *args); extern void *Belem(void *p, aint i); extern void *Bsta(void *x, aint i, void *v); static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { - return &stack->data[frame->base + frame->n_args + idx]; + return &stack->data[frame->base - frame->n_args - idx]; } static inline aint *get_arg(stack_t *stack, call_frame_t *frame, int idx) { - return &stack->data[frame->base + idx]; + return &stack->data[frame->base - idx]; } void run(bytecode *bc) { @@ -49,15 +50,14 @@ void run(bytecode *bc) { stack_init(&stack); call_stack_init(&call_stack); - __gc_init(); + // gc initialization + __init(); - set_stack((size_t)*stack.sp, (size_t)&stack.data[0]); - aint *globals = stack.data; // space for globals 
// TODO: might not be the place to store globals for (int i = 0; i < bc->globals_count; i++) { - stack_push(&stack, 0); + stack_push(&stack, 0); } int ip = bc->entry_point; @@ -68,6 +68,7 @@ void run(bytecode *bc) { int l = opcode & 0xF; // printf("ip=0x%08X opcode=0x%02X\n", ip-1, opcode); + // printf("stack pointer: %p\n", stack.sp); switch (opcode) { case OP_CONST: { @@ -228,7 +229,8 @@ void run(bytecode *bc) { int n_locals = read_i32(bc->code, ip); ip += 4; - int base = (stack.sp - stack.data) - n_args; + // base points to arg0 (highest address of args) + int base = (stack.sp - stack.data) + n_args; // space for locals for (int i = 0; i < n_locals; i++) { @@ -259,18 +261,20 @@ void run(bytecode *bc) { call_frame_t frame = call_stack_pop(&call_stack); int current_top = stack.sp - stack.data; - int returns_start = frame.base + frame.n_args + frame.n_locals; - int n_returns = current_top - returns_start; + int returns_start = frame.base - frame.n_args - frame.n_locals; + int n_returns = returns_start - current_top; if (n_returns <= 0) { n_returns = 0; } else { for (int i = 0; i < n_returns; i++) { - stack.data[frame.base + i] = stack.data[returns_start + i]; + // TODO: make stack function for this + stack.data[frame.base - i] = stack.data[returns_start - i]; } } - stack.sp = stack.data + frame.base + n_returns; + // sp points to empty slot below the return values + stack.sp = stack.data + frame.base - n_returns; if (frame.return_ip < 0) { goto end; } @@ -287,6 +291,15 @@ void run(bytecode *bc) { stack_push(&stack, Lwrite(val)); break; } + case OP_STRING: { + // push string from string table onto stack + int str_offset = read_i32(bc->code, ip); + ip += 4; + const char *src = bc->string_table + str_offset; + void *str = Bstring((void *)&src); + stack_push(&stack, (aint)str); + break; + } case OP_ELEM: { // [index, array] -> [element] aint idx = stack_pop(&stack); @@ -310,15 +323,20 @@ void run(bytecode *bc) { stack_push(&stack, len); break; } + case OP_LSTRING: { + 
aint val = stack_pop(&stack); + void *str = Lstring(&val); + stack_push(&stack, (aint)str); + break; + } case OP_BARRAY: { int n = read_i32(bc->code, ip); ip += 4; - aint *args = malloc(n * sizeof(aint)); + aint args[n]; for (int i = n - 1; i >= 0; i--) { args[i] = stack_pop(&stack); } void *arr = Barray(args, BOX(n)); - free(args); stack_push(&stack, (aint)arr); break; } diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 60aff4dcb..81f829e13 100644 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -16,6 +16,7 @@ typedef enum { OP_BINOP_AND = 0x0C, OP_BINOP_OR = 0x0D, OP_CONST = 0x10, + OP_STRING = 0x11, OP_STA = 0x14, OP_JMP = 0x15, OP_END = 0x16, @@ -30,7 +31,7 @@ typedef enum { OP_ST = 0x40, OP_ST_LOC = 0x41, OP_ST_ARG = 0x42, - OP_CJMP_Z = 0x50, + OP_CJMP_Z = 0x50, OP_CJMP_NZ = 0x51, OP_BEGIN = 0x52, OP_BEGIN_CLOSURE = 0x53, @@ -39,6 +40,7 @@ typedef enum { OP_READ = 0x70, OP_WRITE = 0x71, OP_LENGTH = 0x72, + OP_LSTRING = 0x73, OP_BARRAY = 0x74, OP_HALT = 0xFF, } opcode_t; diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c index 3e52d2f66..b91615dab 100644 --- a/virtual_machine/stack.c +++ b/virtual_machine/stack.c @@ -2,32 +2,41 @@ #include #include +extern size_t __gc_stack_top, __gc_stack_bottom; + void stack_init(stack_t *s) { - s->sp = s->data; + // mandated by gc + s->sp = s->data + STACK_SIZE - 1; + __gc_stack_bottom = ((size_t)(s->data + STACK_SIZE)); + __gc_stack_top = (size_t)s->sp & ~0xFUL; } void stack_push(stack_t *s, aint val) { - if (s->sp >= s->data + STACK_SIZE) { + if (s->sp <= s->data) { fprintf(stderr, "Stack overflow\n"); exit(1); } - *s->sp++ = val; + *s->sp-- = val; + if (((size_t)s->sp & 0xF) == 0) { + __gc_stack_top = (size_t)s->sp; + } } aint stack_pop(stack_t *s) { - if (s->sp <= s->data) { - fprintf(stderr, "Cannot pop from an empty stack"); + if (s->sp >= s->data + STACK_SIZE - 1) { + fprintf(stderr, "Cannot pop from an empty stack\n"); exit(1); } - return *--s->sp; + aint val = 
*++s->sp; + return val; } aint stack_peek(const stack_t *s) { - if (s->sp <= s->data) { - fprintf(stderr, "Cannot peek from an empty stack"); + if (s->sp >= s->data + STACK_SIZE - 1) { + fprintf(stderr, "Cannot peek from an empty stack\n"); exit(1); } - return *(s->sp - 1); + return *(s->sp + 1); } void stack_dup(stack_t *s) { From 9060d9d58185eec7e442156195ca58dfc2570117 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 29 Dec 2025 15:05:11 +0300 Subject: [PATCH 014/141] add S-expressions --- virtual_machine/interpreter.c | 19 +++++++++++++++++++ virtual_machine/opcodes.h | 1 + 2 files changed, 20 insertions(+) diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index ac1473e01..a4e736ce1 100644 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -31,7 +31,9 @@ extern aint Ls__Infix_3333(void *p, void *q); extern aint Llength(void *p); extern void *Lstring(aint *args); +extern aint LtagHash(char *s); extern void *Barray(aint *args, aint bn); +extern void *Bsexp(aint *args, aint bn); extern void *Bstring(aint *args); extern void *Belem(void *p, aint i); extern void *Bsta(void *x, aint i, void *v); @@ -340,6 +342,23 @@ void run(bytecode *bc) { stack_push(&stack, (aint)arr); break; } + case OP_SEXP: { + int tag_offset = read_i32(bc->code, ip); + ip += 4; + int n_fields = read_i32(bc->code, ip); + ip += 4; + const char *tag_str = bc->string_table + tag_offset; + aint tag_hash = LtagHash((char *)tag_str); + aint args[n_fields + 1]; + for (int i = n_fields - 1; i >= 0; i--) { + args[i] = stack_pop(&stack); + } + args[n_fields] = tag_hash; + + void *s = Bsexp(args, BOX(n_fields + 1)); + stack_push(&stack, (aint)s); + break; + } case OP_HALT: goto end; case OP_LINE: diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 81f829e13..64d09eabd 100644 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -17,6 +17,7 @@ typedef enum { OP_BINOP_OR = 0x0D, OP_CONST = 0x10, OP_STRING = 0x11, + OP_SEXP = 
0x12, OP_STA = 0x14, OP_JMP = 0x15, OP_END = 0x16, From 4635e3cc68796c73bb1635e27de6dc32f0215aeb Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 29 Dec 2025 15:05:11 +0300 Subject: [PATCH 015/141] add closures (wip) add patterns better add better debugging --- virtual_machine/call_stack.c | 3 +- virtual_machine/call_stack.h | 4 +- virtual_machine/interpreter.c | 264 ++++++++++++++++++++++++++++++++-- virtual_machine/opcodes.c | 124 ++++++++++++++++ virtual_machine/opcodes.h | 32 ++++- virtual_machine/stack.c | 0 virtual_machine/stack.h | 2 +- 7 files changed, 408 insertions(+), 21 deletions(-) mode change 100644 => 100755 virtual_machine/interpreter.c create mode 100644 virtual_machine/opcodes.c mode change 100644 => 100755 virtual_machine/opcodes.h mode change 100644 => 100755 virtual_machine/stack.c mode change 100644 => 100755 virtual_machine/stack.h diff --git a/virtual_machine/call_stack.c b/virtual_machine/call_stack.c index 8abfb91d5..4de1e8381 100644 --- a/virtual_machine/call_stack.c +++ b/virtual_machine/call_stack.c @@ -9,7 +9,7 @@ void call_stack_init(call_stack_t *cs) { } void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, - int n_locals) { + int n_locals, aint closure) { if (cs->top >= MAX_CALL_DEPTH) { fprintf(stderr, "Call stack overflow\n"); exit(1); @@ -20,6 +20,7 @@ void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, frame->base = base; frame->n_args = n_args; frame->n_locals = n_locals; + frame->closure = closure; } call_frame_t call_stack_pop(call_stack_t *cs) { diff --git a/virtual_machine/call_stack.h b/virtual_machine/call_stack.h index a081fcd84..aaea3599e 100644 --- a/virtual_machine/call_stack.h +++ b/virtual_machine/call_stack.h @@ -2,6 +2,7 @@ #define CALL_STACK_H #include +#include "../runtime/runtime_common.h" #define MAX_CALL_DEPTH 1024 @@ -10,6 +11,7 @@ typedef struct { int base; int n_args; int n_locals; + aint closure; // 0 if not a closure call } call_frame_t; typedef struct { @@ 
-20,7 +22,7 @@ typedef struct { void call_stack_init(call_stack_t *cs); void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, - int n_locals); + int n_locals, aint closure); call_frame_t call_stack_pop(call_stack_t *cs); diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c old mode 100644 new mode 100755 index a4e736ce1..151cda90f --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -4,9 +4,45 @@ #include "call_stack.h" #include "opcodes.h" #include "stack.h" +#include #include #include -#include + +#ifdef DEBUG_PRINT +#define STACK_PEEK_SIZE 5 +#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#define VM_TRACE_OP(opcode, ip) \ + fprintf(stderr, "ip: 0x%08X opcode: %s (0x%02X)\n", (ip), \ + opcode_to_string(opcode), (opcode)) +#define VM_TRACE_STACK(stack) \ + do { \ + long sp_idx = (stack)->sp - (stack)->data; \ + fprintf(stderr, "stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ + for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ + if (sp_idx + i < STACK_SIZE) { \ + fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ + } \ + } \ + fprintf(stderr, "\n"); \ + } while (0) +#define VM_TRACE_CALL(fmt, ...) fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) +#define VM_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ + __LINE__); \ + exit(1); \ + } \ + } while (0) +#else +#define VM_DEBUG(fmt, ...) +#define VM_TRACE_OP(opcode, ip) +#define VM_TRACE_STACK(stack) +#define VM_TRACE_CALL(fmt, ...) 
+#define VM_ASSERT(cond, msg) +#endif + +static aint pending_closure = 0; void *__start_custom_data; void *__stop_custom_data; @@ -34,10 +70,21 @@ extern void *Lstring(aint *args); extern aint LtagHash(char *s); extern void *Barray(aint *args, aint bn); extern void *Bsexp(aint *args, aint bn); +extern void *Bclosure(aint *args, aint bn); extern void *Bstring(aint *args); extern void *Belem(void *p, aint i); extern void *Bsta(void *x, aint i, void *v); +extern aint Btag(void *d, aint t, aint n); +extern aint Barray_patt(void *d, aint n); +extern aint Bstring_patt(void *x, void *y); +extern aint Bclosure_tag_patt(void *x); +extern aint Bboxed_patt(void *x); +extern aint Bunboxed_patt(void *x); +extern aint Barray_tag_patt(void *x); +extern aint Bstring_tag_patt(void *x); +extern aint Bsexp_tag_patt(void *x); + static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base - frame->n_args - idx]; } @@ -46,6 +93,35 @@ static inline aint *get_arg(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base - idx]; } +static inline aint *get_closure_var(call_frame_t *frame, int idx) { + data *closure_data = TO_DATA(frame->closure); + aint *contents = (aint *)closure_data->contents; + // +1 because contents[0] is the entry point + return &contents[idx + 1]; +} + +static aint read_designation(stack_t *stack, call_frame_t *frame, aint *globals, + const uint8_t *code, int *ip_ptr) { + uint8_t type_byte = code[(*ip_ptr)++]; + int idx = read_i32(code, *ip_ptr); + *ip_ptr += 4; + + int designation_type = type_byte & 0xF; + switch (designation_type) { + case 0: + return globals[idx]; + case 1: + return *get_local(stack, frame, idx); + case 2: + return *get_arg(stack, frame, idx); + case 3: + return *get_closure_var(frame, idx); + default: + fprintf(stderr, "Unknown designation type: %d\n", designation_type); + exit(1); + } +} + void run(bytecode *bc) { stack_t stack; call_stack_t call_stack; @@ -69,13 +145,14 @@ 
void run(bytecode *bc) { uint8_t opcode = bc->code[ip++]; int l = opcode & 0xF; - // printf("ip=0x%08X opcode=0x%02X\n", ip-1, opcode); - // printf("stack pointer: %p\n", stack.sp); + VM_TRACE_OP(opcode, ip - 1); + VM_TRACE_STACK(&stack); switch (opcode) { case OP_CONST: { int n = read_i32(bc->code, ip); ip += 4; + VM_DEBUG("CONST: %d\n", n); stack_push(&stack, BOX(n)); break; } @@ -170,30 +247,47 @@ void run(bytecode *bc) { } break; } + // TODO: unify ld and st case OP_LD: { int idx = read_i32(bc->code, ip); ip += 4; - stack_push(&stack, globals[idx]); + aint val = globals[idx]; + VM_DEBUG("LD global[%d] = %ld\n", idx, val); + stack_push(&stack, val); break; } case OP_LD_LOC: { int idx = read_i32(bc->code, ip); ip += 4; call_frame_t *frame = call_stack_current(&call_stack); - stack_push(&stack, *get_local(&stack, frame, idx)); + aint val = *get_local(&stack, frame, idx); + VM_DEBUG("LD_LOC local[%d] = %ld\n", idx, val); + stack_push(&stack, val); break; } case OP_LD_ARG: { int idx = read_i32(bc->code, ip); ip += 4; call_frame_t *frame = call_stack_current(&call_stack); - stack_push(&stack, *get_arg(&stack, frame, idx)); + aint val = *get_arg(&stack, frame, idx); + VM_DEBUG("LD_ARG arg[%d] = %ld\n", idx, val); + stack_push(&stack, val); + break; + } + case OP_LD_CLO: { + int idx = read_i32(bc->code, ip); + ip += 4; + call_frame_t *frame = call_stack_current(&call_stack); + aint val = *get_closure_var(frame, idx); + VM_DEBUG("LD_CLO closure[%d] = %ld\n", idx, val); + stack_push(&stack, val); break; } case OP_ST: { int idx = read_i32(bc->code, ip); ip += 4; aint val = stack_pop(&stack); + VM_DEBUG("ST global[%d] = %ld\n", idx, val); globals[idx] = val; stack_push(&stack, val); break; @@ -203,6 +297,7 @@ void run(bytecode *bc) { ip += 4; call_frame_t *frame = call_stack_current(&call_stack); aint val = stack_pop(&stack); + VM_DEBUG("ST_LOC local[%d] = %ld\n", idx, val); *get_local(&stack, frame, idx) = val; stack_push(&stack, val); break; @@ -212,10 +307,21 @@ void 
run(bytecode *bc) { ip += 4; call_frame_t *frame = call_stack_current(&call_stack); aint val = stack_pop(&stack); + VM_DEBUG("ST_ARG arg[%d] = %ld\n", idx, val); *get_arg(&stack, frame, idx) = val; stack_push(&stack, val); break; } + case OP_ST_CLO: { + int idx = read_i32(bc->code, ip); + ip += 4; + call_frame_t *frame = call_stack_current(&call_stack); + aint val = stack_pop(&stack); + VM_DEBUG("ST_CLO closure[%d] = %ld\n", idx, val); + *get_closure_var(frame, idx) = val; + stack_push(&stack, val); + break; + } case OP_DROP: stack_pop(&stack); break; @@ -225,11 +331,13 @@ void run(bytecode *bc) { case OP_SWAP: stack_swap(&stack); break; + // TODO: possibly unify as well case OP_BEGIN: { int n_args = read_i32(bc->code, ip); ip += 4; int n_locals = read_i32(bc->code, ip); ip += 4; + VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d\n", n_args, n_locals); // base points to arg0 (highest address of args) int base = (stack.sp - stack.data) + n_args; @@ -239,18 +347,81 @@ void run(bytecode *bc) { stack_push(&stack, 0); } - call_stack_push(&call_stack, return_ip, base, n_args, n_locals); + call_stack_push(&call_stack, return_ip, base, n_args, n_locals, 0); + break; + } + case OP_BEGIN_CLOSURE: { + int n_args = read_i32(bc->code, ip); + ip += 4; + int n_locals = read_i32(bc->code, ip); + ip += 4; + VM_TRACE_CALL("BEGIN_CLOSURE n_args=%d n_locals=%d\n", n_args, n_locals); + + // CALLC already shifted args and removed closure from stack + int base = (stack.sp - stack.data) + n_args; + aint closure = pending_closure; + + // space for locals + for (int i = 0; i < n_locals; i++) { + stack_push(&stack, 0); + } + + call_stack_push(&call_stack, return_ip, base, n_args, n_locals, closure); break; } - case OP_BEGIN_CLOSURE: - // TODO: skip for now - ip += 8; + case OP_CLOSURE: { + // addr:32 n_captured:32 [type:8 idx:32]... 
+ int addr = read_i32(bc->code, ip); + ip += 4; + int n_captured = read_i32(bc->code, ip); + ip += 4; + + VM_DEBUG("CLOSURE addr=0x%08X n_captured=%d\n", addr, n_captured); + + aint args[n_captured + 1]; + args[0] = BOX(addr); + + for (int i = 0; i < n_captured; i++) { + aint val = read_designation(&stack, call_stack_current(&call_stack), + globals, bc->code, &ip); + VM_DEBUG("Captured[%d] = %ld\n", i, val); + args[i + 1] = val; + } + + void *closure = Bclosure(args, BOX(n_captured + 1)); + stack_push(&stack, (aint)closure); break; + } + case OP_CALLC: { + int n_args = read_i32(bc->code, ip); + ip += 4; + + // stack: [... closure arg0 arg1 ... argN-1] + int base = (stack.sp - stack.data) + n_args + 1; + aint closure = stack.data[base]; + + // save closure for BEGIN_CLOSURE to retrieve + pending_closure = closure; + + // shift args over closure slot, removing closure from stack + for (int i = 0; i < n_args; i++) { + stack.data[base - i] = stack.data[base - i - 1]; + } + stack.sp++; + + aint entry_point = UNBOX(((aint *)closure)[0]); + VM_TRACE_CALL("CALLC n_args=%d closure=0x%lx entry=0x%lx\n", n_args, + closure, entry_point); + return_ip = ip; + ip = entry_point; + break; + } case OP_CALL: { int addr = read_i32(bc->code, ip); ip += 4; // discarding n_args ip += 4; + VM_TRACE_CALL("CALL addr=0x%08X\n", addr); return_ip = ip; ip = addr; break; @@ -303,7 +474,7 @@ void run(bytecode *bc) { break; } case OP_ELEM: { - // [index, array] -> [element] + // [top --> index, array] -> [element] aint idx = stack_pop(&stack); aint arr = stack_pop(&stack); void *elem = Belem((void *)arr, idx); @@ -359,6 +530,77 @@ void run(bytecode *bc) { stack_push(&stack, (aint)s); break; } + case OP_TAG: { + int tag_offset = read_i32(bc->code, ip); + ip += 4; + int n_fields = read_i32(bc->code, ip); + ip += 4; + const char *tag_str = bc->string_table + tag_offset; + aint tag_hash = LtagHash((char *)tag_str); + aint val = stack_pop(&stack); + aint result = Btag((void *)val, tag_hash, 
BOX(n_fields)); + stack_push(&stack, result); + break; + } + case OP_ARRAY: { + int n = read_i32(bc->code, ip); + ip += 4; + aint val = stack_pop(&stack); + aint result = Barray_patt((void *)val, BOX(n)); + stack_push(&stack, result); + break; + } + case OP_FAIL: { + int line = read_i32(bc->code, ip); + ip += 4; + int col = read_i32(bc->code, ip); + ip += 4; + fprintf(stderr, "Match failure at line %d, column %d\n", line, col); + goto end; + } + case OP_PATT_STR_CMP: { + aint y = stack_pop(&stack); + aint x = stack_pop(&stack); + aint result = Bstring_patt((void *)x, (void *)y); + stack_push(&stack, result); + break; + } + case OP_PATT_STRING: { + aint val = stack_pop(&stack); + aint result = Bstring_tag_patt((void *)val); + stack_push(&stack, result); + break; + } + case OP_PATT_ARRAY: { + aint val = stack_pop(&stack); + aint result = Barray_tag_patt((void *)val); + stack_push(&stack, result); + break; + } + case OP_PATT_SEXP: { + aint val = stack_pop(&stack); + aint result = Bsexp_tag_patt((void *)val); + stack_push(&stack, result); + break; + } + case OP_PATT_BOXED: { + aint val = stack_pop(&stack); + aint result = Bboxed_patt((void *)val); + stack_push(&stack, result); + break; + } + case OP_PATT_UNBOXED: { + aint val = stack_pop(&stack); + aint result = Bunboxed_patt((void *)val); + stack_push(&stack, result); + break; + } + case OP_PATT_CLOSURE: { + aint val = stack_pop(&stack); + aint result = Bclosure_tag_patt((void *)val); + stack_push(&stack, result); + break; + } case OP_HALT: goto end; case OP_LINE: diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c new file mode 100644 index 000000000..53d18b5ae --- /dev/null +++ b/virtual_machine/opcodes.c @@ -0,0 +1,124 @@ +#include "opcodes.h" +#include +#include +#include + +const char *opcode_to_string(uint8_t opcode) { + switch (opcode) { + case OP_BINOP_ADD: + return "BINOP_ADD"; + case OP_BINOP_SUB: + return "BINOP_SUB"; + case OP_BINOP_MUL: + return "BINOP_MUL"; + case OP_BINOP_DIV: + return 
"BINOP_DIV"; + case OP_BINOP_MOD: + return "BINOP_MOD"; + case OP_BINOP_LT: + return "BINOP_LT"; + case OP_BINOP_LE: + return "BINOP_LE"; + case OP_BINOP_GT: + return "BINOP_GT"; + case OP_BINOP_GE: + return "BINOP_GE"; + case OP_BINOP_EQ: + return "BINOP_EQ"; + case OP_BINOP_NE: + return "BINOP_NE"; + case OP_BINOP_AND: + return "BINOP_AND"; + case OP_BINOP_OR: + return "BINOP_OR"; + case OP_CONST: + return "CONST"; + case OP_STRING: + return "STRING"; + case OP_SEXP: + return "SEXP"; + case OP_STA: + return "STA"; + case OP_JMP: + return "JMP"; + case OP_END: + return "END"; + case OP_RET: + return "RET"; + case OP_DROP: + return "DROP"; + case OP_DUP: + return "DUP"; + case OP_SWAP: + return "SWAP"; + case OP_ELEM: + return "ELEM"; + case OP_LD: + return "LD"; + case OP_LD_LOC: + return "LD_LOC"; + case OP_LD_ARG: + return "LD_ARG"; + case OP_LD_CLO: + return "LD_CLO"; + case OP_ST: + return "ST"; + case OP_ST_LOC: + return "ST_LOC"; + case OP_ST_ARG: + return "ST_ARG"; + case OP_ST_CLO: + return "ST_CLO"; + case OP_CJMP_Z: + return "CJMP_Z"; + case OP_CJMP_NZ: + return "CJMP_NZ"; + case OP_BEGIN: + return "BEGIN"; + case OP_BEGIN_CLOSURE: + return "BEGIN_CLOSURE"; + case OP_CLOSURE: + return "CLOSURE"; + case OP_CALLC: + return "CALLC"; + case OP_CALL: + return "CALL"; + case OP_TAG: + return "TAG"; + case OP_ARRAY: + return "ARRAY"; + case OP_FAIL: + return "FAIL"; + case OP_LINE: + return "LINE"; + case OP_PATT_STR_CMP: + return "PATT_STR_CMP"; + case OP_PATT_STRING: + return "PATT_STRING"; + case OP_PATT_ARRAY: + return "PATT_ARRAY"; + case OP_PATT_SEXP: + return "PATT_SEXP"; + case OP_PATT_BOXED: + return "PATT_BOXED"; + case OP_PATT_UNBOXED: + return "PATT_UNBOXED"; + case OP_PATT_CLOSURE: + return "PATT_CLOSURE"; + case OP_READ: + return "READ"; + case OP_WRITE: + return "WRITE"; + case OP_LENGTH: + return "LENGTH"; + case OP_LSTRING: + return "LSTRING"; + case OP_BARRAY: + return "BARRAY"; + case OP_HALT: + return "HALT"; + default: + fprintf(stderr, 
"Unknown opcode: 0x%02X\n", opcode); + exit(1); + } +} diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h old mode 100644 new mode 100755 index 64d09eabd..f61396c2d --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -1,20 +1,22 @@ #ifndef OPCODES_H #define OPCODES_H +#include + typedef enum { OP_BINOP_ADD = 0x01, OP_BINOP_SUB = 0x02, OP_BINOP_MUL = 0x03, OP_BINOP_DIV = 0x04, OP_BINOP_MOD = 0x05, - OP_BINOP_LT = 0x06, - OP_BINOP_LE = 0x07, - OP_BINOP_GT = 0x08, - OP_BINOP_GE = 0x09, - OP_BINOP_EQ = 0x0A, - OP_BINOP_NE = 0x0B, + OP_BINOP_LT = 0x06, + OP_BINOP_LE = 0x07, + OP_BINOP_GT = 0x08, + OP_BINOP_GE = 0x09, + OP_BINOP_EQ = 0x0A, + OP_BINOP_NE = 0x0B, OP_BINOP_AND = 0x0C, - OP_BINOP_OR = 0x0D, + OP_BINOP_OR = 0x0D, OP_CONST = 0x10, OP_STRING = 0x11, OP_SEXP = 0x12, @@ -29,15 +31,29 @@ typedef enum { OP_LD = 0x20, OP_LD_LOC = 0x21, OP_LD_ARG = 0x22, + OP_LD_CLO = 0x23, OP_ST = 0x40, OP_ST_LOC = 0x41, OP_ST_ARG = 0x42, + OP_ST_CLO = 0x43, OP_CJMP_Z = 0x50, OP_CJMP_NZ = 0x51, OP_BEGIN = 0x52, OP_BEGIN_CLOSURE = 0x53, + OP_CLOSURE = 0x54, + OP_CALLC = 0x55, OP_CALL = 0x56, + OP_TAG = 0x57, + OP_ARRAY = 0x58, + OP_FAIL = 0x59, OP_LINE = 0x5A, + OP_PATT_STR_CMP = 0x60, + OP_PATT_STRING = 0x61, + OP_PATT_ARRAY = 0x62, + OP_PATT_SEXP = 0x63, + OP_PATT_BOXED = 0x64, + OP_PATT_UNBOXED = 0x65, + OP_PATT_CLOSURE = 0x66, OP_READ = 0x70, OP_WRITE = 0x71, OP_LENGTH = 0x72, @@ -46,4 +62,6 @@ typedef enum { OP_HALT = 0xFF, } opcode_t; +const char *opcode_to_string(uint8_t opcode); + #endif diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c old mode 100644 new mode 100755 diff --git a/virtual_machine/stack.h b/virtual_machine/stack.h old mode 100644 new mode 100755 index f2c8ebef3..720684cac --- a/virtual_machine/stack.h +++ b/virtual_machine/stack.h @@ -1,8 +1,8 @@ #ifndef STACK_H #define STACK_H -#include #include "../runtime/runtime_common.h" +#include #define STACK_SIZE 1024 From e40822a4cd84dd7758637c7dc75b7aac42bc2a74 Mon Sep 17 
00:00:00 2001 From: ancavar Date: Tue, 6 Jan 2026 17:26:42 +0300 Subject: [PATCH 016/141] add debug target ot makefile add proper docs --- virtual_machine/Makefile | 11 +++++++---- virtual_machine/README.md | 26 ++++++++++++++++++++++++++ virtual_machine/arch.png | Bin 0 -> 16505 bytes virtual_machine/bytecode.c | 10 +++++++--- virtual_machine/call_stack.c | 6 ++++++ virtual_machine/interpreter.c | 24 ++++++++++++++++++++++++ virtual_machine/opcodes.c | 6 ++++++ virtual_machine/stack.c | 6 ++++++ 8 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 virtual_machine/README.md create mode 100644 virtual_machine/arch.png diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index e8886b3d3..74b404bc7 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -1,8 +1,7 @@ CC = gcc -CFLAGS = -Wall -Wextra -std=c99 -O2 -g - +CFLAGS = -Wall -Wextra -std=c99 -O2 TARGET = interpreter.exe -SOURCES = interpreter.c bytecode.c stack.c call_stack.c +SOURCES = interpreter.c bytecode.c stack.c call_stack.c opcodes.c HEADERS = opcodes.h bytecode.h stack.h call_stack.h OBJECTS = $(SOURCES:.c=.o) @@ -11,6 +10,9 @@ RUNTIME_LIB = $(RUNTIME_DIR)/runtime.a all: $(TARGET) +debug: CFLAGS += -DDEBUG_PRINT -g3 -Og +debug: clean all + $(TARGET): $(OBJECTS) $(RUNTIME_LIB) $(CC) -o $@ $^ @@ -23,9 +25,10 @@ $(RUNTIME_LIB): clean: rm -f $(OBJECTS) $(TARGET) -.PHONY: all clean distclean +.PHONY: all debug clean distclean interpreter.o: interpreter.c bytecode.h opcodes.h stack.h call_stack.h bytecode.o: bytecode.c bytecode.h stack.o: stack.c stack.h call_stack.o: call_stack.c call_stack.h +opcodes.o: opcodes.c opcodes.h diff --git a/virtual_machine/README.md b/virtual_machine/README.md new file mode 100644 index 000000000..921f6b5f3 --- /dev/null +++ b/virtual_machine/README.md @@ -0,0 +1,26 @@ +# Lama virtual machine + +This directory contains the implementation of the virtual machine for the Lama programming language. 
The VM is a stack-based execution engine designed to run Lama bytecode. + +## Architecture overview (work in progress) + +The Lama VM follows a stack-based architecture where operands are pushed onto a data stack, and operations consume these operands and push results back. + +![Architecture](arch.png) +(Work in progress: the architecture will change with each iteration.) +### Key Components + +* **Interpreter (`interpreter.c`)**: The core execution loop that fetches, decodes, and executes bytecode instructions. +* **Data stack (`stack.c`, `stack.h`)**: A fixed-size stack (`STACK_SIZE` slots, growing downward) used for evaluating expressions, passing function arguments, and storing local variables. +* **Call stack (`call_stack.c`, `call_stack.h`)**: Manages function activation records (frames), tracking return addresses and stack base pointers. +* **Instruction set (`opcodes.h`)**: Defines the bytecode opcodes. + +### Interaction with Runtime + +The VM is tightly integrated with the Lama runtime (`../runtime/`). It relies on the runtime for: +* **Memory management**: Automatic garbage collection for heap-allocated objects. +* **Built-in functions**: I/O operations (read/write), array/S-expression/string handling. + +## Bytecode format + +The VM executes a dense bytecode format where each instruction consists of a 1-byte opcode followed by optional immediate values or offsets. Function definitions include metadata about the number of arguments and local variables required. 
diff --git a/virtual_machine/arch.png b/virtual_machine/arch.png new file mode 100644 index 0000000000000000000000000000000000000000..51400b6284fdd9ed4fa9f25edf49674f89ca49d3 GIT binary patch literal 16505 zcma)jV{~QB*JsR5$7aV?$L!eX*tTuk=-9TCPHwF3*yfFG8*`uM|IW;MXXeA)4|lDz ztLk@l?b=mm*G8nGyaW;g9s(E`7?PBvs1g_$1ON;Sya^5xM1kO$ngsp(E6S>ffgWLC z;bGz6;ox8p5#hdkML|ZzKt)Bu#6&^I!o$KN#KA_x$HT1dL9tK{}PgIUyLQzmsNl1`YLX1;FLPA(pO+-#ZLP<|joKs4IOIDgkPEJlrMPFLY zP)^fSQJ!B(QBYM`NL^iBQO8o*z*g1BK~r5+TT@(5M^Z~mOViv{%fe0H*2h3k+SpLm zz`(%J-rvMI$lNc@!c5WHLdo9V-qJJ5&M(0};Fm>UrfqnEeGoudlCXOrB4CkzZnoPjZcCYLiDsn@?`9Uw&VJuTfaA zd3cCLaBy%yQdvN9c}RLqSXO;#QD0QJRa~@fTwGjKUUPJQOMFR3RB3-)#ow6Psl+(@ zU-1sfiB7+M{Yoh5Oe*jGRrNQeZunQjbXJ;sUY1u@R#tl5VD|6vJiv5b>r7FeZ&^`5 zQBhGr%UEIid}+^0S?_8^Nl;x?ctu4;RrhR7?`*{%VD->W-RMEx_(^?rctcGDpeYsr z0Mz%*0Y-uKV}}jnM=i4#fVr!-=C}?(LU;SGo}Qkzv5nUG>%Pw9f!?&C{*2z<-od%8 z;Xj#Eqj^(PQ^WJyGvft|GsR0wODpqbYm4Q;<;sl$s!@WQDeGvXbB*pz@=i_zv{r&yz{nO*i+e^FMOQ+*Ycfk8<=G#i$`{v-&P0!my z|L61H&zFbK&(En8$R8jTq;(O~a51)b@U$^Ca{-euwKH`xbTKs{G4dp_aB*>P=3!!T zuraiAakaH!G`6>On;azuDR8i*iiXSoq=SKja6GclmE{!HexP(+)}_N#1O+Q+hoSkD zlMt7N=97|SCsf~i8LKt8*8tgVwZ7d0?#k}dTx{&Kf0WN}T=!{BdFSS2a9%EJRN7{k z@5%Ka3OnJ*o1-ru=MkJMc-cCs53h7&_w;VQLAHq)B2DrL-`RwGrq^t%REH!l7tFYdzHxIHR9pL9owl?+YbM(*0FSwHKBh-BQd2G>E%kc}NlC3x^xZ~KX5B8> zWA;uBc&^{)8}y*nH|2V^L6cIe!C*f*&bUM~45WM`B|nr<_ico1VM_w95Uw7{aI60{{YPxGb%I1 z<#H$0CC6L~xVs4(BH2Wh@;%tZat9a1CauGGd3`y5araWpF0v4vJ~}&%5-^!E)aey! 
z8D62?t9lV@14#npo1wpSwu-Elt0TtnE$J%~N4#47Tv7pR%?~^4auvg&b`T675_HJTlnfG^KhK}TTY~ev@?~021Wg>8HtYXbw zQnbWhN6qOn@%zhyF70C+$XT-I7qzeuO4?fReB&rCAKotU#ibJ_Z)9ID@W#99x}bq z_AZn^(vYx30PL)7bB5Jguc2JXh$kAyR!8aS+E(;(%qSME*Vt8DWXvD8(doX|D0(({ zlct4{;BD2c^$>?1!mfMX4HO@w4s}3w@HMhGTkT;4h@~9&P%y~`Loe=*6xkcOZKErY zf4E(K^A=H(BF*~GSP5w@V#vpv?$COd=SDR%_?BP4XDOE414q;x1OW{wTYIc}NjgUN zqhlKyoKAYXG4b}hvnZmztI||!Q*ux^yk&zE^xLa!M@!dLj0xB0*D&?y+%WK&N{o)Z zrVbP4=p-y_lV#r`UY@3H2~m925wp97>HB2KGf%*CYdUynHpHso-rIGIcmu{((R|l9 z7<}-8%HxrNr6Q_WDneFhO(A%s*V*v$sXeI()>Z=#?WZQZnykHCYZ?&Vh)P>; zqRZA69ae;Lq5715i_rSZ;&_g=dD2!w%c$IHQ!On=k=A@jE^bS@D=bIs7&YVV7Q z;(m2EUU+WDZ>&gZ^DZBV`j{^~!XZ!ZyMybRuw3k@WchNE+BcJ$y8T^0OKEm)eB3ny zppjno7yPp(yy-l>ncuQBCawoR_GSUJ6PCy1-)!pQ@;GC(vY1xQ(9E0R)Ukqhg@e1= zGxXMM<(p1?DOsRSu>9UzhPY%hX+rqnaSt2w6znRh$6EAQ?>l?j3!qX%-x9Zjbw#R*RK11 zKawpdu*Ba9YZ^vei7;D->y#p} zJzZ2u=2|1}<|Z%*p_XE?So1!nyYCr)tc!^CQ1SQI-Zx!`d?A{ds+092`jOg9*qH2l=1KHCDFgdHLGhvx> z)aCT$_pKdMcvm1)5_MzorLG;YU1bt zYG>UQVsMJw8Rob58*LR1V~&G!!g_IZ0aHGUm)+&hne52;2QSC^mxZq^%;HPud1big z^5cTbD=FbKuJ~P$PaO&yC>h9aq!7U;i`^{TtD8U0+-$u1ud;r&&*CTnI+l1PfTivf zzbU;}+14bQfY#THatM$oZX=VqbH+vJlG50wbdlh$S>Enn-WfCef~vmqej)u*uI;O@ zJ00G>!i;A>2c^Tx|CexSLqB1iy_}!Y9geYw%2beMZFATKM{w)tW&$Z=#7muANpX5( zbqY4EXS60iw>Ex*HT;DN3lqf`7{$QOGB0uyKkFB?t3&#Hzc1^s)B*^oVF_0tOV-`g zb1BGKz%9`ASkul!(yG`*Y(o`(JaRBB4-y7qrI@Kv*)#w7+c|5pR%si4ngmE+X7d)( zZLtmAn)H9&xf{(;7U3SB{r2-gLl*bD-7)rQnn;0NbmT*m0pZM7f;BTOes42)okvfB zEVb#TkS9NE6M@bZ9lzHI#X?)WBcw0~#d=4_jr3)=IGXvIlCv_N$8S4-_$gbrF$)UYxt&U(xHAWn4mDVQgS#t4N2Wuas{?PF2rU;X1p$f| zKb9-Bjjr1?;F8ZW?cz{-!U9^!zFlx#fi*j{cMZX|vX$|QtsATgXNrTv_x3eE;IcfZ z*WR@SujpqkiTm`|!oR<+;oaAYq(}1U%XyMk@>&>O{1RK`E1PVKAJm#Fpk#55#xguV zzYsG%FvR9E$)4+&h(f}b5ItV(zIw}Id5kTiJUHhiI)2cg*XfU#J)hK$!KI-I!r^v0 zy~`b#v3HN528Qiniu~O>v|s2zFeE8-i7pqW_puaP^?(Q({>JK}eI2vG9wE$;a;yu9 z6bT2k^R)=sP>m3N;bSNBI+TqL_^7d-i3AjV!f?Iq*{txGj_dy9!K?N6w$97GBN|mh zzu9z^kC>ed`Zzz2!R4~AQAKB=Rb}NeN|TPR>V-1tMzLKzARzi2|VKzzNXSqUMzN?(H5?G 
z^q(lOK=^26xZP$?&U|mrPqRBztBJgyw%Mk9nb*UCY|B;xPz#&PiE)F|Tl%?rc;yKL3qy6qckL;a&YA$FlT^I-noW5)YF zeemOaKX8NR;#IIq0e55rOyN`)HZ-KI%T<;sqMrbph-Y-nCHKSt@U$pR(cE#Xddi29A zB!V_D6=MfsP~fricuB&x8p9~JX#`DNbPheXWcCeuPN|cuJFKUzAnM13RP+j#jhQj0G4cu`=C}aWoyf z9}r10*a6iF-Caw##{61Q+-X9}@&XzcvhoDj&OPtqS|!B!VPFy6Gol!#U$)wNeouNB z;9;O;t+;S?vO{hNR=C@i9QFD-B{dEler4(7ryRBw0N&gdN)KCnOqg&x?a9tPUX2}h zz)f5P({d0}qUMA+?%vhb5GVo~e!$LFUg=}glM5WaVVBOY|FUxu7SS$L$bxXll?#{c zqNBy1w>_H%PgVYtsJiKE8l*2DP|-#ie8$;$EH1outv$rkHHEz0@b*E2-J7_`p$Pl% z2wt657nVBU!;^EGg?(RJM)2F#{&2ne@%bBs+ISn|zVM*iSgW8pTkH#r!GbQY!T6&N zPtI{i{70cXzbT44`CmdVzlpUgWjdBljMopP}b{ePES}`{!;AlbFl%h ztT$z;=jU;sk6{~Bkr6C2tfqMke_gT4@L`c&0hknYF>dTAMfsp3I1TrL5t9W0?*W~| zz&hF@H0+#Vl*5U8VyMc~z`d8g6Ma_V02Tsf^wrEYR^q2pcp`E)Hg3}T7~(l{26)qz z{P4-`FV9buBcZG!hvu;&>WV+Zuz6_%I1k^Yba$;jln-0UgIgHl%m>;1wLIoZ!e1U|}< z6(8F{6TRNwM%1xKMnYX)JUJmhmOA623{BP+gCN?Rb;u zjhVux2aTM$=5WN`W%s z$~*~GH8Zx)1J(5tk-BD7JDZ9M!;67jdX%_`S+V93e_S2jbU`#XPRIMV0x|DXj~jLZ znF#Wo14enzm=_aI^J9D(1tjCywho%rLpba;N=O~b^(03=h=k2NTn^SqcmAm8*FF2s zaTMEiZP7QXP{oJcci5?3fNe&&v+U2uxN7c%*(`Y@cXahBVyz>$$@ix)5fhe+uMc`u zmo=&W<8PznzNd2q&n{F3`j-?<^>GpbyL+p)DwGAMi8OIfE;bi~n~|JfU*n#qy`H1D z!jR5HOiRnF1=a+zg^9y9W0{!1@b1tTOALNbO9n4g?5bPLg==K`CwoON%~}h^8h>jN zHUq6Hm^MLunHgA$)Rvb?G-Ul|S2>l37Afuxhy6w;mY&&WkwLWA(?XuTp;iCIU>Y{u z{n}a-GKYoxJF01R$LZ|ecZEy?yX^&ayN8Xp+3YMOv#By~1Qo2sYq+INzvJi7oN^?H zB{FS--m(kNTQH6X3{BD!!|3*h*$>=t!)4az8;F9y_?b9kc+?<3JgQceHV^qL(DD)e z723Lb7gftoZvXCY2N6XSqD`bpl#?!?q3qBuu&OL49@P}T@2~0en%C8lDpe{bymG4r zf-TQ&WasJ6!q(nY$@cO_c1`{+w_AOF@TYa~?UA>=087=+D*|iNS9XL4aUdyQ4tB_; z#^?sk4}Byf(fNfMxyw1BJ<@t2uQ2N`XVP~+W}}C>vH7EsXWFR6Re`IQ@j)3a#!!J| zGv9TJkedqai#sDaooAY!P;#P;R2I%VNqxRk%{mfRS-CP#L2J?4-j!vNxdiWWxkdS! 
z7ziqj2=ZGr6FXn=X4`#J8otL%iVOl@0hmaxL0)G zyHX03?aj%$?GqzhMyj%&^FQ1fOvs&gX`*qLjU^l+P`=h zI2ckyC(Y!w&3g5ai0A$$I*G!flVYVFiOwt*yM?f{59kMf)tC%#%0%!tmpagHwtPV> z6#}nGdSmnnI;9ew8KRJ)9+U?hQOwcKdi$S8J2ex9PiOQEYR@=qJnEEod zY9~pU+IMw~IGS=TVZ3$GES;(>CXH$vPoExaie!IHyyAq)4|v@eY9Q!r$wg&b|2%Vd z5u%DG)WG>9i$*>q)$Z(V!|8OK308C&!$%2G3ppFueigGA{+k;Q^Z!m;5U16N_evrB@Rj!+!N|HS$mYeISZTwfDh42wmt_kDZh@MM@%Zkh31X$Z& zuaUrz`gb$|8K5_v24Zkw2Xiudp+woqoMt@ZjmC==U~gZUIX|D1Rg3B)BP?Q~pAu*G zJxpMmaM}!(8*x2d*`3hxCh33cF~}Z-s~Vsko3-w1`ZUdsS{zHGxAnb{E@S{YrG>g+ zwFL4cy}!rpaY)==dWd{$m}@jq7olTya}pzxs5Lx2;1nK5FIjk8Ur4&mwE(* zBLV@_Z45pBRu%AZZy&#}xcWex!7BJ}vVKMDA6Sjx~_B-wtHPZ0XRyNn9hlMsEl|%4}iH~Q{fP^*g>$1{Wmkd6Mjcp+2F$hvl zu`#-US!OfltwxDWztb1sHCCJ6KaF0lSdP?eq!jb}N{uXH*lrQtO44JTzB4d6Kmt<1 zkYa6XGgt6&>5CE79(o$D4eB)~fT=r@2`jCIkH0-Pw6E%yFCS!)tNUpk3 z_o&q=$WjU>r!FfrhG=fpEcr1xnjLKbg5bwd;@`o^$*E*2pG~#&Y+*6?|r^-Fncef~yBx{k!3YGY$ zz4_dG2+HOht{(Ep@nwVc6gk$ZFGr=&(R`}-M`5yhmg`Qn8tqK-M7(!K6PZrSjBG?ZAu%%J z#r5(!^v^rQGxKW4n))q!0oUnOOFFsoj}UxAv{%O@c3FG!(9i+Yb^QfI|Jw@di{B}O z)_mJpPMWWpt87gdb?^LdPrXjHDXQ7TCTi3kJTZ&q21uvl=sThN!eP0S7LYY%45C46 zQsmiIorOqwWr+xQqh{cYQRODd3WhO?h7r?l*vB{pePM7Zp=EaQpD(bUylzJWem5O| zkfpl1&=D!6d1*jcpGgUKrd@^9OCbQ3MWO!3Q z&yOOHG5%f~&z;TT4yXA;f7oP7@O#!w)4q(s_&2aw9%Gq<#?BWn%WKxO21W z+X**IKSTUWw;*4N@NGn{;KEx!P>?Cz&GjI$v%Y)l4WKiu=%@W0UaD?BMQ_<=$Z1}P z35A47`6dB!Xm0;DjcLLC&7@v2r-oPeQ8Ow+eR(WWm39DpvXm!Wxt_tt_3JT8=jY-WE20Ac-tOs=f12`WY`dSKo4md95!1g)Z-M@ zqtpVAF-)E6C%-b&{FK)ow5W;$$)Ib;Z@H=r9AlNhfM)IYsxO?)f;;LdkE*z0%`QMq zB|XbO_~QGe*Rj2s9ng*}4=%I#77y(5^@(m1wjojt4E;E1N|;Jg>=|PS4l^9C#3%%Y zm{bMJs~K=_P`dd{@*YH!GQGSg>T7x|%g-nFcDC~6s_YjL@cG`W{%OQov{Uvs1kNA& zyij>+WQ$Y~g6fbZU94Wa9OdSFd9YixK6BfOWtcOMYFHV+xoWmRwac7`9k8#nQrOQ` z?`qVV#0~8;X@s}_tgcc?cJ3&IXbYQL9z-y!W zVY!wFq-N74^r7HdFtD5p;@T1LlmeYD8tOgx(Pow@H zt>%Kgo&6j9E%j@K(2I6!m+lCNN^=&ckP5`RS&*s%y*-C7t2_CxG1>all$MApUBVD? 
zaVe4*T9~1y@|^o7zY+C+UXSC4@i;$q*Aoc6;E>JQoraoS5AOH7d~MVB3DKD1-!*MI zmF2JbxWUV6b=fx!K@@!dCql5{KkF=bodSGJ&2O-Gm8S3}Dmal1aIz{}tWYaMq3hb( zB{{)2=}thMv||*sDBp1?vf7aVWL4#PjniGfiRrKS+QZ{nPepzwCs56m(CDQ4!7YbW z5b}c9HVc@#_frBk8>8GKvFZ=QnR6m@rL-;St$WpPi!c0j7c5v@j8t*mg+EFKdqcCL zYZ|_k_IAxo(WtXsQk;o>_A_j0J$KDOm{{Uw!j_fF+9m`<3J4PxqvloR?`QL~5U!^% zXjTOMX~e9l8ZP{xUOCT~UwBjTnn>p7KiH2zx3`thD~iVaYj`SpU&5qg+krSwGh=;6 zS>ZEkb!jWRXU<8S1-BQyU;A~OXP;v)JHjZ5tVw-rM24pcnN@|I3MSAV`0-B7C%83P z#6-ys;M*WvrXX0!)M9r+ps$quWbib^qpv4YAd*|FOlL?LV=P{jM_92GOKK|3x7^*3THi3WsH-l^|WAi*Jvp$8Jk<@kMLbSC$ChQP$*ea;MPl^ z|8VwpJF>0MAG_W=nQ{LRQPF?tXBe)|myguy__aZvqM&)+6%!e>GEpXwFf{iU281aMDG_pSSI64`x+GQC2-@!<}Re>u?<; z4>y-`-%*bIBXLXFI}P)l5D09+WLMHCxwt#Mj3A3bo7S1kq*YxNONbb%%R-Z?nI(a@eqg^AFuNI%lzh-sJexZQS|^S)TU z#l^M$RCYc5$;_!v&Jw;)$A`6Z`9ueJf7~1KP^%G*Rx6}=uHjH?;Po`1)#;eD#9VxQ z91k^?kJLd|TcFDmJr53ez>d3ZuoG-fjY3DT`9Z_FcX(P$Xg@TXUutTy9ZF zR&f|E>xRS07$QwW8en%iEJ=aI<(;UhRk2N?%+IIo?yAIh9DmZgUjmp9l7RY!0*RV} z%_hVri~G(bgy(F7!(Zm6Kf9o!#GVjX!NaL%@^wtL)s|M3(~Y-nqRL9l7;=e>x;qi$xkSr;-6lM`%xyaI=EaDAZy7=Z$Bj?Da} zx$vR$+h^m3D+v0FI%(o>H*M!Gda-2&z!x99zY4hU_Ais9)$v!!e4L&K%4*&U#-xHB z-0=8R$svyTApMI}db%pgZ{cQvgLp!(CgkL3+iWyzkO_Eqdn$f-k%`Py_Zv8PIrO@r z>Kd@}?LwVYp{Z7TPA*+%*XD!8I3hzKw%br;$0NDN=G5q@ZiAECY+nKG>hsj=uWV>? 
zSRFalMLepv6LE(HYmJXS5_MRMOQ*?Dswu-|^$)p&qJD19Xfk!s_gjSL;MC@H^~rL9A8lbpoV=LZQq~hasF3KZeEh8sib_b+YM<> z=DqBpw&;5T6Oe?Nj6X@1?L`B|)OoZe7@0h9){QVjID{s8~=94ax=tQX% z z^)^QyQ(-H}8gf&u#1byyM_ml>vDnD^`<%Rx8@V?E4Td=hn%z5VMr$rnQi}K40ARi# z3|uwl+ukG4SLVZ1X6HIWlB%Dm@u`Mm}?oX=Q^iyjEXRzG(g=Xy}S%X!VHM;>vRIB#78$X)Hj;wfJK+0nlB03xCQR zZo@jZVRva2V-bQe=bKNFF|roN)=pYh$-z^&q-CEDYmbE{-(hKV&O@w;l06c2V;Zb) z^&(zRQv!dK>kf8gU|rdh3%(HK+D)CYvDBG3iuuPG+vO$LMR&rXmKZTKJmJe&Ps>_> zo`IE?^Wh_8Jg1Z{@djY26{F1Rw48p2F}LAohHVXIWjBuE?_@735!@|J273jt1gP+d z+g&&d5Ru{YB#s=jzjLE2&5&jN+KSn)Q0r!MEFPA^Erg%vzqYaGWupwr%$`ux_z~Vj zh>^OLgr1X=IhOixA`P7eN@M##nYtgoRGl;m#WaM&ER8!D> zetsiOf6Weu&l#>o_!5m#ZJjvr&#=ut+)dmwUq#RmB|GU%l*4GzyA*x5#C<+PhCR|ns@$P1lHcB%vjnc_T9rvPN>S9q31(TMpkZeBhK5(&4GpKH9Uc{lK7AVjkbhfPD)_7Fhu+K$!z z6G^m#Cgyw4d&pJeB>%+Ypo5D8?knlMTA8je1v9bau0h!s`NxSnxuiRKXxp?s~G>MU!R}wa(zlN zluxywNgBtHS?Fw7cJ8h3Y*8zo7FO^UWaJRRwADD-ga&2>c{c_tS+L5M1g?QMKBG=k z9uk3y)G4aYWOwUHEg^H5k6ifc57HJ~wF>6=-w;)W`qM)=j8XIhtWI(s@&j=}$?=KJ zOf4V%q9%I+ig31Jtc46##vr-A;QSf=+EG11{$e%Pw{M6KXr0U@rVil{!JWZs{!RNL zSl^-!Z&*K;4QYCm=C#BI^(Jb6hJ}4o>(}?dLEjp6FcOWk`d+`GiUe+39rZ7<`iRzf z7T4J@*Q0Jc} zliB~-^a?OE*d~7thRSqtkdi4#a>G^f8)%3O>WyLamOeO^-+M<#H%x5iK`_gEh?y_v zE!RF@@e+VbOXCKnG6(|bEo8RYU7kco>ss#ueHfW{6DxAo)i2k@;^JpMgcnb!vwc^| zl!`v`?bwGtZe9!;k>7-DfwO+1=;#88cXW)v9!e6!vca=ioIadS}>#q+fJr^+R^Ll zZhvV${o8LtUbL&vFKBQ)1N+SWEt6QK7Q&bYBOMl6!;7KZk={gYn7(@9C)ew50Qyh+ z`mTjkSG5e}QI9a2F?GP#n38FUr@v_j>&dL%0zl>bMu$W^=q3ekZq$1i-ABmeKi4N!Osysdu6 zog^`^%18tX(+Q!W-(rb$O+}II(uIIlLtou3&Pypah-UrzNSL6=_N_&UjcvB+pRFUj zzf#Y2I^75}D|L(OmrCgNebks$6QQ1*FC^sj@#B4!v`&e>qo2JMy=}k=JrJEqo*5_l zUFS&8jZW4?cj-5eJ71%w;`6Ro-l5m50<3(~a=36>y8$2Po6&(259Dr8? 
z;<^v?af`C_)3WLUaWsIAC`@$yJAj`WQIq69zUSN=s!YT_YG$fjGzDDS!bhCmNrH)s zQ3aVA&gGtk2svDYLD5^4^Q~0{%psxdNKkL2K58~9FIn7)v%a)mITnVv@^a$k4XfbW ziF<^HZo9fnLLF>=WL#c_JO6Z)7-8qaq2w9H_>8e9n$iJO%zH@JDkKWyWS_aiTBRmN z!d{^b>4sZT&hOuF#12HSlV`qqAu|%>K4Q#y1;0bdm2*{F@Yv855Zu?_X(=Mt&Vih| zVB899_(4A{;0IAs&_%Y)q|Y& zPaEqYcC~WykJ7=;X1d+G>_5fai8d-@QdP#+Tn0(${G*5p`fZ-`kk7prJUcUqih@VR zs+V}dArHX?cFhKlFG!;Z%MI=gjS$pN9x=_>Yf0{!|B?1a_7OWs*sw#E5ebV}AO+mk zYF9FZKXG#aPp2WlJ$C$t>YF#S45YR1{`#)8l7tvV*~JtFGd-326F_TRrE*v@D1eZs zlR$pr!@vgh3Qya_79#blWY}%Q6P(^m2v{j2R-*OwFJE^4U*vLaSIq(?XBig<_Y+^n z#4jgms`J)%1;}w*FW|0Kv~^hJYAS9FZVtY|?W*Xn+xYWzUqBn-2e5Kvld=M**&*Tt zonI8STpQEz0>8${^8M?vuK+3tMeKt`cw%}GSk(}GiauZC)!MRNW97NrcMqC1fA^>| zrxVtHK;!REkb-_1z%Fr?YcEjSkNyPoUMw(nP0<*E7#++Uu}VvKR=RYK(oh6gZ7bep zKu!=}Q+(G4$#Q2GwQ?$X-o9>w3bWy#XUO{^Tw~ZJs&efVYEDb$V~t6vE(JOz5YFG1 zNIVQ46y8<=5a+?xp9VDAnj$qJIwtq-Py zKT$)Hey#Y3Ni*#A>zZO<1&!-__^FlR^9TID9s^G6#Q~>QLeqOvS+E$S`l4MjZo{v%a7gMrNdkh*%hn>&vD9k)$}oG8AZV8vtF~tgwhvHb^(V{ zQ2ZG}Gdd_Y1Y>2g@;r-4_OJAHZjeua5n_X@E=%4Y0ZSE@q{PWNu2#rFjds_F#hou=$#mMAO zOPUm)IQ|hLCX0DUM?4w_@PKSQpbu?!8nZ`zm+E*Oe=|dmy=5=`c~+!ZW1L3NQ5dcJ zALKdn=`NVpg2&R>OZwq!l?@Ze)*+MpQ43gyY>lcD)NVu^gmUeHxB0b0N-Le;27tH5 zh)%tgaF~zLmOH&I7d|lt-QJ0WlX}RzZ>a}=z|v5D{kBItpdQH@j?5l6oX~@~(m^id z-q;BMvaddEsqX$d_FC$_>f8iW)yeJ7MYKFE@jbssEO|YWe?-oqsY7=Ih#QSZ!si71 znGJ^*DK5KNI?;11dW?akJyY+HcUtc|=8p@E%WoeOJsa?{aR69d$D;EAL5ex<5rWVSS@+Mte#BVbpjkohC1i!t(aKGW32fdL=PElbwGbLv>$aBPyM8M5uy zElmTfe`XfmHv=a{soND`1;I)l&(Y+iYqYIpCN852glkoL(s?8?`t3s*KgU@Md)rp4 zW4x!4o@Jx&g@>1pi<6K*0(^_=+Ses2dzKZdfh_s1ejnuTe=5$7Xd(HpMr-ep_=3Ez z%Jitx6n7yDTXeZnO&?yipxu?rgy7Iqgn+aIQi$7E;lli&G`>}SZ$5s;QOfFE{wAo} zS3dv$3Np5f8HXva%&6j-ZKaqV@otc{e&w-5Id8{hw>t!%qkHG)h@%YylQcWSULga3 zj*JqWdOGpq!J1IF~*B zHd&+gk=6cccv+;yF~@D0B@|@YJwF)Wnl|35aY;uDJ71$TX|9NP-4y;%jPpS!pI__h zBf^iu`7zy;A#E0}oH3&cGt*0VJ&|v8AzS=f&}a|EPUyLa1{!-d@^<(HLjrs8 zK1-yg0Y#@$WbhpV;0mSlNiRU&Py2^Yp%J?UL-%asNqA;w|Vtj;47|KJFj{ z(;&>WU_xC}16Vyx@epPqDTBH5J)n{|&flo$^BSd-*{3N5Ssb`WI6cL5_2rHstSLgZ 
z@#oD4Y#v*4H8WJYQ>URrbGChF7J*5ir{zt?^9OHhU~7B_#U5&w-GKEOkuTi?eL@ufE-kj=E+@;1WWUV~lh%uMQV;`qUr?F_OQo3c9)BlcyxrvGU;WV4; zSiyGUt)4l>&|^jt?845`<7dyw7psy&hQ!E?;FWm!8yvSda?s9|R{rH6w}|sgs$8mR ztE|qbn76h17)!KPyRB3^;5?cpk72i?)BNm;h3RJ-9+t@}9jdc~<*+pAvFW*}B$+P^9p zIm~_fgCwoXE#95fx*fXU-%j@{josR#A*YrDT@Y4+9&iSS6r3l%IRjfyHApDk)82?<;EwIa~NHwWsMu`@S5YaC?(P z7BH@#9U@Am-CV9+%%Nko^9Ra+hHaV zl)_;1wn8WZqDFYIb9aX$g7JC(p<|XN-1v4cy~>io&DR)t5g<-EHtH_-?SrsN$EFN< zP~03mr;!hkC1PV_596O(I{#MT9)T{5f0D7$DDv&z>t8T6SJ0?lHc~%0?+-4{n}jYA z(YW*Ne0P0JH3GAJ;-{`~bw{4>x=x;38+9iw>L)89P8KAUD#UOQp=Z1^i$zUZ}c@^g_eJP+&QwLFdxp7=yO2J;6t)S75>`!St&N8O(eQ zgc@?5EMr{G^SFC1&4hl1Tj$v-Grv-tk0Jo!sqKZl&4a@==8})Q)O~^1KreKojDTj# zSCb_7c|V>|J%aN9GS|Z0!T{0Gt~eKJ}k;S66I@!&*)-!xtHs zm8EMJ3tT}m!R<)0E#{(y8*K^dx`+oJ9C2#s+$%#1z;6wXB)CphC)B-5#op2FpTxIO zBFz@sTtQ<2kEVgggT6*)@`Yv*7;zVrNpWcyH_8$mnPcA@dARih{beIojUMWq>ixrg+Wu=O@ir_Y7F(kV*%|cQX3cV=r*xCzXgcf z?W8sOs}bl7{BCC3}Q+erLf_ zti~nzhC=~#JNNeuW3%+0i%|$g09ZjIrI&tc1lXGPipF~kLd=!dto3M%)8-L+m8$pT0`i}=pCIeB zAZ;_BAlR@-3zn3p)ho|hG@xA-S~;%g>i0o6afMrPcG7 zDgsWXH!mRr`)rr0T=RwWPkk#~N*qFQz~xwlRd5>JbL~9VB4|Z;IJ^PxN*J}NyC)(0bIIfWsbp2V? z1zcteebQW2klzSvra;N*7X!Xf!h4M%uO&7KRPgqKrYNS#1T$tdN?KSgC$b-EdZd3f zyIg)o3-U^8#jHSVtrs1CD}OTJrqbQ=P_<)Ae9f$K>1JF7%`>`>7u?O#Cx)Wf$+>c? 
z&7{qne8ewnL*pMiz3mVDL9OXkX-jxROow+ZB8;D&+NzwV;UJ7_70f{`K!Gu@wYyVq zch^L)!+gF}NMG6 zU*4GHcgg?nh-QwGgDCJA{}DRApbuL-u|x&I1tB5u3=9NKe~1 zMu;X=w8rI~gX(cYDey4nZ7u$d(Z1XDkvIsn^B`flaOdjqzfAhipYtbkU#=7Y80vE+ z-_Aif6Jf=_M9Xyu(sXW7Y%^+f+`cut&B_%J-LIu70r|Uq*@YBi`q5$$PB=v68D${- z1})a2D|k1EUbhnQ?TVJF)b$<)+Y@i`%zb>c=PEyG0+<%>D6}_9_cg`fFTxb#kNw^U zY2;)?2dL?1>J2_QL}4N~Hwd>d)mhzb-p*W$2wUi44Uxow0CN=-8v8D5*%X<9Rd)-nf!B2rdl$fU~Ml-974(>PD# zFvd?71Q|B^a&1sL16Ve4G7UOH&6~N{_bXElb<;kn|FkYEC=` zYr2vQfkmQoIx0^39bw1c{F|}svS;kly9l*o5Ge34W0EfM>?+# zk9&aw3po?gdjNt~P=GyG0@z@g(~8qW;gu`$JK($q4Q+4W79)yRPAaCDiMF=>9Ow9F zOyNJ$UzF77+03}vLg}(bh0nK(6$OAyJ)z=&UHJc_y7Ip(TaAyb`8;ET{GhUtvMw`) z_~BW?21vu5=L+)4G1A+o#7DVKu=@z^MPp>&1E$7;3=YEgD^t_j73FI^L1by}#K$7A zd0Xa~xPl^aj%9@(|35W&|GPrd7ro9MS|$G*?PNdxe2x98NXdq(;`nOI3@*=?cZ2Q* z6Q(BeV_kSLwrZfh$!rH&W`N`Qo1#Fs<|Qq4@%ea#m%0$XBH+V875B9(?Wy|6#Aa@aNi literal 0 HcmV?d00001 diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 2dbdd98b0..05cc54654 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,3 +1,9 @@ +/* + * Bytecode loader for Lama VM. + * Handles reading .bc files, including the string table, public symbols, + * and the bytecode instructions themselves. + */ + #include "bytecode.h" #include #include @@ -54,9 +60,7 @@ bytecode *load_bytecode(const char *filename) { int st_size = read_i32(data, 0); int globals_count = read_i32(data, 4); int num_pubs = read_i32(data, 8); - int num_imports = read_i32(data, 12); - int num_ext_fixups = read_i32(data, 16); - + int pubs_offset = HEADER_SIZE; int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; int code_offset = st_offset + st_size; diff --git a/virtual_machine/call_stack.c b/virtual_machine/call_stack.c index 4de1e8381..25d9386d6 100644 --- a/virtual_machine/call_stack.c +++ b/virtual_machine/call_stack.c @@ -1,3 +1,9 @@ +/* + * Call stack management for the Lama VM. 
+ * Tracks function activation records (frames), including return addresses, + * base pointers, and arguments. + */ + #include "call_stack.h" #include #include diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c index 151cda90f..0a05b014b 100755 --- a/virtual_machine/interpreter.c +++ b/virtual_machine/interpreter.c @@ -1,3 +1,9 @@ +/* + * Core bytecode interpreter for the Lama VM. + * Implements the fetch-decode-execute loop for all supported opcodes. + * Manages the data stack, call stack, and interacts with the C runtime. + */ + #include "../runtime/gc.h" #include "../runtime/runtime_common.h" #include "bytecode.h" @@ -85,14 +91,24 @@ extern aint Barray_tag_patt(void *x); extern aint Bstring_tag_patt(void *x); extern aint Bsexp_tag_patt(void *x); +/** + * Retrieves a pointer to a local variable in the current stack frame. + * Locals are stored below the arguments in the stack. + */ static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base - frame->n_args - idx]; } +/** + * Retrieves a pointer to an argument in the current stack frame. + */ static inline aint *get_arg(stack_t *stack, call_frame_t *frame, int idx) { return &stack->data[frame->base - idx]; } +/** + * Retrieves a pointer to a variable stored in a closure's environment. + */ static inline aint *get_closure_var(call_frame_t *frame, int idx) { data *closure_data = TO_DATA(frame->closure); aint *contents = (aint *)closure_data->contents; @@ -122,6 +138,10 @@ static aint read_designation(stack_t *stack, call_frame_t *frame, aint *globals, } } +/** + * The main execution loop of the virtual machine. + * Consumes bytecode and updates the stack and call stack accordingly. + */ void run(bytecode *bc) { stack_t stack; call_stack_t call_stack; @@ -614,8 +634,12 @@ void run(bytecode *bc) { } end: + return; } +/** + * Entry point for the VM. Loads bytecode from a file and starts execution. 
+ */ int main(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: %s \n", argv[0]); diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 53d18b5ae..677e2a1fc 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -1,3 +1,9 @@ +/* + * Utility functions for Lama VM opcodes. + * Provides debugging support, such as converting opcode values to string + * representations. + */ + #include "opcodes.h" #include #include diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c index b91615dab..daff25409 100755 --- a/virtual_machine/stack.c +++ b/virtual_machine/stack.c @@ -1,3 +1,9 @@ +/* + * Data stack implementation for the Lama VM. + * Handles operand storage for expressions and parameters. + * Integrated with the garbage collector for root scanning. + */ + #include "stack.h" #include #include From 3039a6adb8d69c035ad9b9393788769fad476b92 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 7 Jan 2026 20:27:17 +0300 Subject: [PATCH 017/141] change bytecode loading to mmap --- virtual_machine/bytecode.c | 57 ++++++++++++++++++++++---------------- virtual_machine/bytecode.h | 3 ++ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 05cc54654..4e8183580 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -5,9 +5,13 @@ */ #include "bytecode.h" +#include #include #include #include +#include +#include +#include int read_i32(const uint8_t data[], int offset) { return data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | @@ -32,35 +36,34 @@ static int find_entry_point(const uint8_t *data, int pubs_offset, int num_pubs, } bytecode *load_bytecode(const char *filename) { - FILE *f = fopen(filename, "rb"); - if (!f) { - perror("fopen"); + int fd = open(filename, O_RDONLY); + if (fd < 0) { + perror("open"); return NULL; } - fseek(f, 0, SEEK_END); - long size = ftell(f); - rewind(f); - - uint8_t *data = malloc(size); - - 
if (!data) { - fclose(f); + struct stat st; + if (fstat(fd, &st) < 0) { + perror("fstat"); + close(fd); return NULL; } - if (fread(data, 1, size, f) != size) { - perror("fread"); - fclose(f); - free(data); + size_t size = st.st_size; + void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + perror("mmap"); return NULL; } - fclose(f); + + uint8_t *data = (uint8_t *)map; int st_size = read_i32(data, 0); int globals_count = read_i32(data, 4); int num_pubs = read_i32(data, 8); - + int pubs_offset = HEADER_SIZE; int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; int code_offset = st_offset + st_size; @@ -71,8 +74,12 @@ bytecode *load_bytecode(const char *filename) { find_entry_point(data, pubs_offset, num_pubs, string_table, "main"); bytecode *bc = malloc(sizeof(bytecode)); - bc->code = malloc(code_size); - memcpy((void *)bc->code, data + code_offset, code_size); + if (!bc) { + munmap(map, size); + return NULL; + } + + bc->code = data + code_offset; bc->code_size = code_size; bc->entry_point = main_entry_point; bc->globals_count = globals_count; @@ -83,17 +90,19 @@ bytecode *load_bytecode(const char *filename) { bc->public_symbols[i] = read_i32(data, entry_offset + 4); } - bc->string_table = malloc(st_size); - memcpy((void *)bc->string_table, string_table, st_size); + bc->string_table = (const char *)string_table; + + bc->map_base = map; + bc->map_size = size; - free(data); return bc; } void free_bytecode(bytecode *bc) { if (bc) { - free((void *)bc->code); - free((void *)bc->string_table); + if (bc->map_base) { + munmap(bc->map_base, bc->map_size); + } free(bc->public_symbols); free(bc); } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 61cee7739..6099cb1e3 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -2,6 +2,7 @@ #define BYTECODE_H #include +#include typedef struct { const uint8_t *code; @@ -11,6 +12,8 @@ typedef struct { int *public_symbols; int 
public_symbols_count; const char *string_table; + void *map_base; + size_t map_size; } bytecode; int read_i32(const uint8_t data[], int offset); From 1b82803d331dbbac0a7cd9afee263a37d0a48741 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 8 Feb 2026 12:03:57 +0300 Subject: [PATCH 018/141] add working vm --- runtime/Makefile | 11 +- runtime/gc.c | 2 +- src/SM.ml | 128 ++- virtual_machine/Makefile | 33 +- virtual_machine/arena.c | 153 +++ virtual_machine/arena.h | 52 + virtual_machine/bytecode.c | 135 +-- virtual_machine/bytecode.h | 110 +- virtual_machine/decoder.c | 1636 +++++++++++++++++++++++++++ virtual_machine/decoder.h | 113 ++ virtual_machine/ffi.c | 178 +++ virtual_machine/ffi.h | 13 + virtual_machine/linker.c | 65 ++ virtual_machine/linker.h | 10 + virtual_machine/module_manager.c | 144 +++ virtual_machine/module_manager.h | 41 + virtual_machine/opcodes.c | 4 +- virtual_machine/opcodes.h | 1 - virtual_machine/regression_check.sh | 8 +- virtual_machine/vm.c | 81 ++ virtual_machine/vm.h | 18 + 21 files changed, 2796 insertions(+), 140 deletions(-) create mode 100644 virtual_machine/arena.c create mode 100644 virtual_machine/arena.h create mode 100644 virtual_machine/decoder.c create mode 100644 virtual_machine/decoder.h create mode 100644 virtual_machine/ffi.c create mode 100644 virtual_machine/ffi.h create mode 100644 virtual_machine/linker.c create mode 100644 virtual_machine/linker.h create mode 100644 virtual_machine/module_manager.c create mode 100644 virtual_machine/module_manager.h create mode 100644 virtual_machine/vm.c create mode 100644 virtual_machine/vm.h diff --git a/runtime/Makefile b/runtime/Makefile index f71efd909..64a6a816b 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -7,9 +7,18 @@ else ifeq ($(UNAME_S),Darwin) ARCH = -arch x86_64 endif +LAMA_ENV ?= 1 + +# Virtual machine doesn't work well with this parameter +ifeq ($(LAMA_ENV), 1) + ENV_FLAGS := -DLAMA_ENV +else + ENV_FLAGS := +endif + DISABLE_WARNINGS=-Wno-shift-negative-value 
COMMON_FLAGS=$(DISABLE_WARNINGS) -g -fstack-protector-all $(ARCH) --std=c11 -PROD_FLAGS=$(COMMON_FLAGS) -DLAMA_ENV +PROD_FLAGS=$(COMMON_FLAGS) $(ENV_FLAGS) TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION UNIT_TESTS_FLAGS=$(TEST_FLAGS) INVARIANTS_CHECK_FLAGS=$(TEST_FLAGS) -DFULL_INVARIANT_CHECKS diff --git a/runtime/gc.c b/runtime/gc.c index 050049d41..5353ed58a 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -63,7 +63,7 @@ void *alloc (size_t size) { size = BYTES_TO_WORDS(size); size_t padding = size * sizeof(size_t) - obj_size; #if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) - fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, bytes_sz); + fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, padding); #endif void *p = gc_alloc_on_existing_heap(size); if (!p) { diff --git a/src/SM.ml b/src/SM.ml index 34463e32c..6b6e6e25a 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -173,37 +173,85 @@ module ByteCode = struct let pubs = Stdlib.ref [] in let imports = Stdlib.ref [] in let globals = Hashtbl.create 16 in - let glob_count = Stdlib.ref 0 in + let extern_globals = Stdlib.ref S.empty in let fixups = Stdlib.ref [] in + let func_fixups = Stdlib.ref [] in let add_lab l = Hashtbl.replace lmap l (Buffer.length code) in + let add_global name = + try Hashtbl.find globals name + with Not_found -> + let i = Hashtbl.length globals in + Hashtbl.add globals name i; + i + in + let add_extern name is_fun = + if not is_fun then + extern_globals := S.add name !extern_globals + in let add_public name is_fun = let flag = if is_fun then pub_flag_function else pub_flag_global in pubs := (name, flag) :: !pubs in let add_import l = imports := l :: !imports in let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in + let add_func_fixup l = func_fixups := (Buffer.length code, l) :: !func_fixups in let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char.chr x) in let add_ints = - List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x) + 
List.iter (fun x -> Buffer.add_int32_le code @@ Int32.of_int x) in let add_strings = + let unescape x = + let n = String.length x in + let buf = Buffer.create n in + let rec iterate i = + if i < n then + match x.[i] with + | '\\' -> ( + if i + 1 >= n then + Buffer.add_char buf '\\' + else + match x.[i + 1] with + | 'n' -> + Buffer.add_char buf '\n'; + iterate (i + 2) + | 't' -> + Buffer.add_char buf '\t'; + iterate (i + 2) + | 'r' -> + Buffer.add_char buf '\r'; + iterate (i + 2) + | '"' -> + Buffer.add_char buf '"'; + iterate (i + 2) + | '\\' -> + Buffer.add_char buf '\\'; + iterate (i + 2) + | _ -> + Buffer.add_char buf '\\'; + iterate (i + 1)) + | c -> + Buffer.add_char buf c; + iterate (i + 1) + in + iterate 0; + Buffer.contents buf + in List.iter (fun x -> - Buffer.add_int32_ne code @@ Int32.of_int @@ StringTab.add st x) + Buffer.add_int32_le code @@ Int32.of_int @@ StringTab.add st @@ unescape x) in let add_designations n = let b x = match n with None -> x | Some b -> (b * 16) + x in List.iter (function | Value.Global s -> - let i = - try Hashtbl.find globals s - with Not_found -> - let i = !glob_count in - incr glob_count; - Hashtbl.add globals s i; - i - in - add_bytes [ b 0 ]; - add_ints [ i ] + if S.mem s !extern_globals then begin + let str_off = StringTab.add st s in + add_bytes [ b 0 ]; + add_ints [ -(str_off + 1) ] + end else begin + let i = add_global s in + add_bytes [ b 0 ]; + add_ints [ i ] + end | Value.Local n -> add_bytes [ b 1 ]; add_ints [ n ] @@ -294,7 +342,7 @@ module ByteCode = struct (* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> add_bytes [ (5 * 16) + 4 ]; - add_fixup s; + add_func_fixup s; add_ints [ 0; List.length ds ]; add_designations None ds (* 0x55 n:32 *) @@ -304,7 +352,7 @@ module ByteCode = struct (* 0x56 l:32 n:32 *) | CALL (fn, n, _) -> add_bytes [ (5 * 16) + 6 ]; - add_fixup fn; + add_func_fixup fn; add_ints [ 0; n ] (* 0x57 s:32 n:32 *) | TAG (s, n) -> @@ -325,7 +373,7 @@ module ByteCode = struct add_ints [ n ] (* 0x6p *) | 
PATT p -> add_bytes [ (6 * 16) + enum patt p ] - | EXTERN _ -> () + | EXTERN (name, is_fun) -> add_extern name is_fun | PUBLIC (name, is_fun) -> add_public name is_fun | IMPORT s -> add_import s | _ -> @@ -335,18 +383,29 @@ module ByteCode = struct List.iter insn_code insns; add_bytes [ 255 ]; let code = Buffer.to_bytes code in + List.iter + (fun (addr_ofs, l) -> + let resolved_addr = + try Hashtbl.find lmap l + with Not_found -> + (* External function: use negative string offset *) + let str_off = StringTab.add st l in + -(str_off + 1) + in + Bytes.set_int32_ne code addr_ofs (Int32.of_int resolved_addr)) + !func_fixups; List.iter (fun (ofs, l) -> Bytes.set_int32_ne code ofs (Int32.of_int @@ - try Hashtbl.find lmap l - with Not_found -> - failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) + try Hashtbl.find lmap l + with Not_found -> + failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) !fixups; let pubs_resolved = List.rev_map (fun (name, flag) -> - let pos = + let pos = if flag = pub_flag_global then try Hashtbl.find globals name with Not_found -> @@ -356,21 +415,26 @@ module ByteCode = struct with Not_found -> failwith (Printf.sprintf "ERROR: undefined label of public '%s'" name) in - (Int32.of_int @@ StringTab.add st name, Int32.of_int pos, flag)) + (Int32.of_int @@ StringTab.add st name, Int32.of_int pos, Int32.of_int flag)) !pubs in - let st = Buffer.to_bytes st.StringTab.buffer in + let imports = + List.rev_map (fun l -> Int32.of_int @@ StringTab.add st l) !imports + in + let str_table = Buffer.to_bytes st.StringTab.buffer in let file = Buffer.create 1024 in - Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st); - Buffer.add_int32_ne file (Int32.of_int @@ !glob_count); - Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs_resolved); + Buffer.add_int32_le file (Int32.of_int @@ Bytes.length str_table); + Buffer.add_int32_le file (Int32.of_int @@ Hashtbl.length globals); + Buffer.add_int32_le file (Int32.of_int @@ List.length 
imports); + Buffer.add_int32_le file (Int32.of_int @@ List.length pubs_resolved); + Buffer.add_bytes file str_table; + List.iter (fun n -> Buffer.add_int32_le file n) imports; List.iter - (fun (n, o, f) -> - Buffer.add_int32_ne file n; - Buffer.add_int32_ne file o; - Buffer.add_uint8 file f) + (fun (name_off, offset, flag) -> + Buffer.add_int32_le file name_off; + Buffer.add_int32_le file offset; + Buffer.add_int32_le file flag) pubs_resolved; - Buffer.add_bytes file st; Buffer.add_bytes file code; let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in Buffer.output_buffer f file; @@ -1565,8 +1629,8 @@ let compile cmd ((imports, _), p) = Some lfalse, i + 1, ((match lab with - | None -> [ SLABEL blab ] - | Some l -> [ SLABEL blab; LABEL l; DUP ]) + | None -> [ SLABEL blab ] + | Some l -> [ SLABEL blab; LABEL l; DUP ]) @ pcode @ bindcode @ scode @ jmp @ [ SLABEL elab ]) :: code, lfalse' ) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 74b404bc7..1f9d72503 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -1,8 +1,10 @@ CC = gcc -CFLAGS = -Wall -Wextra -std=c99 -O2 -TARGET = interpreter.exe -SOURCES = interpreter.c bytecode.c stack.c call_stack.c opcodes.c -HEADERS = opcodes.h bytecode.h stack.h call_stack.h +#TODO: O2 doesn't work +CFLAGS = -Wall -Wextra -std=c99 -O0 +LIBS = -lffi -ldl +LDFLAGS = -rdynamic +TARGET = vm.exe +SOURCES = decoder.c vm.c bytecode.c linker.c ffi.c module_manager.c arena.c OBJECTS = $(SOURCES:.c=.o) RUNTIME_DIR = ../runtime @@ -10,25 +12,20 @@ RUNTIME_LIB = $(RUNTIME_DIR)/runtime.a all: $(TARGET) -debug: CFLAGS += -DDEBUG_PRINT -g3 -Og -debug: clean all +debug: CFLAGS += -DDEBUG_PRINT -g3 -Og -O0 +debug: $(TARGET) $(TARGET): $(OBJECTS) $(RUNTIME_LIB) - $(CC) -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) -Wl,--whole-archive $(RUNTIME_LIB) -Wl,--no-whole-archive $(LIBS) $(RUNTIME_LIB): - $(MAKE) -C $(RUNTIME_DIR) + $(MAKE) LAMA_ENV=0 -C $(RUNTIME_DIR) -%.o: %.c $(HEADERS) - $(CC) 
$(CFLAGS) -c -o $@ $< +%.o: %.c + $(CC) $(CFLAGS) $(DEPFLAGS) -c -o $@ $< clean: - rm -f $(OBJECTS) $(TARGET) + rm -f $(OBJECTS) $(TARGET) $(OBJECTS:.o=.d) + $(MAKE) -C $(RUNTIME_DIR) clean -.PHONY: all debug clean distclean - -interpreter.o: interpreter.c bytecode.h opcodes.h stack.h call_stack.h -bytecode.o: bytecode.c bytecode.h -stack.o: stack.c stack.h -call_stack.o: call_stack.c call_stack.h -opcodes.o: opcodes.c opcodes.h +.PHONY: all debug clean diff --git a/virtual_machine/arena.c b/virtual_machine/arena.c new file mode 100644 index 000000000..daaa7acb9 --- /dev/null +++ b/virtual_machine/arena.c @@ -0,0 +1,153 @@ +#include "arena.h" +#include +#include +#include +#include + +#define MIN_BLOCK_SIZE 4096 + +static arena_block_t *block_create(size_t data_size) { + arena_block_t *b = malloc(sizeof(arena_block_t) + data_size); + if (!b) { + perror("arena: block_create malloc"); + exit(1); + } + b->next = NULL; + b->size = data_size; + b->used = 0; + return b; +} + +arena_t *arena_create(size_t init_cap) { + arena_t *a = malloc(sizeof(arena_t)); + if (!a) { + perror("arena: arena_create malloc"); + exit(1); + } + + size_t cap = init_cap < MIN_BLOCK_SIZE ? MIN_BLOCK_SIZE : init_cap; + arena_block_t *b = block_create(cap); + a->head = b; + a->current = b; + a->block_size = cap; + return a; +} + +void *arena_alloc(arena_t *arena, size_t size, size_t align) { + assert((align & (align - 1)) == 0); + + arena_block_t *blk = arena->current; + + // Align within current block + size_t mask = align - 1; + uintptr_t base = (uintptr_t)(blk->data + blk->used); + size_t padding = (align - (base & mask)) & mask; + size_t needed = padding + size; + + if (blk->used + needed <= blk->size) { + void *ptr = blk->data + blk->used + padding; + blk->used += needed; + return ptr; + } + + // New block must be large enough for this request (including worst-case + // alignment padding) and at least as big as the default block_size. 
+ size_t new_cap = arena->block_size; + size_t alloc_need = size + align; // worst-case with alignment + if (new_cap < alloc_need) + new_cap = alloc_need; + + arena_block_t *nb = block_create(new_cap); + blk->next = nb; + arena->current = nb; + + // Align within the fresh block (used == 0, so padding is usually 0) + base = (uintptr_t)(nb->data); + padding = (align - (base & mask)) & mask; + + void *ptr = nb->data + padding; + nb->used = padding + size; + return ptr; +} + +// TODO: cleanup macro? +arena_savepoint_t arena_save(arena_t *arena) { + arena_savepoint_t sp = {.block = arena->current, + .used = arena->current->used}; + return sp; +}; + +void arena_restore(arena_t *arena, arena_savepoint_t sp) { + if (!arena || !sp.block) + return; + + arena_block_t *b = arena->head; + + // Walk to the savepoint block + while (b && b != sp.block) { + b->used = 0; + b = b->next; + } + // Restore usage + b->used = sp.used; + + arena_block_t *to_free = b->next; + b->next = NULL; + + while (to_free) { + arena_block_t *next = to_free->next; + free(to_free); + to_free = next; + } + + arena->current = b; +} + +memory *memory_create(size_t main_init_cap, size_t tmp_init_cap) { + memory *mem = malloc(sizeof(memory)); + if (!mem) { + perror("memory_create malloc"); + exit(1); + } + mem->main = arena_create(main_init_cap); + mem->tmp = arena_create(tmp_init_cap); + mem->code = arena_create(4096); + return mem; +} + +memory *memory_destroy(memory *mem) { + if (!mem) + return NULL; + + if (mem->main) + arena_destroy(mem->main); + if (mem->tmp) + arena_destroy(mem->tmp); + if (mem->code) + arena_destroy(mem->code); + free(mem); + return NULL; +} + +char *arena_strdup(arena_t *arena, const char *s) { + if (!s) + return NULL; + + size_t len = strlen(s) + 1; + char *dst = (char *)arena_alloc(arena, len, 1); + memcpy(dst, s, len); + return dst; +} + +void arena_destroy(arena_t *arena) { + if (!arena) + return; + + arena_block_t *b = arena->head; + while (b) { + arena_block_t *next = b->next; 
+ free(b); + b = next; + } + free(arena); +} diff --git a/virtual_machine/arena.h b/virtual_machine/arena.h new file mode 100644 index 000000000..3926996b6 --- /dev/null +++ b/virtual_machine/arena.h @@ -0,0 +1,52 @@ +#ifndef ARENA_H +#define ARENA_H + +#include +#include + +typedef struct arena_block { + struct arena_block *next; + size_t size; // Total capacity of this block's data region + size_t used; // Bytes used in this block + char data[]; +} arena_block_t; + +typedef struct { + arena_block_t *block; + size_t used; +} arena_savepoint_t; + +typedef struct { + arena_block_t *head; // First block (for traversal / destroy) + arena_block_t *current; // Current block we're allocating from + size_t block_size; // Default size for new blocks +} arena_t; + +typedef struct { + arena_t *main; + arena_t *tmp; + arena_t *code; // For now this is only FFI stubs +} memory; + +arena_t *arena_create(size_t init_cap); + +void *arena_alloc(arena_t *arena, size_t size, size_t align); + +char *arena_strdup(arena_t *arena, const char *s); + +void arena_destroy(arena_t *arena); + +arena_savepoint_t arena_save(arena_t *arena); +void arena_restore(arena_t *arena, arena_savepoint_t sp); + +memory *memory_create(size_t main_init_cap, size_t tmp_init_cap); +memory *memory_destroy(memory *mem); + +#define ARENA_ALLOC(a, T, n) \ + ((T *)arena_alloc((a), sizeof(T) * (n), _Alignof(T))) + +#define ARENA_NEW(a, T) ARENA_ALLOC(a, T, 1) + +#define ARENA_STRDUP(a, s) arena_strdup((a), (s)) + +#endif // ARENA_H diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 4e8183580..d6c5c11f9 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,11 +1,8 @@ -/* - * Bytecode loader for Lama VM. - * Handles reading .bc files, including the string table, public symbols, - * and the bytecode instructions themselves. 
- */ - +#define _POSIX_C_SOURCE 200809L #include "bytecode.h" +#include "arena.h" #include +#include #include #include #include @@ -13,97 +10,101 @@ #include #include -int read_i32(const uint8_t data[], int offset) { - return data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | - (data[offset + 3] << 24); -} - -#define HEADER_SIZE 12 -#define PUB_ENTRY_SIZE 8 - -static int find_entry_point(const uint8_t *data, int pubs_offset, int num_pubs, - const uint8_t *string_table, const char *name) { - for (int i = 0; i < num_pubs; i++) { - int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; - int name_offset = read_i32(data, entry_offset); - char *f_name = (char *)(string_table + name_offset); - int address = read_i32(data, entry_offset + 4); - if (strcmp(f_name, name) == 0) { - return address; - } - } - return -1; -} +#define HEADER_SIZE 16 +#define PUB_ENTRY_SIZE 12 +#define IMPORT_ENTRY_SIZE 4 -bytecode *load_bytecode(const char *filename) { +bytecode *load_bytecode(const char *filename, memory *mem) { int fd = open(filename, O_RDONLY); if (fd < 0) { - perror("open"); + perror("bytecode_load: open"); return NULL; } struct stat st; if (fstat(fd, &st) < 0) { - perror("fstat"); + perror("bytecode_load: fstat"); close(fd); return NULL; } - size_t size = st.st_size; - void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); + size_t file_size = (size_t)st.st_size; + + void *map = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0); if (map == MAP_FAILED) { - perror("mmap"); + perror("bytecode_load: mmap"); return NULL; } - uint8_t *data = (uint8_t *)map; + close(fd); - int st_size = read_i32(data, 0); - int globals_count = read_i32(data, 4); - int num_pubs = read_i32(data, 8); + byte_reader_t reader; + reader_init(&reader, (const uint8_t *)map, file_size); - int pubs_offset = HEADER_SIZE; - int st_offset = pubs_offset + num_pubs * PUB_ENTRY_SIZE; - int code_offset = st_offset + st_size; - int code_size = size - code_offset; + int32_t 
string_table_size = reader_i32(&reader); + int32_t globals_count = reader_i32(&reader); + int32_t num_imports = reader_i32(&reader); + int32_t num_pubs = reader_i32(&reader); - uint8_t *string_table = data + st_offset; - int main_entry_point = - find_entry_point(data, pubs_offset, num_pubs, string_table, "main"); + size_t st_offset = HEADER_SIZE; + size_t imports_offset = st_offset + (size_t)string_table_size; + size_t pubs_offset = imports_offset + (size_t)num_imports * IMPORT_ENTRY_SIZE; + size_t code_offset = pubs_offset + (size_t)num_pubs * PUB_ENTRY_SIZE; + size_t code_size = file_size - code_offset; - bytecode *bc = malloc(sizeof(bytecode)); - if (!bc) { - munmap(map, size); - return NULL; - } + // TODO: VALIdation + const uint8_t *data = (const uint8_t *)map; + const char *string_table = (const char *)(data + st_offset); + + bytecode *bc = ARENA_NEW(mem->main, bytecode); + + bc->map_base = map; + bc->map_size = file_size; + + bc->string_table = string_table; + bc->string_table_size = (size_t)string_table_size; bc->code = data + code_offset; bc->code_size = code_size; - bc->entry_point = main_entry_point; - bc->globals_count = globals_count; - bc->public_symbols_count = num_pubs; - bc->public_symbols = malloc(num_pubs * sizeof(int)); - for (int i = 0; i < num_pubs; i++) { - int entry_offset = pubs_offset + i * PUB_ENTRY_SIZE; - bc->public_symbols[i] = read_i32(data, entry_offset + 4); + bc->globals_count = (size_t)globals_count; + + // Allocate and resolve public symbols + bc->public_symbols_count = (size_t)num_pubs; + if (num_pubs > 0) { + bc->public_symbols = + ARENA_ALLOC(mem->main, public_symbol_t, (size_t)num_pubs); + + reader_seek(&reader, pubs_offset); + for (int32_t i = 0; i < num_pubs; i++) { + int32_t name_offset = reader_i32(&reader); + int32_t code_off = reader_i32(&reader); + int32_t flag = reader_i32(&reader); + + bc->public_symbols[i].name = string_table + name_offset; + bc->public_symbols[i].code_offset = code_off; + bc->public_symbols[i].flag 
= flag; + } } - bc->string_table = (const char *)string_table; + // Allocate and resolve imports + bc->import_count = (size_t)num_imports; + if (num_imports > 0) { + bc->imports = ARENA_ALLOC(mem->main, const char *, (size_t)num_imports); - bc->map_base = map; - bc->map_size = size; + reader_seek(&reader, imports_offset); + for (int32_t i = 0; i < num_imports; i++) { + int32_t name_offset = reader_i32(&reader); + + bc->imports[i] = string_table + name_offset; + } + } return bc; } void free_bytecode(bytecode *bc) { - if (bc) { - if (bc->map_base) { - munmap(bc->map_base, bc->map_size); - } - free(bc->public_symbols); - free(bc); - } + munmap(bc->map_base, bc->map_size); + // NOTE: bc itself, public_symbols, imports, and module_name + // are all allocated from arena and will be freed when arena is destroyed. } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 6099cb1e3..4066c3f64 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -1,23 +1,107 @@ -#ifndef BYTECODE_H -#define BYTECODE_H +#ifndef BYTECODE_NEW_H +#define BYTECODE_NEW_H -#include +#include "arena.h" +#include #include +#include + +#define PUB_FLAG_FUNCTION 0 +#define PUB_FLAG_GLOBAL 1 typedef struct { - const uint8_t *code; - int code_size; - int entry_point; - int globals_count; - int *public_symbols; - int public_symbols_count; - const char *string_table; + const uint8_t *data; + size_t size; + size_t pos; +} byte_reader_t; + +static inline void reader_init(byte_reader_t *r, const uint8_t *data, + size_t size) { + r->data = data; + r->size = size; + r->pos = 0; +} + +/* + * Read 32-bit little-endian integer and advance position + */ +static inline int32_t reader_i32(byte_reader_t *r) { + if (r->pos + 4 > r->size) { + return 0; // TODO: better error handling + } + const uint8_t *p = r->data + r->pos; + r->pos += 4; + return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); +} + +static inline uint8_t reader_u8(byte_reader_t *r) { + if (r->pos 
>= r->size) { + return 0; + } + return r->data[r->pos++]; +} + +static inline void reader_skip(byte_reader_t *r, size_t n) { + r->pos += n; + if (r->pos > r->size) { + r->pos = r->size; + } +} + +static inline void reader_seek(byte_reader_t *r, size_t pos) { + r->pos = pos; + if (r->pos > r->size) { + r->pos = r->size; + } +} + +static inline size_t reader_pos(const byte_reader_t *r) { return r->pos; } + +static inline bool reader_eof(const byte_reader_t *r) { + return r->pos >= r->size; +} + +typedef struct { + const char *name; // Direct pointer to string + int32_t code_offset; // Offset into bytecode section (for functions) or global + // index + int32_t flag; // PUB_FLAG_FUNCTION or PUB_FLAG_GLOBAL +} public_symbol_t; + +typedef struct { + // Memory-mapped file void *map_base; size_t map_size; + + const char *string_table; + size_t string_table_size; + + const uint8_t *code; + size_t code_size; + + public_symbol_t *public_symbols; + size_t public_symbols_count; + + const char **imports; + size_t import_count; + + size_t globals_count; + char *module_name; } bytecode; -int read_i32(const uint8_t data[], int offset); -bytecode *load_bytecode(const char *filename); +bytecode *load_bytecode(const char *filename, memory *mem); + void free_bytecode(bytecode *bc); -#endif +/* + * Get string from string table by offset + */ +static inline const char *bytecode_get_string(const bytecode *bc, + int32_t offset) { + if (!bc || offset < 0 || (size_t)offset >= bc->string_table_size) { + return NULL; + } + return bc->string_table + offset; +} + +#endif // BYTECODE_NEW_H diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c new file mode 100644 index 000000000..5483e676c --- /dev/null +++ b/virtual_machine/decoder.c @@ -0,0 +1,1636 @@ +#include "decoder.h" +#include "../runtime/runtime_common.h" +#include "bytecode.h" +#include "da.h" +#include "ffi.h" +#include "opcodes.h" +#include +#include +#include +#include +#include + +/* + * Debug macros + */ +#ifdef 
DEBUG_PRINT +#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#define VM_TRACE_STACK(stack) \ + do { \ + long sp_idx = (stack)->sp - (stack)->data; \ + fprintf(stderr, " stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ + for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ + if (sp_idx + i < STACK_SIZE) { \ + fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ + } \ + } \ + fprintf(stderr, "\n"); \ + } while (0) +#define VM_TRACE_CALL(fmt, ...) fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) +#define VM_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ + __LINE__); \ + exit(1); \ + } \ + } while (0) +#else +#define VM_DEBUG(fmt, ...) +#define VM_TRACE_STACK(stack) +#define VM_TRACE_CALL(fmt, ...) +#define VM_ASSERT(cond, msg) +#endif + +/* + * Stack manipulation macros (stack grows downwards) + */ +#define STACK_PUSH(sp, val) (*sp-- = (val)) +#define STACK_POP(sp) (*++sp) +#define STACK_PEEK(sp) (*(sp + 1)) + +/* + * Symbolic stack depth tracking macros used during decoding + * depth = -1 means unreachable code + */ +#define DEPTH_INC(d, n) \ + do { \ + if ((d) != -1) \ + (d) += (n); \ + } while (0) +#define DEPTH_DEC(d, n) \ + do { \ + if ((d) != -1) \ + (d) -= (n); \ + } while (0) +#define DEPTH_PUSH(d) DEPTH_INC(d, 1) +#define DEPTH_POP(d) DEPTH_DEC(d, 1) +#define DEPTH_DEAD(d) ((d) = -1) + +/* + * Code emission macros - append to code array in context + */ +#define EMIT_FUNC(ctx, f) \ + do { \ + (ctx)->code[(ctx)->code_len++].func = (f); \ + } while (0) +#define EMIT_NUM(ctx, n) \ + do { \ + (ctx)->code[(ctx)->code_len++].num = (n); \ + } while (0) +#define EMIT_STR(ctx, s) \ + do { \ + (ctx)->code[(ctx)->code_len++].str = (s); \ + } while (0) +#define EMIT_TARGET(ctx, t) \ + do { \ + (ctx)->code[(ctx)->code_len++].target = (t); \ + } while (0) + +typedef struct fixup_node { + size_t insn_idx; // Index in code array that needs the jump target + struct fixup_node *next; +} 
fixup_node_t; + +// Metadata for each bytecode offset +typedef struct { + int32_t resolved_idx; // Index in generated code array (-1 if not visited) + int32_t stack_depth; // Expected stack depth (-1 if not visited yet) + fixup_node_t *fixups; // Linked list of forward jumps pointing here +} meta_info_t; + +/* + * External runtime functions (runtime.c) + */ +extern aint Lread(void); +extern aint Lwrite(aint n); +extern aint Ls__Infix_43(void *p, void *q); // + +extern aint Ls__Infix_45(void *p, void *q); // - +extern aint Ls__Infix_42(void *p, void *q); // * +extern aint Ls__Infix_47(void *p, void *q); // / +extern aint Ls__Infix_37(void *p, void *q); // % +extern aint Ls__Infix_60(void *p, void *q); // < +extern aint Ls__Infix_6061(void *p, void *q); // <= +extern aint Ls__Infix_62(void *p, void *q); // > +extern aint Ls__Infix_6261(void *p, void *q); // >= +extern aint Ls__Infix_6161(void *p, void *q); // == +extern aint Ls__Infix_3361(void *p, void *q); // != +extern aint Ls__Infix_3838(void *p, void *q); // && +extern aint Ls__Infix_3333(void *p, void *q); // || + +extern aint Llength(void *p); +extern void *Lstring(aint *args); +extern aint LtagHash(char *s); +extern void *Barray(aint *args, aint bn); +extern void *Bsexp(aint *args, aint bn); +extern void *Bclosure(aint *args, aint bn); +extern void *Bstring(aint *args); +extern void *Belem(void *p, aint i); +extern void *Bsta(void *x, aint i, void *v); + +extern aint Btag(void *d, aint t, aint n); +extern aint Barray_patt(void *d, aint n); +extern aint Bstring_patt(void *x, void *y); +extern aint Bclosure_tag_patt(void *x); +extern aint Bboxed_patt(void *x); +extern aint Bunboxed_patt(void *x); +extern aint Barray_tag_patt(void *x); +extern aint Bstring_tag_patt(void *x); +extern aint Bsexp_tag_patt(void *x); + +#define DISPATCH() \ + do { \ + ip++; \ + __attribute__((musttail)) return ip->func(STATE); \ + } while (0) + +#define DISPATCH_JUMP() \ + do { \ + __attribute__((musttail)) return ip->func(STATE); \ 
+ } while (0) + +/* + * Opcode handlers + */ +void op_const(DECL_STATE) { + ip++; + aint val = ip->num; + VM_DEBUG("CONST: %ld\n", (long)val); + STACK_PUSH(sp, BOX(val)); + DISPATCH(); +} + +#define DEFINE_BINOP(name, fn, opname) \ + static void name(DECL_STATE) { \ + aint y = STACK_POP(sp); \ + aint x = STACK_POP(sp); \ + VM_DEBUG(opname ": x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); \ + aint res = fn((void *)x, (void *)y); \ + VM_DEBUG(opname " result=%ld\n", (long)UNBOX(res)); \ + STACK_PUSH(sp, res); \ + DISPATCH(); \ + } + +DEFINE_BINOP(op_add, Ls__Infix_43, "ADD") +DEFINE_BINOP(op_sub, Ls__Infix_45, "SUB") +DEFINE_BINOP(op_mul, Ls__Infix_42, "MUL") +DEFINE_BINOP(op_lt, Ls__Infix_60, "LT") +DEFINE_BINOP(op_le, Ls__Infix_6061, "LE") +DEFINE_BINOP(op_gt, Ls__Infix_62, "GT") +DEFINE_BINOP(op_ge, Ls__Infix_6261, "GE") +DEFINE_BINOP(op_eq, Ls__Infix_6161, "EQ") +DEFINE_BINOP(op_ne, Ls__Infix_3361, "NE") +DEFINE_BINOP(op_and, Ls__Infix_3838, "AND") +DEFINE_BINOP(op_or, Ls__Infix_3333, "OR") + +void symbol_table_init(symbol_table *table) { da_init(*table); } + +void symbol_table_free(symbol_table *table) { da_free(*table); } + +static resolved_symbol *symbol_table_find(symbol_table *table, + const char *name) { + for (size_t i = 0; i < table->len; i++) { + if (strcmp(table->data[i].name, name) == 0) { + return &table->data[i]; + } + } + return NULL; +} + +// TODO: Make two separate functions for functions and globals? +// and structures? +static int symbol_table_add(symbol_table *table, const char *name, + insn *code_ptr, int32_t global_idx, + bool is_function) { + + // TODO: handle main in another way? 
+ if (strcmp(name, "main") != 0) { + resolved_symbol *existing = symbol_table_find(table, name); + // Update with the new definition + if (existing) { + existing->code_ptr = code_ptr; + existing->global_idx = global_idx; + existing->is_function = is_function; + return 0; + } + } + + resolved_symbol entry = { + .name = name, + .code_ptr = code_ptr, + .global_idx = global_idx, + .is_function = is_function, + }; + + symbol_table tmp = *table; + da_append(tmp, entry); + *table = tmp; + + return 0; +} + +/* + * Register sysargs separately because it's not stored explicitly during + * execution + */ +void register_sysargs(symbol_table *table) { + symbol_table_add(table, "global_sysargs", NULL, 0, false); +} + +void ext_func_stub_table_init(ext_func_stub_table *table) { da_init(*table); } + +static insn *ext_func_stub_table_find(ext_func_stub_table *table, + const char *name) { + for (size_t i = 0; i < table->len; i++) { + if (strcmp(table->data[i].name, name) == 0) { + return table->data[i].stub; + } + } + return NULL; +} + +static insn *ext_func_stub_table_add(ext_func_stub_table *table, + const char *name, fn stub_fn, + arena_t *code_arena) { + insn *stub = ARENA_ALLOC(code_arena, insn, 2); + + char *persistent_name = ARENA_STRDUP(code_arena, name); + + stub[0].func = stub_fn; + stub[1].str = persistent_name; + + ext_func_stub_entry entry = {.name = persistent_name, .stub = stub}; + ext_func_stub_table tmp = *table; + da_append(tmp, entry); + *table = tmp; + + VM_DEBUG("EXT_FUNC_STUB_TABLE: added '%s' -> stub=%p\n", name, (void *)stub); + return stub; +} + +int register_public_symbols(symbol_table *st, insn *code, bytecode *bc, + int32_t *offset_to_insn, int32_t global_base) { + + // TODO: Pass symbols only + for (size_t i = 0; i < bc->public_symbols_count; i++) { + public_symbol_t *pub = &bc->public_symbols[i]; + + insn *code_ptr = NULL; + int32_t global_idx = 0; + // TODO: ugly + bool is_function = (pub->flag == PUB_FLAG_FUNCTION); + + if (is_function) { + int32_t 
insn_idx = offset_to_insn[pub->code_offset]; + code_ptr = &code[insn_idx]; + } else { + // Global variable - rebase index with module's global base + global_idx = pub->code_offset + global_base; + } + + symbol_table_add(st, pub->name, code_ptr, global_idx, is_function); + } + return 0; +} + +void op_div(DECL_STATE) { + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); + if (UNBOX(y) == 0) { + fprintf(stderr, "Division by zero\n"); + exit(1); + } + aint res = Ls__Infix_47((void *)x, (void *)y); + VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); + STACK_PUSH(sp, res); + DISPATCH(); +} + +void op_mod(DECL_STATE) { + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); + if (UNBOX(y) == 0) { + fprintf(stderr, "Division by zero\n"); + exit(1); + } + aint res = Ls__Infix_37((void *)x, (void *)y); + VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); + STACK_PUSH(sp, res); + DISPATCH(); +} + +void op_read(DECL_STATE) { + (void)bp; + (void)globals; + aint val = Lread(); + VM_DEBUG("READ: %ld\n", (long)UNBOX(val)); + STACK_PUSH(sp, val); + DISPATCH(); +} + +void op_write(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("WRITE: %ld\n", (long)UNBOX(val)); + aint res = Lwrite(val); + STACK_PUSH(sp, res); + DISPATCH(); +} + +void op_drop(DECL_STATE) { + VM_DEBUG("DROP\n"); + sp++; + DISPATCH(); +} + +void op_dup(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_PEEK(sp); + VM_DEBUG("DUP: %ld\n", (long)UNBOX(val)); + STACK_PUSH(sp, val); + DISPATCH(); +} + +void op_swap(DECL_STATE) { + aint a = STACK_POP(sp); + aint b = STACK_POP(sp); + VM_DEBUG("SWAP: a=%ld, b=%ld\n", (long)UNBOX(a), (long)UNBOX(b)); + STACK_PUSH(sp, a); + STACK_PUSH(sp, b); + DISPATCH(); +} + +void op_elem(DECL_STATE) { + aint idx = STACK_POP(sp); + aint arr = STACK_POP(sp); + VM_DEBUG("ELEM: arr=%p, idx=%ld\n", (void *)arr, (long)UNBOX(idx)); + void *elem = Belem((void 
*)arr, idx); + STACK_PUSH(sp, (aint)elem); + DISPATCH(); +} + +void op_sta(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + aint idx = STACK_POP(sp); + aint arr = STACK_POP(sp); + VM_DEBUG("STA: arr=%p, idx=%ld, val=%ld\n", (void *)arr, (long)UNBOX(idx), + (long)UNBOX(val)); + Bsta((void *)arr, idx, (void *)val); + STACK_PUSH(sp, val); + DISPATCH(); +} + +/* + * Jumps + */ +void op_jmp(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + VM_DEBUG("JMP: target=%p\n", (void *)ip->target); + ip = ip->target; + DISPATCH_JUMP(); +} + +void op_cjmp_z(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + insn *target = ip->target; + ip++; + aint val = STACK_POP(sp); + VM_DEBUG("CJMP_Z: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), + (void *)target, UNBOX(val) == 0); + if (UNBOX(val) == 0) { + ip = target; + } + DISPATCH_JUMP(); +} + +void op_cjmp_nz(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + insn *target = ip->target; + ip++; + aint val = STACK_POP(sp); + VM_DEBUG("CJMP_NZ: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), + (void *)target, UNBOX(val) != 0); + if (UNBOX(val) != 0) { + ip = target; + } + DISPATCH_JUMP(); +} + +/* + * String, data etc. 
+ */ +void op_string(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + const char *str = ip->str; + VM_DEBUG("STRING: \"%s\"\n", str); + void *result = Bstring((void *)&str); + STACK_PUSH(sp, (aint)result); + DISPATCH(); +} + +void op_length(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + aint len = Llength((void *)val); + // TODO: think about debugging (becuase it prints after runtime call which + // might be bad because we won't see it) + VM_DEBUG("LENGTH: val=0x%lx -> len=%ld\n", val, UNBOX(len)); + STACK_PUSH(sp, len); + DISPATCH(); +} + +void op_lstring(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + void *str = Lstring(&val); + VM_DEBUG("LSTRING: val=%ld -> str=0x%lx\n", UNBOX(val), (aint)str); + STACK_PUSH(sp, (aint)str); + DISPATCH(); +} + +void op_barray(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + int32_t n = ip->num; + VM_DEBUG("BARRAY: n=%d\n", n); + aint *args_base = sp + 1; + aint tmp_args[256]; + // TODO: optimize for passing direct pointer + // instead of population array + for (int32_t i = 0; i < n; i++) { + tmp_args[i] = args_base[n - 1 - i]; + } + sp += n; + void *arr = Barray(tmp_args, BOX(n)); + STACK_PUSH(sp, (aint)arr); + DISPATCH(); +} + +void op_sexp(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + const char *tag_str = ip->str; + ip++; + int32_t n_fields = ip->num; + + aint tag_hash = LtagHash((char *)tag_str); + VM_DEBUG("SEXP: tag=\"%s\" (hash=0x%lx), n_fields=%d\n", tag_str, tag_hash, + n_fields); + aint args[256]; + aint *args_base = sp + 1; + // TODO: optimize for passing direct pointer + // instead of population array + for (int32_t i = 0; i < n_fields; i++) { + args[i] = args_base[n_fields - 1 - i]; + } + args[n_fields] = tag_hash; + sp += n_fields; + + void *s = Bsexp(args, BOX(n_fields + 1)); + STACK_PUSH(sp, (aint)s); + DISPATCH(); +} + +void op_tag(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + const char *tag_str = ip->str; + ip++; + int32_t n_fields = 
ip->num; + + aint tag_hash = LtagHash((char *)tag_str); + aint val = STACK_POP(sp); + VM_DEBUG("TAG: tag='%s' hash=0x%lx n_fields=%d val=0x%lx\n", tag_str, + (long)tag_hash, n_fields, (long)val); + aint result = Btag((void *)val, tag_hash, BOX(n_fields)); + VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_array(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + int32_t n = ip->num; + aint val = STACK_POP(sp); + VM_DEBUG("ARRAY: n=%d, val=%p\n", n, (void *)val); + aint result = Barray_patt((void *)val, BOX(n)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_fail(DECL_STATE) { + (void)bp; + (void)globals; + (void)sp; + ip++; + int32_t line = ip->num; + ip++; + int32_t col = ip->num; + VM_DEBUG("FAIL: line=%d, col=%d\n", line, col); + fprintf(stderr, "Match failure at line %d, column %d\n", line, col); + exit(1); +} + +/* + * Pattern matching operations + */ +void op_patt_str_cmp(DECL_STATE) { + (void)bp; + (void)globals; + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("PATT_STR_CMP: x=%p, y=%p\n", (void *)x, (void *)y); + aint result = Bstring_patt((void *)x, (void *)y); + VM_DEBUG("PATT_STR_CMP result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_string(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_STRING: val=%p\n", (void *)val); + aint result = Bstring_tag_patt((void *)val); + VM_DEBUG("PATT_STRING result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_array(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_ARRAY: val=%p\n", (void *)val); + aint result = Barray_tag_patt((void *)val); + VM_DEBUG("PATT_ARRAY result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_sexp(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_SEXP: val=%p\n", (void *)val); + 
aint result = Bsexp_tag_patt((void *)val); + VM_DEBUG("PATT_SEXP result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_boxed(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_BOXED: val=%p\n", (void *)val); + aint result = Bboxed_patt((void *)val); + VM_DEBUG("PATT_BOXED result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_unboxed(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_UNBOXED: val=%ld\n", (long)val); + aint result = Bunboxed_patt((void *)val); + VM_DEBUG("PATT_UNBOXED result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_closure(DECL_STATE) { + (void)bp; + (void)globals; + aint val = STACK_POP(sp); + VM_DEBUG("PATT_CLOSURE: val=%p\n", (void *)val); + aint result = Bclosure_tag_patt((void *)val); + VM_DEBUG("PATT_CLOSURE result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +/* + * Load / store operations + */ +void op_ld_glo(DECL_STATE) { + (void)bp; + ip++; + int32_t idx = ip->num; + VM_DEBUG("LD_GLO[%d] = %ld\n", idx, (long)globals[idx]); + STACK_PUSH(sp, globals[idx]); + DISPATCH(); +} + +void op_st_glo(DECL_STATE) { + (void)bp; + ip++; + int32_t idx = ip->num; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_GLO[%d] = %ld\n", idx, (long)val); + globals[idx] = val; + DISPATCH(); +} + +void op_ld_loc(DECL_STATE) { + ip++; + int32_t idx = ip->num; + VM_DEBUG("LD_LOC[%d] bp=%p bp[-idx]=%ld\n", idx, (void *)bp, (long)bp[-idx]); + STACK_PUSH(sp, bp[-idx]); + DISPATCH(); +} + +void op_st_loc(DECL_STATE) { + ip++; + int32_t idx = ip->num; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_LOC[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); + bp[-idx] = val; + DISPATCH(); +} + +void op_ld_arg(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = bp[n_args + 1 - idx]; + VM_DEBUG("LD_ARG[%d] n_args=%d 
bp=%p val=%ld\n", idx, n_args, (void *)bp, + (long)val); + STACK_PUSH(sp, val); + DISPATCH(); +} + +void op_st_arg(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_ARG[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); + bp[n_args + 1 - idx] = val; + DISPATCH(); +} + +void op_ld_clo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint *closure = (aint *)bp[n_args + 2]; + VM_DEBUG("LD_CLO[%d] closure=%p val=%ld\n", idx, (void *)closure, + (long)closure[idx + 1]); + STACK_PUSH(sp, closure[idx + 1]); + DISPATCH(); +} + +void op_st_clo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = STACK_PEEK(sp); + aint *closure = (aint *)bp[n_args + 2]; + VM_DEBUG("ST_CLO[%d] = %ld closure=%p\n", idx, (long)val, (void *)closure); + closure[idx + 1] = val; + DISPATCH(); +} + +/* + * Function call operations + */ +void op_begin(DECL_STATE) { + + ip++; + int32_t n_args = ip->num; + (void)n_args; + ip++; + int32_t n_locals = ip->num; + ip++; + + VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, + (void *)bp, (void *)sp); + + for (int32_t i = 0; i < n_locals; i++) { + STACK_PUSH(sp, 0); + } + + DISPATCH(); +} + +void op_call(DECL_STATE) { + ip++; + insn *target = ip->target; + ip++; + int32_t n_args = ip->num; + + VM_TRACE_CALL("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, + n_args, (void *)sp, (void *)bp); + + STACK_PUSH(sp, (aint)n_args); + STACK_PUSH(sp, (aint)bp); + + aint *new_bp = sp + 1; + target->func(target, sp, new_bp, globals); + + aint ret_val = *new_bp; + + sp = new_bp + n_args + 1; + + STACK_PUSH(sp, ret_val); + DISPATCH(); +} + +void op_module_end(DECL_STATE); + +void op_callc(DECL_STATE) { + ip++; + int32_t n_args = ip->num; + + aint closure_val = *(sp + 1 + n_args); + aint *closure = (aint *)closure_val; + + aint entry = closure[0]; + insn *target = (insn *)entry; + + 
VM_TRACE_CALL("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", + (void *)closure, (void *)target, n_args, (void *)sp, + (void *)bp); + + STACK_PUSH(sp, (aint)n_args); + STACK_PUSH(sp, (aint)bp); + + aint *new_bp = sp + 1; + + target->func(target, sp, new_bp, globals); + + aint ret_val = *new_bp; + VM_DEBUG("CALLC: return value=%ld new_bp=%p\n", (long)ret_val, + (void *)new_bp); + + sp = new_bp + n_args + 2; + + STACK_PUSH(sp, ret_val); + DISPATCH(); +} + +void op_ret(DECL_STATE) { + (void)ip; + (void)globals; + aint ret_val = STACK_PEEK(sp); + VM_TRACE_CALL("RET sp=%p ret_val=%ld bp=%p\n", (void *)sp, (long)ret_val, + (void *)bp); + *bp = ret_val; + return; +} + +void op_end(DECL_STATE) { + (void)ip; + (void)globals; + VM_TRACE_CALL("END sp=%p\n", (void *)sp); + aint ret_val = STACK_PEEK(sp); + *bp = ret_val; + // If a module_end bridge follows, jump to it. + // Otherwise, return to finish execution. + insn *next = ip + 1; + if (next && next->func == op_module_end) { + VM_DEBUG("END: jumping to module_end bridge at %p\n", (void *)next); + ip = next; + DISPATCH_JUMP(); + } + VM_DEBUG("END: returning (no module bridge)\n"); + return; +} + +/* + * Closures + */ + +/* + * External function closure stub - called when an external closure is invoked + * via op_callc This stub is generated for each unresolved external closure + * reference. The function name is embedded in the next instruction. 
+ */ +static void op_callc_ext_func_stub(DECL_STATE) { + (void)sp; + (void)globals; + ip++; + const char *func_name = ip->str; + + int32_t n_args = (int32_t)bp[1]; + + VM_DEBUG("EXT_FUNC_STUB: func='%s' n_args=%d bp=%p\n", func_name, n_args, + (void *)bp); + + aint args[256]; + for (int32_t i = 0; i < n_args; i++) { + args[i] = bp[n_args + 1 - i]; + } + + aint result = ffi_call_c(func_name, args, n_args); + VM_DEBUG("EXT_FUNC_STUB: func='%s' result=%ld\n", func_name, (long)result); + + // Store result in return value slot + *bp = result; + + return; +} +void op_closure(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + insn *target = ip->target; + ip++; + int32_t n_captured = ip->num; + + VM_DEBUG("CLOSURE: target=%p n_captured=%d\n", (void *)target, n_captured); + + aint tmp_args[256]; + tmp_args[0] = (aint)target; + aint *args_base = sp + 1; + for (int32_t i = 0; i < n_captured; i++) { + tmp_args[i + 1] = args_base[n_captured - 1 - i]; + VM_DEBUG("CLOSURE: captured[%d]=%ld\n", i, (long)tmp_args[i + 1]); + } + sp += n_captured; + + void *closure = Bclosure(tmp_args, BOX(n_captured)); + VM_DEBUG("CLOSURE: created=%p\n", (void *)closure); + STACK_PUSH(sp, (aint)closure); + DISPATCH(); +} + +#ifdef DEBUG_PRINT +void op_line(DECL_STATE) { + ip++; + int32_t line = ip->num; + fprintf(stderr, "LINE %d\n", line); + (void)line; + DISPATCH(); +} +#else +void op_line(DECL_STATE) { + ip++; + DISPATCH(); +} +#endif + +void op_call_ext_func(DECL_STATE) { + ip++; + const char *func_name = ip->str; + ip++; + int32_t n_args = ip->num; + + VM_DEBUG("CALL_EXT_FUNC: func='%s' n_args=%d\n", func_name, n_args); + + aint args[256]; + aint *args_base = sp + 1; + for (int32_t i = 0; i < n_args; i++) { + args[i] = args_base[n_args - 1 - i]; + } + sp += n_args; + + aint result = ffi_call_c(func_name, args, n_args); + + VM_DEBUG("CALL_EXT_FUNC: result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_module_end(DECL_STATE) { + (void)bp; + (void)globals; + 
ip++; + insn *next_module = ip->target; + + VM_DEBUG("MODULE_END: next_module=%p\n", (void *)next_module); + + if (next_module) { + ip = next_module; + DISPATCH_JUMP(); + } + // If no next module, just fall through (return) + VM_DEBUG("MODULE_END: no next module, returning\n"); + return; +} + +decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, + arena_t *arena) { + decode_ctx_t *ctx = ARENA_NEW(arena, decode_ctx_t); + + ctx->bc = bc; + + // TODO: ugly? + // Code array will be allocated from arena in decode() + ctx->code_cap = 0; + ctx->code = NULL; + ctx->code_len = 0; + ctx->global_offset = global_offset; + ctx->module_end_idx = (size_t)-1; + + // Allocate offset map (one entry per bytecode byte) + ctx->offset_map.cap = bc->code_size; + ctx->offset_map.offset_to_insn = ARENA_ALLOC(arena, int32_t, bc->code_size); + + // Initialize all to -1 (unmapped) + for (size_t i = 0; i < bc->code_size; i++) { + ctx->offset_map.offset_to_insn[i] = -1; + } + + // Initialize reader + // TODO: return struct not pass? + reader_init(&ctx->reader, bc->code, bc->code_size); + + return ctx; +} + +/* + * Decoding + */ +// TODO: /?? 
+static fixup_node_t *add_fixup(decode_ctx_t *ctx, meta_info_t *meta, + size_t target_off, size_t insn_idx, + memory *mem) { + (void)ctx; + fixup_node_t *node = ARENA_NEW(mem->tmp, fixup_node_t); + if (!node) + return NULL; + + node->insn_idx = insn_idx; + node->next = meta[target_off].fixups; + meta[target_off].fixups = node; + return node; +} + +static bool validate_target_off(const bytecode *bc, size_t target_off, + size_t current_bc_off, const char *op_name) { + if (target_off >= bc->code_size) { + fprintf( + stderr, + "Error: %s target_off=%zu out of range (bc_off=%zu, code_size=%zu)\n", + op_name, target_off, current_bc_off, bc->code_size); + return false; + } + return true; +} + +static bool emit_ld_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, + size_t global_base) { + const bytecode *bc = ctx->bc; + + if (IS_EXT_REF(idx)) { + // External global: resolve by name (idx is negative string offset) + int str_offset = EXT_REF_INDEX(idx); + const char *glob_name = bytecode_get_string(bc, str_offset); + resolved_symbol *sym = st ? symbol_table_find(st, glob_name) : NULL; + VM_DEBUG("DECODE: OP_LD external global '%s' resolved to idx=%d\n", + glob_name, sym->global_idx); + EMIT_FUNC(ctx, op_ld_glo); + EMIT_NUM(ctx, sym->global_idx); + } else { + // Local global: add module's global_base + EMIT_FUNC(ctx, op_ld_glo); + EMIT_NUM(ctx, global_base + idx); + } + return true; +} + +static bool emit_st_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, + size_t global_base) { + const bytecode *bc = ctx->bc; + + if (IS_EXT_REF(idx)) { + // External global: resolve by name (idx is negative string offset) + int str_offset = EXT_REF_INDEX(idx); + const char *glob_name = bytecode_get_string(bc, str_offset); + resolved_symbol *sym = st ? 
symbol_table_find(st, glob_name) : NULL; + VM_DEBUG("DECODE: OP_ST external global '%s' resolved to idx=%d\n", + glob_name, sym->global_idx); + EMIT_FUNC(ctx, op_st_glo); + EMIT_NUM(ctx, sym->global_idx); + } else { + // Local global: add module's global_base + EMIT_FUNC(ctx, op_st_glo); + EMIT_NUM(ctx, global_base + idx); + } + return true; +} + +/* + * Handle jump target resolution + */ +static bool handle_jump(decode_ctx_t *ctx, meta_info_t *meta, + size_t current_bc_off, int32_t depth, memory *mem) { + // TODO: unsigned ?? + int32_t target_off = reader_i32(&ctx->reader); + + if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { + return false; + } + + size_t my_idx = ctx->code_len; + EMIT_TARGET(ctx, NULL); // placeholder + + meta_info_t *tm = &meta[target_off]; + if (target_off < current_bc_off && tm->resolved_idx != -1) { + // Backward jump + ctx->code[my_idx].target = &ctx->code[tm->resolved_idx]; + if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { + fprintf(stderr, "Error: Loop stack mismatch\n"); + return false; + } + } else { + // Forward jump + if (!add_fixup(ctx, meta, target_off, my_idx, mem)) { + return false; + } + if (depth != -1) { + if (tm->stack_depth == -1) + tm->stack_depth = depth; + else if (tm->stack_depth != depth) { + fprintf(stderr, "Error: Jump stack mismatch\n"); + return false; + } + } + } + return true; +} + +insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, + memory *mem) { + const bytecode *bc = ctx->bc; + size_t global_base = ctx->global_offset; + + size_t code_cap = bc->code_size * 16; // TODO: estimate better + insn *code = ARENA_ALLOC(mem->code, insn, code_cap); + ctx->code = code; + ctx->code_cap = code_cap; + + meta_info_t *meta = ARENA_ALLOC(mem->tmp, meta_info_t, bc->code_size); + + // Initialize meta table + for (size_t i = 0; i < bc->code_size; i++) { + meta[i].resolved_idx = -1; + meta[i].stack_depth = -1; + meta[i].fixups = NULL; + } + + int32_t depth = 
0; + // Track if we've seen the first END + bool first_end_seen = false; + + while (!reader_eof(&ctx->reader)) { + size_t current_bc_off = reader_pos(&ctx->reader); + uint8_t opcode = reader_u8(&ctx->reader); + +#ifdef DEBUG_PRINT + if (current_bc_off < 10) { // Only log first 10 instructions + VM_DEBUG("DECODE: bc_off=%zu opcode=0x%02X\n", current_bc_off, opcode); + } +#endif + + VM_DEBUG("DECODE: visiting bc_off=%zu opcode=%d code_idx=%zu\n", + current_bc_off, opcode, ctx->code_len); + + meta_info_t *m = &meta[current_bc_off]; + m->resolved_idx = (int32_t)ctx->code_len; + + // Update offset map (bytecode offset -> instruction index) + ctx->offset_map.offset_to_insn[current_bc_off] = (int32_t)ctx->code_len; + + // Validate stack depth + if (depth != -1) { + if (m->stack_depth != -1 && m->stack_depth != depth) { + fprintf(stderr, + "Error: Stack mismatch at offset %zu (expected %d, got %d)\n", + current_bc_off, m->stack_depth, depth); + return NULL; + } + m->stack_depth = depth; + } else { + depth = m->stack_depth; + } + + // Resolve forward jumps (backpatching) + for (fixup_node_t *f = m->fixups; f; f = f->next) { + VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " + "code_idx=%zu\n", + current_bc_off, f->insn_idx, ctx->code_len); + ctx->code[f->insn_idx].target = &ctx->code[ctx->code_len]; + } + // m->fixups = NULL; + + switch (opcode) { + case OP_CONST: + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_const); + EMIT_NUM(ctx, reader_i32(&ctx->reader)); + break; + + case OP_BINOP_ADD: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_add); + break; + + case OP_BINOP_SUB: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_sub); + break; + + case OP_BINOP_MUL: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_mul); + break; + + case OP_BINOP_DIV: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_div); + break; + + case OP_BINOP_MOD: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_mod); + break; + + case OP_BINOP_LT: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_lt); + break; + + case OP_BINOP_LE: + 
DEPTH_POP(depth); + EMIT_FUNC(ctx, op_le); + break; + + case OP_BINOP_GT: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_gt); + break; + + case OP_BINOP_GE: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_ge); + break; + + case OP_BINOP_EQ: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_eq); + break; + + case OP_BINOP_NE: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_ne); + break; + + case OP_BINOP_AND: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_and); + break; + + case OP_BINOP_OR: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_or); + break; + + case OP_JMP: + EMIT_FUNC(ctx, op_jmp); + if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + return NULL; + } + DEPTH_DEAD(depth); + break; + + case OP_CJMP_Z: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_cjmp_z); + if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + return NULL; + } + break; + + case OP_CJMP_NZ: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_cjmp_nz); + if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + return NULL; + } + break; + + case OP_READ: + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_read); + break; + + case OP_WRITE: + EMIT_FUNC(ctx, op_write); + break; + + case OP_DROP: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_drop); + break; + + case OP_DUP: + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_dup); + break; + + case OP_SWAP: + EMIT_FUNC(ctx, op_swap); + break; + + case OP_ELEM: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_elem); + break; + + case OP_STA: + DEPTH_DEC(depth, 2); + EMIT_FUNC(ctx, op_sta); + break; + + case OP_LD: { + DEPTH_PUSH(depth); + int32_t idx = reader_i32(&ctx->reader); + emit_ld_glo(ctx, st, idx, global_base); + break; + } + + case OP_ST: { + int32_t idx = reader_i32(&ctx->reader); + emit_st_glo(ctx, st, idx, global_base); + break; + } + + case OP_LD_LOC: { + DEPTH_PUSH(depth); + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_ld_loc); + EMIT_NUM(ctx, idx); + break; + } + + case OP_ST_LOC: { + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_st_loc); + EMIT_NUM(ctx, idx); + break; + } + + case 
OP_LD_ARG: { + DEPTH_PUSH(depth); + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_ld_arg); + EMIT_NUM(ctx, idx); + break; + } + + case OP_ST_ARG: { + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_st_arg); + EMIT_NUM(ctx, idx); + break; + } + + case OP_LD_CLO: { + DEPTH_PUSH(depth); + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_ld_clo); + EMIT_NUM(ctx, idx); + break; + } + + case OP_ST_CLO: { + int32_t idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_st_clo); + EMIT_NUM(ctx, idx); + break; + } + + case OP_STRING: { + DEPTH_PUSH(depth); + int32_t str_idx = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_string); + EMIT_STR(ctx, bytecode_get_string(bc, str_idx)); + break; + } + + case OP_LENGTH: + EMIT_FUNC(ctx, op_length); + break; + + case OP_LSTRING: + EMIT_FUNC(ctx, op_lstring); + break; + + case OP_BARRAY: { + int32_t n = reader_i32(&ctx->reader); + DEPTH_DEC(depth, n - 1); + EMIT_FUNC(ctx, op_barray); + EMIT_NUM(ctx, n); + break; + } + + case OP_SEXP: { + int32_t tag_idx = reader_i32(&ctx->reader); + int32_t n_fields = reader_i32(&ctx->reader); + DEPTH_DEC(depth, n_fields - 1); + EMIT_FUNC(ctx, op_sexp); + EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); + EMIT_NUM(ctx, n_fields); + break; + } + + case OP_TAG: { + int32_t tag_idx = reader_i32(&ctx->reader); + int32_t n_fields = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_tag); + EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); + EMIT_NUM(ctx, n_fields); + break; + } + + case OP_ARRAY: { + int32_t n = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_array); + EMIT_NUM(ctx, n); + break; + } + + case OP_FAIL: { + int32_t line = reader_i32(&ctx->reader); + int32_t col = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_fail); + EMIT_NUM(ctx, line); + EMIT_NUM(ctx, col); + DEPTH_DEAD(depth); + break; + } + + case OP_PATT_STR_CMP: + DEPTH_POP(depth); + EMIT_FUNC(ctx, op_patt_str_cmp); + break; + + case OP_PATT_STRING: + EMIT_FUNC(ctx, op_patt_string); + break; + + case 
OP_PATT_ARRAY: + EMIT_FUNC(ctx, op_patt_array); + break; + + case OP_PATT_SEXP: + EMIT_FUNC(ctx, op_patt_sexp); + break; + + case OP_PATT_BOXED: + EMIT_FUNC(ctx, op_patt_boxed); + break; + + case OP_PATT_UNBOXED: + EMIT_FUNC(ctx, op_patt_unboxed); + break; + + case OP_PATT_CLOSURE: + EMIT_FUNC(ctx, op_patt_closure); + break; + + case OP_BEGIN: { + int32_t n_args = reader_i32(&ctx->reader); + int32_t n_locals = reader_i32(&ctx->reader); + depth = 0; + EMIT_FUNC(ctx, op_begin); + EMIT_NUM(ctx, n_args); + EMIT_NUM(ctx, n_locals); + EMIT_NUM(ctx, 0); + break; + } + + case OP_BEGIN_CLOSURE: { + int32_t n_args = reader_i32(&ctx->reader); + int32_t n_locals = reader_i32(&ctx->reader); + depth = 0; + VM_DEBUG("DECODE: OP_BEGIN_CLOSURE n_args=%d n_locals=%d\n", n_args, + n_locals); + EMIT_FUNC(ctx, op_begin); + EMIT_NUM(ctx, n_args); + EMIT_NUM(ctx, n_locals); + EMIT_NUM(ctx, 0); + break; + } + + case OP_CLOSURE: { + int32_t target_raw = reader_i32(&ctx->reader); + int32_t n_captured = reader_i32(&ctx->reader); + + VM_DEBUG("DECODE: OP_CLOSURE target_raw=0x%x n_captured=%d bc_off=%zu\n", + target_raw, n_captured, current_bc_off); + + bool is_external = IS_EXT_REF(target_raw); + const char *ext_func_name = NULL; + resolved_symbol *ext_sym = NULL; + + if (is_external) { + int str_offset = EXT_REF_INDEX(target_raw); + ext_func_name = bytecode_get_string(bc, str_offset); + VM_DEBUG("DECODE: OP_CLOSURE external name='%s' str_offset=%d\n", + ext_func_name, str_offset); + ext_sym = st ? 
symbol_table_find(st, ext_func_name) : NULL; + } else if (!validate_target_off(bc, (uint32_t)target_raw, current_bc_off, + "CLOSURE")) { + return NULL; + } + + // Emit load instructions for each captured variable + for (int32_t i = 0; i < n_captured; i++) { + uint8_t type_byte = reader_u8(&ctx->reader); + int32_t idx = reader_i32(&ctx->reader); + + int designation_type = type_byte & 0xF; + switch (designation_type) { + case 0: // Global + DEPTH_PUSH(depth); + emit_ld_glo(ctx, st, idx, global_base); + break; + case 1: // Local + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_ld_loc); + EMIT_NUM(ctx, idx); + break; + case 2: // Arg + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_ld_arg); + EMIT_NUM(ctx, idx); + break; + case 3: // Closure var + DEPTH_PUSH(depth); + EMIT_FUNC(ctx, op_ld_clo); + EMIT_NUM(ctx, idx); + break; + default: + fprintf(stderr, "Unknown designation type: %d\n", designation_type); + return NULL; + } + } + + DEPTH_DEC(depth, n_captured - 1); + + if (is_external) { + if (ext_sym && ext_sym->is_function && ext_sym->code_ptr) { + // Resolved external - emit regular closure with code pointer + EMIT_FUNC(ctx, op_closure); + EMIT_TARGET(ctx, ext_sym->code_ptr); + EMIT_NUM(ctx, n_captured); + } else { + // Check if we already have a stub for this function + insn *stub = ext_func_stub_table_find(fst, ext_func_name); + if (!stub) { + stub = ext_func_stub_table_add(fst, ext_func_name, + op_callc_ext_func_stub, mem->code); + } + + EMIT_FUNC(ctx, op_closure); + EMIT_TARGET(ctx, stub); + EMIT_NUM(ctx, n_captured); + } + } else { + uint32_t target_off = (uint32_t)target_raw; + size_t target_slot = ctx->code_len + 1; + EMIT_FUNC(ctx, op_closure); + EMIT_TARGET(ctx, NULL); + EMIT_NUM(ctx, n_captured); + + VM_DEBUG("DECODE: OP_CLOSURE internal target_off=%u target_slot=%zu\n", + target_off, target_slot); + + meta_info_t *tm = &meta[target_off]; + if (target_off < current_bc_off && tm->resolved_idx != -1) { + ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; + } else 
{ + add_fixup(ctx, meta, target_off, target_slot, mem); + } + } + break; + } + + case OP_CALL: { + int32_t target_off = reader_i32(&ctx->reader); + int32_t n_args = reader_i32(&ctx->reader); + DEPTH_DEC(depth, n_args - 1); + + VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " + "current_bc_off=%zu code_idx=%zu\n", + target_off, n_args, current_bc_off, ctx->code_len); + + if (IS_EXT_REF(target_off)) { + int str_offset = EXT_REF_INDEX(target_off); + const char *func_name = bytecode_get_string(bc, str_offset); + + // Try to resolve from symbol table (external module call) + resolved_symbol *sym = st ? symbol_table_find(st, func_name) : NULL; + + if (sym && sym->is_function && sym->code_ptr) { + VM_DEBUG("DECODE: external module call to '%s' resolved to %p\n", + func_name, (void *)sym->code_ptr); + + EMIT_FUNC(ctx, op_call); + EMIT_TARGET(ctx, sym->code_ptr); + EMIT_NUM(ctx, n_args); + } else { + VM_DEBUG("DECODE: external function call to '%s'\n", func_name); + + EMIT_FUNC(ctx, op_call_ext_func); + EMIT_STR(ctx, func_name); + EMIT_NUM(ctx, n_args); + } + } else { + // Call between modules + if (!validate_target_off(bc, (uint32_t)target_off, current_bc_off, + "CALL")) { + return NULL; + } + size_t target_slot = ctx->code_len + 1; + EMIT_FUNC(ctx, op_call); + EMIT_TARGET(ctx, NULL); + EMIT_NUM(ctx, n_args); + + meta_info_t *tm = &meta[(uint32_t)target_off]; + VM_DEBUG("DECODE: tm->resolved_idx=%d\n", tm->resolved_idx); + if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { + ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; + } else { + add_fixup(ctx, meta, (uint32_t)target_off, target_slot, mem); + } + } + break; + } + + case OP_CALLC: { + int32_t n_args = reader_i32(&ctx->reader); + DEPTH_DEC(depth, n_args); + EMIT_FUNC(ctx, op_callc); + EMIT_NUM(ctx, n_args); + break; + } + + case OP_RET: + EMIT_FUNC(ctx, op_ret); + DEPTH_DEAD(depth); + break; + + case OP_END: + EMIT_FUNC(ctx, op_end); + DEPTH_DEAD(depth); + + // After the first END 
(main function's end), emit module bridge + if (!first_end_seen) { + first_end_seen = true; + VM_DEBUG("DECODE: First END detected, emitting op_module_end bridge\n"); + ctx->module_end_idx = ctx->code_len; + EMIT_FUNC(ctx, op_module_end); + // Will be patched by linker + EMIT_TARGET(ctx, NULL); + } + break; + + case OP_LINE: { +#ifdef DEBUG_PRINT + int32_t line = reader_i32(&ctx->reader); + EMIT_FUNC(ctx, op_line); + EMIT_NUM(ctx, line); +#else + reader_skip(&ctx->reader, 4); +#endif + break; + } + + case 0xFF: + case 0x00: + break; + + default: + fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08zx\n", opcode, + reader_pos(&ctx->reader) - 1); + return NULL; + } + } + return ctx->code; +} diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h new file mode 100644 index 000000000..80d18a94f --- /dev/null +++ b/virtual_machine/decoder.h @@ -0,0 +1,113 @@ +#ifndef DECODER_NEW_H +#define DECODER_NEW_H + +#include "../runtime/runtime_common.h" +#include "arena.h" +#include "bytecode.h" +#include +#include + +union insn; + +// State: ip = instruction pointer, sp = stack pointer, bp = base pointer +// bp and globals are marked unused since not all handlers need them +#define DECL_STATE \ + union insn *ip, aint *sp, __attribute__((unused)) aint *bp, \ + __attribute__((unused)) aint *globals +#define STATE ip, sp, bp, globals + +// Function pointer type for opcode handlers (returns void for tail calls) +typedef void (*fn)(DECL_STATE); + +// Union representing a single threaded code instruction/operand +typedef union insn { + fn func; // Pointer to function + int32_t num; // Integer operand (signed) + const char *str; // String operand (direct pointer) + union insn *target; // Direct jump target (pointer to insn) +} insn; + +/* + * Sentinel value for external references (both functions and globals). + * Address = -(index + 1), so index 0 becomes -1, index 1 becomes -2, etc. 
+ */ +#define TO_EXT_REF(idx) (-(idx) - 1) +#define IS_EXT_REF(addr) ((addr) < 0) +#define EXT_REF_INDEX(addr) (-(addr) - 1) + +/* + * Resolved symbol structure - represents a function or global variable that has + * been resolved during decoding. + */ +typedef struct { + const char *name; // Symbol name (points into bytecode's string table) + insn *code_ptr; // For functions: pointer to first instruction + int32_t global_idx; // For globals: rebased global index + bool is_function; // true = function, false = global variable + const char *module_name; // Module that defined this symbol +} resolved_symbol; + +/* + * Maps symbol names to resolved symbols (functions or globals). + * Used for resolving imports and external references during decoding. + */ +typedef struct { + resolved_symbol *data; + size_t len; + size_t cap; +} symbol_table; + +typedef struct { + const char *name; // Function name (points into bytecode string table or dup) + insn *stub; // Pointer to 2-insn stub: [op_callc_ext_stub][name_str] +} ext_func_stub_entry; + +/* + * Cache of generated stubs for unresolved external function references. 
+ */ +typedef struct { + ext_func_stub_entry *data; + size_t len; + size_t cap; +} ext_func_stub_table; + +/* + * Mapping from bytecode offsets to instruction indices in the decoded code + */ +typedef struct { + int32_t *offset_to_insn; // offset_to_insn[bytecode_offset] = insn_index + size_t cap; // Size of the mapping array (= bytecode size) +} offset_map_t; + +typedef struct { + + const bytecode *bc; + + insn *code; // Output threaded code + size_t code_cap; + size_t code_len; + + byte_reader_t reader; + offset_map_t offset_map; + + size_t global_offset; // Offset for global variables + + size_t module_end_idx; // Pointer to this module's op_module_end instruction + // for linking (initialized to -1 if not found) + +} decode_ctx_t; + +void symbol_table_init(symbol_table *table); +void symbol_table_free(symbol_table *table); +void register_sysargs(symbol_table *table); +void ext_func_stub_table_init(ext_func_stub_table *table); +int register_public_symbols(symbol_table *st, insn *code, bytecode *bc, + int32_t *offset_to_insn, int32_t global_base); + +decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, + arena_t *arena); + +insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, + memory *mem); + +#endif // DECODER_NEW_H diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c new file mode 100644 index 000000000..421c01aa1 --- /dev/null +++ b/virtual_machine/ffi.c @@ -0,0 +1,178 @@ +/* + * External function calling for Lama VM. + * Uses libffi to dynamically call C functions. + */ + +#include "ffi.h" +#include "../runtime/runtime_common.h" +#include +#include +#include +#include +#include +#include + +// TODO: ugly? 
+typedef struct { + const char *lama_name; + const char *target_name; + bool is_args_array; + int fixed_args; +} func_metadata; + +static const func_metadata func_table[] = { + // Args array functions + {"Lsubstring", "Lsubstring", true, 0}, + {"Lstringcat", "Lstringcat", true, 0}, + {"Lstring", "Lstring", true, 0}, + {"Li__Infix_4343", "Li__Infix_4343", true, 0}, // strcat + {"Ls__Infix_58", "Ls__Infix_58", true, 0}, // : (cons) + {"Lclone", "Lclone", true, 0}, // clone + + // Variadic functions with mapping + {"Lprintf", "Bprintf", false, 1}, + {"Lfprintf", "Bfprintf", false, 2}, + {"Lsprintf", "Bsprintf", false, 1}, + + // Sentinel + {NULL, NULL, false, 0, NULL}}; + +// TODO: cache? +static void *lookup_function(const char *name) { + void *fn = dlsym(RTLD_DEFAULT, name); + char *error = dlerror(); + if (error) { + return NULL; + } + return fn; +} + +static const func_metadata *lookup_metadata(const char *name) { + for (int i = 0; func_table[i].lama_name != NULL; i++) { + if (strcmp(name, func_table[i].lama_name) == 0) { + return &func_table[i]; + } + } + return NULL; +} + +/* + * Functions that take (aint* args) - a pointer to argument array + * TODO: a better way? + */ +static aint call_args_array_function(const char *name, aint *args) { + void *fn = lookup_function(name); + if (!fn) { + fprintf(stderr, "Undefined external function: %s\n", name); + exit(1); + } + + ffi_cif cif; + ffi_type *arg_types[1] = {&ffi_type_pointer}; + void *arg_values[1] = {&args}; + void *result = NULL; + + ffi_status status = + ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_pointer, arg_types); + if (status != FFI_OK) { + fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); + exit(1); + } + + ffi_call(&cif, FFI_FN(fn), &result, arg_values); + return (aint)result; +} + +/* + * Mapping functions due to runtime.c x32 and x64 variants of printf etc. 
+ * TODO: very ugly + */ +static aint call_variadic_function(const char *target_name, int fixed_args, + aint *args, int n_args) { + void *fn = lookup_function(target_name); + if (!fn) { + fprintf(stderr, "Undefined external function: %s\n", target_name); + exit(1); + } + + if (n_args < fixed_args) { + fprintf(stderr, "FFI call '%s': expected at least %d args, got %d\n", + target_name, fixed_args, n_args); + exit(1); + } + + ffi_cif cif; + ffi_type *arg_types[n_args]; + void *arg_values[n_args]; + aint int_values[n_args]; + void *result = NULL; + + for (int i = 0; i < n_args; i++) { + if (UNBOXED(args[i])) { + int_values[i] = UNBOX(args[i]); + arg_types[i] = &ffi_type_pointer; + arg_values[i] = &int_values[i]; + } else { + arg_types[i] = &ffi_type_pointer; + arg_values[i] = &args[i]; + } + } + + // TODO: ABI ? + ffi_status status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, fixed_args, + n_args, &ffi_type_pointer, arg_types); + + if (status != FFI_OK) { + fprintf(stderr, "FFI prep failed for '%s': status=%d\n", target_name, + status); + exit(1); + } + + ffi_call(&cif, FFI_FN(fn), &result, arg_values); + return (aint)result; +} + +static aint call_regular_function(const char *name, aint *args, int n_args) { + void *fn = lookup_function(name); + if (!fn) { + fprintf(stderr, "Undefined external function: %s\n", name); + exit(1); + } + + ffi_cif cif; + ffi_type *arg_types[n_args]; + void *arg_values[n_args]; + aint result; + + for (int i = 0; i < n_args; i++) { + arg_types[i] = &ffi_type_pointer; + arg_values[i] = &args[i]; + } + + ffi_status status = + ffi_prep_cif(&cif, FFI_DEFAULT_ABI, n_args, &ffi_type_pointer, arg_types); + + if (status != FFI_OK) { + fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); + exit(1); + } + + ffi_call(&cif, FFI_FN(fn), &result, arg_values); + + return result; +} + +aint ffi_call_c(const char *name, aint *args, int n_args) { + const func_metadata *meta = lookup_metadata(name); + + if (meta) { + if (meta->is_args_array) { 
+ return call_args_array_function(meta->target_name, args); + } else { + return call_variadic_function(meta->target_name, meta->fixed_args, args, + n_args); + } + } + + return call_regular_function(name, args, n_args); +} diff --git a/virtual_machine/ffi.h b/virtual_machine/ffi.h new file mode 100644 index 000000000..2ebc4b37b --- /dev/null +++ b/virtual_machine/ffi.h @@ -0,0 +1,13 @@ +#ifndef FFI_CALL_H +#define FFI_CALL_H + +#include "../runtime/runtime_common.h" +#include + +/* + * Call an external function by name using libffi. + * + */ +aint ffi_call_c(const char *name, aint *args, int n_args); + +#endif diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c new file mode 100644 index 000000000..f9ab688e9 --- /dev/null +++ b/virtual_machine/linker.c @@ -0,0 +1,65 @@ +#include "linker.h" +#include "arena.h" +#include "decoder.h" +#include "module_manager.h" +#include +#include +#include +#include +#include + +// opcode handler used to identify module end +extern void op_module_end(DECL_STATE); + +insn *decode_and_link(module_manager *mm, memory *mem) { + arena_savepoint_t sp = arena_save(mem->tmp); + // TODO: shoudl be redone without dynamic array + symbol_table *st = ARENA_NEW(mem->tmp, symbol_table); + symbol_table_init(st); + + register_sysargs(st); + + ext_func_stub_table *fst = ARENA_NEW(mem->tmp, ext_func_stub_table); + ext_func_stub_table_init(fst); + + insn *hd_insn = NULL; + insn *tl_insn = NULL; + for (size_t i = 0; i < mm->modules.len; i++) { + loaded_module *mod = mm->modules.data[i]; + + decode_ctx_t *ctx = decode_ctx_create(mod->bc, mod->global_base, mem->tmp); + + insn *mod_code = decode(ctx, st, fst, mem); + + // Register public symbols from this module + register_public_symbols(st, mod_code, mod->bc, + ctx->offset_map.offset_to_insn, mod->global_base); + + if (hd_insn == NULL) { + hd_insn = &mod_code[0]; + } + + // Link previous module's end to this module's start + if (tl_insn != NULL) { + // prev_module_end is pointing to 
op_module_end instruction + // The next slot contains the target pointer (NULL placeholder) + tl_insn[1].target = &mod_code[0]; + } + + // Remember this module's op_module_end instruction for next iteration + // It was emitted after the first END, position stored in + // ctx->module_end_idx + size_t module_end_idx = ctx->module_end_idx; + + // Last module's op_module_end target remains NULL (program ends) + if (module_end_idx == (size_t)-1) { + tl_insn = NULL; + } else { + tl_insn = &mod_code[module_end_idx]; + } + } + + arena_restore(mem->tmp, sp); + + return hd_insn; +} diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h new file mode 100644 index 000000000..7a5ed336d --- /dev/null +++ b/virtual_machine/linker.h @@ -0,0 +1,10 @@ +#ifndef LINKER_H +#define LINKER_H + +#include "decoder.h" +#include "module_manager.h" +#include "stddef.h" + +insn *decode_and_link(module_manager *mm, memory *mem); + +#endif diff --git a/virtual_machine/module_manager.c b/virtual_machine/module_manager.c new file mode 100644 index 000000000..8fc8bfb06 --- /dev/null +++ b/virtual_machine/module_manager.c @@ -0,0 +1,144 @@ +/* + * Module manager implementation for Lama VM. + */ + +#define _POSIX_C_SOURCE 200809L + +#include "module_manager.h" +#include "arena.h" +#include "bytecode.h" +#include "da.h" +#include +#include +#include +#include +#include + +/* + * Build the path to a module's .bc file. 
+ */ +static char *build_module_path(const char *module_name, const char *search_path, + arena_t *arena) { + char *path = ARENA_ALLOC(arena, char, MAX_PATH_LEN); + if (search_path && strlen(search_path) > 0) { + snprintf(path, MAX_PATH_LEN, "%s/%s.bc", search_path, module_name); + } else { + snprintf(path, MAX_PATH_LEN, "%s.bc", module_name); + } + + return path; +} + +/* Extract module name from filename (without path and extension .bc) */ +static char *extract_module_name(const char *filename, arena_t *arena) { + char *path_copy = ARENA_STRDUP(arena, filename); + char *base = basename(path_copy); + + char *dot = strrchr(base, '.'); + if (dot && strcmp(dot, ".bc") == 0) { + *dot = '\0'; + } + + return ARENA_STRDUP(arena, base); +} + +static char *get_directory(const char *filepath, arena_t *arena) { + char *path_copy = ARENA_STRDUP(arena, filepath); + + char *dir = dirname(path_copy); + return ARENA_STRDUP(arena, dir); +} + +/* + * Check if a string looks like a file path (contains '/' or ends with '.bc') + */ +static int is_filepath(const char *str) { + size_t len = strlen(str); + return strchr(str, '/') != NULL || + (len > 3 && strcmp(str + len - 3, ".bc") == 0); +} + +static loaded_module *find_module(module_manager *mm, const char *name) { + for (size_t i = 0; i < mm->modules.len; i++) { + if (strcmp(mm->modules.data[i]->bc->module_name, name) == 0) { + return mm->modules.data[i]; + } + } + return NULL; +} + +/* + * Load modules recursively. 
+ */ +static loaded_module *load_module(module_manager *mm, const char *s, + const char *search_path, memory *mem) { + char *filepath; + char *module_name; + char *derived_search_path; + + // Determine if we're loading by path or by name + if (is_filepath(s)) { + filepath = ARENA_STRDUP(mem->tmp, s); + module_name = extract_module_name(s, mem->tmp); + } else { + filepath = build_module_path(s, search_path, mem->tmp); + module_name = ARENA_STRDUP(mem->tmp, s); + } + + // Check if module is already loaded (avoid duplicates and circular + // dependencies) + // TODO: check ciruclar imports? + loaded_module *result = find_module(mm, module_name); + if (result) { + return result; + } + + bytecode *bc = load_bytecode(filepath, mem); + // NOTE: a bit ugly: + bc->module_name = module_name; + + // Determine search path for dependencies + // TODO: -I + if (search_path) { + derived_search_path = ARENA_STRDUP(mem->tmp, search_path); + } else { + derived_search_path = get_directory(filepath, mem->tmp); + } + + // Recursively load dependencies + for (size_t i = 0; i < bc->import_count; i++) { + const char *import_name = bc->imports[i]; + + // Skip since we already have it (as runtime.a) + if (strcmp(import_name, "Std") == 0) { + continue; + } + + load_module(mm, import_name, derived_search_path, mem); + } + + result = ARENA_NEW(mem->main, loaded_module); + result->bc = bc; + result->global_base = (int32_t)mm->total_globals_count; + mm->total_globals_count += bc->globals_count; + mm->total_code_size += bc->code_size; + // TODO: also use arena + da_append(mm->modules, result); + return result; +} + +module_manager *load_modules(const char *main_module_path, + const char *search_path, memory *mem) { + + module_manager *mm = ARENA_NEW(mem->main, module_manager); + + da_init(mm->modules); + // Reserve global index 0 for sysargs + mm->total_globals_count = 1; + + arena_savepoint_t sp = arena_save(mem->tmp); + load_module(mm, main_module_path, search_path, mem); + 
arena_restore(mem->tmp, sp); + + return mm; +} diff --git a/virtual_machine/module_manager.h b/virtual_machine/module_manager.h new file mode 100644 index 000000000..fd5563aae --- /dev/null +++ b/virtual_machine/module_manager.h @@ -0,0 +1,41 @@ +/* + * Module loader and linker for Lama VM. + * + */ + +#ifndef MODULE_MANAGER_H +#define MODULE_MANAGER_H + +#include "arena.h" +#include "bytecode.h" +#include +#include + +#define MAX_PATH_LEN 1024 +#define INITIAL_SYMBOL_TABLE_CAP 64 +#define INITIAL_MODULE_CAP 8 + +typedef struct { + bytecode *bc; // Loaded bytecode + int32_t global_base; // Starting index for this module's globals +} loaded_module; + +typedef struct { + // Loaded modules (topological order) + struct { + loaded_module **data; + size_t len; + size_t cap; + } modules; + + // Combined globals count + // Used for stack allocation + size_t total_globals_count; + size_t total_code_size; + +} module_manager; + +module_manager *load_modules(const char *main_module_path, + const char *search_path, memory *mem); + +#endif diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 677e2a1fc..5c29f1268 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -1,6 +1,6 @@ /* * Utility functions for Lama VM opcodes. - * Provides debugging support, such as converting opcode values to string + * Provides debugging support, such as converting opcode values to string * representations. 
*/ @@ -121,8 +121,6 @@ const char *opcode_to_string(uint8_t opcode) { return "LSTRING"; case OP_BARRAY: return "BARRAY"; - case OP_HALT: - return "HALT"; default: fprintf(stderr, "Unknown opcode: 0x%02X\n", opcode); exit(1); diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index f61396c2d..f80259f0b 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -59,7 +59,6 @@ typedef enum { OP_LENGTH = 0x72, OP_LSTRING = 0x73, OP_BARRAY = 0x74, - OP_HALT = 0xFF, } opcode_t; const char *opcode_to_string(uint8_t opcode); diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh index b28aa736e..dd1098fee 100755 --- a/virtual_machine/regression_check.sh +++ b/virtual_machine/regression_check.sh @@ -12,14 +12,14 @@ compiler=../_build/default/src/Driver.exe echo "Used compiler path:" echo $compiler -for test in ../regression/*.lama; do +for test in ../regression/*.lama; do echo $test - $compiler -b $test > /dev/null + $compiler -b $test >/dev/null test_path="${test%.*}" test_file="${test_path##*/}" echo $test_path: $test_file - cat $test_path.input | ./interpreter.exe $test_file.bc > test.log 2>&1 - sed -E '1d;s/^//' $test_path.t > test_orig.log + cat $test_path.input | ./vm.exe $test_file.bc >test.log 2>&1 + sed -E '1d;s/^//' $test_path.t >test_orig.log diff -w test.log test_orig.log rm $test_file.bc diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c new file mode 100644 index 000000000..8cdc61d9e --- /dev/null +++ b/virtual_machine/vm.c @@ -0,0 +1,81 @@ +#include "vm.h" +#include "../runtime/gc.h" +#include "../runtime/runtime_common.h" +#include "arena.h" +#include "decoder.h" +#include "linker.h" +#include "module_manager.h" +#include +#include +#include + +extern void set_args(aint argc, char *argv[]); +extern size_t __gc_stack_top, __gc_stack_bottom; + +virtual_machine *vm_create(const char *main_module_path, + const char *search_path) { + + // TODO: estimates + memory *mem = memory_create(1024 * 
1024, 4096); + virtual_machine *vm = ARENA_NEW(mem->main, virtual_machine); + + module_manager *mm = load_modules(main_module_path, search_path, mem); + if (!mm) { + return NULL; + } + + vm->globals_count = mm->total_globals_count; + + insn *entry_point = decode_and_link(mm, mem); + + vm->entry_point = entry_point; + + return vm; +} + +aint vm_run(virtual_machine *vm) { + + // TODO: this is all very ugly + size_t active_stack_size = 32768; + __attribute__((aligned(16))) aint stack_data[65536]; + + memset(stack_data, 0, active_stack_size * sizeof(aint)); + + __gc_stack_bottom = (size_t)(stack_data + active_stack_size); + __gc_stack_top = (size_t)(stack_data - 16); + + // Globals at the top of stack + aint *globals = stack_data; + + extern void *global_sysargs; + globals[0] = (aint)global_sysargs; + + aint *sp = &stack_data[active_stack_size - 1]; + aint *bp = sp; + + insn *ip = vm->entry_point; + + ip->func(ip, sp, bp, globals); + + return *bp; +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + __gc_init(); + + set_args(argc, argv); + + virtual_machine *vm = vm_create(argv[1], NULL); + if (!vm) { + return 1; + } + + vm_run(vm); + + return 0; +} diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h new file mode 100644 index 000000000..e9d2c60aa --- /dev/null +++ b/virtual_machine/vm.h @@ -0,0 +1,18 @@ +#ifndef VM_H +#define VM_H + +#include "decoder.h" +#include + +typedef struct { + size_t globals_count; // Number of globals + insn *entry_point; // Entry point instruction + +} virtual_machine; + +virtual_machine *vm_create(const char *main_module_path, + const char *search_path); + +aint vm_run(virtual_machine *vm); + +#endif // VM_H From d6764e189f974967209cb5ab3b5ea862c2477bed Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 9 Feb 2026 01:50:42 +0300 Subject: [PATCH 019/141] attribute unused, remove opcodes for specific calls --- src/SM.ml | 8 --- virtual_machine/decoder.c | 113 
++------------------------------------ virtual_machine/decoder.h | 4 +- virtual_machine/opcodes.h | 5 +- 4 files changed, 8 insertions(+), 122 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index 6b6e6e25a..cdbecb6d6 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -319,14 +319,6 @@ module ByteCode = struct add_bytes [ (5 * 16) + 1 ]; add_fixup s; add_ints [ 0 ] - (* 0x70 *) - | CALL ("read", _, _) -> add_bytes [ (7 * 16) + 0 ] - (* 0x71 *) - | CALL ("write", _, _) -> add_bytes [ (7 * 16) + 1 ] - (* 0x72 *) - | CALL ("length", _, _) -> add_bytes [ (7 * 16) + 2 ] - (* 0x73 *) - | CALL ("string", _, _) -> add_bytes [ (7 * 16) + 3 ] (* 0x74 *) | CALL (".array", n, _) -> add_bytes [ (7 * 16) + 4 ]; diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 5483e676c..96accb9a3 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -321,23 +321,6 @@ void op_mod(DECL_STATE) { DISPATCH(); } -void op_read(DECL_STATE) { - (void)bp; - (void)globals; - aint val = Lread(); - VM_DEBUG("READ: %ld\n", (long)UNBOX(val)); - STACK_PUSH(sp, val); - DISPATCH(); -} - -void op_write(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("WRITE: %ld\n", (long)UNBOX(val)); - aint res = Lwrite(val); - STACK_PUSH(sp, res); - DISPATCH(); -} - void op_drop(DECL_STATE) { VM_DEBUG("DROP\n"); sp++; @@ -345,8 +328,6 @@ void op_drop(DECL_STATE) { } void op_dup(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_PEEK(sp); VM_DEBUG("DUP: %ld\n", (long)UNBOX(val)); STACK_PUSH(sp, val); @@ -372,8 +353,6 @@ void op_elem(DECL_STATE) { } void op_sta(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); aint idx = STACK_POP(sp); aint arr = STACK_POP(sp); @@ -388,8 +367,6 @@ void op_sta(DECL_STATE) { * Jumps */ void op_jmp(DECL_STATE) { - (void)bp; - (void)globals; ip++; VM_DEBUG("JMP: target=%p\n", (void *)ip->target); ip = ip->target; @@ -397,8 +374,6 @@ void op_jmp(DECL_STATE) { } void op_cjmp_z(DECL_STATE) { - (void)bp; - (void)globals; ip++; insn *target 
= ip->target; ip++; @@ -412,8 +387,6 @@ void op_cjmp_z(DECL_STATE) { } void op_cjmp_nz(DECL_STATE) { - (void)bp; - (void)globals; ip++; insn *target = ip->target; ip++; @@ -440,28 +413,6 @@ void op_string(DECL_STATE) { DISPATCH(); } -void op_length(DECL_STATE) { - (void)bp; - (void)globals; - aint val = STACK_POP(sp); - aint len = Llength((void *)val); - // TODO: think about debugging (becuase it prints after runtime call which - // might be bad because we won't see it) - VM_DEBUG("LENGTH: val=0x%lx -> len=%ld\n", val, UNBOX(len)); - STACK_PUSH(sp, len); - DISPATCH(); -} - -void op_lstring(DECL_STATE) { - (void)bp; - (void)globals; - aint val = STACK_POP(sp); - void *str = Lstring(&val); - VM_DEBUG("LSTRING: val=%ld -> str=0x%lx\n", UNBOX(val), (aint)str); - STACK_PUSH(sp, (aint)str); - DISPATCH(); -} - void op_barray(DECL_STATE) { (void)bp; (void)globals; @@ -482,8 +433,6 @@ void op_barray(DECL_STATE) { } void op_sexp(DECL_STATE) { - (void)bp; - (void)globals; ip++; const char *tag_str = ip->str; ip++; @@ -508,8 +457,6 @@ void op_sexp(DECL_STATE) { } void op_tag(DECL_STATE) { - (void)bp; - (void)globals; ip++; const char *tag_str = ip->str; ip++; @@ -526,8 +473,6 @@ void op_tag(DECL_STATE) { } void op_array(DECL_STATE) { - (void)bp; - (void)globals; ip++; int32_t n = ip->num; aint val = STACK_POP(sp); @@ -538,9 +483,6 @@ void op_array(DECL_STATE) { } void op_fail(DECL_STATE) { - (void)bp; - (void)globals; - (void)sp; ip++; int32_t line = ip->num; ip++; @@ -554,8 +496,6 @@ void op_fail(DECL_STATE) { * Pattern matching operations */ void op_patt_str_cmp(DECL_STATE) { - (void)bp; - (void)globals; aint y = STACK_POP(sp); aint x = STACK_POP(sp); VM_DEBUG("PATT_STR_CMP: x=%p, y=%p\n", (void *)x, (void *)y); @@ -566,8 +506,6 @@ void op_patt_str_cmp(DECL_STATE) { } void op_patt_string(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_STRING: val=%p\n", (void *)val); aint result = Bstring_tag_patt((void *)val); @@ -577,8 +515,6 @@ void 
op_patt_string(DECL_STATE) { } void op_patt_array(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_ARRAY: val=%p\n", (void *)val); aint result = Barray_tag_patt((void *)val); @@ -588,8 +524,6 @@ void op_patt_array(DECL_STATE) { } void op_patt_sexp(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_SEXP: val=%p\n", (void *)val); aint result = Bsexp_tag_patt((void *)val); @@ -599,8 +533,6 @@ void op_patt_sexp(DECL_STATE) { } void op_patt_boxed(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_BOXED: val=%p\n", (void *)val); aint result = Bboxed_patt((void *)val); @@ -610,8 +542,6 @@ void op_patt_boxed(DECL_STATE) { } void op_patt_unboxed(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_UNBOXED: val=%ld\n", (long)val); aint result = Bunboxed_patt((void *)val); @@ -621,8 +551,6 @@ void op_patt_unboxed(DECL_STATE) { } void op_patt_closure(DECL_STATE) { - (void)bp; - (void)globals; aint val = STACK_POP(sp); VM_DEBUG("PATT_CLOSURE: val=%p\n", (void *)val); aint result = Bclosure_tag_patt((void *)val); @@ -635,7 +563,6 @@ void op_patt_closure(DECL_STATE) { * Load / store operations */ void op_ld_glo(DECL_STATE) { - (void)bp; ip++; int32_t idx = ip->num; VM_DEBUG("LD_GLO[%d] = %ld\n", idx, (long)globals[idx]); @@ -644,7 +571,6 @@ void op_ld_glo(DECL_STATE) { } void op_st_glo(DECL_STATE) { - (void)bp; ip++; int32_t idx = ip->num; aint val = STACK_PEEK(sp); @@ -792,8 +718,6 @@ void op_callc(DECL_STATE) { } void op_ret(DECL_STATE) { - (void)ip; - (void)globals; aint ret_val = STACK_PEEK(sp); VM_TRACE_CALL("RET sp=%p ret_val=%ld bp=%p\n", (void *)sp, (long)ret_val, (void *)bp); @@ -802,8 +726,6 @@ void op_ret(DECL_STATE) { } void op_end(DECL_STATE) { - (void)ip; - (void)globals; VM_TRACE_CALL("END sp=%p\n", (void *)sp); aint ret_val = STACK_PEEK(sp); *bp = ret_val; @@ -829,8 +751,6 @@ void op_end(DECL_STATE) { * reference. 
The function name is embedded in the next instruction. */ static void op_callc_ext_func_stub(DECL_STATE) { - (void)sp; - (void)globals; ip++; const char *func_name = ip->str; @@ -853,8 +773,6 @@ static void op_callc_ext_func_stub(DECL_STATE) { return; } void op_closure(DECL_STATE) { - (void)bp; - (void)globals; ip++; insn *target = ip->target; ip++; @@ -915,8 +833,6 @@ void op_call_ext_func(DECL_STATE) { } void op_module_end(DECL_STATE) { - (void)bp; - (void)globals; ip++; insn *next_module = ip->target; @@ -965,10 +881,8 @@ decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, * Decoding */ // TODO: /?? -static fixup_node_t *add_fixup(decode_ctx_t *ctx, meta_info_t *meta, - size_t target_off, size_t insn_idx, - memory *mem) { - (void)ctx; +static fixup_node_t *add_fixup(meta_info_t *meta, size_t target_off, + size_t insn_idx, memory *mem) { fixup_node_t *node = ARENA_NEW(mem->tmp, fixup_node_t); if (!node) return NULL; @@ -1058,7 +972,7 @@ static bool handle_jump(decode_ctx_t *ctx, meta_info_t *meta, } } else { // Forward jump - if (!add_fixup(ctx, meta, target_off, my_idx, mem)) { + if (!add_fixup(meta, target_off, my_idx, mem)) { return false; } if (depth != -1) { @@ -1233,15 +1147,6 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, } break; - case OP_READ: - DEPTH_PUSH(depth); - EMIT_FUNC(ctx, op_read); - break; - - case OP_WRITE: - EMIT_FUNC(ctx, op_write); - break; - case OP_DROP: DEPTH_POP(depth); EMIT_FUNC(ctx, op_drop); @@ -1332,14 +1237,6 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, break; } - case OP_LENGTH: - EMIT_FUNC(ctx, op_length); - break; - - case OP_LSTRING: - EMIT_FUNC(ctx, op_lstring); - break; - case OP_BARRAY: { int32_t n = reader_i32(&ctx->reader); DEPTH_DEC(depth, n - 1); @@ -1525,7 +1422,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, if (target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code[target_slot].target 
= &ctx->code[tm->resolved_idx]; } else { - add_fixup(ctx, meta, target_off, target_slot, mem); + add_fixup(meta, target_off, target_slot, mem); } } break; @@ -1577,7 +1474,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; } else { - add_fixup(ctx, meta, (uint32_t)target_off, target_slot, mem); + add_fixup(meta, (uint32_t)target_off, target_slot, mem); } } break; diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 80d18a94f..026c09223 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -12,8 +12,8 @@ union insn; // State: ip = instruction pointer, sp = stack pointer, bp = base pointer // bp and globals are marked unused since not all handlers need them #define DECL_STATE \ - union insn *ip, aint *sp, __attribute__((unused)) aint *bp, \ - __attribute__((unused)) aint *globals + __attribute((unused)) union insn *ip, __attribute__((unused)) aint *sp, \ + __attribute__((unused)) aint *bp, __attribute__((unused)) aint *globals #define STATE ip, sp, bp, globals // Function pointer type for opcode handlers (returns void for tail calls) diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index f80259f0b..0244ae3d3 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -54,10 +54,7 @@ typedef enum { OP_PATT_BOXED = 0x64, OP_PATT_UNBOXED = 0x65, OP_PATT_CLOSURE = 0x66, - OP_READ = 0x70, - OP_WRITE = 0x71, - OP_LENGTH = 0x72, - OP_LSTRING = 0x73, + // TODO: remove this opcode, instead call real function OP_BARRAY = 0x74, } opcode_t; From fe44a9399c74af224100497be621ae23311a0b90 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 9 Feb 2026 04:25:35 +0300 Subject: [PATCH 020/141] type naming shenanigans --- virtual_machine/arena.c | 36 ++++++++++++------------- virtual_machine/arena.h | 30 ++++++++++----------- virtual_machine/bytecode.c | 21 
++++++++------- virtual_machine/bytecode.h | 34 ++++++++++++++--------- virtual_machine/decoder.c | 46 ++++++++++++++++---------------- virtual_machine/decoder.h | 17 ++++++------ virtual_machine/linker.c | 6 ++--- virtual_machine/module_manager.c | 12 ++++----- 8 files changed, 106 insertions(+), 96 deletions(-) diff --git a/virtual_machine/arena.c b/virtual_machine/arena.c index daaa7acb9..a48fa8b46 100644 --- a/virtual_machine/arena.c +++ b/virtual_machine/arena.c @@ -6,8 +6,8 @@ #define MIN_BLOCK_SIZE 4096 -static arena_block_t *block_create(size_t data_size) { - arena_block_t *b = malloc(sizeof(arena_block_t) + data_size); +static arena_block *block_create(size_t data_size) { + arena_block *b = malloc(sizeof(arena_block) + data_size); if (!b) { perror("arena: block_create malloc"); exit(1); @@ -18,25 +18,25 @@ static arena_block_t *block_create(size_t data_size) { return b; } -arena_t *arena_create(size_t init_cap) { - arena_t *a = malloc(sizeof(arena_t)); +arena *arena_create(size_t init_cap) { + arena *a = malloc(sizeof(arena)); if (!a) { perror("arena: arena_create malloc"); exit(1); } size_t cap = init_cap < MIN_BLOCK_SIZE ? MIN_BLOCK_SIZE : init_cap; - arena_block_t *b = block_create(cap); + arena_block *b = block_create(cap); a->head = b; a->current = b; a->block_size = cap; return a; } -void *arena_alloc(arena_t *arena, size_t size, size_t align) { +void *arena_alloc(arena *arena, size_t size, size_t align) { assert((align & (align - 1)) == 0); - arena_block_t *blk = arena->current; + arena_block *blk = arena->current; // Align within current block size_t mask = align - 1; @@ -57,7 +57,7 @@ void *arena_alloc(arena_t *arena, size_t size, size_t align) { if (new_cap < alloc_need) new_cap = alloc_need; - arena_block_t *nb = block_create(new_cap); + arena_block *nb = block_create(new_cap); blk->next = nb; arena->current = nb; @@ -71,17 +71,17 @@ void *arena_alloc(arena_t *arena, size_t size, size_t align) { } // TODO: cleanup macro? 
-arena_savepoint_t arena_save(arena_t *arena) { - arena_savepoint_t sp = {.block = arena->current, +arena_savepoint arena_save(arena *arena) { + arena_savepoint sp = {.block = arena->current, .used = arena->current->used}; return sp; }; -void arena_restore(arena_t *arena, arena_savepoint_t sp) { +void arena_restore(arena *arena, arena_savepoint sp) { if (!arena || !sp.block) return; - arena_block_t *b = arena->head; + arena_block *b = arena->head; // Walk to the savepoint block while (b && b != sp.block) { @@ -91,11 +91,11 @@ void arena_restore(arena_t *arena, arena_savepoint_t sp) { // Restore usage b->used = sp.used; - arena_block_t *to_free = b->next; + arena_block *to_free = b->next; b->next = NULL; while (to_free) { - arena_block_t *next = to_free->next; + arena_block *next = to_free->next; free(to_free); to_free = next; } @@ -129,7 +129,7 @@ memory *memory_destroy(memory *mem) { return NULL; } -char *arena_strdup(arena_t *arena, const char *s) { +char *arena_strdup(arena *arena, const char *s) { if (!s) return NULL; @@ -139,13 +139,13 @@ char *arena_strdup(arena_t *arena, const char *s) { return dst; } -void arena_destroy(arena_t *arena) { +void arena_destroy(arena *arena) { if (!arena) return; - arena_block_t *b = arena->head; + arena_block *b = arena->head; while (b) { - arena_block_t *next = b->next; + arena_block *next = b->next; free(b); b = next; } diff --git a/virtual_machine/arena.h b/virtual_machine/arena.h index 3926996b6..c612e4452 100644 --- a/virtual_machine/arena.h +++ b/virtual_machine/arena.h @@ -9,35 +9,35 @@ typedef struct arena_block { size_t size; // Total capacity of this block's data region size_t used; // Bytes used in this block char data[]; -} arena_block_t; +} arena_block; typedef struct { - arena_block_t *block; + arena_block *block; size_t used; -} arena_savepoint_t; +} arena_savepoint; typedef struct { - arena_block_t *head; // First block (for traversal / destroy) - arena_block_t *current; // Current block we're allocating from + 
arena_block *head; // First block (for traversal / destroy) + arena_block *current; // Current block we're allocating from size_t block_size; // Default size for new blocks -} arena_t; +} arena; typedef struct { - arena_t *main; - arena_t *tmp; - arena_t *code; // For now this is only FFI stubs + arena *main; + arena *tmp; + arena *code; // For now this is only FFI stubs } memory; -arena_t *arena_create(size_t init_cap); +arena *arena_create(size_t init_cap); -void *arena_alloc(arena_t *arena, size_t size, size_t align); +void *arena_alloc(arena *arena, size_t size, size_t align); -char *arena_strdup(arena_t *arena, const char *s); +char *arena_strdup(arena *arena, const char *s); -void arena_destroy(arena_t *arena); +void arena_destroy(arena *arena); -arena_savepoint_t arena_save(arena_t *arena); -void arena_restore(arena_t *arena, arena_savepoint_t sp); +arena_savepoint arena_save(arena *arena); +void arena_restore(arena *arena, arena_savepoint sp); memory *memory_create(size_t main_init_cap, size_t tmp_init_cap); memory *memory_destroy(memory *mem); diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index d6c5c11f9..990230ebf 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -39,7 +39,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { close(fd); - byte_reader_t reader; + byte_reader reader; reader_init(&reader, (const uint8_t *)map, file_size); int32_t string_table_size = reader_i32(&reader); @@ -70,10 +70,10 @@ bytecode *load_bytecode(const char *filename, memory *mem) { bc->globals_count = (size_t)globals_count; // Allocate and resolve public symbols - bc->public_symbols_count = (size_t)num_pubs; + bc->public_symbols.len = (size_t)num_pubs; if (num_pubs > 0) { - bc->public_symbols = - ARENA_ALLOC(mem->main, public_symbol_t, (size_t)num_pubs); + bc->public_symbols.data = + ARENA_ALLOC(mem->main, public_symbol, (size_t)num_pubs); reader_seek(&reader, pubs_offset); for (int32_t i = 0; i < num_pubs; i++) { 
@@ -81,22 +81,23 @@ bytecode *load_bytecode(const char *filename, memory *mem) { int32_t code_off = reader_i32(&reader); int32_t flag = reader_i32(&reader); - bc->public_symbols[i].name = string_table + name_offset; - bc->public_symbols[i].code_offset = code_off; - bc->public_symbols[i].flag = flag; + bc->public_symbols.data[i].name = string_table + name_offset; + bc->public_symbols.data[i].code_offset = code_off; + bc->public_symbols.data[i].flag = flag; } } // Allocate and resolve imports - bc->import_count = (size_t)num_imports; + bc->imports.len = (size_t)num_imports; if (num_imports > 0) { - bc->imports = ARENA_ALLOC(mem->main, const char *, (size_t)num_imports); + bc->imports.data = + ARENA_ALLOC(mem->main, const char *, (size_t)num_imports); reader_seek(&reader, imports_offset); for (int32_t i = 0; i < num_imports; i++) { int32_t name_offset = reader_i32(&reader); - bc->imports[i] = string_table + name_offset; + bc->imports.data[i] = string_table + name_offset; } } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 4066c3f64..730bcb053 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -13,9 +13,9 @@ typedef struct { const uint8_t *data; size_t size; size_t pos; -} byte_reader_t; +} byte_reader; -static inline void reader_init(byte_reader_t *r, const uint8_t *data, +static inline void reader_init(byte_reader *r, const uint8_t *data, size_t size) { r->data = data; r->size = size; @@ -25,7 +25,7 @@ static inline void reader_init(byte_reader_t *r, const uint8_t *data, /* * Read 32-bit little-endian integer and advance position */ -static inline int32_t reader_i32(byte_reader_t *r) { +static inline int32_t reader_i32(byte_reader *r) { if (r->pos + 4 > r->size) { return 0; // TODO: better error handling } @@ -34,30 +34,30 @@ static inline int32_t reader_i32(byte_reader_t *r) { return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); } -static inline uint8_t reader_u8(byte_reader_t *r) { +static inline 
uint8_t reader_u8(byte_reader *r) { if (r->pos >= r->size) { return 0; } return r->data[r->pos++]; } -static inline void reader_skip(byte_reader_t *r, size_t n) { +static inline void reader_skip(byte_reader *r, size_t n) { r->pos += n; if (r->pos > r->size) { r->pos = r->size; } } -static inline void reader_seek(byte_reader_t *r, size_t pos) { +static inline void reader_seek(byte_reader *r, size_t pos) { r->pos = pos; if (r->pos > r->size) { r->pos = r->size; } } -static inline size_t reader_pos(const byte_reader_t *r) { return r->pos; } +static inline size_t reader_pos(const byte_reader *r) { return r->pos; } -static inline bool reader_eof(const byte_reader_t *r) { +static inline bool reader_eof(const byte_reader *r) { return r->pos >= r->size; } @@ -66,7 +66,17 @@ typedef struct { int32_t code_offset; // Offset into bytecode section (for functions) or global // index int32_t flag; // PUB_FLAG_FUNCTION or PUB_FLAG_GLOBAL -} public_symbol_t; +} public_symbol; + +typedef struct { + public_symbol *data; + size_t len; +} public_symbols; + +typedef struct { + const char **data; + size_t len; +} imports; typedef struct { // Memory-mapped file @@ -79,11 +89,9 @@ typedef struct { const uint8_t *code; size_t code_size; - public_symbol_t *public_symbols; - size_t public_symbols_count; + public_symbols public_symbols; - const char **imports; - size_t import_count; + imports imports; size_t globals_count; char *module_name; diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 96accb9a3..b8c36ba4f 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -90,14 +90,14 @@ typedef struct fixup_node { size_t insn_idx; // Index in code array that needs the jump target struct fixup_node *next; -} fixup_node_t; +} fixup_node; // Metadata for each bytecode offset typedef struct { int32_t resolved_idx; // Index in generated code array (-1 if not visited) int32_t stack_depth; // Expected stack depth (-1 if not visited yet) - fixup_node_t *fixups; // 
Linked list of forward jumps pointing here -} meta_info_t; + fixup_node *fixups; // Linked list of forward jumps pointing here +} meta_info; /* * External runtime functions (runtime.c) @@ -251,7 +251,7 @@ static insn *ext_func_stub_table_find(ext_func_stub_table *table, static insn *ext_func_stub_table_add(ext_func_stub_table *table, const char *name, fn stub_fn, - arena_t *code_arena) { + arena *code_arena) { insn *stub = ARENA_ALLOC(code_arena, insn, 2); char *persistent_name = ARENA_STRDUP(code_arena, name); @@ -268,12 +268,12 @@ static insn *ext_func_stub_table_add(ext_func_stub_table *table, return stub; } -int register_public_symbols(symbol_table *st, insn *code, bytecode *bc, +int register_public_symbols(symbol_table *st, insn *code, + public_symbols *public_symbols, int32_t *offset_to_insn, int32_t global_base) { - // TODO: Pass symbols only - for (size_t i = 0; i < bc->public_symbols_count; i++) { - public_symbol_t *pub = &bc->public_symbols[i]; + for (size_t i = 0; i < public_symbols->len; i++) { + public_symbol *pub = &public_symbols->data[i]; insn *code_ptr = NULL; int32_t global_idx = 0; @@ -847,9 +847,9 @@ void op_module_end(DECL_STATE) { return; } -decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena_t *arena) { - decode_ctx_t *ctx = ARENA_NEW(arena, decode_ctx_t); +decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, + arena *arena) { + decode_ctx *ctx = ARENA_NEW(arena, decode_ctx); ctx->bc = bc; @@ -881,9 +881,9 @@ decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, * Decoding */ // TODO: /?? 
-static fixup_node_t *add_fixup(meta_info_t *meta, size_t target_off, +static fixup_node *add_fixup(meta_info *meta, size_t target_off, size_t insn_idx, memory *mem) { - fixup_node_t *node = ARENA_NEW(mem->tmp, fixup_node_t); + fixup_node *node = ARENA_NEW(mem->tmp, fixup_node); if (!node) return NULL; @@ -905,7 +905,7 @@ static bool validate_target_off(const bytecode *bc, size_t target_off, return true; } -static bool emit_ld_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, +static bool emit_ld_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, size_t global_base) { const bytecode *bc = ctx->bc; @@ -926,7 +926,7 @@ static bool emit_ld_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, return true; } -static bool emit_st_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, +static bool emit_st_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, size_t global_base) { const bytecode *bc = ctx->bc; @@ -950,7 +950,7 @@ static bool emit_st_glo(decode_ctx_t *ctx, symbol_table *st, int32_t idx, /* * Handle jump target resolution */ -static bool handle_jump(decode_ctx_t *ctx, meta_info_t *meta, +static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, int32_t depth, memory *mem) { // TODO: unsigned ?? 
int32_t target_off = reader_i32(&ctx->reader); @@ -962,7 +962,7 @@ static bool handle_jump(decode_ctx_t *ctx, meta_info_t *meta, size_t my_idx = ctx->code_len; EMIT_TARGET(ctx, NULL); // placeholder - meta_info_t *tm = &meta[target_off]; + meta_info *tm = &meta[target_off]; if (target_off < current_bc_off && tm->resolved_idx != -1) { // Backward jump ctx->code[my_idx].target = &ctx->code[tm->resolved_idx]; @@ -987,7 +987,7 @@ static bool handle_jump(decode_ctx_t *ctx, meta_info_t *meta, return true; } -insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, +insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, memory *mem) { const bytecode *bc = ctx->bc; size_t global_base = ctx->global_offset; @@ -997,7 +997,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, ctx->code = code; ctx->code_cap = code_cap; - meta_info_t *meta = ARENA_ALLOC(mem->tmp, meta_info_t, bc->code_size); + meta_info *meta = ARENA_ALLOC(mem->tmp, meta_info, bc->code_size); // Initialize meta table for (size_t i = 0; i < bc->code_size; i++) { @@ -1023,7 +1023,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, VM_DEBUG("DECODE: visiting bc_off=%zu opcode=%d code_idx=%zu\n", current_bc_off, opcode, ctx->code_len); - meta_info_t *m = &meta[current_bc_off]; + meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code_len; // Update offset map (bytecode offset -> instruction index) @@ -1043,7 +1043,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, } // Resolve forward jumps (backpatching) - for (fixup_node_t *f = m->fixups; f; f = f->next) { + for (fixup_node *f = m->fixups; f; f = f->next) { VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " "code_idx=%zu\n", current_bc_off, f->insn_idx, ctx->code_len); @@ -1418,7 +1418,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, VM_DEBUG("DECODE: OP_CLOSURE internal 
target_off=%u target_slot=%zu\n", target_off, target_slot); - meta_info_t *tm = &meta[target_off]; + meta_info *tm = &meta[target_off]; if (target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; } else { @@ -1469,7 +1469,7 @@ insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, EMIT_TARGET(ctx, NULL); EMIT_NUM(ctx, n_args); - meta_info_t *tm = &meta[(uint32_t)target_off]; + meta_info *tm = &meta[(uint32_t)target_off]; VM_DEBUG("DECODE: tm->resolved_idx=%d\n", tm->resolved_idx); if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 026c09223..15a54c483 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -77,7 +77,7 @@ typedef struct { typedef struct { int32_t *offset_to_insn; // offset_to_insn[bytecode_offset] = insn_index size_t cap; // Size of the mapping array (= bytecode size) -} offset_map_t; +} offset_map; typedef struct { @@ -87,27 +87,28 @@ typedef struct { size_t code_cap; size_t code_len; - byte_reader_t reader; - offset_map_t offset_map; + byte_reader reader; + offset_map offset_map; size_t global_offset; // Offset for global variables size_t module_end_idx; // Pointer to this module's op_module_end instruction // for linking (initialized to -1 if not found) -} decode_ctx_t; +} decode_ctx; void symbol_table_init(symbol_table *table); void symbol_table_free(symbol_table *table); void register_sysargs(symbol_table *table); void ext_func_stub_table_init(ext_func_stub_table *table); -int register_public_symbols(symbol_table *st, insn *code, bytecode *bc, +int register_public_symbols(symbol_table *st, insn *code, + public_symbols *public_symbols, int32_t *offset_to_insn, int32_t global_base); -decode_ctx_t *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena_t *arena); +decode_ctx 
*decode_ctx_create(const bytecode *bc, int32_t global_offset, + arena *arena); -insn *decode(decode_ctx_t *ctx, symbol_table *st, ext_func_stub_table *fst, +insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, memory *mem); #endif // DECODER_NEW_H diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index f9ab688e9..c852b21d1 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -12,7 +12,7 @@ extern void op_module_end(DECL_STATE); insn *decode_and_link(module_manager *mm, memory *mem) { - arena_savepoint_t sp = arena_save(mem->tmp); + arena_savepoint sp = arena_save(mem->tmp); // TODO: shoudl be redone without dynamic array symbol_table *st = ARENA_NEW(mem->tmp, symbol_table); symbol_table_init(st); @@ -27,12 +27,12 @@ insn *decode_and_link(module_manager *mm, memory *mem) { for (size_t i = 0; i < mm->modules.len; i++) { loaded_module *mod = mm->modules.data[i]; - decode_ctx_t *ctx = decode_ctx_create(mod->bc, mod->global_base, mem->tmp); + decode_ctx *ctx = decode_ctx_create(mod->bc, mod->global_base, mem->tmp); insn *mod_code = decode(ctx, st, fst, mem); // Register public symbols from this module - register_public_symbols(st, mod_code, mod->bc, + register_public_symbols(st, mod_code, &mod->bc->public_symbols, ctx->offset_map.offset_to_insn, mod->global_base); if (hd_insn == NULL) { diff --git a/virtual_machine/module_manager.c b/virtual_machine/module_manager.c index 8fc8bfb06..94614eb07 100644 --- a/virtual_machine/module_manager.c +++ b/virtual_machine/module_manager.c @@ -18,7 +18,7 @@ * Build the path to a module's .bc file. 
*/ static char *build_module_path(const char *module_name, const char *search_path, - arena_t *arena) { + arena *arena) { char *path = ARENA_ALLOC(arena, char, MAX_PATH_LEN); if (search_path && strlen(search_path) > 0) { snprintf(path, MAX_PATH_LEN, "%s/%s.bc", search_path, module_name); @@ -30,7 +30,7 @@ static char *build_module_path(const char *module_name, const char *search_path, } /* Extract module name from filename (without path and extension .bc) */ -static char *extract_module_name(const char *filename, arena_t *arena) { +static char *extract_module_name(const char *filename, arena *arena) { char *path_copy = ARENA_STRDUP(arena, filename); char *base = basename(path_copy); @@ -42,7 +42,7 @@ static char *extract_module_name(const char *filename, arena_t *arena) { return ARENA_STRDUP(arena, base); } -static char *get_directory(const char *filepath, arena_t *arena) { +static char *get_directory(const char *filepath, arena *arena) { char *path_copy = ARENA_STRDUP(arena, filepath); char *dir = dirname(path_copy); @@ -106,8 +106,8 @@ static loaded_module *load_module(module_manager *mm, const char *s, } // Recursively load dependencies - for (size_t i = 0; i < bc->import_count; i++) { - const char *import_name = bc->imports[i]; + for (size_t i = 0; i < bc->imports.len; i++) { + const char *import_name = bc->imports.data[i]; // Skip since we already have it (as runtime.a) if (strcmp(import_name, "Std") == 0) { @@ -136,7 +136,7 @@ module_manager *load_modules(const char *main_module_path, // Reserve global index 0 for sysargs mm->total_globals_count = 1; - arena_savepoint_t sp = arena_save(mem->tmp); + arena_savepoint sp = arena_save(mem->tmp); load_module(mm, main_module_path, search_path, mem); arena_restore(mem->tmp, sp); From 13d7c051b41a281913647ccbc43767a36db5c65b Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 9 Feb 2026 06:39:13 +0300 Subject: [PATCH 021/141] add separate driver `lama.c` to parse `-h` and `-I` options --- virtual_machine/Makefile | 4 +- 
virtual_machine/lama.c | 98 +++++++++++++++++++++++++++++ virtual_machine/module_manager.c | 64 ++++++++++--------- virtual_machine/module_manager.h | 7 ++- virtual_machine/regression_check.sh | 2 +- virtual_machine/vm.c | 25 +------- virtual_machine/vm.h | 3 +- 7 files changed, 145 insertions(+), 58 deletions(-) create mode 100644 virtual_machine/lama.c diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 1f9d72503..8eef75d83 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -3,8 +3,8 @@ CC = gcc CFLAGS = -Wall -Wextra -std=c99 -O0 LIBS = -lffi -ldl LDFLAGS = -rdynamic -TARGET = vm.exe -SOURCES = decoder.c vm.c bytecode.c linker.c ffi.c module_manager.c arena.c +TARGET = lama.exe +SOURCES = lama.c decoder.c vm.c bytecode.c linker.c ffi.c module_manager.c arena.c OBJECTS = $(SOURCES:.c=.o) RUNTIME_DIR = ../runtime diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c new file mode 100644 index 000000000..2c9d9fb14 --- /dev/null +++ b/virtual_machine/lama.c @@ -0,0 +1,98 @@ +#define _POSIX_C_SOURCE 200809L + +#include "../runtime/gc.h" +#include "../runtime/runtime_common.h" +#include "vm.h" +#include +#include +#include +#include +#include + +#define MAX_INCLUDE_PATHS 64 + +extern void set_args(aint argc, char *argv[]); + +static void print_usage(const char *prog_name) { + printf("Usage: %s [options] [args]\n", prog_name); + printf("\nWhen no options are specified, the VM will run the bytecode file " + "and look for modules in the same directory.\n"); + printf("Options:\n"); + printf(" -h, --help Show this help message\n"); + printf(" -I, --include PATH Add PATH to module search paths (can be " + "used multiple times)\n"); +} + +int main(int argc, char *argv[]) { + char *include_paths[MAX_INCLUDE_PATHS]; + int include_path_count = 1; // Reserve index 0 for bytecode file's directory + // TODO: better error handling in general + int exit_code = 0; + char *bytecode_dir = NULL; + search_paths paths = {0}; + + static struct 
option long_options[] = {{"help", no_argument, 0, 'h'}, + {"include", required_argument, 0, 'I'}, + {0, 0, 0, 0}}; + + int opt; + int option_index = 0; + + while ((opt = getopt_long(argc, argv, "hI:", long_options, &option_index)) != + -1) { + switch (opt) { + case 'h': + print_usage(argv[0]); + return 0; + case 'I': + if (include_path_count < MAX_INCLUDE_PATHS) { + include_paths[include_path_count++] = optarg; + } else { + fprintf(stderr, "Maximum number of include paths (%d) exceeded\n", + MAX_INCLUDE_PATHS); + return 1; + } + break; + case '?': + if (optopt) { + fprintf(stderr, "Invalid command line specifier ('-%c')\n", optopt); + return 1; + } + default: + fprintf(stderr, "Invalid command line specifier\n"); + return 1; + } + } + + if (optind >= argc) { + fprintf(stderr, "No bytecode file specified\n\n"); + print_usage(argv[0]); + return 1; + } + + char *bytecode_file = argv[optind]; + + // Inlcude main module's directory by default + bytecode_dir = strdup(dirname(bytecode_dir)); + include_paths[0] = bytecode_dir; + + paths.paths = (const char **)include_paths; + paths.len = include_path_count; + + __gc_init(); + + // Skip options, pass only program args + set_args(argc - optind, argv + optind); + + virtual_machine *vm = vm_create(bytecode_file, &paths); + if (!vm) { + exit_code = 1; + goto cleanup; + } + + vm_run(vm); + +cleanup: + free(bytecode_dir); + return exit_code; +} diff --git a/virtual_machine/module_manager.c b/virtual_machine/module_manager.c index 94614eb07..352e8a685 100644 --- a/virtual_machine/module_manager.c +++ b/virtual_machine/module_manager.c @@ -13,17 +13,20 @@ #include #include #include +#include /* - * Build the path to a module's .bc file. + * Build the path to a module's .bc file by searching through paths. 
*/ -static char *build_module_path(const char *module_name, const char *search_path, - arena *arena) { +static char *build_module_path(const char *module_name, + const search_paths *paths, arena *arena) { char *path = ARENA_ALLOC(arena, char, MAX_PATH_LEN); - if (search_path && strlen(search_path) > 0) { - snprintf(path, MAX_PATH_LEN, "%s/%s.bc", search_path, module_name); - } else { - snprintf(path, MAX_PATH_LEN, "%s.bc", module_name); + + for (size_t i = 0; i < paths->len; i++) { + snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], module_name); + if (access(path, F_OK) == 0) { + return path; + } } return path; @@ -42,17 +45,10 @@ static char *extract_module_name(const char *filename, arena *arena) { return ARENA_STRDUP(arena, base); } -static char *get_directory(const char *filepath, arena *arena) { - char *path_copy = ARENA_STRDUP(arena, filepath); - - char *dir = dirname(path_copy); - return ARENA_STRDUP(arena, dir); -} - /* * Check if a string looks like a file path (contains '/' or ends with '.bc') */ -static int is_filepath(const char *str) { +static bool is_filepath(const char *str) { size_t len = strlen(str); return strchr(str, '/') != NULL || (len > 3 && strcmp(str + len - 3, ".bc") == 0); @@ -71,40 +67,37 @@ static loaded_module *find_module(module_manager *mm, const char *name) { * Load modules recursively. 
*/ static loaded_module *load_module(module_manager *mm, const char *s, - const char *search_path, memory *mem) { + const search_paths *paths, memory *mem) { char *filepath; char *module_name; - char *derived_search_path; - // Determine if we're loading by path or by name + // The initial call uses a filepath, recursive calls use module names if (is_filepath(s)) { filepath = ARENA_STRDUP(mem->tmp, s); module_name = extract_module_name(s, mem->tmp); } else { - filepath = build_module_path(s, search_path, mem->tmp); + filepath = build_module_path(s, paths, mem->tmp); module_name = ARENA_STRDUP(mem->tmp, s); } // Check if module is already loaded (avoid duplicates and circular // dependencies) - // TODO: check ciruclar imports? + // TODO: check circular imports? loaded_module *result = find_module(mm, module_name); if (result) { return result; } bytecode *bc = load_bytecode(filepath, mem); + if (!bc) { + fprintf(stderr, "Failed to load module '%s' from '%s'\n", module_name, + filepath); + return NULL; + } + // NOTE: a bit ugly: bc->module_name = module_name; - // Determine search path for dependencies - // TODO: -I - if (search_path) { - derived_search_path = ARENA_STRDUP(mem->tmp, search_path); - } else { - derived_search_path = get_directory(filepath, mem->tmp); - } - // Recursively load dependencies for (size_t i = 0; i < bc->imports.len; i++) { const char *import_name = bc->imports.data[i]; @@ -114,7 +107,12 @@ static loaded_module *load_module(module_manager *mm, const char *s, continue; } - load_module(mm, import_name, derived_search_path, mem); + loaded_module *dep = load_module(mm, import_name, paths, mem); + if (!dep) { + fprintf(stderr, "Failed to load dependency '%s' for module '%s'\n", + import_name, module_name); + return NULL; + } } result = ARENA_NEW(mem->main, loaded_module); @@ -128,7 +126,7 @@ static loaded_module *load_module(module_manager *mm, const char *s, } module_manager *load_modules(const char *main_module_path, - const char *search_path, 
memory *mem) { + const search_paths *paths, memory *mem) { module_manager *mm = ARENA_NEW(mem->main, module_manager); @@ -137,8 +135,12 @@ module_manager *load_modules(const char *main_module_path, mm->total_globals_count = 1; arena_savepoint sp = arena_save(mem->tmp); - load_module(mm, main_module_path, search_path, mem); + loaded_module *main_mod = load_module(mm, main_module_path, paths, mem); arena_restore(mem->tmp, sp); + if (!main_mod) { + return NULL; + } + return mm; } diff --git a/virtual_machine/module_manager.h b/virtual_machine/module_manager.h index fd5563aae..dbee83a34 100644 --- a/virtual_machine/module_manager.h +++ b/virtual_machine/module_manager.h @@ -15,6 +15,11 @@ #define INITIAL_SYMBOL_TABLE_CAP 64 #define INITIAL_MODULE_CAP 8 +typedef struct { + const char **paths; + size_t len; +} search_paths; + typedef struct { bytecode *bc; // Loaded bytecode int32_t global_base; // Starting index for this module's globals @@ -36,6 +41,6 @@ typedef struct { } module_manager; module_manager *load_modules(const char *main_module_path, - const char *search_path, memory *mem); + const search_paths *paths, memory *mem); #endif diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh index dd1098fee..62c9af3e4 100755 --- a/virtual_machine/regression_check.sh +++ b/virtual_machine/regression_check.sh @@ -18,7 +18,7 @@ for test in ../regression/*.lama; do test_path="${test%.*}" test_file="${test_path##*/}" echo $test_path: $test_file - cat $test_path.input | ./vm.exe $test_file.bc >test.log 2>&1 + cat $test_path.input | ./lama.exe $test_file.bc >test.log 2>&1 sed -E '1d;s/^//' $test_path.t >test_orig.log diff -w test.log test_orig.log diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 8cdc61d9e..27e81c1c9 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -13,14 +13,15 @@ extern void set_args(aint argc, char *argv[]); extern size_t __gc_stack_top, __gc_stack_bottom; virtual_machine *vm_create(const char 
*main_module_path, - const char *search_path) { + const search_paths *paths) { // TODO: estimates memory *mem = memory_create(1024 * 1024, 4096); virtual_machine *vm = ARENA_NEW(mem->main, virtual_machine); - module_manager *mm = load_modules(main_module_path, search_path, mem); + module_manager *mm = load_modules(main_module_path, paths, mem); if (!mm) { + memory_destroy(mem); return NULL; } @@ -59,23 +60,3 @@ aint vm_run(virtual_machine *vm) { return *bp; } - -int main(int argc, char *argv[]) { - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - - __gc_init(); - - set_args(argc, argv); - - virtual_machine *vm = vm_create(argv[1], NULL); - if (!vm) { - return 1; - } - - vm_run(vm); - - return 0; -} diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index e9d2c60aa..02fadfdd9 100644 --- a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -2,6 +2,7 @@ #define VM_H #include "decoder.h" +#include "module_manager.h" #include typedef struct { @@ -11,7 +12,7 @@ typedef struct { } virtual_machine; virtual_machine *vm_create(const char *main_module_path, - const char *search_path); + const search_paths *paths); aint vm_run(virtual_machine *vm); From 1fc533f6f9e103937b97dae9b0b32996c95bc4d5 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 9 Feb 2026 06:51:39 +0300 Subject: [PATCH 022/141] free vm memory --- virtual_machine/lama.c | 1 + virtual_machine/vm.c | 6 ++++-- virtual_machine/vm.h | 4 ++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 2c9d9fb14..8df7345bd 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -94,5 +94,6 @@ int main(int argc, char *argv[]) { cleanup: free(bytecode_dir); + vm_destroy(vm); return exit_code; } diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 27e81c1c9..e737cfdfc 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -1,5 +1,4 @@ #include "vm.h" -#include "../runtime/gc.h" #include 
"../runtime/runtime_common.h" #include "arena.h" #include "decoder.h" @@ -28,12 +27,15 @@ virtual_machine *vm_create(const char *main_module_path, vm->globals_count = mm->total_globals_count; insn *entry_point = decode_and_link(mm, mem); - vm->entry_point = entry_point; + vm->mem = mem; + return vm; } +void vm_destroy(virtual_machine *vm) { memory_destroy(vm->mem); } + aint vm_run(virtual_machine *vm) { // TODO: this is all very ugly diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index 02fadfdd9..d84619a3b 100644 --- a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -1,6 +1,7 @@ #ifndef VM_H #define VM_H +#include "arena.h" #include "decoder.h" #include "module_manager.h" #include @@ -8,12 +9,15 @@ typedef struct { size_t globals_count; // Number of globals insn *entry_point; // Entry point instruction + memory *mem; // Memory managed by arenas } virtual_machine; virtual_machine *vm_create(const char *main_module_path, const search_paths *paths); +void vm_destroy(virtual_machine *vm); + aint vm_run(virtual_machine *vm); #endif // VM_H From b3fb5e07ec8fdb0527b1f1b5f1240c98c6f8825b Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 10:44:16 +0300 Subject: [PATCH 023/141] `_ne` -> `_le` --- src/SM.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index cdbecb6d6..290514af0 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -384,11 +384,11 @@ module ByteCode = struct let str_off = StringTab.add st l in -(str_off + 1) in - Bytes.set_int32_ne code addr_ofs (Int32.of_int resolved_addr)) + Bytes.set_int32_le code addr_ofs (Int32.of_int resolved_addr)) !func_fixups; List.iter (fun (ofs, l) -> - Bytes.set_int32_ne code ofs + Bytes.set_int32_le code ofs (Int32.of_int @@ try Hashtbl.find lmap l From a779549e26d0fa11037c08c036929451a7e80021 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 11:02:52 +0300 Subject: [PATCH 024/141] fix `unescape` --- src/SM.ml | 9 +++------ 1 file changed, 3 insertions(+), 6 
deletions(-) diff --git a/src/SM.ml b/src/SM.ml index 290514af0..993a35ba9 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -206,6 +206,9 @@ module ByteCode = struct let rec iterate i = if i < n then match x.[i] with + | '"' -> + Buffer.add_char buf '"'; + iterate (i + 1) | '\\' -> ( if i + 1 >= n then Buffer.add_char buf '\\' @@ -220,12 +223,6 @@ module ByteCode = struct | 'r' -> Buffer.add_char buf '\r'; iterate (i + 2) - | '"' -> - Buffer.add_char buf '"'; - iterate (i + 2) - | '\\' -> - Buffer.add_char buf '\\'; - iterate (i + 2) | _ -> Buffer.add_char buf '\\'; iterate (i + 1)) From d44586d8420236681dc5d8c1c422a02e800978eb Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 12:05:56 +0300 Subject: [PATCH 025/141] `close(fd)` --- virtual_machine/bytecode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 990230ebf..55b698b16 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -18,6 +18,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { int fd = open(filename, O_RDONLY); if (fd < 0) { perror("bytecode_load: open"); + close(fd); return NULL; } From 4601ee591a7a55f5fb0a2a2117675b9e7c80c3a1 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 12:36:20 +0300 Subject: [PATCH 026/141] add missing `da.h` --- virtual_machine/da.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 virtual_machine/da.h diff --git a/virtual_machine/da.h b/virtual_machine/da.h new file mode 100644 index 000000000..7902da91b --- /dev/null +++ b/virtual_machine/da.h @@ -0,0 +1,35 @@ +#ifndef DA_H +#define DA_H + +/* + * Dynamic array macros + */ +#define da_append(xs, x) \ + do { \ + if (xs.len >= xs.cap) { \ + xs.cap = xs.cap == 0 ? 
256 : xs.cap * 2; \ + xs.data = realloc(xs.data, xs.cap * sizeof(*xs.data)); \ + if (!xs.data) { \ + perror("realloc"); \ + exit(1); \ + } \ + } \ + xs.data[xs.len++] = x; \ + } while (0) + +#define da_init(xs) \ + do { \ + (xs).data = NULL; \ + (xs).len = 0; \ + (xs).cap = 0; \ + } while (0) + +#define da_free(xs) \ + do { \ + free((xs).data); \ + (xs).data = NULL; \ + (xs).len = 0; \ + (xs).cap = 0; \ + } while (0) + +#endif // DA_H From 653e004f01b55853ec82c38902f5eb78ed19f360 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 14:51:21 +0300 Subject: [PATCH 027/141] cleanup --- src/SM.ml | 2 +- virtual_machine/decoder.c | 24 +++++------------------- virtual_machine/decoder.h | 4 ++-- virtual_machine/opcodes.h | 1 - 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index 993a35ba9..c1b527738 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -243,7 +243,7 @@ module ByteCode = struct if S.mem s !extern_globals then begin let str_off = StringTab.add st s in add_bytes [ b 0 ]; - add_ints [ -(str_off + 1) ] + add_ints [ -str_off - 1 ] end else begin let i = add_global s in add_bytes [ b 0 ]; diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index b8c36ba4f..ba3f789c2 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -96,7 +96,7 @@ typedef struct fixup_node { typedef struct { int32_t resolved_idx; // Index in generated code array (-1 if not visited) int32_t stack_depth; // Expected stack depth (-1 if not visited yet) - fixup_node *fixups; // Linked list of forward jumps pointing here + fixup_node *fixups; // Linked list of forward jumps pointing here } meta_info; /* @@ -704,7 +704,6 @@ void op_callc(DECL_STATE) { STACK_PUSH(sp, (aint)bp); aint *new_bp = sp + 1; - target->func(target, sp, new_bp, globals); aint ret_val = *new_bp; @@ -717,14 +716,6 @@ void op_callc(DECL_STATE) { DISPATCH(); } -void op_ret(DECL_STATE) { - aint ret_val = STACK_PEEK(sp); - VM_TRACE_CALL("RET sp=%p ret_val=%ld 
bp=%p\n", (void *)sp, (long)ret_val, - (void *)bp); - *bp = ret_val; - return; -} - void op_end(DECL_STATE) { VM_TRACE_CALL("END sp=%p\n", (void *)sp); aint ret_val = STACK_PEEK(sp); @@ -848,7 +839,7 @@ void op_module_end(DECL_STATE) { } decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena *arena) { + arena *arena) { decode_ctx *ctx = ARENA_NEW(arena, decode_ctx); ctx->bc = bc; @@ -882,7 +873,7 @@ decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, */ // TODO: /?? static fixup_node *add_fixup(meta_info *meta, size_t target_off, - size_t insn_idx, memory *mem) { + size_t insn_idx, memory *mem) { fixup_node *node = ARENA_NEW(mem->tmp, fixup_node); if (!node) return NULL; @@ -950,8 +941,8 @@ static bool emit_st_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, /* * Handle jump target resolution */ -static bool handle_jump(decode_ctx *ctx, meta_info *meta, - size_t current_bc_off, int32_t depth, memory *mem) { +static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, + int32_t depth, memory *mem) { // TODO: unsigned ?? int32_t target_off = reader_i32(&ctx->reader); @@ -1488,11 +1479,6 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, break; } - case OP_RET: - EMIT_FUNC(ctx, op_ret); - DEPTH_DEAD(depth); - break; - case OP_END: EMIT_FUNC(ctx, op_end); DEPTH_DEAD(depth); diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 15a54c483..aa3665040 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -29,7 +29,7 @@ typedef union insn { /* * Sentinel value for external references (both functions and globals). - * Address = -(index + 1), so index 0 becomes -1, index 1 becomes -2, etc. + * Address = -index - 1, so index 0 becomes -1, index 1 becomes -2, etc. 
*/ #define TO_EXT_REF(idx) (-(idx) - 1) #define IS_EXT_REF(addr) ((addr) < 0) @@ -106,7 +106,7 @@ int register_public_symbols(symbol_table *st, insn *code, int32_t *offset_to_insn, int32_t global_base); decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena *arena); + arena *arena); insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, memory *mem); diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 0244ae3d3..072675de9 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -23,7 +23,6 @@ typedef enum { OP_STA = 0x14, OP_JMP = 0x15, OP_END = 0x16, - OP_RET = 0x17, OP_DROP = 0x18, OP_DUP = 0x19, OP_SWAP = 0x1A, From cc6ba3e7a2d303998fcc72c9fc1532d577afa719 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 14:52:10 +0300 Subject: [PATCH 028/141] add free for stub table --- virtual_machine/decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index ba3f789c2..0f1d23ffd 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -239,6 +239,8 @@ void register_sysargs(symbol_table *table) { void ext_func_stub_table_init(ext_func_stub_table *table) { da_init(*table); } +void ext_func_stub_table_free(ext_func_stub_table *table) { da_free(*table); } + static insn *ext_func_stub_table_find(ext_func_stub_table *table, const char *name) { for (size_t i = 0; i < table->len; i++) { From 3df218379258a5ba2059407b045575d1b87b7384 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 10 Feb 2026 15:14:43 +0300 Subject: [PATCH 029/141] change vm --- virtual_machine/Makefile | 2 +- virtual_machine/arena.c | 153 ---- virtual_machine/arena.h | 52 -- virtual_machine/bytecode.c | 28 +- virtual_machine/bytecode.h | 60 +- virtual_machine/bytecode_util.h | 60 ++ virtual_machine/call_stack.c | 57 -- virtual_machine/call_stack.h | 35 - virtual_machine/da.h | 14 +- virtual_machine/decoder.c | 1112 
++++++------------------------ virtual_machine/decoder.h | 120 +--- virtual_machine/ffi.c | 62 +- virtual_machine/ffi.h | 20 +- virtual_machine/insn.h | 31 + virtual_machine/interpreter.c | 658 ------------------ virtual_machine/lama.c | 28 +- virtual_machine/linker.c | 179 +++-- virtual_machine/linker.h | 17 +- virtual_machine/loader.c | 137 ++++ virtual_machine/loader.h | 18 + virtual_machine/memory.c | 31 + virtual_machine/memory.h | 20 + virtual_machine/module_manager.c | 146 ---- virtual_machine/module_manager.h | 46 -- virtual_machine/opcodes.c | 128 ---- virtual_machine/opcodes.h | 2 +- virtual_machine/ops.c | 672 ++++++++++++++++++ virtual_machine/ops.h | 69 ++ virtual_machine/symbols.c | 66 ++ virtual_machine/symbols.h | 35 + virtual_machine/vm.c | 62 +- virtual_machine/vm.h | 17 +- 32 files changed, 1676 insertions(+), 2461 deletions(-) delete mode 100644 virtual_machine/arena.c delete mode 100644 virtual_machine/arena.h create mode 100644 virtual_machine/bytecode_util.h delete mode 100644 virtual_machine/call_stack.c delete mode 100644 virtual_machine/call_stack.h create mode 100644 virtual_machine/insn.h delete mode 100755 virtual_machine/interpreter.c create mode 100644 virtual_machine/loader.c create mode 100644 virtual_machine/loader.h create mode 100644 virtual_machine/memory.c create mode 100644 virtual_machine/memory.h delete mode 100644 virtual_machine/module_manager.c delete mode 100644 virtual_machine/module_manager.h delete mode 100644 virtual_machine/opcodes.c create mode 100644 virtual_machine/ops.c create mode 100644 virtual_machine/ops.h create mode 100644 virtual_machine/symbols.c create mode 100644 virtual_machine/symbols.h diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 8eef75d83..9de59c012 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -Wextra -std=c99 -O0 LIBS = -lffi -ldl LDFLAGS = -rdynamic TARGET = lama.exe -SOURCES = lama.c decoder.c vm.c bytecode.c 
linker.c ffi.c module_manager.c arena.c +SOURCES = lama.c decoder.c vm.c bytecode.c linker.c ffi.c loader.c symbols.c ops.c memory.c OBJECTS = $(SOURCES:.c=.o) RUNTIME_DIR = ../runtime diff --git a/virtual_machine/arena.c b/virtual_machine/arena.c deleted file mode 100644 index a48fa8b46..000000000 --- a/virtual_machine/arena.c +++ /dev/null @@ -1,153 +0,0 @@ -#include "arena.h" -#include -#include -#include -#include - -#define MIN_BLOCK_SIZE 4096 - -static arena_block *block_create(size_t data_size) { - arena_block *b = malloc(sizeof(arena_block) + data_size); - if (!b) { - perror("arena: block_create malloc"); - exit(1); - } - b->next = NULL; - b->size = data_size; - b->used = 0; - return b; -} - -arena *arena_create(size_t init_cap) { - arena *a = malloc(sizeof(arena)); - if (!a) { - perror("arena: arena_create malloc"); - exit(1); - } - - size_t cap = init_cap < MIN_BLOCK_SIZE ? MIN_BLOCK_SIZE : init_cap; - arena_block *b = block_create(cap); - a->head = b; - a->current = b; - a->block_size = cap; - return a; -} - -void *arena_alloc(arena *arena, size_t size, size_t align) { - assert((align & (align - 1)) == 0); - - arena_block *blk = arena->current; - - // Align within current block - size_t mask = align - 1; - uintptr_t base = (uintptr_t)(blk->data + blk->used); - size_t padding = (align - (base & mask)) & mask; - size_t needed = padding + size; - - if (blk->used + needed <= blk->size) { - void *ptr = blk->data + blk->used + padding; - blk->used += needed; - return ptr; - } - - // New block must be large enough for this request (including worst-case - // alignment padding) and at least as big as the default block_size. 
- size_t new_cap = arena->block_size; - size_t alloc_need = size + align; // worst-case with alignment - if (new_cap < alloc_need) - new_cap = alloc_need; - - arena_block *nb = block_create(new_cap); - blk->next = nb; - arena->current = nb; - - // Align within the fresh block (used == 0, so padding is usually 0) - base = (uintptr_t)(nb->data); - padding = (align - (base & mask)) & mask; - - void *ptr = nb->data + padding; - nb->used = padding + size; - return ptr; -} - -// TODO: cleanup macro? -arena_savepoint arena_save(arena *arena) { - arena_savepoint sp = {.block = arena->current, - .used = arena->current->used}; - return sp; -}; - -void arena_restore(arena *arena, arena_savepoint sp) { - if (!arena || !sp.block) - return; - - arena_block *b = arena->head; - - // Walk to the savepoint block - while (b && b != sp.block) { - b->used = 0; - b = b->next; - } - // Restore usage - b->used = sp.used; - - arena_block *to_free = b->next; - b->next = NULL; - - while (to_free) { - arena_block *next = to_free->next; - free(to_free); - to_free = next; - } - - arena->current = b; -} - -memory *memory_create(size_t main_init_cap, size_t tmp_init_cap) { - memory *mem = malloc(sizeof(memory)); - if (!mem) { - perror("memory_create malloc"); - exit(1); - } - mem->main = arena_create(main_init_cap); - mem->tmp = arena_create(tmp_init_cap); - mem->code = arena_create(4096); - return mem; -} - -memory *memory_destroy(memory *mem) { - if (!mem) - return NULL; - - if (mem->main) - arena_destroy(mem->main); - if (mem->tmp) - arena_destroy(mem->tmp); - if (mem->code) - arena_destroy(mem->code); - free(mem); - return NULL; -} - -char *arena_strdup(arena *arena, const char *s) { - if (!s) - return NULL; - - size_t len = strlen(s) + 1; - char *dst = (char *)arena_alloc(arena, len, 1); - memcpy(dst, s, len); - return dst; -} - -void arena_destroy(arena *arena) { - if (!arena) - return; - - arena_block *b = arena->head; - while (b) { - arena_block *next = b->next; - free(b); - b = next; - } 
- free(arena); -} diff --git a/virtual_machine/arena.h b/virtual_machine/arena.h deleted file mode 100644 index c612e4452..000000000 --- a/virtual_machine/arena.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef ARENA_H -#define ARENA_H - -#include -#include - -typedef struct arena_block { - struct arena_block *next; - size_t size; // Total capacity of this block's data region - size_t used; // Bytes used in this block - char data[]; -} arena_block; - -typedef struct { - arena_block *block; - size_t used; -} arena_savepoint; - -typedef struct { - arena_block *head; // First block (for traversal / destroy) - arena_block *current; // Current block we're allocating from - size_t block_size; // Default size for new blocks -} arena; - -typedef struct { - arena *main; - arena *tmp; - arena *code; // For now this is only FFI stubs -} memory; - -arena *arena_create(size_t init_cap); - -void *arena_alloc(arena *arena, size_t size, size_t align); - -char *arena_strdup(arena *arena, const char *s); - -void arena_destroy(arena *arena); - -arena_savepoint arena_save(arena *arena); -void arena_restore(arena *arena, arena_savepoint sp); - -memory *memory_create(size_t main_init_cap, size_t tmp_init_cap); -memory *memory_destroy(memory *mem); - -#define ARENA_ALLOC(a, T, n) \ - ((T *)arena_alloc((a), sizeof(T) * (n), _Alignof(T))) - -#define ARENA_NEW(a, T) ARENA_ALLOC(a, T, 1) - -#define ARENA_STRDUP(a, s) arena_strdup((a), (s)) - -#endif // ARENA_H diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 55b698b16..79c551af0 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,6 +1,7 @@ #define _POSIX_C_SOURCE 200809L #include "bytecode.h" -#include "arena.h" +#include "bytecode_util.h" +#include "memory.h" #include #include #include @@ -14,11 +15,10 @@ #define PUB_ENTRY_SIZE 12 #define IMPORT_ENTRY_SIZE 4 -bytecode *load_bytecode(const char *filename, memory *mem) { +bytecode *bytecode_load(const char *filename) { int fd = open(filename, 
O_RDONLY); if (fd < 0) { perror("bytecode_load: open"); - close(fd); return NULL; } @@ -35,6 +35,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { if (map == MAP_FAILED) { perror("bytecode_load: mmap"); + close(fd); return NULL; } @@ -59,7 +60,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { const uint8_t *data = (const uint8_t *)map; const char *string_table = (const char *)(data + st_offset); - bytecode *bc = ARENA_NEW(mem->main, bytecode); + bytecode *bc = ALLOC(bytecode); bc->map_base = map; bc->map_size = file_size; @@ -73,8 +74,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { // Allocate and resolve public symbols bc->public_symbols.len = (size_t)num_pubs; if (num_pubs > 0) { - bc->public_symbols.data = - ARENA_ALLOC(mem->main, public_symbol, (size_t)num_pubs); + bc->public_symbols.data = ALLOC_ARRAY(public_symbol, num_pubs); reader_seek(&reader, pubs_offset); for (int32_t i = 0; i < num_pubs; i++) { @@ -91,8 +91,7 @@ bytecode *load_bytecode(const char *filename, memory *mem) { // Allocate and resolve imports bc->imports.len = (size_t)num_imports; if (num_imports > 0) { - bc->imports.data = - ARENA_ALLOC(mem->main, const char *, (size_t)num_imports); + bc->imports.data = ALLOC_ARRAY(const char *, (size_t)num_imports); reader_seek(&reader, imports_offset); for (int32_t i = 0; i < num_imports; i++) { @@ -102,11 +101,16 @@ bytecode *load_bytecode(const char *filename, memory *mem) { } } + // will be set later + bc->name = NULL; + return bc; } -void free_bytecode(bytecode *bc) { - munmap(bc->map_base, bc->map_size); - // NOTE: bc itself, public_symbols, imports, and module_name - // are all allocated from arena and will be freed when arena is destroyed. 
+void bytecode_free(bytecode *bc) { + munmap(bc->map_base, bc->map_size); + free(bc->public_symbols.data); + free(bc->imports.data); + free((void *)bc->name); + free(bc); } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 730bcb053..e8b41d5e6 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -1,7 +1,6 @@ #ifndef BYTECODE_NEW_H #define BYTECODE_NEW_H -#include "arena.h" #include #include #include @@ -9,58 +8,6 @@ #define PUB_FLAG_FUNCTION 0 #define PUB_FLAG_GLOBAL 1 -typedef struct { - const uint8_t *data; - size_t size; - size_t pos; -} byte_reader; - -static inline void reader_init(byte_reader *r, const uint8_t *data, - size_t size) { - r->data = data; - r->size = size; - r->pos = 0; -} - -/* - * Read 32-bit little-endian integer and advance position - */ -static inline int32_t reader_i32(byte_reader *r) { - if (r->pos + 4 > r->size) { - return 0; // TODO: better error handling - } - const uint8_t *p = r->data + r->pos; - r->pos += 4; - return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); -} - -static inline uint8_t reader_u8(byte_reader *r) { - if (r->pos >= r->size) { - return 0; - } - return r->data[r->pos++]; -} - -static inline void reader_skip(byte_reader *r, size_t n) { - r->pos += n; - if (r->pos > r->size) { - r->pos = r->size; - } -} - -static inline void reader_seek(byte_reader *r, size_t pos) { - r->pos = pos; - if (r->pos > r->size) { - r->pos = r->size; - } -} - -static inline size_t reader_pos(const byte_reader *r) { return r->pos; } - -static inline bool reader_eof(const byte_reader *r) { - return r->pos >= r->size; -} - typedef struct { const char *name; // Direct pointer to string int32_t code_offset; // Offset into bytecode section (for functions) or global @@ -94,12 +41,13 @@ typedef struct { imports imports; size_t globals_count; - char *module_name; + + const char *name; } bytecode; -bytecode *load_bytecode(const char *filename, memory *mem); +bytecode *bytecode_load(const 
char *filename); -void free_bytecode(bytecode *bc); +void bytecode_free(bytecode *bc); /* * Get string from string table by offset diff --git a/virtual_machine/bytecode_util.h b/virtual_machine/bytecode_util.h new file mode 100644 index 000000000..7afeeea72 --- /dev/null +++ b/virtual_machine/bytecode_util.h @@ -0,0 +1,60 @@ +#ifndef BYTECODE_UTIL_H +#define BYTECODE_UTIL_H + +#include +#include +#include + +typedef struct { + const uint8_t *data; + size_t size; + size_t pos; +} byte_reader; + +static inline void reader_init(byte_reader *r, const uint8_t *data, + size_t size) { + r->data = data; + r->size = size; + r->pos = 0; +} + +/* + * Read 32-bit little-endian integer and advance position + */ +static inline int32_t reader_i32(byte_reader *r) { + if (r->pos + 4 > r->size) { + return 0; // TODO: better error handling + } + const uint8_t *p = r->data + r->pos; + r->pos += 4; + return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); +} + +static inline uint8_t reader_u8(byte_reader *r) { + if (r->pos >= r->size) { + return 0; + } + return r->data[r->pos++]; +} + +static inline void reader_skip(byte_reader *r, size_t n) { + r->pos += n; + if (r->pos > r->size) { + r->pos = r->size; + } +} + +static inline void reader_seek(byte_reader *r, size_t pos) { + r->pos = pos; + if (r->pos > r->size) { + r->pos = r->size; + } +} + +static inline size_t reader_pos(const byte_reader *r) { return r->pos; } + +static inline bool reader_eof(const byte_reader *r) { + return r->pos >= r->size; +} + +#endif // BYTECODE_UTIL_H diff --git a/virtual_machine/call_stack.c b/virtual_machine/call_stack.c deleted file mode 100644 index 25d9386d6..000000000 --- a/virtual_machine/call_stack.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Call stack management for the Lama VM. - * Tracks function activation records (frames), including return addresses, - * base pointers, and arguments. 
- */ - -#include "call_stack.h" -#include -#include -#include - -void call_stack_init(call_stack_t *cs) { - cs->top = 0; - memset(cs->frames, 0, sizeof(cs->frames)); -} - -void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, - int n_locals, aint closure) { - if (cs->top >= MAX_CALL_DEPTH) { - fprintf(stderr, "Call stack overflow\n"); - exit(1); - } - - call_frame_t *frame = &cs->frames[cs->top++]; - frame->return_ip = return_ip; - frame->base = base; - frame->n_args = n_args; - frame->n_locals = n_locals; - frame->closure = closure; -} - -call_frame_t call_stack_pop(call_stack_t *cs) { - if (cs->top <= 0) { - fprintf(stderr, "Cannot pop from an empty call stack\n"); - exit(1); - } - - return cs->frames[--cs->top]; -} - -call_frame_t call_stack_peek(call_stack_t *cs) { - if (cs->top <= 0) { - fprintf(stderr, "Cannot peek from an empty call stack\n"); - exit(1); - } - - return cs->frames[cs->top - 1]; -} - -call_frame_t *call_stack_current(call_stack_t *cs) { - if (cs->top <= 0) { - return NULL; - } - return &cs->frames[cs->top - 1]; -} - -int call_stack_is_empty(call_stack_t *cs) { return cs->top == 0; } diff --git a/virtual_machine/call_stack.h b/virtual_machine/call_stack.h deleted file mode 100644 index aaea3599e..000000000 --- a/virtual_machine/call_stack.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef CALL_STACK_H -#define CALL_STACK_H - -#include -#include "../runtime/runtime_common.h" - -#define MAX_CALL_DEPTH 1024 - -typedef struct { - int return_ip; - int base; - int n_args; - int n_locals; - aint closure; // 0 if not a closure call -} call_frame_t; - -typedef struct { - call_frame_t frames[MAX_CALL_DEPTH]; - int top; -} call_stack_t; - -void call_stack_init(call_stack_t *cs); - -void call_stack_push(call_stack_t *cs, int return_ip, int base, int n_args, - int n_locals, aint closure); - -call_frame_t call_stack_pop(call_stack_t *cs); - -call_frame_t call_stack_peek(call_stack_t *cs); - -call_frame_t *call_stack_current(call_stack_t *cs); - 
-int call_stack_is_empty(call_stack_t *cs); - -#endif diff --git a/virtual_machine/da.h b/virtual_machine/da.h index 7902da91b..395189273 100644 --- a/virtual_machine/da.h +++ b/virtual_machine/da.h @@ -1,20 +1,22 @@ #ifndef DA_H #define DA_H +#include "memory.h" + /* * Dynamic array macros */ #define da_append(xs, x) \ do { \ - if (xs.len >= xs.cap) { \ - xs.cap = xs.cap == 0 ? 256 : xs.cap * 2; \ - xs.data = realloc(xs.data, xs.cap * sizeof(*xs.data)); \ - if (!xs.data) { \ + if ((xs).len >= (xs).cap) { \ + (xs).cap = (xs).cap == 0 ? 256 : (xs).cap * 2; \ + (xs).data = EREALLOC((xs).data, (xs).cap * sizeof(*(xs).data)); \ + if (!(xs).data) { \ perror("realloc"); \ - exit(1); \ + exit(EXIT_FAILURE); \ } \ } \ - xs.data[xs.len++] = x; \ + (xs).data[(xs).len++] = x; \ } while (0) #define da_init(xs) \ diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 0f1d23ffd..db0fcc4e8 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -1,53 +1,27 @@ #include "decoder.h" -#include "../runtime/runtime_common.h" -#include "bytecode.h" #include "da.h" -#include "ffi.h" +#include "memory.h" #include "opcodes.h" -#include +#include "ops.h" #include #include #include #include -/* - * Debug macros - */ +// TODO: conolidate #ifdef DEBUG_PRINT #define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) -#define VM_TRACE_STACK(stack) \ - do { \ - long sp_idx = (stack)->sp - (stack)->data; \ - fprintf(stderr, " stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ - for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ - if (sp_idx + i < STACK_SIZE) { \ - fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ - } \ - } \ - fprintf(stderr, "\n"); \ - } while (0) -#define VM_TRACE_CALL(fmt, ...) fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) -#define VM_ASSERT(cond, msg) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ - __LINE__); \ - exit(1); \ - } \ - } while (0) #else #define VM_DEBUG(fmt, ...) 
-#define VM_TRACE_STACK(stack) -#define VM_TRACE_CALL(fmt, ...) -#define VM_ASSERT(cond, msg) #endif /* - * Stack manipulation macros (stack grows downwards) + * Sentinel value for external references (both functions and globals). + * Address = -index - 1, so index 0 becomes -1, index 1 becomes -2, etc. */ -#define STACK_PUSH(sp, val) (*sp-- = (val)) -#define STACK_POP(sp) (*++sp) -#define STACK_PEEK(sp) (*(sp + 1)) +#define TO_EXT_REF(idx) (-(idx) - 1) +#define IS_EXT_REF(addr) ((addr) < 0) +#define EXT_REF_INDEX(addr) (-(addr) - 1) /* * Symbolic stack depth tracking macros used during decoding @@ -87,6 +61,12 @@ (ctx)->code[(ctx)->code_len++].target = (t); \ } while (0) +fn decoder_get_op_call(void) { return op_call; } + +fn decoder_get_op_call_ffi_stub(void) { return op_call_ffi_stub; } + +fn decoder_get_op_callc_ffi_stub(void) { return op_callc_ffi_stub; } + typedef struct fixup_node { size_t insn_idx; // Index in code array that needs the jump target struct fixup_node *next; @@ -94,789 +74,65 @@ typedef struct fixup_node { // Metadata for each bytecode offset typedef struct { + insn *insn; // NULL if not visited int32_t resolved_idx; // Index in generated code array (-1 if not visited) int32_t stack_depth; // Expected stack depth (-1 if not visited yet) fixup_node *fixups; // Linked list of forward jumps pointing here } meta_info; -/* - * External runtime functions (runtime.c) - */ -extern aint Lread(void); -extern aint Lwrite(aint n); -extern aint Ls__Infix_43(void *p, void *q); // + -extern aint Ls__Infix_45(void *p, void *q); // - -extern aint Ls__Infix_42(void *p, void *q); // * -extern aint Ls__Infix_47(void *p, void *q); // / -extern aint Ls__Infix_37(void *p, void *q); // % -extern aint Ls__Infix_60(void *p, void *q); // < -extern aint Ls__Infix_6061(void *p, void *q); // <= -extern aint Ls__Infix_62(void *p, void *q); // > -extern aint Ls__Infix_6261(void *p, void *q); // >= -extern aint Ls__Infix_6161(void *p, void *q); // == -extern aint 
Ls__Infix_3361(void *p, void *q); // != -extern aint Ls__Infix_3838(void *p, void *q); // && -extern aint Ls__Infix_3333(void *p, void *q); // || - -extern aint Llength(void *p); -extern void *Lstring(aint *args); -extern aint LtagHash(char *s); -extern void *Barray(aint *args, aint bn); -extern void *Bsexp(aint *args, aint bn); -extern void *Bclosure(aint *args, aint bn); -extern void *Bstring(aint *args); -extern void *Belem(void *p, aint i); -extern void *Bsta(void *x, aint i, void *v); - -extern aint Btag(void *d, aint t, aint n); -extern aint Barray_patt(void *d, aint n); -extern aint Bstring_patt(void *x, void *y); -extern aint Bclosure_tag_patt(void *x); -extern aint Bboxed_patt(void *x); -extern aint Bunboxed_patt(void *x); -extern aint Barray_tag_patt(void *x); -extern aint Bstring_tag_patt(void *x); -extern aint Bsexp_tag_patt(void *x); - -#define DISPATCH() \ - do { \ - ip++; \ - __attribute__((musttail)) return ip->func(STATE); \ - } while (0) - -#define DISPATCH_JUMP() \ - do { \ - __attribute__((musttail)) return ip->func(STATE); \ - } while (0) - -/* - * Opcode handlers - */ -void op_const(DECL_STATE) { - ip++; - aint val = ip->num; - VM_DEBUG("CONST: %ld\n", (long)val); - STACK_PUSH(sp, BOX(val)); - DISPATCH(); -} - -#define DEFINE_BINOP(name, fn, opname) \ - static void name(DECL_STATE) { \ - aint y = STACK_POP(sp); \ - aint x = STACK_POP(sp); \ - VM_DEBUG(opname ": x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); \ - aint res = fn((void *)x, (void *)y); \ - VM_DEBUG(opname " result=%ld\n", (long)UNBOX(res)); \ - STACK_PUSH(sp, res); \ - DISPATCH(); \ - } - -DEFINE_BINOP(op_add, Ls__Infix_43, "ADD") -DEFINE_BINOP(op_sub, Ls__Infix_45, "SUB") -DEFINE_BINOP(op_mul, Ls__Infix_42, "MUL") -DEFINE_BINOP(op_lt, Ls__Infix_60, "LT") -DEFINE_BINOP(op_le, Ls__Infix_6061, "LE") -DEFINE_BINOP(op_gt, Ls__Infix_62, "GT") -DEFINE_BINOP(op_ge, Ls__Infix_6261, "GE") -DEFINE_BINOP(op_eq, Ls__Infix_6161, "EQ") -DEFINE_BINOP(op_ne, Ls__Infix_3361, "NE") 
-DEFINE_BINOP(op_and, Ls__Infix_3838, "AND") -DEFINE_BINOP(op_or, Ls__Infix_3333, "OR") - -void symbol_table_init(symbol_table *table) { da_init(*table); } - -void symbol_table_free(symbol_table *table) { da_free(*table); } - -static resolved_symbol *symbol_table_find(symbol_table *table, - const char *name) { - for (size_t i = 0; i < table->len; i++) { - if (strcmp(table->data[i].name, name) == 0) { - return &table->data[i]; - } - } - return NULL; -} - -// TODO: Make two separate functions for functions and globals? -// and structures? -static int symbol_table_add(symbol_table *table, const char *name, - insn *code_ptr, int32_t global_idx, - bool is_function) { - - // TODO: handle main in another way? - if (strcmp(name, "main") != 0) { - resolved_symbol *existing = symbol_table_find(table, name); - // Update with the new definition - if (existing) { - existing->code_ptr = code_ptr; - existing->global_idx = global_idx; - existing->is_function = is_function; - return 0; - } - } - - resolved_symbol entry = { - .name = name, - .code_ptr = code_ptr, - .global_idx = global_idx, - .is_function = is_function, - }; - - symbol_table tmp = *table; - da_append(tmp, entry); - *table = tmp; - - return 0; -} - -/* - * Register sysargs separately because it's not stored explicitly during - * execution - */ -void register_sysargs(symbol_table *table) { - symbol_table_add(table, "global_sysargs", NULL, 0, false); -} - -void ext_func_stub_table_init(ext_func_stub_table *table) { da_init(*table); } - -void ext_func_stub_table_free(ext_func_stub_table *table) { da_free(*table); } - -static insn *ext_func_stub_table_find(ext_func_stub_table *table, - const char *name) { - for (size_t i = 0; i < table->len; i++) { - if (strcmp(table->data[i].name, name) == 0) { - return table->data[i].stub; - } - } - return NULL; -} - -static insn *ext_func_stub_table_add(ext_func_stub_table *table, - const char *name, fn stub_fn, - arena *code_arena) { - insn *stub = ARENA_ALLOC(code_arena, insn, 2); - 
- char *persistent_name = ARENA_STRDUP(code_arena, name); - - stub[0].func = stub_fn; - stub[1].str = persistent_name; - - ext_func_stub_entry entry = {.name = persistent_name, .stub = stub}; - ext_func_stub_table tmp = *table; - da_append(tmp, entry); - *table = tmp; - - VM_DEBUG("EXT_FUNC_STUB_TABLE: added '%s' -> stub=%p\n", name, (void *)stub); - return stub; -} - -int register_public_symbols(symbol_table *st, insn *code, - public_symbols *public_symbols, - int32_t *offset_to_insn, int32_t global_base) { - - for (size_t i = 0; i < public_symbols->len; i++) { - public_symbol *pub = &public_symbols->data[i]; - - insn *code_ptr = NULL; - int32_t global_idx = 0; - // TODO: ugly - bool is_function = (pub->flag == PUB_FLAG_FUNCTION); - - if (is_function) { - int32_t insn_idx = offset_to_insn[pub->code_offset]; - code_ptr = &code[insn_idx]; - } else { - // Global variable - rebase index with module's global base - global_idx = pub->code_offset + global_base; - } - - symbol_table_add(st, pub->name, code_ptr, global_idx, is_function); - } - return 0; -} - -void op_div(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - exit(1); - } - aint res = Ls__Infix_47((void *)x, (void *)y); - VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); - STACK_PUSH(sp, res); - DISPATCH(); -} - -void op_mod(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - exit(1); - } - aint res = Ls__Infix_37((void *)x, (void *)y); - VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); - STACK_PUSH(sp, res); - DISPATCH(); -} - -void op_drop(DECL_STATE) { - VM_DEBUG("DROP\n"); - sp++; - DISPATCH(); -} - -void op_dup(DECL_STATE) { - aint val = STACK_PEEK(sp); - VM_DEBUG("DUP: %ld\n", (long)UNBOX(val)); - STACK_PUSH(sp, val); 
- DISPATCH(); -} - -void op_swap(DECL_STATE) { - aint a = STACK_POP(sp); - aint b = STACK_POP(sp); - VM_DEBUG("SWAP: a=%ld, b=%ld\n", (long)UNBOX(a), (long)UNBOX(b)); - STACK_PUSH(sp, a); - STACK_PUSH(sp, b); - DISPATCH(); -} - -void op_elem(DECL_STATE) { - aint idx = STACK_POP(sp); - aint arr = STACK_POP(sp); - VM_DEBUG("ELEM: arr=%p, idx=%ld\n", (void *)arr, (long)UNBOX(idx)); - void *elem = Belem((void *)arr, idx); - STACK_PUSH(sp, (aint)elem); - DISPATCH(); -} - -void op_sta(DECL_STATE) { - aint val = STACK_POP(sp); - aint idx = STACK_POP(sp); - aint arr = STACK_POP(sp); - VM_DEBUG("STA: arr=%p, idx=%ld, val=%ld\n", (void *)arr, (long)UNBOX(idx), - (long)UNBOX(val)); - Bsta((void *)arr, idx, (void *)val); - STACK_PUSH(sp, val); - DISPATCH(); -} - -/* - * Jumps - */ -void op_jmp(DECL_STATE) { - ip++; - VM_DEBUG("JMP: target=%p\n", (void *)ip->target); - ip = ip->target; - DISPATCH_JUMP(); -} - -void op_cjmp_z(DECL_STATE) { - ip++; - insn *target = ip->target; - ip++; - aint val = STACK_POP(sp); - VM_DEBUG("CJMP_Z: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), - (void *)target, UNBOX(val) == 0); - if (UNBOX(val) == 0) { - ip = target; - } - DISPATCH_JUMP(); -} - -void op_cjmp_nz(DECL_STATE) { - ip++; - insn *target = ip->target; - ip++; - aint val = STACK_POP(sp); - VM_DEBUG("CJMP_NZ: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), - (void *)target, UNBOX(val) != 0); - if (UNBOX(val) != 0) { - ip = target; - } - DISPATCH_JUMP(); -} - -/* - * String, data etc. 
- */ -void op_string(DECL_STATE) { - (void)bp; - (void)globals; - ip++; - const char *str = ip->str; - VM_DEBUG("STRING: \"%s\"\n", str); - void *result = Bstring((void *)&str); - STACK_PUSH(sp, (aint)result); - DISPATCH(); -} - -void op_barray(DECL_STATE) { - (void)bp; - (void)globals; - ip++; - int32_t n = ip->num; - VM_DEBUG("BARRAY: n=%d\n", n); - aint *args_base = sp + 1; - aint tmp_args[256]; - // TODO: optimize for passing direct pointer - // instead of population array - for (int32_t i = 0; i < n; i++) { - tmp_args[i] = args_base[n - 1 - i]; - } - sp += n; - void *arr = Barray(tmp_args, BOX(n)); - STACK_PUSH(sp, (aint)arr); - DISPATCH(); -} - -void op_sexp(DECL_STATE) { - ip++; - const char *tag_str = ip->str; - ip++; - int32_t n_fields = ip->num; - - aint tag_hash = LtagHash((char *)tag_str); - VM_DEBUG("SEXP: tag=\"%s\" (hash=0x%lx), n_fields=%d\n", tag_str, tag_hash, - n_fields); - aint args[256]; - aint *args_base = sp + 1; - // TODO: optimize for passing direct pointer - // instead of population array - for (int32_t i = 0; i < n_fields; i++) { - args[i] = args_base[n_fields - 1 - i]; - } - args[n_fields] = tag_hash; - sp += n_fields; - - void *s = Bsexp(args, BOX(n_fields + 1)); - STACK_PUSH(sp, (aint)s); - DISPATCH(); -} - -void op_tag(DECL_STATE) { - ip++; - const char *tag_str = ip->str; - ip++; - int32_t n_fields = ip->num; - - aint tag_hash = LtagHash((char *)tag_str); - aint val = STACK_POP(sp); - VM_DEBUG("TAG: tag='%s' hash=0x%lx n_fields=%d val=0x%lx\n", tag_str, - (long)tag_hash, n_fields, (long)val); - aint result = Btag((void *)val, tag_hash, BOX(n_fields)); - VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_array(DECL_STATE) { - ip++; - int32_t n = ip->num; - aint val = STACK_POP(sp); - VM_DEBUG("ARRAY: n=%d, val=%p\n", n, (void *)val); - aint result = Barray_patt((void *)val, BOX(n)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_fail(DECL_STATE) { - ip++; - int32_t line = 
ip->num; - ip++; - int32_t col = ip->num; - VM_DEBUG("FAIL: line=%d, col=%d\n", line, col); - fprintf(stderr, "Match failure at line %d, column %d\n", line, col); - exit(1); -} - -/* - * Pattern matching operations - */ -void op_patt_str_cmp(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("PATT_STR_CMP: x=%p, y=%p\n", (void *)x, (void *)y); - aint result = Bstring_patt((void *)x, (void *)y); - VM_DEBUG("PATT_STR_CMP result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_string(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_STRING: val=%p\n", (void *)val); - aint result = Bstring_tag_patt((void *)val); - VM_DEBUG("PATT_STRING result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_array(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_ARRAY: val=%p\n", (void *)val); - aint result = Barray_tag_patt((void *)val); - VM_DEBUG("PATT_ARRAY result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_sexp(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_SEXP: val=%p\n", (void *)val); - aint result = Bsexp_tag_patt((void *)val); - VM_DEBUG("PATT_SEXP result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_boxed(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_BOXED: val=%p\n", (void *)val); - aint result = Bboxed_patt((void *)val); - VM_DEBUG("PATT_BOXED result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_unboxed(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_UNBOXED: val=%ld\n", (long)val); - aint result = Bunboxed_patt((void *)val); - VM_DEBUG("PATT_UNBOXED result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_closure(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_CLOSURE: val=%p\n", (void *)val); - aint result = Bclosure_tag_patt((void 
*)val); - VM_DEBUG("PATT_CLOSURE result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -/* - * Load / store operations - */ -void op_ld_glo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - VM_DEBUG("LD_GLO[%d] = %ld\n", idx, (long)globals[idx]); - STACK_PUSH(sp, globals[idx]); - DISPATCH(); -} - -void op_st_glo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - aint val = STACK_PEEK(sp); - VM_DEBUG("ST_GLO[%d] = %ld\n", idx, (long)val); - globals[idx] = val; - DISPATCH(); -} - -void op_ld_loc(DECL_STATE) { - ip++; - int32_t idx = ip->num; - VM_DEBUG("LD_LOC[%d] bp=%p bp[-idx]=%ld\n", idx, (void *)bp, (long)bp[-idx]); - STACK_PUSH(sp, bp[-idx]); - DISPATCH(); -} - -void op_st_loc(DECL_STATE) { - ip++; - int32_t idx = ip->num; - aint val = STACK_PEEK(sp); - VM_DEBUG("ST_LOC[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); - bp[-idx] = val; - DISPATCH(); -} - -void op_ld_arg(DECL_STATE) { - ip++; - int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint val = bp[n_args + 1 - idx]; - VM_DEBUG("LD_ARG[%d] n_args=%d bp=%p val=%ld\n", idx, n_args, (void *)bp, - (long)val); - STACK_PUSH(sp, val); - DISPATCH(); -} - -void op_st_arg(DECL_STATE) { - ip++; - int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint val = STACK_PEEK(sp); - VM_DEBUG("ST_ARG[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); - bp[n_args + 1 - idx] = val; - DISPATCH(); -} - -void op_ld_clo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint *closure = (aint *)bp[n_args + 2]; - VM_DEBUG("LD_CLO[%d] closure=%p val=%ld\n", idx, (void *)closure, - (long)closure[idx + 1]); - STACK_PUSH(sp, closure[idx + 1]); - DISPATCH(); -} - -void op_st_clo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint val = STACK_PEEK(sp); - aint *closure = (aint *)bp[n_args + 2]; - VM_DEBUG("ST_CLO[%d] = %ld closure=%p\n", idx, (long)val, (void *)closure); - closure[idx + 1] = val; - DISPATCH(); -} - -/* - * 
Function call operations - */ -void op_begin(DECL_STATE) { - - ip++; - int32_t n_args = ip->num; - (void)n_args; - ip++; - int32_t n_locals = ip->num; - ip++; - - VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, - (void *)bp, (void *)sp); - - for (int32_t i = 0; i < n_locals; i++) { - STACK_PUSH(sp, 0); - } - - DISPATCH(); -} - -void op_call(DECL_STATE) { - ip++; - insn *target = ip->target; - ip++; - int32_t n_args = ip->num; - - VM_TRACE_CALL("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, - n_args, (void *)sp, (void *)bp); - - STACK_PUSH(sp, (aint)n_args); - STACK_PUSH(sp, (aint)bp); - - aint *new_bp = sp + 1; - target->func(target, sp, new_bp, globals); - - aint ret_val = *new_bp; - - sp = new_bp + n_args + 1; - - STACK_PUSH(sp, ret_val); - DISPATCH(); -} - -void op_module_end(DECL_STATE); - -void op_callc(DECL_STATE) { - ip++; - int32_t n_args = ip->num; - - aint closure_val = *(sp + 1 + n_args); - aint *closure = (aint *)closure_val; - - aint entry = closure[0]; - insn *target = (insn *)entry; - - VM_TRACE_CALL("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", - (void *)closure, (void *)target, n_args, (void *)sp, - (void *)bp); - - STACK_PUSH(sp, (aint)n_args); - STACK_PUSH(sp, (aint)bp); - - aint *new_bp = sp + 1; - target->func(target, sp, new_bp, globals); - - aint ret_val = *new_bp; - VM_DEBUG("CALLC: return value=%ld new_bp=%p\n", (long)ret_val, - (void *)new_bp); - - sp = new_bp + n_args + 2; - - STACK_PUSH(sp, ret_val); - DISPATCH(); -} - -void op_end(DECL_STATE) { - VM_TRACE_CALL("END sp=%p\n", (void *)sp); - aint ret_val = STACK_PEEK(sp); - *bp = ret_val; - // If a module_end bridge follows, jump to it. - // Otherwise, return to finish execution. 
- insn *next = ip + 1; - if (next && next->func == op_module_end) { - VM_DEBUG("END: jumping to module_end bridge at %p\n", (void *)next); - ip = next; - DISPATCH_JUMP(); - } - VM_DEBUG("END: returning (no module bridge)\n"); - return; -} - -/* - * Closures - */ - -/* - * External function closure stub - called when an external closure is invoked - * via op_callc This stub is generated for each unresolved external closure - * reference. The function name is embedded in the next instruction. - */ -static void op_callc_ext_func_stub(DECL_STATE) { - ip++; - const char *func_name = ip->str; - - int32_t n_args = (int32_t)bp[1]; - - VM_DEBUG("EXT_FUNC_STUB: func='%s' n_args=%d bp=%p\n", func_name, n_args, - (void *)bp); - - aint args[256]; - for (int32_t i = 0; i < n_args; i++) { - args[i] = bp[n_args + 1 - i]; - } - - aint result = ffi_call_c(func_name, args, n_args); - VM_DEBUG("EXT_FUNC_STUB: func='%s' result=%ld\n", func_name, (long)result); - - // Store result in return value slot - *bp = result; - - return; -} -void op_closure(DECL_STATE) { - ip++; - insn *target = ip->target; - ip++; - int32_t n_captured = ip->num; - - VM_DEBUG("CLOSURE: target=%p n_captured=%d\n", (void *)target, n_captured); - - aint tmp_args[256]; - tmp_args[0] = (aint)target; - aint *args_base = sp + 1; - for (int32_t i = 0; i < n_captured; i++) { - tmp_args[i + 1] = args_base[n_captured - 1 - i]; - VM_DEBUG("CLOSURE: captured[%d]=%ld\n", i, (long)tmp_args[i + 1]); - } - sp += n_captured; - - void *closure = Bclosure(tmp_args, BOX(n_captured)); - VM_DEBUG("CLOSURE: created=%p\n", (void *)closure); - STACK_PUSH(sp, (aint)closure); - DISPATCH(); -} - -#ifdef DEBUG_PRINT -void op_line(DECL_STATE) { - ip++; - int32_t line = ip->num; - fprintf(stderr, "LINE %d\n", line); - (void)line; - DISPATCH(); -} -#else -void op_line(DECL_STATE) { - ip++; - DISPATCH(); -} -#endif - -void op_call_ext_func(DECL_STATE) { - ip++; - const char *func_name = ip->str; - ip++; - int32_t n_args = ip->num; - - 
VM_DEBUG("CALL_EXT_FUNC: func='%s' n_args=%d\n", func_name, n_args); - - aint args[256]; - aint *args_base = sp + 1; - for (int32_t i = 0; i < n_args; i++) { - args[i] = args_base[n_args - 1 - i]; - } - sp += n_args; - - aint result = ffi_call_c(func_name, args, n_args); +typedef struct { + const bytecode *bc; + insn *code; + size_t code_len; + byte_reader reader; + size_t global_offset; + size_t unit_end_idx; - VM_DEBUG("CALL_EXT_FUNC: result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} + struct { + stub *data; + size_t len; + size_t cap; + } stubs; -void op_module_end(DECL_STATE) { - ip++; - insn *next_module = ip->target; + struct { + size_t *data; + size_t len; + size_t cap; + } relocs; - VM_DEBUG("MODULE_END: next_module=%p\n", (void *)next_module); + int32_t *bc_to_insn_map; - if (next_module) { - ip = next_module; - DISPATCH_JUMP(); - } - // If no next module, just fall through (return) - VM_DEBUG("MODULE_END: no next module, returning\n"); - return; -} +} decode_ctx; -decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena *arena) { - decode_ctx *ctx = ARENA_NEW(arena, decode_ctx); +decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset) { + decode_ctx *ctx = ALLOC(decode_ctx); ctx->bc = bc; - // TODO: ugly? - // Code array will be allocated from arena in decode() - ctx->code_cap = 0; ctx->code = NULL; ctx->code_len = 0; ctx->global_offset = global_offset; - ctx->module_end_idx = (size_t)-1; + ctx->unit_end_idx = -1; + ctx->bc_to_insn_map = NULL; - // Allocate offset map (one entry per bytecode byte) - ctx->offset_map.cap = bc->code_size; - ctx->offset_map.offset_to_insn = ARENA_ALLOC(arena, int32_t, bc->code_size); + da_init(ctx->stubs); - // Initialize all to -1 (unmapped) - for (size_t i = 0; i < bc->code_size; i++) { - ctx->offset_map.offset_to_insn[i] = -1; - } + da_init(ctx->relocs); - // Initialize reader - // TODO: return struct not pass? 
reader_init(&ctx->reader, bc->code, bc->code_size); return ctx; } -/* - * Decoding - */ -// TODO: /?? +static void add_stub(decode_ctx *ctx, size_t patch_idx, const char *name, + stub_kind kind) { + stub s = {.patch_idx = patch_idx, .name = name, .kind = kind}; + da_append(ctx->stubs, s); +} + static fixup_node *add_fixup(meta_info *meta, size_t target_off, - size_t insn_idx, memory *mem) { - fixup_node *node = ARENA_NEW(mem->tmp, fixup_node); + size_t insn_idx) { + fixup_node *node = ALLOC(fixup_node); if (!node) return NULL; @@ -898,42 +154,46 @@ static bool validate_target_off(const bytecode *bc, size_t target_off, return true; } -static bool emit_ld_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, - size_t global_base) { +/* + * Record that code[insn_idx].target holds a code-array index . + * The linker will convert it to an absolute pointer after copying. + */ +static void emit_target_idx(decode_ctx *ctx, size_t target_code_idx) { + size_t slot = ctx->code_len; + ctx->code[ctx->code_len++].num = (int32_t)target_code_idx; + da_append(ctx->relocs, slot); +} + +static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { const bytecode *bc = ctx->bc; if (IS_EXT_REF(idx)) { - // External global: resolve by name (idx is negative string offset) int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); - resolved_symbol *sym = st ? 
symbol_table_find(st, glob_name) : NULL; - VM_DEBUG("DECODE: OP_LD external global '%s' resolved to idx=%d\n", - glob_name, sym->global_idx); + VM_DEBUG("DECODE: OP_LD external global '%s' (stub)\n", glob_name); EMIT_FUNC(ctx, op_ld_glo); - EMIT_NUM(ctx, sym->global_idx); + size_t patch_idx = ctx->code_len; + EMIT_NUM(ctx, 0); // placeholder — linker will patch + add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL); } else { - // Local global: add module's global_base EMIT_FUNC(ctx, op_ld_glo); EMIT_NUM(ctx, global_base + idx); } return true; } -static bool emit_st_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, - size_t global_base) { +static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { const bytecode *bc = ctx->bc; if (IS_EXT_REF(idx)) { - // External global: resolve by name (idx is negative string offset) int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); - resolved_symbol *sym = st ? symbol_table_find(st, glob_name) : NULL; - VM_DEBUG("DECODE: OP_ST external global '%s' resolved to idx=%d\n", - glob_name, sym->global_idx); + VM_DEBUG("DECODE: OP_ST external global '%s' (stub)\n", glob_name); EMIT_FUNC(ctx, op_st_glo); - EMIT_NUM(ctx, sym->global_idx); + size_t patch_idx = ctx->code_len; + EMIT_NUM(ctx, 0); // placeholder — linker will patch + add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL); } else { - // Local global: add module's global_base EMIT_FUNC(ctx, op_st_glo); EMIT_NUM(ctx, global_base + idx); } @@ -941,11 +201,10 @@ static bool emit_st_glo(decode_ctx *ctx, symbol_table *st, int32_t idx, } /* - * Handle jump target resolution + * Handle jump target resolution (intra-unit only — these are always local) */ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, - int32_t depth, memory *mem) { - // TODO: unsigned ?? 
+ int32_t depth) { int32_t target_off = reader_i32(&ctx->reader); if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { @@ -953,19 +212,20 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, } size_t my_idx = ctx->code_len; - EMIT_TARGET(ctx, NULL); // placeholder + EMIT_NUM(ctx, 0); // placeholder — will hold code index meta_info *tm = &meta[target_off]; - if (target_off < current_bc_off && tm->resolved_idx != -1) { - // Backward jump - ctx->code[my_idx].target = &ctx->code[tm->resolved_idx]; + if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { + // Backward jump — already resolved, store as index + ctx->code[my_idx].num = tm->resolved_idx; + da_append(ctx->relocs, my_idx); if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { fprintf(stderr, "Error: Loop stack mismatch\n"); return false; } } else { - // Forward jump - if (!add_fixup(meta, target_off, my_idx, mem)) { + // Forward jump — add fixup + if (!add_fixup(meta, target_off, my_idx)) { return false; } if (depth != -1) { @@ -980,17 +240,15 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, return true; } -insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, - memory *mem) { +static insn *decode_internal(decode_ctx *ctx) { const bytecode *bc = ctx->bc; size_t global_base = ctx->global_offset; size_t code_cap = bc->code_size * 16; // TODO: estimate better - insn *code = ARENA_ALLOC(mem->code, insn, code_cap); + insn *code = ALLOC_ARRAY(insn, code_cap); ctx->code = code; - ctx->code_cap = code_cap; - meta_info *meta = ARENA_ALLOC(mem->tmp, meta_info, bc->code_size); + meta_info *meta = ALLOC_ARRAY(meta_info, bc->code_size); // Initialize meta table for (size_t i = 0; i < bc->code_size; i++) { @@ -1000,28 +258,18 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, } int32_t depth = 0; - // Track if we've seen the first END bool first_end_seen = 
false; while (!reader_eof(&ctx->reader)) { size_t current_bc_off = reader_pos(&ctx->reader); uint8_t opcode = reader_u8(&ctx->reader); -#ifdef DEBUG_PRINT - if (current_bc_off < 10) { // Only log first 10 instructions - VM_DEBUG("DECODE: bc_off=%zu opcode=0x%02X\n", current_bc_off, opcode); - } -#endif - VM_DEBUG("DECODE: visiting bc_off=%zu opcode=%d code_idx=%zu\n", current_bc_off, opcode, ctx->code_len); meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code_len; - // Update offset map (bytecode offset -> instruction index) - ctx->offset_map.offset_to_insn[current_bc_off] = (int32_t)ctx->code_len; - // Validate stack depth if (depth != -1) { if (m->stack_depth != -1 && m->stack_depth != depth) { @@ -1035,14 +283,14 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, depth = m->stack_depth; } - // Resolve forward jumps (backpatching) + // Resolve forward jumps (backpatching) — store as index, record relocation for (fixup_node *f = m->fixups; f; f = f->next) { VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " "code_idx=%zu\n", current_bc_off, f->insn_idx, ctx->code_len); - ctx->code[f->insn_idx].target = &ctx->code[ctx->code_len]; + ctx->code[f->insn_idx].num = (int32_t)ctx->code_len; + da_append(ctx->relocs, f->insn_idx); } - // m->fixups = NULL; switch (opcode) { case OP_CONST: @@ -1118,7 +366,7 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, case OP_JMP: EMIT_FUNC(ctx, op_jmp); - if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + if (!handle_jump(ctx, meta, current_bc_off, depth)) { return NULL; } DEPTH_DEAD(depth); @@ -1127,7 +375,7 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, case OP_CJMP_Z: DEPTH_POP(depth); EMIT_FUNC(ctx, op_cjmp_z); - if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + if (!handle_jump(ctx, meta, current_bc_off, depth)) { return NULL; } break; @@ -1135,7 +383,7 @@ insn *decode(decode_ctx *ctx, 
symbol_table *st, ext_func_stub_table *fst, case OP_CJMP_NZ: DEPTH_POP(depth); EMIT_FUNC(ctx, op_cjmp_nz); - if (!handle_jump(ctx, meta, current_bc_off, depth, mem)) { + if (!handle_jump(ctx, meta, current_bc_off, depth)) { return NULL; } break; @@ -1167,13 +415,13 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, case OP_LD: { DEPTH_PUSH(depth); int32_t idx = reader_i32(&ctx->reader); - emit_ld_glo(ctx, st, idx, global_base); + emit_ld_glo(ctx, idx, global_base); break; } case OP_ST: { int32_t idx = reader_i32(&ctx->reader); - emit_st_glo(ctx, st, idx, global_base); + emit_st_glo(ctx, idx, global_base); break; } @@ -1303,23 +551,11 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, EMIT_FUNC(ctx, op_patt_closure); break; - case OP_BEGIN: { - int32_t n_args = reader_i32(&ctx->reader); - int32_t n_locals = reader_i32(&ctx->reader); - depth = 0; - EMIT_FUNC(ctx, op_begin); - EMIT_NUM(ctx, n_args); - EMIT_NUM(ctx, n_locals); - EMIT_NUM(ctx, 0); - break; - } - + case OP_BEGIN: case OP_BEGIN_CLOSURE: { int32_t n_args = reader_i32(&ctx->reader); int32_t n_locals = reader_i32(&ctx->reader); depth = 0; - VM_DEBUG("DECODE: OP_BEGIN_CLOSURE n_args=%d n_locals=%d\n", n_args, - n_locals); EMIT_FUNC(ctx, op_begin); EMIT_NUM(ctx, n_args); EMIT_NUM(ctx, n_locals); @@ -1335,19 +571,6 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, target_raw, n_captured, current_bc_off); bool is_external = IS_EXT_REF(target_raw); - const char *ext_func_name = NULL; - resolved_symbol *ext_sym = NULL; - - if (is_external) { - int str_offset = EXT_REF_INDEX(target_raw); - ext_func_name = bytecode_get_string(bc, str_offset); - VM_DEBUG("DECODE: OP_CLOSURE external name='%s' str_offset=%d\n", - ext_func_name, str_offset); - ext_sym = st ? 
symbol_table_find(st, ext_func_name) : NULL; - } else if (!validate_target_off(bc, (uint32_t)target_raw, current_bc_off, - "CLOSURE")) { - return NULL; - } // Emit load instructions for each captured variable for (int32_t i = 0; i < n_captured; i++) { @@ -1358,7 +581,7 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, switch (designation_type) { case 0: // Global DEPTH_PUSH(depth); - emit_ld_glo(ctx, st, idx, global_base); + emit_ld_glo(ctx, idx, global_base); break; case 1: // Local DEPTH_PUSH(depth); @@ -1384,38 +607,38 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, DEPTH_DEC(depth, n_captured - 1); if (is_external) { - if (ext_sym && ext_sym->is_function && ext_sym->code_ptr) { - // Resolved external - emit regular closure with code pointer - EMIT_FUNC(ctx, op_closure); - EMIT_TARGET(ctx, ext_sym->code_ptr); - EMIT_NUM(ctx, n_captured); - } else { - // Check if we already have a stub for this function - insn *stub = ext_func_stub_table_find(fst, ext_func_name); - if (!stub) { - stub = ext_func_stub_table_add(fst, ext_func_name, - op_callc_ext_func_stub, mem->code); - } - - EMIT_FUNC(ctx, op_closure); - EMIT_TARGET(ctx, stub); - EMIT_NUM(ctx, n_captured); - } + int str_offset = EXT_REF_INDEX(target_raw); + const char *ext_func_name = bytecode_get_string(bc, str_offset); + + VM_DEBUG("DECODE: OP_CLOSURE external name='%s' (stub)\n", + ext_func_name); + + // Emit closure with NULL target placeholder. + // Linker will resolve to inter-unit function or create FFI stub. 
+ EMIT_FUNC(ctx, op_closure); + size_t target_slot = ctx->code_len; + EMIT_TARGET(ctx, NULL); // placeholder + EMIT_NUM(ctx, n_captured); + + // Record stub so linker can resolve + add_stub(ctx, target_slot, ext_func_name, STUB_CLOSURE); } else { uint32_t target_off = (uint32_t)target_raw; - size_t target_slot = ctx->code_len + 1; + if (!validate_target_off(bc, target_off, current_bc_off, "CLOSURE")) { + return NULL; + } + EMIT_FUNC(ctx, op_closure); - EMIT_TARGET(ctx, NULL); + size_t target_slot = ctx->code_len; + EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_captured); - VM_DEBUG("DECODE: OP_CLOSURE internal target_off=%u target_slot=%zu\n", - target_off, target_slot); - meta_info *tm = &meta[target_off]; if (target_off < current_bc_off && tm->resolved_idx != -1) { - ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; + ctx->code[target_slot].num = tm->resolved_idx; + da_append(ctx->relocs, target_slot); } else { - add_fixup(meta, target_off, target_slot, mem); + add_fixup(meta, target_off, target_slot); } } break; @@ -1434,40 +657,32 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, int str_offset = EXT_REF_INDEX(target_off); const char *func_name = bytecode_get_string(bc, str_offset); - // Try to resolve from symbol table (external module call) - resolved_symbol *sym = st ? 
symbol_table_find(st, func_name) : NULL; + VM_DEBUG("DECODE: OP_CALL external '%s' (stub)\n", func_name); - if (sym && sym->is_function && sym->code_ptr) { - VM_DEBUG("DECODE: external module call to '%s' resolved to %p\n", - func_name, (void *)sym->code_ptr); - - EMIT_FUNC(ctx, op_call); - EMIT_TARGET(ctx, sym->code_ptr); - EMIT_NUM(ctx, n_args); - } else { - VM_DEBUG("DECODE: external function call to '%s'\n", func_name); + // To be patched by linker + EMIT_FUNC(ctx, NULL); + size_t name_slot = ctx->code_len; + EMIT_TARGET(ctx, NULL); + EMIT_NUM(ctx, n_args); - EMIT_FUNC(ctx, op_call_ext_func); - EMIT_STR(ctx, func_name); - EMIT_NUM(ctx, n_args); - } + // Record stub — linker decides if it's inter-unit or FFI + add_stub(ctx, name_slot, func_name, STUB_CALL); } else { - // Call between modules if (!validate_target_off(bc, (uint32_t)target_off, current_bc_off, "CALL")) { return NULL; } size_t target_slot = ctx->code_len + 1; EMIT_FUNC(ctx, op_call); - EMIT_TARGET(ctx, NULL); + EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_args); meta_info *tm = &meta[(uint32_t)target_off]; - VM_DEBUG("DECODE: tm->resolved_idx=%d\n", tm->resolved_idx); if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { - ctx->code[target_slot].target = &ctx->code[tm->resolved_idx]; + ctx->code[target_slot].num = tm->resolved_idx; + da_append(ctx->relocs, target_slot); } else { - add_fixup(meta, (uint32_t)target_off, target_slot, mem); + add_fixup(meta, (uint32_t)target_off, target_slot); } } break; @@ -1485,12 +700,12 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, EMIT_FUNC(ctx, op_end); DEPTH_DEAD(depth); - // After the first END (main function's end), emit module bridge + // After the first END (main function's end), emit unit bridge if (!first_end_seen) { first_end_seen = true; - VM_DEBUG("DECODE: First END detected, emitting op_module_end bridge\n"); - ctx->module_end_idx = ctx->code_len; - EMIT_FUNC(ctx, op_module_end); 
+ VM_DEBUG("DECODE: First END detected, emitting op_unit_end bridge\n"); + ctx->unit_end_idx = ctx->code_len; + EMIT_FUNC(ctx, op_unit_end); // Will be patched by linker EMIT_TARGET(ctx, NULL); } @@ -1514,8 +729,75 @@ insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, default: fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08zx\n", opcode, reader_pos(&ctx->reader) - 1); + free(meta); return NULL; } } + + // Extract mapping + ctx->bc_to_insn_map = ALLOC_ARRAY(int32_t, bc->code_size); + for (size_t i = 0; i < bc->code_size; i++) { + ctx->bc_to_insn_map[i] = meta[i].resolved_idx; + } + + // Free temporary metadata and fixup nodes + for (size_t i = 0; i < bc->code_size; i++) { + fixup_node *node = meta[i].fixups; + while (node) { + fixup_node *next = node->next; + free(node); + node = next; + } + } + free(meta); + return ctx->code; } + +decoded **decode(bytecode **bc_arr, size_t n) { + typedef struct { + decoded **data; + size_t len; + size_t cap; + } decoded_array; + + decoded_array result; + da_init(result); + + size_t global_offset = 0; + + for (size_t i = 0; i < n; i++) { + decode_ctx *ctx = decode_ctx_create(bc_arr[i], global_offset); + insn *code = decode_internal(ctx); + if (!code) { + fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); + return NULL; + } + decoded *dec = ALLOC(decoded); + *dec = (decoded){ + .code = code, + .code_len = ctx->code_len, + .stubs = ctx->stubs.data, + .stubs_len = ctx->stubs.len, + .unit_end_idx = ctx->unit_end_idx, + .bc_to_insn_map = ctx->bc_to_insn_map, + .relocs = ctx->relocs.data, + .relocs_len = ctx->relocs.len, + }; + da_append(result, dec); + global_offset += bc_arr[i]->globals_count; + free(ctx); + } + + return result.data; +} + +void decoded_free(decoded *dec) { + if (dec) { + free(dec->code); + free(dec->stubs); + free(dec->bc_to_insn_map); + free(dec->relocs); + free(dec); + } +} diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index aa3665040..7f7896c32 100644 
--- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -2,113 +2,49 @@ #define DECODER_NEW_H #include "../runtime/runtime_common.h" -#include "arena.h" #include "bytecode.h" +#include "bytecode_util.h" +#include "insn.h" #include #include -union insn; - -// State: ip = instruction pointer, sp = stack pointer, bp = base pointer -// bp and globals are marked unused since not all handlers need them -#define DECL_STATE \ - __attribute((unused)) union insn *ip, __attribute__((unused)) aint *sp, \ - __attribute__((unused)) aint *bp, __attribute__((unused)) aint *globals -#define STATE ip, sp, bp, globals - -// Function pointer type for opcode handlers (returns void for tail calls) -typedef void (*fn)(DECL_STATE); - -// Union representing a single threaded code instruction/operand -typedef union insn { - fn func; // Pointer to function - int32_t num; // Integer operand (signed) - const char *str; // String operand (direct pointer) - union insn *target; // Direct jump target (pointer to insn) -} insn; +typedef enum { + STUB_CALL, + STUB_CLOSURE, + STUB_GLOBAL, +} stub_kind; /* - * Sentinel value for external references (both functions and globals). - * Address = -index - 1, so index 0 becomes -1, index 1 becomes -2, etc. - */ -#define TO_EXT_REF(idx) (-(idx) - 1) -#define IS_EXT_REF(addr) ((addr) < 0) -#define EXT_REF_INDEX(addr) (-(addr) - 1) - -/* - * Resolved symbol structure - represents a function or global variable that has - * been resolved during decoding. + * A single fixup record emitted by the decoder for the linker to resolve. 
*/ typedef struct { - const char *name; // Symbol name (points into bytecode's string table) - insn *code_ptr; // For functions: pointer to first instruction - int32_t global_idx; // For globals: rebased global index - bool is_function; // true = function, false = global variable - const char *module_name; // Module that defined this symbol -} resolved_symbol; + size_t patch_idx; // Index into code array + const char *name; // Symbol name to look up + stub_kind kind; +} stub; /* - * Maps symbol names to resolved symbols (functions or globals). - * Used for resolving imports and external references during decoding. + * Result of decoding a single unit. */ typedef struct { - resolved_symbol *data; - size_t len; - size_t cap; -} symbol_table; - -typedef struct { - const char *name; // Function name (points into bytecode string table or dup) - insn *stub; // Pointer to 2-insn stub: [op_callc_ext_stub][name_str] -} ext_func_stub_entry; + insn *code; // Decoded threaded code array + size_t code_len; + stub *stubs; // Fixups for the linker to resolve + size_t stubs_len; + size_t unit_end_idx; // Index of op_unit_end in code[] (-1 if none) + int32_t *bc_to_insn_map; + size_t *relocs; // Indices of insn with internal target offsets + size_t relocs_len; +} decoded; -/* - * Cache of generated stubs for unresolved external function references. 
- */ -typedef struct { - ext_func_stub_entry *data; - size_t len; - size_t cap; -} ext_func_stub_table; +decoded **decode(bytecode **bc_arr, size_t n); +void decoded_free(decoded *dec); /* - * Mapping from bytecode offsets to instruction indices in the decoded code + * Used for patching */ -typedef struct { - int32_t *offset_to_insn; // offset_to_insn[bytecode_offset] = insn_index - size_t cap; // Size of the mapping array (= bytecode size) -} offset_map; - -typedef struct { - - const bytecode *bc; - - insn *code; // Output threaded code - size_t code_cap; - size_t code_len; - - byte_reader reader; - offset_map offset_map; - - size_t global_offset; // Offset for global variables - - size_t module_end_idx; // Pointer to this module's op_module_end instruction - // for linking (initialized to -1 if not found) - -} decode_ctx; - -void symbol_table_init(symbol_table *table); -void symbol_table_free(symbol_table *table); -void register_sysargs(symbol_table *table); -void ext_func_stub_table_init(ext_func_stub_table *table); -int register_public_symbols(symbol_table *st, insn *code, - public_symbols *public_symbols, - int32_t *offset_to_insn, int32_t global_base); - -decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset, - arena *arena); - -insn *decode(decode_ctx *ctx, symbol_table *st, ext_func_stub_table *fst, - memory *mem); +fn decoder_get_op_call(void); +fn decoder_get_op_call_ffi_stub(void); +fn decoder_get_op_callc_ffi_stub(void); #endif // DECODER_NEW_H diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c index 421c01aa1..e3915cbef 100644 --- a/virtual_machine/ffi.c +++ b/virtual_machine/ffi.c @@ -5,6 +5,8 @@ #include "ffi.h" #include "../runtime/runtime_common.h" +#include "da.h" +#include "memory.h" #include #include #include @@ -12,6 +14,50 @@ #include #include +struct ffi_call_table { + ffi_call_stub *data; + size_t len; + size_t cap; +}; + +ffi_call_table *ffi_call_table_create(void) { + ffi_call_table *table = ALLOC(ffi_call_table); 
+ da_init(*table); + return table; +} + +// Currently frees only table and not stubs themselves since they are needed for +// execution +void ffi_call_table_destroy(ffi_call_table *table) { + da_free(*table); + free(table); +} + +insn *ffi_call_table_find(ffi_call_table *table, const char *name) { + for (size_t i = 0; i < table->len; i++) { + if (strcmp(table->data[i].name, name) == 0) { + return table->data[i].stub; + } + } + return NULL; +} + +insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn) { + insn *stub = ALLOC_ARRAY(insn, 2); + + char *persistent_name = ESTRDUP(name); + + stub[0].func = stub_fn; + stub[1].str = persistent_name; + + ffi_call_stub entry = {.name = persistent_name, .stub = stub}; + da_append(*table, entry); + + // VM_DEBUG("EXT_FUNC_STUB_TABLE: added '%s' -> stub=%p\n", name, (void + // *)stub); + return stub; +} + // TODO: ugly? typedef struct { const char *lama_name; @@ -35,7 +81,7 @@ static const func_metadata func_table[] = { {"Lsprintf", "Bsprintf", false, 1}, // Sentinel - {NULL, NULL, false, 0, NULL}}; + {NULL, NULL, false, 0}}; // TODO: cache? 
static void *lookup_function(const char *name) { @@ -64,7 +110,7 @@ static aint call_args_array_function(const char *name, aint *args) { void *fn = lookup_function(name); if (!fn) { fprintf(stderr, "Undefined external function: %s\n", name); - exit(1); + exit(EXIT_FAILURE); } ffi_cif cif; @@ -76,7 +122,7 @@ static aint call_args_array_function(const char *name, aint *args) { ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_pointer, arg_types); if (status != FFI_OK) { fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); - exit(1); + exit(EXIT_FAILURE); } ffi_call(&cif, FFI_FN(fn), &result, arg_values); @@ -92,13 +138,13 @@ static aint call_variadic_function(const char *target_name, int fixed_args, void *fn = lookup_function(target_name); if (!fn) { fprintf(stderr, "Undefined external function: %s\n", target_name); - exit(1); + exit(EXIT_FAILURE); } if (n_args < fixed_args) { fprintf(stderr, "FFI call '%s': expected at least %d args, got %d\n", target_name, fixed_args, n_args); - exit(1); + exit(EXIT_FAILURE); } ffi_cif cif; @@ -125,7 +171,7 @@ static aint call_variadic_function(const char *target_name, int fixed_args, if (status != FFI_OK) { fprintf(stderr, "FFI prep failed for '%s': status=%d\n", target_name, status); - exit(1); + exit(EXIT_FAILURE); } ffi_call(&cif, FFI_FN(fn), &result, arg_values); @@ -136,7 +182,7 @@ static aint call_regular_function(const char *name, aint *args, int n_args) { void *fn = lookup_function(name); if (!fn) { fprintf(stderr, "Undefined external function: %s\n", name); - exit(1); + exit(EXIT_FAILURE); } ffi_cif cif; @@ -154,7 +200,7 @@ static aint call_regular_function(const char *name, aint *args, int n_args) { if (status != FFI_OK) { fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); - exit(1); + exit(EXIT_FAILURE); } ffi_call(&cif, FFI_FN(fn), &result, arg_values); diff --git a/virtual_machine/ffi.h b/virtual_machine/ffi.h index 2ebc4b37b..345f75106 100644 --- a/virtual_machine/ffi.h +++ 
b/virtual_machine/ffi.h @@ -2,12 +2,28 @@ #define FFI_CALL_H #include "../runtime/runtime_common.h" +#include "insn.h" #include /* - * Call an external function by name using libffi. + * FFI call by name using libffi. * */ aint ffi_call_c(const char *name, aint *args, int n_args); -#endif +typedef struct { + const char *name; // Function name + insn *stub; // Pointer to insn-stub +} ffi_call_stub; + +/* + * Cache of generated stubs for unresolved FFI references. + */ +typedef struct ffi_call_table ffi_call_table; + +ffi_call_table *ffi_call_table_create(void); +void ffi_call_table_destroy(ffi_call_table *table); +insn *ffi_call_table_find(ffi_call_table *table, const char *name); +insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn); + +#endif // FFI_CALL_H diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h new file mode 100644 index 000000000..3c0419c2e --- /dev/null +++ b/virtual_machine/insn.h @@ -0,0 +1,31 @@ +/* + * Internal instruction representation and VM state definitions. 
+ */ + +#ifndef INSN_H +#define INSN_H + +#include "../runtime/runtime_common.h" +#include + +union insn; + +// State: ip = instruction pointer, sp = stack pointer, bp = base pointer +// bp and globals are marked unused since not all handlers need them +#define DECL_STATE \ + __attribute((unused)) union insn *ip, __attribute__((unused)) aint *sp, \ + __attribute__((unused)) aint *bp, __attribute__((unused)) aint *globals +#define STATE ip, sp, bp, globals + +// Function pointer type for opcode handlers (returns void for tail calls) +typedef void (*fn)(DECL_STATE); + +// Union representing a single threaded code instruction/operand +typedef union insn { + fn func; // Pointer to function + int32_t num; // Integer operand (signed) + const char *str; // String operand (direct pointer) + union insn *target; // Direct jump target (pointer to insn) +} insn; + +#endif // INSN_H diff --git a/virtual_machine/interpreter.c b/virtual_machine/interpreter.c deleted file mode 100755 index 0a05b014b..000000000 --- a/virtual_machine/interpreter.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - * Core bytecode interpreter for the Lama VM. - * Implements the fetch-decode-execute loop for all supported opcodes. - * Manages the data stack, call stack, and interacts with the C runtime. - */ - -#include "../runtime/gc.h" -#include "../runtime/runtime_common.h" -#include "bytecode.h" -#include "call_stack.h" -#include "opcodes.h" -#include "stack.h" -#include -#include -#include - -#ifdef DEBUG_PRINT -#define STACK_PEEK_SIZE 5 -#define VM_DEBUG(fmt, ...) 
fprintf(stderr, fmt, ##__VA_ARGS__) -#define VM_TRACE_OP(opcode, ip) \ - fprintf(stderr, "ip: 0x%08X opcode: %s (0x%02X)\n", (ip), \ - opcode_to_string(opcode), (opcode)) -#define VM_TRACE_STACK(stack) \ - do { \ - long sp_idx = (stack)->sp - (stack)->data; \ - fprintf(stderr, "stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ - for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ - if (sp_idx + i < STACK_SIZE) { \ - fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ - } \ - } \ - fprintf(stderr, "\n"); \ - } while (0) -#define VM_TRACE_CALL(fmt, ...) fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) -#define VM_ASSERT(cond, msg) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ - __LINE__); \ - exit(1); \ - } \ - } while (0) -#else -#define VM_DEBUG(fmt, ...) -#define VM_TRACE_OP(opcode, ip) -#define VM_TRACE_STACK(stack) -#define VM_TRACE_CALL(fmt, ...) -#define VM_ASSERT(cond, msg) -#endif - -static aint pending_closure = 0; - -void *__start_custom_data; -void *__stop_custom_data; - -extern void __init(void); - -extern aint Lread(void); -extern aint Lwrite(aint n); -extern aint Ls__Infix_43(void *p, void *q); -extern aint Ls__Infix_45(void *p, void *q); -extern aint Ls__Infix_42(void *p, void *q); -extern aint Ls__Infix_47(void *p, void *q); -extern aint Ls__Infix_37(void *p, void *q); -extern aint Ls__Infix_60(void *p, void *q); -extern aint Ls__Infix_6061(void *p, void *q); -extern aint Ls__Infix_62(void *p, void *q); -extern aint Ls__Infix_6261(void *p, void *q); -extern aint Ls__Infix_6161(void *p, void *q); -extern aint Ls__Infix_3361(void *p, void *q); -extern aint Ls__Infix_3838(void *p, void *q); -extern aint Ls__Infix_3333(void *p, void *q); - -extern aint Llength(void *p); -extern void *Lstring(aint *args); -extern aint LtagHash(char *s); -extern void *Barray(aint *args, aint bn); -extern void *Bsexp(aint *args, aint bn); -extern void *Bclosure(aint *args, aint bn); -extern void *Bstring(aint *args); 
-extern void *Belem(void *p, aint i); -extern void *Bsta(void *x, aint i, void *v); - -extern aint Btag(void *d, aint t, aint n); -extern aint Barray_patt(void *d, aint n); -extern aint Bstring_patt(void *x, void *y); -extern aint Bclosure_tag_patt(void *x); -extern aint Bboxed_patt(void *x); -extern aint Bunboxed_patt(void *x); -extern aint Barray_tag_patt(void *x); -extern aint Bstring_tag_patt(void *x); -extern aint Bsexp_tag_patt(void *x); - -/** - * Retrieves a pointer to a local variable in the current stack frame. - * Locals are stored below the arguments in the stack. - */ -static inline aint *get_local(stack_t *stack, call_frame_t *frame, int idx) { - return &stack->data[frame->base - frame->n_args - idx]; -} - -/** - * Retrieves a pointer to an argument in the current stack frame. - */ -static inline aint *get_arg(stack_t *stack, call_frame_t *frame, int idx) { - return &stack->data[frame->base - idx]; -} - -/** - * Retrieves a pointer to a variable stored in a closure's environment. - */ -static inline aint *get_closure_var(call_frame_t *frame, int idx) { - data *closure_data = TO_DATA(frame->closure); - aint *contents = (aint *)closure_data->contents; - // +1 because contents[0] is the entry point - return &contents[idx + 1]; -} - -static aint read_designation(stack_t *stack, call_frame_t *frame, aint *globals, - const uint8_t *code, int *ip_ptr) { - uint8_t type_byte = code[(*ip_ptr)++]; - int idx = read_i32(code, *ip_ptr); - *ip_ptr += 4; - - int designation_type = type_byte & 0xF; - switch (designation_type) { - case 0: - return globals[idx]; - case 1: - return *get_local(stack, frame, idx); - case 2: - return *get_arg(stack, frame, idx); - case 3: - return *get_closure_var(frame, idx); - default: - fprintf(stderr, "Unknown designation type: %d\n", designation_type); - exit(1); - } -} - -/** - * The main execution loop of the virtual machine. - * Consumes bytecode and updates the stack and call stack accordingly. 
- */ -void run(bytecode *bc) { - stack_t stack; - call_stack_t call_stack; - stack_init(&stack); - call_stack_init(&call_stack); - - // gc initialization - __init(); - - aint *globals = stack.data; - // space for globals - // TODO: might not be the place to store globals - for (int i = 0; i < bc->globals_count; i++) { - stack_push(&stack, 0); - } - - int ip = bc->entry_point; - int return_ip = -1; - - while (ip < bc->code_size) { - uint8_t opcode = bc->code[ip++]; - int l = opcode & 0xF; - - VM_TRACE_OP(opcode, ip - 1); - VM_TRACE_STACK(&stack); - - switch (opcode) { - case OP_CONST: { - int n = read_i32(bc->code, ip); - ip += 4; - VM_DEBUG("CONST: %d\n", n); - stack_push(&stack, BOX(n)); - break; - } - case OP_BINOP_ADD: - case OP_BINOP_SUB: - case OP_BINOP_MUL: - case OP_BINOP_DIV: - case OP_BINOP_MOD: - case OP_BINOP_EQ: - case OP_BINOP_NE: - case OP_BINOP_LT: - case OP_BINOP_LE: - case OP_BINOP_GT: - case OP_BINOP_GE: - case OP_BINOP_AND: - case OP_BINOP_OR: { - aint y = stack_pop(&stack); - aint x = stack_pop(&stack); - aint result; - switch (l) { - case 1: // + - result = Ls__Infix_43((void *)x, (void *)y); - break; - case 2: // - - result = Ls__Infix_45((void *)x, (void *)y); - break; - case 3: // * - result = Ls__Infix_42((void *)x, (void *)y); - break; - case 4: // / - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - goto end; - } - result = Ls__Infix_47((void *)x, (void *)y); - break; - case 5: // % - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - goto end; - } - result = Ls__Infix_37((void *)x, (void *)y); - break; - case 6: // < - result = Ls__Infix_60((void *)x, (void *)y); - break; - case 7: // <= - result = Ls__Infix_6061((void *)x, (void *)y); - break; - case 8: // > - result = Ls__Infix_62((void *)x, (void *)y); - break; - case 9: // >= - result = Ls__Infix_6261((void *)x, (void *)y); - break; - case 10: // == - result = Ls__Infix_6161((void *)x, (void *)y); - break; - case 11: // != - result = Ls__Infix_3361((void 
*)x, (void *)y); - break; - case 12: // && - result = Ls__Infix_3838((void *)x, (void *)y); - break; - case 13: // !! - result = Ls__Infix_3333((void *)x, (void *)y); - break; - } - stack_push(&stack, result); - break; - } - case OP_JMP: { - int addr = read_i32(bc->code, ip); - ip = addr; - break; - } - case OP_CJMP_Z: { - int addr = read_i32(bc->code, ip); - ip += 4; - aint val = stack_pop(&stack); - if (UNBOX(val) == 0) { - ip = addr; - } - break; - } - case OP_CJMP_NZ: { - int addr = read_i32(bc->code, ip); - ip += 4; - aint val = stack_pop(&stack); - if (UNBOX(val) != 0) { - ip = addr; - } - break; - } - // TODO: unify ld and st - case OP_LD: { - int idx = read_i32(bc->code, ip); - ip += 4; - aint val = globals[idx]; - VM_DEBUG("LD global[%d] = %ld\n", idx, val); - stack_push(&stack, val); - break; - } - case OP_LD_LOC: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = *get_local(&stack, frame, idx); - VM_DEBUG("LD_LOC local[%d] = %ld\n", idx, val); - stack_push(&stack, val); - break; - } - case OP_LD_ARG: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = *get_arg(&stack, frame, idx); - VM_DEBUG("LD_ARG arg[%d] = %ld\n", idx, val); - stack_push(&stack, val); - break; - } - case OP_LD_CLO: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = *get_closure_var(frame, idx); - VM_DEBUG("LD_CLO closure[%d] = %ld\n", idx, val); - stack_push(&stack, val); - break; - } - case OP_ST: { - int idx = read_i32(bc->code, ip); - ip += 4; - aint val = stack_pop(&stack); - VM_DEBUG("ST global[%d] = %ld\n", idx, val); - globals[idx] = val; - stack_push(&stack, val); - break; - } - case OP_ST_LOC: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = stack_pop(&stack); - VM_DEBUG("ST_LOC local[%d] = %ld\n", idx, 
val); - *get_local(&stack, frame, idx) = val; - stack_push(&stack, val); - break; - } - case OP_ST_ARG: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = stack_pop(&stack); - VM_DEBUG("ST_ARG arg[%d] = %ld\n", idx, val); - *get_arg(&stack, frame, idx) = val; - stack_push(&stack, val); - break; - } - case OP_ST_CLO: { - int idx = read_i32(bc->code, ip); - ip += 4; - call_frame_t *frame = call_stack_current(&call_stack); - aint val = stack_pop(&stack); - VM_DEBUG("ST_CLO closure[%d] = %ld\n", idx, val); - *get_closure_var(frame, idx) = val; - stack_push(&stack, val); - break; - } - case OP_DROP: - stack_pop(&stack); - break; - case OP_DUP: - stack_dup(&stack); - break; - case OP_SWAP: - stack_swap(&stack); - break; - // TODO: possibly unify as well - case OP_BEGIN: { - int n_args = read_i32(bc->code, ip); - ip += 4; - int n_locals = read_i32(bc->code, ip); - ip += 4; - VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d\n", n_args, n_locals); - - // base points to arg0 (highest address of args) - int base = (stack.sp - stack.data) + n_args; - - // space for locals - for (int i = 0; i < n_locals; i++) { - stack_push(&stack, 0); - } - - call_stack_push(&call_stack, return_ip, base, n_args, n_locals, 0); - break; - } - case OP_BEGIN_CLOSURE: { - int n_args = read_i32(bc->code, ip); - ip += 4; - int n_locals = read_i32(bc->code, ip); - ip += 4; - VM_TRACE_CALL("BEGIN_CLOSURE n_args=%d n_locals=%d\n", n_args, n_locals); - - // CALLC already shifted args and removed closure from stack - int base = (stack.sp - stack.data) + n_args; - aint closure = pending_closure; - - // space for locals - for (int i = 0; i < n_locals; i++) { - stack_push(&stack, 0); - } - - call_stack_push(&call_stack, return_ip, base, n_args, n_locals, closure); - break; - } - case OP_CLOSURE: { - // addr:32 n_captured:32 [type:8 idx:32]... 
- int addr = read_i32(bc->code, ip); - ip += 4; - int n_captured = read_i32(bc->code, ip); - ip += 4; - - VM_DEBUG("CLOSURE addr=0x%08X n_captured=%d\n", addr, n_captured); - - aint args[n_captured + 1]; - args[0] = BOX(addr); - - for (int i = 0; i < n_captured; i++) { - aint val = read_designation(&stack, call_stack_current(&call_stack), - globals, bc->code, &ip); - VM_DEBUG("Captured[%d] = %ld\n", i, val); - args[i + 1] = val; - } - - void *closure = Bclosure(args, BOX(n_captured + 1)); - stack_push(&stack, (aint)closure); - break; - } - case OP_CALLC: { - int n_args = read_i32(bc->code, ip); - ip += 4; - - // stack: [... closure arg0 arg1 ... argN-1] - int base = (stack.sp - stack.data) + n_args + 1; - aint closure = stack.data[base]; - - // save closure for BEGIN_CLOSURE to retrieve - pending_closure = closure; - - // shift args over closure slot, removing closure from stack - for (int i = 0; i < n_args; i++) { - stack.data[base - i] = stack.data[base - i - 1]; - } - stack.sp++; - - aint entry_point = UNBOX(((aint *)closure)[0]); - VM_TRACE_CALL("CALLC n_args=%d closure=0x%lx entry=0x%lx\n", n_args, - closure, entry_point); - return_ip = ip; - ip = entry_point; - break; - } - case OP_CALL: { - int addr = read_i32(bc->code, ip); - ip += 4; - // discarding n_args - ip += 4; - VM_TRACE_CALL("CALL addr=0x%08X\n", addr); - return_ip = ip; - ip = addr; - break; - } - case OP_RET: - case OP_END: { - if (call_stack_is_empty(&call_stack)) { - goto end; - } - call_frame_t frame = call_stack_pop(&call_stack); - - int current_top = stack.sp - stack.data; - int returns_start = frame.base - frame.n_args - frame.n_locals; - int n_returns = returns_start - current_top; - - if (n_returns <= 0) { - n_returns = 0; - } else { - for (int i = 0; i < n_returns; i++) { - // TODO: make stack function for this - stack.data[frame.base - i] = stack.data[returns_start - i]; - } - } - - // sp points to empty slot below the return values - stack.sp = stack.data + frame.base - n_returns; - if 
(frame.return_ip < 0) { - goto end; - } - ip = frame.return_ip; - break; - } - - case OP_READ: { - stack_push(&stack, Lread()); - break; - } - case OP_WRITE: { - aint val = stack_pop(&stack); - stack_push(&stack, Lwrite(val)); - break; - } - case OP_STRING: { - // push string from string table onto stack - int str_offset = read_i32(bc->code, ip); - ip += 4; - const char *src = bc->string_table + str_offset; - void *str = Bstring((void *)&src); - stack_push(&stack, (aint)str); - break; - } - case OP_ELEM: { - // [top --> index, array] -> [element] - aint idx = stack_pop(&stack); - aint arr = stack_pop(&stack); - void *elem = Belem((void *)arr, idx); - stack_push(&stack, (aint)elem); - break; - } - case OP_STA: { - // TODO: support string (two operands) - aint val = stack_pop(&stack); - aint idx = stack_pop(&stack); - aint arr = stack_pop(&stack); - Bsta((void *)arr, idx, (void *)val); - stack_push(&stack, val); - break; - } - case OP_LENGTH: { - aint val = stack_pop(&stack); - aint len = Llength((void *)val); - stack_push(&stack, len); - break; - } - case OP_LSTRING: { - aint val = stack_pop(&stack); - void *str = Lstring(&val); - stack_push(&stack, (aint)str); - break; - } - case OP_BARRAY: { - int n = read_i32(bc->code, ip); - ip += 4; - aint args[n]; - for (int i = n - 1; i >= 0; i--) { - args[i] = stack_pop(&stack); - } - void *arr = Barray(args, BOX(n)); - stack_push(&stack, (aint)arr); - break; - } - case OP_SEXP: { - int tag_offset = read_i32(bc->code, ip); - ip += 4; - int n_fields = read_i32(bc->code, ip); - ip += 4; - const char *tag_str = bc->string_table + tag_offset; - aint tag_hash = LtagHash((char *)tag_str); - aint args[n_fields + 1]; - for (int i = n_fields - 1; i >= 0; i--) { - args[i] = stack_pop(&stack); - } - args[n_fields] = tag_hash; - - void *s = Bsexp(args, BOX(n_fields + 1)); - stack_push(&stack, (aint)s); - break; - } - case OP_TAG: { - int tag_offset = read_i32(bc->code, ip); - ip += 4; - int n_fields = read_i32(bc->code, ip); - ip += 4; 
- const char *tag_str = bc->string_table + tag_offset; - aint tag_hash = LtagHash((char *)tag_str); - aint val = stack_pop(&stack); - aint result = Btag((void *)val, tag_hash, BOX(n_fields)); - stack_push(&stack, result); - break; - } - case OP_ARRAY: { - int n = read_i32(bc->code, ip); - ip += 4; - aint val = stack_pop(&stack); - aint result = Barray_patt((void *)val, BOX(n)); - stack_push(&stack, result); - break; - } - case OP_FAIL: { - int line = read_i32(bc->code, ip); - ip += 4; - int col = read_i32(bc->code, ip); - ip += 4; - fprintf(stderr, "Match failure at line %d, column %d\n", line, col); - goto end; - } - case OP_PATT_STR_CMP: { - aint y = stack_pop(&stack); - aint x = stack_pop(&stack); - aint result = Bstring_patt((void *)x, (void *)y); - stack_push(&stack, result); - break; - } - case OP_PATT_STRING: { - aint val = stack_pop(&stack); - aint result = Bstring_tag_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_PATT_ARRAY: { - aint val = stack_pop(&stack); - aint result = Barray_tag_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_PATT_SEXP: { - aint val = stack_pop(&stack); - aint result = Bsexp_tag_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_PATT_BOXED: { - aint val = stack_pop(&stack); - aint result = Bboxed_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_PATT_UNBOXED: { - aint val = stack_pop(&stack); - aint result = Bunboxed_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_PATT_CLOSURE: { - aint val = stack_pop(&stack); - aint result = Bclosure_tag_patt((void *)val); - stack_push(&stack, result); - break; - } - case OP_HALT: - goto end; - case OP_LINE: - ip += 4; - break; - default: - fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08x\n", opcode, - ip - 1); - goto end; - } - } - -end: - return; -} - -/** - * Entry point for the VM. Loads bytecode from a file and starts execution. 
- */ -int main(int argc, char *argv[]) { - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - - bytecode *bc = load_bytecode(argv[1]); - if (!bc) { - return 1; - } - - run(bc); - - free_bytecode(bc); - return 0; -} diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 8df7345bd..1e208dbe0 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -1,7 +1,5 @@ #define _POSIX_C_SOURCE 200809L -#include "../runtime/gc.h" -#include "../runtime/runtime_common.h" #include "vm.h" #include #include @@ -11,15 +9,13 @@ #define MAX_INCLUDE_PATHS 64 -extern void set_args(aint argc, char *argv[]); - static void print_usage(const char *prog_name) { printf("Usage: %s [options] [args]\n", prog_name); printf("\nWhen no options are specified, the VM will run the bytecode file " - "and look for modules in the same directory.\n"); + "and look for units in the same directory.\n"); printf("Options:\n"); printf(" -h, --help Show this help message\n"); - printf(" -I, --include PATH Add PATH to module search paths (can be " + printf(" -I, --include PATH Add PATH to unit search paths (can be " "used multiple times)\n"); } @@ -29,7 +25,6 @@ int main(int argc, char *argv[]) { // TODO: better error handling in general int exit_code = 0; char *bytecode_dir = NULL; - search_paths paths = {0}; static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"include", required_argument, 0, 'I'}, @@ -53,13 +48,7 @@ int main(int argc, char *argv[]) { return 1; } break; - case '?': - if (optopt) { - fprintf(stderr, "Invalid command line specifier ('-%c')\n", optopt); - return 1; - } default: - fprintf(stderr, "Invalid command line specifier\n"); return 1; } } @@ -72,19 +61,14 @@ int main(int argc, char *argv[]) { char *bytecode_file = argv[optind]; - // Inlcude main module's directory by default + // Include main unit's directory by default bytecode_dir = strdup(dirname(bytecode_dir)); include_paths[0] = bytecode_dir; - paths.paths = (const char 
**)include_paths; - paths.len = include_path_count; - - __gc_init(); - + virtual_machine *vm = vm_create(bytecode_file, (const char **)include_paths, + include_path_count); // Skip options, pass only program args - set_args(argc - optind, argv + optind); - - virtual_machine *vm = vm_create(bytecode_file, &paths); + vm_set_args(vm, argc - optind, argv + optind); if (!vm) { exit_code = 1; goto cleanup; diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index c852b21d1..46e46d159 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -1,65 +1,164 @@ #include "linker.h" -#include "arena.h" +#include "bytecode.h" #include "decoder.h" -#include "module_manager.h" -#include +#include "ffi.h" +#include "memory.h" +#include "symbols.h" #include #include #include #include -// opcode handler used to identify module end -extern void op_module_end(DECL_STATE); +static void register_public_symbols(symbol_table *st, const bytecode *bc, + size_t code_offset, size_t global_base, + const int32_t *bc_to_insn_map) { + const public_symbols *pub = &bc->public_symbols; -insn *decode_and_link(module_manager *mm, memory *mem) { - arena_savepoint sp = arena_save(mem->tmp); - // TODO: shoudl be redone without dynamic array - symbol_table *st = ARENA_NEW(mem->tmp, symbol_table); - symbol_table_init(st); + for (size_t i = 0; i < pub->len; i++) { + const public_symbol *p = &pub->data[i]; - register_sysargs(st); + if (p->flag == PUB_FLAG_FUNCTION) { + // p->code_offset is the offset in the bytecode, so we use the mapping + int32_t insn_idx = bc_to_insn_map[p->code_offset]; + if (insn_idx == -1) { + fprintf(stderr, + "Error: public symbol '%s' at bytecode offset %d not decoded\n", + p->name, p->code_offset); + exit(EXIT_FAILURE); + } + int32_t code_idx = insn_idx + code_offset; + symbol_table_add_function(st, p->name, code_idx); + } else { + int32_t gidx = p->code_offset + global_base; + symbol_table_add_global(st, p->name, gidx); + } + } +} - ext_func_stub_table 
*fst = ARENA_NEW(mem->tmp, ext_func_stub_table); - ext_func_stub_table_init(fst); +/* + * Resolve all stubs from a decoded unit. + */ +static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, + symbol_table *st, ffi_call_table *ffi_stubs) { + // Unit's code starts at all_code + code_offset + insn *code = all_code + code_offset; + stub *stubs_arr = dec->stubs; + size_t stubs_len = dec->stubs_len; - insn *hd_insn = NULL; - insn *tl_insn = NULL; - for (size_t i = 0; i < mm->modules.len; i++) { - loaded_module *mod = mm->modules.data[i]; + for (size_t i = 0; i < stubs_len; i++) { + stub *s = &stubs_arr[i]; + size_t pi = s->patch_idx; - decode_ctx *ctx = decode_ctx_create(mod->bc, mod->global_base, mem->tmp); + switch (s->kind) { - insn *mod_code = decode(ctx, st, fst, mem); + case STUB_CALL: { + resolved_symbol *sym = symbol_table_find(st, s->name); - // Register public symbols from this module - register_public_symbols(st, mod_code, &mod->bc->public_symbols, - ctx->offset_map.offset_to_insn, mod->global_base); + // Decoder emitted: [NULL] [NULL] [n_args] + if (sym && sym->is_function) { + code[pi - 1].func = decoder_get_op_call(); + code[pi].target = &all_code[sym->idx]; + } else { + code[pi - 1].func = decoder_get_op_call_ffi_stub(); + code[pi].str = s->name; + } + break; + } + + case STUB_CLOSURE: { + resolved_symbol *sym = symbol_table_find(st, s->name); - if (hd_insn == NULL) { - hd_insn = &mod_code[0]; + if (sym && sym->is_function) { + code[pi].target = &all_code[sym->idx]; + } else { + // Not found in symbol table — create FFI stub + insn *ffi_stub = ffi_call_table_find(ffi_stubs, s->name); + if (!ffi_stub) { + ffi_stub = ffi_call_table_add(ffi_stubs, s->name, + decoder_get_op_callc_ffi_stub()); + } + code[pi].target = ffi_stub; + } + break; } - // Link previous module's end to this module's start - if (tl_insn != NULL) { - // prev_module_end is pointing to op_module_end instruction - // The next slot contains the target pointer (NULL 
placeholder) - tl_insn[1].target = &mod_code[0]; + case STUB_GLOBAL: { + resolved_symbol *sym = symbol_table_find(st, s->name); + if (sym && !sym->is_function) { + code[pi].num = sym->idx; + } else { + // TODO: C globals + exit(EXIT_FAILURE); + } + break; + } } + } +} - // Remember this module's op_module_end instruction for next iteration - // It was emitted after the first END, position stored in - // ctx->module_end_idx - size_t module_end_idx = ctx->module_end_idx; +program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { + symbol_table *st = symbol_table_create(); + ffi_call_table *ffi_stubs = ffi_call_table_create(); - // Last module's op_module_end target remains NULL (program ends) - if (module_end_idx == (size_t)-1) { - tl_insn = NULL; - } else { - tl_insn = &mod_code[module_end_idx]; + size_t total_code_len = 0; + size_t total_globals = 0; + + for (size_t i = 0; i < n; i++) { + decoded *dec = dec_arr[i]; + bytecode *bc = bc_arr[i]; + register_public_symbols(st, bc, total_code_len, total_globals, + dec->bc_to_insn_map); + total_code_len += dec->code_len; + total_globals += bc->globals_count; + } + + insn *all_code = ALLOC_ARRAY(insn, total_code_len); + + size_t code_offset = 0; + for (size_t i = 0; i < n; i++) { + decoded *dec = dec_arr[i]; + + memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); + + // Resolve internal jumps + for (size_t j = 0; j < dec->relocs_len; j++) { + size_t slot = dec->relocs[j]; + int32_t target_idx = all_code[code_offset + slot].num; + all_code[code_offset + slot].target = &all_code[code_offset + target_idx]; + } + + // Resolve all stubs + resolve_stubs(dec, all_code, code_offset, st, ffi_stubs); + + // Link main() functions across units + if (i < n - 1 && dec->unit_end_idx != (size_t)-1) { + size_t next_offset = code_offset + dec->code_len; + all_code[code_offset + dec->unit_end_idx + 1].target = + &all_code[next_offset]; } + + code_offset += dec->code_len; + } + + program *prog = ALLOC(program); + 
prog->code = all_code; + prog->code_len = total_code_len; + prog->total_globals = total_globals; + + symbol_table_destroy(st); + ffi_call_table_destroy(ffi_stubs); + // NOTE: we don't free bytecode here since it's used for strings etc. + for (size_t i = 0; i < n; i++) { + decoded_free(dec_arr[i]); } + free(dec_arr); - arena_restore(mem->tmp, sp); + return prog; +} - return hd_insn; +void prog_free(program *prog) { + if (prog) { + free(prog->code); + free(prog); + } } diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h index 7a5ed336d..f7739b86d 100644 --- a/virtual_machine/linker.h +++ b/virtual_machine/linker.h @@ -1,10 +1,19 @@ #ifndef LINKER_H #define LINKER_H +#include "bytecode.h" #include "decoder.h" -#include "module_manager.h" -#include "stddef.h" +#include "insn.h" +#include -insn *decode_and_link(module_manager *mm, memory *mem); +typedef struct { + insn *code; + size_t code_len; + size_t total_globals; +} program; -#endif +program *link(bytecode **bc_arr, decoded **dec_arr, size_t n); + +void prog_free(program *prog); + +#endif // LINKER_H diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c new file mode 100644 index 000000000..7764f3505 --- /dev/null +++ b/virtual_machine/loader.c @@ -0,0 +1,137 @@ +/* + * Unit loader implementation for Lama VM. + * Recursively loads bytecode files following import declarations. + */ + +#define _POSIX_C_SOURCE 200809L + +#include "loader.h" +#include "bytecode.h" +#include "da.h" +#include "memory.h" +#include +#include +#include +#include +#include +#include + +typedef struct { + bytecode **data; + size_t len; + size_t cap; +} bytecode_array; + +/* + * Build the path to a unit's .bc file by searching through paths. 
+ */ +static char *build_unit_path(const char *unit_name, const search_paths *paths) { + char *path = ALLOC_ARRAY(char, MAX_PATH_LEN); + + for (size_t i = 0; i < paths->len; i++) { + snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], unit_name); + if (access(path, F_OK) == 0) { + return path; + } + } + + free(path); + return NULL; +} + +/* + * Check if a string looks like a file path (contains '/' or ends with '.bc') + */ +static bool is_filepath(const char *str) { + size_t len = strlen(str); + return strchr(str, '/') != NULL || + (len > 3 && strcmp(str + len - 3, ".bc") == 0); +} + +static bool find_loaded(bytecode_array *units, const char *name) { + for (size_t i = 0; i < units->len; i++) { + if (strcmp(units->data[i]->name, name) == 0) { + return true; + } + } + return false; +} + +/* + * Extract name from filename (without path and extension .bc) + */ +static char *extract_unit_name(const char *filename) { + char *path_copy = ESTRDUP(filename); + char *base = basename(path_copy); + + char *dot = strrchr(base, '.'); + if (dot && strcmp(dot, ".bc") == 0) { + *dot = '\0'; + } + + char *result = ESTRDUP(base); + free(path_copy); + return result; +} + +/* + * Load a single unit and its dependencies recursively. 
+ */ +static bool load_unit_recursive(bytecode_array *units, const char *s, + const search_paths *paths) { + char *filepath = NULL; + char *unit_name = NULL; + + // The initial call uses a filepath, recursive calls use unit names + if (is_filepath(s)) { + filepath = ESTRDUP(s); + unit_name = extract_unit_name(s); + } else { + filepath = build_unit_path(s, paths); + unit_name = ESTRDUP(s); + } + + if (find_loaded(units, unit_name)) { + free(filepath); + free(unit_name); + return true; + } + + bytecode *bc = bytecode_load(filepath); + if (!bc) { + fprintf(stderr, "Failed to load dependency '%s' from '%s'\n", unit_name, + filepath); + free(filepath); + free(unit_name); + return false; + } + bc->name = unit_name; + + // Recursively load dependencies first (topological order) + for (size_t i = 0; i < bc->imports.len; i++) { + const char *import_name = bc->imports.data[i]; + + // Skip Std since we have it as runtime.a + if (strcmp(import_name, "Std") == 0) { + continue; + } + + load_unit_recursive(units, import_name, paths); + } + + da_append(*units, bc); + free(filepath); + return true; +} + +bytecode **load(const char *main_unit_path, const search_paths *paths, + size_t *out_len) { + bytecode_array m; + da_init(m); + + if (!load_unit_recursive(&m, main_unit_path, paths)) { + return NULL; + } + *out_len = m.len; + return m.data; +} diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h new file mode 100644 index 000000000..2827584f4 --- /dev/null +++ b/virtual_machine/loader.h @@ -0,0 +1,18 @@ +#ifndef LOADER_H +#define LOADER_H + +#include "bytecode.h" +#include +#include + +#define MAX_PATH_LEN 1024 + +typedef struct { + const char **paths; + size_t len; +} search_paths; + +bytecode **load(const char *main_unit_path, const search_paths *paths, + size_t *out_len); + +#endif // LOADER_H diff --git a/virtual_machine/memory.c b/virtual_machine/memory.c new file mode 100644 index 000000000..a64b52323 --- /dev/null +++ b/virtual_machine/memory.c @@ -0,0 +1,31 @@ 
+#define _POSIX_C_SOURCE 200809L + +#include "memory.h" +#include +#include +#include + +static void check_ptr(void *ptr, const char *file, int line) { + if (ptr == NULL) { + fprintf(stderr, "Out of memory at %s:%d\n", file, line); + exit(EXIT_FAILURE); + } +} + +void *emalloc(size_t size, const char *file, int line) { + void *ptr = malloc(size); + check_ptr(ptr, file, line); + return ptr; +} + +void *erealloc(void *ptr, size_t size, const char *file, int line) { + void *new_ptr = realloc(ptr, size); + check_ptr(new_ptr, file, line); + return new_ptr; +} + +char *estrdup(const char *s, const char *file, int line) { + char *ptr = strdup(s); + check_ptr(ptr, file, line); + return ptr; +} diff --git a/virtual_machine/memory.h b/virtual_machine/memory.h new file mode 100644 index 000000000..9a1535b9f --- /dev/null +++ b/virtual_machine/memory.h @@ -0,0 +1,20 @@ +#ifndef MEMORY_H +#define MEMORY_H + +#include + +void *emalloc(size_t size, const char *file, int line); +void *erealloc(void *ptr, size_t size, const char *file, int line); +char *estrdup(const char *s, const char *file, int line); + +// TODO: +#define EMALLOC(size) emalloc((size), __FILE__, __LINE__) +#define EREALLOC(ptr, size) erealloc((ptr), (size), __FILE__, __LINE__) +#define ESTRDUP(s) estrdup((s), __FILE__, __LINE__) + +#define ALLOC(type) ((type *)emalloc(sizeof(type), __FILE__, __LINE__)) + +#define ALLOC_ARRAY(type, count) \ + ((type *)emalloc(sizeof(type) * (count), __FILE__, __LINE__)) + +#endif diff --git a/virtual_machine/module_manager.c b/virtual_machine/module_manager.c deleted file mode 100644 index 352e8a685..000000000 --- a/virtual_machine/module_manager.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Module manager implementation for Lama VM. 
- */ - -#define _POSIX_C_SOURCE 200809L - -#include "module_manager.h" -#include "arena.h" -#include "bytecode.h" -#include "da.h" -#include -#include -#include -#include -#include -#include - -/* - * Build the path to a module's .bc file by searching through paths. - */ -static char *build_module_path(const char *module_name, - const search_paths *paths, arena *arena) { - char *path = ARENA_ALLOC(arena, char, MAX_PATH_LEN); - - for (size_t i = 0; i < paths->len; i++) { - snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], module_name); - if (access(path, F_OK) == 0) { - return path; - } - } - - return path; -} - -/* Extract module name from filename (without path and extension .bc) */ -static char *extract_module_name(const char *filename, arena *arena) { - char *path_copy = ARENA_STRDUP(arena, filename); - char *base = basename(path_copy); - - char *dot = strrchr(base, '.'); - if (dot && strcmp(dot, ".bc") == 0) { - *dot = '\0'; - } - - return ARENA_STRDUP(arena, base); -} - -/* - * Check if a string looks like a file path (contains '/' or ends with '.bc') - */ -static bool is_filepath(const char *str) { - size_t len = strlen(str); - return strchr(str, '/') != NULL || - (len > 3 && strcmp(str + len - 3, ".bc") == 0); -} - -static loaded_module *find_module(module_manager *mm, const char *name) { - for (size_t i = 0; i < mm->modules.len; i++) { - if (strcmp(mm->modules.data[i]->bc->module_name, name) == 0) { - return mm->modules.data[i]; - } - } - return NULL; -} - -/* - * Load modules recursively. 
- */ -static loaded_module *load_module(module_manager *mm, const char *s, - const search_paths *paths, memory *mem) { - char *filepath; - char *module_name; - - // The initial call uses a filepath, recursive calls use module names - if (is_filepath(s)) { - filepath = ARENA_STRDUP(mem->tmp, s); - module_name = extract_module_name(s, mem->tmp); - } else { - filepath = build_module_path(s, paths, mem->tmp); - module_name = ARENA_STRDUP(mem->tmp, s); - } - - // Check if module is already loaded (avoid duplicates and circular - // dependencies) - // TODO: check circular imports? - loaded_module *result = find_module(mm, module_name); - if (result) { - return result; - } - - bytecode *bc = load_bytecode(filepath, mem); - if (!bc) { - fprintf(stderr, "Failed to load module '%s' from '%s'\n", module_name, - filepath); - return NULL; - } - - // NOTE: a bit ugly: - bc->module_name = module_name; - - // Recursively load dependencies - for (size_t i = 0; i < bc->imports.len; i++) { - const char *import_name = bc->imports.data[i]; - - // Skip since we already have it (as runtime.a) - if (strcmp(import_name, "Std") == 0) { - continue; - } - - loaded_module *dep = load_module(mm, import_name, paths, mem); - if (!dep) { - fprintf(stderr, "Failed to load dependency '%s' for module '%s'\n", - import_name, module_name); - return NULL; - } - } - - result = ARENA_NEW(mem->main, loaded_module); - result->bc = bc; - result->global_base = (int32_t)mm->total_globals_count; - mm->total_globals_count += bc->globals_count; - mm->total_code_size += bc->code_size; - // TODO: also use arena - da_append(mm->modules, result); - return result; -} - -module_manager *load_modules(const char *main_module_path, - const search_paths *paths, memory *mem) { - - module_manager *mm = ARENA_NEW(mem->main, module_manager); - - da_init(mm->modules); - // Reserve global index 0 for sysargs - mm->total_globals_count = 1; - - arena_savepoint sp = arena_save(mem->tmp); - loaded_module *main_mod = load_module(mm, 
main_module_path, paths, mem); - arena_restore(mem->tmp, sp); - - if (!main_mod) { - return NULL; - } - - return mm; -} diff --git a/virtual_machine/module_manager.h b/virtual_machine/module_manager.h deleted file mode 100644 index dbee83a34..000000000 --- a/virtual_machine/module_manager.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Module loader and linker for Lama VM. - * - */ - -#ifndef MODULE_MANAGER_H -#define MODULE_MANAGER_H - -#include "arena.h" -#include "bytecode.h" -#include -#include - -#define MAX_PATH_LEN 1024 -#define INITIAL_SYMBOL_TABLE_CAP 64 -#define INITIAL_MODULE_CAP 8 - -typedef struct { - const char **paths; - size_t len; -} search_paths; - -typedef struct { - bytecode *bc; // Loaded bytecode - int32_t global_base; // Starting index for this module's globals -} loaded_module; - -typedef struct { - // Loaded modules (topological order) - struct { - loaded_module **data; - size_t len; - size_t cap; - } modules; - - // Combined globals count - // Used for stack allocation - size_t total_globals_count; - size_t total_code_size; - -} module_manager; - -module_manager *load_modules(const char *main_module_path, - const search_paths *paths, memory *mem); - -#endif diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c deleted file mode 100644 index 5c29f1268..000000000 --- a/virtual_machine/opcodes.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Utility functions for Lama VM opcodes. - * Provides debugging support, such as converting opcode values to string - * representations. 
- */ - -#include "opcodes.h" -#include -#include -#include - -const char *opcode_to_string(uint8_t opcode) { - switch (opcode) { - case OP_BINOP_ADD: - return "BINOP_ADD"; - case OP_BINOP_SUB: - return "BINOP_SUB"; - case OP_BINOP_MUL: - return "BINOP_MUL"; - case OP_BINOP_DIV: - return "BINOP_DIV"; - case OP_BINOP_MOD: - return "BINOP_MOD"; - case OP_BINOP_LT: - return "BINOP_LT"; - case OP_BINOP_LE: - return "BINOP_LE"; - case OP_BINOP_GT: - return "BINOP_GT"; - case OP_BINOP_GE: - return "BINOP_GE"; - case OP_BINOP_EQ: - return "BINOP_EQ"; - case OP_BINOP_NE: - return "BINOP_NE"; - case OP_BINOP_AND: - return "BINOP_AND"; - case OP_BINOP_OR: - return "BINOP_OR"; - case OP_CONST: - return "CONST"; - case OP_STRING: - return "STRING"; - case OP_SEXP: - return "SEXP"; - case OP_STA: - return "STA"; - case OP_JMP: - return "JMP"; - case OP_END: - return "END"; - case OP_RET: - return "RET"; - case OP_DROP: - return "DROP"; - case OP_DUP: - return "DUP"; - case OP_SWAP: - return "SWAP"; - case OP_ELEM: - return "ELEM"; - case OP_LD: - return "LD"; - case OP_LD_LOC: - return "LD_LOC"; - case OP_LD_ARG: - return "LD_ARG"; - case OP_LD_CLO: - return "LD_CLO"; - case OP_ST: - return "ST"; - case OP_ST_LOC: - return "ST_LOC"; - case OP_ST_ARG: - return "ST_ARG"; - case OP_ST_CLO: - return "ST_CLO"; - case OP_CJMP_Z: - return "CJMP_Z"; - case OP_CJMP_NZ: - return "CJMP_NZ"; - case OP_BEGIN: - return "BEGIN"; - case OP_BEGIN_CLOSURE: - return "BEGIN_CLOSURE"; - case OP_CLOSURE: - return "CLOSURE"; - case OP_CALLC: - return "CALLC"; - case OP_CALL: - return "CALL"; - case OP_TAG: - return "TAG"; - case OP_ARRAY: - return "ARRAY"; - case OP_FAIL: - return "FAIL"; - case OP_LINE: - return "LINE"; - case OP_PATT_STR_CMP: - return "PATT_STR_CMP"; - case OP_PATT_STRING: - return "PATT_STRING"; - case OP_PATT_ARRAY: - return "PATT_ARRAY"; - case OP_PATT_SEXP: - return "PATT_SEXP"; - case OP_PATT_BOXED: - return "PATT_BOXED"; - case OP_PATT_UNBOXED: - return "PATT_UNBOXED"; - case 
OP_PATT_CLOSURE: - return "PATT_CLOSURE"; - case OP_READ: - return "READ"; - case OP_WRITE: - return "WRITE"; - case OP_LENGTH: - return "LENGTH"; - case OP_LSTRING: - return "LSTRING"; - case OP_BARRAY: - return "BARRAY"; - default: - fprintf(stderr, "Unknown opcode: 0x%02X\n", opcode); - exit(1); - } -} diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 072675de9..6dcc4c39f 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -53,7 +53,7 @@ typedef enum { OP_PATT_BOXED = 0x64, OP_PATT_UNBOXED = 0x65, OP_PATT_CLOSURE = 0x66, - // TODO: remove this opcode, instead call real function + // TODO: remove this opcode, call real function instead OP_BARRAY = 0x74, } opcode_t; diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c new file mode 100644 index 000000000..71ecae15c --- /dev/null +++ b/virtual_machine/ops.c @@ -0,0 +1,672 @@ + +#include "ops.h" +#include "../runtime/runtime_common.h" +#include "ffi.h" +#include "insn.h" +#include +#include + +/* + * External runtime functions (runtime.c) + */ +extern aint Ls__Infix_43(void *p, void *q); // + +extern aint Ls__Infix_45(void *p, void *q); // - +extern aint Ls__Infix_42(void *p, void *q); // * +extern aint Ls__Infix_47(void *p, void *q); // / +extern aint Ls__Infix_37(void *p, void *q); // % +extern aint Ls__Infix_60(void *p, void *q); // < +extern aint Ls__Infix_6061(void *p, void *q); // <= +extern aint Ls__Infix_62(void *p, void *q); // > +extern aint Ls__Infix_6261(void *p, void *q); // >= +extern aint Ls__Infix_6161(void *p, void *q); // == +extern aint Ls__Infix_3361(void *p, void *q); // != +extern aint Ls__Infix_3838(void *p, void *q); // && +extern aint Ls__Infix_3333(void *p, void *q); // || + +extern aint LtagHash(char *s); +extern void *Barray(aint *args, aint bn); +extern void *Bsexp(aint *args, aint bn); +extern void *Bclosure(aint *args, aint bn); +extern void *Bstring(aint *args); +extern void *Belem(void *p, aint i); +extern void *Bsta(void *x, aint i, 
void *v); + +extern aint Btag(void *d, aint t, aint n); +extern aint Barray_patt(void *d, aint n); +extern aint Bstring_patt(void *x, void *y); +extern aint Bclosure_tag_patt(void *x); +extern aint Bboxed_patt(void *x); +extern aint Bunboxed_patt(void *x); +extern aint Barray_tag_patt(void *x); +extern aint Bstring_tag_patt(void *x); +extern aint Bsexp_tag_patt(void *x); + +/* + * Debug macros + */ +#ifdef DEBUG_PRINT +#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#define VM_TRACE_STACK(stack) \ + do { \ + long sp_idx = (stack)->sp - (stack)->data; \ + fprintf(stderr, " stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ + for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ + if (sp_idx + i < STACK_SIZE) { \ + fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ + } \ + } \ + fprintf(stderr, "\n"); \ + } while (0) +#define VM_TRACE_CALL(fmt, ...) fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) +#define VM_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ + __LINE__); \ + exit(1); \ + } \ + } while (0) +#else +#define VM_DEBUG(fmt, ...) +#define VM_TRACE_STACK(stack) +#define VM_TRACE_CALL(fmt, ...) 
+#define VM_ASSERT(cond, msg) +#endif + +#define DISPATCH() \ + do { \ + ip++; \ + __attribute__((musttail)) return ip->func(STATE); \ + } while (0) + +#define DISPATCH_JUMP() \ + do { \ + __attribute__((musttail)) return ip->func(STATE); \ + } while (0) + +/* + * Stack manipulation macros (stack grows downwards) + */ +#define STACK_PUSH(sp, val) (*sp-- = (val)) +#define STACK_POP(sp) (*++sp) +#define STACK_PEEK(sp) (*(sp + 1)) + +#define DEFINE_BINOP(name, fn, opname) \ + void name(DECL_STATE) { \ + aint y = STACK_POP(sp); \ + aint x = STACK_POP(sp); \ + VM_DEBUG(opname ": x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); \ + aint res = fn((void *)x, (void *)y); \ + VM_DEBUG(opname " result=%ld\n", (long)UNBOX(res)); \ + STACK_PUSH(sp, res); \ + DISPATCH(); \ + } + +DEFINE_BINOP(op_add, Ls__Infix_43, "ADD") +DEFINE_BINOP(op_sub, Ls__Infix_45, "SUB") +DEFINE_BINOP(op_mul, Ls__Infix_42, "MUL") +DEFINE_BINOP(op_lt, Ls__Infix_60, "LT") +DEFINE_BINOP(op_le, Ls__Infix_6061, "LE") +DEFINE_BINOP(op_gt, Ls__Infix_62, "GT") +DEFINE_BINOP(op_ge, Ls__Infix_6261, "GE") +DEFINE_BINOP(op_eq, Ls__Infix_6161, "EQ") +DEFINE_BINOP(op_ne, Ls__Infix_3361, "NE") +DEFINE_BINOP(op_and, Ls__Infix_3838, "AND") +DEFINE_BINOP(op_or, Ls__Infix_3333, "OR") + +#undef DEFINE_BINOP + +void op_const(DECL_STATE) { + ip++; + aint val = ip->num; + VM_DEBUG("CONST: %ld\n", (long)val); + STACK_PUSH(sp, BOX(val)); + DISPATCH(); +} + +void op_div(DECL_STATE) { + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); + if (UNBOX(y) == 0) { + fprintf(stderr, "Division by zero\n"); + exit(1); + } + aint res = Ls__Infix_47((void *)x, (void *)y); + VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); + STACK_PUSH(sp, res); + DISPATCH(); +} + +void op_mod(DECL_STATE) { + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); + if (UNBOX(y) == 0) { + fprintf(stderr, "Division by zero\n"); + 
exit(1); + } + aint res = Ls__Infix_37((void *)x, (void *)y); + VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); + STACK_PUSH(sp, res); + DISPATCH(); +} + +void op_drop(DECL_STATE) { + VM_DEBUG("DROP\n"); + sp++; + DISPATCH(); +} + +void op_dup(DECL_STATE) { + aint val = STACK_PEEK(sp); + VM_DEBUG("DUP: %ld\n", (long)UNBOX(val)); + STACK_PUSH(sp, val); + DISPATCH(); +} + +void op_swap(DECL_STATE) { + aint a = STACK_POP(sp); + aint b = STACK_POP(sp); + VM_DEBUG("SWAP: a=%ld, b=%ld\n", (long)UNBOX(a), (long)UNBOX(b)); + STACK_PUSH(sp, a); + STACK_PUSH(sp, b); + DISPATCH(); +} + +void op_elem(DECL_STATE) { + aint idx = STACK_POP(sp); + aint arr = STACK_POP(sp); + VM_DEBUG("ELEM: arr=%p, idx=%ld\n", (void *)arr, (long)UNBOX(idx)); + void *elem = Belem((void *)arr, idx); + STACK_PUSH(sp, (aint)elem); + DISPATCH(); +} + +void op_sta(DECL_STATE) { + aint val = STACK_POP(sp); + aint idx = STACK_POP(sp); + aint arr = STACK_POP(sp); + VM_DEBUG("STA: arr=%p, idx=%ld, val=%ld\n", (void *)arr, (long)UNBOX(idx), + (long)UNBOX(val)); + Bsta((void *)arr, idx, (void *)val); + STACK_PUSH(sp, val); + DISPATCH(); +} + +/* + * Jumps + */ +void op_jmp(DECL_STATE) { + ip++; + VM_DEBUG("JMP: target=%p\n", (void *)ip->target); + ip = ip->target; + DISPATCH_JUMP(); +} + +void op_cjmp_z(DECL_STATE) { + ip++; + insn *target = ip->target; + ip++; + aint val = STACK_POP(sp); + VM_DEBUG("CJMP_Z: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), + (void *)target, UNBOX(val) == 0); + if (UNBOX(val) == 0) { + ip = target; + } + DISPATCH_JUMP(); +} + +void op_cjmp_nz(DECL_STATE) { + ip++; + insn *target = ip->target; + ip++; + aint val = STACK_POP(sp); + VM_DEBUG("CJMP_NZ: val=%ld, target=%p, will_jump=%d\n", (long)UNBOX(val), + (void *)target, UNBOX(val) != 0); + if (UNBOX(val) != 0) { + ip = target; + } + DISPATCH_JUMP(); +} + +/* + * String, data etc. 
+ */ +void op_string(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + const char *str = ip->str; + VM_DEBUG("STRING: \"%s\"\n", str); + void *result = Bstring((void *)&str); + STACK_PUSH(sp, (aint)result); + DISPATCH(); +} + +void op_barray(DECL_STATE) { + (void)bp; + (void)globals; + ip++; + int32_t n = ip->num; + VM_DEBUG("BARRAY: n=%d\n", n); + aint *args_base = sp + 1; + aint tmp_args[256]; + // TODO: optimize for passing direct pointer + // instead of population array + for (int32_t i = 0; i < n; i++) { + tmp_args[i] = args_base[n - 1 - i]; + } + sp += n; + void *arr = Barray(tmp_args, BOX(n)); + STACK_PUSH(sp, (aint)arr); + DISPATCH(); +} + +void op_sexp(DECL_STATE) { + ip++; + const char *tag_str = ip->str; + ip++; + int32_t n_fields = ip->num; + + aint tag_hash = LtagHash((char *)tag_str); + VM_DEBUG("SEXP: tag=\"%s\" (hash=0x%lx), n_fields=%d\n", tag_str, tag_hash, + n_fields); + aint args[256]; + aint *args_base = sp + 1; + // TODO: optimize for passing direct pointer + // instead of population array + for (int32_t i = 0; i < n_fields; i++) { + args[i] = args_base[n_fields - 1 - i]; + } + args[n_fields] = tag_hash; + sp += n_fields; + + void *s = Bsexp(args, BOX(n_fields + 1)); + STACK_PUSH(sp, (aint)s); + DISPATCH(); +} + +void op_tag(DECL_STATE) { + ip++; + const char *tag_str = ip->str; + ip++; + int32_t n_fields = ip->num; + + aint tag_hash = LtagHash((char *)tag_str); + aint val = STACK_POP(sp); + VM_DEBUG("TAG: tag='%s' hash=0x%lx n_fields=%d val=0x%lx\n", tag_str, + (long)tag_hash, n_fields, (long)val); + aint result = Btag((void *)val, tag_hash, BOX(n_fields)); + VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_array(DECL_STATE) { + ip++; + int32_t n = ip->num; + aint val = STACK_POP(sp); + VM_DEBUG("ARRAY: n=%d, val=%p\n", n, (void *)val); + aint result = Barray_patt((void *)val, BOX(n)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_fail(DECL_STATE) { + ip++; + int32_t line = 
ip->num; + ip++; + int32_t col = ip->num; + VM_DEBUG("FAIL: line=%d, col=%d\n", line, col); + fprintf(stderr, "Match failure at line %d, column %d\n", line, col); + exit(1); +} + +/* + * Pattern matching operations + */ +void op_patt_str_cmp(DECL_STATE) { + aint y = STACK_POP(sp); + aint x = STACK_POP(sp); + VM_DEBUG("PATT_STR_CMP: x=%p, y=%p\n", (void *)x, (void *)y); + aint result = Bstring_patt((void *)x, (void *)y); + VM_DEBUG("PATT_STR_CMP result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_string(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_STRING: val=%p\n", (void *)val); + aint result = Bstring_tag_patt((void *)val); + VM_DEBUG("PATT_STRING result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_array(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_ARRAY: val=%p\n", (void *)val); + aint result = Barray_tag_patt((void *)val); + VM_DEBUG("PATT_ARRAY result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_sexp(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_SEXP: val=%p\n", (void *)val); + aint result = Bsexp_tag_patt((void *)val); + VM_DEBUG("PATT_SEXP result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_boxed(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_BOXED: val=%p\n", (void *)val); + aint result = Bboxed_patt((void *)val); + VM_DEBUG("PATT_BOXED result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_unboxed(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_UNBOXED: val=%ld\n", (long)val); + aint result = Bunboxed_patt((void *)val); + VM_DEBUG("PATT_UNBOXED result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_patt_closure(DECL_STATE) { + aint val = STACK_POP(sp); + VM_DEBUG("PATT_CLOSURE: val=%p\n", (void *)val); + aint result = Bclosure_tag_patt((void 
*)val); + VM_DEBUG("PATT_CLOSURE result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +/* + * Load / store operations + */ +void op_ld_glo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + VM_DEBUG("LD_GLO[%d] = %ld\n", idx, (long)globals[idx]); + STACK_PUSH(sp, globals[idx]); + DISPATCH(); +} + +void op_st_glo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_GLO[%d] = %ld\n", idx, (long)val); + globals[idx] = val; + DISPATCH(); +} + +void op_ld_loc(DECL_STATE) { + ip++; + int32_t idx = ip->num; + VM_DEBUG("LD_LOC[%d] bp=%p bp[-idx]=%ld\n", idx, (void *)bp, (long)bp[-idx]); + STACK_PUSH(sp, bp[-idx]); + DISPATCH(); +} + +void op_st_loc(DECL_STATE) { + ip++; + int32_t idx = ip->num; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_LOC[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); + bp[-idx] = val; + DISPATCH(); +} + +void op_ld_arg(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = bp[n_args + 1 - idx]; + VM_DEBUG("LD_ARG[%d] n_args=%d bp=%p val=%ld\n", idx, n_args, (void *)bp, + (long)val); + STACK_PUSH(sp, val); + DISPATCH(); +} + +void op_st_arg(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_ARG[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); + bp[n_args + 1 - idx] = val; + DISPATCH(); +} + +void op_ld_clo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint *closure = (aint *)bp[n_args + 2]; + VM_DEBUG("LD_CLO[%d] closure=%p val=%ld\n", idx, (void *)closure, + (long)closure[idx + 1]); + STACK_PUSH(sp, closure[idx + 1]); + DISPATCH(); +} + +void op_st_clo(DECL_STATE) { + ip++; + int32_t idx = ip->num; + int32_t n_args = (int32_t)bp[1]; + aint val = STACK_PEEK(sp); + aint *closure = (aint *)bp[n_args + 2]; + VM_DEBUG("ST_CLO[%d] = %ld closure=%p\n", idx, (long)val, (void *)closure); + closure[idx + 1] = val; + DISPATCH(); +} + +/* + * 
Function call operations + */ +void op_begin(DECL_STATE) { + + ip++; + int32_t n_args = ip->num; + (void)n_args; + ip++; + int32_t n_locals = ip->num; + ip++; + + VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, + (void *)bp, (void *)sp); + + for (int32_t i = 0; i < n_locals; i++) { + STACK_PUSH(sp, 0); + } + + DISPATCH(); +} + +void op_call(DECL_STATE) { + ip++; + insn *target = ip->target; + ip++; + int32_t n_args = ip->num; + + VM_TRACE_CALL("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, + n_args, (void *)sp, (void *)bp); + + STACK_PUSH(sp, (aint)n_args); + STACK_PUSH(sp, (aint)bp); + + aint *new_bp = sp + 1; + target->func(target, sp, new_bp, globals); + + aint ret_val = *new_bp; + + sp = new_bp + n_args + 1; + + STACK_PUSH(sp, ret_val); + DISPATCH(); +} + +void op_unit_end(DECL_STATE); + +void op_callc(DECL_STATE) { + ip++; + int32_t n_args = ip->num; + + aint closure_val = *(sp + 1 + n_args); + aint *closure = (aint *)closure_val; + + aint entry = closure[0]; + insn *target = (insn *)entry; + + VM_TRACE_CALL("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", + (void *)closure, (void *)target, n_args, (void *)sp, + (void *)bp); + + STACK_PUSH(sp, (aint)n_args); + STACK_PUSH(sp, (aint)bp); + + aint *new_bp = sp + 1; + target->func(target, sp, new_bp, globals); + + aint ret_val = *new_bp; + VM_DEBUG("CALLC: return value=%ld new_bp=%p\n", (long)ret_val, + (void *)new_bp); + + sp = new_bp + n_args + 2; + + STACK_PUSH(sp, ret_val); + DISPATCH(); +} + +void op_end(DECL_STATE) { + VM_TRACE_CALL("END sp=%p\n", (void *)sp); + aint ret_val = STACK_PEEK(sp); + *bp = ret_val; + // If a unit_end bridge follows, jump to it. + // Otherwise, return to finish execution. 
+ insn *next = ip + 1; + if (next && next->func == op_unit_end) { + VM_DEBUG("END: jumping to unit_end bridge at %p\n", (void *)next); + ip = next; + DISPATCH_JUMP(); + } + VM_DEBUG("END: returning (no unit bridge)\n"); + return; +} + +/* + * Closures + */ + +/* + * FFI closure stub - called when an external closure is invoked + * via op_callc This stub is generated for each unresolved external closure + * reference. The function name is embedded in the next instruction. + */ +void op_callc_ffi_stub(DECL_STATE) { + ip++; + const char *func_name = ip->str; + + int32_t n_args = (int32_t)bp[1]; + + VM_DEBUG("FFI_STUB: func='%s' n_args=%d bp=%p\n", func_name, n_args, + (void *)bp); + + aint args[256]; + for (int32_t i = 0; i < n_args; i++) { + args[i] = bp[n_args + 1 - i]; + } + + aint result = ffi_call_c(func_name, args, n_args); + VM_DEBUG("FFI_STUB: func='%s' result=%ld\n", func_name, (long)result); + + // Store result in return value slot + *bp = result; + + return; +} +void op_closure(DECL_STATE) { + ip++; + insn *target = ip->target; + ip++; + int32_t n_captured = ip->num; + + VM_DEBUG("CLOSURE: target=%p n_captured=%d\n", (void *)target, n_captured); + + aint tmp_args[256]; + tmp_args[0] = (aint)target; + aint *args_base = sp + 1; + for (int32_t i = 0; i < n_captured; i++) { + tmp_args[i + 1] = args_base[n_captured - 1 - i]; + VM_DEBUG("CLOSURE: captured[%d]=%ld\n", i, (long)tmp_args[i + 1]); + } + sp += n_captured; + + void *closure = Bclosure(tmp_args, BOX(n_captured)); + VM_DEBUG("CLOSURE: created=%p\n", (void *)closure); + STACK_PUSH(sp, (aint)closure); + DISPATCH(); +} + +// TODO: think about unifying with callc_ffi +void op_call_ffi_stub(DECL_STATE) { + ip++; + const char *func_name = ip->str; + ip++; + int32_t n_args = ip->num; + + VM_DEBUG("FFI_CALL: func='%s' n_args=%d\n", func_name, n_args); + + aint args[256]; + aint *args_base = sp + 1; + for (int32_t i = 0; i < n_args; i++) { + args[i] = args_base[n_args - 1 - i]; + } + sp += n_args; + + aint result 
= ffi_call_c(func_name, args, n_args); + + VM_DEBUG("FFI_CALL: result=%ld\n", (long)UNBOX(result)); + STACK_PUSH(sp, result); + DISPATCH(); +} + +void op_unit_end(DECL_STATE) { + ip++; + insn *next_unit = ip->target; + + VM_DEBUG("UNIT_END: next_unit=%p\n", (void *)next_unit); + + if (next_unit) { + ip = next_unit; + DISPATCH_JUMP(); + } + // If no next unit, just fall through (return) + VM_DEBUG("UNIT_END: no next unit, returning\n"); + + return; +} + +#ifdef DEBUG_PRINT +void op_line(DECL_STATE) { + ip++; + int32_t line = ip->num; + fprintf(stderr, "LINE %d\n", line); + (void)line; + DISPATCH(); +} +#else +void op_line(DECL_STATE) { + ip++; + DISPATCH(); +} +#endif diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h new file mode 100644 index 000000000..2ed1c58a1 --- /dev/null +++ b/virtual_machine/ops.h @@ -0,0 +1,69 @@ +#ifndef OPS_H +#define OPS_H + +#include "insn.h" + +void op_add(DECL_STATE); +void op_sub(DECL_STATE); +void op_mul(DECL_STATE); +void op_div(DECL_STATE); +void op_mod(DECL_STATE); + +void op_lt(DECL_STATE); +void op_le(DECL_STATE); +void op_gt(DECL_STATE); +void op_ge(DECL_STATE); +void op_eq(DECL_STATE); +void op_ne(DECL_STATE); + +void op_and(DECL_STATE); +void op_or(DECL_STATE); + +void op_const(DECL_STATE); +void op_drop(DECL_STATE); +void op_dup(DECL_STATE); +void op_swap(DECL_STATE); + +void op_elem(DECL_STATE); +void op_sta(DECL_STATE); +void op_string(DECL_STATE); +void op_barray(DECL_STATE); +void op_sexp(DECL_STATE); +void op_tag(DECL_STATE); +void op_array(DECL_STATE); + +void op_jmp(DECL_STATE); +void op_cjmp_z(DECL_STATE); +void op_cjmp_nz(DECL_STATE); +void op_fail(DECL_STATE); + +void op_patt_str_cmp(DECL_STATE); +void op_patt_string(DECL_STATE); +void op_patt_array(DECL_STATE); +void op_patt_sexp(DECL_STATE); +void op_patt_boxed(DECL_STATE); +void op_patt_unboxed(DECL_STATE); +void op_patt_closure(DECL_STATE); + +void op_ld_glo(DECL_STATE); +void op_st_glo(DECL_STATE); +void op_ld_loc(DECL_STATE); +void 
op_st_loc(DECL_STATE); +void op_ld_arg(DECL_STATE); +void op_st_arg(DECL_STATE); +void op_ld_clo(DECL_STATE); +void op_st_clo(DECL_STATE); + +void op_begin(DECL_STATE); +void op_call(DECL_STATE); +void op_callc(DECL_STATE); +void op_end(DECL_STATE); +void op_closure(DECL_STATE); +void op_callc_ffi_stub(DECL_STATE); +void op_call_ffi_stub(DECL_STATE); + +void op_unit_end(DECL_STATE); + +void op_line(DECL_STATE); + +#endif // OPS_H diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c new file mode 100644 index 000000000..485bec05d --- /dev/null +++ b/virtual_machine/symbols.c @@ -0,0 +1,66 @@ +#include "symbols.h" +#include "da.h" +#include "memory.h" +#include +#include +#include + +struct symbol_table { + resolved_symbol *data; + size_t len; + size_t cap; +}; + +symbol_table *symbol_table_create(void) { + symbol_table *table = ALLOC(symbol_table); + da_init(*table); + return table; +} + +void symbol_table_destroy(symbol_table *table) { + da_free(*table); + free(table); +} + +resolved_symbol *symbol_table_find(symbol_table *table, const char *name) { + for (size_t i = 0; i < table->len; i++) { + if (strcmp(table->data[i].name, name) == 0) { + return &table->data[i]; + } + } + return NULL; +} + +static int symbol_table_add(symbol_table *table, const char *name, + bool is_function, int32_t idx) { + + // Allow duplicate "main" (each bytecode has one) + if (strcmp(name, "main") != 0) { + resolved_symbol *existing = symbol_table_find(table, name); + if (existing) { + fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", + name); + exit(EXIT_FAILURE); + } + } + + resolved_symbol entry = { + .name = name, + .is_function = is_function, + .idx = idx, + }; + + da_append(*table, entry); + + return 0; +} + +int symbol_table_add_function(symbol_table *table, const char *name, + int32_t code_idx) { + return symbol_table_add(table, name, true, code_idx); +} + +int symbol_table_add_global(symbol_table *table, const char *name, + int32_t global_idx) { 
+ return symbol_table_add(table, name, false, global_idx); +} diff --git a/virtual_machine/symbols.h b/virtual_machine/symbols.h new file mode 100644 index 000000000..d16f9d17c --- /dev/null +++ b/virtual_machine/symbols.h @@ -0,0 +1,35 @@ +#ifndef SYMBOLS_H +#define SYMBOLS_H + +#include "insn.h" +#include +#include +#include + +/* + * Resolved symbol — represents a public function or global variable + * that has been registered by the linker after decoding a unit. + */ +typedef struct { + const char *name; // Symbol name (points into bytecode's string table) + bool is_function; // true = function, false = global variable + // For functions: index into the final code array + // For globals: rebased global index (stack position) + int32_t idx; +} resolved_symbol; + +/* + * Maps symbol names to resolved symbols (functions or globals). + * Used by the linker to resolve stubs after all units are decoded. + */ +typedef struct symbol_table symbol_table; + +symbol_table *symbol_table_create(void); +void symbol_table_destroy(symbol_table *table); +resolved_symbol *symbol_table_find(symbol_table *table, const char *name); +int symbol_table_add_function(symbol_table *table, const char *name, + int32_t code_index); +int symbol_table_add_global(symbol_table *table, const char *name, + int32_t global_idx); + +#endif // SYMBOLS_H diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index e737cfdfc..354e90c82 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -1,40 +1,70 @@ #include "vm.h" +#include "../runtime/gc.h" #include "../runtime/runtime_common.h" -#include "arena.h" #include "decoder.h" #include "linker.h" -#include "module_manager.h" +#include "loader.h" +#include "memory.h" +#include #include #include #include -extern void set_args(aint argc, char *argv[]); extern size_t __gc_stack_top, __gc_stack_bottom; +extern void set_args(aint argc, char *argv[]); + +struct virtual_machine { + bytecode **bc_arr; // Array of loaded bytecode units + size_t bc_len; 
+ insn *entry_point; // Entry point instruction + size_t total_globals; // Number of globals +}; -virtual_machine *vm_create(const char *main_module_path, - const search_paths *paths) { +virtual_machine *vm_create(const char *main_unit_path, const char **paths, + size_t total_paths_len) { + __gc_init(); + search_paths search_paths = {.paths = paths, .len = total_paths_len}; - // TODO: estimates - memory *mem = memory_create(1024 * 1024, 4096); - virtual_machine *vm = ARENA_NEW(mem->main, virtual_machine); + virtual_machine *vm = ALLOC(virtual_machine); - module_manager *mm = load_modules(main_module_path, paths, mem); - if (!mm) { - memory_destroy(mem); + bytecode **bc_arr = load(main_unit_path, &search_paths, &vm->bc_len); + if (!bc_arr) { + free(vm); + return NULL; + } + vm->bc_arr = bc_arr; + decoded **decoded_arr = decode(bc_arr, vm->bc_len); + if (!decoded_arr) { + for (size_t i = 0; i < vm->bc_len; i++) { + bytecode_free(bc_arr[i]); + } + free(bc_arr); + free(vm); return NULL; } - vm->globals_count = mm->total_globals_count; + program *prog = link(bc_arr, decoded_arr, vm->bc_len); - insn *entry_point = decode_and_link(mm, mem); - vm->entry_point = entry_point; + vm->total_globals = prog->total_globals; + vm->entry_point = prog->code; - vm->mem = mem; + free(prog); return vm; } -void vm_destroy(virtual_machine *vm) { memory_destroy(vm->mem); } +void vm_destroy(virtual_machine *vm) { + for (size_t i = 0; i < vm->bc_len; i++) { + bytecode_free(vm->bc_arr[i]); + } + free(vm->bc_arr); + free(vm->entry_point); + free(vm); +} + +void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { + set_args(argc, argv); +} aint vm_run(virtual_machine *vm) { diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index d84619a3b..79746e283 100644 --- a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -1,23 +1,18 @@ #ifndef VM_H #define VM_H -#include "arena.h" -#include "decoder.h" -#include "module_manager.h" +#include "../runtime/runtime_common.h" #include -typedef 
struct { - size_t globals_count; // Number of globals - insn *entry_point; // Entry point instruction - memory *mem; // Memory managed by arenas +typedef struct virtual_machine virtual_machine; -} virtual_machine; - -virtual_machine *vm_create(const char *main_module_path, - const search_paths *paths); +virtual_machine *vm_create(const char *main_unit_path, const char **paths, + size_t total_paths_len); void vm_destroy(virtual_machine *vm); +void vm_set_args(virtual_machine *vm, int argc, char *argv[]); + aint vm_run(virtual_machine *vm); #endif // VM_H From 1be099261a01353f7ab73a912f3613b9baea0ec6 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 13 Feb 2026 13:17:52 +0300 Subject: [PATCH 030/141] store public symbol flag as 1 byte --- src/SM.ml | 4 ++-- virtual_machine/bytecode.c | 4 ++-- virtual_machine/bytecode.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index c1b527738..891eb30b4 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -404,7 +404,7 @@ module ByteCode = struct with Not_found -> failwith (Printf.sprintf "ERROR: undefined label of public '%s'" name) in - (Int32.of_int @@ StringTab.add st name, Int32.of_int pos, Int32.of_int flag)) + (Int32.of_int @@ StringTab.add st name, Int32.of_int pos, flag)) !pubs in let imports = @@ -422,7 +422,7 @@ module ByteCode = struct (fun (name_off, offset, flag) -> Buffer.add_int32_le file name_off; Buffer.add_int32_le file offset; - Buffer.add_int32_le file flag) + Buffer.add_char file (Char.chr flag)) pubs_resolved; Buffer.add_bytes file code; let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 79c551af0..3f4359cc0 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -12,7 +12,7 @@ #include #define HEADER_SIZE 16 -#define PUB_ENTRY_SIZE 12 +#define PUB_ENTRY_SIZE 9 #define IMPORT_ENTRY_SIZE 4 bytecode *bytecode_load(const char *filename) { @@ -80,7 +80,7 @@ bytecode 
*bytecode_load(const char *filename) { for (int32_t i = 0; i < num_pubs; i++) { int32_t name_offset = reader_i32(&reader); int32_t code_off = reader_i32(&reader); - int32_t flag = reader_i32(&reader); + uint8_t flag = reader_u8(&reader); bc->public_symbols.data[i].name = string_table + name_offset; bc->public_symbols.data[i].code_offset = code_off; diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index e8b41d5e6..dea284716 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -12,7 +12,7 @@ typedef struct { const char *name; // Direct pointer to string int32_t code_offset; // Offset into bytecode section (for functions) or global // index - int32_t flag; // PUB_FLAG_FUNCTION or PUB_FLAG_GLOBAL + uint8_t flag; // PUB_FLAG_FUNCTION or PUB_FLAG_GLOBAL } public_symbol; typedef struct { From 90b2d62b1b3cf8fa61a5b62db29140adf6ae1d69 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 13 Feb 2026 13:47:18 +0300 Subject: [PATCH 031/141] init globals to 0 --- virtual_machine/vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 354e90c82..0b41b6cda 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -79,9 +79,9 @@ aint vm_run(virtual_machine *vm) { // Globals at the top of stack aint *globals = stack_data; - - extern void *global_sysargs; - globals[0] = (aint)global_sysargs; + for (size_t i = 0; i < vm->total_globals; i++) { + globals[i] = 0; + } aint *sp = &stack_data[active_stack_size - 1]; aint *bp = sp; From 61a754017795c5596cc47674e77a9c71fdd08e9a Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 13 Feb 2026 16:40:33 +0300 Subject: [PATCH 032/141] change `loader.c` to handle cyclic import --- virtual_machine/decoder.c | 14 ----- virtual_machine/decoder.h | 1 - virtual_machine/linker.c | 13 +++-- virtual_machine/linker.h | 2 + virtual_machine/loader.c | 105 ++++++++++++++++++++++++++++---------- virtual_machine/loader.h | 10 +++- 
virtual_machine/ops.c | 28 +--------- virtual_machine/ops.h | 2 - virtual_machine/vm.c | 44 +++++++++++----- 9 files changed, 126 insertions(+), 93 deletions(-) diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index db0fcc4e8..28cb6c1f8 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -86,7 +86,6 @@ typedef struct { size_t code_len; byte_reader reader; size_t global_offset; - size_t unit_end_idx; struct { stub *data; @@ -112,7 +111,6 @@ decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset) { ctx->code = NULL; ctx->code_len = 0; ctx->global_offset = global_offset; - ctx->unit_end_idx = -1; ctx->bc_to_insn_map = NULL; da_init(ctx->stubs); @@ -258,7 +256,6 @@ static insn *decode_internal(decode_ctx *ctx) { } int32_t depth = 0; - bool first_end_seen = false; while (!reader_eof(&ctx->reader)) { size_t current_bc_off = reader_pos(&ctx->reader); @@ -699,16 +696,6 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_END: EMIT_FUNC(ctx, op_end); DEPTH_DEAD(depth); - - // After the first END (main function's end), emit unit bridge - if (!first_end_seen) { - first_end_seen = true; - VM_DEBUG("DECODE: First END detected, emitting op_unit_end bridge\n"); - ctx->unit_end_idx = ctx->code_len; - EMIT_FUNC(ctx, op_unit_end); - // Will be patched by linker - EMIT_TARGET(ctx, NULL); - } break; case OP_LINE: { @@ -779,7 +766,6 @@ decoded **decode(bytecode **bc_arr, size_t n) { .code_len = ctx->code_len, .stubs = ctx->stubs.data, .stubs_len = ctx->stubs.len, - .unit_end_idx = ctx->unit_end_idx, .bc_to_insn_map = ctx->bc_to_insn_map, .relocs = ctx->relocs.data, .relocs_len = ctx->relocs.len, diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 7f7896c32..8f6aa06e3 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -31,7 +31,6 @@ typedef struct { size_t code_len; stub *stubs; // Fixups for the linker to resolve size_t stubs_len; - size_t unit_end_idx; // Index of op_unit_end in 
code[] (-1 if none) int32_t *bc_to_insn_map; size_t *relocs; // Indices of insn with internal target offsets size_t relocs_len; diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index 46e46d159..60954d37f 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -113,6 +113,7 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { } insn *all_code = ALLOC_ARRAY(insn, total_code_len); + insn **entry_points = ALLOC_ARRAY(insn *, n); size_t code_offset = 0; for (size_t i = 0; i < n; i++) { @@ -120,6 +121,8 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); + entry_points[i] = &all_code[code_offset]; + // Resolve internal jumps for (size_t j = 0; j < dec->relocs_len; j++) { size_t slot = dec->relocs[j]; @@ -130,13 +133,6 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { // Resolve all stubs resolve_stubs(dec, all_code, code_offset, st, ffi_stubs); - // Link main() functions across units - if (i < n - 1 && dec->unit_end_idx != (size_t)-1) { - size_t next_offset = code_offset + dec->code_len; - all_code[code_offset + dec->unit_end_idx + 1].target = - &all_code[next_offset]; - } - code_offset += dec->code_len; } @@ -144,6 +140,8 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { prog->code = all_code; prog->code_len = total_code_len; prog->total_globals = total_globals; + prog->entry_points = entry_points; + prog->entry_points_len = n; symbol_table_destroy(st); ffi_call_table_destroy(ffi_stubs); @@ -159,6 +157,7 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { void prog_free(program *prog) { if (prog) { free(prog->code); + free(prog->entry_points); free(prog); } } diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h index f7739b86d..433474d83 100644 --- a/virtual_machine/linker.h +++ b/virtual_machine/linker.h @@ -10,6 +10,8 @@ typedef struct { insn *code; size_t code_len; size_t 
total_globals; + insn **entry_points; // Entry point for each unit (pointer into code) + size_t entry_points_len; } program; program *link(bytecode **bc_arr, decoded **dec_arr, size_t n); diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 7764f3505..03f1d7e60 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -22,6 +22,12 @@ typedef struct { size_t cap; } bytecode_array; +typedef struct { + size_t *data; // Indices into the loaded bytecode_array + size_t len; + size_t cap; +} exec_order; + /* * Build the path to a unit's .bc file by searching through paths. */ @@ -48,13 +54,16 @@ static bool is_filepath(const char *str) { (len > 3 && strcmp(str + len - 3, ".bc") == 0); } -static bool find_loaded(bytecode_array *units, const char *name) { +/* + * Find a loaded unit by name. Returns its index, or (size_t)-1 if not found. + */ +static size_t find_loaded(bytecode_array *units, const char *name) { for (size_t i = 0; i < units->len; i++) { if (strcmp(units->data[i]->name, name) == 0) { - return true; + return i; } } - return false; + return (size_t)-1; } /* @@ -77,21 +86,14 @@ static char *extract_unit_name(const char *filename) { /* * Load a single unit and its dependencies recursively. 
*/ -static bool load_unit_recursive(bytecode_array *units, const char *s, - const search_paths *paths) { - char *filepath = NULL; - char *unit_name = NULL; - - // The initial call uses a filepath, recursive calls use unit names - if (is_filepath(s)) { - filepath = ESTRDUP(s); - unit_name = extract_unit_name(s); - } else { - filepath = build_unit_path(s, paths); - unit_name = ESTRDUP(s); - } +static bool load_unit_recursive(bytecode_array *units, exec_order *order, + const char *s, const search_paths *paths) { + + char *filepath = build_unit_path(s, paths); + char *unit_name = ESTRDUP(s); - if (find_loaded(units, unit_name)) { + size_t existing = find_loaded(units, unit_name); + if (existing != (size_t)-1) { free(filepath); free(unit_name); return true; @@ -107,7 +109,10 @@ static bool load_unit_recursive(bytecode_array *units, const char *s, } bc->name = unit_name; - // Recursively load dependencies first (topological order) + size_t my_idx = units->len; + da_append(*units, bc); + + // Recursively load dependencies for (size_t i = 0; i < bc->imports.len; i++) { const char *import_name = bc->imports.data[i]; @@ -116,22 +121,70 @@ static bool load_unit_recursive(bytecode_array *units, const char *s, continue; } - load_unit_recursive(units, import_name, paths); + if (!load_unit_recursive(units, order, import_name, paths)) { + free(filepath); + return false; + } } - da_append(*units, bc); + da_append(*order, my_idx); free(filepath); return true; } -bytecode **load(const char *main_unit_path, const search_paths *paths, - size_t *out_len) { +static bytecode *load_main_unit(const char *path) { + char *filepath = ESTRDUP(path); + char *unit_name = extract_unit_name(path); + bytecode *bc = bytecode_load(filepath); + if (!bc) { + fprintf(stderr, "Failed to load main unit from '%s'\n", filepath); + exit(EXIT_FAILURE); + } + + bc->name = unit_name; + free(filepath); + return bc; +} + +load_result load(const char *main_unit_path, const search_paths *paths) { + load_result 
result = {0}; bytecode_array m; da_init(m); + exec_order order; + da_init(order); + + bytecode *bc = load_main_unit(main_unit_path); + + for (size_t i = 0; i < bc->imports.len; i++) { + const char *import_name = bc->imports.data[i]; + + // Skip Std since we have it as runtime.a + if (strcmp(import_name, "Std") == 0) { + continue; + } - if (!load_unit_recursive(&m, main_unit_path, paths)) { - return NULL; + if (!load_unit_recursive(&m, &order, import_name, paths)) { + free(order.data); + return result; + } } - *out_len = m.len; - return m.data; + + // Check if main unit was already loaded as a dependency + // NOTE: this is all done to comply with the semantics of the reference + // implementation which allows main module to execute twice (if it's imported + // by one of its dependencies). + size_t main_idx = find_loaded(&m, bc->name); + if (main_idx == (size_t)-1) { + main_idx = m.len; + da_append(m, bc); + } else { + bytecode_free(bc); + } + da_append(order, main_idx); + + result.units = m.data; + result.units_len = m.len; + result.exec_order = order.data; + result.exec_order_len = order.len; + return result; } diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h index 2827584f4..22dcd3ee1 100644 --- a/virtual_machine/loader.h +++ b/virtual_machine/loader.h @@ -12,7 +12,13 @@ typedef struct { size_t len; } search_paths; -bytecode **load(const char *main_unit_path, const search_paths *paths, - size_t *out_len); +typedef struct { + bytecode **units; // Array of unique loaded bytecode units + size_t units_len; // Number of unique units + size_t *exec_order; // Indices into units[], in execution order + size_t exec_order_len; // Length of exec_order (may be units_len + 1) +} load_result; + +load_result load(const char *main_unit_path, const search_paths *paths); #endif // LOADER_H diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 71ecae15c..b5e4768ba 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -515,8 +515,6 @@ void 
op_call(DECL_STATE) { DISPATCH(); } -void op_unit_end(DECL_STATE); - void op_callc(DECL_STATE) { ip++; int32_t n_args = ip->num; @@ -551,15 +549,6 @@ void op_end(DECL_STATE) { VM_TRACE_CALL("END sp=%p\n", (void *)sp); aint ret_val = STACK_PEEK(sp); *bp = ret_val; - // If a unit_end bridge follows, jump to it. - // Otherwise, return to finish execution. - insn *next = ip + 1; - if (next && next->func == op_unit_end) { - VM_DEBUG("END: jumping to unit_end bridge at %p\n", (void *)next); - ip = next; - DISPATCH_JUMP(); - } - VM_DEBUG("END: returning (no unit bridge)\n"); return; } @@ -594,6 +583,7 @@ void op_callc_ffi_stub(DECL_STATE) { return; } + void op_closure(DECL_STATE) { ip++; insn *target = ip->target; @@ -640,22 +630,6 @@ void op_call_ffi_stub(DECL_STATE) { DISPATCH(); } -void op_unit_end(DECL_STATE) { - ip++; - insn *next_unit = ip->target; - - VM_DEBUG("UNIT_END: next_unit=%p\n", (void *)next_unit); - - if (next_unit) { - ip = next_unit; - DISPATCH_JUMP(); - } - // If no next unit, just fall through (return) - VM_DEBUG("UNIT_END: no next unit, returning\n"); - - return; -} - #ifdef DEBUG_PRINT void op_line(DECL_STATE) { ip++; diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index 2ed1c58a1..d877cb730 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -62,8 +62,6 @@ void op_closure(DECL_STATE); void op_callc_ffi_stub(DECL_STATE); void op_call_ffi_stub(DECL_STATE); -void op_unit_end(DECL_STATE); - void op_line(DECL_STATE); #endif // OPS_H diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 0b41b6cda..97242ed2b 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -14,10 +14,14 @@ extern size_t __gc_stack_top, __gc_stack_bottom; extern void set_args(aint argc, char *argv[]); struct virtual_machine { - bytecode **bc_arr; // Array of loaded bytecode units + bytecode **bc_arr; // Array of unique loaded bytecode units size_t bc_len; - insn *entry_point; // Entry point instruction - size_t total_globals; // Number of 
globals + insn *code; // Contiguous code array + insn **entry_points; // Entry point for each unique unit + size_t entry_points_len; + size_t *exec_order; // Indices into entry_points[], execution order + size_t exec_order_len; + size_t total_globals; }; virtual_machine *vm_create(const char *main_unit_path, const char **paths, @@ -27,26 +31,33 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, virtual_machine *vm = ALLOC(virtual_machine); - bytecode **bc_arr = load(main_unit_path, &search_paths, &vm->bc_len); - if (!bc_arr) { + load_result lr = load(main_unit_path, &search_paths); + if (!lr.units) { free(vm); return NULL; } - vm->bc_arr = bc_arr; - decoded **decoded_arr = decode(bc_arr, vm->bc_len); + vm->bc_arr = lr.units; + vm->bc_len = lr.units_len; + + decoded **decoded_arr = decode(lr.units, lr.units_len); if (!decoded_arr) { for (size_t i = 0; i < vm->bc_len; i++) { - bytecode_free(bc_arr[i]); + bytecode_free(lr.units[i]); } - free(bc_arr); + free(lr.units); + free(lr.exec_order); free(vm); return NULL; } - program *prog = link(bc_arr, decoded_arr, vm->bc_len); + program *prog = link(lr.units, decoded_arr, lr.units_len); vm->total_globals = prog->total_globals; - vm->entry_point = prog->code; + vm->code = prog->code; + vm->entry_points = prog->entry_points; + vm->entry_points_len = prog->entry_points_len; + vm->exec_order = lr.exec_order; + vm->exec_order_len = lr.exec_order_len; free(prog); @@ -58,7 +69,9 @@ void vm_destroy(virtual_machine *vm) { bytecode_free(vm->bc_arr[i]); } free(vm->bc_arr); - free(vm->entry_point); + free(vm->code); + free(vm->entry_points); + free(vm->exec_order); free(vm); } @@ -86,9 +99,12 @@ aint vm_run(virtual_machine *vm) { aint *sp = &stack_data[active_stack_size - 1]; aint *bp = sp; - insn *ip = vm->entry_point; + for (size_t i = 0; i < vm->exec_order_len; i++) { + size_t unit_idx = vm->exec_order[i]; + insn *ip = vm->entry_points[unit_idx]; - ip->func(ip, sp, bp, globals); + ip->func(ip, sp, bp, 
globals); + } return *bp; } From 75d27f977e73606215afe2e4d70d472cf22351d9 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 13 Feb 2026 17:15:43 +0300 Subject: [PATCH 033/141] support external globals --- virtual_machine/decoder.c | 16 ++++++++++++---- virtual_machine/decoder.h | 7 ++++++- virtual_machine/insn.h | 1 + virtual_machine/linker.c | 25 ++++++++++++++++++++++--- virtual_machine/ops.c | 20 ++++++++++++++++++++ virtual_machine/ops.h | 2 ++ 6 files changed, 63 insertions(+), 8 deletions(-) diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 28cb6c1f8..fd2e2991f 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -67,6 +67,14 @@ fn decoder_get_op_call_ffi_stub(void) { return op_call_ffi_stub; } fn decoder_get_op_callc_ffi_stub(void) { return op_callc_ffi_stub; } +fn decoder_get_op_ld_glo(void) { return op_ld_glo; } + +fn decoder_get_op_st_glo(void) { return op_st_glo; } + +fn decoder_get_op_ld_glo_ext(void) { return op_ld_glo_ext; } + +fn decoder_get_op_st_glo_ext(void) { return op_st_glo_ext; } + typedef struct fixup_node { size_t insn_idx; // Index in code array that needs the jump target struct fixup_node *next; @@ -169,10 +177,10 @@ static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); VM_DEBUG("DECODE: OP_LD external global '%s' (stub)\n", glob_name); - EMIT_FUNC(ctx, op_ld_glo); + EMIT_FUNC(ctx, NULL); // linker will patch this size_t patch_idx = ctx->code_len; EMIT_NUM(ctx, 0); // placeholder — linker will patch - add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL); + add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_LD); } else { EMIT_FUNC(ctx, op_ld_glo); EMIT_NUM(ctx, global_base + idx); @@ -187,10 +195,10 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); 
VM_DEBUG("DECODE: OP_ST external global '%s' (stub)\n", glob_name); - EMIT_FUNC(ctx, op_st_glo); + EMIT_FUNC(ctx, NULL); // linker will patch this size_t patch_idx = ctx->code_len; EMIT_NUM(ctx, 0); // placeholder — linker will patch - add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL); + add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_ST); } else { EMIT_FUNC(ctx, op_st_glo); EMIT_NUM(ctx, global_base + idx); diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 8f6aa06e3..44fec3766 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -11,7 +11,8 @@ typedef enum { STUB_CALL, STUB_CLOSURE, - STUB_GLOBAL, + STUB_GLOBAL_LD, + STUB_GLOBAL_ST, } stub_kind; /* @@ -45,5 +46,9 @@ void decoded_free(decoded *dec); fn decoder_get_op_call(void); fn decoder_get_op_call_ffi_stub(void); fn decoder_get_op_callc_ffi_stub(void); +fn decoder_get_op_ld_glo(void); +fn decoder_get_op_st_glo(void); +fn decoder_get_op_ld_glo_ext(void); +fn decoder_get_op_st_glo_ext(void); #endif // DECODER_NEW_H diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index 3c0419c2e..6ad2d521a 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -26,6 +26,7 @@ typedef union insn { int32_t num; // Integer operand (signed) const char *str; // String operand (direct pointer) union insn *target; // Direct jump target (pointer to insn) + aint *global_ptr; // Pointer to a C global variable } insn; #endif // INSN_H diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index 60954d37f..9732530bb 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -4,6 +4,7 @@ #include "ffi.h" #include "memory.h" #include "symbols.h" +#include #include #include #include @@ -82,13 +83,31 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, break; } - case STUB_GLOBAL: { + case STUB_GLOBAL_LD: + case STUB_GLOBAL_ST: { resolved_symbol *sym = symbol_table_find(st, s->name); if (sym && !sym->is_function) { + // Global from 
another unit + if (s->kind == STUB_GLOBAL_LD) { + code[pi - 1].func = decoder_get_op_ld_glo(); + } else { + code[pi - 1].func = decoder_get_op_st_glo(); + } code[pi].num = sym->idx; } else { - // TODO: C globals - exit(EXIT_FAILURE); + // C global + void *ptr = dlsym(RTLD_DEFAULT, s->name); + if (ptr) { + if (s->kind == STUB_GLOBAL_LD) { + code[pi - 1].func = decoder_get_op_ld_glo_ext(); + } else { + code[pi - 1].func = decoder_get_op_st_glo_ext(); + } + code[pi].global_ptr = (aint *)ptr; + } else { + fprintf(stderr, "Error: unresolved global '%s'\n", s->name); + exit(EXIT_FAILURE); + } } break; } diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index b5e4768ba..f23fff86b 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -410,6 +410,26 @@ void op_st_glo(DECL_STATE) { DISPATCH(); } +/* + * Load / store extenral globals + */ +void op_ld_glo_ext(DECL_STATE) { + ip++; + aint *ptr = ip->global_ptr; + VM_DEBUG("LD_GLO_FFI ptr=%p val=%ld\n", (void *)ptr, (long)*ptr); + STACK_PUSH(sp, *ptr); + DISPATCH(); +} + +void op_st_glo_ext(DECL_STATE) { + ip++; + aint *ptr = ip->global_ptr; + aint val = STACK_PEEK(sp); + VM_DEBUG("ST_GLO_FFI ptr=%p val=%ld\n", (void *)ptr, (long)val); + *ptr = val; + DISPATCH(); +} + void op_ld_loc(DECL_STATE) { ip++; int32_t idx = ip->num; diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index d877cb730..d8f9b04e5 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -61,6 +61,8 @@ void op_end(DECL_STATE); void op_closure(DECL_STATE); void op_callc_ffi_stub(DECL_STATE); void op_call_ffi_stub(DECL_STATE); +void op_ld_glo_ext(DECL_STATE); +void op_st_glo_ext(DECL_STATE); void op_line(DECL_STATE); From e91a1d1684f6de19e03c6e6a1dbb2cea5e29f730 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 13 Feb 2026 19:27:38 +0300 Subject: [PATCH 034/141] remove `is_filepath` --- virtual_machine/loader.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c 
index 03f1d7e60..aca29c4d4 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -45,15 +45,6 @@ static char *build_unit_path(const char *unit_name, const search_paths *paths) { return NULL; } -/* - * Check if a string looks like a file path (contains '/' or ends with '.bc') - */ -static bool is_filepath(const char *str) { - size_t len = strlen(str); - return strchr(str, '/') != NULL || - (len > 3 && strcmp(str + len - 3, ".bc") == 0); -} - /* * Find a loaded unit by name. Returns its index, or (size_t)-1 if not found. */ From 8752d736f8c6c04b61f3bc39f90b9ea71082c73d Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 15 Feb 2026 13:31:31 +0300 Subject: [PATCH 035/141] cleanup --- virtual_machine/Makefile | 2 +- virtual_machine/bytecode.c | 9 ++++-- virtual_machine/bytecode.h | 6 ++-- virtual_machine/decoder.c | 13 ++------ virtual_machine/decoder.h | 2 +- virtual_machine/ffi.c | 4 +++ virtual_machine/lama.c | 30 +++++++++++-------- virtual_machine/linker.c | 15 +++++----- virtual_machine/linker.h | 6 ++-- virtual_machine/loader.c | 3 +- virtual_machine/opcodes.h | 1 - virtual_machine/ops.c | 4 +-- virtual_machine/{bytecode_util.h => reader.h} | 6 ++-- virtual_machine/symbols.c | 9 ++++-- virtual_machine/vm.c | 5 +++- 15 files changed, 63 insertions(+), 52 deletions(-) rename virtual_machine/{bytecode_util.h => reader.h} (94%) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 9de59c012..efe9e0ddd 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -12,7 +12,7 @@ RUNTIME_LIB = $(RUNTIME_DIR)/runtime.a all: $(TARGET) -debug: CFLAGS += -DDEBUG_PRINT -g3 -Og -O0 +debug: CFLAGS += -DDEBUG_PRINT -g3 -Og -O0 -fsanitize=address debug: $(TARGET) $(TARGET): $(OBJECTS) $(RUNTIME_LIB) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 3f4359cc0..b2d8248ca 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,7 +1,7 @@ #define _POSIX_C_SOURCE 200809L #include "bytecode.h" 
-#include "bytecode_util.h" #include "memory.h" +#include "reader.h" #include #include #include @@ -107,8 +107,11 @@ bytecode *bytecode_load(const char *filename) { return bc; } -void bytecode_free(bytecode *bc) { - munmap(bc->map_base, bc->map_size); +void bytecode_free(bytecode *bc) { + if (!bc) { + return; + } + munmap(bc->map_base, bc->map_size); free(bc->public_symbols.data); free(bc->imports.data); free((void *)bc->name); diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index dea284716..25f39235d 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -1,5 +1,5 @@ -#ifndef BYTECODE_NEW_H -#define BYTECODE_NEW_H +#ifndef BYTECODE_H +#define BYTECODE_H #include #include @@ -60,4 +60,4 @@ static inline const char *bytecode_get_string(const bytecode *bc, return bc->string_table + offset; } -#endif // BYTECODE_NEW_H +#endif // BYTECODE_H diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index fd2e2991f..4a85e935b 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -750,14 +750,7 @@ static insn *decode_internal(decode_ctx *ctx) { } decoded **decode(bytecode **bc_arr, size_t n) { - typedef struct { - decoded **data; - size_t len; - size_t cap; - } decoded_array; - - decoded_array result; - da_init(result); + decoded **result = ALLOC_ARRAY(decoded *, n); size_t global_offset = 0; @@ -778,12 +771,12 @@ decoded **decode(bytecode **bc_arr, size_t n) { .relocs = ctx->relocs.data, .relocs_len = ctx->relocs.len, }; - da_append(result, dec); + result[i] = dec; global_offset += bc_arr[i]->globals_count; free(ctx); } - return result.data; + return result; } void decoded_free(decoded *dec) { diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h index 44fec3766..6f80f91f1 100644 --- a/virtual_machine/decoder.h +++ b/virtual_machine/decoder.h @@ -3,7 +3,7 @@ #include "../runtime/runtime_common.h" #include "bytecode.h" -#include "bytecode_util.h" +#include "reader.h" #include "insn.h" 
#include #include diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c index e3915cbef..d1524daf8 100644 --- a/virtual_machine/ffi.c +++ b/virtual_machine/ffi.c @@ -29,6 +29,9 @@ ffi_call_table *ffi_call_table_create(void) { // Currently frees only table and not stubs themselves since they are needed for // execution void ffi_call_table_destroy(ffi_call_table *table) { + if (!table) { + return; + } da_free(*table); free(table); } @@ -88,6 +91,7 @@ static void *lookup_function(const char *name) { void *fn = dlsym(RTLD_DEFAULT, name); char *error = dlerror(); if (error) { + fprintf(stderr, "Error looking up function '%s': %s\n", name, error); return NULL; } return fn; diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 1e208dbe0..1cec60cdb 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -9,14 +9,16 @@ #define MAX_INCLUDE_PATHS 64 -static void print_usage(const char *prog_name) { - printf("Usage: %s [options] [args]\n", prog_name); - printf("\nWhen no options are specified, the VM will run the bytecode file " - "and look for units in the same directory.\n"); - printf("Options:\n"); - printf(" -h, --help Show this help message\n"); - printf(" -I, --include PATH Add PATH to unit search paths (can be " - "used multiple times)\n"); +static void print_usage(FILE *dest, const char *prog_name) { + fprintf(dest, "Usage: %s [options] [args]\n", prog_name); + fprintf(dest, + "\nWhen no options are specified, the VM will run the bytecode file " + "and look for units in the same directory.\n"); + fprintf(dest, "Options:\n"); + fprintf(dest, " -h, --help Show this help message\n"); + fprintf(dest, + " -I, --include PATH Add PATH to unit search paths (can be " + "used multiple times)\n"); } int main(int argc, char *argv[]) { @@ -37,7 +39,7 @@ int main(int argc, char *argv[]) { -1) { switch (opt) { case 'h': - print_usage(argv[0]); + print_usage(stdout, argv[0]); return 0; case 'I': if (include_path_count < MAX_INCLUDE_PATHS) { @@ -49,13 
+51,14 @@ int main(int argc, char *argv[]) { } break; default: + print_usage(stdout, argv[0]); return 1; } } if (optind >= argc) { fprintf(stderr, "No bytecode file specified\n\n"); - print_usage(argv[0]); + print_usage(stderr, argv[0]); return 1; } @@ -67,17 +70,18 @@ int main(int argc, char *argv[]) { virtual_machine *vm = vm_create(bytecode_file, (const char **)include_paths, include_path_count); - // Skip options, pass only program args - vm_set_args(vm, argc - optind, argv + optind); if (!vm) { exit_code = 1; goto cleanup; } + // Skip options, pass only program args + vm_set_args(vm, argc - optind, argv + optind); + vm_run(vm); cleanup: - free(bytecode_dir); vm_destroy(vm); + free(bytecode_dir); return exit_code; } diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index 9732530bb..0a1858fae 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -115,7 +115,7 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, } } -program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { +program_link *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { symbol_table *st = symbol_table_create(); ffi_call_table *ffi_stubs = ffi_call_table_create(); @@ -155,7 +155,7 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { code_offset += dec->code_len; } - program *prog = ALLOC(program); + program_link *prog = ALLOC(program_link); prog->code = all_code; prog->code_len = total_code_len; prog->total_globals = total_globals; @@ -173,10 +173,11 @@ program *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { return prog; } -void prog_free(program *prog) { - if (prog) { - free(prog->code); - free(prog->entry_points); - free(prog); +void program_free(program_link *prog) { + if (!prog) { + return; } + free(prog->code); + free(prog->entry_points); + free(prog); } diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h index 433474d83..9d9ae4431 100644 --- a/virtual_machine/linker.h +++ 
b/virtual_machine/linker.h @@ -12,10 +12,10 @@ typedef struct { size_t total_globals; insn **entry_points; // Entry point for each unit (pointer into code) size_t entry_points_len; -} program; +} program_link; -program *link(bytecode **bc_arr, decoded **dec_arr, size_t n); +program_link *link(bytecode **bc_arr, decoded **dec_arr, size_t n); -void prog_free(program *prog); +void program_free(program_link *prog); #endif // LINKER_H diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index aca29c4d4..88da6c051 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -94,10 +94,10 @@ static bool load_unit_recursive(bytecode_array *units, exec_order *order, if (!bc) { fprintf(stderr, "Failed to load dependency '%s' from '%s'\n", unit_name, filepath); - free(filepath); free(unit_name); return false; } + free(filepath); bc->name = unit_name; size_t my_idx = units->len; @@ -119,7 +119,6 @@ static bool load_unit_recursive(bytecode_array *units, exec_order *order, } da_append(*order, my_idx); - free(filepath); return true; } diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 6dcc4c39f..fe8a44182 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -53,7 +53,6 @@ typedef enum { OP_PATT_BOXED = 0x64, OP_PATT_UNBOXED = 0x65, OP_PATT_CLOSURE = 0x66, - // TODO: remove this opcode, call real function instead OP_BARRAY = 0x74, } opcode_t; diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index f23fff86b..a495f5b24 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -130,7 +130,7 @@ void op_div(DECL_STATE) { VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); if (UNBOX(y) == 0) { fprintf(stderr, "Division by zero\n"); - exit(1); + exit(EXIT_FAILURE); } aint res = Ls__Infix_47((void *)x, (void *)y); VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); @@ -144,7 +144,7 @@ void op_mod(DECL_STATE) { VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); if (UNBOX(y) == 0) { 
fprintf(stderr, "Division by zero\n"); - exit(1); + exit(EXIT_FAILURE); } aint res = Ls__Infix_37((void *)x, (void *)y); VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); diff --git a/virtual_machine/bytecode_util.h b/virtual_machine/reader.h similarity index 94% rename from virtual_machine/bytecode_util.h rename to virtual_machine/reader.h index 7afeeea72..e07281065 100644 --- a/virtual_machine/bytecode_util.h +++ b/virtual_machine/reader.h @@ -1,5 +1,5 @@ -#ifndef BYTECODE_UTIL_H -#define BYTECODE_UTIL_H +#ifndef READER_H +#define READER_H #include #include @@ -57,4 +57,4 @@ static inline bool reader_eof(const byte_reader *r) { return r->pos >= r->size; } -#endif // BYTECODE_UTIL_H +#endif // READER_H diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c index 485bec05d..a53cfbfc5 100644 --- a/virtual_machine/symbols.c +++ b/virtual_machine/symbols.c @@ -5,6 +5,8 @@ #include #include +static const char *MAIN_FUNC = "main"; + struct symbol_table { resolved_symbol *data; size_t len; @@ -18,6 +20,9 @@ symbol_table *symbol_table_create(void) { } void symbol_table_destroy(symbol_table *table) { + if (!table) { + return; + } da_free(*table); free(table); } @@ -34,8 +39,8 @@ resolved_symbol *symbol_table_find(symbol_table *table, const char *name) { static int symbol_table_add(symbol_table *table, const char *name, bool is_function, int32_t idx) { - // Allow duplicate "main" (each bytecode has one) - if (strcmp(name, "main") != 0) { + // Allow duplicate main() (each uinit has one) + if (strcmp(name, MAIN_FUNC) != 0) { resolved_symbol *existing = symbol_table_find(table, name); if (existing) { fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 97242ed2b..3ae1174f8 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -50,7 +50,7 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, return NULL; } - program *prog = link(lr.units, decoded_arr, 
lr.units_len); + program_link *prog = link(lr.units, decoded_arr, lr.units_len); vm->total_globals = prog->total_globals; vm->code = prog->code; @@ -65,6 +65,9 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, } void vm_destroy(virtual_machine *vm) { + if (!vm) { + return; + } for (size_t i = 0; i < vm->bc_len; i++) { bytecode_free(vm->bc_arr[i]); } From 9a2694d139ea442f38118c18ed665027bccc8817 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 15 Feb 2026 15:35:39 +0300 Subject: [PATCH 036/141] use dynamic array --- virtual_machine/decoder.c | 79 ++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 4a85e935b..6ad78ab29 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -44,22 +44,10 @@ /* * Code emission macros - append to code array in context */ -#define EMIT_FUNC(ctx, f) \ - do { \ - (ctx)->code[(ctx)->code_len++].func = (f); \ - } while (0) -#define EMIT_NUM(ctx, n) \ - do { \ - (ctx)->code[(ctx)->code_len++].num = (n); \ - } while (0) -#define EMIT_STR(ctx, s) \ - do { \ - (ctx)->code[(ctx)->code_len++].str = (s); \ - } while (0) -#define EMIT_TARGET(ctx, t) \ - do { \ - (ctx)->code[(ctx)->code_len++].target = (t); \ - } while (0) +#define EMIT_FUNC(ctx, f) da_append((ctx)->code, ((insn){.func = (f)})) +#define EMIT_NUM(ctx, n) da_append((ctx)->code, ((insn){.num = (n)})) +#define EMIT_STR(ctx, s) da_append((ctx)->code, ((insn){.str = (s)})) +#define EMIT_TARGET(ctx, t) da_append((ctx)->code, ((insn){.target = (t)})) fn decoder_get_op_call(void) { return op_call; } @@ -90,8 +78,13 @@ typedef struct { typedef struct { const bytecode *bc; - insn *code; - size_t code_len; + + struct { + insn *data; + size_t len; + size_t cap; + } code; + byte_reader reader; size_t global_offset; @@ -116,13 +109,11 @@ decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset) { ctx->bc = bc; - ctx->code = 
NULL; - ctx->code_len = 0; ctx->global_offset = global_offset; ctx->bc_to_insn_map = NULL; + da_init(ctx->code); da_init(ctx->stubs); - da_init(ctx->relocs); reader_init(&ctx->reader, bc->code, bc->code_size); @@ -139,9 +130,6 @@ static void add_stub(decode_ctx *ctx, size_t patch_idx, const char *name, static fixup_node *add_fixup(meta_info *meta, size_t target_off, size_t insn_idx) { fixup_node *node = ALLOC(fixup_node); - if (!node) - return NULL; - node->insn_idx = insn_idx; node->next = meta[target_off].fixups; meta[target_off].fixups = node; @@ -165,8 +153,8 @@ static bool validate_target_off(const bytecode *bc, size_t target_off, * The linker will convert it to an absolute pointer after copying. */ static void emit_target_idx(decode_ctx *ctx, size_t target_code_idx) { - size_t slot = ctx->code_len; - ctx->code[ctx->code_len++].num = (int32_t)target_code_idx; + size_t slot = ctx->code.len; + da_append(ctx->code, ((insn){.num = (int32_t)target_code_idx})); da_append(ctx->relocs, slot); } @@ -178,7 +166,7 @@ static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { const char *glob_name = bytecode_get_string(bc, str_offset); VM_DEBUG("DECODE: OP_LD external global '%s' (stub)\n", glob_name); EMIT_FUNC(ctx, NULL); // linker will patch this - size_t patch_idx = ctx->code_len; + size_t patch_idx = ctx->code.len; EMIT_NUM(ctx, 0); // placeholder — linker will patch add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_LD); } else { @@ -196,7 +184,7 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { const char *glob_name = bytecode_get_string(bc, str_offset); VM_DEBUG("DECODE: OP_ST external global '%s' (stub)\n", glob_name); EMIT_FUNC(ctx, NULL); // linker will patch this - size_t patch_idx = ctx->code_len; + size_t patch_idx = ctx->code.len; EMIT_NUM(ctx, 0); // placeholder — linker will patch add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_ST); } else { @@ -217,13 +205,13 @@ static bool handle_jump(decode_ctx *ctx, meta_info 
*meta, size_t current_bc_off, return false; } - size_t my_idx = ctx->code_len; + size_t my_idx = ctx->code.len; EMIT_NUM(ctx, 0); // placeholder — will hold code index meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { // Backward jump — already resolved, store as index - ctx->code[my_idx].num = tm->resolved_idx; + ctx->code.data[my_idx].num = tm->resolved_idx; da_append(ctx->relocs, my_idx); if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { fprintf(stderr, "Error: Loop stack mismatch\n"); @@ -247,13 +235,10 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, } static insn *decode_internal(decode_ctx *ctx) { + const bytecode *bc = ctx->bc; size_t global_base = ctx->global_offset; - size_t code_cap = bc->code_size * 16; // TODO: estimate better - insn *code = ALLOC_ARRAY(insn, code_cap); - ctx->code = code; - meta_info *meta = ALLOC_ARRAY(meta_info, bc->code_size); // Initialize meta table @@ -270,10 +255,10 @@ static insn *decode_internal(decode_ctx *ctx) { uint8_t opcode = reader_u8(&ctx->reader); VM_DEBUG("DECODE: visiting bc_off=%zu opcode=%d code_idx=%zu\n", - current_bc_off, opcode, ctx->code_len); + current_bc_off, opcode, ctx->code.len); meta_info *m = &meta[current_bc_off]; - m->resolved_idx = (int32_t)ctx->code_len; + m->resolved_idx = (int32_t)ctx->code.len; // Validate stack depth if (depth != -1) { @@ -292,8 +277,8 @@ static insn *decode_internal(decode_ctx *ctx) { for (fixup_node *f = m->fixups; f; f = f->next) { VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " "code_idx=%zu\n", - current_bc_off, f->insn_idx, ctx->code_len); - ctx->code[f->insn_idx].num = (int32_t)ctx->code_len; + current_bc_off, f->insn_idx, ctx->code.len); + ctx->code.data[f->insn_idx].num = (int32_t)ctx->code.len; da_append(ctx->relocs, f->insn_idx); } @@ -621,7 +606,7 @@ static insn *decode_internal(decode_ctx *ctx) { // Emit closure with NULL target 
placeholder. // Linker will resolve to inter-unit function or create FFI stub. EMIT_FUNC(ctx, op_closure); - size_t target_slot = ctx->code_len; + size_t target_slot = ctx->code.len; EMIT_TARGET(ctx, NULL); // placeholder EMIT_NUM(ctx, n_captured); @@ -634,13 +619,13 @@ static insn *decode_internal(decode_ctx *ctx) { } EMIT_FUNC(ctx, op_closure); - size_t target_slot = ctx->code_len; + size_t target_slot = ctx->code.len; EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_captured); meta_info *tm = &meta[target_off]; if (target_off < current_bc_off && tm->resolved_idx != -1) { - ctx->code[target_slot].num = tm->resolved_idx; + ctx->code.data[target_slot].num = tm->resolved_idx; da_append(ctx->relocs, target_slot); } else { add_fixup(meta, target_off, target_slot); @@ -656,7 +641,7 @@ static insn *decode_internal(decode_ctx *ctx) { VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", - target_off, n_args, current_bc_off, ctx->code_len); + target_off, n_args, current_bc_off, ctx->code.len); if (IS_EXT_REF(target_off)) { int str_offset = EXT_REF_INDEX(target_off); @@ -666,7 +651,7 @@ static insn *decode_internal(decode_ctx *ctx) { // To be patched by linker EMIT_FUNC(ctx, NULL); - size_t name_slot = ctx->code_len; + size_t name_slot = ctx->code.len; EMIT_TARGET(ctx, NULL); EMIT_NUM(ctx, n_args); @@ -677,14 +662,14 @@ static insn *decode_internal(decode_ctx *ctx) { "CALL")) { return NULL; } - size_t target_slot = ctx->code_len + 1; + size_t target_slot = ctx->code.len + 1; EMIT_FUNC(ctx, op_call); EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_args); meta_info *tm = &meta[(uint32_t)target_off]; if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { - ctx->code[target_slot].num = tm->resolved_idx; + ctx->code.data[target_slot].num = tm->resolved_idx; da_append(ctx->relocs, target_slot); } else { add_fixup(meta, (uint32_t)target_off, target_slot); @@ -746,7 +731,7 @@ static 
insn *decode_internal(decode_ctx *ctx) { } free(meta); - return ctx->code; + return ctx->code.data; } decoded **decode(bytecode **bc_arr, size_t n) { @@ -764,7 +749,7 @@ decoded **decode(bytecode **bc_arr, size_t n) { decoded *dec = ALLOC(decoded); *dec = (decoded){ .code = code, - .code_len = ctx->code_len, + .code_len = ctx->code.len, .stubs = ctx->stubs.data, .stubs_len = ctx->stubs.len, .bc_to_insn_map = ctx->bc_to_insn_map, From e3c53d2bbb0a644020f024348027ffce26829318 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 15 Feb 2026 18:32:44 +0300 Subject: [PATCH 037/141] free fixup nodes; cleanup --- virtual_machine/decoder.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/virtual_machine/decoder.c b/virtual_machine/decoder.c index 6ad78ab29..543200a34 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/decoder.c @@ -214,7 +214,7 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, ctx->code.data[my_idx].num = tm->resolved_idx; da_append(ctx->relocs, my_idx); if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { - fprintf(stderr, "Error: Loop stack mismatch\n"); + fprintf(stderr, "Error: Loop stack mismatch at bc_off=%zu\n", current_bc_off); return false; } } else { @@ -249,6 +249,7 @@ static insn *decode_internal(decode_ctx *ctx) { } int32_t depth = 0; + insn *result = NULL; while (!reader_eof(&ctx->reader)) { size_t current_bc_off = reader_pos(&ctx->reader); @@ -266,7 +267,7 @@ static insn *decode_internal(decode_ctx *ctx) { fprintf(stderr, "Error: Stack mismatch at offset %zu (expected %d, got %d)\n", current_bc_off, m->stack_depth, depth); - return NULL; + goto cleanup; } m->stack_depth = depth; } else { @@ -274,13 +275,19 @@ static insn *decode_internal(decode_ctx *ctx) { } // Resolve forward jumps (backpatching) — store as index, record relocation - for (fixup_node *f = m->fixups; f; f = f->next) { + fixup_node *f = m->fixups; + while (f) { 
VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " "code_idx=%zu\n", current_bc_off, f->insn_idx, ctx->code.len); ctx->code.data[f->insn_idx].num = (int32_t)ctx->code.len; da_append(ctx->relocs, f->insn_idx); + + fixup_node *next = f->next; + free(f); + f = next; } + m->fixups = NULL; switch (opcode) { case OP_CONST: @@ -357,7 +364,7 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_JMP: EMIT_FUNC(ctx, op_jmp); if (!handle_jump(ctx, meta, current_bc_off, depth)) { - return NULL; + goto cleanup; } DEPTH_DEAD(depth); break; @@ -366,7 +373,7 @@ static insn *decode_internal(decode_ctx *ctx) { DEPTH_POP(depth); EMIT_FUNC(ctx, op_cjmp_z); if (!handle_jump(ctx, meta, current_bc_off, depth)) { - return NULL; + goto cleanup; } break; @@ -374,7 +381,7 @@ static insn *decode_internal(decode_ctx *ctx) { DEPTH_POP(depth); EMIT_FUNC(ctx, op_cjmp_nz); if (!handle_jump(ctx, meta, current_bc_off, depth)) { - return NULL; + goto cleanup; } break; @@ -590,7 +597,7 @@ static insn *decode_internal(decode_ctx *ctx) { break; default: fprintf(stderr, "Unknown designation type: %d\n", designation_type); - return NULL; + goto cleanup; } } @@ -615,7 +622,7 @@ static insn *decode_internal(decode_ctx *ctx) { } else { uint32_t target_off = (uint32_t)target_raw; if (!validate_target_off(bc, target_off, current_bc_off, "CLOSURE")) { - return NULL; + goto cleanup; } EMIT_FUNC(ctx, op_closure); @@ -660,7 +667,7 @@ static insn *decode_internal(decode_ctx *ctx) { } else { if (!validate_target_off(bc, (uint32_t)target_off, current_bc_off, "CALL")) { - return NULL; + goto cleanup; } size_t target_slot = ctx->code.len + 1; EMIT_FUNC(ctx, op_call); @@ -709,8 +716,7 @@ static insn *decode_internal(decode_ctx *ctx) { default: fprintf(stderr, "Not yet supported opcode 0x%02X at ip=0x%08zx\n", opcode, reader_pos(&ctx->reader) - 1); - free(meta); - return NULL; + goto cleanup; } } @@ -720,6 +726,9 @@ static insn *decode_internal(decode_ctx *ctx) { ctx->bc_to_insn_map[i] = 
meta[i].resolved_idx; } + result = ctx->code.data; + +cleanup: // Free temporary metadata and fixup nodes for (size_t i = 0; i < bc->code_size; i++) { fixup_node *node = meta[i].fixups; @@ -731,7 +740,7 @@ static insn *decode_internal(decode_ctx *ctx) { } free(meta); - return ctx->code.data; + return result; } decoded **decode(bytecode **bc_arr, size_t n) { From bf93aca20a68652ca4e72a1309bcbf3032593e04 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 15 Feb 2026 20:43:26 +0300 Subject: [PATCH 038/141] assert for `is_function` --- virtual_machine/linker.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index 0a1858fae..fe1ceae7e 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -4,6 +4,7 @@ #include "ffi.h" #include "memory.h" #include "symbols.h" +#include #include #include #include @@ -50,13 +51,14 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, stub *s = &stubs_arr[i]; size_t pi = s->patch_idx; + resolved_symbol *sym = symbol_table_find(st, s->name); switch (s->kind) { case STUB_CALL: { - resolved_symbol *sym = symbol_table_find(st, s->name); // Decoder emitted: [NULL] [NULL] [n_args] - if (sym && sym->is_function) { + if (sym) { + assert(sym->is_function); code[pi - 1].func = decoder_get_op_call(); code[pi].target = &all_code[sym->idx]; } else { @@ -67,9 +69,9 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, } case STUB_CLOSURE: { - resolved_symbol *sym = symbol_table_find(st, s->name); - if (sym && sym->is_function) { + if (sym) { + assert(sym->is_function); code[pi].target = &all_code[sym->idx]; } else { // Not found in symbol table — create FFI stub @@ -85,9 +87,9 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, case STUB_GLOBAL_LD: case STUB_GLOBAL_ST: { - resolved_symbol *sym = symbol_table_find(st, s->name); - if (sym && !sym->is_function) { + if (sym) { // 
Global from another unit + assert(!sym->is_function); if (s->kind == STUB_GLOBAL_LD) { code[pi - 1].func = decoder_get_op_ld_glo(); } else { From 5a1847a0440fa2e7d5ebc155dfcfb218fe551464 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 16 Feb 2026 14:38:54 +0300 Subject: [PATCH 039/141] add iterator for public symbols and imports --- virtual_machine/bytecode.c | 75 ++++++++++++++++++++++---------------- virtual_machine/bytecode.h | 29 +++++++++------ virtual_machine/linker.c | 21 ++++++----- virtual_machine/loader.c | 12 ++++-- virtual_machine/reader.h | 9 ++--- 5 files changed, 82 insertions(+), 64 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index b2d8248ca..7dca708ae 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,7 +1,6 @@ #define _POSIX_C_SOURCE 200809L #include "bytecode.h" #include "memory.h" -#include "reader.h" #include #include #include @@ -57,8 +56,8 @@ bytecode *bytecode_load(const char *filename) { // TODO: VALIdation + const char *string_table = map + st_offset; const uint8_t *data = (const uint8_t *)map; - const char *string_table = (const char *)(data + st_offset); bytecode *bc = ALLOC(bytecode); @@ -71,35 +70,11 @@ bytecode *bytecode_load(const char *filename) { bc->code_size = code_size; bc->globals_count = (size_t)globals_count; - // Allocate and resolve public symbols - bc->public_symbols.len = (size_t)num_pubs; - if (num_pubs > 0) { - bc->public_symbols.data = ALLOC_ARRAY(public_symbol, num_pubs); - - reader_seek(&reader, pubs_offset); - for (int32_t i = 0; i < num_pubs; i++) { - int32_t name_offset = reader_i32(&reader); - int32_t code_off = reader_i32(&reader); - uint8_t flag = reader_u8(&reader); - - bc->public_symbols.data[i].name = string_table + name_offset; - bc->public_symbols.data[i].code_offset = code_off; - bc->public_symbols.data[i].flag = flag; - } - } - - // Allocate and resolve imports - bc->imports.len = (size_t)num_imports; - if (num_imports > 0) { - 
bc->imports.data = ALLOC_ARRAY(const char *, (size_t)num_imports); + bc->pubs = data + pubs_offset; + bc->pubs_len = (size_t)num_pubs; - reader_seek(&reader, imports_offset); - for (int32_t i = 0; i < num_imports; i++) { - int32_t name_offset = reader_i32(&reader); - - bc->imports.data[i] = string_table + name_offset; - } - } + bc->imports = data + imports_offset; + bc->imports_len = (size_t)num_imports; // will be set later bc->name = NULL; @@ -107,13 +82,49 @@ bytecode *bytecode_load(const char *filename) { return bc; } +void bytecode_pubs_init(bytecode_iterator *iter, const bytecode *bc) { + reader_init(&iter->reader, bc->pubs, bc->pubs_len * PUB_ENTRY_SIZE); + iter->string_table = bc->string_table; + iter->len = bc->pubs_len; + iter->curr = 0; +} + +bool bytecode_pubs_next(bytecode_iterator *iter, public_symbol *out) { + if (iter->curr >= iter->len) { + return false; + } + int32_t name_offset = reader_i32(&iter->reader); + out->name = iter->string_table + name_offset; + out->code_offset = reader_i32(&iter->reader); + out->flag = reader_u8(&iter->reader); + + iter->curr++; + return true; +} + +void bytecode_imports_init(bytecode_iterator *it, const bytecode *bc) { + reader_init(&it->reader, bc->imports, bc->imports_len * IMPORT_ENTRY_SIZE); + it->string_table = bc->string_table; + it->len = bc->imports_len; + it->curr = 0; +} + +bool bytecode_imports_next(bytecode_iterator *it, const char **out_name) { + if (it->curr >= it->len) { + return false; + } + int32_t name_offset = reader_i32(&it->reader); + *out_name = it->string_table + name_offset; + + it->curr++; + return true; +} + void bytecode_free(bytecode *bc) { if (!bc) { return; } munmap(bc->map_base, bc->map_size); - free(bc->public_symbols.data); - free(bc->imports.data); free((void *)bc->name); free(bc); } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 25f39235d..0cb4e982d 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -1,6 +1,7 @@ #ifndef BYTECODE_H 
#define BYTECODE_H +#include "reader.h" #include #include #include @@ -15,16 +16,6 @@ typedef struct { uint8_t flag; // PUB_FLAG_FUNCTION or PUB_FLAG_GLOBAL } public_symbol; -typedef struct { - public_symbol *data; - size_t len; -} public_symbols; - -typedef struct { - const char **data; - size_t len; -} imports; - typedef struct { // Memory-mapped file void *map_base; @@ -36,9 +27,11 @@ typedef struct { const uint8_t *code; size_t code_size; - public_symbols public_symbols; + const uint8_t *pubs; + size_t pubs_len; - imports imports; + const uint8_t *imports; + size_t imports_len; size_t globals_count; @@ -49,6 +42,18 @@ bytecode *bytecode_load(const char *filename); void bytecode_free(bytecode *bc); +typedef struct { + byte_reader reader; + const char *string_table; + size_t len; + size_t curr; +} bytecode_iterator; + +void bytecode_pubs_init(bytecode_iterator *iter, const bytecode *bc); +bool bytecode_pubs_next(bytecode_iterator *iter, public_symbol *out); + +void bytecode_imports_init(bytecode_iterator *iter, const bytecode *bc); +bool bytecode_imports_next(bytecode_iterator *iter, const char **out_name); /* * Get string from string table by offset */ diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index fe1ceae7e..afa2ce8f9 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -14,25 +14,26 @@ static void register_public_symbols(symbol_table *st, const bytecode *bc, size_t code_offset, size_t global_base, const int32_t *bc_to_insn_map) { - const public_symbols *pub = &bc->public_symbols; + public_symbol pub; + bytecode_iterator iter; + bytecode_pubs_init(&iter, bc); - for (size_t i = 0; i < pub->len; i++) { - const public_symbol *p = &pub->data[i]; + while (bytecode_pubs_next(&iter, &pub)) { - if (p->flag == PUB_FLAG_FUNCTION) { - // p->code_offset is the offset in the bytecode, so we use the mapping - int32_t insn_idx = bc_to_insn_map[p->code_offset]; + if (pub.flag == PUB_FLAG_FUNCTION) { + // pub.code_offset is the offset 
in the bytecode, so we use the mapping + int32_t insn_idx = bc_to_insn_map[pub.code_offset]; if (insn_idx == -1) { fprintf(stderr, "Error: public symbol '%s' at bytecode offset %d not decoded\n", - p->name, p->code_offset); + pub.name, pub.code_offset); exit(EXIT_FAILURE); } int32_t code_idx = insn_idx + code_offset; - symbol_table_add_function(st, p->name, code_idx); + symbol_table_add_function(st, pub.name, code_idx); } else { - int32_t gidx = p->code_offset + global_base; - symbol_table_add_global(st, p->name, gidx); + int32_t global_idx = pub.code_offset + global_base; + symbol_table_add_global(st, pub.name, global_idx); } } } diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 88da6c051..208188946 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -104,9 +104,11 @@ static bool load_unit_recursive(bytecode_array *units, exec_order *order, da_append(*units, bc); // Recursively load dependencies - for (size_t i = 0; i < bc->imports.len; i++) { - const char *import_name = bc->imports.data[i]; + const char *import_name; + bytecode_iterator iter; + bytecode_imports_init(&iter, bc); + while (bytecode_imports_next(&iter, &import_name)) { // Skip Std since we have it as runtime.a if (strcmp(import_name, "Std") == 0) { continue; @@ -145,8 +147,10 @@ load_result load(const char *main_unit_path, const search_paths *paths) { bytecode *bc = load_main_unit(main_unit_path); - for (size_t i = 0; i < bc->imports.len; i++) { - const char *import_name = bc->imports.data[i]; + const char *import_name; + bytecode_iterator iter; + bytecode_imports_init(&iter, bc); + while (bytecode_imports_next(&iter, &import_name)) { // Skip Std since we have it as runtime.a if (strcmp(import_name, "Std") == 0) { diff --git a/virtual_machine/reader.h b/virtual_machine/reader.h index e07281065..15fb9f58b 100644 --- a/virtual_machine/reader.h +++ b/virtual_machine/reader.h @@ -1,6 +1,7 @@ #ifndef READER_H #define READER_H +#include #include #include #include @@ 
-22,18 +23,14 @@ static inline void reader_init(byte_reader *r, const uint8_t *data, * Read 32-bit little-endian integer and advance position */ static inline int32_t reader_i32(byte_reader *r) { - if (r->pos + 4 > r->size) { - return 0; // TODO: better error handling - } + assert(r->pos + 4 <= r->size); const uint8_t *p = r->data + r->pos; r->pos += 4; return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); } static inline uint8_t reader_u8(byte_reader *r) { - if (r->pos >= r->size) { - return 0; - } + assert(r->pos < r->size); return r->data[r->pos++]; } From dc6fd81d3c6f1dd8b220a1ede9ce8ebc72a1c24a Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 16 Feb 2026 16:49:03 +0300 Subject: [PATCH 040/141] don't use op call ffi stub --- virtual_machine/linker.c | 11 +++++++++-- virtual_machine/ops.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index afa2ce8f9..0b4c88351 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -63,8 +63,15 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, code[pi - 1].func = decoder_get_op_call(); code[pi].target = &all_code[sym->idx]; } else { - code[pi - 1].func = decoder_get_op_call_ffi_stub(); - code[pi].str = s->name; + code[pi - 1].func = decoder_get_op_call(); + insn *ffi_stub = ffi_call_table_find(ffi_stubs, s->name); + if (!ffi_stub) { + ffi_stub = ffi_call_table_add(ffi_stubs, s->name, + decoder_get_op_callc_ffi_stub()); + } + code[pi].target = ffi_stub; + // code[pi - 1].func = decoder_get_op_call_ffi_stub(); + // code[pi].str = s->name; } break; } diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index a495f5b24..626b7acb8 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -645,7 +645,7 @@ void op_call_ffi_stub(DECL_STATE) { aint result = ffi_call_c(func_name, args, n_args); - VM_DEBUG("FFI_CALL: result=%ld\n", (long)UNBOX(result)); + VM_DEBUG("FFI_CALL: result=%ld\n", 
(long)result); STACK_PUSH(sp, result); DISPATCH(); } From 51e38ef4c3cfeffaecec50efada3072bbc8a1d78 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 16 Feb 2026 18:14:16 +0300 Subject: [PATCH 041/141] move everything to `converter.c` revert `loader.c` --- virtual_machine/Makefile | 2 +- virtual_machine/{decoder.c => converter.c} | 367 +++++++++++++++------ virtual_machine/converter.h | 21 ++ virtual_machine/decoder.h | 54 --- virtual_machine/ffi.c | 27 +- virtual_machine/ffi.h | 6 +- virtual_machine/insn.h | 2 +- virtual_machine/linker.c | 26 +- virtual_machine/linker.h | 2 +- virtual_machine/loader.c | 114 ++----- virtual_machine/loader.h | 6 +- virtual_machine/ops.c | 33 +- virtual_machine/ops.h | 3 +- virtual_machine/symbols.c | 23 +- virtual_machine/symbols.h | 5 +- virtual_machine/vm.c | 21 +- 16 files changed, 389 insertions(+), 323 deletions(-) rename virtual_machine/{decoder.c => converter.c} (66%) create mode 100644 virtual_machine/converter.h delete mode 100644 virtual_machine/decoder.h diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index efe9e0ddd..d163262aa 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -Wextra -std=c99 -O0 LIBS = -lffi -ldl LDFLAGS = -rdynamic TARGET = lama.exe -SOURCES = lama.c decoder.c vm.c bytecode.c linker.c ffi.c loader.c symbols.c ops.c memory.c +SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c memory.c OBJECTS = $(SOURCES:.c=.o) RUNTIME_DIR = ../runtime diff --git a/virtual_machine/decoder.c b/virtual_machine/converter.c similarity index 66% rename from virtual_machine/decoder.c rename to virtual_machine/converter.c index 543200a34..70a370b81 100644 --- a/virtual_machine/decoder.c +++ b/virtual_machine/converter.c @@ -1,8 +1,14 @@ -#include "decoder.h" +#include "converter.h" +#include "bytecode.h" #include "da.h" +#include "ffi.h" #include "memory.h" #include "opcodes.h" #include "ops.h" +#include "symbols.h" +#include +#include 
+#include #include #include #include @@ -48,20 +54,22 @@ #define EMIT_NUM(ctx, n) da_append((ctx)->code, ((insn){.num = (n)})) #define EMIT_STR(ctx, s) da_append((ctx)->code, ((insn){.str = (s)})) #define EMIT_TARGET(ctx, t) da_append((ctx)->code, ((insn){.target = (t)})) +#define EMIT_GLOBAL_PTR(ctx, p) \ + da_append((ctx)->code, ((insn){.global_ptr = (p)})) -fn decoder_get_op_call(void) { return op_call; } +#define FFI_STUB_SIZE 2 -fn decoder_get_op_call_ffi_stub(void) { return op_call_ffi_stub; } +typedef enum { + INTERNAL, // internal call + UNIT, // inter-unit call + FFI, // FFI call +} reloc_kind; -fn decoder_get_op_callc_ffi_stub(void) { return op_callc_ffi_stub; } - -fn decoder_get_op_ld_glo(void) { return op_ld_glo; } - -fn decoder_get_op_st_glo(void) { return op_st_glo; } - -fn decoder_get_op_ld_glo_ext(void) { return op_ld_glo_ext; } - -fn decoder_get_op_st_glo_ext(void) { return op_st_glo_ext; } +typedef struct { + size_t patch_idx; + const char *name; + reloc_kind kind; +} reloc; typedef struct fixup_node { size_t insn_idx; // Index in code array that needs the jump target @@ -76,6 +84,14 @@ typedef struct { fixup_node *fixups; // Linked list of forward jumps pointing here } meta_info; +typedef struct { + insn *code; + size_t code_len; + int32_t *bc_to_insn_map; + reloc *relocs; + size_t relocs_len; +} decoded; + typedef struct { const bytecode *bc; @@ -89,42 +105,41 @@ typedef struct { size_t global_offset; struct { - stub *data; - size_t len; - size_t cap; - } stubs; - - struct { - size_t *data; + reloc *data; size_t len; size_t cap; } relocs; int32_t *bc_to_insn_map; + symbol_table *st; + ffi_call_table *ffi; } decode_ctx; -decode_ctx *decode_ctx_create(const bytecode *bc, int32_t global_offset) { +decode_ctx *decode_ctx_create(const bytecode *bc, symbol_table *st, + ffi_call_table *ffi, int32_t global_offset) { decode_ctx *ctx = ALLOC(decode_ctx); ctx->bc = bc; ctx->global_offset = global_offset; - ctx->bc_to_insn_map = NULL; + ctx->bc_to_insn_map 
= ALLOC_ARRAY(int32_t, bc->code_size); da_init(ctx->code); - da_init(ctx->stubs); da_init(ctx->relocs); + ctx->st = st; + ctx->ffi = ffi; + reader_init(&ctx->reader, bc->code, bc->code_size); return ctx; } -static void add_stub(decode_ctx *ctx, size_t patch_idx, const char *name, - stub_kind kind) { - stub s = {.patch_idx = patch_idx, .name = name, .kind = kind}; - da_append(ctx->stubs, s); +static void add_reloc(decode_ctx *ctx, size_t patch_idx, const char *name, + reloc_kind kind) { + reloc s = {.patch_idx = patch_idx, .name = name, .kind = kind}; + da_append(ctx->relocs, s); } static fixup_node *add_fixup(meta_info *meta, size_t target_off, @@ -136,39 +151,45 @@ static fixup_node *add_fixup(meta_info *meta, size_t target_off, return node; } -static bool validate_target_off(const bytecode *bc, size_t target_off, +static bool validate_target_off(const bytecode *bc, int32_t target_off, size_t current_bc_off, const char *op_name) { if (target_off >= bc->code_size) { fprintf( stderr, - "Error: %s target_off=%zu out of range (bc_off=%zu, code_size=%zu)\n", + "Error: %s target_off=%d out of range (bc_off=%zu, code_size=%zu)\n", op_name, target_off, current_bc_off, bc->code_size); return false; } return true; } -/* - * Record that code[insn_idx].target holds a code-array index . - * The linker will convert it to an absolute pointer after copying. 
- */ -static void emit_target_idx(decode_ctx *ctx, size_t target_code_idx) { - size_t slot = ctx->code.len; - da_append(ctx->code, ((insn){.num = (int32_t)target_code_idx})); - da_append(ctx->relocs, slot); -} - static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { const bytecode *bc = ctx->bc; if (IS_EXT_REF(idx)) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); + VM_DEBUG("DECODE: OP_LD external global '%s' (stub)\n", glob_name); - EMIT_FUNC(ctx, NULL); // linker will patch this - size_t patch_idx = ctx->code.len; - EMIT_NUM(ctx, 0); // placeholder — linker will patch - add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_LD); + + resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); + if (sym) { + // Global from another unit + EMIT_FUNC(ctx, op_ld_glo); + EMIT_NUM(ctx, sym->idx); + return true; + } else { + // C global + void *ptr = dlsym(RTLD_DEFAULT, glob_name); + if (ptr) { + EMIT_FUNC(ctx, op_ld_glo_ext); + EMIT_GLOBAL_PTR(ctx, (aint *)ptr); + return true; + } else { + fprintf(stderr, "Error: unresolved global '%s'\n", glob_name); + return false; + } + } } else { EMIT_FUNC(ctx, op_ld_glo); EMIT_NUM(ctx, global_base + idx); @@ -182,11 +203,27 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { if (IS_EXT_REF(idx)) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); + VM_DEBUG("DECODE: OP_ST external global '%s' (stub)\n", glob_name); - EMIT_FUNC(ctx, NULL); // linker will patch this - size_t patch_idx = ctx->code.len; - EMIT_NUM(ctx, 0); // placeholder — linker will patch - add_stub(ctx, patch_idx, glob_name, STUB_GLOBAL_ST); + + resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); + if (sym) { + // Global from another unit + EMIT_FUNC(ctx, op_st_glo); + EMIT_NUM(ctx, sym->idx); + return true; + } else { + // C global + void *ptr = dlsym(RTLD_DEFAULT, glob_name); + if (ptr) { + 
EMIT_FUNC(ctx, op_st_glo_ext); + EMIT_GLOBAL_PTR(ctx, (aint *)ptr); + return true; + } else { + fprintf(stderr, "Error: unresolved global '%s'\n", glob_name); + return false; + } + } } else { EMIT_FUNC(ctx, op_st_glo); EMIT_NUM(ctx, global_base + idx); @@ -212,9 +249,11 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { // Backward jump — already resolved, store as index ctx->code.data[my_idx].num = tm->resolved_idx; - da_append(ctx->relocs, my_idx); + + add_reloc(ctx, my_idx, NULL, INTERNAL); if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { - fprintf(stderr, "Error: Loop stack mismatch at bc_off=%zu\n", current_bc_off); + fprintf(stderr, "Error: Loop stack mismatch at bc_off=%zu\n", + current_bc_off); return false; } } else { @@ -274,15 +313,16 @@ static insn *decode_internal(decode_ctx *ctx) { depth = m->stack_depth; } - // Resolve forward jumps (backpatching) — store as index, record relocation + // Resolve forward jumps (backpatching) — store as index, record + // relocation fixup_node *f = m->fixups; while (f) { VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu -> " "code_idx=%zu\n", current_bc_off, f->insn_idx, ctx->code.len); ctx->code.data[f->insn_idx].num = (int32_t)ctx->code.len; - da_append(ctx->relocs, f->insn_idx); - + add_reloc(ctx, f->insn_idx, NULL, INTERNAL); + fixup_node *next = f->next; free(f); f = next; @@ -561,13 +601,13 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_CLOSURE: { - int32_t target_raw = reader_i32(&ctx->reader); + int32_t target_off = reader_i32(&ctx->reader); int32_t n_captured = reader_i32(&ctx->reader); VM_DEBUG("DECODE: OP_CLOSURE target_raw=0x%x n_captured=%d bc_off=%zu\n", - target_raw, n_captured, current_bc_off); + target_off, n_captured, current_bc_off); - bool is_external = IS_EXT_REF(target_raw); + bool is_external = IS_EXT_REF(target_off); // Emit load instructions for 
each captured variable for (int32_t i = 0; i < n_captured; i++) { @@ -603,37 +643,47 @@ static insn *decode_internal(decode_ctx *ctx) { DEPTH_DEC(depth, n_captured - 1); + EMIT_FUNC(ctx, op_closure); + + size_t target_slot = ctx->code.len; if (is_external) { - int str_offset = EXT_REF_INDEX(target_raw); + int str_offset = EXT_REF_INDEX(target_off); const char *ext_func_name = bytecode_get_string(bc, str_offset); VM_DEBUG("DECODE: OP_CLOSURE external name='%s' (stub)\n", ext_func_name); - // Emit closure with NULL target placeholder. - // Linker will resolve to inter-unit function or create FFI stub. - EMIT_FUNC(ctx, op_closure); - size_t target_slot = ctx->code.len; - EMIT_TARGET(ctx, NULL); // placeholder + resolved_symbol *sym = + symbol_table_find_function(ctx->st, ext_func_name); + if (sym) { + add_reloc(ctx, target_slot, ext_func_name, UNIT); + EMIT_NUM( + ctx, + sym->idx); // placeholder, will be resolved to inter-unit function + } else { + size_t idx = ffi_call_table_find(ctx->ffi, ext_func_name); + if (idx == -1) { + ffi_call_table_add(ctx->ffi, ext_func_name, op_ffi_call); + idx = ffi_call_table_count(ctx->ffi) - 1; + } + add_reloc(ctx, target_slot, ext_func_name, FFI); + EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call + } + EMIT_NUM(ctx, n_captured); - // Record stub so linker can resolve - add_stub(ctx, target_slot, ext_func_name, STUB_CLOSURE); } else { - uint32_t target_off = (uint32_t)target_raw; if (!validate_target_off(bc, target_off, current_bc_off, "CLOSURE")) { goto cleanup; } - EMIT_FUNC(ctx, op_closure); - size_t target_slot = ctx->code.len; EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_captured); meta_info *tm = &meta[target_off]; if (target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code.data[target_slot].num = tm->resolved_idx; - da_append(ctx->relocs, target_slot); + add_reloc(ctx, target_slot, NULL, INTERNAL); } else { add_fixup(meta, target_off, target_slot); } @@ -649,35 +699,48 @@ 
static insn *decode_internal(decode_ctx *ctx) { VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", target_off, n_args, current_bc_off, ctx->code.len); + bool is_external = IS_EXT_REF(target_off); - if (IS_EXT_REF(target_off)) { + EMIT_FUNC(ctx, op_call); + + size_t target_slot = ctx->code.len; + if (is_external) { int str_offset = EXT_REF_INDEX(target_off); - const char *func_name = bytecode_get_string(bc, str_offset); + const char *ext_func_name = bytecode_get_string(bc, str_offset); - VM_DEBUG("DECODE: OP_CALL external '%s' (stub)\n", func_name); + VM_DEBUG("DECODE: OP_CALL external '%s' (stub)\n", ext_func_name); - // To be patched by linker - EMIT_FUNC(ctx, NULL); - size_t name_slot = ctx->code.len; - EMIT_TARGET(ctx, NULL); + resolved_symbol *sym = + symbol_table_find_function(ctx->st, ext_func_name); + + if (sym) { + add_reloc(ctx, target_slot, ext_func_name, UNIT); + EMIT_NUM( + ctx, + sym->idx); // placeholder, will be resolved to inter-unit function + } else { + size_t idx = ffi_call_table_find(ctx->ffi, ext_func_name); + if (idx == -1) { + ffi_call_table_add(ctx->ffi, ext_func_name, op_ffi_call); + idx = ffi_call_table_count(ctx->ffi) - 1; + } + add_reloc(ctx, target_slot, ext_func_name, FFI); + EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call + } EMIT_NUM(ctx, n_args); - // Record stub — linker decides if it's inter-unit or FFI - add_stub(ctx, name_slot, func_name, STUB_CALL); } else { if (!validate_target_off(bc, (uint32_t)target_off, current_bc_off, "CALL")) { goto cleanup; } - size_t target_slot = ctx->code.len + 1; - EMIT_FUNC(ctx, op_call); EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_args); - meta_info *tm = &meta[(uint32_t)target_off]; - if ((uint32_t)target_off < current_bc_off && tm->resolved_idx != -1) { + meta_info *tm = &meta[target_off]; + if (target_off < current_bc_off && tm->resolved_idx != -1) { ctx->code.data[target_slot].num = tm->resolved_idx; - 
da_append(ctx->relocs, target_slot); + add_reloc(ctx, target_slot, NULL, INTERNAL); } else { add_fixup(meta, (uint32_t)target_off, target_slot); } @@ -721,7 +784,6 @@ static insn *decode_internal(decode_ctx *ctx) { } // Extract mapping - ctx->bc_to_insn_map = ALLOC_ARRAY(int32_t, bc->code_size); for (size_t i = 0; i < bc->code_size; i++) { ctx->bc_to_insn_map[i] = meta[i].resolved_idx; } @@ -743,42 +805,149 @@ static insn *decode_internal(decode_ctx *ctx) { return result; } -decoded **decode(bytecode **bc_arr, size_t n) { - decoded **result = ALLOC_ARRAY(decoded *, n); +static void register_public_symbols(symbol_table *st, const bytecode *bc, + size_t code_offset, size_t global_base, + const int32_t *bc_to_insn_map) { + public_symbol pub; + bytecode_iterator iter; + bytecode_pubs_init(&iter, bc); + + while (bytecode_pubs_next(&iter, &pub)) { + if (pub.flag == PUB_FLAG_FUNCTION) { + // pub.code_offset is the offset in the bytecode, so we use the mapping + int32_t insn_idx = bc_to_insn_map[pub.code_offset]; + if (insn_idx == -1) { + fprintf(stderr, + "Error: public symbol '%s' at bytecode offset %d not decoded\n", + pub.name, pub.code_offset); + exit(EXIT_FAILURE); + } + int32_t code_idx = insn_idx + code_offset; + symbol_table_add_function(st, pub.name, code_idx); + } else { + int32_t global_idx = pub.code_offset + global_base; + symbol_table_add_global(st, pub.name, global_idx); + } + } +} + +/* + * Resolve relocs / placeholders in the final code array after all units are + * decoded and merged. 
+ */ +static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, + size_t ffi_call_offset) { + for (size_t j = 0; j < dec->relocs_len; j++) { + reloc rel = dec->relocs[j]; + size_t slot = code_offset + rel.patch_idx; + int32_t target_idx = all_code[slot].num; + switch (rel.kind) { + case INTERNAL: { + all_code[slot].target = &all_code[code_offset + target_idx]; + break; + } + case UNIT: { + all_code[slot].target = &all_code[target_idx]; + break; + } + case FFI: { + all_code[slot].target = + &all_code[ffi_call_offset + target_idx * FFI_STUB_SIZE]; + break; + } + } + } +} + +program *decode(bytecode **bc_arr, size_t n) { + symbol_table *st = symbol_table_create(); + ffi_call_table *ffi = ffi_call_table_create(); + + decoded *dec_arr = ALLOC_ARRAY(decoded, n); - size_t global_offset = 0; + size_t total_code_len = 0; + size_t total_globals = 0; for (size_t i = 0; i < n; i++) { - decode_ctx *ctx = decode_ctx_create(bc_arr[i], global_offset); + decode_ctx *ctx = decode_ctx_create(bc_arr[i], st, ffi, total_code_len); insn *code = decode_internal(ctx); if (!code) { + // TODO: cleanup fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); + free(dec_arr); + free(ctx); return NULL; } - decoded *dec = ALLOC(decoded); - *dec = (decoded){ + + dec_arr[i] = (decoded){ .code = code, .code_len = ctx->code.len, - .stubs = ctx->stubs.data, - .stubs_len = ctx->stubs.len, .bc_to_insn_map = ctx->bc_to_insn_map, .relocs = ctx->relocs.data, .relocs_len = ctx->relocs.len, }; - result[i] = dec; - global_offset += bc_arr[i]->globals_count; + + register_public_symbols(ctx->st, bc_arr[i], total_code_len, total_globals, + ctx->bc_to_insn_map); + + total_code_len += ctx->code.len; + total_globals += bc_arr[i]->globals_count; free(ctx); } - return result; + size_t ffi_call_len = ffi_call_table_count(ffi); + size_t ffi_call_offset = total_code_len; + size_t all_code_len = total_code_len + ffi_call_len * FFI_STUB_SIZE; + + insn *all_code = ALLOC_ARRAY(insn, all_code_len); + 
insn **entry_points = ALLOC_ARRAY(insn *, n); + + size_t code_offset = 0; + for (size_t i = 0; i < n; i++) { + decoded *dec = &dec_arr[i]; + + // Move instructions into final code array + memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); + + entry_points[i] = &all_code[code_offset]; + + resolve_relocs(all_code, dec, code_offset, ffi_call_offset); + + code_offset += dec->code_len; + } + + // Copy FFI calls into the tail of all_code + insn *ffi_data = ffi_call_table_get_all(ffi); + if (ffi_data) { + memcpy(all_code + ffi_call_offset, ffi_data, + ffi_call_len * FFI_STUB_SIZE * sizeof(insn)); + free(ffi_data); + } + + program *prog = ALLOC(program); + prog->code = all_code; + prog->code_len = all_code_len; + prog->total_globals = total_globals; + prog->entry_points = entry_points; + + symbol_table_destroy(st); + ffi_call_table_destroy(ffi); + + for (size_t i = 0; i < n; i++) { + free(dec_arr[i].code); + free(dec_arr[i].bc_to_insn_map); + free(dec_arr[i].relocs); + } + free(dec_arr); + + return prog; } -void decoded_free(decoded *dec) { - if (dec) { - free(dec->code); - free(dec->stubs); - free(dec->bc_to_insn_map); - free(dec->relocs); - free(dec); +void program_free(program *prog) { + if (!prog) { + return; } + free(prog->code); + free(prog->entry_points); + free(prog); } diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h new file mode 100644 index 000000000..b0fa97d48 --- /dev/null +++ b/virtual_machine/converter.h @@ -0,0 +1,21 @@ +#ifndef CONVERTER_H +#define CONVERTER_H + +#include "../runtime/runtime_common.h" +#include "bytecode.h" +#include "insn.h" +#include "reader.h" +#include +#include + +typedef struct { + insn *code; + size_t code_len; + size_t total_globals; + insn **entry_points; +} program; + +program *decode(bytecode **bc_arr, size_t n); +void program_free(program *prog); + +#endif // CONVERTER_H diff --git a/virtual_machine/decoder.h b/virtual_machine/decoder.h deleted file mode 100644 index 
6f80f91f1..000000000 --- a/virtual_machine/decoder.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef DECODER_NEW_H -#define DECODER_NEW_H - -#include "../runtime/runtime_common.h" -#include "bytecode.h" -#include "reader.h" -#include "insn.h" -#include -#include - -typedef enum { - STUB_CALL, - STUB_CLOSURE, - STUB_GLOBAL_LD, - STUB_GLOBAL_ST, -} stub_kind; - -/* - * A single fixup record emitted by the decoder for the linker to resolve. - */ -typedef struct { - size_t patch_idx; // Index into code array - const char *name; // Symbol name to look up - stub_kind kind; -} stub; - -/* - * Result of decoding a single unit. - */ -typedef struct { - insn *code; // Decoded threaded code array - size_t code_len; - stub *stubs; // Fixups for the linker to resolve - size_t stubs_len; - int32_t *bc_to_insn_map; - size_t *relocs; // Indices of insn with internal target offsets - size_t relocs_len; -} decoded; - -decoded **decode(bytecode **bc_arr, size_t n); -void decoded_free(decoded *dec); - -/* - * Used for patching - */ -fn decoder_get_op_call(void); -fn decoder_get_op_call_ffi_stub(void); -fn decoder_get_op_callc_ffi_stub(void); -fn decoder_get_op_ld_glo(void); -fn decoder_get_op_st_glo(void); -fn decoder_get_op_ld_glo_ext(void); -fn decoder_get_op_st_glo_ext(void); - -#endif // DECODER_NEW_H diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c index d1524daf8..2da74a8b3 100644 --- a/virtual_machine/ffi.c +++ b/virtual_machine/ffi.c @@ -32,17 +32,20 @@ void ffi_call_table_destroy(ffi_call_table *table) { if (!table) { return; } + for (size_t i = 0; i < table->len; i++) { + free(table->data[i].stub); + } da_free(*table); free(table); } -insn *ffi_call_table_find(ffi_call_table *table, const char *name) { +size_t ffi_call_table_find(ffi_call_table *table, const char *name) { for (size_t i = 0; i < table->len; i++) { if (strcmp(table->data[i].name, name) == 0) { - return table->data[i].stub; + return i; } } - return NULL; + return -1; } insn *ffi_call_table_add(ffi_call_table 
*table, const char *name, fn stub_fn) { @@ -61,6 +64,24 @@ insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn) { return stub; } +size_t ffi_call_table_count(ffi_call_table *table) { return table->len; } + +ffi_call_stub *ffi_call_table_get(ffi_call_table *table, size_t idx) { + return &table->data[idx]; +} + +insn *ffi_call_table_get_all(ffi_call_table *table) { + if (table->len == 0) { + return NULL; + } + insn *all_stubs = ALLOC_ARRAY(insn, table->len * 2); + for (size_t i = 0; i < table->len; i++) { + all_stubs[i * 2] = table->data[i].stub[0]; + all_stubs[i * 2 + 1] = table->data[i].stub[1]; + } + return all_stubs; +} + // TODO: ugly? typedef struct { const char *lama_name; diff --git a/virtual_machine/ffi.h b/virtual_machine/ffi.h index 345f75106..be05888f9 100644 --- a/virtual_machine/ffi.h +++ b/virtual_machine/ffi.h @@ -23,7 +23,11 @@ typedef struct ffi_call_table ffi_call_table; ffi_call_table *ffi_call_table_create(void); void ffi_call_table_destroy(ffi_call_table *table); -insn *ffi_call_table_find(ffi_call_table *table, const char *name); +size_t ffi_call_table_find(ffi_call_table *table, const char *name); insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn); +size_t ffi_call_table_count(ffi_call_table *table); +ffi_call_stub *ffi_call_table_get(ffi_call_table *table, size_t idx); + +insn *ffi_call_table_get_all(ffi_call_table *table); #endif // FFI_CALL_H diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index 6ad2d521a..bf85694a1 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -26,7 +26,7 @@ typedef union insn { int32_t num; // Integer operand (signed) const char *str; // String operand (direct pointer) union insn *target; // Direct jump target (pointer to insn) - aint *global_ptr; // Pointer to a C global variable + aint *global_ptr; // Pointer to a C global variable } insn; #endif // INSN_H diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c index 
0b4c88351..a5b0c0ced 100644 --- a/virtual_machine/linker.c +++ b/virtual_machine/linker.c @@ -1,6 +1,6 @@ #include "linker.h" #include "bytecode.h" -#include "decoder.h" +#include "converter.h" #include "ffi.h" #include "memory.h" #include "symbols.h" @@ -55,34 +55,12 @@ static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, resolved_symbol *sym = symbol_table_find(st, s->name); switch (s->kind) { - case STUB_CALL: { + case STUB_FUNC: { - // Decoder emitted: [NULL] [NULL] [n_args] if (sym) { assert(sym->is_function); - code[pi - 1].func = decoder_get_op_call(); code[pi].target = &all_code[sym->idx]; } else { - code[pi - 1].func = decoder_get_op_call(); - insn *ffi_stub = ffi_call_table_find(ffi_stubs, s->name); - if (!ffi_stub) { - ffi_stub = ffi_call_table_add(ffi_stubs, s->name, - decoder_get_op_callc_ffi_stub()); - } - code[pi].target = ffi_stub; - // code[pi - 1].func = decoder_get_op_call_ffi_stub(); - // code[pi].str = s->name; - } - break; - } - - case STUB_CLOSURE: { - - if (sym) { - assert(sym->is_function); - code[pi].target = &all_code[sym->idx]; - } else { - // Not found in symbol table — create FFI stub insn *ffi_stub = ffi_call_table_find(ffi_stubs, s->name); if (!ffi_stub) { ffi_stub = ffi_call_table_add(ffi_stubs, s->name, diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h index 9d9ae4431..e53c753b7 100644 --- a/virtual_machine/linker.h +++ b/virtual_machine/linker.h @@ -2,7 +2,7 @@ #define LINKER_H #include "bytecode.h" -#include "decoder.h" +#include "converter.h" #include "insn.h" #include diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 208188946..72f11df44 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -22,12 +22,6 @@ typedef struct { size_t cap; } bytecode_array; -typedef struct { - size_t *data; // Indices into the loaded bytecode_array - size_t len; - size_t cap; -} exec_order; - /* * Build the path to a unit's .bc file by searching through paths. 
*/ @@ -46,15 +40,21 @@ static char *build_unit_path(const char *unit_name, const search_paths *paths) { } /* - * Find a loaded unit by name. Returns its index, or (size_t)-1 if not found. + * Check if a string looks like a file path (contains '/' or ends with '.bc') */ -static size_t find_loaded(bytecode_array *units, const char *name) { +static bool is_filepath(const char *str) { + size_t len = strlen(str); + return strchr(str, '/') != NULL || + (len > 3 && strcmp(str + len - 3, ".bc") == 0); +} + +static bool find_loaded(bytecode_array *units, const char *name) { for (size_t i = 0; i < units->len; i++) { if (strcmp(units->data[i]->name, name) == 0) { - return i; + return true; } } - return (size_t)-1; + return false; } /* @@ -77,14 +77,21 @@ static char *extract_unit_name(const char *filename) { /* * Load a single unit and its dependencies recursively. */ -static bool load_unit_recursive(bytecode_array *units, exec_order *order, - const char *s, const search_paths *paths) { - - char *filepath = build_unit_path(s, paths); - char *unit_name = ESTRDUP(s); +static bool load_unit_recursive(bytecode_array *units, const char *s, + const search_paths *paths) { + char *filepath = NULL; + char *unit_name = NULL; + + // The initial call uses a filepath, recursive calls use unit names + if (is_filepath(s)) { + filepath = ESTRDUP(s); + unit_name = extract_unit_name(s); + } else { + filepath = build_unit_path(s, paths); + unit_name = ESTRDUP(s); + } - size_t existing = find_loaded(units, unit_name); - if (existing != (size_t)-1) { + if (find_loaded(units, unit_name)) { free(filepath); free(unit_name); return true; @@ -94,91 +101,40 @@ static bool load_unit_recursive(bytecode_array *units, exec_order *order, if (!bc) { fprintf(stderr, "Failed to load dependency '%s' from '%s'\n", unit_name, filepath); + free(filepath); free(unit_name); return false; } - free(filepath); bc->name = unit_name; - size_t my_idx = units->len; - da_append(*units, bc); - - // Recursively load 
dependencies - + // Recursively load dependencies first (topological order) const char *import_name; bytecode_iterator iter; bytecode_imports_init(&iter, bc); while (bytecode_imports_next(&iter, &import_name)) { + // Skip Std since we have it as runtime.a if (strcmp(import_name, "Std") == 0) { continue; } - if (!load_unit_recursive(units, order, import_name, paths)) { - free(filepath); - return false; - } - } - - da_append(*order, my_idx); - return true; -} - -static bytecode *load_main_unit(const char *path) { - char *filepath = ESTRDUP(path); - char *unit_name = extract_unit_name(path); - bytecode *bc = bytecode_load(filepath); - if (!bc) { - fprintf(stderr, "Failed to load main unit from '%s'\n", filepath); - exit(EXIT_FAILURE); + load_unit_recursive(units, import_name, paths); } - bc->name = unit_name; + da_append(*units, bc); free(filepath); - return bc; + return true; } load_result load(const char *main_unit_path, const search_paths *paths) { - load_result result = {0}; bytecode_array m; da_init(m); - exec_order order; - da_init(order); - - bytecode *bc = load_main_unit(main_unit_path); - - const char *import_name; - bytecode_iterator iter; - bytecode_imports_init(&iter, bc); - while (bytecode_imports_next(&iter, &import_name)) { - // Skip Std since we have it as runtime.a - if (strcmp(import_name, "Std") == 0) { - continue; - } - - if (!load_unit_recursive(&m, &order, import_name, paths)) { - free(order.data); - return result; - } - } - - // Check if main unit was already loaded as a dependency - // NOTE: this is all done to comply with the semantics of the reference - // implementation which allows main module to execute twice (if it's imported - // by one of its dependencies). 
- size_t main_idx = find_loaded(&m, bc->name); - if (main_idx == (size_t)-1) { - main_idx = m.len; - da_append(m, bc); - } else { - bytecode_free(bc); - } - da_append(order, main_idx); + load_unit_recursive(&m, main_unit_path, paths); - result.units = m.data; - result.units_len = m.len; - result.exec_order = order.data; - result.exec_order_len = order.len; + load_result result = { + .units = m.data, + .units_len = m.len, + }; return result; } diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h index 22dcd3ee1..987735ad6 100644 --- a/virtual_machine/loader.h +++ b/virtual_machine/loader.h @@ -13,10 +13,8 @@ typedef struct { } search_paths; typedef struct { - bytecode **units; // Array of unique loaded bytecode units - size_t units_len; // Number of unique units - size_t *exec_order; // Indices into units[], in execution order - size_t exec_order_len; // Length of exec_order (may be units_len + 1) + bytecode **units; // Array of unique loaded bytecode units + size_t units_len; // Number of unique units } load_result; load_result load(const char *main_unit_path, const search_paths *paths); diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 626b7acb8..774b2bd5f 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -573,15 +573,9 @@ void op_end(DECL_STATE) { } /* - * Closures + * FFI call */ - -/* - * FFI closure stub - called when an external closure is invoked - * via op_callc This stub is generated for each unresolved external closure - * reference. The function name is embedded in the next instruction. 
- */ -void op_callc_ffi_stub(DECL_STATE) { +void op_ffi_call(DECL_STATE) { ip++; const char *func_name = ip->str; @@ -627,29 +621,6 @@ void op_closure(DECL_STATE) { DISPATCH(); } -// TODO: think about unifying with callc_ffi -void op_call_ffi_stub(DECL_STATE) { - ip++; - const char *func_name = ip->str; - ip++; - int32_t n_args = ip->num; - - VM_DEBUG("FFI_CALL: func='%s' n_args=%d\n", func_name, n_args); - - aint args[256]; - aint *args_base = sp + 1; - for (int32_t i = 0; i < n_args; i++) { - args[i] = args_base[n_args - 1 - i]; - } - sp += n_args; - - aint result = ffi_call_c(func_name, args, n_args); - - VM_DEBUG("FFI_CALL: result=%ld\n", (long)result); - STACK_PUSH(sp, result); - DISPATCH(); -} - #ifdef DEBUG_PRINT void op_line(DECL_STATE) { ip++; diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index d8f9b04e5..7ab835686 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -59,8 +59,7 @@ void op_call(DECL_STATE); void op_callc(DECL_STATE); void op_end(DECL_STATE); void op_closure(DECL_STATE); -void op_callc_ffi_stub(DECL_STATE); -void op_call_ffi_stub(DECL_STATE); +void op_ffi_call(DECL_STATE); void op_ld_glo_ext(DECL_STATE); void op_st_glo_ext(DECL_STATE); diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c index a53cfbfc5..435964269 100644 --- a/virtual_machine/symbols.c +++ b/virtual_machine/symbols.c @@ -27,7 +27,8 @@ void symbol_table_destroy(symbol_table *table) { free(table); } -resolved_symbol *symbol_table_find(symbol_table *table, const char *name) { +static resolved_symbol *symbol_table_find(symbol_table *table, const char *name, + bool is_function) { for (size_t i = 0; i < table->len; i++) { if (strcmp(table->data[i].name, name) == 0) { return &table->data[i]; @@ -36,12 +37,12 @@ resolved_symbol *symbol_table_find(symbol_table *table, const char *name) { return NULL; } -static int symbol_table_add(symbol_table *table, const char *name, - bool is_function, int32_t idx) { +static int 
symbol_table_add(symbol_table *table, const char *name, int32_t idx, + bool is_function) { // Allow duplicate main() (each uinit has one) if (strcmp(name, MAIN_FUNC) != 0) { - resolved_symbol *existing = symbol_table_find(table, name); + resolved_symbol *existing = symbol_table_find(table, name, is_function); if (existing) { fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", name); @@ -60,12 +61,22 @@ static int symbol_table_add(symbol_table *table, const char *name, return 0; } +resolved_symbol *symbol_table_find_function(symbol_table *table, + const char *name) { + return symbol_table_find(table, name, true); +} + +resolved_symbol *symbol_table_find_global(symbol_table *table, + const char *name) { + return symbol_table_find(table, name, false); +} + int symbol_table_add_function(symbol_table *table, const char *name, int32_t code_idx) { - return symbol_table_add(table, name, true, code_idx); + return symbol_table_add(table, name, code_idx, true); } int symbol_table_add_global(symbol_table *table, const char *name, int32_t global_idx) { - return symbol_table_add(table, name, false, global_idx); + return symbol_table_add(table, name, global_idx, false); } diff --git a/virtual_machine/symbols.h b/virtual_machine/symbols.h index d16f9d17c..f811ae891 100644 --- a/virtual_machine/symbols.h +++ b/virtual_machine/symbols.h @@ -26,7 +26,10 @@ typedef struct symbol_table symbol_table; symbol_table *symbol_table_create(void); void symbol_table_destroy(symbol_table *table); -resolved_symbol *symbol_table_find(symbol_table *table, const char *name); +resolved_symbol *symbol_table_find_function(symbol_table *table, + const char *name); +resolved_symbol *symbol_table_find_global(symbol_table *table, + const char *name); int symbol_table_add_function(symbol_table *table, const char *name, int32_t code_index); int symbol_table_add_global(symbol_table *table, const char *name, diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 3ae1174f8..8314a53a4 
100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -1,8 +1,7 @@ #include "vm.h" #include "../runtime/gc.h" #include "../runtime/runtime_common.h" -#include "decoder.h" -#include "linker.h" +#include "converter.h" #include "loader.h" #include "memory.h" #include @@ -19,8 +18,6 @@ struct virtual_machine { insn *code; // Contiguous code array insn **entry_points; // Entry point for each unique unit size_t entry_points_len; - size_t *exec_order; // Indices into entry_points[], execution order - size_t exec_order_len; size_t total_globals; }; @@ -39,25 +36,19 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, vm->bc_arr = lr.units; vm->bc_len = lr.units_len; - decoded **decoded_arr = decode(lr.units, lr.units_len); - if (!decoded_arr) { + program *prog = decode(lr.units, lr.units_len); + if (!prog) { for (size_t i = 0; i < vm->bc_len; i++) { bytecode_free(lr.units[i]); } free(lr.units); - free(lr.exec_order); free(vm); return NULL; } - program_link *prog = link(lr.units, decoded_arr, lr.units_len); - vm->total_globals = prog->total_globals; vm->code = prog->code; vm->entry_points = prog->entry_points; - vm->entry_points_len = prog->entry_points_len; - vm->exec_order = lr.exec_order; - vm->exec_order_len = lr.exec_order_len; free(prog); @@ -74,7 +65,6 @@ void vm_destroy(virtual_machine *vm) { free(vm->bc_arr); free(vm->code); free(vm->entry_points); - free(vm->exec_order); free(vm); } @@ -102,9 +92,8 @@ aint vm_run(virtual_machine *vm) { aint *sp = &stack_data[active_stack_size - 1]; aint *bp = sp; - for (size_t i = 0; i < vm->exec_order_len; i++) { - size_t unit_idx = vm->exec_order[i]; - insn *ip = vm->entry_points[unit_idx]; + for (size_t i = 0; i < vm->bc_len; i++) { + insn *ip = vm->entry_points[i]; ip->func(ip, sp, bp, globals); } From efd3165823f964640a0be7f19b6e3af9a65bd233 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 26 Feb 2026 16:32:06 +0300 Subject: [PATCH 042/141] cache ffi calls --- virtual_machine/converter.c | 
45 +++++---- virtual_machine/converter.h | 2 + virtual_machine/ffi.c | 195 +++++++++++++++++------------------- virtual_machine/ffi.h | 40 +++++--- virtual_machine/insn.h | 1 + virtual_machine/ops.c | 12 +-- virtual_machine/vm.c | 5 + 7 files changed, 157 insertions(+), 143 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 70a370b81..d9ce58d29 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -56,6 +56,7 @@ #define EMIT_TARGET(ctx, t) da_append((ctx)->code, ((insn){.target = (t)})) #define EMIT_GLOBAL_PTR(ctx, p) \ da_append((ctx)->code, ((insn){.global_ptr = (p)})) +#define EMIT_PTR(ctx, p) da_append((ctx)->code, ((insn){.ptr = (p)})) #define FFI_STUB_SIZE 2 @@ -661,11 +662,7 @@ static insn *decode_internal(decode_ctx *ctx) { ctx, sym->idx); // placeholder, will be resolved to inter-unit function } else { - size_t idx = ffi_call_table_find(ctx->ffi, ext_func_name); - if (idx == -1) { - ffi_call_table_add(ctx->ffi, ext_func_name, op_ffi_call); - idx = ffi_call_table_count(ctx->ffi) - 1; - } + size_t idx = ffi_call_table_intern(ctx->ffi, ext_func_name); add_reloc(ctx, target_slot, ext_func_name, FFI); EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call } @@ -719,11 +716,7 @@ static insn *decode_internal(decode_ctx *ctx) { ctx, sym->idx); // placeholder, will be resolved to inter-unit function } else { - size_t idx = ffi_call_table_find(ctx->ffi, ext_func_name); - if (idx == -1) { - ffi_call_table_add(ctx->ffi, ext_func_name, op_ffi_call); - idx = ffi_call_table_count(ctx->ffi) - 1; - } + size_t idx = ffi_call_table_intern(ctx->ffi, ext_func_name); add_reloc(ctx, target_slot, ext_func_name, FFI); EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call } @@ -872,10 +865,17 @@ program *decode(bytecode **bc_arr, size_t n) { decode_ctx *ctx = decode_ctx_create(bc_arr[i], st, ffi, total_code_len); insn *code = decode_internal(ctx); if (!code) { - // TODO: cleanup fprintf(stderr, 
"Failed to decode %s\n", bc_arr[i]->name); - free(dec_arr); + free(ctx->bc_to_insn_map); free(ctx); + for (size_t j = 0; j < i; j++) { + free(dec_arr[j].code); + free(dec_arr[j].bc_to_insn_map); + free(dec_arr[j].relocs); + } + free(dec_arr); + symbol_table_destroy(st); + ffi_call_table_destroy(ffi); return NULL; } @@ -895,7 +895,7 @@ program *decode(bytecode **bc_arr, size_t n) { free(ctx); } - size_t ffi_call_len = ffi_call_table_count(ffi); + size_t ffi_call_len = ffi_call_table_len(ffi); size_t ffi_call_offset = total_code_len; size_t all_code_len = total_code_len + ffi_call_len * FFI_STUB_SIZE; @@ -916,19 +916,25 @@ program *decode(bytecode **bc_arr, size_t n) { code_offset += dec->code_len; } - // Copy FFI calls into the tail of all_code - insn *ffi_data = ffi_call_table_get_all(ffi); - if (ffi_data) { - memcpy(all_code + ffi_call_offset, ffi_data, - ffi_call_len * FFI_STUB_SIZE * sizeof(insn)); - free(ffi_data); + ffi_call_iterator ffi_iter; + ffi_call_table_emit_init(&ffi_iter, ffi); + ffi_resolved *res; + size_t ffi_idx = 0; + while (ffi_call_table_emit_next(&ffi_iter, &res)) { + all_code[ffi_call_offset + ffi_idx * FFI_STUB_SIZE].func = op_ffi_call; + all_code[ffi_call_offset + ffi_idx * FFI_STUB_SIZE + 1].ptr = res; + ffi_idx++; } + ffi_resolved *ffi_data = ffi_call_table_release(ffi); + program *prog = ALLOC(program); prog->code = all_code; prog->code_len = all_code_len; prog->total_globals = total_globals; prog->entry_points = entry_points; + prog->ffi_data = ffi_data; + prog->ffi_len = ffi_call_len; symbol_table_destroy(st); ffi_call_table_destroy(ffi); @@ -947,6 +953,7 @@ void program_free(program *prog) { if (!prog) { return; } + free(prog->ffi_data); free(prog->code); free(prog->entry_points); free(prog); diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h index b0fa97d48..71f8b3dc2 100644 --- a/virtual_machine/converter.h +++ b/virtual_machine/converter.h @@ -13,6 +13,8 @@ typedef struct { size_t code_len; size_t total_globals; 
insn **entry_points; + void *ffi_data; + size_t ffi_len; } program; program *decode(bytecode **bc_arr, size_t n); diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c index 2da74a8b3..cb33760f5 100644 --- a/virtual_machine/ffi.c +++ b/virtual_machine/ffi.c @@ -15,74 +15,37 @@ #include struct ffi_call_table { - ffi_call_stub *data; + ffi_resolved *data; size_t len; size_t cap; + + // Used for dedup + struct { + const char **data; + size_t len; + size_t cap; + } names; }; ffi_call_table *ffi_call_table_create(void) { ffi_call_table *table = ALLOC(ffi_call_table); da_init(*table); + da_init(table->names); return table; } -// Currently frees only table and not stubs themselves since they are needed for -// execution void ffi_call_table_destroy(ffi_call_table *table) { if (!table) { return; } - for (size_t i = 0; i < table->len; i++) { - free(table->data[i].stub); + for (size_t i = 0; i < table->names.len; i++) { + free((char *)table->names.data[i]); } - da_free(*table); + da_free(table->names); + free(table->data); free(table); } -size_t ffi_call_table_find(ffi_call_table *table, const char *name) { - for (size_t i = 0; i < table->len; i++) { - if (strcmp(table->data[i].name, name) == 0) { - return i; - } - } - return -1; -} - -insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn) { - insn *stub = ALLOC_ARRAY(insn, 2); - - char *persistent_name = ESTRDUP(name); - - stub[0].func = stub_fn; - stub[1].str = persistent_name; - - ffi_call_stub entry = {.name = persistent_name, .stub = stub}; - da_append(*table, entry); - - // VM_DEBUG("EXT_FUNC_STUB_TABLE: added '%s' -> stub=%p\n", name, (void - // *)stub); - return stub; -} - -size_t ffi_call_table_count(ffi_call_table *table) { return table->len; } - -ffi_call_stub *ffi_call_table_get(ffi_call_table *table, size_t idx) { - return &table->data[idx]; -} - -insn *ffi_call_table_get_all(ffi_call_table *table) { - if (table->len == 0) { - return NULL; - } - insn *all_stubs = ALLOC_ARRAY(insn, 
table->len * 2); - for (size_t i = 0; i < table->len; i++) { - all_stubs[i * 2] = table->data[i].stub[0]; - all_stubs[i * 2 + 1] = table->data[i].stub[1]; - } - return all_stubs; -} - -// TODO: ugly? typedef struct { const char *lama_name; const char *target_name; @@ -107,7 +70,6 @@ static const func_metadata func_table[] = { // Sentinel {NULL, NULL, false, 0}}; -// TODO: cache? static void *lookup_function(const char *name) { void *fn = dlsym(RTLD_DEFAULT, name); char *error = dlerror(); @@ -127,17 +89,70 @@ static const func_metadata *lookup_metadata(const char *name) { return NULL; } -/* - * Functions that take (aint* args) - a pointer to argument array - * TODO: a better way? - */ -static aint call_args_array_function(const char *name, aint *args) { - void *fn = lookup_function(name); +size_t ffi_call_table_intern(ffi_call_table *table, const char *name) { + for (size_t i = 0; i < table->names.len; i++) { + if (strcmp(table->names.data[i], name) == 0) { + return i; + } + } + + const func_metadata *meta = lookup_metadata(name); + const char *target_name = meta ? 
meta->target_name : name; + + void *fn = lookup_function(target_name); if (!fn) { fprintf(stderr, "Undefined external function: %s\n", name); exit(EXIT_FAILURE); } + ffi_kind kind = FFI_REGULAR; + int fixed_args = 0; + + if (meta) { + if (meta->is_args_array) { + kind = FFI_ARGS_ARRAY; + } else { + kind = FFI_VARIADIC; + fixed_args = meta->fixed_args; + } + } + + ffi_resolved entry = { + .fn_ptr = fn, + .kind = kind, + .fixed_args = fixed_args, + }; + + da_append(*table, entry); + da_append(table->names, ESTRDUP(name)); + return table->len - 1; +} + +size_t ffi_call_table_len(ffi_call_table *table) { return table->len; } + +ffi_resolved *ffi_call_table_release(ffi_call_table *table) { + ffi_resolved *data = table->data; + table->data = NULL; + table->len = 0; + table->cap = 0; + return data; +} + +void ffi_call_table_emit_init(ffi_call_iterator *iter, ffi_call_table *table) { + iter->table = table; + iter->curr = 0; +} + +bool ffi_call_table_emit_next(ffi_call_iterator *iter, ffi_resolved **out) { + if (iter->curr >= iter->table->len) { + return false; + } + *out = &iter->table->data[iter->curr]; + iter->curr++; + return true; +} + +static aint call_args_array(void *fn_ptr, aint *args) { ffi_cif cif; ffi_type *arg_types[1] = {&ffi_type_pointer}; void *arg_values[1] = {&args}; @@ -146,29 +161,19 @@ static aint call_args_array_function(const char *name, aint *args) { ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_pointer, arg_types); if (status != FFI_OK) { - fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); + fprintf(stderr, "FFI prep failed: status=%d\n", status); exit(EXIT_FAILURE); } - ffi_call(&cif, FFI_FN(fn), &result, arg_values); + ffi_call(&cif, FFI_FN(fn_ptr), &result, arg_values); return (aint)result; } -/* - * Mapping functions due to runtime.c x32 and x64 variants of printf etc. 
- * TODO: very ugly - */ -static aint call_variadic_function(const char *target_name, int fixed_args, - aint *args, int n_args) { - void *fn = lookup_function(target_name); - if (!fn) { - fprintf(stderr, "Undefined external function: %s\n", target_name); - exit(EXIT_FAILURE); - } - +static aint call_variadic(void *fn_ptr, int fixed_args, aint *args, + int n_args) { if (n_args < fixed_args) { - fprintf(stderr, "FFI call '%s': expected at least %d args, got %d\n", - target_name, fixed_args, n_args); + fprintf(stderr, "FFI variadic call: expected at least %d args, got %d\n", + fixed_args, n_args); exit(EXIT_FAILURE); } @@ -189,27 +194,18 @@ static aint call_variadic_function(const char *target_name, int fixed_args, } } - // TODO: ABI ? ffi_status status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, fixed_args, n_args, &ffi_type_pointer, arg_types); - if (status != FFI_OK) { - fprintf(stderr, "FFI prep failed for '%s': status=%d\n", target_name, - status); + fprintf(stderr, "FFI prep failed: status=%d\n", status); exit(EXIT_FAILURE); } - ffi_call(&cif, FFI_FN(fn), &result, arg_values); + ffi_call(&cif, FFI_FN(fn_ptr), &result, arg_values); return (aint)result; } -static aint call_regular_function(const char *name, aint *args, int n_args) { - void *fn = lookup_function(name); - if (!fn) { - fprintf(stderr, "Undefined external function: %s\n", name); - exit(EXIT_FAILURE); - } - +static aint call_regular(void *fn_ptr, aint *args, int n_args) { ffi_cif cif; ffi_type *arg_types[n_args]; void *arg_values[n_args]; @@ -222,28 +218,25 @@ static aint call_regular_function(const char *name, aint *args, int n_args) { ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, n_args, &ffi_type_pointer, arg_types); - if (status != FFI_OK) { - fprintf(stderr, "FFI prep failed for '%s': status=%d\n", name, status); + fprintf(stderr, "FFI prep failed: status=%d\n", status); exit(EXIT_FAILURE); } - ffi_call(&cif, FFI_FN(fn), &result, arg_values); - + ffi_call(&cif, FFI_FN(fn_ptr), &result, 
arg_values); return result; } -aint ffi_call_c(const char *name, aint *args, int n_args) { - const func_metadata *meta = lookup_metadata(name); - - if (meta) { - if (meta->is_args_array) { - return call_args_array_function(meta->target_name, args); - } else { - return call_variadic_function(meta->target_name, meta->fixed_args, args, - n_args); - } +aint ffi_call_c(const ffi_resolved *res, aint *args, int n_args) { + switch (res->kind) { + case FFI_ARGS_ARRAY: + return call_args_array(res->fn_ptr, args); + case FFI_VARIADIC: + return call_variadic(res->fn_ptr, res->fixed_args, args, n_args); + case FFI_REGULAR: + return call_regular(res->fn_ptr, args, n_args); + default: + fprintf(stderr, "Unknown FFI kind: %d\n", res->kind); + exit(EXIT_FAILURE); } - - return call_regular_function(name, args, n_args); } diff --git a/virtual_machine/ffi.h b/virtual_machine/ffi.h index be05888f9..20eb465b8 100644 --- a/virtual_machine/ffi.h +++ b/virtual_machine/ffi.h @@ -3,31 +3,39 @@ #include "../runtime/runtime_common.h" #include "insn.h" +#include +#include #include -/* - * FFI call by name using libffi. - * - */ -aint ffi_call_c(const char *name, aint *args, int n_args); +typedef enum { FFI_REGULAR, FFI_ARGS_ARRAY, FFI_VARIADIC } ffi_kind; typedef struct { - const char *name; // Function name - insn *stub; // Pointer to insn-stub -} ffi_call_stub; + void *fn_ptr; + ffi_kind kind; + int fixed_args; +} ffi_resolved; -/* - * Cache of generated stubs for unresolved FFI references. 
- */ typedef struct ffi_call_table ffi_call_table; ffi_call_table *ffi_call_table_create(void); void ffi_call_table_destroy(ffi_call_table *table); -size_t ffi_call_table_find(ffi_call_table *table, const char *name); -insn *ffi_call_table_add(ffi_call_table *table, const char *name, fn stub_fn); -size_t ffi_call_table_count(ffi_call_table *table); -ffi_call_stub *ffi_call_table_get(ffi_call_table *table, size_t idx); -insn *ffi_call_table_get_all(ffi_call_table *table); +/* + * Find existing or resolve and add. + */ +size_t ffi_call_table_intern(ffi_call_table *table, const char *name); +size_t ffi_call_table_len(ffi_call_table *table); + +ffi_resolved *ffi_call_table_release(ffi_call_table *table); + +typedef struct { + ffi_call_table *table; + size_t curr; +} ffi_call_iterator; + +void ffi_call_table_emit_init(ffi_call_iterator *iter, ffi_call_table *table); +bool ffi_call_table_emit_next(ffi_call_iterator *iter, ffi_resolved **out); + +aint ffi_call_c(const ffi_resolved *res, aint *args, int n_args); #endif // FFI_CALL_H diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index bf85694a1..07ede703b 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -27,6 +27,7 @@ typedef union insn { const char *str; // String operand (direct pointer) union insn *target; // Direct jump target (pointer to insn) aint *global_ptr; // Pointer to a C global variable + void *ptr; // Generic pointer } insn; #endif // INSN_H diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 774b2bd5f..01a36d925 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -1,4 +1,3 @@ - #include "ops.h" #include "../runtime/runtime_common.h" #include "ffi.h" @@ -573,15 +572,15 @@ void op_end(DECL_STATE) { } /* - * FFI call + * FFI call — dispatches via pre-resolved ffi_resolved struct */ void op_ffi_call(DECL_STATE) { ip++; - const char *func_name = ip->str; + const ffi_resolved *res = (const ffi_resolved *)ip->ptr; int32_t n_args = (int32_t)bp[1]; - 
VM_DEBUG("FFI_STUB: func='%s' n_args=%d bp=%p\n", func_name, n_args, + VM_DEBUG("FFI_CALL: kind=%d n_args=%d bp=%p\n", res->kind, n_args, (void *)bp); aint args[256]; @@ -589,10 +588,9 @@ void op_ffi_call(DECL_STATE) { args[i] = bp[n_args + 1 - i]; } - aint result = ffi_call_c(func_name, args, n_args); - VM_DEBUG("FFI_STUB: func='%s' result=%ld\n", func_name, (long)result); + aint result = ffi_call_c(res, args, n_args); + VM_DEBUG("FFI_CALL: result=%ld\n", (long)result); - // Store result in return value slot *bp = result; return; diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 8314a53a4..8ea12ac72 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -19,6 +19,8 @@ struct virtual_machine { insn **entry_points; // Entry point for each unique unit size_t entry_points_len; size_t total_globals; + void *ffi_data; // ffi_resolved array + size_t ffi_count; }; virtual_machine *vm_create(const char *main_unit_path, const char **paths, @@ -49,6 +51,8 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, vm->total_globals = prog->total_globals; vm->code = prog->code; vm->entry_points = prog->entry_points; + vm->ffi_data = prog->ffi_data; + vm->ffi_count = prog->ffi_len; free(prog); @@ -63,6 +67,7 @@ void vm_destroy(virtual_machine *vm) { bytecode_free(vm->bc_arr[i]); } free(vm->bc_arr); + free(vm->ffi_data); free(vm->code); free(vm->entry_points); free(vm); From 787013cdb1a0a37a885f8f9c3fd49d417552fe85 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 26 Feb 2026 16:47:05 +0300 Subject: [PATCH 043/141] cleanup --- virtual_machine/converter.c | 115 ++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index d9ce58d29..93e1332e3 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -117,10 +117,9 @@ typedef struct { } decode_ctx; -decode_ctx *decode_ctx_create(const bytecode *bc, symbol_table *st, - 
ffi_call_table *ffi, int32_t global_offset) { - decode_ctx *ctx = ALLOC(decode_ctx); - +static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, + symbol_table *st, ffi_call_table *ffi, + int32_t global_offset) { ctx->bc = bc; ctx->global_offset = global_offset; @@ -133,8 +132,14 @@ decode_ctx *decode_ctx_create(const bytecode *bc, symbol_table *st, ctx->ffi = ffi; reader_init(&ctx->reader, bc->code, bc->code_size); +} - return ctx; +static void free_decoded_arr(decoded *arr, size_t n) { + for (size_t i = 0; i < n; i++) { + free(arr[i].code); + free(arr[i].bc_to_insn_map); + free(arr[i].relocs); + } } static void add_reloc(decode_ctx *ctx, size_t patch_idx, const char *name, @@ -852,49 +857,8 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, } } -program *decode(bytecode **bc_arr, size_t n) { - symbol_table *st = symbol_table_create(); - ffi_call_table *ffi = ffi_call_table_create(); - - decoded *dec_arr = ALLOC_ARRAY(decoded, n); - - size_t total_code_len = 0; - size_t total_globals = 0; - - for (size_t i = 0; i < n; i++) { - decode_ctx *ctx = decode_ctx_create(bc_arr[i], st, ffi, total_code_len); - insn *code = decode_internal(ctx); - if (!code) { - fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); - free(ctx->bc_to_insn_map); - free(ctx); - for (size_t j = 0; j < i; j++) { - free(dec_arr[j].code); - free(dec_arr[j].bc_to_insn_map); - free(dec_arr[j].relocs); - } - free(dec_arr); - symbol_table_destroy(st); - ffi_call_table_destroy(ffi); - return NULL; - } - - dec_arr[i] = (decoded){ - .code = code, - .code_len = ctx->code.len, - .bc_to_insn_map = ctx->bc_to_insn_map, - .relocs = ctx->relocs.data, - .relocs_len = ctx->relocs.len, - }; - - register_public_symbols(ctx->st, bc_arr[i], total_code_len, total_globals, - ctx->bc_to_insn_map); - - total_code_len += ctx->code.len; - total_globals += bc_arr[i]->globals_count; - free(ctx); - } - +static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, + 
size_t total_globals, ffi_call_table *ffi) { size_t ffi_call_len = ffi_call_table_len(ffi); size_t ffi_call_offset = total_code_len; size_t all_code_len = total_code_len + ffi_call_len * FFI_STUB_SIZE; @@ -902,15 +866,14 @@ program *decode(bytecode **bc_arr, size_t n) { insn *all_code = ALLOC_ARRAY(insn, all_code_len); insn **entry_points = ALLOC_ARRAY(insn *, n); + // Copy code and resolve relocations size_t code_offset = 0; for (size_t i = 0; i < n; i++) { decoded *dec = &dec_arr[i]; // Move instructions into final code array memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); - entry_points[i] = &all_code[code_offset]; - resolve_relocs(all_code, dec, code_offset, ffi_call_offset); code_offset += dec->code_len; @@ -926,24 +889,60 @@ program *decode(bytecode **bc_arr, size_t n) { ffi_idx++; } - ffi_resolved *ffi_data = ffi_call_table_release(ffi); - program *prog = ALLOC(program); prog->code = all_code; prog->code_len = all_code_len; prog->total_globals = total_globals; prog->entry_points = entry_points; - prog->ffi_data = ffi_data; + prog->ffi_data = ffi_call_table_release(ffi); prog->ffi_len = ffi_call_len; - symbol_table_destroy(st); - ffi_call_table_destroy(ffi); + return prog; +} + +program *decode(bytecode **bc_arr, size_t n) { + symbol_table *st = symbol_table_create(); + ffi_call_table *ffi = ffi_call_table_create(); + + decoded *dec_arr = ALLOC_ARRAY(decoded, n); + program *prog = NULL; + size_t n_decoded = 0; + + size_t total_code_len = 0; + size_t total_globals = 0; for (size_t i = 0; i < n; i++) { - free(dec_arr[i].code); - free(dec_arr[i].bc_to_insn_map); - free(dec_arr[i].relocs); + decode_ctx ctx; + decode_ctx_init(&ctx, bc_arr[i], st, ffi, total_code_len); + insn *code = decode_internal(&ctx); + if (!code) { + fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); + free(ctx.bc_to_insn_map); + goto cleanup; + } + + dec_arr[i] = (decoded){ + .code = code, + .code_len = ctx.code.len, + .bc_to_insn_map = ctx.bc_to_insn_map, + 
.relocs = ctx.relocs.data, + .relocs_len = ctx.relocs.len, + }; + n_decoded++; + + register_public_symbols(st, bc_arr[i], total_code_len, total_globals, + ctx.bc_to_insn_map); + + total_code_len += ctx.code.len; + total_globals += bc_arr[i]->globals_count; } + + prog = link_program(dec_arr, n, total_code_len, total_globals, ffi); + +cleanup: + symbol_table_destroy(st); + ffi_call_table_destroy(ffi); + free_decoded_arr(dec_arr, n_decoded); free(dec_arr); return prog; From 27d0ed64fff2e902c43282508b2262d945cd1ebd Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 27 Feb 2026 20:10:43 +0300 Subject: [PATCH 044/141] SM.ml chore --- src/SM.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SM.ml b/src/SM.ml index 891eb30b4..eb7d6e182 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -422,7 +422,7 @@ module ByteCode = struct (fun (name_off, offset, flag) -> Buffer.add_int32_le file name_off; Buffer.add_int32_le file offset; - Buffer.add_char file (Char.chr flag)) + Buffer.add_uint8 file flag) pubs_resolved; Buffer.add_bytes file code; let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in From 30d47ffa26399a2fe3b6e30a03d00f6901cc0f77 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sat, 28 Feb 2026 18:21:45 +0300 Subject: [PATCH 045/141] cleanup --- runtime/gc.c | 2 +- virtual_machine/bytecode.c | 14 ++++++-------- virtual_machine/bytecode.h | 2 +- virtual_machine/converter.c | 6 +++--- virtual_machine/insn.h | 6 +----- virtual_machine/ops.c | 11 +++++++---- virtual_machine/symbols.c | 2 +- virtual_machine/vm.c | 1 + 8 files changed, 21 insertions(+), 23 deletions(-) diff --git a/runtime/gc.c b/runtime/gc.c index 5353ed58a..050049d41 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -63,7 +63,7 @@ void *alloc (size_t size) { size = BYTES_TO_WORDS(size); size_t padding = size * sizeof(size_t) - obj_size; #if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) - fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, padding); + fprintf(stderr, 
"allocation of size %zu words (%zu bytes): ", size, bytes_sz); #endif void *p = gc_alloc_on_existing_heap(size); if (!p) { diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 7dca708ae..13f1f1d0c 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -30,9 +30,9 @@ bytecode *bytecode_load(const char *filename) { size_t file_size = (size_t)st.st_size; - void *map = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + const uint8_t *data = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { + if (data == MAP_FAILED) { perror("bytecode_load: mmap"); close(fd); return NULL; @@ -41,7 +41,7 @@ bytecode *bytecode_load(const char *filename) { close(fd); byte_reader reader; - reader_init(&reader, (const uint8_t *)map, file_size); + reader_init(&reader, data, file_size); int32_t string_table_size = reader_i32(&reader); int32_t globals_count = reader_i32(&reader); @@ -55,13 +55,11 @@ bytecode *bytecode_load(const char *filename) { size_t code_size = file_size - code_offset; // TODO: VALIdation - - const char *string_table = map + st_offset; - const uint8_t *data = (const uint8_t *)map; + const char *string_table = (const char *)data + st_offset; bytecode *bc = ALLOC(bytecode); - bc->map_base = map; + bc->map_base = data; bc->map_size = file_size; bc->string_table = string_table; @@ -124,7 +122,7 @@ void bytecode_free(bytecode *bc) { if (!bc) { return; } - munmap(bc->map_base, bc->map_size); + munmap((void *)bc->map_base, bc->map_size); free((void *)bc->name); free(bc); } diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 0cb4e982d..04a3fd049 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -18,7 +18,7 @@ typedef struct { typedef struct { // Memory-mapped file - void *map_base; + const uint8_t *map_base; size_t map_size; const char *string_table; diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 93e1332e3..67ec9fa79 100644 
--- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -159,7 +159,7 @@ static fixup_node *add_fixup(meta_info *meta, size_t target_off, static bool validate_target_off(const bytecode *bc, int32_t target_off, size_t current_bc_off, const char *op_name) { - if (target_off >= bc->code_size) { + if (target_off >= (int32_t)bc->code_size) { fprintf( stderr, "Error: %s target_off=%d out of range (bc_off=%zu, code_size=%zu)\n", @@ -683,7 +683,7 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_NUM(ctx, n_captured); meta_info *tm = &meta[target_off]; - if (target_off < current_bc_off && tm->resolved_idx != -1) { + if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { ctx->code.data[target_slot].num = tm->resolved_idx; add_reloc(ctx, target_slot, NULL, INTERNAL); } else { @@ -736,7 +736,7 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_NUM(ctx, n_args); meta_info *tm = &meta[target_off]; - if (target_off < current_bc_off && tm->resolved_idx != -1) { + if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { ctx->code.data[target_slot].num = tm->resolved_idx; add_reloc(ctx, target_slot, NULL, INTERNAL); } else { diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index 07ede703b..520175f21 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -9,12 +9,8 @@ #include union insn; - // State: ip = instruction pointer, sp = stack pointer, bp = base pointer -// bp and globals are marked unused since not all handlers need them -#define DECL_STATE \ - __attribute((unused)) union insn *ip, __attribute__((unused)) aint *sp, \ - __attribute__((unused)) aint *bp, __attribute__((unused)) aint *globals +#define DECL_STATE union insn *ip, aint *sp, aint *bp, aint *globals #define STATE ip, sp, bp, globals // Function pointer type for opcode handlers (returns void for tail calls) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 01a36d925..2ffe08fbb 100644 --- a/virtual_machine/ops.c +++ 
b/virtual_machine/ops.c @@ -233,8 +233,6 @@ void op_cjmp_nz(DECL_STATE) { * String, data etc. */ void op_string(DECL_STATE) { - (void)bp; - (void)globals; ip++; const char *str = ip->str; VM_DEBUG("STRING: \"%s\"\n", str); @@ -244,8 +242,6 @@ void op_string(DECL_STATE) { } void op_barray(DECL_STATE) { - (void)bp; - (void)globals; ip++; int32_t n = ip->num; VM_DEBUG("BARRAY: n=%d\n", n); @@ -313,6 +309,9 @@ void op_array(DECL_STATE) { } void op_fail(DECL_STATE) { + (void)sp; + (void)bp; + (void)globals; ip++; int32_t line = ip->num; ip++; @@ -565,6 +564,8 @@ void op_callc(DECL_STATE) { } void op_end(DECL_STATE) { + (void)ip; + (void)globals; VM_TRACE_CALL("END sp=%p\n", (void *)sp); aint ret_val = STACK_PEEK(sp); *bp = ret_val; @@ -575,6 +576,8 @@ void op_end(DECL_STATE) { * FFI call — dispatches via pre-resolved ffi_resolved struct */ void op_ffi_call(DECL_STATE) { + (void)sp; + (void)globals; ip++; const ffi_resolved *res = (const ffi_resolved *)ip->ptr; diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c index 435964269..319eb05ba 100644 --- a/virtual_machine/symbols.c +++ b/virtual_machine/symbols.c @@ -30,7 +30,7 @@ void symbol_table_destroy(symbol_table *table) { static resolved_symbol *symbol_table_find(symbol_table *table, const char *name, bool is_function) { for (size_t i = 0; i < table->len; i++) { - if (strcmp(table->data[i].name, name) == 0) { + if (strcmp(table->data[i].name, name) == 0 && table->data[i].is_function == is_function) { return &table->data[i]; } } diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 8ea12ac72..1e0dcd35c 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -74,6 +74,7 @@ void vm_destroy(virtual_machine *vm) { } void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { + (void)vm; set_args(argc, argv); } From d275e4d47bf24865468e51422638fa4ca0a8ef95 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sat, 28 Feb 2026 23:54:38 +0300 Subject: [PATCH 046/141] add bytecode format description 
--- virtual_machine/README.md | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/virtual_machine/README.md b/virtual_machine/README.md index 921f6b5f3..e7416b987 100644 --- a/virtual_machine/README.md +++ b/virtual_machine/README.md @@ -23,4 +23,40 @@ The VM is tightly integrated with the Lama runtime (`../runtime/`). It relies on ## Bytecode format -The VM executes a dense bytecode format where each instruction consists of a 1-byte opcode followed by optional immediate values or offsets. Function definitions include metadata about the number of arguments and local variables required. +### Layout +Bytes are laid out in little-endian order. +1. Header (16 bytes) +2. String table (variable) +3. Imports (number of imports * 4 bytes) +4. Public symbols (number of public symbols * 9 bytes) +5. Code section (until 0xFF) + +### Header +| offset | size | field | +|--------|------|-------| +| 0 | 4 | string table size | +| 4 | 4 | globals count | +| 8 | 4 | number of imports | +| 12 | 4 | number of public symbols | + +### Imports +Each entry is 4 bytes: +- `name_offset` (int32): offset into string table for module name + +### Public symbols +Each entry is 9 bytes: +- `name_offset` (int32): offset into string table +- `code_offset` (int32): for functions: bytecode offset; for globals: global index +- `flag` (uint8): 0 = function, 1 = global + +### External references +CALL (0x56) and CLOSURE (0x54) instructions use negative values for external function references. +LD (0x20) and ST (0x40) instructions use negative values for external global references. + +The encoding is the same for both: +- Non-negative values: local references (bytecode offset for functions, global index for globals) +- Negative values: `string_table_offset = -value - 1` + +The string at that offset is looked up to resolve the external symbol at load time.
+ + From 3cd8652109ec2423d92aeaf8d660eb822cffd18b Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 26 Feb 2026 19:03:23 +0300 Subject: [PATCH 047/141] change stack validation --- virtual_machine/converter.c | 214 ++++++++++++++++++++++++------------ 1 file changed, 143 insertions(+), 71 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 67ec9fa79..5404cc638 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -31,21 +31,27 @@ /* * Symbolic stack depth tracking macros used during decoding - * depth = -1 means unreachable code */ -#define DEPTH_INC(d, n) \ + +typedef enum { LIVE, BARRIER, DEAD } reach_state; + +#define DEPTH_INC(d, state, n) \ do { \ - if ((d) != -1) \ + if ((state) != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (d), (d) + (n), (n)); \ (d) += (n); \ + } \ } while (0) -#define DEPTH_DEC(d, n) \ +#define DEPTH_DEC(d, state, n) \ do { \ - if ((d) != -1) \ + if ((state) != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (d), (d) - (n), (n)); \ (d) -= (n); \ + assert((d) >= 0 && "stack underflow"); \ + } \ } while (0) -#define DEPTH_PUSH(d) DEPTH_INC(d, 1) -#define DEPTH_POP(d) DEPTH_DEC(d, 1) -#define DEPTH_DEAD(d) ((d) = -1) +#define DEPTH_PUSH(d, state) DEPTH_INC(d, state, 1) +#define DEPTH_POP(d, state) DEPTH_DEC(d, state, 1) /* * Code emission macros - append to code array in context @@ -241,7 +247,7 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { * Handle jump target resolution (intra-unit only — these are always local) */ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, - int32_t depth) { + int32_t depth, reach_state state) { int32_t target_off = reader_i32(&ctx->reader); if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { @@ -252,28 +258,49 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, EMIT_NUM(ctx, 0); // placeholder — will hold code index meta_info *tm = 
&meta[target_off]; - if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { - // Backward jump — already resolved, store as index + if (target_off < (int32_t)current_bc_off) { + // Backward jump — target was already visited by sequential decode + assert(tm->resolved_idx != -1 && + "backward jump target must have been visited"); ctx->code.data[my_idx].num = tm->resolved_idx; add_reloc(ctx, my_idx, NULL, INTERNAL); - if (depth != -1 && tm->stack_depth != -1 && tm->stack_depth != depth) { - fprintf(stderr, "Error: Loop stack mismatch at bc_off=%zu\n", - current_bc_off); - return false; + VM_DEBUG(" JUMP: backward to bc_off=%d, (depth=%d, target_depth=%d)\n", + target_off, depth, tm->stack_depth); + if (state != DEAD) { + assert(tm->stack_depth != -1 && + "backward jump target must have known stack depth"); + if (tm->stack_depth != depth) { + fprintf(stderr, + "Error: Jump stack mismatch at bc_off=%zu (exptected %d, " + "actual %d)\n", + current_bc_off, depth, tm->stack_depth); + return false; + } } } else { // Forward jump — add fixup if (!add_fixup(meta, target_off, my_idx)) { return false; } - if (depth != -1) { - if (tm->stack_depth == -1) - tm->stack_depth = depth; - else if (tm->stack_depth != depth) { - fprintf(stderr, "Error: Jump stack mismatch\n"); - return false; - } + if (state == DEAD) { + // Don't set or validate depth at target since it's not reachable from + // sequential decode + VM_DEBUG(" JUMP: forward to bc_off=%d (dead, skipping depth)\n", + target_off); + } else if (tm->stack_depth == -1) { + VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", + target_off, depth, tm->stack_depth); + tm->stack_depth = depth; + } else if (tm->stack_depth != depth) { + fprintf(stderr, + "Error: Jump stack mismatch at bc_off=%zu (expected %d, actual " + "%d)\n", + current_bc_off, depth, tm->stack_depth); + return false; + } else { + VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", + target_off, depth, 
tm->stack_depth); } } return true; @@ -294,20 +321,47 @@ static insn *decode_internal(decode_ctx *ctx) { } int32_t depth = 0; + reach_state state = LIVE; insn *result = NULL; while (!reader_eof(&ctx->reader)) { size_t current_bc_off = reader_pos(&ctx->reader); uint8_t opcode = reader_u8(&ctx->reader); - VM_DEBUG("DECODE: visiting bc_off=%zu opcode=%d code_idx=%zu\n", - current_bc_off, opcode, ctx->code.len); + VM_DEBUG("DECODE: bc_off=%zu %s (0x%02X) depth=%d\n", current_bc_off, + opcode_to_string(opcode), opcode, depth, + state == BARRIER ? " [barrier]" + : state == DEAD ? " [dead]" + : ""); meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code.len; - // Validate stack depth - if (depth != -1) { + // Validate stack depth at intersections + if (state == DEAD) { + if (m->stack_depth != -1) { + // Forward jump visited + VM_DEBUG(" DEPTH: %d -> %d", depth, m->stack_depth); + depth = m->stack_depth; + state = LIVE; + } else { + // No forward jump + VM_DEBUG(" DEPTH: dead, skipping at bc_off=%zu\n", current_bc_off); + m->stack_depth = -1; // unvisited + } + } else if (state == BARRIER) { + if (m->stack_depth != -1) { + // Forward jump visited + VM_DEBUG(" DEPTH: %d -> %d", depth, m->stack_depth); + depth = m->stack_depth; + } else { + // No forward jump + VM_DEBUG(" DEPTH: barrier, keeping stale depth=%d at bc_off=%zu\n", + depth, current_bc_off); + m->stack_depth = depth; + } + state = LIVE; + } else { if (m->stack_depth != -1 && m->stack_depth != depth) { fprintf(stderr, "Error: Stack mismatch at offset %zu (expected %d, got %d)\n", @@ -315,8 +369,6 @@ static insn *decode_internal(decode_ctx *ctx) { goto cleanup; } m->stack_depth = depth; - } else { - depth = m->stack_depth; } // Resolve forward jumps (backpatching) — store as index, record @@ -337,107 +389,109 @@ static insn *decode_internal(decode_ctx *ctx) { switch (opcode) { case OP_CONST: - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); EMIT_FUNC(ctx, op_const); EMIT_NUM(ctx, 
reader_i32(&ctx->reader)); break; case OP_BINOP_ADD: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_add); break; case OP_BINOP_SUB: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_sub); break; case OP_BINOP_MUL: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_mul); break; case OP_BINOP_DIV: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_div); break; case OP_BINOP_MOD: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_mod); break; case OP_BINOP_LT: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_lt); break; case OP_BINOP_LE: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_le); break; case OP_BINOP_GT: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_gt); break; case OP_BINOP_GE: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_ge); break; case OP_BINOP_EQ: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_eq); break; case OP_BINOP_NE: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_ne); break; case OP_BINOP_AND: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_and); break; case OP_BINOP_OR: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_or); break; case OP_JMP: EMIT_FUNC(ctx, op_jmp); - if (!handle_jump(ctx, meta, current_bc_off, depth)) { + if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { goto cleanup; } - DEPTH_DEAD(depth); + if (state != DEAD) { + state = BARRIER; + } break; case OP_CJMP_Z: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_cjmp_z); - if (!handle_jump(ctx, meta, current_bc_off, depth)) { + if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { goto cleanup; } break; case OP_CJMP_NZ: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_cjmp_nz); - if (!handle_jump(ctx, meta, current_bc_off, depth)) { + if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { goto cleanup; } break; case 
OP_DROP: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_drop); break; case OP_DUP: - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); EMIT_FUNC(ctx, op_dup); break; @@ -446,17 +500,18 @@ static insn *decode_internal(decode_ctx *ctx) { break; case OP_ELEM: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_elem); break; case OP_STA: - DEPTH_DEC(depth, 2); + // TODO: + DEPTH_DEC(depth, state, 2); EMIT_FUNC(ctx, op_sta); break; case OP_LD: { - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); int32_t idx = reader_i32(&ctx->reader); emit_ld_glo(ctx, idx, global_base); break; @@ -469,7 +524,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_LOC: { - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); @@ -484,7 +539,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_ARG: { - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); @@ -499,7 +554,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_CLO: { - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); @@ -514,7 +569,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_STRING: { - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); int32_t str_idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_string); EMIT_STR(ctx, bytecode_get_string(bc, str_idx)); @@ -523,7 +578,8 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_BARRAY: { int32_t n = reader_i32(&ctx->reader); - DEPTH_DEC(depth, n - 1); + // push array, pop elements == n - 1 net stack change + DEPTH_DEC(depth, state, n - 1); EMIT_FUNC(ctx, op_barray); EMIT_NUM(ctx, n); break; @@ -532,7 +588,8 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_SEXP: { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); 
- DEPTH_DEC(depth, n_fields - 1); + // push sexp, pop elements == n_fields - 1 net stack change + DEPTH_DEC(depth, state, n_fields - 1); EMIT_FUNC(ctx, op_sexp); EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); EMIT_NUM(ctx, n_fields); @@ -561,12 +618,12 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_FUNC(ctx, op_fail); EMIT_NUM(ctx, line); EMIT_NUM(ctx, col); - DEPTH_DEAD(depth); + state = DEAD; break; } case OP_PATT_STR_CMP: - DEPTH_POP(depth); + DEPTH_POP(depth, state); EMIT_FUNC(ctx, op_patt_str_cmp); break; @@ -623,21 +680,21 @@ static insn *decode_internal(decode_ctx *ctx) { int designation_type = type_byte & 0xF; switch (designation_type) { case 0: // Global - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); emit_ld_glo(ctx, idx, global_base); break; case 1: // Local - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); break; case 2: // Arg - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); break; case 3: // Closure var - DEPTH_PUSH(depth); + DEPTH_PUSH(depth, state); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); break; @@ -647,7 +704,7 @@ static insn *decode_internal(decode_ctx *ctx) { } } - DEPTH_DEC(depth, n_captured - 1); + DEPTH_DEC(depth, state, n_captured - 1); EMIT_FUNC(ctx, op_closure); @@ -683,7 +740,10 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_NUM(ctx, n_captured); meta_info *tm = &meta[target_off]; - if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { + if (target_off < (int32_t)current_bc_off) { + assert(tm->resolved_idx != -1 && + "backward closure target must have been visited"); + ctx->code.data[target_slot].num = tm->resolved_idx; add_reloc(ctx, target_slot, NULL, INTERNAL); } else { @@ -696,7 +756,8 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_CALL: { int32_t target_off = reader_i32(&ctx->reader); int32_t n_args = reader_i32(&ctx->reader); - DEPTH_DEC(depth, n_args - 1); + // push n_args, return 1 
value == n_args - 1 net stack change + DEPTH_DEC(depth, state, n_args - 1); VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", @@ -736,7 +797,10 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_NUM(ctx, n_args); meta_info *tm = &meta[target_off]; - if (target_off < (int32_t)current_bc_off && tm->resolved_idx != -1) { + if (target_off < (int32_t)current_bc_off) { + assert(tm->resolved_idx != -1 && + "backward call target must have been visited"); + ctx->code.data[target_slot].num = tm->resolved_idx; add_reloc(ctx, target_slot, NULL, INTERNAL); } else { @@ -748,15 +812,23 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_CALLC: { int32_t n_args = reader_i32(&ctx->reader); - DEPTH_DEC(depth, n_args); + DEPTH_DEC(depth, state, n_args); EMIT_FUNC(ctx, op_callc); EMIT_NUM(ctx, n_args); break; } case OP_END: + // depth == 1 <=> return value (?) + if (state != DEAD && depth != 1) { + fprintf(stderr, "Error: END with depth = %d at bc_off=%zu\n", depth, + current_bc_off); + goto cleanup; + } EMIT_FUNC(ctx, op_end); - DEPTH_DEAD(depth); + if (state != DEAD) { + state = BARRIER; + } break; case OP_LINE: { From 6300f5f37cdd3858cebea9e0d0efe926e682b52e Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 1 Mar 2026 01:19:20 +0300 Subject: [PATCH 048/141] bring back `opcodes.c` for easier debugging --- virtual_machine/Makefile | 2 +- virtual_machine/opcodes.c | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 virtual_machine/opcodes.c diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index d163262aa..4a9e61fb5 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -Wextra -std=c99 -O0 LIBS = -lffi -ldl LDFLAGS = -rdynamic TARGET = lama.exe -SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c memory.c +SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c 
memory.c opcodes.c OBJECTS = $(SOURCES:.c=.o) RUNTIME_DIR = ../runtime diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c new file mode 100644 index 000000000..13fff9a87 --- /dev/null +++ b/virtual_machine/opcodes.c @@ -0,0 +1,111 @@ +#include "opcodes.h" +#include +#include + +const char *opcode_to_string(uint8_t opcode) { + switch ((opcode_t)opcode) { + case OP_BINOP_ADD: + return "ADD"; + case OP_BINOP_SUB: + return "SUB"; + case OP_BINOP_MUL: + return "MUL"; + case OP_BINOP_DIV: + return "DIV"; + case OP_BINOP_MOD: + return "MOD"; + case OP_BINOP_LT: + return "LT"; + case OP_BINOP_LE: + return "LE"; + case OP_BINOP_GT: + return "GT"; + case OP_BINOP_GE: + return "GE"; + case OP_BINOP_EQ: + return "EQ"; + case OP_BINOP_NE: + return "NE"; + case OP_BINOP_AND: + return "AND"; + case OP_BINOP_OR: + return "OR"; + case OP_CONST: + return "CONST"; + case OP_STRING: + return "STRING"; + case OP_SEXP: + return "SEXP"; + case OP_STA: + return "STA"; + case OP_JMP: + return "JMP"; + case OP_END: + return "END"; + case OP_DROP: + return "DROP"; + case OP_DUP: + return "DUP"; + case OP_SWAP: + return "SWAP"; + case OP_ELEM: + return "ELEM"; + case OP_LD: + return "LD"; + case OP_LD_LOC: + return "LD.LOC"; + case OP_LD_ARG: + return "LD.ARG"; + case OP_LD_CLO: + return "LD.CLO"; + case OP_ST: + return "ST"; + case OP_ST_LOC: + return "ST.LOC"; + case OP_ST_ARG: + return "ST.ARG"; + case OP_ST_CLO: + return "ST.CLO"; + case OP_CJMP_Z: + return "CJMP.Z"; + case OP_CJMP_NZ: + return "CJMP.NZ"; + case OP_BEGIN: + return "BEGIN"; + case OP_BEGIN_CLOSURE: + return "BEGIN.CLO"; + case OP_CLOSURE: + return "CLOSURE"; + case OP_CALLC: + return "CALLC"; + case OP_CALL: + return "CALL"; + case OP_TAG: + return "TAG"; + case OP_ARRAY: + return "ARRAY"; + case OP_FAIL: + return "FAIL"; + case OP_LINE: + return "LINE"; + case OP_PATT_STR_CMP: + return "PATT.STRCMP"; + case OP_PATT_STRING: + return "PATT.STRING"; + case OP_PATT_ARRAY: + return "PATT.ARRAY"; + case 
OP_PATT_SEXP: + return "PATT.SEXP"; + case OP_PATT_BOXED: + return "PATT.BOXED"; + case OP_PATT_UNBOXED: + return "PATT.UNBOXED"; + case OP_PATT_CLOSURE: + return "PATT.CLOSURE"; + case OP_BARRAY: + return "BARRAY"; + default: + fprintf(stderr, "Unknown opcode: %d\n", opcode); + exit(EXIT_FAILURE); + } +} From 214f6d50301f13526f5cb6aa2742ebce5a72ddf0 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 1 Mar 2026 02:14:08 +0300 Subject: [PATCH 049/141] separate stack validation, store max depth for func --- virtual_machine/converter.c | 187 +++++++++++++++-------------- virtual_machine/stack_validation.h | 60 +++++++++ 2 files changed, 154 insertions(+), 93 deletions(-) create mode 100644 virtual_machine/stack_validation.h diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 5404cc638..e04a45017 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -5,6 +5,7 @@ #include "memory.h" #include "opcodes.h" #include "ops.h" +#include "stack_validation.h" #include "symbols.h" #include #include @@ -29,30 +30,6 @@ #define IS_EXT_REF(addr) ((addr) < 0) #define EXT_REF_INDEX(addr) (-(addr) - 1) -/* - * Symbolic stack depth tracking macros used during decoding - */ - -typedef enum { LIVE, BARRIER, DEAD } reach_state; - -#define DEPTH_INC(d, state, n) \ - do { \ - if ((state) != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (d), (d) + (n), (n)); \ - (d) += (n); \ - } \ - } while (0) -#define DEPTH_DEC(d, state, n) \ - do { \ - if ((state) != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (d), (d) - (n), (n)); \ - (d) -= (n); \ - assert((d) >= 0 && "stack underflow"); \ - } \ - } while (0) -#define DEPTH_PUSH(d, state) DEPTH_INC(d, state, 1) -#define DEPTH_POP(d, state) DEPTH_DEC(d, state, 1) - /* * Code emission macros - append to code array in context */ @@ -121,6 +98,7 @@ typedef struct { symbol_table *st; ffi_call_table *ffi; + stack_validation sv; } decode_ctx; static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, 
@@ -246,9 +224,11 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { /* * Handle jump target resolution (intra-unit only — these are always local) */ -static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off, - int32_t depth, reach_state state) { +static bool handle_jump(decode_ctx *ctx, meta_info *meta, + size_t current_bc_off) { int32_t target_off = reader_i32(&ctx->reader); + int32_t depth = ctx->sv.depth; + reach_state state = ctx->sv.state; if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { return false; @@ -320,8 +300,10 @@ static insn *decode_internal(decode_ctx *ctx) { meta[i].fixups = NULL; } - int32_t depth = 0; - reach_state state = LIVE; + ctx->sv = (stack_validation){ + .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; + da_init(ctx->sv.func_stack); + insn *result = NULL; while (!reader_eof(&ctx->reader)) { @@ -329,46 +311,46 @@ static insn *decode_internal(decode_ctx *ctx) { uint8_t opcode = reader_u8(&ctx->reader); VM_DEBUG("DECODE: bc_off=%zu %s (0x%02X) depth=%d\n", current_bc_off, - opcode_to_string(opcode), opcode, depth, - state == BARRIER ? " [barrier]" - : state == DEAD ? " [dead]" - : ""); + opcode_to_string(opcode), opcode, ctx->sv.depth, + ctx->sv.state == BARRIER ? " [barrier]" + : ctx->sv.state == DEAD ? 
" [dead]" + : ""); meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code.len; // Validate stack depth at intersections - if (state == DEAD) { + if (ctx->sv.state == DEAD) { if (m->stack_depth != -1) { // Forward jump visited - VM_DEBUG(" DEPTH: %d -> %d", depth, m->stack_depth); - depth = m->stack_depth; - state = LIVE; + VM_DEBUG(" DEPTH: %d -> %d", ctx->sv.depth, m->stack_depth); + ctx->sv.depth = m->stack_depth; + ctx->sv.state = LIVE; } else { // No forward jump VM_DEBUG(" DEPTH: dead, skipping at bc_off=%zu\n", current_bc_off); m->stack_depth = -1; // unvisited } - } else if (state == BARRIER) { + } else if (ctx->sv.state == BARRIER) { if (m->stack_depth != -1) { // Forward jump visited - VM_DEBUG(" DEPTH: %d -> %d", depth, m->stack_depth); - depth = m->stack_depth; + VM_DEBUG(" DEPTH: %d -> %d", ctx->sv.depth, m->stack_depth); + ctx->sv.depth = m->stack_depth; } else { // No forward jump VM_DEBUG(" DEPTH: barrier, keeping stale depth=%d at bc_off=%zu\n", - depth, current_bc_off); - m->stack_depth = depth; + ctx->sv.depth, current_bc_off); + m->stack_depth = ctx->sv.depth; } - state = LIVE; + ctx->sv.state = LIVE; } else { - if (m->stack_depth != -1 && m->stack_depth != depth) { + if (m->stack_depth != -1 && m->stack_depth != ctx->sv.depth) { fprintf(stderr, "Error: Stack mismatch at offset %zu (expected %d, got %d)\n", - current_bc_off, m->stack_depth, depth); + current_bc_off, m->stack_depth, ctx->sv.depth); goto cleanup; } - m->stack_depth = depth; + m->stack_depth = ctx->sv.depth; } // Resolve forward jumps (backpatching) — store as index, record @@ -389,109 +371,109 @@ static insn *decode_internal(decode_ctx *ctx) { switch (opcode) { case OP_CONST: - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_const); EMIT_NUM(ctx, reader_i32(&ctx->reader)); break; case OP_BINOP_ADD: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_add); break; case OP_BINOP_SUB: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); 
EMIT_FUNC(ctx, op_sub); break; case OP_BINOP_MUL: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_mul); break; case OP_BINOP_DIV: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_div); break; case OP_BINOP_MOD: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_mod); break; case OP_BINOP_LT: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_lt); break; case OP_BINOP_LE: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_le); break; case OP_BINOP_GT: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_gt); break; case OP_BINOP_GE: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_ge); break; case OP_BINOP_EQ: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_eq); break; case OP_BINOP_NE: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_ne); break; case OP_BINOP_AND: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_and); break; case OP_BINOP_OR: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_or); break; case OP_JMP: EMIT_FUNC(ctx, op_jmp); - if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { + if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } - if (state != DEAD) { - state = BARRIER; + if (ctx->sv.state != DEAD) { + ctx->sv.state = BARRIER; } break; case OP_CJMP_Z: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_cjmp_z); - if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { + if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } break; case OP_CJMP_NZ: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_cjmp_nz); - if (!handle_jump(ctx, meta, current_bc_off, depth, state)) { + if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } break; case OP_DROP: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_drop); break; case OP_DUP: - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); 
EMIT_FUNC(ctx, op_dup); break; @@ -500,18 +482,18 @@ static insn *decode_internal(decode_ctx *ctx) { break; case OP_ELEM: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_elem); break; case OP_STA: // TODO: - DEPTH_DEC(depth, state, 2); + DEPTH_DEC(ctx->sv, 2); EMIT_FUNC(ctx, op_sta); break; case OP_LD: { - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); emit_ld_glo(ctx, idx, global_base); break; @@ -524,7 +506,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_LOC: { - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); @@ -539,7 +521,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_ARG: { - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); @@ -554,7 +536,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_LD_CLO: { - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); @@ -569,7 +551,7 @@ static insn *decode_internal(decode_ctx *ctx) { } case OP_STRING: { - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); int32_t str_idx = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_string); EMIT_STR(ctx, bytecode_get_string(bc, str_idx)); @@ -579,7 +561,7 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_BARRAY: { int32_t n = reader_i32(&ctx->reader); // push array, pop elements == n - 1 net stack change - DEPTH_DEC(depth, state, n - 1); + DEPTH_DEC(ctx->sv, n - 1); EMIT_FUNC(ctx, op_barray); EMIT_NUM(ctx, n); break; @@ -589,7 +571,7 @@ static insn *decode_internal(decode_ctx *ctx) { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); // push sexp, pop elements == n_fields - 1 net stack change - DEPTH_DEC(depth, state, n_fields - 1); + DEPTH_DEC(ctx->sv, n_fields - 1); EMIT_FUNC(ctx, 
op_sexp); EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); EMIT_NUM(ctx, n_fields); @@ -618,12 +600,12 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_FUNC(ctx, op_fail); EMIT_NUM(ctx, line); EMIT_NUM(ctx, col); - state = DEAD; + ctx->sv.state = DEAD; break; } case OP_PATT_STR_CMP: - DEPTH_POP(depth, state); + DEPTH_POP(ctx->sv); EMIT_FUNC(ctx, op_patt_str_cmp); break; @@ -655,11 +637,18 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_BEGIN_CLOSURE: { int32_t n_args = reader_i32(&ctx->reader); int32_t n_locals = reader_i32(&ctx->reader); - depth = 0; + ctx->sv.depth = 0; + // Save outer function's max_depth + func_frame frame = {.max_depth = ctx->sv.max_depth, + .max_depth_pos = ctx->sv.max_depth_pos}; + da_append(ctx->sv.func_stack, frame); + ctx->sv.max_depth = 0; EMIT_FUNC(ctx, op_begin); EMIT_NUM(ctx, n_args); EMIT_NUM(ctx, n_locals); - EMIT_NUM(ctx, 0); + ctx->sv.max_depth_pos = ctx->code.len; + EMIT_NUM(ctx, 0); // placeholder for max depth, will be patched + break; } @@ -680,21 +669,21 @@ static insn *decode_internal(decode_ctx *ctx) { int designation_type = type_byte & 0xF; switch (designation_type) { case 0: // Global - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); emit_ld_glo(ctx, idx, global_base); break; case 1: // Local - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); break; case 2: // Arg - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); break; case 3: // Closure var - DEPTH_PUSH(depth, state); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); break; @@ -704,7 +693,7 @@ static insn *decode_internal(decode_ctx *ctx) { } } - DEPTH_DEC(depth, state, n_captured - 1); + DEPTH_DEC(ctx->sv, n_captured - 1); EMIT_FUNC(ctx, op_closure); @@ -757,7 +746,7 @@ static insn *decode_internal(decode_ctx *ctx) { int32_t target_off = reader_i32(&ctx->reader); int32_t n_args = reader_i32(&ctx->reader); // push n_args, return 1 value 
== n_args - 1 net stack change - DEPTH_DEC(depth, state, n_args - 1); + DEPTH_DEC(ctx->sv, n_args - 1); VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", @@ -812,7 +801,7 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_CALLC: { int32_t n_args = reader_i32(&ctx->reader); - DEPTH_DEC(depth, state, n_args); + DEPTH_DEC(ctx->sv, n_args); EMIT_FUNC(ctx, op_callc); EMIT_NUM(ctx, n_args); break; @@ -820,15 +809,22 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_END: // depth == 1 <=> return value (?) - if (state != DEAD && depth != 1) { - fprintf(stderr, "Error: END with depth = %d at bc_off=%zu\n", depth, - current_bc_off); + if (ctx->sv.state != DEAD && ctx->sv.depth != 1) { + fprintf(stderr, "Error: END with depth = %d at bc_off=%zu\n", + ctx->sv.depth, current_bc_off); goto cleanup; } EMIT_FUNC(ctx, op_end); - if (state != DEAD) { - state = BARRIER; + if (ctx->sv.state != DEAD) { + ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; + ctx->sv.state = BARRIER; } + assert(ctx->sv.func_stack.len > 0); + ctx->sv.max_depth = + ctx->sv.func_stack.data[ctx->sv.func_stack.len - 1].max_depth; + ctx->sv.max_depth_pos = + ctx->sv.func_stack.data[ctx->sv.func_stack.len - 1].max_depth_pos; + ctx->sv.func_stack.len--; break; case OP_LINE: { @@ -851,6 +847,10 @@ static insn *decode_internal(decode_ctx *ctx) { reader_pos(&ctx->reader) - 1); goto cleanup; } + + if (ctx->sv.state != DEAD && ctx->sv.depth > ctx->sv.max_depth) { + ctx->sv.max_depth = ctx->sv.depth; + } } // Extract mapping @@ -861,6 +861,7 @@ static insn *decode_internal(decode_ctx *ctx) { result = ctx->code.data; cleanup: + da_free(ctx->sv.func_stack); // Free temporary metadata and fixup nodes for (size_t i = 0; i < bc->code_size; i++) { fixup_node *node = meta[i].fixups; diff --git a/virtual_machine/stack_validation.h b/virtual_machine/stack_validation.h new file mode 100644 index 000000000..b16c434c2 --- /dev/null +++ 
b/virtual_machine/stack_validation.h @@ -0,0 +1,60 @@ +#ifndef STACK_VALIDATION_H +#define STACK_VALIDATION_H + +#include +#include + +#ifdef DEBUG_PRINT +#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#else +#define VM_DEBUG(fmt, ...) +#endif + +/* + * Different states of reachability for stack validation: + * LIVE: currently decoding sequentially, reachable from previous instruction + * BARRIER: just emitted JMP or END, so next instruction is reachable but not + * from previous instruction + * DEAD: not reachable from previous instruction + */ +typedef enum { LIVE, BARRIER, DEAD } reach_state; + +typedef struct { + int32_t max_depth; // max stack depth of the function + size_t max_depth_pos; // position in code array where max_depth is emitted + // (for patching) +} func_frame; + +typedef struct { + int32_t depth; + reach_state state; + int32_t max_depth; + size_t max_depth_pos; + struct { + func_frame *data; + size_t len; + size_t cap; + } func_stack; +} stack_validation; + +#define DEPTH_INC(sv, n) \ + do { \ + if ((sv).state != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), \ + (n)); \ + (sv).depth += (n); \ + } \ + } while (0) +#define DEPTH_DEC(sv, n) \ + do { \ + if ((sv).state != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), \ + (n)); \ + (sv).depth -= (n); \ + assert((sv).depth >= 0 && "stack underflow"); \ + } \ + } while (0) +#define DEPTH_PUSH(sv) DEPTH_INC(sv, 1) +#define DEPTH_POP(sv) DEPTH_DEC(sv, 1) + +#endif // STACK_VALIDATION_H From 978a92543f5be33900510fedb5d5c1fa442800c2 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 1 Mar 2026 02:37:46 +0300 Subject: [PATCH 050/141] add `debug.h` --- virtual_machine/converter.c | 8 +----- virtual_machine/debug.h | 12 +++++++++ virtual_machine/ops.c | 40 ++++-------------------------- virtual_machine/stack_validation.h | 7 +----- 4 files changed, 19 insertions(+), 48 deletions(-) create mode 100644 virtual_machine/debug.h diff 
--git a/virtual_machine/converter.c b/virtual_machine/converter.c index e04a45017..ff50899a5 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1,6 +1,7 @@ #include "converter.h" #include "bytecode.h" #include "da.h" +#include "debug.h" #include "ffi.h" #include "memory.h" #include "opcodes.h" @@ -15,13 +16,6 @@ #include #include -// TODO: conolidate -#ifdef DEBUG_PRINT -#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) -#else -#define VM_DEBUG(fmt, ...) -#endif - /* * Sentinel value for external references (both functions and globals). * Address = -index - 1, so index 0 becomes -1, index 1 becomes -2, etc. diff --git a/virtual_machine/debug.h b/virtual_machine/debug.h new file mode 100644 index 000000000..82a92b27c --- /dev/null +++ b/virtual_machine/debug.h @@ -0,0 +1,12 @@ +#ifndef DEBUG_H +#define DEBUG_H + +#include + +#ifdef DEBUG_PRINT +#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#else +#define VM_DEBUG(fmt, ...) +#endif + +#endif // DEBUG_H diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 2ffe08fbb..9ec44e03b 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -1,5 +1,6 @@ #include "ops.h" #include "../runtime/runtime_common.h" +#include "debug.h" #include "ffi.h" #include "insn.h" #include @@ -40,37 +41,6 @@ extern aint Barray_tag_patt(void *x); extern aint Bstring_tag_patt(void *x); extern aint Bsexp_tag_patt(void *x); -/* - * Debug macros - */ -#ifdef DEBUG_PRINT -#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) -#define VM_TRACE_STACK(stack) \ - do { \ - long sp_idx = (stack)->sp - (stack)->data; \ - fprintf(stderr, " stack [sp=%p, idx=%ld]: ", (stack)->sp, sp_idx); \ - for (int i = 1; i <= STACK_PEEK_SIZE; i++) { \ - if (sp_idx + i < STACK_SIZE) { \ - fprintf(stderr, "%ld ", (long)(stack)->data[sp_idx + i]); \ - } \ - } \ - fprintf(stderr, "\n"); \ - } while (0) -#define VM_TRACE_CALL(fmt, ...) 
fprintf(stderr, "[CALL] " fmt, ##__VA_ARGS__) -#define VM_ASSERT(cond, msg) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "Assert failed: %s at %s:%d\n", msg, __FILE__, \ - __LINE__); \ - exit(1); \ - } \ - } while (0) -#else -#define VM_DEBUG(fmt, ...) -#define VM_TRACE_STACK(stack) -#define VM_TRACE_CALL(fmt, ...) -#define VM_ASSERT(cond, msg) -#endif #define DISPATCH() \ do { \ @@ -500,7 +470,7 @@ void op_begin(DECL_STATE) { int32_t n_locals = ip->num; ip++; - VM_TRACE_CALL("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, + VM_DEBUG("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, (void *)bp, (void *)sp); for (int32_t i = 0; i < n_locals; i++) { @@ -516,7 +486,7 @@ void op_call(DECL_STATE) { ip++; int32_t n_args = ip->num; - VM_TRACE_CALL("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, + VM_DEBUG("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, n_args, (void *)sp, (void *)bp); STACK_PUSH(sp, (aint)n_args); @@ -543,7 +513,7 @@ void op_callc(DECL_STATE) { aint entry = closure[0]; insn *target = (insn *)entry; - VM_TRACE_CALL("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", + VM_DEBUG("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", (void *)closure, (void *)target, n_args, (void *)sp, (void *)bp); @@ -566,7 +536,7 @@ void op_callc(DECL_STATE) { void op_end(DECL_STATE) { (void)ip; (void)globals; - VM_TRACE_CALL("END sp=%p\n", (void *)sp); + VM_DEBUG("END sp=%p\n", (void *)sp); aint ret_val = STACK_PEEK(sp); *bp = ret_val; return; diff --git a/virtual_machine/stack_validation.h b/virtual_machine/stack_validation.h index b16c434c2..b35a64c9b 100644 --- a/virtual_machine/stack_validation.h +++ b/virtual_machine/stack_validation.h @@ -1,15 +1,10 @@ #ifndef STACK_VALIDATION_H #define STACK_VALIDATION_H +#include "debug.h" #include #include -#ifdef DEBUG_PRINT -#define VM_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) -#else -#define VM_DEBUG(fmt, ...) 
-#endif - /* * Different states of reachability for stack validation: * LIVE: currently decoding sequentially, reachable from previous instruction From ddcfa1a7c223686e04fb5637a2b55f54b42de033 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 31 Mar 2026 03:17:52 +0300 Subject: [PATCH 051/141] cleanup `regression_check.sh` --- virtual_machine/regression_check.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh index 62c9af3e4..4628bf402 100755 --- a/virtual_machine/regression_check.sh +++ b/virtual_machine/regression_check.sh @@ -2,8 +2,6 @@ # credit: ProgramSnail -make build - prefix="../regression/" suffix=".lama" From ddc1bb4e45582709e8a3aa2a4e882dfd4f676387 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 1 Apr 2026 03:18:37 +0300 Subject: [PATCH 052/141] stack --- runtime/runtime.c | 6 +- virtual_machine/converter.c | 19 ++- virtual_machine/opcodes.c | 2 + virtual_machine/opcodes.h | 1 + virtual_machine/ops.c | 192 ++++++++++++++++------------- virtual_machine/ops.h | 2 + virtual_machine/stack_validation.h | 2 + virtual_machine/vm.c | 37 +++--- 8 files changed, 154 insertions(+), 107 deletions(-) diff --git a/runtime/runtime.c b/runtime/runtime.c index 956536d60..fc8b28468 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -7,6 +7,7 @@ extern size_t __gc_stack_top, __gc_stack_bottom; +#ifdef LAMA_ENV #define PRE_GC() \ bool flag = false; \ flag = __gc_stack_top == 0; \ @@ -14,10 +15,13 @@ extern size_t __gc_stack_top, __gc_stack_bottom; assert(__gc_stack_top != 0); \ assert((__gc_stack_top & 0xF) == 0); \ assert(__builtin_frame_address(0) <= (void *)__gc_stack_top); - #define POST_GC() \ assert(__builtin_frame_address(0) <= (void *)__gc_stack_top); \ if (flag) { __gc_stack_top = 0; } +#else +#define PRE_GC() (void)0 +#define POST_GC() (void)0 +#endif _Noreturn static void vfailure (char *s, va_list args) { fprintf(stderr, "*** FAILURE: "); diff --git 
a/virtual_machine/converter.c b/virtual_machine/converter.c index ff50899a5..dd8810ff4 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -298,6 +298,9 @@ static insn *decode_internal(decode_ctx *ctx) { .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; da_init(ctx->sv.func_stack); + EMIT_FUNC(ctx, op_init); + EMIT_NUM(ctx, 0); // placeholder for op_eof + insn *result = NULL; while (!reader_eof(&ctx->reader)) { @@ -739,7 +742,7 @@ static insn *decode_internal(decode_ctx *ctx) { case OP_CALL: { int32_t target_off = reader_i32(&ctx->reader); int32_t n_args = reader_i32(&ctx->reader); - // push n_args, return 1 value == n_args - 1 net stack change + // consume n_args, produce 1 result = net -(n_args - 1) DEPTH_DEC(ctx->sv, n_args - 1); VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " @@ -832,8 +835,7 @@ static insn *decode_internal(decode_ctx *ctx) { break; } - case 0xFF: - case 0x00: + case OP_EOF: break; default: @@ -927,12 +929,15 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, size_t total_globals, ffi_call_table *ffi) { size_t ffi_call_len = ffi_call_table_len(ffi); - size_t ffi_call_offset = total_code_len; - size_t all_code_len = total_code_len + ffi_call_len * FFI_STUB_SIZE; + + size_t eof_offset = total_code_len; + size_t ffi_call_offset = eof_offset + 1; + size_t all_code_len = ffi_call_offset + ffi_call_len * FFI_STUB_SIZE; insn *all_code = ALLOC_ARRAY(insn, all_code_len); insn **entry_points = ALLOC_ARRAY(insn *, n); + all_code[eof_offset].func = op_eof; // Copy code and resolve relocations size_t code_offset = 0; for (size_t i = 0; i < n; i++) { @@ -943,6 +948,8 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, entry_points[i] = &all_code[code_offset]; resolve_relocs(all_code, dec, code_offset, ffi_call_offset); + all_code[code_offset + 1].target = 
&all_code[eof_offset]; + code_offset += dec->code_len; } @@ -980,7 +987,7 @@ program *decode(bytecode **bc_arr, size_t n) { for (size_t i = 0; i < n; i++) { decode_ctx ctx; - decode_ctx_init(&ctx, bc_arr[i], st, ffi, total_code_len); + decode_ctx_init(&ctx, bc_arr[i], st, ffi, total_globals); insn *code = decode_internal(&ctx); if (!code) { fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 13fff9a87..911960e29 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -104,6 +104,8 @@ const char *opcode_to_string(uint8_t opcode) { return "PATT.CLOSURE"; case OP_BARRAY: return "BARRAY"; + case OP_EOF: + return "EOF"; default: fprintf(stderr, "Unknown opcode: %d\n", opcode); exit(EXIT_FAILURE); diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index fe8a44182..4eb230f3b 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -54,6 +54,7 @@ typedef enum { OP_PATT_UNBOXED = 0x65, OP_PATT_CLOSURE = 0x66, OP_BARRAY = 0x74, + OP_EOF = 0xFF, } opcode_t; const char *opcode_to_string(uint8_t opcode); diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 9ec44e03b..5ac5e45ef 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -5,6 +5,9 @@ #include "insn.h" #include #include +#include + +extern size_t __gc_stack_top; /* * External runtime functions (runtime.c) @@ -41,7 +44,6 @@ extern aint Barray_tag_patt(void *x); extern aint Bstring_tag_patt(void *x); extern aint Bsexp_tag_patt(void *x); - #define DISPATCH() \ do { \ ip++; \ @@ -59,6 +61,29 @@ extern aint Bsexp_tag_patt(void *x); #define STACK_PUSH(sp, val) (*sp-- = (val)) #define STACK_POP(sp) (*++sp) #define STACK_PEEK(sp) (*(sp + 1)) +#define STACK_REVERSE(base, n) \ + do { \ + for (int32_t _i = 0; _i < (n) / 2; _i++) { \ + aint _tmp = (base)[_i]; \ + (base)[_i] = (base)[(n) - 1 - _i]; \ + (base)[(n) - 1 - _i] = _tmp; \ + } \ + } while (0) + +#define FRAME_SAVED_BP 
(-1) +#define FRAME_SAVED_IP (-2) +#define FRAME_SAVED_SP (-3) +#define FRAME_LOCALS (-4) + +#define PUSH_FRAME(n_args_val, saved_bp, saved_ip, caller_sp_val) \ + do { \ + STACK_PUSH(sp, n_args_val); \ + aint *new_bp = sp + 1; \ + STACK_PUSH(sp, (aint)(saved_bp)); \ + STACK_PUSH(sp, (aint)(saved_ip)); \ + STACK_PUSH(sp, (aint)(caller_sp_val)); \ + bp = new_bp; \ + } while (0) #define DEFINE_BINOP(name, fn, opname) \ void name(DECL_STATE) { \ @@ -205,8 +230,8 @@ void op_cjmp_nz(DECL_STATE) { void op_string(DECL_STATE) { ip++; const char *str = ip->str; - VM_DEBUG("STRING: \"%s\"\n", str); void *result = Bstring((void *)&str); + VM_DEBUG("STRING literal: \"%s\" -> %p\n", str, result); STACK_PUSH(sp, (aint)result); DISPATCH(); } @@ -215,15 +240,10 @@ void op_barray(DECL_STATE) { ip++; int32_t n = ip->num; VM_DEBUG("BARRAY: n=%d\n", n); - aint *args_base = sp + 1; - aint tmp_args[256]; - // TODO: optimize for passing direct pointer - // instead of population array - for (int32_t i = 0; i < n; i++) { - tmp_args[i] = args_base[n - 1 - i]; - } + aint *args = sp + 1; + STACK_REVERSE(args, n); sp += n; - void *arr = Barray(tmp_args, BOX(n)); + void *arr = Barray(args, BOX(n)); STACK_PUSH(sp, (aint)arr); DISPATCH(); } @@ -237,14 +257,10 @@ void op_sexp(DECL_STATE) { aint tag_hash = LtagHash((char *)tag_str); VM_DEBUG("SEXP: tag=\"%s\" (hash=0x%lx), n_fields=%d\n", tag_str, tag_hash, n_fields); - aint args[256]; - aint *args_base = sp + 1; - // TODO: optimize for passing direct pointer - // instead of population array - for (int32_t i = 0; i < n_fields; i++) { - args[i] = args_base[n_fields - 1 - i]; - } - args[n_fields] = tag_hash; + // Use the free slot at sp for tag_hash, reverse the whole range in-place + *sp = tag_hash; + STACK_REVERSE(sp, n_fields + 1); + aint *args = sp; sp += n_fields; void *s = Bsexp(args, BOX(n_fields + 1)); @@ -401,8 +417,9 @@ void op_st_glo_ext(DECL_STATE) { void op_ld_loc(DECL_STATE) { ip++; int32_t idx = ip->num; - VM_DEBUG("LD_LOC[%d] bp=%p 
bp[-idx]=%ld\n", idx, (void *)bp, (long)bp[-idx]); - STACK_PUSH(sp, bp[-idx]); + VM_DEBUG("LD_LOC[%d] bp=%p val=%ld\n", idx, (void *)bp, + (long)bp[FRAME_LOCALS - idx]); + STACK_PUSH(sp, bp[FRAME_LOCALS - idx]); DISPATCH(); } @@ -411,15 +428,15 @@ void op_st_loc(DECL_STATE) { int32_t idx = ip->num; aint val = STACK_PEEK(sp); VM_DEBUG("ST_LOC[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); - bp[-idx] = val; + bp[FRAME_LOCALS - idx] = val; DISPATCH(); } void op_ld_arg(DECL_STATE) { ip++; int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint val = bp[n_args + 1 - idx]; + int32_t n_args = (int32_t)bp[0]; + aint val = bp[n_args - idx]; VM_DEBUG("LD_ARG[%d] n_args=%d bp=%p val=%ld\n", idx, n_args, (void *)bp, (long)val); STACK_PUSH(sp, val); @@ -429,18 +446,18 @@ void op_ld_arg(DECL_STATE) { void op_st_arg(DECL_STATE) { ip++; int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; + int32_t n_args = (int32_t)bp[0]; aint val = STACK_PEEK(sp); VM_DEBUG("ST_ARG[%d] = %ld bp=%p\n", idx, (long)val, (void *)bp); - bp[n_args + 1 - idx] = val; + bp[n_args - idx] = val; DISPATCH(); } void op_ld_clo(DECL_STATE) { ip++; int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; - aint *closure = (aint *)bp[n_args + 2]; + int32_t n_args = (int32_t)bp[0]; + aint *closure = (aint *)bp[n_args + 1]; VM_DEBUG("LD_CLO[%d] closure=%p val=%ld\n", idx, (void *)closure, (long)closure[idx + 1]); STACK_PUSH(sp, closure[idx + 1]); @@ -450,9 +467,9 @@ void op_ld_clo(DECL_STATE) { void op_st_clo(DECL_STATE) { ip++; int32_t idx = ip->num; - int32_t n_args = (int32_t)bp[1]; + int32_t n_args = (int32_t)bp[0]; aint val = STACK_PEEK(sp); - aint *closure = (aint *)bp[n_args + 2]; + aint *closure = (aint *)bp[n_args + 1]; VM_DEBUG("ST_CLO[%d] = %ld closure=%p\n", idx, (long)val, (void *)closure); closure[idx + 1] = val; DISPATCH(); @@ -462,21 +479,23 @@ void op_st_clo(DECL_STATE) { * Function call operations */ void op_begin(DECL_STATE) { - ip++; int32_t n_args = ip->num; (void)n_args; ip++; 
int32_t n_locals = ip->num; ip++; + int32_t max_depth = ip->num; - VM_DEBUG("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, - (void *)bp, (void *)sp); + VM_DEBUG("BEGIN n_args=%d n_locals=%d max_depth=%d bp=%p sp=%p\n", n_args, + n_locals, max_depth, (void *)bp, (void *)sp); for (int32_t i = 0; i < n_locals; i++) { STACK_PUSH(sp, 0); } + __gc_stack_top = (size_t)(sp - max_depth); + DISPATCH(); } @@ -485,61 +504,51 @@ void op_call(DECL_STATE) { insn *target = ip->target; ip++; int32_t n_args = ip->num; + ip++; // sort of a return address - VM_DEBUG("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, - n_args, (void *)sp, (void *)bp); - - STACK_PUSH(sp, (aint)n_args); - STACK_PUSH(sp, (aint)bp); + VM_DEBUG("CALL target=%p n_args=%d sp=%p bp=%p\n", (void *)target, n_args, + (void *)sp, (void *)bp); - aint *new_bp = sp + 1; - target->func(target, sp, new_bp, globals); - - aint ret_val = *new_bp; - - sp = new_bp + n_args + 1; - - STACK_PUSH(sp, ret_val); - DISPATCH(); + aint *caller_sp = sp + n_args; + PUSH_FRAME(n_args, bp, ip, caller_sp); + ip = target; + DISPATCH_JUMP(); } void op_callc(DECL_STATE) { ip++; int32_t n_args = ip->num; + ip++; // sort of a return address aint closure_val = *(sp + 1 + n_args); aint *closure = (aint *)closure_val; - aint entry = closure[0]; insn *target = (insn *)entry; VM_DEBUG("CALLC closure=%p target=%p n_args=%d sp=%p bp=%p\n", - (void *)closure, (void *)target, n_args, (void *)sp, - (void *)bp); + (void *)closure, (void *)target, n_args, (void *)sp, (void *)bp); - STACK_PUSH(sp, (aint)n_args); - STACK_PUSH(sp, (aint)bp); + aint *caller_sp = sp + n_args + 1; + PUSH_FRAME(n_args, bp, ip, caller_sp); + ip = target; + DISPATCH_JUMP(); +} - aint *new_bp = sp + 1; - target->func(target, sp, new_bp, globals); +void op_end(DECL_STATE) { + (void)globals; + (void)sp; + aint ret_val = STACK_POP(sp); - aint ret_val = *new_bp; - VM_DEBUG("CALLC: return value=%ld new_bp=%p\n", (long)ret_val, - (void *)new_bp); + VM_DEBUG("END 
ret_val=%ld bp=%p sp=%p\n", (long)ret_val, (void *)bp, + (void *)sp); - sp = new_bp + n_args + 2; + // Restore caller's state from frame + sp = (aint *)bp[FRAME_SAVED_SP]; + ip = (insn *)bp[FRAME_SAVED_IP]; + bp = (aint *)bp[FRAME_SAVED_BP]; STACK_PUSH(sp, ret_val); - DISPATCH(); -} - -void op_end(DECL_STATE) { - (void)ip; - (void)globals; - VM_DEBUG("END sp=%p\n", (void *)sp); - aint ret_val = STACK_PEEK(sp); - *bp = ret_val; - return; + DISPATCH_JUMP(); } /* @@ -551,22 +560,23 @@ void op_ffi_call(DECL_STATE) { ip++; const ffi_resolved *res = (const ffi_resolved *)ip->ptr; - int32_t n_args = (int32_t)bp[1]; + int32_t n_args = (int32_t)bp[0]; VM_DEBUG("FFI_CALL: kind=%d n_args=%d bp=%p\n", res->kind, n_args, (void *)bp); - aint args[256]; - for (int32_t i = 0; i < n_args; i++) { - args[i] = bp[n_args + 1 - i]; - } - - aint result = ffi_call_c(res, args, n_args); + // args at bp[1..n_args] + STACK_REVERSE(bp + 1, n_args); + aint result = ffi_call_c(res, bp + 1, n_args); VM_DEBUG("FFI_CALL: result=%ld\n", (long)result); - *bp = result; + // Same as op_end + sp = (aint *)bp[FRAME_SAVED_SP]; + ip = (insn *)bp[FRAME_SAVED_IP]; + bp = (aint *)bp[FRAME_SAVED_BP]; - return; + STACK_PUSH(sp, result); + DISPATCH_JUMP(); } void op_closure(DECL_STATE) { @@ -577,21 +587,35 @@ void op_closure(DECL_STATE) { VM_DEBUG("CLOSURE: target=%p n_captured=%d\n", (void *)target, n_captured); - aint tmp_args[256]; - tmp_args[0] = (aint)target; - aint *args_base = sp + 1; - for (int32_t i = 0; i < n_captured; i++) { - tmp_args[i + 1] = args_base[n_captured - 1 - i]; - VM_DEBUG("CLOSURE: captured[%d]=%ld\n", i, (long)tmp_args[i + 1]); - } + *sp = (aint)target; + STACK_REVERSE(sp + 1, n_captured); + aint *args = sp; sp += n_captured; - void *closure = Bclosure(tmp_args, BOX(n_captured)); + void *closure = Bclosure(args, BOX(n_captured)); VM_DEBUG("CLOSURE: created=%p\n", (void *)closure); STACK_PUSH(sp, (aint)closure); DISPATCH(); } +void op_init(DECL_STATE) { + ip++; + insn *eof_ip = 
ip->target; + + aint *caller_sp = sp; + PUSH_FRAME(0, 0, eof_ip, caller_sp); + + DISPATCH(); +} + +void op_eof(DECL_STATE) { + (void)ip; + (void)bp; + (void)globals; + (void)sp; + return; +} + #ifdef DEBUG_PRINT void op_line(DECL_STATE) { ip++; diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index 7ab835686..60a7bcaa8 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -63,6 +63,8 @@ void op_ffi_call(DECL_STATE); void op_ld_glo_ext(DECL_STATE); void op_st_glo_ext(DECL_STATE); +void op_init(DECL_STATE); +void op_eof(DECL_STATE); void op_line(DECL_STATE); #endif // OPS_H diff --git a/virtual_machine/stack_validation.h b/virtual_machine/stack_validation.h index b35a64c9b..23a2e287d 100644 --- a/virtual_machine/stack_validation.h +++ b/virtual_machine/stack_validation.h @@ -38,6 +38,8 @@ typedef struct { VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), \ (n)); \ (sv).depth += (n); \ + if ((sv).depth > (sv).max_depth) \ + (sv).max_depth = (sv).depth; \ } \ } while (0) #define DEPTH_DEC(sv, n) \ diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 1e0dcd35c..14d7069cc 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -8,6 +8,7 @@ #include #include #include +#include extern size_t __gc_stack_top, __gc_stack_bottom; extern void set_args(aint argc, char *argv[]); @@ -21,6 +22,8 @@ struct virtual_machine { size_t total_globals; void *ffi_data; // ffi_resolved array size_t ffi_count; + void *stack_base; + size_t stack_size; }; virtual_machine *vm_create(const char *main_unit_path, const char **paths, @@ -56,6 +59,13 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, free(prog); + vm->stack_size = 8 * 1024 * 1024; + vm->stack_base = mmap(NULL, vm->stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); + if (vm->stack_base == MAP_FAILED) { + perror("mmap stack"); + exit(EXIT_FAILURE); + } return vm; } @@ -70,6 +80,7 @@ void vm_destroy(virtual_machine 
*vm) { free(vm->ffi_data); free(vm->code); free(vm->entry_points); + munmap(vm->stack_base, vm->stack_size); free(vm); } @@ -80,29 +91,23 @@ void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { aint vm_run(virtual_machine *vm) { - // TODO: this is all very ugly - size_t active_stack_size = 32768; - __attribute__((aligned(16))) aint stack_data[65536]; - - memset(stack_data, 0, active_stack_size * sizeof(aint)); + aint *stack_top = (aint *)((char *)vm->stack_base + vm->stack_size); - __gc_stack_bottom = (size_t)(stack_data + active_stack_size); - __gc_stack_top = (size_t)(stack_data - 16); + aint *globals = stack_top - vm->total_globals; + memset(globals, 0, vm->total_globals * sizeof(aint)); - // Globals at the top of stack - aint *globals = stack_data; - for (size_t i = 0; i < vm->total_globals; i++) { - globals[i] = 0; - } + aint *sp = globals - 1; - aint *sp = &stack_data[active_stack_size - 1]; - aint *bp = sp; + __gc_stack_top = (size_t)sp; + __gc_stack_bottom = (size_t)stack_top; + aint *bp; + aint ret_val = 0; for (size_t i = 0; i < vm->bc_len; i++) { insn *ip = vm->entry_points[i]; - ip->func(ip, sp, bp, globals); + ret_val = *sp; } - return *bp; + return ret_val; } From 967528929047edcfdafbaff301697e092441b8e8 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 1 Apr 2026 04:01:44 +0300 Subject: [PATCH 053/141] load / store globals by a pointer instead of an index --- virtual_machine/converter.c | 32 +++++++++++++++++++---------- virtual_machine/converter.h | 4 +++- virtual_machine/insn.h | 4 ++-- virtual_machine/ops.c | 32 ++++------------------------- virtual_machine/ops.h | 2 -- virtual_machine/vm.c | 40 ++++++++++++++++++++++--------------- 6 files changed, 54 insertions(+), 60 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index dd8810ff4..dbd9cf571 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -80,6 +80,7 @@ typedef struct { } code; byte_reader reader; + aint *globals; 
size_t global_offset; struct { @@ -97,9 +98,10 @@ typedef struct { static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, symbol_table *st, ffi_call_table *ffi, - int32_t global_offset) { + aint *globals, size_t global_offset) { ctx->bc = bc; + ctx->globals = globals; ctx->global_offset = global_offset; ctx->bc_to_insn_map = ALLOC_ARRAY(int32_t, bc->code_size); @@ -154,19 +156,19 @@ static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); - VM_DEBUG("DECODE: OP_LD external global '%s' (stub)\n", glob_name); + VM_DEBUG("DECODE: OP_LD external global '%s'\n", glob_name); resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); if (sym) { // Global from another unit EMIT_FUNC(ctx, op_ld_glo); - EMIT_NUM(ctx, sym->idx); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); return true; } else { // C global void *ptr = dlsym(RTLD_DEFAULT, glob_name); if (ptr) { - EMIT_FUNC(ctx, op_ld_glo_ext); + EMIT_FUNC(ctx, op_ld_glo); EMIT_GLOBAL_PTR(ctx, (aint *)ptr); return true; } else { @@ -176,7 +178,7 @@ static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { } } else { EMIT_FUNC(ctx, op_ld_glo); - EMIT_NUM(ctx, global_base + idx); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); } return true; } @@ -188,19 +190,19 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(bc, str_offset); - VM_DEBUG("DECODE: OP_ST external global '%s' (stub)\n", glob_name); + VM_DEBUG("DECODE: OP_ST external global '%s'\n", glob_name); resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); if (sym) { // Global from another unit EMIT_FUNC(ctx, op_st_glo); - EMIT_NUM(ctx, sym->idx); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); return true; } else { // C global void *ptr = dlsym(RTLD_DEFAULT, glob_name); if (ptr) { - 
EMIT_FUNC(ctx, op_st_glo_ext); + EMIT_FUNC(ctx, op_st_glo); EMIT_GLOBAL_PTR(ctx, (aint *)ptr); return true; } else { @@ -210,7 +212,7 @@ static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { } } else { EMIT_FUNC(ctx, op_st_glo); - EMIT_NUM(ctx, global_base + idx); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); } return true; } @@ -974,7 +976,15 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, return prog; } -program *decode(bytecode **bc_arr, size_t n) { +size_t count_globals(bytecode **bc_arr, size_t n) { + size_t total = 0; + for (size_t i = 0; i < n; i++) { + total += bc_arr[i]->globals_count; + } + return total; +} + +program *decode(bytecode **bc_arr, size_t n, aint *globals) { symbol_table *st = symbol_table_create(); ffi_call_table *ffi = ffi_call_table_create(); @@ -987,7 +997,7 @@ program *decode(bytecode **bc_arr, size_t n) { for (size_t i = 0; i < n; i++) { decode_ctx ctx; - decode_ctx_init(&ctx, bc_arr[i], st, ffi, total_globals); + decode_ctx_init(&ctx, bc_arr[i], st, ffi, globals, total_globals); insn *code = decode_internal(&ctx); if (!code) { fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h index 71f8b3dc2..aedc40a4f 100644 --- a/virtual_machine/converter.h +++ b/virtual_machine/converter.h @@ -17,7 +17,9 @@ typedef struct { size_t ffi_len; } program; -program *decode(bytecode **bc_arr, size_t n); +size_t count_globals(bytecode **bc_arr, size_t n); + +program *decode(bytecode **bc_arr, size_t n, aint *globals); void program_free(program *prog); #endif // CONVERTER_H diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index 520175f21..74233aeb4 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -10,8 +10,8 @@ union insn; // State: ip = instruction pointer, sp = stack pointer, bp = base pointer -#define DECL_STATE union insn *ip, aint *sp, aint *bp, aint *globals -#define STATE 
ip, sp, bp, globals +#define DECL_STATE union insn *ip, aint *sp, aint *bp +#define STATE ip, sp, bp // Function pointer type for opcode handlers (returns void for tail calls) typedef void (*fn)(DECL_STATE); diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 5ac5e45ef..4327c911f 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -297,7 +297,6 @@ void op_array(DECL_STATE) { void op_fail(DECL_STATE) { (void)sp; (void)bp; - (void)globals; ip++; int32_t line = ip->num; ip++; @@ -375,41 +374,21 @@ void op_patt_closure(DECL_STATE) { } /* - * Load / store operations + * Load / store global variables (by pointer) */ void op_ld_glo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - VM_DEBUG("LD_GLO[%d] = %ld\n", idx, (long)globals[idx]); - STACK_PUSH(sp, globals[idx]); - DISPATCH(); -} - -void op_st_glo(DECL_STATE) { - ip++; - int32_t idx = ip->num; - aint val = STACK_PEEK(sp); - VM_DEBUG("ST_GLO[%d] = %ld\n", idx, (long)val); - globals[idx] = val; - DISPATCH(); -} - -/* - * Load / store extenral globals - */ -void op_ld_glo_ext(DECL_STATE) { ip++; aint *ptr = ip->global_ptr; - VM_DEBUG("LD_GLO_FFI ptr=%p val=%ld\n", (void *)ptr, (long)*ptr); + VM_DEBUG("LD_GLO ptr=%p val=%ld\n", (void *)ptr, (long)*ptr); STACK_PUSH(sp, *ptr); DISPATCH(); } -void op_st_glo_ext(DECL_STATE) { +void op_st_glo(DECL_STATE) { ip++; aint *ptr = ip->global_ptr; aint val = STACK_PEEK(sp); - VM_DEBUG("ST_GLO_FFI ptr=%p val=%ld\n", (void *)ptr, (long)val); + VM_DEBUG("ST_GLO ptr=%p val=%ld\n", (void *)ptr, (long)val); *ptr = val; DISPATCH(); } @@ -535,7 +514,6 @@ void op_callc(DECL_STATE) { } void op_end(DECL_STATE) { - (void)globals; (void)sp; aint ret_val = STACK_POP(sp); @@ -556,7 +534,6 @@ void op_end(DECL_STATE) { */ void op_ffi_call(DECL_STATE) { (void)sp; - (void)globals; ip++; const ffi_resolved *res = (const ffi_resolved *)ip->ptr; @@ -611,7 +588,6 @@ void op_init(DECL_STATE) { void op_eof(DECL_STATE) { (void)ip; (void)bp; - (void)globals; (void)sp; return; } diff 
--git a/virtual_machine/ops.h b/virtual_machine/ops.h index 60a7bcaa8..a4d7d35d4 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -60,8 +60,6 @@ void op_callc(DECL_STATE); void op_end(DECL_STATE); void op_closure(DECL_STATE); void op_ffi_call(DECL_STATE); -void op_ld_glo_ext(DECL_STATE); -void op_st_glo_ext(DECL_STATE); void op_init(DECL_STATE); void op_eof(DECL_STATE); diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 14d7069cc..116f62984 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -20,6 +20,7 @@ struct virtual_machine { insn **entry_points; // Entry point for each unique unit size_t entry_points_len; size_t total_globals; + aint *globals; // Globals array (at the top of the stack) void *ffi_data; // ffi_resolved array size_t ffi_count; void *stack_base; @@ -41,17 +42,36 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, vm->bc_arr = lr.units; vm->bc_len = lr.units_len; - program *prog = decode(lr.units, lr.units_len); + vm->stack_size = 8 * 1024 * 1024; + vm->stack_base = mmap(NULL, vm->stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); + if (vm->stack_base == MAP_FAILED) { + perror("mmap stack"); + for (size_t i = 0; i < vm->bc_len; i++) { + bytecode_free(lr.units[i]); + } + free(lr.units); + free(vm); + return NULL; + } + + // Compute total globals and place at the top of the stack + vm->total_globals = count_globals(lr.units, lr.units_len); + aint *stack_top = (aint *)((char *)vm->stack_base + vm->stack_size); + vm->globals = stack_top - vm->total_globals; + memset(vm->globals, 0, vm->total_globals * sizeof(aint)); + + program *prog = decode(lr.units, lr.units_len, vm->globals); if (!prog) { for (size_t i = 0; i < vm->bc_len; i++) { bytecode_free(lr.units[i]); } free(lr.units); + munmap(vm->stack_base, vm->stack_size); free(vm); return NULL; } - vm->total_globals = prog->total_globals; vm->code = prog->code; vm->entry_points = 
prog->entry_points; vm->ffi_data = prog->ffi_data; @@ -59,13 +79,6 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, free(prog); - vm->stack_size = 8 * 1024 * 1024; - vm->stack_base = mmap(NULL, vm->stack_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); - if (vm->stack_base == MAP_FAILED) { - perror("mmap stack"); - exit(EXIT_FAILURE); - } return vm; } @@ -90,13 +103,8 @@ void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { } aint vm_run(virtual_machine *vm) { - aint *stack_top = (aint *)((char *)vm->stack_base + vm->stack_size); - - aint *globals = stack_top - vm->total_globals; - memset(globals, 0, vm->total_globals * sizeof(aint)); - - aint *sp = globals - 1; + aint *sp = vm->globals - 1; __gc_stack_top = (size_t)sp; __gc_stack_bottom = (size_t)stack_top; @@ -105,7 +113,7 @@ aint vm_run(virtual_machine *vm) { aint ret_val = 0; for (size_t i = 0; i < vm->bc_len; i++) { insn *ip = vm->entry_points[i]; - ip->func(ip, sp, bp, globals); + ip->func(ip, sp, bp); ret_val = *sp; } From 1b1d39a1a12ae2392c8c4c426aadd9913d15b8d6 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 1 Apr 2026 04:18:34 +0300 Subject: [PATCH 054/141] better cleanup --- virtual_machine/vm.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 116f62984..af826e9c1 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -33,10 +33,12 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, search_paths search_paths = {.paths = paths, .len = total_paths_len}; virtual_machine *vm = ALLOC(virtual_machine); + memset(vm, 0, sizeof(virtual_machine)); + vm->stack_base = MAP_FAILED; load_result lr = load(main_unit_path, &search_paths); if (!lr.units) { - free(vm); + vm_destroy(vm); return NULL; } vm->bc_arr = lr.units; @@ -47,11 +49,7 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, 
MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); if (vm->stack_base == MAP_FAILED) { perror("mmap stack"); - for (size_t i = 0; i < vm->bc_len; i++) { - bytecode_free(lr.units[i]); - } - free(lr.units); - free(vm); + vm_destroy(vm); return NULL; } @@ -63,12 +61,7 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, program *prog = decode(lr.units, lr.units_len, vm->globals); if (!prog) { - for (size_t i = 0; i < vm->bc_len; i++) { - bytecode_free(lr.units[i]); - } - free(lr.units); - munmap(vm->stack_base, vm->stack_size); - free(vm); + vm_destroy(vm); return NULL; } @@ -86,14 +79,18 @@ void vm_destroy(virtual_machine *vm) { if (!vm) { return; } - for (size_t i = 0; i < vm->bc_len; i++) { - bytecode_free(vm->bc_arr[i]); + if (vm->bc_arr) { + for (size_t i = 0; i < vm->bc_len; i++) { + bytecode_free(vm->bc_arr[i]); + } + free(vm->bc_arr); } - free(vm->bc_arr); free(vm->ffi_data); free(vm->code); free(vm->entry_points); - munmap(vm->stack_base, vm->stack_size); + if (vm->stack_base && vm->stack_base != MAP_FAILED) { + munmap(vm->stack_base, vm->stack_size); + } free(vm); } From 0db7ad428a3723a9761a10f207ac0b0ff113af1e Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 2 Apr 2026 19:00:23 +0300 Subject: [PATCH 055/141] add function and global prefixing --- virtual_machine/converter.c | 135 +++++++++++++++++++++--------------- virtual_machine/ffi.c | 52 ++++++++------ 2 files changed, 112 insertions(+), 75 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index dbd9cf571..a3771b36b 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -24,6 +24,8 @@ #define IS_EXT_REF(addr) ((addr) < 0) #define EXT_REF_INDEX(addr) (-(addr) - 1) +#define GLOBAL_PREFIX "global_" + /* * Code emission macros - append to code array in context */ @@ -70,6 +72,23 @@ typedef struct { size_t relocs_len; } decoded; +/* + * Cache for external C globals.. + * Also used as GC root table. 
+ */ +typedef struct { + const char *name; + void *ptr; +} ext_global_entry; + +typedef struct { + struct { + ext_global_entry *data; + size_t len; + size_t cap; + } entries; +} ext_global_cache; + typedef struct { const bytecode *bc; @@ -92,13 +111,15 @@ typedef struct { int32_t *bc_to_insn_map; symbol_table *st; ffi_call_table *ffi; + ext_global_cache *ext_globals; stack_validation sv; } decode_ctx; static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, symbol_table *st, ffi_call_table *ffi, - aint *globals, size_t global_offset) { + ext_global_cache *ext_globals, aint *globals, + size_t global_offset) { ctx->bc = bc; ctx->globals = globals; @@ -110,10 +131,38 @@ static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, ctx->st = st; ctx->ffi = ffi; + ctx->ext_globals = ext_globals; reader_init(&ctx->reader, bc->code, bc->code_size); } +/* + * Resolve an external C global -- prefix with "global_", dlsym, cache. + */ +static void *resolve_ext_global(ext_global_cache *cache, const char *name) { + for (size_t i = 0; i < cache->entries.len; i++) { + if (strcmp(cache->entries.data[i].name, name) == 0) { + return cache->entries.data[i].ptr; + } + } + + size_t nlen = strlen(name); + char prefixed[sizeof(GLOBAL_PREFIX) + nlen]; + memcpy(prefixed, GLOBAL_PREFIX, sizeof(GLOBAL_PREFIX) - 1); + memcpy(prefixed + sizeof(GLOBAL_PREFIX) - 1, name, nlen + 1); + + void *ptr = dlsym(RTLD_DEFAULT, prefixed); + if (!ptr) { + fprintf(stderr, "Error: unresolved global '%s' (tried '%s')\n", name, + prefixed); + return NULL; + } + + ext_global_entry entry = {.name = name, .ptr = ptr}; + da_append(cache->entries, entry); + return ptr; +} + static void free_decoded_arr(decoded *arr, size_t n) { for (size_t i = 0; i < n; i++) { free(arr[i].code); @@ -149,71 +198,45 @@ static bool validate_target_off(const bytecode *bc, int32_t target_off, return true; } -static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { - const bytecode *bc = ctx->bc; +static bool 
emit_ext_glo(decode_ctx *ctx, const char *glob_name, fn op) { + resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); + if (sym) { + // Global from another unit + EMIT_FUNC(ctx, op); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); + return true; + } + // C global + void *ptr = resolve_ext_global(ctx->ext_globals, glob_name); + if (!ptr) { + return false; + } + EMIT_FUNC(ctx, op); + EMIT_GLOBAL_PTR(ctx, (aint *)ptr); + return true; +} +static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { if (IS_EXT_REF(idx)) { int str_offset = EXT_REF_INDEX(idx); - const char *glob_name = bytecode_get_string(bc, str_offset); - + const char *glob_name = bytecode_get_string(ctx->bc, str_offset); VM_DEBUG("DECODE: OP_LD external global '%s'\n", glob_name); - - resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); - if (sym) { - // Global from another unit - EMIT_FUNC(ctx, op_ld_glo); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); - return true; - } else { - // C global - void *ptr = dlsym(RTLD_DEFAULT, glob_name); - if (ptr) { - EMIT_FUNC(ctx, op_ld_glo); - EMIT_GLOBAL_PTR(ctx, (aint *)ptr); - return true; - } else { - fprintf(stderr, "Error: unresolved global '%s'\n", glob_name); - return false; - } - } - } else { - EMIT_FUNC(ctx, op_ld_glo); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); + return emit_ext_glo(ctx, glob_name, op_ld_glo); } + EMIT_FUNC(ctx, op_ld_glo); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); return true; } static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { - const bytecode *bc = ctx->bc; - if (IS_EXT_REF(idx)) { int str_offset = EXT_REF_INDEX(idx); - const char *glob_name = bytecode_get_string(bc, str_offset); - + const char *glob_name = bytecode_get_string(ctx->bc, str_offset); VM_DEBUG("DECODE: OP_ST external global '%s'\n", glob_name); - - resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); - if (sym) { - // Global from another unit - 
EMIT_FUNC(ctx, op_st_glo); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); - return true; - } else { - // C global - void *ptr = dlsym(RTLD_DEFAULT, glob_name); - if (ptr) { - EMIT_FUNC(ctx, op_st_glo); - EMIT_GLOBAL_PTR(ctx, (aint *)ptr); - return true; - } else { - fprintf(stderr, "Error: unresolved global '%s'\n", glob_name); - return false; - } - } - } else { - EMIT_FUNC(ctx, op_st_glo); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); + return emit_ext_glo(ctx, glob_name, op_st_glo); } + EMIT_FUNC(ctx, op_st_glo); + EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); return true; } @@ -987,6 +1010,8 @@ size_t count_globals(bytecode **bc_arr, size_t n) { program *decode(bytecode **bc_arr, size_t n, aint *globals) { symbol_table *st = symbol_table_create(); ffi_call_table *ffi = ffi_call_table_create(); + ext_global_cache ext_globals = {0}; + da_init(ext_globals.entries); decoded *dec_arr = ALLOC_ARRAY(decoded, n); program *prog = NULL; @@ -997,7 +1022,8 @@ program *decode(bytecode **bc_arr, size_t n, aint *globals) { for (size_t i = 0; i < n; i++) { decode_ctx ctx; - decode_ctx_init(&ctx, bc_arr[i], st, ffi, globals, total_globals); + decode_ctx_init(&ctx, bc_arr[i], st, ffi, &ext_globals, globals, + total_globals); insn *code = decode_internal(&ctx); if (!code) { fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); @@ -1026,6 +1052,7 @@ program *decode(bytecode **bc_arr, size_t n, aint *globals) { cleanup: symbol_table_destroy(st); ffi_call_table_destroy(ffi); + da_free(ext_globals.entries); free_decoded_arr(dec_arr, n_decoded); free(dec_arr); diff --git a/virtual_machine/ffi.c b/virtual_machine/ffi.c index cb33760f5..7b7bf8ca0 100644 --- a/virtual_machine/ffi.c +++ b/virtual_machine/ffi.c @@ -46,26 +46,28 @@ void ffi_call_table_destroy(ffi_call_table *table) { free(table); } +#define FUNC_PREFIX "L" + typedef struct { - const char *lama_name; - const char *target_name; + const char *name; // raw name as it appears in bytecode + const char 
*target_name; // explicit dlsym name, or NULL for default bool is_args_array; int fixed_args; } func_metadata; static const func_metadata func_table[] = { // Args array functions - {"Lsubstring", "Lsubstring", true, 0}, - {"Lstringcat", "Lstringcat", true, 0}, - {"Lstring", "Lstring", true, 0}, - {"Li__Infix_4343", "Li__Infix_4343", true, 0}, // strcat - {"Ls__Infix_58", "Ls__Infix_58", true, 0}, // : (cons) - {"Lclone", "Lclone", true, 0}, // clone + {"substring", NULL, true, 0}, + {"stringcat", NULL, true, 0}, + {"string", NULL, true, 0}, + {"i__Infix_4343", NULL, true, 0}, // strcat + {"s__Infix_58", NULL, true, 0}, // : (cons) + {"clone", NULL, true, 0}, // Variadic functions with mapping - {"Lprintf", "Bprintf", false, 1}, - {"Lfprintf", "Bfprintf", false, 2}, - {"Lsprintf", "Bsprintf", false, 1}, + {"printf", "Bprintf", false, 1}, + {"fprintf", "Bfprintf", false, 2}, + {"sprintf", "Bsprintf", false, 1}, // Sentinel {NULL, NULL, false, 0}}; @@ -81,8 +83,8 @@ static void *lookup_function(const char *name) { } static const func_metadata *lookup_metadata(const char *name) { - for (int i = 0; func_table[i].lama_name != NULL; i++) { - if (strcmp(name, func_table[i].lama_name) == 0) { + for (int i = 0; func_table[i].name != NULL; i++) { + if (strcmp(name, func_table[i].name) == 0) { return &func_table[i]; } } @@ -97,17 +99,9 @@ size_t ffi_call_table_intern(ffi_call_table *table, const char *name) { } const func_metadata *meta = lookup_metadata(name); - const char *target_name = meta ? 
meta->target_name : name; - - void *fn = lookup_function(target_name); - if (!fn) { - fprintf(stderr, "Undefined external function: %s\n", name); - exit(EXIT_FAILURE); - } ffi_kind kind = FFI_REGULAR; int fixed_args = 0; - if (meta) { if (meta->is_args_array) { kind = FFI_ARGS_ARRAY; @@ -117,6 +111,22 @@ size_t ffi_call_table_intern(ffi_call_table *table, const char *name) { } } + void *fn; + if (meta && meta->target_name) { + fn = lookup_function(meta->target_name); + } else { + size_t nlen = strlen(name); + char prefixed[sizeof(FUNC_PREFIX) + nlen]; + memcpy(prefixed, FUNC_PREFIX, sizeof(FUNC_PREFIX) - 1); + memcpy(prefixed + sizeof(FUNC_PREFIX) - 1, name, nlen + 1); + fn = lookup_function(prefixed); + } + + if (!fn) { + fprintf(stderr, "Undefined external function: %s\n", name); + exit(EXIT_FAILURE); + } + ffi_resolved entry = { .fn_ptr = fn, .kind = kind, From e2320fbc97ce9bd736c613fc6d08d24d4b7fc141 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 2 Apr 2026 19:07:17 +0300 Subject: [PATCH 056/141] track external globals for gc --- virtual_machine/converter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index a3771b36b..c9cf5344c 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1,4 +1,5 @@ #include "converter.h" +#include "../runtime/gc.h" #include "bytecode.h" #include "da.h" #include "debug.h" @@ -158,6 +159,8 @@ static void *resolve_ext_global(ext_global_cache *cache, const char *name) { return NULL; } + push_extra_root((void **)ptr); + ext_global_entry entry = {.name = name, .ptr = ptr}; da_append(cache->entries, entry); return ptr; From 73ddeaa1bf27536f8844eab55e88760a83903767 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 2 Apr 2026 19:26:38 +0300 Subject: [PATCH 057/141] cleanup --- src/SM.ml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index eb7d6e182..f4a43c9d1 100644 --- a/src/SM.ml 
+++ b/src/SM.ml @@ -206,7 +206,7 @@ module ByteCode = struct let rec iterate i = if i < n then match x.[i] with - | '"' -> + | '"' -> Buffer.add_char buf '"'; iterate (i + 1) | '\\' -> ( @@ -388,9 +388,9 @@ module ByteCode = struct Bytes.set_int32_le code ofs (Int32.of_int @@ - try Hashtbl.find lmap l - with Not_found -> - failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) + try Hashtbl.find lmap l + with Not_found -> + failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) !fixups; let pubs_resolved = List.rev_map (fun (name, flag) -> @@ -410,19 +410,19 @@ module ByteCode = struct let imports = List.rev_map (fun l -> Int32.of_int @@ StringTab.add st l) !imports in - let str_table = Buffer.to_bytes st.StringTab.buffer in + let st = Buffer.to_bytes st.StringTab.buffer in let file = Buffer.create 1024 in - Buffer.add_int32_le file (Int32.of_int @@ Bytes.length str_table); + Buffer.add_int32_le file (Int32.of_int @@ Bytes.length st); Buffer.add_int32_le file (Int32.of_int @@ Hashtbl.length globals); Buffer.add_int32_le file (Int32.of_int @@ List.length imports); Buffer.add_int32_le file (Int32.of_int @@ List.length pubs_resolved); - Buffer.add_bytes file str_table; + Buffer.add_bytes file st; List.iter (fun n -> Buffer.add_int32_le file n) imports; List.iter - (fun (name_off, offset, flag) -> - Buffer.add_int32_le file name_off; - Buffer.add_int32_le file offset; - Buffer.add_uint8 file flag) + (fun (n, o, f) -> + Buffer.add_int32_le file n; + Buffer.add_int32_le file o; + Buffer.add_uint8 file f) pubs_resolved; Buffer.add_bytes file code; let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in @@ -1618,8 +1618,8 @@ let compile cmd ((imports, _), p) = Some lfalse, i + 1, ((match lab with - | None -> [ SLABEL blab ] - | Some l -> [ SLABEL blab; LABEL l; DUP ]) + | None -> [ SLABEL blab ] + | Some l -> [ SLABEL blab; LABEL l; DUP ]) @ pcode @ bindcode @ scode @ jmp @ [ SLABEL elab ]) :: code, lfalse' ) From 3fc2c2dc89c941c43a12a333d3b0a2ccaa4ec031 
Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 17:45:02 +0300 Subject: [PATCH 058/141] zero out `op_drop` --- virtual_machine/ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 4327c911f..8d2cca2cb 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -148,7 +148,7 @@ void op_mod(DECL_STATE) { void op_drop(DECL_STATE) { VM_DEBUG("DROP\n"); - sp++; + *++sp = 0; DISPATCH(); } From 665516778d518eceabd1188de6af95ae25f378d3 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 17:56:21 +0300 Subject: [PATCH 059/141] remove div/mod by 0 check --- virtual_machine/ops.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 8d2cca2cb..ee44517ee 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -122,10 +122,6 @@ void op_div(DECL_STATE) { aint y = STACK_POP(sp); aint x = STACK_POP(sp); VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - exit(EXIT_FAILURE); - } aint res = Ls__Infix_47((void *)x, (void *)y); VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); STACK_PUSH(sp, res); @@ -136,10 +132,6 @@ void op_mod(DECL_STATE) { aint y = STACK_POP(sp); aint x = STACK_POP(sp); VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - if (UNBOX(y) == 0) { - fprintf(stderr, "Division by zero\n"); - exit(EXIT_FAILURE); - } aint res = Ls__Infix_37((void *)x, (void *)y); VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); STACK_PUSH(sp, res); From 164f49e71241b879c3c467fd321e93e51bb78fd2 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 18:10:19 +0300 Subject: [PATCH 060/141] cleanup unused files --- virtual_machine/dune | 42 ---------- virtual_machine/linker.c | 171 --------------------------------------- virtual_machine/linker.h | 21 ----- virtual_machine/stack.c | 58 ------------- virtual_machine/stack.h | 21 ----- 5 
files changed, 313 deletions(-) delete mode 100644 virtual_machine/dune delete mode 100644 virtual_machine/linker.c delete mode 100644 virtual_machine/linker.h delete mode 100755 virtual_machine/stack.c delete mode 100755 virtual_machine/stack.h diff --git a/virtual_machine/dune b/virtual_machine/dune deleted file mode 100644 index 387a87a9e..000000000 --- a/virtual_machine/dune +++ /dev/null @@ -1,42 +0,0 @@ -(rule - (target interpreter.exe) - (deps - (:main interpreter.c) - mac-specific-flags.txt) - (mode - (promote (until-clean))) - (action - (run - gcc - %{read-lines:mac-specific-flags.txt} - -g - %{main} - -o - %{target}))) - -(rule - (target mac-specific-flags.txt) - (enabled_if - (= %{system} "linux")) - (action - (write-file %{target} ""))) - -(rule - (target mac-specific-flags.txt) - (enabled_if - (= %{ocaml-config:system} macosx)) - (action - (write-file %{target} "-Wl,-no_pie"))) - -(cram (deps interpreter.exe)) - -(cram (applies_to test001) - (deps test001.bc ../regression/test001.input)) -(cram (applies_to test002) - (deps test002.bc ../regression/test002.input)) -(cram (applies_to test003) - (deps test003.bc ../regression/test003.input)) -(cram (applies_to test004) - (deps test004.bc ../regression/test004.input)) -(cram (applies_to test005) - (deps test005.bc ../regression/test005.input)) \ No newline at end of file diff --git a/virtual_machine/linker.c b/virtual_machine/linker.c deleted file mode 100644 index a5b0c0ced..000000000 --- a/virtual_machine/linker.c +++ /dev/null @@ -1,171 +0,0 @@ -#include "linker.h" -#include "bytecode.h" -#include "converter.h" -#include "ffi.h" -#include "memory.h" -#include "symbols.h" -#include -#include -#include -#include -#include -#include - -static void register_public_symbols(symbol_table *st, const bytecode *bc, - size_t code_offset, size_t global_base, - const int32_t *bc_to_insn_map) { - public_symbol pub; - bytecode_iterator iter; - bytecode_pubs_init(&iter, bc); - - while (bytecode_pubs_next(&iter, &pub)) 
{ - - if (pub.flag == PUB_FLAG_FUNCTION) { - // pub.code_offset is the offset in the bytecode, so we use the mapping - int32_t insn_idx = bc_to_insn_map[pub.code_offset]; - if (insn_idx == -1) { - fprintf(stderr, - "Error: public symbol '%s' at bytecode offset %d not decoded\n", - pub.name, pub.code_offset); - exit(EXIT_FAILURE); - } - int32_t code_idx = insn_idx + code_offset; - symbol_table_add_function(st, pub.name, code_idx); - } else { - int32_t global_idx = pub.code_offset + global_base; - symbol_table_add_global(st, pub.name, global_idx); - } - } -} - -/* - * Resolve all stubs from a decoded unit. - */ -static void resolve_stubs(decoded *dec, insn *all_code, size_t code_offset, - symbol_table *st, ffi_call_table *ffi_stubs) { - // Unit's code starts at all_code + code_offset - insn *code = all_code + code_offset; - stub *stubs_arr = dec->stubs; - size_t stubs_len = dec->stubs_len; - - for (size_t i = 0; i < stubs_len; i++) { - stub *s = &stubs_arr[i]; - size_t pi = s->patch_idx; - - resolved_symbol *sym = symbol_table_find(st, s->name); - switch (s->kind) { - - case STUB_FUNC: { - - if (sym) { - assert(sym->is_function); - code[pi].target = &all_code[sym->idx]; - } else { - insn *ffi_stub = ffi_call_table_find(ffi_stubs, s->name); - if (!ffi_stub) { - ffi_stub = ffi_call_table_add(ffi_stubs, s->name, - decoder_get_op_callc_ffi_stub()); - } - code[pi].target = ffi_stub; - } - break; - } - - case STUB_GLOBAL_LD: - case STUB_GLOBAL_ST: { - if (sym) { - // Global from another unit - assert(!sym->is_function); - if (s->kind == STUB_GLOBAL_LD) { - code[pi - 1].func = decoder_get_op_ld_glo(); - } else { - code[pi - 1].func = decoder_get_op_st_glo(); - } - code[pi].num = sym->idx; - } else { - // C global - void *ptr = dlsym(RTLD_DEFAULT, s->name); - if (ptr) { - if (s->kind == STUB_GLOBAL_LD) { - code[pi - 1].func = decoder_get_op_ld_glo_ext(); - } else { - code[pi - 1].func = decoder_get_op_st_glo_ext(); - } - code[pi].global_ptr = (aint *)ptr; - } else { - 
fprintf(stderr, "Error: unresolved global '%s'\n", s->name); - exit(EXIT_FAILURE); - } - } - break; - } - } - } -} - -program_link *link(bytecode **bc_arr, decoded **dec_arr, size_t n) { - symbol_table *st = symbol_table_create(); - ffi_call_table *ffi_stubs = ffi_call_table_create(); - - size_t total_code_len = 0; - size_t total_globals = 0; - - for (size_t i = 0; i < n; i++) { - decoded *dec = dec_arr[i]; - bytecode *bc = bc_arr[i]; - register_public_symbols(st, bc, total_code_len, total_globals, - dec->bc_to_insn_map); - total_code_len += dec->code_len; - total_globals += bc->globals_count; - } - - insn *all_code = ALLOC_ARRAY(insn, total_code_len); - insn **entry_points = ALLOC_ARRAY(insn *, n); - - size_t code_offset = 0; - for (size_t i = 0; i < n; i++) { - decoded *dec = dec_arr[i]; - - memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); - - entry_points[i] = &all_code[code_offset]; - - // Resolve internal jumps - for (size_t j = 0; j < dec->relocs_len; j++) { - size_t slot = dec->relocs[j]; - int32_t target_idx = all_code[code_offset + slot].num; - all_code[code_offset + slot].target = &all_code[code_offset + target_idx]; - } - - // Resolve all stubs - resolve_stubs(dec, all_code, code_offset, st, ffi_stubs); - - code_offset += dec->code_len; - } - - program_link *prog = ALLOC(program_link); - prog->code = all_code; - prog->code_len = total_code_len; - prog->total_globals = total_globals; - prog->entry_points = entry_points; - prog->entry_points_len = n; - - symbol_table_destroy(st); - ffi_call_table_destroy(ffi_stubs); - // NOTE: we don't free bytecode here since it's used for strings etc. 
- for (size_t i = 0; i < n; i++) { - decoded_free(dec_arr[i]); - } - free(dec_arr); - - return prog; -} - -void program_free(program_link *prog) { - if (!prog) { - return; - } - free(prog->code); - free(prog->entry_points); - free(prog); -} diff --git a/virtual_machine/linker.h b/virtual_machine/linker.h deleted file mode 100644 index e53c753b7..000000000 --- a/virtual_machine/linker.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef LINKER_H -#define LINKER_H - -#include "bytecode.h" -#include "converter.h" -#include "insn.h" -#include - -typedef struct { - insn *code; - size_t code_len; - size_t total_globals; - insn **entry_points; // Entry point for each unit (pointer into code) - size_t entry_points_len; -} program_link; - -program_link *link(bytecode **bc_arr, decoded **dec_arr, size_t n); - -void program_free(program_link *prog); - -#endif // LINKER_H diff --git a/virtual_machine/stack.c b/virtual_machine/stack.c deleted file mode 100755 index daff25409..000000000 --- a/virtual_machine/stack.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Data stack implementation for the Lama VM. - * Handles operand storage for expressions and parameters. - * Integrated with the garbage collector for root scanning. 
- */ - -#include "stack.h" -#include -#include - -extern size_t __gc_stack_top, __gc_stack_bottom; - -void stack_init(stack_t *s) { - // mandated by gc - s->sp = s->data + STACK_SIZE - 1; - __gc_stack_bottom = ((size_t)(s->data + STACK_SIZE)); - __gc_stack_top = (size_t)s->sp & ~0xFUL; -} - -void stack_push(stack_t *s, aint val) { - if (s->sp <= s->data) { - fprintf(stderr, "Stack overflow\n"); - exit(1); - } - *s->sp-- = val; - if (((size_t)s->sp & 0xF) == 0) { - __gc_stack_top = (size_t)s->sp; - } -} - -aint stack_pop(stack_t *s) { - if (s->sp >= s->data + STACK_SIZE - 1) { - fprintf(stderr, "Cannot pop from an empty stack\n"); - exit(1); - } - aint val = *++s->sp; - return val; -} - -aint stack_peek(const stack_t *s) { - if (s->sp >= s->data + STACK_SIZE - 1) { - fprintf(stderr, "Cannot peek from an empty stack\n"); - exit(1); - } - return *(s->sp + 1); -} - -void stack_dup(stack_t *s) { - aint top = stack_peek(s); - stack_push(s, top); -} - -void stack_swap(stack_t *s) { - aint y = stack_pop(s); - aint x = stack_pop(s); - stack_push(s, y); - stack_push(s, x); -} diff --git a/virtual_machine/stack.h b/virtual_machine/stack.h deleted file mode 100755 index 720684cac..000000000 --- a/virtual_machine/stack.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef STACK_H -#define STACK_H - -#include "../runtime/runtime_common.h" -#include - -#define STACK_SIZE 1024 - -typedef struct { - aint data[STACK_SIZE]; - aint *sp; -} stack_t; - -void stack_init(stack_t *s); -void stack_push(stack_t *s, aint val); -aint stack_pop(stack_t *s); -aint stack_peek(const stack_t *s); -void stack_dup(stack_t *s); -void stack_swap(stack_t *s); - -#endif From 8edeb25fe744793e9c95439b6b67e6eaa4aa327d Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 18:15:09 +0300 Subject: [PATCH 061/141] return `bool` instead of `code` --- virtual_machine/converter.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c 
index c9cf5344c..b043cf4ee 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -308,7 +308,7 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, return true; } -static insn *decode_internal(decode_ctx *ctx) { +static bool decode_internal(decode_ctx *ctx) { const bytecode *bc = ctx->bc; size_t global_base = ctx->global_offset; @@ -329,7 +329,7 @@ static insn *decode_internal(decode_ctx *ctx) { EMIT_FUNC(ctx, op_init); EMIT_NUM(ctx, 0); // placeholder for op_eof - insn *result = NULL; + bool ok = false; while (!reader_eof(&ctx->reader)) { size_t current_bc_off = reader_pos(&ctx->reader); @@ -882,7 +882,7 @@ static insn *decode_internal(decode_ctx *ctx) { ctx->bc_to_insn_map[i] = meta[i].resolved_idx; } - result = ctx->code.data; + ok = true; cleanup: da_free(ctx->sv.func_stack); @@ -897,7 +897,7 @@ static insn *decode_internal(decode_ctx *ctx) { } free(meta); - return result; + return ok; } static void register_public_symbols(symbol_table *st, const bytecode *bc, @@ -1027,15 +1027,14 @@ program *decode(bytecode **bc_arr, size_t n, aint *globals) { decode_ctx ctx; decode_ctx_init(&ctx, bc_arr[i], st, ffi, &ext_globals, globals, total_globals); - insn *code = decode_internal(&ctx); - if (!code) { + if (!decode_internal(&ctx)) { fprintf(stderr, "Failed to decode %s\n", bc_arr[i]->name); free(ctx.bc_to_insn_map); goto cleanup; } dec_arr[i] = (decoded){ - .code = code, + .code = ctx.code.data, .code_len = ctx.code.len, .bc_to_insn_map = ctx.bc_to_insn_map, .relocs = ctx.relocs.data, From d468a23f824bad6f000c38a37f9921475e4b40e3 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 18:25:52 +0300 Subject: [PATCH 062/141] memset 0 gc stack range --- virtual_machine/ops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index ee44517ee..73d6cb25c 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -465,7 +465,9 @@ void op_begin(DECL_STATE) { 
STACK_PUSH(sp, 0); } - __gc_stack_top = (size_t)(sp - max_depth); + aint *offset = sp - max_depth; + memset(offset + 1, 0, max_depth * sizeof(aint)); + __gc_stack_top = (size_t)offset; DISPATCH(); } From e9fdb5d4da4ba64dc609341a5875c012b99e2b9b Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 18:40:37 +0300 Subject: [PATCH 063/141] stdout to stderr --- virtual_machine/lama.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 1cec60cdb..2f72b576b 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) { } break; default: - print_usage(stdout, argv[0]); + print_usage(stderr, argv[0]); return 1; } } From f1f241a541f20f7688e34ce0e0405eb4f574be41 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 18:52:30 +0300 Subject: [PATCH 064/141] better `README.md` --- virtual_machine/README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/virtual_machine/README.md b/virtual_machine/README.md index e7416b987..3dbb459d7 100644 --- a/virtual_machine/README.md +++ b/virtual_machine/README.md @@ -24,7 +24,7 @@ The VM is tightly integrated with the Lama runtime (`../runtime/`). It relies on ## Bytecode format ### Layout -Bytes are laid out in little-endian order. +Multi-byte integers are laid out in little-endian order. 1. Header (16 bytes) 2. String table (variable) 3. Imports (number of imports * 4 bytes) @@ -32,12 +32,11 @@ Bytes are laid out in little-endian order. 5. 
Code section (until 0xFF) ### Header -| offset | size | field | -|--------|------|-------| -| 0 | 4 | string table size | -| 4 | 4 | globals count | -| 8 | 4 | number of imports | -| 12 | 4 | number of public symbols | +The header is 16 bytes: +* `string_table_size` (int32): size of the string table +* `globals_count` (int32): number of global slots (stored on the stack) +* `imports_count` (int32): number of imports +* `public_symbols_count` (int32): number of public symbols ### Imports Each entry is 4 bytes: From e461bfe488959b3583c9e5fb6335c38d281c0aa1 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 19:30:25 +0300 Subject: [PATCH 065/141] lift branching --- virtual_machine/loader.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 72f11df44..69ad74e5c 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -77,23 +77,10 @@ static char *extract_unit_name(const char *filename) { /* * Load a single unit and its dependencies recursively. 
*/ -static bool load_unit_recursive(bytecode_array *units, const char *s, +static bool load_unit_recursive(bytecode_array *units, const char *unit_name, + const char *filepath, const search_paths *paths) { - char *filepath = NULL; - char *unit_name = NULL; - - // The initial call uses a filepath, recursive calls use unit names - if (is_filepath(s)) { - filepath = ESTRDUP(s); - unit_name = extract_unit_name(s); - } else { - filepath = build_unit_path(s, paths); - unit_name = ESTRDUP(s); - } - if (find_loaded(units, unit_name)) { - free(filepath); - free(unit_name); return true; } @@ -101,11 +88,9 @@ static bool load_unit_recursive(bytecode_array *units, const char *s, if (!bc) { fprintf(stderr, "Failed to load dependency '%s' from '%s'\n", unit_name, filepath); - free(filepath); - free(unit_name); return false; } - bc->name = unit_name; + bc->name = ESTRDUP(unit_name); // Recursively load dependencies first (topological order) const char *import_name; @@ -118,11 +103,12 @@ static bool load_unit_recursive(bytecode_array *units, const char *s, continue; } - load_unit_recursive(units, import_name, paths); + char *dep_path = build_unit_path(import_name, paths); + load_unit_recursive(units, import_name, dep_path, paths); + free(dep_path); } da_append(*units, bc); - free(filepath); return true; } @@ -130,7 +116,19 @@ load_result load(const char *main_unit_path, const search_paths *paths) { bytecode_array m; da_init(m); - load_unit_recursive(&m, main_unit_path, paths); + char *filepath; + char *unit_name; + if (is_filepath(main_unit_path)) { + filepath = ESTRDUP(main_unit_path); + unit_name = extract_unit_name(main_unit_path); + } else { + filepath = build_unit_path(main_unit_path, paths); + unit_name = ESTRDUP(main_unit_path); + } + + load_unit_recursive(&m, unit_name, filepath, paths); + free(filepath); + free(unit_name); load_result result = { .units = m.data, From 92ad137c11e0829beb44651301c385b0a187487b Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 
19:33:15 +0300 Subject: [PATCH 066/141] better `is_filepath` --- virtual_machine/loader.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 69ad74e5c..7d6ad1241 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -43,9 +43,10 @@ static char *build_unit_path(const char *unit_name, const search_paths *paths) { * Check if a string looks like a file path (contains '/' or ends with '.bc') */ static bool is_filepath(const char *str) { + if (strchr(str, '/') != NULL) + return true; size_t len = strlen(str); - return strchr(str, '/') != NULL || - (len > 3 && strcmp(str + len - 3, ".bc") == 0); + return len > 3 && strcmp(str + len - 3, ".bc") == 0; } static bool find_loaded(bytecode_array *units, const char *name) { From 716c1c854016cd80c4fbe46e943a4b84e1996ac5 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 19:42:40 +0300 Subject: [PATCH 067/141] prefer static array for path --- virtual_machine/loader.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 7d6ad1241..d4966a2bf 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -25,8 +25,9 @@ typedef struct { /* * Build the path to a unit's .bc file by searching through paths. 
*/ -static char *build_unit_path(const char *unit_name, const search_paths *paths) { - char *path = ALLOC_ARRAY(char, MAX_PATH_LEN); +static const char *build_unit_path(const char *unit_name, + const search_paths *paths) { + static char path[MAX_PATH_LEN]; for (size_t i = 0; i < paths->len; i++) { snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], unit_name); @@ -35,7 +36,6 @@ static char *build_unit_path(const char *unit_name, const search_paths *paths) { } } - free(path); return NULL; } @@ -104,9 +104,8 @@ static bool load_unit_recursive(bytecode_array *units, const char *unit_name, continue; } - char *dep_path = build_unit_path(import_name, paths); + const char *dep_path = build_unit_path(import_name, paths); load_unit_recursive(units, import_name, dep_path, paths); - free(dep_path); } da_append(*units, bc); @@ -117,10 +116,10 @@ load_result load(const char *main_unit_path, const search_paths *paths) { bytecode_array m; da_init(m); - char *filepath; + const char *filepath; char *unit_name; if (is_filepath(main_unit_path)) { - filepath = ESTRDUP(main_unit_path); + filepath = main_unit_path; unit_name = extract_unit_name(main_unit_path); } else { filepath = build_unit_path(main_unit_path, paths); @@ -128,7 +127,6 @@ load_result load(const char *main_unit_path, const search_paths *paths) { } load_unit_recursive(&m, unit_name, filepath, paths); - free(filepath); free(unit_name); load_result result = { From c35e3d3b28b483833c813f2c6e91494efe720015 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 20:13:17 +0300 Subject: [PATCH 068/141] better --- virtual_machine/vm.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index af826e9c1..dcd6ba278 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -45,18 +45,18 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, vm->bc_len = lr.units_len; vm->stack_size = 8 * 1024 * 1024; - vm->stack_base = 
mmap(NULL, vm->stack_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); - if (vm->stack_base == MAP_FAILED) { + void *mmap_base = mmap(NULL, vm->stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); + if (mmap_base == MAP_FAILED) { perror("mmap stack"); vm_destroy(vm); return NULL; } + vm->stack_base = (char *)mmap_base + vm->stack_size; // Compute total globals and place at the top of the stack vm->total_globals = count_globals(lr.units, lr.units_len); - aint *stack_top = (aint *)((char *)vm->stack_base + vm->stack_size); - vm->globals = stack_top - vm->total_globals; + vm->globals = (aint *)vm->stack_base - vm->total_globals; memset(vm->globals, 0, vm->total_globals * sizeof(aint)); program *prog = decode(lr.units, lr.units_len, vm->globals); @@ -89,7 +89,7 @@ void vm_destroy(virtual_machine *vm) { free(vm->code); free(vm->entry_points); if (vm->stack_base && vm->stack_base != MAP_FAILED) { - munmap(vm->stack_base, vm->stack_size); + munmap((char *)vm->stack_base - vm->stack_size, vm->stack_size); } free(vm); } @@ -100,11 +100,10 @@ void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { } aint vm_run(virtual_machine *vm) { - aint *stack_top = (aint *)((char *)vm->stack_base + vm->stack_size); aint *sp = vm->globals - 1; __gc_stack_top = (size_t)sp; - __gc_stack_bottom = (size_t)stack_top; + __gc_stack_bottom = (size_t)vm->stack_base; aint *bp; aint ret_val = 0; From d9339e57b662851ee155329ac87bf3cd81cec611 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 20:18:49 +0300 Subject: [PATCH 069/141] move `count_globals` to `bytecode.c` --- virtual_machine/bytecode.c | 8 ++++++++ virtual_machine/bytecode.h | 2 ++ virtual_machine/converter.c | 8 -------- virtual_machine/converter.h | 2 -- virtual_machine/vm.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 13f1f1d0c..e82016a1f 100644 --- 
a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -118,6 +118,14 @@ bool bytecode_imports_next(bytecode_iterator *it, const char **out_name) { return true; } +size_t bytecode_count_globals(bytecode **bc_arr, size_t n) { + size_t total = 0; + for (size_t i = 0; i < n; i++) { + total += bc_arr[i]->globals_count; + } + return total; +} + void bytecode_free(bytecode *bc) { if (!bc) { return; diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 04a3fd049..4f4b57808 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -49,6 +49,8 @@ typedef struct { size_t curr; } bytecode_iterator; +size_t bytecode_count_globals(bytecode **bc_arr, size_t n); + void bytecode_pubs_init(bytecode_iterator *iter, const bytecode *bc); bool bytecode_pubs_next(bytecode_iterator *iter, public_symbol *out); diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index b043cf4ee..2d6a55032 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1002,14 +1002,6 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, return prog; } -size_t count_globals(bytecode **bc_arr, size_t n) { - size_t total = 0; - for (size_t i = 0; i < n; i++) { - total += bc_arr[i]->globals_count; - } - return total; -} - program *decode(bytecode **bc_arr, size_t n, aint *globals) { symbol_table *st = symbol_table_create(); ffi_call_table *ffi = ffi_call_table_create(); diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h index aedc40a4f..5f493ec03 100644 --- a/virtual_machine/converter.h +++ b/virtual_machine/converter.h @@ -17,8 +17,6 @@ typedef struct { size_t ffi_len; } program; -size_t count_globals(bytecode **bc_arr, size_t n); - program *decode(bytecode **bc_arr, size_t n, aint *globals); void program_free(program *prog); diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index dcd6ba278..319c566c4 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -55,7 
+55,7 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, vm->stack_base = (char *)mmap_base + vm->stack_size; // Compute total globals and place at the top of the stack - vm->total_globals = count_globals(lr.units, lr.units_len); + vm->total_globals = bytecode_count_globals(lr.units, lr.units_len); vm->globals = (aint *)vm->stack_base - vm->total_globals; memset(vm->globals, 0, vm->total_globals * sizeof(aint)); From aa84c241423adcb6329159e14cb97147717fd0ba Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 21:52:38 +0300 Subject: [PATCH 070/141] Revert "track external globals for gc" This reverts commit 8b48195e69a12bb116fe08cd4da9eb45d6208908. --- virtual_machine/converter.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 2d6a55032..63c5906f0 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1,5 +1,4 @@ #include "converter.h" -#include "../runtime/gc.h" #include "bytecode.h" #include "da.h" #include "debug.h" @@ -159,8 +158,6 @@ static void *resolve_ext_global(ext_global_cache *cache, const char *name) { return NULL; } - push_extra_root((void **)ptr); - ext_global_entry entry = {.name = name, .ptr = ptr}; da_append(cache->entries, entry); return ptr; From 6c78ce2370cc531e6efa52223ea096c55d8f077f Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 21:57:38 +0300 Subject: [PATCH 071/141] dedup `emit_st_glo` and `emit_ld_glo` --- virtual_machine/converter.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 63c5906f0..93c591264 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -216,26 +216,14 @@ static bool emit_ext_glo(decode_ctx *ctx, const char *glob_name, fn op) { return true; } -static bool emit_ld_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { +static bool emit_glo(decode_ctx 
*ctx, int32_t idx, size_t global_base, fn op) { if (IS_EXT_REF(idx)) { int str_offset = EXT_REF_INDEX(idx); const char *glob_name = bytecode_get_string(ctx->bc, str_offset); - VM_DEBUG("DECODE: OP_LD external global '%s'\n", glob_name); - return emit_ext_glo(ctx, glob_name, op_ld_glo); + VM_DEBUG("DECODE: external global '%s'\n", glob_name); + return emit_ext_glo(ctx, glob_name, op); } - EMIT_FUNC(ctx, op_ld_glo); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); - return true; -} - -static bool emit_st_glo(decode_ctx *ctx, int32_t idx, size_t global_base) { - if (IS_EXT_REF(idx)) { - int str_offset = EXT_REF_INDEX(idx); - const char *glob_name = bytecode_get_string(ctx->bc, str_offset); - VM_DEBUG("DECODE: OP_ST external global '%s'\n", glob_name); - return emit_ext_glo(ctx, glob_name, op_st_glo); - } - EMIT_FUNC(ctx, op_st_glo); + EMIT_FUNC(ctx, op); EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); return true; } @@ -517,13 +505,15 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LD: { DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); - emit_ld_glo(ctx, idx, global_base); + VM_DEBUG("DECODE: OP_LD global idx=%d\n", idx); + emit_glo(ctx, idx, global_base, op_ld_glo); break; } case OP_ST: { int32_t idx = reader_i32(&ctx->reader); - emit_st_glo(ctx, idx, global_base); + VM_DEBUG("DECODE: OP_ST global idx=%d\n", idx); + emit_glo(ctx, idx, global_base, op_st_glo); break; } @@ -692,7 +682,7 @@ static bool decode_internal(decode_ctx *ctx) { switch (designation_type) { case 0: // Global DEPTH_PUSH(ctx->sv); - emit_ld_glo(ctx, idx, global_base); + emit_glo(ctx, idx, global_base, op_ld_glo); break; case 1: // Local DEPTH_PUSH(ctx->sv); From c4fa9c54861e797396421c6faa16e8a6b4b558c9 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 22:40:20 +0300 Subject: [PATCH 072/141] move `stack_validation.h` to `converter.c` --- virtual_machine/converter.c | 55 +++++++++++++++++++++++++++- virtual_machine/stack_validation.h | 57 
------------------------------ 2 files changed, 54 insertions(+), 58 deletions(-) delete mode 100644 virtual_machine/stack_validation.h diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 93c591264..30bea61e5 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -6,7 +6,6 @@ #include "memory.h" #include "opcodes.h" #include "ops.h" -#include "stack_validation.h" #include "symbols.h" #include #include @@ -89,6 +88,33 @@ typedef struct { } entries; } ext_global_cache; +/* + * Different states of reachability for stack validation: + * LIVE: currently decoding sequentially, reachable from previous instruction + * BARRIER: just emitted JMP or END, so next instruction is reachable but not + * from previous instruction + * DEAD: not reachable from previous instruction + */ +typedef enum { LIVE, BARRIER, DEAD } reach_state; + +typedef struct { + int32_t max_depth; // max stack depth of the function + size_t max_depth_pos; // position in code array where max_depth is emitted + // (for patching) +} func_frame; + +typedef struct { + int32_t depth; + reach_state state; + int32_t max_depth; + size_t max_depth_pos; + struct { + func_frame *data; + size_t len; + size_t cap; + } func_stack; +} stack_validation; + typedef struct { const bytecode *bc; @@ -293,6 +319,28 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, return true; } +#define DEPTH_INC(sv, n) \ + do { \ + if ((sv).state != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), \ + (n)); \ + (sv).depth += (n); \ + if ((sv).depth > (sv).max_depth) \ + (sv).max_depth = (sv).depth; \ + } \ + } while (0) +#define DEPTH_DEC(sv, n) \ + do { \ + if ((sv).state != DEAD) { \ + VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), \ + (n)); \ + (sv).depth -= (n); \ + assert((sv).depth >= 0 && "stack underflow"); \ + } \ + } while (0) +#define DEPTH_PUSH(sv) DEPTH_INC(sv, 1) +#define DEPTH_POP(sv) DEPTH_DEC(sv, 1) + static bool 
decode_internal(decode_ctx *ctx) { const bytecode *bc = ctx->bc; @@ -887,6 +935,11 @@ static bool decode_internal(decode_ctx *ctx) { return ok; } +#undef DEPTH_INC +#undef DEPTH_DEC +#undef DEPTH_PUSH +#undef DEPTH_POP + static void register_public_symbols(symbol_table *st, const bytecode *bc, size_t code_offset, size_t global_base, const int32_t *bc_to_insn_map) { diff --git a/virtual_machine/stack_validation.h b/virtual_machine/stack_validation.h deleted file mode 100644 index 23a2e287d..000000000 --- a/virtual_machine/stack_validation.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef STACK_VALIDATION_H -#define STACK_VALIDATION_H - -#include "debug.h" -#include -#include - -/* - * Different states of reachability for stack validation: - * LIVE: currently decoding sequentially, reachable from previous instruction - * BARRIER: just emitted JMP or END, so next instruction is reachable but not - * from previous instruction - * DEAD: not reachable from previous instruction - */ -typedef enum { LIVE, BARRIER, DEAD } reach_state; - -typedef struct { - int32_t max_depth; // max stack depth of the function - size_t max_depth_pos; // position in code array where max_depth is emitted - // (for patching) -} func_frame; - -typedef struct { - int32_t depth; - reach_state state; - int32_t max_depth; - size_t max_depth_pos; - struct { - func_frame *data; - size_t len; - size_t cap; - } func_stack; -} stack_validation; - -#define DEPTH_INC(sv, n) \ - do { \ - if ((sv).state != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), \ - (n)); \ - (sv).depth += (n); \ - if ((sv).depth > (sv).max_depth) \ - (sv).max_depth = (sv).depth; \ - } \ - } while (0) -#define DEPTH_DEC(sv, n) \ - do { \ - if ((sv).state != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), \ - (n)); \ - (sv).depth -= (n); \ - assert((sv).depth >= 0 && "stack underflow"); \ - } \ - } while (0) -#define DEPTH_PUSH(sv) DEPTH_INC(sv, 1) -#define DEPTH_POP(sv) 
DEPTH_DEC(sv, 1) - -#endif // STACK_VALIDATION_H From c5ab0098850e63f9f64119d5c7590917835661c0 Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 22:47:16 +0300 Subject: [PATCH 073/141] move `sv` init to `decode_ctx_init` --- virtual_machine/converter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 30bea61e5..44a410b75 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -159,6 +159,10 @@ static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, ctx->ffi = ffi; ctx->ext_globals = ext_globals; + ctx->sv = (stack_validation){ + .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; + da_init(ctx->sv.func_stack); + reader_init(&ctx->reader, bc->code, bc->code_size); } @@ -355,10 +359,6 @@ static bool decode_internal(decode_ctx *ctx) { meta[i].fixups = NULL; } - ctx->sv = (stack_validation){ - .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; - da_init(ctx->sv.func_stack); - EMIT_FUNC(ctx, op_init); EMIT_NUM(ctx, 0); // placeholder for op_eof From fd3b4b53b911c0194797f65100c0f7c09eab284b Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 22:52:51 +0300 Subject: [PATCH 074/141] fix stack validation by not merging all operations --- virtual_machine/converter.c | 84 ++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 44a410b75..67032cb5a 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -435,67 +435,80 @@ static bool decode_internal(decode_ctx *ctx) { break; case OP_BINOP_ADD: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_add); break; case OP_BINOP_SUB: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_sub); break; case OP_BINOP_MUL: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + 
DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_mul); break; case OP_BINOP_DIV: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_div); break; case OP_BINOP_MOD: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_mod); break; case OP_BINOP_LT: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_lt); break; case OP_BINOP_LE: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_le); break; case OP_BINOP_GT: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_gt); break; case OP_BINOP_GE: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ge); break; case OP_BINOP_EQ: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_eq); break; case OP_BINOP_NE: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ne); break; case OP_BINOP_AND: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_and); break; case OP_BINOP_OR: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_or); break; @@ -536,17 +549,21 @@ static bool decode_internal(decode_ctx *ctx) { break; case OP_SWAP: + DEPTH_DEC(ctx->sv, 2); + DEPTH_INC(ctx->sv, 2); EMIT_FUNC(ctx, op_swap); break; case OP_ELEM: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_elem); break; case OP_STA: // TODO: - DEPTH_DEC(ctx->sv, 2); + DEPTH_DEC(ctx->sv, 3); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_sta); break; @@ -620,8 +637,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_BARRAY: { int32_t n = reader_i32(&ctx->reader); - // push array, pop elements == n - 1 net stack change - DEPTH_DEC(ctx->sv, n - 1); + DEPTH_DEC(ctx->sv, n); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_barray); EMIT_NUM(ctx, n); break; @@ -630,8 +647,8 @@ static bool 
decode_internal(decode_ctx *ctx) { case OP_SEXP: { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); - // push sexp, pop elements == n_fields - 1 net stack change - DEPTH_DEC(ctx->sv, n_fields - 1); + DEPTH_DEC(ctx->sv, n_fields); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_sexp); EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); EMIT_NUM(ctx, n_fields); @@ -639,6 +656,8 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_TAG: { + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_tag); @@ -648,6 +667,8 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_ARRAY: { + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); int32_t n = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_array); EMIT_NUM(ctx, n); @@ -665,31 +686,44 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_PATT_STR_CMP: - DEPTH_POP(ctx->sv); + DEPTH_DEC(ctx->sv, 2); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_str_cmp); break; case OP_PATT_STRING: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_string); break; case OP_PATT_ARRAY: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_array); break; case OP_PATT_SEXP: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_sexp); break; case OP_PATT_BOXED: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_boxed); break; case OP_PATT_UNBOXED: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_unboxed); break; case OP_PATT_CLOSURE: + DEPTH_POP(ctx->sv); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_patt_closure); break; @@ -753,7 +787,8 @@ static bool decode_internal(decode_ctx *ctx) { } } - DEPTH_DEC(ctx->sv, n_captured - 1); + DEPTH_DEC(ctx->sv, n_captured); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_closure); @@ -805,8 +840,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_CALL: { int32_t target_off = reader_i32(&ctx->reader); 
int32_t n_args = reader_i32(&ctx->reader); - // consume n_args, produce 1 result = net -(n_args - 1) - DEPTH_DEC(ctx->sv, n_args - 1); + DEPTH_DEC(ctx->sv, n_args); + DEPTH_PUSH(ctx->sv); VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", @@ -861,7 +896,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_CALLC: { int32_t n_args = reader_i32(&ctx->reader); - DEPTH_DEC(ctx->sv, n_args); + DEPTH_DEC(ctx->sv, n_args + 1); + DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_callc); EMIT_NUM(ctx, n_args); break; From 80e5008cc65b2b1603e934ec8cd520a9dbe03caf Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 3 Apr 2026 23:36:38 +0300 Subject: [PATCH 075/141] remove `func_stack` --- virtual_machine/converter.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 67032cb5a..004cf9ccf 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -97,22 +97,11 @@ typedef struct { */ typedef enum { LIVE, BARRIER, DEAD } reach_state; -typedef struct { - int32_t max_depth; // max stack depth of the function - size_t max_depth_pos; // position in code array where max_depth is emitted - // (for patching) -} func_frame; - typedef struct { int32_t depth; reach_state state; int32_t max_depth; size_t max_depth_pos; - struct { - func_frame *data; - size_t len; - size_t cap; - } func_stack; } stack_validation; typedef struct { @@ -161,7 +150,6 @@ static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, ctx->sv = (stack_validation){ .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; - da_init(ctx->sv.func_stack); reader_init(&ctx->reader, bc->code, bc->code_size); } @@ -732,10 +720,6 @@ static bool decode_internal(decode_ctx *ctx) { int32_t n_args = reader_i32(&ctx->reader); int32_t n_locals = reader_i32(&ctx->reader); ctx->sv.depth = 0; - // Save outer function's max_depth - func_frame frame = {.max_depth = ctx->sv.max_depth, - 
.max_depth_pos = ctx->sv.max_depth_pos}; - da_append(ctx->sv.func_stack, frame); ctx->sv.max_depth = 0; EMIT_FUNC(ctx, op_begin); EMIT_NUM(ctx, n_args); @@ -915,12 +899,6 @@ static bool decode_internal(decode_ctx *ctx) { ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; ctx->sv.state = BARRIER; } - assert(ctx->sv.func_stack.len > 0); - ctx->sv.max_depth = - ctx->sv.func_stack.data[ctx->sv.func_stack.len - 1].max_depth; - ctx->sv.max_depth_pos = - ctx->sv.func_stack.data[ctx->sv.func_stack.len - 1].max_depth_pos; - ctx->sv.func_stack.len--; break; case OP_LINE: { @@ -956,7 +934,6 @@ static bool decode_internal(decode_ctx *ctx) { ok = true; cleanup: - da_free(ctx->sv.func_stack); // Free temporary metadata and fixup nodes for (size_t i = 0; i < bc->code_size; i++) { fixup_node *node = meta[i].fixups; From 5962987e8548466573cbf01e2133fad44bcf5978 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 5 Apr 2026 19:52:49 +0300 Subject: [PATCH 076/141] hash tag during decoding --- virtual_machine/converter.c | 7 +++++-- virtual_machine/insn.h | 1 + virtual_machine/ops.c | 13 +++++-------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 004cf9ccf..ccc07042a 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -15,6 +15,8 @@ #include #include +extern aint LtagHash(char *s); + /* * Sentinel value for external references (both functions and globals). * Address = -index - 1, so index 0 becomes -1, index 1 becomes -2, etc. 
@@ -30,6 +32,7 @@ */ #define EMIT_FUNC(ctx, f) da_append((ctx)->code, ((insn){.func = (f)})) #define EMIT_NUM(ctx, n) da_append((ctx)->code, ((insn){.num = (n)})) +#define EMIT_ANUM(ctx, n) da_append((ctx)->code, ((insn){.anum = (n)})) #define EMIT_STR(ctx, s) da_append((ctx)->code, ((insn){.str = (s)})) #define EMIT_TARGET(ctx, t) da_append((ctx)->code, ((insn){.target = (t)})) #define EMIT_GLOBAL_PTR(ctx, p) \ @@ -638,7 +641,7 @@ static bool decode_internal(decode_ctx *ctx) { DEPTH_DEC(ctx->sv, n_fields); DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_sexp); - EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); + EMIT_ANUM(ctx, LtagHash((char *)bytecode_get_string(bc, tag_idx))); EMIT_NUM(ctx, n_fields); break; } @@ -649,7 +652,7 @@ static bool decode_internal(decode_ctx *ctx) { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); EMIT_FUNC(ctx, op_tag); - EMIT_STR(ctx, bytecode_get_string(bc, tag_idx)); + EMIT_ANUM(ctx, LtagHash((char *)bytecode_get_string(bc, tag_idx))); EMIT_NUM(ctx, n_fields); break; } diff --git a/virtual_machine/insn.h b/virtual_machine/insn.h index 74233aeb4..c665c3082 100644 --- a/virtual_machine/insn.h +++ b/virtual_machine/insn.h @@ -20,6 +20,7 @@ typedef void (*fn)(DECL_STATE); typedef union insn { fn func; // Pointer to function int32_t num; // Integer operand (signed) + aint anum; // Runtime value operand const char *str; // String operand (direct pointer) union insn *target; // Direct jump target (pointer to insn) aint *global_ptr; // Pointer to a C global variable diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 73d6cb25c..5a0a1afe4 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -242,13 +242,11 @@ void op_barray(DECL_STATE) { void op_sexp(DECL_STATE) { ip++; - const char *tag_str = ip->str; + aint tag_hash = ip->anum; ip++; int32_t n_fields = ip->num; - aint tag_hash = LtagHash((char *)tag_str); - VM_DEBUG("SEXP: tag=\"%s\" (hash=0x%lx), n_fields=%d\n", tag_str, tag_hash, 
- n_fields); + VM_DEBUG("SEXP: tag_hash=0x%lx, n_fields=%d\n", tag_hash, n_fields); // Use the free slot at sp for tag_hash, reverse the whole range in-place *sp = tag_hash; STACK_REVERSE(sp, n_fields + 1); @@ -262,14 +260,13 @@ void op_sexp(DECL_STATE) { void op_tag(DECL_STATE) { ip++; - const char *tag_str = ip->str; + aint tag_hash = ip->anum; ip++; int32_t n_fields = ip->num; - aint tag_hash = LtagHash((char *)tag_str); aint val = STACK_POP(sp); - VM_DEBUG("TAG: tag='%s' hash=0x%lx n_fields=%d val=0x%lx\n", tag_str, - (long)tag_hash, n_fields, (long)val); + VM_DEBUG("TAG: tag_hash=0x%lx n_fields=%d val=0x%lx\n", (long)tag_hash, + n_fields, (long)val); aint result = Btag((void *)val, tag_hash, BOX(n_fields)); VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); STACK_PUSH(sp, result); From 9aef730237f898359cbc346394aed6c9940bba45 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 5 Apr 2026 21:08:34 +0300 Subject: [PATCH 077/141] validate `ld/st` operations --- virtual_machine/converter.c | 70 +++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index ccc07042a..62451eba3 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -39,6 +39,15 @@ extern aint LtagHash(char *s); da_append((ctx)->code, ((insn){.global_ptr = (p)})) #define EMIT_PTR(ctx, p) da_append((ctx)->code, ((insn){.ptr = (p)})) +#define CHECK_IDX(idx, limit, name) \ + do { \ + if ((idx) < 0 || (idx) >= (limit)) { \ + fprintf(stderr, "%s: index %d >= %d at bc_off=%zu\n", name, (int)(idx), \ + (int)(limit), current_bc_off); \ + goto cleanup; \ + } \ + } while (0) + #define FFI_STUB_SIZE 2 typedef enum { @@ -66,6 +75,12 @@ typedef struct { fixup_node *fixups; // Linked list of forward jumps pointing here } meta_info; +// Maps CLOSURE target bytecode offsets to their n_captured +typedef struct { + int32_t bc_off; + int32_t n_captured; +} closure_info; + typedef struct { insn 
*code; size_t code_len; @@ -107,6 +122,12 @@ typedef struct { size_t max_depth_pos; } stack_validation; +typedef struct { + int32_t n_locals; + int32_t n_args; + int32_t n_captured; // 0 for BEGIN, >0 for BEGIN_CLOSURE +} func_ctx; + typedef struct { const bytecode *bc; @@ -132,6 +153,14 @@ typedef struct { ext_global_cache *ext_globals; stack_validation sv; + + struct { + closure_info *data; + size_t len; + size_t cap; + } closures; // CLOSURE target_off -> n_captured mapping + + func_ctx func; } decode_ctx; static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, @@ -146,17 +175,30 @@ static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, da_init(ctx->code); da_init(ctx->relocs); + da_init(ctx->closures); ctx->st = st; ctx->ffi = ffi; ctx->ext_globals = ext_globals; - ctx->sv = (stack_validation){ - .depth = 0, .state = LIVE, .max_depth = 0, .max_depth_pos = 0}; + ctx->sv = (stack_validation){0}; + ctx->func = (func_ctx){0}; reader_init(&ctx->reader, bc->code, bc->code_size); } +/* + * Returns n_captured for a CLOSURE target, or -1 if no CLOSURE targets this + * offset + */ +static int32_t find_n_captured(decode_ctx *ctx, int32_t bc_off) { + for (size_t i = 0; i < ctx->closures.len; i++) { + if (ctx->closures.data[i].bc_off == bc_off) + return ctx->closures.data[i].n_captured; + } + return -1; +} + /* * Resolve an external C global -- prefix with "global_", dlsym, cache. 
*/ @@ -576,6 +618,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LD_LOC: { DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); + CHECK_IDX(idx, ctx->func.n_locals, "LD_LOC"); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); break; @@ -583,6 +626,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_ST_LOC: { int32_t idx = reader_i32(&ctx->reader); + CHECK_IDX(idx, ctx->func.n_locals, "ST_LOC"); EMIT_FUNC(ctx, op_st_loc); EMIT_NUM(ctx, idx); break; @@ -591,6 +635,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LD_ARG: { DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); + CHECK_IDX(idx, ctx->func.n_args, "LD_ARG"); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); break; @@ -598,6 +643,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_ST_ARG: { int32_t idx = reader_i32(&ctx->reader); + CHECK_IDX(idx, ctx->func.n_args, "ST_ARG"); EMIT_FUNC(ctx, op_st_arg); EMIT_NUM(ctx, idx); break; @@ -606,6 +652,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LD_CLO: { DEPTH_PUSH(ctx->sv); int32_t idx = reader_i32(&ctx->reader); + if (ctx->func.n_captured != -1) + CHECK_IDX(idx, ctx->func.n_captured, "LD_CLO"); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); break; @@ -613,6 +661,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_ST_CLO: { int32_t idx = reader_i32(&ctx->reader); + if (ctx->func.n_captured != -1) + CHECK_IDX(idx, ctx->func.n_captured, "ST_CLO"); EMIT_FUNC(ctx, op_st_clo); EMIT_NUM(ctx, idx); break; @@ -724,6 +774,14 @@ static bool decode_internal(decode_ctx *ctx) { int32_t n_locals = reader_i32(&ctx->reader); ctx->sv.depth = 0; ctx->sv.max_depth = 0; + + ctx->func = + (func_ctx){.n_args = n_args, + .n_locals = n_locals, + .n_captured = (opcode == OP_BEGIN_CLOSURE) + ? 
find_n_captured(ctx, current_bc_off) + : 0}; + EMIT_FUNC(ctx, op_begin); EMIT_NUM(ctx, n_args); EMIT_NUM(ctx, n_locals); @@ -754,16 +812,20 @@ static bool decode_internal(decode_ctx *ctx) { emit_glo(ctx, idx, global_base, op_ld_glo); break; case 1: // Local + CHECK_IDX(idx, ctx->func.n_locals, "CLOSURE desig local"); DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_loc); EMIT_NUM(ctx, idx); break; case 2: // Arg + CHECK_IDX(idx, ctx->func.n_args, "CLOSURE desig arg"); DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_arg); EMIT_NUM(ctx, idx); break; case 3: // Closure var + if (ctx->func.n_captured != -1) + CHECK_IDX(idx, ctx->func.n_captured, "CLOSURE desig closure"); DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_ld_clo); EMIT_NUM(ctx, idx); @@ -810,6 +872,9 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_NUM(ctx, 0); // placeholder — will hold code index EMIT_NUM(ctx, n_captured); + da_append(ctx->closures, ((closure_info){.bc_off = target_off, + .n_captured = n_captured})); + meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off) { assert(tm->resolved_idx != -1 && @@ -947,6 +1012,7 @@ static bool decode_internal(decode_ctx *ctx) { } } free(meta); + da_free(ctx->closures); return ok; } From a8dcd39ce070a9982f7e27245ea979cb4ec31d61 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 5 Apr 2026 22:22:35 +0300 Subject: [PATCH 078/141] refactor `op_call` and `op_closure` --- virtual_machine/converter.c | 137 +++++++++++++----------------------- 1 file changed, 49 insertions(+), 88 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 62451eba3..99fc568a2 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -291,6 +291,48 @@ static bool emit_glo(decode_ctx *ctx, int32_t idx, size_t global_base, fn op) { return true; } +/* + * Emit target slot for CALL/CLOSURE, handles external and internal targets. 
+ */ +static bool emit_target(decode_ctx *ctx, meta_info *meta, int32_t target_off, + size_t current_bc_off, const char *opname) { + const bytecode *bc = ctx->bc; + size_t target_slot = ctx->code.len; + + if (IS_EXT_REF(target_off)) { + int str_offset = EXT_REF_INDEX(target_off); + const char *name = bytecode_get_string(bc, str_offset); + VM_DEBUG("DECODE: %s external '%s'\n", opname, name); + + resolved_symbol *sym = symbol_table_find_function(ctx->st, name); + if (sym) { + add_reloc(ctx, target_slot, name, UNIT); + EMIT_NUM( + ctx, + sym->idx); // placeholder, will be resolved to inter-unit function + } else { + size_t idx = ffi_call_table_intern(ctx->ffi, name); + add_reloc(ctx, target_slot, name, FFI); + EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call + } + } else { + if (!validate_target_off(bc, target_off, current_bc_off, opname)) + return false; + + EMIT_NUM(ctx, 0); // placeholder — will hold code index + + meta_info *tm = &meta[target_off]; + if (target_off < (int32_t)current_bc_off) { + assert(tm->resolved_idx != -1); + ctx->code.data[target_slot].num = tm->resolved_idx; + add_reloc(ctx, target_slot, NULL, INTERNAL); + } else { + add_fixup(meta, target_off, target_slot); + } + } + return true; +} + /* * Handle jump target resolution (intra-unit only — these are always local) */ @@ -798,8 +840,6 @@ static bool decode_internal(decode_ctx *ctx) { VM_DEBUG("DECODE: OP_CLOSURE target_raw=0x%x n_captured=%d bc_off=%zu\n", target_off, n_captured, current_bc_off); - bool is_external = IS_EXT_REF(target_off); - // Emit load instructions for each captured variable for (int32_t i = 0; i < n_captured; i++) { uint8_t type_byte = reader_u8(&ctx->reader); @@ -840,52 +880,13 @@ static bool decode_internal(decode_ctx *ctx) { DEPTH_PUSH(ctx->sv); EMIT_FUNC(ctx, op_closure); + if (!emit_target(ctx, meta, target_off, current_bc_off, "CLOSURE")) + goto cleanup; + EMIT_NUM(ctx, n_captured); - size_t target_slot = ctx->code.len; - if (is_external) { - int 
str_offset = EXT_REF_INDEX(target_off); - const char *ext_func_name = bytecode_get_string(bc, str_offset); - - VM_DEBUG("DECODE: OP_CLOSURE external name='%s' (stub)\n", - ext_func_name); - - resolved_symbol *sym = - symbol_table_find_function(ctx->st, ext_func_name); - if (sym) { - add_reloc(ctx, target_slot, ext_func_name, UNIT); - EMIT_NUM( - ctx, - sym->idx); // placeholder, will be resolved to inter-unit function - } else { - size_t idx = ffi_call_table_intern(ctx->ffi, ext_func_name); - add_reloc(ctx, target_slot, ext_func_name, FFI); - EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call - } - - EMIT_NUM(ctx, n_captured); - - } else { - if (!validate_target_off(bc, target_off, current_bc_off, "CLOSURE")) { - goto cleanup; - } - - EMIT_NUM(ctx, 0); // placeholder — will hold code index - EMIT_NUM(ctx, n_captured); - + if (!IS_EXT_REF(target_off)) da_append(ctx->closures, ((closure_info){.bc_off = target_off, .n_captured = n_captured})); - - meta_info *tm = &meta[target_off]; - if (target_off < (int32_t)current_bc_off) { - assert(tm->resolved_idx != -1 && - "backward closure target must have been visited"); - - ctx->code.data[target_slot].num = tm->resolved_idx; - add_reloc(ctx, target_slot, NULL, INTERNAL); - } else { - add_fixup(meta, target_off, target_slot); - } - } break; } @@ -898,51 +899,11 @@ static bool decode_internal(decode_ctx *ctx) { VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", target_off, n_args, current_bc_off, ctx->code.len); - bool is_external = IS_EXT_REF(target_off); EMIT_FUNC(ctx, op_call); - - size_t target_slot = ctx->code.len; - if (is_external) { - int str_offset = EXT_REF_INDEX(target_off); - const char *ext_func_name = bytecode_get_string(bc, str_offset); - - VM_DEBUG("DECODE: OP_CALL external '%s' (stub)\n", ext_func_name); - - resolved_symbol *sym = - symbol_table_find_function(ctx->st, ext_func_name); - - if (sym) { - add_reloc(ctx, target_slot, ext_func_name, UNIT); - 
EMIT_NUM( - ctx, - sym->idx); // placeholder, will be resolved to inter-unit function - } else { - size_t idx = ffi_call_table_intern(ctx->ffi, ext_func_name); - add_reloc(ctx, target_slot, ext_func_name, FFI); - EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call - } - EMIT_NUM(ctx, n_args); - - } else { - if (!validate_target_off(bc, (uint32_t)target_off, current_bc_off, - "CALL")) { - goto cleanup; - } - EMIT_NUM(ctx, 0); // placeholder — will hold code index - EMIT_NUM(ctx, n_args); - - meta_info *tm = &meta[target_off]; - if (target_off < (int32_t)current_bc_off) { - assert(tm->resolved_idx != -1 && - "backward call target must have been visited"); - - ctx->code.data[target_slot].num = tm->resolved_idx; - add_reloc(ctx, target_slot, NULL, INTERNAL); - } else { - add_fixup(meta, (uint32_t)target_off, target_slot); - } - } + if (!emit_target(ctx, meta, target_off, current_bc_off, "CALL")) + goto cleanup; + EMIT_NUM(ctx, n_args); break; } From 4486cee836edeacb36b4a004ab285b112c1c2ab1 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 5 Apr 2026 23:42:27 +0300 Subject: [PATCH 079/141] open files in `loader.c` instead of `bytecode.c` --- virtual_machine/bytecode.c | 13 ++++++- virtual_machine/bytecode.h | 1 + virtual_machine/loader.c | 75 +++++++++++++++++++++++--------------- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index e82016a1f..9f394d659 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -14,8 +14,7 @@ #define PUB_ENTRY_SIZE 9 #define IMPORT_ENTRY_SIZE 4 -bytecode *bytecode_load(const char *filename) { - int fd = open(filename, O_RDONLY); +bytecode *bytecode_load_fd(int fd) { if (fd < 0) { perror("bytecode_load: open"); return NULL; @@ -80,6 +79,16 @@ bytecode *bytecode_load(const char *filename) { return bc; } +bytecode *bytecode_load(const char *filename) { + int fd = open(filename, O_RDONLY); + if (fd < 0) { + perror("bytecode_load: open"); + 
return NULL; + } + + return bytecode_load_fd(fd); +} + void bytecode_pubs_init(bytecode_iterator *iter, const bytecode *bc) { reader_init(&iter->reader, bc->pubs, bc->pubs_len * PUB_ENTRY_SIZE); iter->string_table = bc->string_table; diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 4f4b57808..26e806f16 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -38,6 +38,7 @@ typedef struct { const char *name; } bytecode; +bytecode *bytecode_load_fd(int fd); bytecode *bytecode_load(const char *filename); void bytecode_free(bytecode *bc); diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index d4966a2bf..d655fa423 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -9,12 +9,12 @@ #include "bytecode.h" #include "da.h" #include "memory.h" +#include #include #include #include #include #include -#include typedef struct { bytecode **data; @@ -22,17 +22,25 @@ typedef struct { size_t cap; } bytecode_array; +static void free_loaded_units(bytecode_array *units) { + for (size_t i = 0; i < units->len; i++) { + bytecode_free(units->data[i]); + } + da_free(*units); +} + /* - * Build the path to a unit's .bc file by searching through paths. + * Resolve a unit name against the search paths and load the first + * bytecode file. */ -static const char *build_unit_path(const char *unit_name, - const search_paths *paths) { +static bytecode *load_unit_from_paths(const char *unit_name, + const search_paths *paths) { static char path[MAX_PATH_LEN]; - for (size_t i = 0; i < paths->len; i++) { snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], unit_name); - if (access(path, F_OK) == 0) { - return path; + int fd = open(path, O_RDONLY); + if (fd >= 0) { + return bytecode_load_fd(fd); } } @@ -79,18 +87,7 @@ static char *extract_unit_name(const char *filename) { * Load a single unit and its dependencies recursively. 
*/ static bool load_unit_recursive(bytecode_array *units, const char *unit_name, - const char *filepath, - const search_paths *paths) { - if (find_loaded(units, unit_name)) { - return true; - } - - bytecode *bc = bytecode_load(filepath); - if (!bc) { - fprintf(stderr, "Failed to load dependency '%s' from '%s'\n", unit_name, - filepath); - return false; - } + bytecode *bc, const search_paths *paths) { bc->name = ESTRDUP(unit_name); // Recursively load dependencies first (topological order) @@ -104,8 +101,21 @@ static bool load_unit_recursive(bytecode_array *units, const char *unit_name, continue; } - const char *dep_path = build_unit_path(import_name, paths); - load_unit_recursive(units, import_name, dep_path, paths); + if (find_loaded(units, import_name)) { + continue; + } + + bytecode *dep_bc = load_unit_from_paths(import_name, paths); + if (!dep_bc) { + fprintf(stderr, "Failed to load dependency '%s'\n", import_name); + bytecode_free(bc); + return false; + } + + if (!load_unit_recursive(units, import_name, dep_bc, paths)) { + bytecode_free(bc); + return false; + } } da_append(*units, bc); @@ -116,17 +126,22 @@ load_result load(const char *main_unit_path, const search_paths *paths) { bytecode_array m; da_init(m); - const char *filepath; - char *unit_name; - if (is_filepath(main_unit_path)) { - filepath = main_unit_path; - unit_name = extract_unit_name(main_unit_path); - } else { - filepath = build_unit_path(main_unit_path, paths); - unit_name = ESTRDUP(main_unit_path); + bool is_path = is_filepath(main_unit_path); + bytecode *bc = is_path ? bytecode_load(main_unit_path) + : load_unit_from_paths(main_unit_path, paths); + if (!bc) { + fprintf(stderr, "Failed to load unit '%s'\n", main_unit_path); + return (load_result){0}; } - load_unit_recursive(&m, unit_name, filepath, paths); + char *unit_name = + is_path ? 
extract_unit_name(main_unit_path) : ESTRDUP(main_unit_path); + + if (!load_unit_recursive(&m, unit_name, bc, paths)) { + free(unit_name); + free_loaded_units(&m); + return (load_result){0}; + } free(unit_name); load_result result = { From 4c64bb77f566e5b54be5cfdd7216a0e11d131ba8 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 6 Apr 2026 00:02:06 +0300 Subject: [PATCH 080/141] allocate global `eof_ip` instead of placing it in the converted bytecode --- virtual_machine/converter.c | 9 +++------ virtual_machine/ops.c | 7 +++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 99fc568a2..626047fc2 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1039,16 +1039,13 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, size_t total_globals, ffi_call_table *ffi) { + static insn eof_ip = {.func = op_eof}; size_t ffi_call_len = ffi_call_table_len(ffi); - - size_t eof_offset = total_code_len; - size_t ffi_call_offset = eof_offset + 1; + size_t ffi_call_offset = total_code_len; size_t all_code_len = ffi_call_offset + ffi_call_len * FFI_STUB_SIZE; insn *all_code = ALLOC_ARRAY(insn, all_code_len); insn **entry_points = ALLOC_ARRAY(insn *, n); - - all_code[eof_offset].func = op_eof; // Copy code and resolve relocations size_t code_offset = 0; for (size_t i = 0; i < n; i++) { @@ -1059,7 +1056,7 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, entry_points[i] = &all_code[code_offset]; resolve_relocs(all_code, dec, code_offset, ffi_call_offset); - all_code[code_offset + 1].target = &all_code[eof_offset]; + all_code[code_offset + 1].target = &eof_ip; code_offset += dec->code_len; } diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 5a0a1afe4..7cbee02fe 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -566,6 
+566,13 @@ void op_closure(DECL_STATE) { DISPATCH(); } +/* + * op_init is a setup for the main op_begin of the entry point unit. It pushes a + * fake frame with 0 args and no saved state, so that the main function can use + * the normal CALL/END sequence without worrying about the initial case. The + * fake return address points to a special op_eof which just returns, causing + * the whole program to exit when the main function returns. + */ void op_init(DECL_STATE) { ip++; insn *eof_ip = ip->target; From 70f2f4bd23430ab467f3b7c8654a630a0db451ae Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 6 Apr 2026 00:45:36 +0300 Subject: [PATCH 081/141] remove `main` from public symbols for bytecode --- src/SM.ml | 5 ++++- virtual_machine/symbols.c | 16 +++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index f4a43c9d1..4972c0951 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -1716,9 +1716,12 @@ let compile cmd ((imports, _), p) = ] env in + let top_public = + match cmd#get_mode with `BC -> [] | _ -> [ PUBLIC (topname, true) ] + in let prg = List.map (fun i -> IMPORT i) imports - @ [ PUBLIC (topname, true) ] @ env#get_decls @ List.flatten prg + @ top_public @ env#get_decls @ List.flatten prg in (*Printf.eprintf "Before propagating closures:\n"; Printf.eprintf "%s\n%!" 
env#show_funinfo; diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c index 319eb05ba..30494ae5d 100644 --- a/virtual_machine/symbols.c +++ b/virtual_machine/symbols.c @@ -5,8 +5,6 @@ #include #include -static const char *MAIN_FUNC = "main"; - struct symbol_table { resolved_symbol *data; size_t len; @@ -39,15 +37,11 @@ static resolved_symbol *symbol_table_find(symbol_table *table, const char *name, static int symbol_table_add(symbol_table *table, const char *name, int32_t idx, bool is_function) { - - // Allow duplicate main() (each uinit has one) - if (strcmp(name, MAIN_FUNC) != 0) { - resolved_symbol *existing = symbol_table_find(table, name, is_function); - if (existing) { - fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", - name); - exit(EXIT_FAILURE); - } + resolved_symbol *existing = symbol_table_find(table, name, is_function); + if (existing) { + fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", + name); + exit(EXIT_FAILURE); } resolved_symbol entry = { From ebbe9f817e82e3d630a6d0b5ea27391ac76387a4 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 6 Apr 2026 01:26:11 +0300 Subject: [PATCH 082/141] barrier that should work --- src/SM.ml | 6 +-- virtual_machine/converter.c | 94 +++++++++++++------------------------ virtual_machine/opcodes.c | 2 + virtual_machine/opcodes.h | 1 + 4 files changed, 39 insertions(+), 64 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index 4972c0951..b630bfcb0 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -352,9 +352,9 @@ module ByteCode = struct | ARRAY n -> add_bytes [ (5 * 16) + 8 ]; add_ints [ n ] - (* 0x59 n:32 n:32 *) - | FAIL ((l, c), _) -> - add_bytes [ (5 * 16) + 9 ]; + (* 0x59/0x5b n:32 n:32 *) + | FAIL ((l, c), value) -> + add_bytes [ if value then (5 * 16) + 11 else (5 * 16) + 9 ]; add_ints [ l; c ] (* 0x5a n:32 *) | LINE n -> diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 626047fc2..415da329e 100644 --- a/virtual_machine/converter.c 
+++ b/virtual_machine/converter.c @@ -111,9 +111,8 @@ typedef struct { * LIVE: currently decoding sequentially, reachable from previous instruction * BARRIER: just emitted JMP or END, so next instruction is reachable but not * from previous instruction - * DEAD: not reachable from previous instruction */ -typedef enum { LIVE, BARRIER, DEAD } reach_state; +typedef enum { LIVE, BARRIER } reach_state; typedef struct { int32_t depth; @@ -340,7 +339,6 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, size_t current_bc_off) { int32_t target_off = reader_i32(&ctx->reader); int32_t depth = ctx->sv.depth; - reach_state state = ctx->sv.state; if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { return false; @@ -359,30 +357,23 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, add_reloc(ctx, my_idx, NULL, INTERNAL); VM_DEBUG(" JUMP: backward to bc_off=%d, (depth=%d, target_depth=%d)\n", target_off, depth, tm->stack_depth); - if (state != DEAD) { - assert(tm->stack_depth != -1 && - "backward jump target must have known stack depth"); - if (tm->stack_depth != depth) { - fprintf(stderr, - "Error: Jump stack mismatch at bc_off=%zu (exptected %d, " - "actual %d)\n", - current_bc_off, depth, tm->stack_depth); - return false; - } + assert(tm->stack_depth != -1 && + "backward jump target must have known stack depth"); + if (tm->stack_depth != depth) { + fprintf(stderr, + "Error: Jump stack mismatch at bc_off=%zu (exptected %d, " + "actual %d)\n", + current_bc_off, depth, tm->stack_depth); + return false; } } else { // Forward jump — add fixup if (!add_fixup(meta, target_off, my_idx)) { return false; } - if (state == DEAD) { - // Don't set or validate depth at target since it's not reachable from - // sequential decode - VM_DEBUG(" JUMP: forward to bc_off=%d (dead, skipping depth)\n", - target_off); - } else if (tm->stack_depth == -1) { - VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", - target_off, depth, 
tm->stack_depth); + VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", + target_off, depth, tm->stack_depth); + if (tm->stack_depth == -1) { tm->stack_depth = depth; } else if (tm->stack_depth != depth) { fprintf(stderr, @@ -400,22 +391,16 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, #define DEPTH_INC(sv, n) \ do { \ - if ((sv).state != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), \ - (n)); \ - (sv).depth += (n); \ - if ((sv).depth > (sv).max_depth) \ - (sv).max_depth = (sv).depth; \ - } \ + VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), (n)); \ + (sv).depth += (n); \ + if ((sv).depth > (sv).max_depth) \ + (sv).max_depth = (sv).depth; \ } while (0) #define DEPTH_DEC(sv, n) \ do { \ - if ((sv).state != DEAD) { \ - VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), \ - (n)); \ - (sv).depth -= (n); \ - assert((sv).depth >= 0 && "stack underflow"); \ - } \ + VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), (n)); \ + (sv).depth -= (n); \ + assert((sv).depth >= 0 && "stack underflow"); \ } while (0) #define DEPTH_PUSH(sv) DEPTH_INC(sv, 1) #define DEPTH_POP(sv) DEPTH_DEC(sv, 1) @@ -443,28 +428,15 @@ static bool decode_internal(decode_ctx *ctx) { size_t current_bc_off = reader_pos(&ctx->reader); uint8_t opcode = reader_u8(&ctx->reader); - VM_DEBUG("DECODE: bc_off=%zu %s (0x%02X) depth=%d\n", current_bc_off, + VM_DEBUG("DECODE: bc_off=%zu %s (0x%02X) depth=%d%s\n", current_bc_off, opcode_to_string(opcode), opcode, ctx->sv.depth, - ctx->sv.state == BARRIER ? " [barrier]" - : ctx->sv.state == DEAD ? " [dead]" - : ""); + ctx->sv.state == BARRIER ? 
" [barrier]" : ""); meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code.len; // Validate stack depth at intersections - if (ctx->sv.state == DEAD) { - if (m->stack_depth != -1) { - // Forward jump visited - VM_DEBUG(" DEPTH: %d -> %d", ctx->sv.depth, m->stack_depth); - ctx->sv.depth = m->stack_depth; - ctx->sv.state = LIVE; - } else { - // No forward jump - VM_DEBUG(" DEPTH: dead, skipping at bc_off=%zu\n", current_bc_off); - m->stack_depth = -1; // unvisited - } - } else if (ctx->sv.state == BARRIER) { + if (ctx->sv.state == BARRIER) { if (m->stack_depth != -1) { // Forward jump visited VM_DEBUG(" DEPTH: %d -> %d", ctx->sv.depth, m->stack_depth); @@ -592,9 +564,7 @@ static bool decode_internal(decode_ctx *ctx) { if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } - if (ctx->sv.state != DEAD) { - ctx->sv.state = BARRIER; - } + ctx->sv.state = BARRIER; break; case OP_CJMP_Z: @@ -758,13 +728,17 @@ static bool decode_internal(decode_ctx *ctx) { break; } - case OP_FAIL: { + case OP_FAIL: + case OP_FAIL_KEEP: { + bool keep_value = opcode == OP_FAIL_KEEP; int32_t line = reader_i32(&ctx->reader); int32_t col = reader_i32(&ctx->reader); + if (!keep_value) { + DEPTH_POP(ctx->sv); + } EMIT_FUNC(ctx, op_fail); EMIT_NUM(ctx, line); EMIT_NUM(ctx, col); - ctx->sv.state = DEAD; break; } @@ -918,16 +892,14 @@ static bool decode_internal(decode_ctx *ctx) { case OP_END: // depth == 1 <=> return value (?) 
- if (ctx->sv.state != DEAD && ctx->sv.depth != 1) { + if (ctx->sv.depth != 1) { fprintf(stderr, "Error: END with depth = %d at bc_off=%zu\n", ctx->sv.depth, current_bc_off); goto cleanup; } EMIT_FUNC(ctx, op_end); - if (ctx->sv.state != DEAD) { - ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; - ctx->sv.state = BARRIER; - } + ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; + ctx->sv.state = BARRIER; break; case OP_LINE: { @@ -950,7 +922,7 @@ static bool decode_internal(decode_ctx *ctx) { goto cleanup; } - if (ctx->sv.state != DEAD && ctx->sv.depth > ctx->sv.max_depth) { + if (ctx->sv.depth > ctx->sv.max_depth) { ctx->sv.max_depth = ctx->sv.depth; } } diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 911960e29..04f8af7cd 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -88,6 +88,8 @@ const char *opcode_to_string(uint8_t opcode) { return "FAIL"; case OP_LINE: return "LINE"; + case OP_FAIL_KEEP: + return "FAIL.KEEP"; case OP_PATT_STR_CMP: return "PATT.STRCMP"; case OP_PATT_STRING: diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 4eb230f3b..19af54c80 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -46,6 +46,7 @@ typedef enum { OP_ARRAY = 0x58, OP_FAIL = 0x59, OP_LINE = 0x5A, + OP_FAIL_KEEP = 0x5B, OP_PATT_STR_CMP = 0x60, OP_PATT_STRING = 0x61, OP_PATT_ARRAY = 0x62, From a45ed41a60bb25bac517448c5c71186fd2e098d0 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 02:47:40 +0300 Subject: [PATCH 083/141] remove redundant check --- virtual_machine/bytecode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 9f394d659..1591c22fa 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -15,11 +15,6 @@ #define IMPORT_ENTRY_SIZE 4 bytecode *bytecode_load_fd(int fd) { - if (fd < 0) { - perror("bytecode_load: open"); - return NULL; - } - struct stat st; if 
(fstat(fd, &st) < 0) { perror("bytecode_load: fstat"); From 58b59b28c782d77b1a0baed1d8b39bd59c5f61d9 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:14:29 +0300 Subject: [PATCH 084/141] fix main module handling --- virtual_machine/lama.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 2f72b576b..cea3891f1 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -1,5 +1,6 @@ #define _POSIX_C_SOURCE 200809L +#include "memory.h" #include "vm.h" #include #include @@ -65,7 +66,9 @@ int main(int argc, char *argv[]) { char *bytecode_file = argv[optind]; // Include main unit's directory by default - bytecode_dir = strdup(dirname(bytecode_dir)); + char *tmp = ESTRDUP(bytecode_file); + bytecode_dir = ESTRDUP(dirname(tmp)); + free(tmp); include_paths[0] = bytecode_dir; virtual_machine *vm = vm_create(bytecode_file, (const char **)include_paths, From 2fc0020f05b73b22160b81fbac9b52c98805be08 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:31:05 +0300 Subject: [PATCH 085/141] set args and init gc in `vm_run` --- virtual_machine/vm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 319c566c4..4638daf3a 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -25,11 +25,12 @@ struct virtual_machine { size_t ffi_count; void *stack_base; size_t stack_size; + int argc; + char **argv; }; virtual_machine *vm_create(const char *main_unit_path, const char **paths, size_t total_paths_len) { - __gc_init(); search_paths search_paths = {.paths = paths, .len = total_paths_len}; virtual_machine *vm = ALLOC(virtual_machine); @@ -95,11 +96,14 @@ void vm_destroy(virtual_machine *vm) { } void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { - (void)vm; - set_args(argc, argv); + vm->argc = argc; + vm->argv = argv; } aint vm_run(virtual_machine *vm) { + __init(); + set_args(vm->argc, 
vm->argv); + aint *sp = vm->globals - 1; __gc_stack_top = (size_t)sp; @@ -113,5 +117,6 @@ aint vm_run(virtual_machine *vm) { ret_val = *sp; } + __shutdown(); return ret_val; } From 39152422961b1bf66be3abc7c6df2c656f327d71 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:43:59 +0300 Subject: [PATCH 086/141] remove return value from `vm` --- virtual_machine/vm.c | 5 +---- virtual_machine/vm.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 4638daf3a..b4ff96af1 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -100,7 +100,7 @@ void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { vm->argv = argv; } -aint vm_run(virtual_machine *vm) { +void vm_run(virtual_machine *vm) { __init(); set_args(vm->argc, vm->argv); @@ -110,13 +110,10 @@ aint vm_run(virtual_machine *vm) { __gc_stack_bottom = (size_t)vm->stack_base; aint *bp; - aint ret_val = 0; for (size_t i = 0; i < vm->bc_len; i++) { insn *ip = vm->entry_points[i]; ip->func(ip, sp, bp); - ret_val = *sp; } __shutdown(); - return ret_val; } diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index 79746e283..bebae9ca0 100644 --- a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -13,6 +13,6 @@ void vm_destroy(virtual_machine *vm); void vm_set_args(virtual_machine *vm, int argc, char *argv[]); -aint vm_run(virtual_machine *vm); +void vm_run(virtual_machine *vm); #endif // VM_H From 4e2d138c6ca8e183299db2aefb90b47742cbbf7f Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:44:13 +0300 Subject: [PATCH 087/141] pop the result in `op_eof` --- virtual_machine/ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 7cbee02fe..4f3ef402c 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -586,7 +586,8 @@ void op_init(DECL_STATE) { void op_eof(DECL_STATE) { (void)ip; (void)bp; - (void)sp; + // Pop the result to keep stack 
consistent between runs + STACK_POP(sp); return; } From a39202a9877e93397c3b5cf3ee088f2e6453b17f Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:46:53 +0300 Subject: [PATCH 088/141] remove redundant `__gc_stack_top` setup in `vm.c` --- virtual_machine/vm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index b4ff96af1..a96ec39f7 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -106,7 +106,6 @@ void vm_run(virtual_machine *vm) { aint *sp = vm->globals - 1; - __gc_stack_top = (size_t)sp; __gc_stack_bottom = (size_t)vm->stack_base; aint *bp; From 6f66df58fb7e8b4adca804050375f736d70abf87 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:53:45 +0300 Subject: [PATCH 089/141] remove unused `total_globals` field --- virtual_machine/converter.c | 5 ++--- virtual_machine/converter.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 415da329e..0c6e0ac10 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1010,7 +1010,7 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, } static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, - size_t total_globals, ffi_call_table *ffi) { + ffi_call_table *ffi) { static insn eof_ip = {.func = op_eof}; size_t ffi_call_len = ffi_call_table_len(ffi); size_t ffi_call_offset = total_code_len; @@ -1046,7 +1046,6 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, program *prog = ALLOC(program); prog->code = all_code; prog->code_len = all_code_len; - prog->total_globals = total_globals; prog->entry_points = entry_points; prog->ffi_data = ffi_call_table_release(ffi); prog->ffi_len = ffi_call_len; @@ -1093,7 +1092,7 @@ program *decode(bytecode **bc_arr, size_t n, aint *globals) { total_globals += bc_arr[i]->globals_count; } - prog = link_program(dec_arr, n, total_code_len, 
total_globals, ffi); + prog = link_program(dec_arr, n, total_code_len, ffi); cleanup: symbol_table_destroy(st); diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h index 5f493ec03..26b374678 100644 --- a/virtual_machine/converter.h +++ b/virtual_machine/converter.h @@ -11,7 +11,6 @@ typedef struct { insn *code; size_t code_len; - size_t total_globals; insn **entry_points; void *ffi_data; size_t ffi_len; From 242fec56e4ddeedad3a79486eaae5badb272203c Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 21:59:24 +0300 Subject: [PATCH 090/141] change `OP_LINE` and `OP_FAIL_KEEP` opcodes --- src/SM.ml | 8 ++++---- virtual_machine/converter.c | 2 +- virtual_machine/opcodes.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index b630bfcb0..f656101cd 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -352,13 +352,13 @@ module ByteCode = struct | ARRAY n -> add_bytes [ (5 * 16) + 8 ]; add_ints [ n ] - (* 0x59/0x5b n:32 n:32 *) + (* 0x59/0x5a n:32 n:32 *) | FAIL ((l, c), value) -> - add_bytes [ if value then (5 * 16) + 11 else (5 * 16) + 9 ]; + add_bytes [ if value then (5 * 16) + 10 else (5 * 16) + 9 ]; add_ints [ l; c ] - (* 0x5a n:32 *) + (* 0x5b n:32 *) | LINE n -> - add_bytes [ (5 * 16) + 10 ]; + add_bytes [ (5 * 16) + 11 ]; add_ints [ n ] (* 0x6p *) | PATT p -> add_bytes [ (6 * 16) + enum patt p ] diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 0c6e0ac10..2823b9c47 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -730,7 +730,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_FAIL: case OP_FAIL_KEEP: { - bool keep_value = opcode == OP_FAIL_KEEP; + bool keep_value = (opcode & 1) == 0; int32_t line = reader_i32(&ctx->reader); int32_t col = reader_i32(&ctx->reader); if (!keep_value) { diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 19af54c80..821bca99c 100755 --- a/virtual_machine/opcodes.h +++ 
b/virtual_machine/opcodes.h @@ -45,8 +45,8 @@ typedef enum { OP_TAG = 0x57, OP_ARRAY = 0x58, OP_FAIL = 0x59, - OP_LINE = 0x5A, - OP_FAIL_KEEP = 0x5B, + OP_FAIL_KEEP = 0x5A, + OP_LINE = 0x5B, OP_PATT_STR_CMP = 0x60, OP_PATT_STRING = 0x61, OP_PATT_ARRAY = 0x62, From f603f155188200152546efd935cc2aa1a9a50876 Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 23:05:37 +0300 Subject: [PATCH 091/141] `op_line` chore --- virtual_machine/ops.c | 5 ----- virtual_machine/ops.h | 2 ++ 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 4f3ef402c..aae1af2c0 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -599,9 +599,4 @@ void op_line(DECL_STATE) { (void)line; DISPATCH(); } -#else -void op_line(DECL_STATE) { - ip++; - DISPATCH(); -} #endif diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index a4d7d35d4..907a1b053 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -63,6 +63,8 @@ void op_ffi_call(DECL_STATE); void op_init(DECL_STATE); void op_eof(DECL_STATE); +#ifdef DEBUG_PRINT void op_line(DECL_STATE); +#endif #endif // OPS_H From 672d5057c22f4521565eb31af27b7202d68cb98a Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 23:50:44 +0300 Subject: [PATCH 092/141] check for `EOF` --- virtual_machine/bytecode.c | 14 ++++++++++++-- virtual_machine/converter.c | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index 1591c22fa..c20b6da9e 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -1,6 +1,7 @@ #define _POSIX_C_SOURCE 200809L #include "bytecode.h" #include "memory.h" +#include "opcodes.h" #include #include #include @@ -47,11 +48,16 @@ bytecode *bytecode_load_fd(int fd) { size_t pubs_offset = imports_offset + (size_t)num_imports * IMPORT_ENTRY_SIZE; size_t code_offset = pubs_offset + (size_t)num_pubs * PUB_ENTRY_SIZE; size_t code_size = file_size - 
code_offset; + bytecode *bc; + + if (data[code_offset + code_size - 1] != OP_EOF) { + fprintf(stderr, "bytecode_load: bytecode must end with EOF opcode\n"); + goto err_unmap; + } - // TODO: VALIdation const char *string_table = (const char *)data + st_offset; - bytecode *bc = ALLOC(bytecode); + bc = ALLOC(bytecode); bc->map_base = data; bc->map_size = file_size; @@ -72,6 +78,10 @@ bytecode *bytecode_load_fd(int fd) { bc->name = NULL; return bc; + +err_unmap: + munmap((void *)data, file_size); + return NULL; } bytecode *bytecode_load(const char *filename) { diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 2823b9c47..4bce73800 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -914,6 +914,12 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_EOF: + if (current_bc_off + 1 != bc->code_size) { + fprintf(stderr, + "Error: EOF opcode before end of bytecode at bc_off=%zu\n", + current_bc_off); + goto cleanup; + } break; default: From 39323a823b691d1dfddd063398e496247276a1bc Mon Sep 17 00:00:00 2001 From: ancavar Date: Tue, 7 Apr 2026 23:55:26 +0300 Subject: [PATCH 093/141] undef emits --- virtual_machine/converter.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 4bce73800..6e269f1bd 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -960,6 +960,13 @@ static bool decode_internal(decode_ctx *ctx) { #undef DEPTH_DEC #undef DEPTH_PUSH #undef DEPTH_POP +#undef EMIT_FUNC +#undef EMIT_NUM +#undef EMIT_ANUM +#undef EMIT_STR +#undef EMIT_TARGET +#undef EMIT_GLOBAL_PTR +#undef EMIT_PTR static void register_public_symbols(symbol_table *st, const bytecode *bc, size_t code_offset, size_t global_base, From 31a5fa765458e1bf067196513fa6e2fde76080bd Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 00:09:21 +0300 Subject: [PATCH 094/141] better error handling for `symbols.c` --- virtual_machine/converter.c | 22 
++++++++++++++++------ virtual_machine/symbols.c | 19 ++++++++++--------- virtual_machine/symbols.h | 9 ++++----- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 6e269f1bd..728fd4456 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -968,7 +968,7 @@ static bool decode_internal(decode_ctx *ctx) { #undef EMIT_GLOBAL_PTR #undef EMIT_PTR -static void register_public_symbols(symbol_table *st, const bytecode *bc, +static bool register_public_symbols(symbol_table *st, const bytecode *bc, size_t code_offset, size_t global_base, const int32_t *bc_to_insn_map) { public_symbol pub; @@ -983,15 +983,21 @@ static void register_public_symbols(symbol_table *st, const bytecode *bc, fprintf(stderr, "Error: public symbol '%s' at bytecode offset %d not decoded\n", pub.name, pub.code_offset); - exit(EXIT_FAILURE); + return false; } int32_t code_idx = insn_idx + code_offset; - symbol_table_add_function(st, pub.name, code_idx); + if (!symbol_table_add_function(st, pub.name, code_idx)) { + return false; + } } else { int32_t global_idx = pub.code_offset + global_base; - symbol_table_add_global(st, pub.name, global_idx); + if (!symbol_table_add_global(st, pub.name, global_idx)) { + return false; + } } } + + return true; } /* @@ -1098,8 +1104,12 @@ program *decode(bytecode **bc_arr, size_t n, aint *globals) { }; n_decoded++; - register_public_symbols(st, bc_arr[i], total_code_len, total_globals, - ctx.bc_to_insn_map); + if (!register_public_symbols(st, bc_arr[i], total_code_len, total_globals, + ctx.bc_to_insn_map)) { + fprintf(stderr, "Failed to register public symbols for %s\n", + bc_arr[i]->name); + goto cleanup; + } total_code_len += ctx.code.len; total_globals += bc_arr[i]->globals_count; diff --git a/virtual_machine/symbols.c b/virtual_machine/symbols.c index 30494ae5d..a2185c880 100644 --- a/virtual_machine/symbols.c +++ b/virtual_machine/symbols.c @@ -28,20 +28,21 @@ void 
symbol_table_destroy(symbol_table *table) { static resolved_symbol *symbol_table_find(symbol_table *table, const char *name, bool is_function) { for (size_t i = 0; i < table->len; i++) { - if (strcmp(table->data[i].name, name) == 0 && table->data[i].is_function == is_function) { + if (strcmp(table->data[i].name, name) == 0 && + table->data[i].is_function == is_function) { return &table->data[i]; } } return NULL; } -static int symbol_table_add(symbol_table *table, const char *name, int32_t idx, - bool is_function) { +static bool symbol_table_add(symbol_table *table, const char *name, int32_t idx, + bool is_function) { resolved_symbol *existing = symbol_table_find(table, name, is_function); if (existing) { fprintf(stderr, "Error: Duplicate symbol '%s' found in symbol table\n", name); - exit(EXIT_FAILURE); + return false; } resolved_symbol entry = { @@ -52,7 +53,7 @@ static int symbol_table_add(symbol_table *table, const char *name, int32_t idx, da_append(*table, entry); - return 0; + return true; } resolved_symbol *symbol_table_find_function(symbol_table *table, @@ -65,12 +66,12 @@ resolved_symbol *symbol_table_find_global(symbol_table *table, return symbol_table_find(table, name, false); } -int symbol_table_add_function(symbol_table *table, const char *name, - int32_t code_idx) { +bool symbol_table_add_function(symbol_table *table, const char *name, + int32_t code_idx) { return symbol_table_add(table, name, code_idx, true); } -int symbol_table_add_global(symbol_table *table, const char *name, - int32_t global_idx) { +bool symbol_table_add_global(symbol_table *table, const char *name, + int32_t global_idx) { return symbol_table_add(table, name, global_idx, false); } diff --git a/virtual_machine/symbols.h b/virtual_machine/symbols.h index f811ae891..dd37b63f2 100644 --- a/virtual_machine/symbols.h +++ b/virtual_machine/symbols.h @@ -1,7 +1,6 @@ #ifndef SYMBOLS_H #define SYMBOLS_H -#include "insn.h" #include #include #include @@ -30,9 +29,9 @@ resolved_symbol 
*symbol_table_find_function(symbol_table *table, const char *name); resolved_symbol *symbol_table_find_global(symbol_table *table, const char *name); -int symbol_table_add_function(symbol_table *table, const char *name, - int32_t code_index); -int symbol_table_add_global(symbol_table *table, const char *name, - int32_t global_idx); +bool symbol_table_add_function(symbol_table *table, const char *name, + int32_t code_index); +bool symbol_table_add_global(symbol_table *table, const char *name, + int32_t global_idx); #endif // SYMBOLS_H From 92911acb703fbb9488450b91c8236003c8a6f49c Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 00:20:01 +0300 Subject: [PATCH 095/141] specialize macros --- virtual_machine/converter.c | 364 ++++++++++++++++++------------------ 1 file changed, 182 insertions(+), 182 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 728fd4456..1b3f1f40a 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -30,14 +30,13 @@ extern aint LtagHash(char *s); /* * Code emission macros - append to code array in context */ -#define EMIT_FUNC(ctx, f) da_append((ctx)->code, ((insn){.func = (f)})) -#define EMIT_NUM(ctx, n) da_append((ctx)->code, ((insn){.num = (n)})) -#define EMIT_ANUM(ctx, n) da_append((ctx)->code, ((insn){.anum = (n)})) -#define EMIT_STR(ctx, s) da_append((ctx)->code, ((insn){.str = (s)})) -#define EMIT_TARGET(ctx, t) da_append((ctx)->code, ((insn){.target = (t)})) -#define EMIT_GLOBAL_PTR(ctx, p) \ - da_append((ctx)->code, ((insn){.global_ptr = (p)})) -#define EMIT_PTR(ctx, p) da_append((ctx)->code, ((insn){.ptr = (p)})) +#define EMIT_FUNC(f) da_append(ctx->code, ((insn){.func = (f)})) +#define EMIT_NUM(n) da_append(ctx->code, ((insn){.num = (n)})) +#define EMIT_ANUM(n) da_append(ctx->code, ((insn){.anum = (n)})) +#define EMIT_STR(s) da_append(ctx->code, ((insn){.str = (s)})) +#define EMIT_TARGET(t) da_append(ctx->code, ((insn){.target = (t)})) +#define 
EMIT_GLOBAL_PTR(p) da_append(ctx->code, ((insn){.global_ptr = (p)})) +#define EMIT_PTR(p) da_append(ctx->code, ((insn){.ptr = (p)})) #define CHECK_IDX(idx, limit, name) \ do { \ @@ -264,8 +263,8 @@ static bool emit_ext_glo(decode_ctx *ctx, const char *glob_name, fn op) { resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); if (sym) { // Global from another unit - EMIT_FUNC(ctx, op); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[sym->idx]); + EMIT_FUNC(op); + EMIT_GLOBAL_PTR(&ctx->globals[sym->idx]); return true; } // C global @@ -273,8 +272,8 @@ static bool emit_ext_glo(decode_ctx *ctx, const char *glob_name, fn op) { if (!ptr) { return false; } - EMIT_FUNC(ctx, op); - EMIT_GLOBAL_PTR(ctx, (aint *)ptr); + EMIT_FUNC(op); + EMIT_GLOBAL_PTR((aint *)ptr); return true; } @@ -285,8 +284,8 @@ static bool emit_glo(decode_ctx *ctx, int32_t idx, size_t global_base, fn op) { VM_DEBUG("DECODE: external global '%s'\n", glob_name); return emit_ext_glo(ctx, glob_name, op); } - EMIT_FUNC(ctx, op); - EMIT_GLOBAL_PTR(ctx, &ctx->globals[global_base + idx]); + EMIT_FUNC(op); + EMIT_GLOBAL_PTR(&ctx->globals[global_base + idx]); return true; } @@ -307,18 +306,17 @@ static bool emit_target(decode_ctx *ctx, meta_info *meta, int32_t target_off, if (sym) { add_reloc(ctx, target_slot, name, UNIT); EMIT_NUM( - ctx, sym->idx); // placeholder, will be resolved to inter-unit function } else { size_t idx = ffi_call_table_intern(ctx->ffi, name); add_reloc(ctx, target_slot, name, FFI); - EMIT_NUM(ctx, idx); // placeholder, will be resolved to FFI call + EMIT_NUM(idx); // placeholder, will be resolved to FFI call } } else { if (!validate_target_off(bc, target_off, current_bc_off, opname)) return false; - EMIT_NUM(ctx, 0); // placeholder — will hold code index + EMIT_NUM(0); // placeholder — will hold code index meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off) { @@ -345,7 +343,7 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, } size_t my_idx = 
ctx->code.len; - EMIT_NUM(ctx, 0); // placeholder — will hold code index + EMIT_NUM(0); // placeholder — will hold code index meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off) { @@ -389,21 +387,23 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, return true; } -#define DEPTH_INC(sv, n) \ +#define DEPTH_INC(n) \ do { \ - VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", (sv).depth, (sv).depth + (n), (n)); \ - (sv).depth += (n); \ - if ((sv).depth > (sv).max_depth) \ - (sv).max_depth = (sv).depth; \ + VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", ctx->sv.depth, ctx->sv.depth + (n), \ + (n)); \ + ctx->sv.depth += (n); \ + if (ctx->sv.depth > ctx->sv.max_depth) \ + ctx->sv.max_depth = ctx->sv.depth; \ } while (0) -#define DEPTH_DEC(sv, n) \ +#define DEPTH_DEC(n) \ do { \ - VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", (sv).depth, (sv).depth - (n), (n)); \ - (sv).depth -= (n); \ - assert((sv).depth >= 0 && "stack underflow"); \ + VM_DEBUG(" DEPTH: %d -> %d (-%d)\n", ctx->sv.depth, ctx->sv.depth - (n), \ + (n)); \ + ctx->sv.depth -= (n); \ + assert(ctx->sv.depth >= 0 && "stack underflow"); \ } while (0) -#define DEPTH_PUSH(sv) DEPTH_INC(sv, 1) -#define DEPTH_POP(sv) DEPTH_DEC(sv, 1) +#define DEPTH_PUSH() DEPTH_INC(1) +#define DEPTH_POP() DEPTH_DEC(1) static bool decode_internal(decode_ctx *ctx) { @@ -419,8 +419,8 @@ static bool decode_internal(decode_ctx *ctx) { meta[i].fixups = NULL; } - EMIT_FUNC(ctx, op_init); - EMIT_NUM(ctx, 0); // placeholder for op_eof + EMIT_FUNC(op_init); + EMIT_NUM(0); // placeholder for op_eof bool ok = false; @@ -476,91 +476,91 @@ static bool decode_internal(decode_ctx *ctx) { switch (opcode) { case OP_CONST: - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_const); - EMIT_NUM(ctx, reader_i32(&ctx->reader)); + DEPTH_PUSH(); + EMIT_FUNC(op_const); + EMIT_NUM(reader_i32(&ctx->reader)); break; case OP_BINOP_ADD: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_add); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_add); break; 
case OP_BINOP_SUB: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_sub); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_sub); break; case OP_BINOP_MUL: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_mul); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_mul); break; case OP_BINOP_DIV: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_div); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_div); break; case OP_BINOP_MOD: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_mod); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_mod); break; case OP_BINOP_LT: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_lt); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_lt); break; case OP_BINOP_LE: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_le); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_le); break; case OP_BINOP_GT: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_gt); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_gt); break; case OP_BINOP_GE: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_ge); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_ge); break; case OP_BINOP_EQ: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_eq); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_eq); break; case OP_BINOP_NE: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_ne); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_ne); break; case OP_BINOP_AND: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_and); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_and); break; case OP_BINOP_OR: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_or); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_or); break; case OP_JMP: - EMIT_FUNC(ctx, op_jmp); + EMIT_FUNC(op_jmp); if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } @@ -568,52 +568,52 @@ static bool decode_internal(decode_ctx 
*ctx) { break; case OP_CJMP_Z: - DEPTH_POP(ctx->sv); - EMIT_FUNC(ctx, op_cjmp_z); + DEPTH_POP(); + EMIT_FUNC(op_cjmp_z); if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } break; case OP_CJMP_NZ: - DEPTH_POP(ctx->sv); - EMIT_FUNC(ctx, op_cjmp_nz); + DEPTH_POP(); + EMIT_FUNC(op_cjmp_nz); if (!handle_jump(ctx, meta, current_bc_off)) { goto cleanup; } break; case OP_DROP: - DEPTH_POP(ctx->sv); - EMIT_FUNC(ctx, op_drop); + DEPTH_POP(); + EMIT_FUNC(op_drop); break; case OP_DUP: - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_dup); + DEPTH_PUSH(); + EMIT_FUNC(op_dup); break; case OP_SWAP: - DEPTH_DEC(ctx->sv, 2); - DEPTH_INC(ctx->sv, 2); - EMIT_FUNC(ctx, op_swap); + DEPTH_DEC(2); + DEPTH_INC(2); + EMIT_FUNC(op_swap); break; case OP_ELEM: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_elem); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_elem); break; case OP_STA: // TODO: - DEPTH_DEC(ctx->sv, 3); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_sta); + DEPTH_DEC(3); + DEPTH_PUSH(); + EMIT_FUNC(op_sta); break; case OP_LD: { - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); VM_DEBUG("DECODE: OP_LD global idx=%d\n", idx); emit_glo(ctx, idx, global_base, op_ld_glo); @@ -628,46 +628,46 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_LD_LOC: { - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); CHECK_IDX(idx, ctx->func.n_locals, "LD_LOC"); - EMIT_FUNC(ctx, op_ld_loc); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_ld_loc); + EMIT_NUM(idx); break; } case OP_ST_LOC: { int32_t idx = reader_i32(&ctx->reader); CHECK_IDX(idx, ctx->func.n_locals, "ST_LOC"); - EMIT_FUNC(ctx, op_st_loc); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_st_loc); + EMIT_NUM(idx); break; } case OP_LD_ARG: { - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); CHECK_IDX(idx, ctx->func.n_args, "LD_ARG"); - EMIT_FUNC(ctx, op_ld_arg); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_ld_arg); + EMIT_NUM(idx); break; } case 
OP_ST_ARG: { int32_t idx = reader_i32(&ctx->reader); CHECK_IDX(idx, ctx->func.n_args, "ST_ARG"); - EMIT_FUNC(ctx, op_st_arg); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_st_arg); + EMIT_NUM(idx); break; } case OP_LD_CLO: { - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); if (ctx->func.n_captured != -1) CHECK_IDX(idx, ctx->func.n_captured, "LD_CLO"); - EMIT_FUNC(ctx, op_ld_clo); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_ld_clo); + EMIT_NUM(idx); break; } @@ -675,56 +675,56 @@ static bool decode_internal(decode_ctx *ctx) { int32_t idx = reader_i32(&ctx->reader); if (ctx->func.n_captured != -1) CHECK_IDX(idx, ctx->func.n_captured, "ST_CLO"); - EMIT_FUNC(ctx, op_st_clo); - EMIT_NUM(ctx, idx); + EMIT_FUNC(op_st_clo); + EMIT_NUM(idx); break; } case OP_STRING: { - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); int32_t str_idx = reader_i32(&ctx->reader); - EMIT_FUNC(ctx, op_string); - EMIT_STR(ctx, bytecode_get_string(bc, str_idx)); + EMIT_FUNC(op_string); + EMIT_STR(bytecode_get_string(bc, str_idx)); break; } case OP_BARRAY: { int32_t n = reader_i32(&ctx->reader); - DEPTH_DEC(ctx->sv, n); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_barray); - EMIT_NUM(ctx, n); + DEPTH_DEC(n); + DEPTH_PUSH(); + EMIT_FUNC(op_barray); + EMIT_NUM(n); break; } case OP_SEXP: { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); - DEPTH_DEC(ctx->sv, n_fields); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_sexp); - EMIT_ANUM(ctx, LtagHash((char *)bytecode_get_string(bc, tag_idx))); - EMIT_NUM(ctx, n_fields); + DEPTH_DEC(n_fields); + DEPTH_PUSH(); + EMIT_FUNC(op_sexp); + EMIT_ANUM(LtagHash((char *)bytecode_get_string(bc, tag_idx))); + EMIT_NUM(n_fields); break; } case OP_TAG: { - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); + DEPTH_POP(); + DEPTH_PUSH(); int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); - EMIT_FUNC(ctx, op_tag); - EMIT_ANUM(ctx, LtagHash((char *)bytecode_get_string(bc, tag_idx))); - EMIT_NUM(ctx, 
n_fields); + EMIT_FUNC(op_tag); + EMIT_ANUM(LtagHash((char *)bytecode_get_string(bc, tag_idx))); + EMIT_NUM(n_fields); break; } case OP_ARRAY: { - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); + DEPTH_POP(); + DEPTH_PUSH(); int32_t n = reader_i32(&ctx->reader); - EMIT_FUNC(ctx, op_array); - EMIT_NUM(ctx, n); + EMIT_FUNC(op_array); + EMIT_NUM(n); break; } @@ -734,54 +734,54 @@ static bool decode_internal(decode_ctx *ctx) { int32_t line = reader_i32(&ctx->reader); int32_t col = reader_i32(&ctx->reader); if (!keep_value) { - DEPTH_POP(ctx->sv); + DEPTH_POP(); } - EMIT_FUNC(ctx, op_fail); - EMIT_NUM(ctx, line); - EMIT_NUM(ctx, col); + EMIT_FUNC(op_fail); + EMIT_NUM(line); + EMIT_NUM(col); break; } case OP_PATT_STR_CMP: - DEPTH_DEC(ctx->sv, 2); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_str_cmp); + DEPTH_DEC(2); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_str_cmp); break; case OP_PATT_STRING: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_string); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_string); break; case OP_PATT_ARRAY: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_array); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_array); break; case OP_PATT_SEXP: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_sexp); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_sexp); break; case OP_PATT_BOXED: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_boxed); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_boxed); break; case OP_PATT_UNBOXED: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_unboxed); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_unboxed); break; case OP_PATT_CLOSURE: - DEPTH_POP(ctx->sv); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_patt_closure); + DEPTH_POP(); + DEPTH_PUSH(); + EMIT_FUNC(op_patt_closure); break; case OP_BEGIN: @@ -798,11 +798,11 @@ static bool decode_internal(decode_ctx *ctx) { ? 
find_n_captured(ctx, current_bc_off) : 0}; - EMIT_FUNC(ctx, op_begin); - EMIT_NUM(ctx, n_args); - EMIT_NUM(ctx, n_locals); + EMIT_FUNC(op_begin); + EMIT_NUM(n_args); + EMIT_NUM(n_locals); ctx->sv.max_depth_pos = ctx->code.len; - EMIT_NUM(ctx, 0); // placeholder for max depth, will be patched + EMIT_NUM(0); // placeholder for max depth, will be patched break; } @@ -822,27 +822,27 @@ static bool decode_internal(decode_ctx *ctx) { int designation_type = type_byte & 0xF; switch (designation_type) { case 0: // Global - DEPTH_PUSH(ctx->sv); + DEPTH_PUSH(); emit_glo(ctx, idx, global_base, op_ld_glo); break; case 1: // Local CHECK_IDX(idx, ctx->func.n_locals, "CLOSURE desig local"); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_ld_loc); - EMIT_NUM(ctx, idx); + DEPTH_PUSH(); + EMIT_FUNC(op_ld_loc); + EMIT_NUM(idx); break; case 2: // Arg CHECK_IDX(idx, ctx->func.n_args, "CLOSURE desig arg"); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_ld_arg); - EMIT_NUM(ctx, idx); + DEPTH_PUSH(); + EMIT_FUNC(op_ld_arg); + EMIT_NUM(idx); break; case 3: // Closure var if (ctx->func.n_captured != -1) CHECK_IDX(idx, ctx->func.n_captured, "CLOSURE desig closure"); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_ld_clo); - EMIT_NUM(ctx, idx); + DEPTH_PUSH(); + EMIT_FUNC(op_ld_clo); + EMIT_NUM(idx); break; default: fprintf(stderr, "Unknown designation type: %d\n", designation_type); @@ -850,13 +850,13 @@ static bool decode_internal(decode_ctx *ctx) { } } - DEPTH_DEC(ctx->sv, n_captured); - DEPTH_PUSH(ctx->sv); + DEPTH_DEC(n_captured); + DEPTH_PUSH(); - EMIT_FUNC(ctx, op_closure); + EMIT_FUNC(op_closure); if (!emit_target(ctx, meta, target_off, current_bc_off, "CLOSURE")) goto cleanup; - EMIT_NUM(ctx, n_captured); + EMIT_NUM(n_captured); if (!IS_EXT_REF(target_off)) da_append(ctx->closures, ((closure_info){.bc_off = target_off, @@ -867,26 +867,26 @@ static bool decode_internal(decode_ctx *ctx) { case OP_CALL: { int32_t target_off = reader_i32(&ctx->reader); int32_t n_args = reader_i32(&ctx->reader); - 
DEPTH_DEC(ctx->sv, n_args); - DEPTH_PUSH(ctx->sv); + DEPTH_DEC(n_args); + DEPTH_PUSH(); VM_DEBUG("DECODE: OP_CALL target_off=0x%x n_args=%d " "current_bc_off=%zu code_idx=%zu\n", target_off, n_args, current_bc_off, ctx->code.len); - EMIT_FUNC(ctx, op_call); + EMIT_FUNC(op_call); if (!emit_target(ctx, meta, target_off, current_bc_off, "CALL")) goto cleanup; - EMIT_NUM(ctx, n_args); + EMIT_NUM(n_args); break; } case OP_CALLC: { int32_t n_args = reader_i32(&ctx->reader); - DEPTH_DEC(ctx->sv, n_args + 1); - DEPTH_PUSH(ctx->sv); - EMIT_FUNC(ctx, op_callc); - EMIT_NUM(ctx, n_args); + DEPTH_DEC(n_args + 1); + DEPTH_PUSH(); + EMIT_FUNC(op_callc); + EMIT_NUM(n_args); break; } @@ -897,7 +897,7 @@ static bool decode_internal(decode_ctx *ctx) { ctx->sv.depth, current_bc_off); goto cleanup; } - EMIT_FUNC(ctx, op_end); + EMIT_FUNC(op_end); ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; ctx->sv.state = BARRIER; break; @@ -905,8 +905,8 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LINE: { #ifdef DEBUG_PRINT int32_t line = reader_i32(&ctx->reader); - EMIT_FUNC(ctx, op_line); - EMIT_NUM(ctx, line); + EMIT_FUNC(op_line); + EMIT_NUM(line); #else reader_skip(&ctx->reader, 4); #endif From b5d9a50b4cb6e85e9a8074e6a9c4291ed27e30ca Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 00:50:24 +0300 Subject: [PATCH 096/141] refactor emitting global --- virtual_machine/converter.c | 91 ++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 1b3f1f40a..e306db028 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -197,33 +197,6 @@ static int32_t find_n_captured(decode_ctx *ctx, int32_t bc_off) { return -1; } -/* - * Resolve an external C global -- prefix with "global_", dlsym, cache. 
- */ -static void *resolve_ext_global(ext_global_cache *cache, const char *name) { - for (size_t i = 0; i < cache->entries.len; i++) { - if (strcmp(cache->entries.data[i].name, name) == 0) { - return cache->entries.data[i].ptr; - } - } - - size_t nlen = strlen(name); - char prefixed[sizeof(GLOBAL_PREFIX) + nlen]; - memcpy(prefixed, GLOBAL_PREFIX, sizeof(GLOBAL_PREFIX) - 1); - memcpy(prefixed + sizeof(GLOBAL_PREFIX) - 1, name, nlen + 1); - - void *ptr = dlsym(RTLD_DEFAULT, prefixed); - if (!ptr) { - fprintf(stderr, "Error: unresolved global '%s' (tried '%s')\n", name, - prefixed); - return NULL; - } - - ext_global_entry entry = {.name = name, .ptr = ptr}; - da_append(cache->entries, entry); - return ptr; -} - static void free_decoded_arr(decoded *arr, size_t n) { for (size_t i = 0; i < n; i++) { free(arr[i].code); @@ -259,33 +232,59 @@ static bool validate_target_off(const bytecode *bc, int32_t target_off, return true; } -static bool emit_ext_glo(decode_ctx *ctx, const char *glob_name, fn op) { - resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); - if (sym) { - // Global from another unit - EMIT_FUNC(op); - EMIT_GLOBAL_PTR(&ctx->globals[sym->idx]); - return true; +/* + * Resolve an external C global -- prefix with "global_", dlsym, cache. 
+ */ +static void *resolve_ext_global_ptr(ext_global_cache *cache, const char *name) { + for (size_t i = 0; i < cache->entries.len; i++) { + if (strcmp(cache->entries.data[i].name, name) == 0) { + return cache->entries.data[i].ptr; + } } - // C global - void *ptr = resolve_ext_global(ctx->ext_globals, glob_name); + + size_t nlen = strlen(name); + char prefixed[sizeof(GLOBAL_PREFIX) + nlen]; + memcpy(prefixed, GLOBAL_PREFIX, sizeof(GLOBAL_PREFIX) - 1); + memcpy(prefixed + sizeof(GLOBAL_PREFIX) - 1, name, nlen + 1); + + void *ptr = dlsym(RTLD_DEFAULT, prefixed); if (!ptr) { - return false; + fprintf(stderr, "Error: unresolved global '%s' (tried '%s')\n", name, + prefixed); + return NULL; } - EMIT_FUNC(op); - EMIT_GLOBAL_PTR((aint *)ptr); - return true; + + ext_global_entry entry = {.name = name, .ptr = ptr}; + da_append(cache->entries, entry); + return ptr; +} + +static aint *resolve_global_ptr(decode_ctx *ctx, int32_t idx, + size_t global_base) { + if (!IS_EXT_REF(idx)) { + return &ctx->globals[global_base + idx]; + } + + int str_offset = EXT_REF_INDEX(idx); + const char *glob_name = bytecode_get_string(ctx->bc, str_offset); + VM_DEBUG("DECODE: external global '%s'\n", glob_name); + + resolved_symbol *sym = symbol_table_find_global(ctx->st, glob_name); + if (sym) { + return &ctx->globals[sym->idx]; + } + + return (aint *)resolve_ext_global_ptr(ctx->ext_globals, glob_name); } static bool emit_glo(decode_ctx *ctx, int32_t idx, size_t global_base, fn op) { - if (IS_EXT_REF(idx)) { - int str_offset = EXT_REF_INDEX(idx); - const char *glob_name = bytecode_get_string(ctx->bc, str_offset); - VM_DEBUG("DECODE: external global '%s'\n", glob_name); - return emit_ext_glo(ctx, glob_name, op); + aint *ptr = resolve_global_ptr(ctx, idx, global_base); + if (!ptr) { + return false; } + EMIT_FUNC(op); - EMIT_GLOBAL_PTR(&ctx->globals[global_base + idx]); + EMIT_GLOBAL_PTR(ptr); return true; } From fc134c35a1935654556cede003ea82862c5fdb1e Mon Sep 17 00:00:00 2001 From: ancavar Date: 
Wed, 8 Apr 2026 00:55:08 +0300 Subject: [PATCH 097/141] `op_ld` and `op_st` to `op_ld_glo` and `op_st_glo` --- virtual_machine/converter.c | 8 ++++---- virtual_machine/opcodes.c | 8 ++++---- virtual_machine/opcodes.h | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index e306db028..076dd3864 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -611,17 +611,17 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_FUNC(op_sta); break; - case OP_LD: { + case OP_LD_GLO: { DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); - VM_DEBUG("DECODE: OP_LD global idx=%d\n", idx); + VM_DEBUG("DECODE: OP_LD_GLO idx=%d\n", idx); emit_glo(ctx, idx, global_base, op_ld_glo); break; } - case OP_ST: { + case OP_ST_GLO: { int32_t idx = reader_i32(&ctx->reader); - VM_DEBUG("DECODE: OP_ST global idx=%d\n", idx); + VM_DEBUG("DECODE: OP_ST_GLO idx=%d\n", idx); emit_glo(ctx, idx, global_base, op_st_glo); break; } diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 04f8af7cd..8f19b34a9 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -50,16 +50,16 @@ const char *opcode_to_string(uint8_t opcode) { return "SWAP"; case OP_ELEM: return "ELEM"; - case OP_LD: - return "LD"; + case OP_LD_GLO: + return "LD.GLO"; case OP_LD_LOC: return "LD.LOC"; case OP_LD_ARG: return "LD.ARG"; case OP_LD_CLO: return "LD.CLO"; - case OP_ST: - return "ST"; + case OP_ST_GLO: + return "ST.GLO"; case OP_ST_LOC: return "ST.LOC"; case OP_ST_ARG: diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 821bca99c..56a738779 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -27,11 +27,11 @@ typedef enum { OP_DUP = 0x19, OP_SWAP = 0x1A, OP_ELEM = 0x1B, - OP_LD = 0x20, + OP_LD_GLO = 0x20, OP_LD_LOC = 0x21, OP_LD_ARG = 0x22, OP_LD_CLO = 0x23, - OP_ST = 0x40, + OP_ST_GLO = 0x40, OP_ST_LOC = 0x41, OP_ST_ARG = 0x42, OP_ST_CLO = 0x43, From 
3a77e64bb6fe68bd42d1abfce4266a83a01785a6 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 04:31:20 +0300 Subject: [PATCH 098/141] some validation --- virtual_machine/converter.c | 205 +++++++++++++++++++++++++----------- virtual_machine/ops.c | 49 +++++---- virtual_machine/ops.h | 1 + 3 files changed, 176 insertions(+), 79 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 076dd3864..b1db9cbad 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -55,6 +55,12 @@ typedef enum { FFI, // FFI call } reloc_kind; +typedef enum { + TARGET_JUMP, // must not land on function entry or EOF + TARGET_CALL, // must land on OP_BEGIN + TARGET_CLOSURE, // must land on OP_BEGIN or OP_BEGIN_CLOSURE +} target_kind; + typedef struct { size_t patch_idx; const char *name; @@ -62,24 +68,21 @@ typedef struct { } reloc; typedef struct fixup_node { - size_t insn_idx; // Index in code array that needs the jump target struct fixup_node *next; + size_t insn_idx; // Index in code array that needs the jump target + size_t origin_bc_off; // Source bytecode offset } fixup_node; // Metadata for each bytecode offset typedef struct { insn *insn; // NULL if not visited + fixup_node *fixups; // Linked list of forward jumps pointing here int32_t resolved_idx; // Index in generated code array (-1 if not visited) int32_t stack_depth; // Expected stack depth (-1 if not visited yet) - fixup_node *fixups; // Linked list of forward jumps pointing here + int32_t n_captured; // n_captured for CLOSURE targets (-1 if not a target) + int32_t func_idx; // Current function entry offset (-1 outside any function) } meta_info; -// Maps CLOSURE target bytecode offsets to their n_captured -typedef struct { - int32_t bc_off; - int32_t n_captured; -} closure_info; - typedef struct { insn *code; size_t code_len; @@ -152,13 +155,8 @@ typedef struct { stack_validation sv; - struct { - closure_info *data; - size_t len; - size_t cap; - } closures; // 
CLOSURE target_off -> n_captured mapping - func_ctx func; + int32_t func_idx; // Current function entry offset (-1 outside any function) } decode_ctx; static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, @@ -173,30 +171,18 @@ static void decode_ctx_init(decode_ctx *ctx, const bytecode *bc, da_init(ctx->code); da_init(ctx->relocs); - da_init(ctx->closures); ctx->st = st; ctx->ffi = ffi; ctx->ext_globals = ext_globals; ctx->sv = (stack_validation){0}; - ctx->func = (func_ctx){0}; + ctx->func = (func_ctx){.n_captured = -1}; + ctx->func_idx = -1; reader_init(&ctx->reader, bc->code, bc->code_size); } -/* - * Returns n_captured for a CLOSURE target, or -1 if no CLOSURE targets this - * offset - */ -static int32_t find_n_captured(decode_ctx *ctx, int32_t bc_off) { - for (size_t i = 0; i < ctx->closures.len; i++) { - if (ctx->closures.data[i].bc_off == bc_off) - return ctx->closures.data[i].n_captured; - } - return -1; -} - static void free_decoded_arr(decoded *arr, size_t n) { for (size_t i = 0; i < n; i++) { free(arr[i].code); @@ -212,21 +198,47 @@ static void add_reloc(decode_ctx *ctx, size_t patch_idx, const char *name, } static fixup_node *add_fixup(meta_info *meta, size_t target_off, - size_t insn_idx) { + size_t insn_idx, size_t origin_bc_off) { fixup_node *node = ALLOC(fixup_node); node->insn_idx = insn_idx; + node->origin_bc_off = origin_bc_off; node->next = meta[target_off].fixups; meta[target_off].fixups = node; return node; } +static inline bool opcode_is_func_entry(uint8_t opcode) { + return opcode == OP_BEGIN || opcode == OP_BEGIN_CLOSURE; +} + +/* + * Validate that an internal target is valid: in range, and has a correct + * opcode. 
+ */ static bool validate_target_off(const bytecode *bc, int32_t target_off, - size_t current_bc_off, const char *op_name) { - if (target_off >= (int32_t)bc->code_size) { - fprintf( - stderr, - "Error: %s target_off=%d out of range (bc_off=%zu, code_size=%zu)\n", - op_name, target_off, current_bc_off, bc->code_size); + size_t current_bc_off, target_kind kind) { + if (target_off < 0 || target_off >= (int32_t)bc->code_size) { + fprintf(stderr, "Error: target_off=%d out of range at bc_off=%zu\n", + target_off, current_bc_off); + return false; + } + + uint8_t got = bc->code[target_off]; + bool bad; + switch (kind) { + case TARGET_JUMP: + bad = opcode_is_func_entry(got) || got == OP_EOF; + break; + case TARGET_CALL: + bad = got != OP_BEGIN; + break; + case TARGET_CLOSURE: + bad = !opcode_is_func_entry(got); + break; + } + if (bad) { + fprintf(stderr, "Error: bad target %s at bc_off=%zu, target=%d\n", + opcode_to_string(got), current_bc_off, target_off); return false; } return true; @@ -292,14 +304,15 @@ static bool emit_glo(decode_ctx *ctx, int32_t idx, size_t global_base, fn op) { * Emit target slot for CALL/CLOSURE, handles external and internal targets. 
*/ static bool emit_target(decode_ctx *ctx, meta_info *meta, int32_t target_off, - size_t current_bc_off, const char *opname) { + size_t current_bc_off, target_kind kind) { const bytecode *bc = ctx->bc; size_t target_slot = ctx->code.len; if (IS_EXT_REF(target_off)) { int str_offset = EXT_REF_INDEX(target_off); const char *name = bytecode_get_string(bc, str_offset); - VM_DEBUG("DECODE: %s external '%s'\n", opname, name); + VM_DEBUG("DECODE: %s external target '%s' at bc_off=%zu\n", + opcode_to_string(bc->code[current_bc_off]), name, current_bc_off); resolved_symbol *sym = symbol_table_find_function(ctx->st, name); if (sym) { @@ -312,7 +325,7 @@ static bool emit_target(decode_ctx *ctx, meta_info *meta, int32_t target_off, EMIT_NUM(idx); // placeholder, will be resolved to FFI call } } else { - if (!validate_target_off(bc, target_off, current_bc_off, opname)) + if (!validate_target_off(bc, target_off, current_bc_off, kind)) return false; EMIT_NUM(0); // placeholder — will hold code index @@ -323,7 +336,7 @@ static bool emit_target(decode_ctx *ctx, meta_info *meta, int32_t target_off, ctx->code.data[target_slot].num = tm->resolved_idx; add_reloc(ctx, target_slot, NULL, INTERNAL); } else { - add_fixup(meta, target_off, target_slot); + add_fixup(meta, target_off, target_slot, current_bc_off); } } return true; @@ -337,7 +350,7 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, int32_t target_off = reader_i32(&ctx->reader); int32_t depth = ctx->sv.depth; - if (!validate_target_off(ctx->bc, target_off, current_bc_off, "JUMP")) { + if (!validate_target_off(ctx->bc, target_off, current_bc_off, TARGET_JUMP)) { return false; } @@ -347,6 +360,13 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off) { // Backward jump — target was already visited by sequential decode + if (tm->func_idx != ctx->func_idx) { + fprintf( + stderr, + "Error: backward jump escapes function at bc_off=%zu, 
target=%d\n", + current_bc_off, target_off); + return false; + } assert(tm->resolved_idx != -1 && "backward jump target must have been visited"); ctx->code.data[my_idx].num = tm->resolved_idx; @@ -365,9 +385,7 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, } } else { // Forward jump — add fixup - if (!add_fixup(meta, target_off, my_idx)) { - return false; - } + add_fixup(meta, target_off, my_idx, current_bc_off); VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", target_off, depth, tm->stack_depth); if (tm->stack_depth == -1) { @@ -415,6 +433,8 @@ static bool decode_internal(decode_ctx *ctx) { for (size_t i = 0; i < bc->code_size; i++) { meta[i].resolved_idx = -1; meta[i].stack_depth = -1; + meta[i].n_captured = -1; + meta[i].func_idx = -1; meta[i].fixups = NULL; } @@ -431,8 +451,19 @@ static bool decode_internal(decode_ctx *ctx) { opcode_to_string(opcode), opcode, ctx->sv.depth, ctx->sv.state == BARRIER ? " [barrier]" : ""); + // Validate no nested function + if (opcode_is_func_entry(opcode)) { + if (ctx->func_idx != -1) { + fprintf(stderr, "Error: nested function at bc_off=%zu\n", + current_bc_off); + goto cleanup; + } + ctx->func_idx = (int32_t)current_bc_off; + } + meta_info *m = &meta[current_bc_off]; m->resolved_idx = (int32_t)ctx->code.len; + m->func_idx = ctx->func_idx; // Validate stack depth at intersections if (ctx->sv.state == BARRIER) { @@ -461,6 +492,20 @@ static bool decode_internal(decode_ctx *ctx) { // relocation fixup_node *f = m->fixups; while (f) { + // Validate jumps + if (meta[f->origin_bc_off].func_idx != m->func_idx) { + uint8_t origin_opcode = bc->code[f->origin_bc_off]; + bool is_jump = origin_opcode == OP_JMP || origin_opcode == OP_CJMP_Z || + origin_opcode == OP_CJMP_NZ; + if (is_jump) { + fprintf(stderr, + "Error: forward jump escapes function at bc_off=%zu -> " + "target=%zu\n", + f->origin_bc_off, current_bc_off); + goto cleanup; + } + } + VM_DEBUG("DECODE: Resolving fixup at bc_off=%zu: insn_idx=%zu 
-> " "code_idx=%zu\n", current_bc_off, f->insn_idx, ctx->code.len); @@ -473,6 +518,14 @@ static bool decode_internal(decode_ctx *ctx) { } m->fixups = NULL; + // Validate no instructions outside function bodies (except EOF) + if (ctx->func_idx == -1 && opcode != OP_EOF) { + fprintf(stderr, + "Error: instruction %s outside function body at bc_off=%zu\n", + opcode_to_string(opcode), current_bc_off); + goto cleanup; + } + switch (opcode) { case OP_CONST: DEPTH_PUSH(); @@ -790,14 +843,13 @@ static bool decode_internal(decode_ctx *ctx) { ctx->sv.depth = 0; ctx->sv.max_depth = 0; - ctx->func = - (func_ctx){.n_args = n_args, - .n_locals = n_locals, - .n_captured = (opcode == OP_BEGIN_CLOSURE) - ? find_n_captured(ctx, current_bc_off) - : 0}; + ctx->func = (func_ctx){.n_args = n_args, + .n_locals = n_locals, + .n_captured = (opcode == OP_BEGIN_CLOSURE) + ? meta[current_bc_off].n_captured + : 0}; - EMIT_FUNC(op_begin); + EMIT_FUNC(opcode == OP_BEGIN_CLOSURE ? op_begin_closure : op_begin); EMIT_NUM(n_args); EMIT_NUM(n_locals); ctx->sv.max_depth_pos = ctx->code.len; @@ -853,13 +905,22 @@ static bool decode_internal(decode_ctx *ctx) { DEPTH_PUSH(); EMIT_FUNC(op_closure); - if (!emit_target(ctx, meta, target_off, current_bc_off, "CLOSURE")) + if (!emit_target(ctx, meta, target_off, current_bc_off, TARGET_CLOSURE)) goto cleanup; EMIT_NUM(n_captured); - if (!IS_EXT_REF(target_off)) - da_append(ctx->closures, ((closure_info){.bc_off = target_off, - .n_captured = n_captured})); + // Validate CLOSURE target's n_captured consistency + if (!IS_EXT_REF(target_off)) { + if (meta[target_off].n_captured != -1 && + meta[target_off].n_captured != n_captured) { + fprintf(stderr, + "Error: mismatched CLOSURE arity at target=%d " + "(expected %d, got %d)\n", + target_off, meta[target_off].n_captured, n_captured); + goto cleanup; + } + meta[target_off].n_captured = n_captured; + } break; } @@ -874,7 +935,7 @@ static bool decode_internal(decode_ctx *ctx) { target_off, n_args, current_bc_off, 
ctx->code.len); EMIT_FUNC(op_call); - if (!emit_target(ctx, meta, target_off, current_bc_off, "CALL")) + if (!emit_target(ctx, meta, target_off, current_bc_off, TARGET_CALL)) goto cleanup; EMIT_NUM(n_args); break; @@ -899,6 +960,8 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_FUNC(op_end); ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; ctx->sv.state = BARRIER; + ctx->func = (func_ctx){.n_captured = -1}; + ctx->func_idx = -1; break; case OP_LINE: { @@ -913,6 +976,11 @@ static bool decode_internal(decode_ctx *ctx) { } case OP_EOF: + if (ctx->func_idx != -1) { + fprintf(stderr, "Error: EOF inside function body at bc_off=%zu\n", + current_bc_off); + goto cleanup; + } if (current_bc_off + 1 != bc->code_size) { fprintf(stderr, "Error: EOF opcode before end of bytecode at bc_off=%zu\n", @@ -950,7 +1018,6 @@ static bool decode_internal(decode_ctx *ctx) { } } free(meta); - da_free(ctx->closures); return ok; } @@ -1003,7 +1070,7 @@ static bool register_public_symbols(symbol_table *st, const bytecode *bc, * Resolve relocs / placeholders in the final code array after all units are * decoded and merged. */ -static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, +static bool resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, size_t ffi_call_offset) { for (size_t j = 0; j < dec->relocs_len; j++) { reloc rel = dec->relocs[j]; @@ -1015,7 +1082,22 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, break; } case UNIT: { - all_code[slot].target = &all_code[target_idx]; + // Validate inter-unit CALL/CLOSURE targets + insn *target = &all_code[target_idx]; + fn caller = all_code[slot - 1].func; + assert(caller == op_call || caller == op_closure); + const char *caller_name = caller == op_call ? "CALL" : "CLOSURE"; + const char *target_name = + caller == op_call ? "BEGIN" : "BEGIN/BEGIN_CLOSURE"; + bool ok = caller == op_call ? 
target->func == op_begin + : target->func == op_begin || + target->func == op_begin_closure; + if (!ok) { + fprintf(stderr, "Error: inter-unit %s to non-%s function '%s'\n", + caller_name, target_name, rel.name); + return false; + } + all_code[slot].target = target; break; } case FFI: { @@ -1025,6 +1107,7 @@ static void resolve_relocs(insn *all_code, decoded *dec, size_t code_offset, } } } + return true; } static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, @@ -1044,7 +1127,11 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, // Move instructions into final code array memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); entry_points[i] = &all_code[code_offset]; - resolve_relocs(all_code, dec, code_offset, ffi_call_offset); + if (!resolve_relocs(all_code, dec, code_offset, ffi_call_offset)) { + free(all_code); + free(entry_points); + return NULL; + } all_code[code_offset + 1].target = &eof_ip; diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index aae1af2c0..bc0c03777 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -446,28 +446,37 @@ void op_st_clo(DECL_STATE) { /* * Function call operations */ -void op_begin(DECL_STATE) { - ip++; - int32_t n_args = ip->num; - (void)n_args; - ip++; - int32_t n_locals = ip->num; - ip++; - int32_t max_depth = ip->num; - - VM_DEBUG("BEGIN n_args=%d n_locals=%d max_depth=%d bp=%p sp=%p\n", n_args, - n_locals, max_depth, (void *)bp, (void *)sp); - - for (int32_t i = 0; i < n_locals; i++) { - STACK_PUSH(sp, 0); +#define DEFINE_BEGIN(name) \ + void name(DECL_STATE) { \ + ip++; \ + int32_t n_args = ip->num; \ + (void)n_args; \ + ip++; \ + int32_t n_locals = ip->num; \ + ip++; \ + int32_t max_depth = ip->num; \ + \ + VM_DEBUG("BEGIN n_args=%d n_locals=%d max_depth=%d bp=%p sp=%p\n", n_args, \ + n_locals, max_depth, (void *)bp, (void *)sp); \ + \ + for (int32_t i = 0; i < n_locals; i++) { \ + STACK_PUSH(sp, 0); \ + } \ + \ + aint 
*offset = sp - max_depth; \ + memset(offset + 1, 0, max_depth * sizeof(aint)); \ + __gc_stack_top = (size_t)offset; \ + \ + DISPATCH(); \ } - aint *offset = sp - max_depth; - memset(offset + 1, 0, max_depth * sizeof(aint)); - __gc_stack_top = (size_t)offset; - - DISPATCH(); -} +/* + * The distinction is made at the opcode level to allow for easier validation + * during decoding (and maybe for future things). + */ +DEFINE_BEGIN(op_begin) +DEFINE_BEGIN(op_begin_closure) +#undef DEFINE_BEGIN void op_call(DECL_STATE) { ip++; diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index 907a1b053..732321465 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -55,6 +55,7 @@ void op_ld_clo(DECL_STATE); void op_st_clo(DECL_STATE); void op_begin(DECL_STATE); +void op_begin_closure(DECL_STATE); void op_call(DECL_STATE); void op_callc(DECL_STATE); void op_end(DECL_STATE); From a55f9e98e161bbf448c96f29b97cec91160b7872 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 05:09:17 +0300 Subject: [PATCH 099/141] cleanup --- virtual_machine/converter.c | 13 ++++++------- virtual_machine/ops.c | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index b1db9cbad..30a8c4316 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -15,7 +15,7 @@ #include #include -extern aint LtagHash(char *s); +extern aint LtagHash(const char *s); /* * Sentinel value for external references (both functions and globals). 
@@ -41,8 +41,8 @@ extern aint LtagHash(char *s); #define CHECK_IDX(idx, limit, name) \ do { \ if ((idx) < 0 || (idx) >= (limit)) { \ - fprintf(stderr, "%s: index %d >= %d at bc_off=%zu\n", name, (int)(idx), \ - (int)(limit), current_bc_off); \ + fprintf(stderr, "%s: index %d out of range [0, %d) at bc_off=%zu\n", \ + name, (int)(idx), (int)(limit), current_bc_off); \ goto cleanup; \ } \ } while (0) @@ -755,7 +755,7 @@ static bool decode_internal(decode_ctx *ctx) { DEPTH_DEC(n_fields); DEPTH_PUSH(); EMIT_FUNC(op_sexp); - EMIT_ANUM(LtagHash((char *)bytecode_get_string(bc, tag_idx))); + EMIT_ANUM(LtagHash(bytecode_get_string(bc, tag_idx))); EMIT_NUM(n_fields); break; } @@ -766,7 +766,7 @@ static bool decode_internal(decode_ctx *ctx) { int32_t tag_idx = reader_i32(&ctx->reader); int32_t n_fields = reader_i32(&ctx->reader); EMIT_FUNC(op_tag); - EMIT_ANUM(LtagHash((char *)bytecode_get_string(bc, tag_idx))); + EMIT_ANUM(LtagHash(bytecode_get_string(bc, tag_idx))); EMIT_NUM(n_fields); break; } @@ -782,10 +782,9 @@ static bool decode_internal(decode_ctx *ctx) { case OP_FAIL: case OP_FAIL_KEEP: { - bool keep_value = (opcode & 1) == 0; int32_t line = reader_i32(&ctx->reader); int32_t col = reader_i32(&ctx->reader); - if (!keep_value) { + if (opcode == OP_FAIL) { DEPTH_POP(); } EMIT_FUNC(op_fail); diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index bc0c03777..54e6f4e94 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -26,7 +26,6 @@ extern aint Ls__Infix_3361(void *p, void *q); // != extern aint Ls__Infix_3838(void *p, void *q); // && extern aint Ls__Infix_3333(void *p, void *q); // || -extern aint LtagHash(char *s); extern void *Barray(aint *args, aint bn); extern void *Bsexp(aint *args, aint bn); extern void *Bclosure(aint *args, aint bn); From e9c81d840e89202c2b23e92a8271d148055b8567 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 18:29:56 +0300 Subject: [PATCH 100/141] add `extern_funcs` --- src/SM.ml | 14 ++++++++++---- 1 file 
changed, 10 insertions(+), 4 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index f656101cd..114c0e69e 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -174,6 +174,7 @@ module ByteCode = struct let imports = Stdlib.ref [] in let globals = Hashtbl.create 16 in let extern_globals = Stdlib.ref S.empty in + let extern_funcs = Stdlib.ref S.empty in let fixups = Stdlib.ref [] in let func_fixups = Stdlib.ref [] in let add_lab l = Hashtbl.replace lmap l (Buffer.length code) in @@ -185,7 +186,9 @@ module ByteCode = struct i in let add_extern name is_fun = - if not is_fun then + if is_fun then + extern_funcs := S.add name !extern_funcs + else extern_globals := S.add name !extern_globals in let add_public name is_fun = @@ -377,9 +380,12 @@ module ByteCode = struct let resolved_addr = try Hashtbl.find lmap l with Not_found -> - (* External function: use negative string offset *) - let str_off = StringTab.add st l in - -(str_off + 1) + if S.mem l !extern_funcs then + (* External function: use negative string offset *) + let str_off = StringTab.add st l in + -(str_off + 1) + else + failwith (Printf.sprintf "ERROR: undefined function '%s'" l) in Bytes.set_int32_le code addr_ofs (Int32.of_int resolved_addr)) !func_fixups; From c63ebcbd657b7dce0b2e884f7077ad25556c6b5a Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 19:39:50 +0300 Subject: [PATCH 101/141] detect cycles --- virtual_machine/loader.c | 60 +++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index d655fa423..87559b082 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -22,6 +22,20 @@ typedef struct { size_t cap; } bytecode_array; +typedef struct { + const char **data; + size_t len; + size_t cap; +} name_array; + +static bool is_loading(const name_array *stack, const char *name) { + for (size_t i = 0; i < stack->len; i++) { + if (strcmp(stack->data[i], name) == 0) + return true; + } + return 
false; +} + static void free_loaded_units(bytecode_array *units) { for (size_t i = 0; i < units->len; i++) { bytecode_free(units->data[i]); @@ -86,10 +100,13 @@ static char *extract_unit_name(const char *filename) { /* * Load a single unit and its dependencies recursively. */ -static bool load_unit_recursive(bytecode_array *units, const char *unit_name, - bytecode *bc, const search_paths *paths) { +static bool load_unit_recursive(bytecode_array *units, name_array *loading, + const char *unit_name, bytecode *bc, + const search_paths *paths) { bc->name = ESTRDUP(unit_name); + da_append(*loading, unit_name); + // Recursively load dependencies first (topological order) const char *import_name; bytecode_iterator iter; @@ -101,6 +118,12 @@ static bool load_unit_recursive(bytecode_array *units, const char *unit_name, continue; } + if (is_loading(loading, import_name)) { + fprintf(stderr, "Circular dependency: '%s' -> '%s'\n", unit_name, + import_name); + goto fail; + } + if (find_loaded(units, import_name)) { continue; } @@ -108,45 +131,56 @@ static bool load_unit_recursive(bytecode_array *units, const char *unit_name, bytecode *dep_bc = load_unit_from_paths(import_name, paths); if (!dep_bc) { fprintf(stderr, "Failed to load dependency '%s'\n", import_name); - bytecode_free(bc); - return false; + goto fail; } - if (!load_unit_recursive(units, import_name, dep_bc, paths)) { - bytecode_free(bc); - return false; + if (!load_unit_recursive(units, loading, import_name, dep_bc, paths)) { + goto fail; } } + loading->len--; da_append(*units, bc); return true; + +fail: + loading->len--; + bytecode_free(bc); + return false; } load_result load(const char *main_unit_path, const search_paths *paths) { bytecode_array m; da_init(m); + name_array loading; + da_init(loading); + bool is_path = is_filepath(main_unit_path); bytecode *bc = is_path ? 
bytecode_load(main_unit_path) : load_unit_from_paths(main_unit_path, paths); if (!bc) { fprintf(stderr, "Failed to load unit '%s'\n", main_unit_path); - return (load_result){0}; + goto cleanup; } char *unit_name = is_path ? extract_unit_name(main_unit_path) : ESTRDUP(main_unit_path); - if (!load_unit_recursive(&m, unit_name, bc, paths)) { + if (!load_unit_recursive(&m, &loading, unit_name, bc, paths)) { free(unit_name); - free_loaded_units(&m); - return (load_result){0}; + goto cleanup; } free(unit_name); + da_free(loading); - load_result result = { + return (load_result){ .units = m.data, .units_len = m.len, }; - return result; + +cleanup: + da_free(loading); + free_loaded_units(&m); + return (load_result){0}; } From e31211d6d8fa31d0561af0dcdcc516a9093894e8 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 20:24:16 +0300 Subject: [PATCH 102/141] some validation concerns from `fuzzing` --- runtime/runtime.c | 2 +- virtual_machine/bytecode.c | 64 +++++++++++++++++++++++++++++-------- virtual_machine/bytecode.h | 1 + virtual_machine/converter.c | 6 ++++ virtual_machine/ops.c | 2 +- virtual_machine/vm.c | 4 ++- 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/runtime/runtime.c b/runtime/runtime.c index fc8b28468..ace5f2411 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1292,7 +1292,7 @@ extern aint Lwrite (aint n) { printf("%" PRIdAI "\n", UNBOX(n)); fflush(stdout); - return 0; + return BOX(0); } extern aint Lrandom (aint n) { diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index c20b6da9e..de9fb70c6 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -16,43 +16,66 @@ #define IMPORT_ENTRY_SIZE 4 bytecode *bytecode_load_fd(int fd) { + bytecode *bc = NULL; + const uint8_t *data = MAP_FAILED; + size_t file_size = 0; struct stat st; if (fstat(fd, &st) < 0) { perror("bytecode_load: fstat"); - close(fd); - return NULL; + goto out; } - size_t file_size = (size_t)st.st_size; + file_size = 
(size_t)st.st_size; - const uint8_t *data = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (file_size == 0) { + fprintf(stderr, "bytecode_load: empty file\n"); + goto out; + } + data = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0); if (data == MAP_FAILED) { perror("bytecode_load: mmap"); - close(fd); - return NULL; + goto out; } - close(fd); - byte_reader reader; reader_init(&reader, data, file_size); + if (file_size < HEADER_SIZE) { + fprintf(stderr, "bytecode_load: file too small for header (%zu bytes)\n", + file_size); + goto out; + } + int32_t string_table_size = reader_i32(&reader); int32_t globals_count = reader_i32(&reader); int32_t num_imports = reader_i32(&reader); int32_t num_pubs = reader_i32(&reader); + if (string_table_size < 0 || globals_count < 0 || num_imports < 0 || + num_pubs < 0) { + fprintf(stderr, "bytecode_load: negative header field\n"); + goto out; + } + size_t st_offset = HEADER_SIZE; size_t imports_offset = st_offset + (size_t)string_table_size; size_t pubs_offset = imports_offset + (size_t)num_imports * IMPORT_ENTRY_SIZE; size_t code_offset = pubs_offset + (size_t)num_pubs * PUB_ENTRY_SIZE; + + if (code_offset > file_size) { + fprintf(stderr, + "bytecode_load: sections exceed file size (code_offset=%zu, " + "file_size=%zu)\n", + code_offset, file_size); + goto out; + } + size_t code_size = file_size - code_offset; - bytecode *bc; if (data[code_offset + code_size - 1] != OP_EOF) { fprintf(stderr, "bytecode_load: bytecode must end with EOF opcode\n"); - goto err_unmap; + goto out; } const char *string_table = (const char *)data + st_offset; @@ -77,11 +100,12 @@ bytecode *bytecode_load_fd(int fd) { // will be set later bc->name = NULL; +out: + close(fd); + if (!bc && data != MAP_FAILED) { + munmap((void *)data, file_size); + } return bc; - -err_unmap: - munmap((void *)data, file_size); - return NULL; } bytecode *bytecode_load(const char *filename) { @@ -97,6 +121,7 @@ bytecode *bytecode_load(const char *filename) { void 
bytecode_pubs_init(bytecode_iterator *iter, const bytecode *bc) { reader_init(&iter->reader, bc->pubs, bc->pubs_len * PUB_ENTRY_SIZE); iter->string_table = bc->string_table; + iter->string_table_size = bc->string_table_size; iter->len = bc->pubs_len; iter->curr = 0; } @@ -106,6 +131,11 @@ bool bytecode_pubs_next(bytecode_iterator *iter, public_symbol *out) { return false; } int32_t name_offset = reader_i32(&iter->reader); + if (name_offset < 0 || (size_t)name_offset >= iter->string_table_size) { + fprintf(stderr, "bytecode_pubs_next: name_offset %d out of range\n", + name_offset); + return false; + } out->name = iter->string_table + name_offset; out->code_offset = reader_i32(&iter->reader); out->flag = reader_u8(&iter->reader); @@ -117,6 +147,7 @@ bool bytecode_pubs_next(bytecode_iterator *iter, public_symbol *out) { void bytecode_imports_init(bytecode_iterator *it, const bytecode *bc) { reader_init(&it->reader, bc->imports, bc->imports_len * IMPORT_ENTRY_SIZE); it->string_table = bc->string_table; + it->string_table_size = bc->string_table_size; it->len = bc->imports_len; it->curr = 0; } @@ -126,6 +157,11 @@ bool bytecode_imports_next(bytecode_iterator *it, const char **out_name) { return false; } int32_t name_offset = reader_i32(&it->reader); + if (name_offset < 0 || (size_t)name_offset >= it->string_table_size) { + fprintf(stderr, "bytecode_imports_next: name_offset %d out of range\n", + name_offset); + return false; + } *out_name = it->string_table + name_offset; it->curr++; diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 26e806f16..5b2d4ed60 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -46,6 +46,7 @@ void bytecode_free(bytecode *bc); typedef struct { byte_reader reader; const char *string_table; + size_t string_table_size; size_t len; size_t curr; } bytecode_iterator; diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 30a8c4316..a9e419a96 100644 --- 
a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1041,6 +1041,12 @@ static bool register_public_symbols(symbol_table *st, const bytecode *bc, bytecode_pubs_init(&iter, bc); while (bytecode_pubs_next(&iter, &pub)) { + if (pub.code_offset < 0 || (size_t)pub.code_offset >= bc->code_size) { + fprintf(stderr, + "Error: public symbol '%s' has out-of-range code_offset %d\n", + pub.name, pub.code_offset, bc->code_size); + return false; + } if (pub.flag == PUB_FLAG_FUNCTION) { // pub.code_offset is the offset in the bytecode, so we use the mapping int32_t insn_idx = bc_to_insn_map[pub.code_offset]; diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 54e6f4e94..a1c8f96f9 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -459,7 +459,7 @@ void op_st_clo(DECL_STATE) { n_locals, max_depth, (void *)bp, (void *)sp); \ \ for (int32_t i = 0; i < n_locals; i++) { \ - STACK_PUSH(sp, 0); \ + STACK_PUSH(sp, BOX(0)); \ } \ \ aint *offset = sp - max_depth; \ diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index a96ec39f7..f36991b26 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -58,7 +58,9 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, // Compute total globals and place at the top of the stack vm->total_globals = bytecode_count_globals(lr.units, lr.units_len); vm->globals = (aint *)vm->stack_base - vm->total_globals; - memset(vm->globals, 0, vm->total_globals * sizeof(aint)); + for (size_t i = 0; i < vm->total_globals; i++) { + vm->globals[i] = BOX(0); + } program *prog = decode(lr.units, lr.units_len, vm->globals); if (!prog) { From b4247746d070d6d2b503bec344f2e7562e3ba7ba Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 21:28:49 +0300 Subject: [PATCH 103/141] better `SM.ml` --- src/SM.ml | 150 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 79 insertions(+), 71 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index 114c0e69e..b49fc1505 100644 --- 
a/src/SM.ml +++ b/src/SM.ml @@ -198,10 +198,14 @@ module ByteCode = struct let add_import l = imports := l :: !imports in let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in let add_func_fixup l = func_fixups := (Buffer.length code, l) :: !func_fixups in - let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char.chr x) in - let add_ints = - List.iter (fun x -> Buffer.add_int32_le code @@ Int32.of_int x) + let add_bytes buffer = + List.iter (fun x -> Buffer.add_uint8 buffer x) in + let add_ints buffer = + List.iter (fun x -> Buffer.add_int32_le buffer @@ Int32.of_int x) + in + let add_code_bytes = add_bytes code in + let add_code_ints = add_ints code in let add_strings = let unescape x = let n = String.length x in @@ -236,8 +240,7 @@ module ByteCode = struct iterate 0; Buffer.contents buf in - List.iter (fun x -> - Buffer.add_int32_le code @@ Int32.of_int @@ StringTab.add st @@ unescape x) + List.iter (fun x -> add_code_ints [ StringTab.add st @@ unescape x ]) in let add_designations n = let b x = match n with None -> x | Some b -> (b * 16) + x in @@ -245,64 +248,64 @@ module ByteCode = struct | Value.Global s -> if S.mem s !extern_globals then begin let str_off = StringTab.add st s in - add_bytes [ b 0 ]; - add_ints [ -str_off - 1 ] + add_code_bytes [ b 0 ]; + add_code_ints [ -str_off - 1 ] end else begin let i = add_global s in - add_bytes [ b 0 ]; - add_ints [ i ] + add_code_bytes [ b 0 ]; + add_code_ints [ i ] end | Value.Local n -> - add_bytes [ b 1 ]; - add_ints [ n ] + add_code_bytes [ b 1 ]; + add_code_ints [ n ] | Value.Arg n -> - add_bytes [ b 2 ]; - add_ints [ n ] + add_code_bytes [ b 2 ]; + add_code_ints [ n ] | Value.Access n -> - add_bytes [ b 3 ]; - add_ints [ n ] + add_code_bytes [ b 3 ]; + add_code_ints [ n ] | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)) in let insn_code = function (* 0x0s *) - | BINOP s -> add_bytes [ opnum s ] + | BINOP s -> add_code_bytes [ opnum s ] (* 0x10 n:32 *) | CONST 
n -> - add_bytes [ (1 * 16) + 0 ]; - add_ints [ n ] + add_code_bytes [ (1 * 16) + 0 ]; + add_code_ints [ n ] (* 0x11 s:32 *) | STRING s -> - add_bytes [ (1 * 16) + 1 ]; + add_code_bytes [ (1 * 16) + 1 ]; add_strings [ s ] (* 0x12 s:32 n:32 *) | SEXP (s, n) -> - add_bytes [ (1 * 16) + 2 ]; + add_code_bytes [ (1 * 16) + 2 ]; add_strings [ s ]; - add_ints [ n ] + add_code_ints [ n ] (* 0x13 *) - | STI -> add_bytes [ (1 * 16) + 3 ] + | STI -> add_code_bytes [ (1 * 16) + 3 ] (* 0x14 *) - | STA -> add_bytes [ (1 * 16) + 4 ] + | STA -> add_code_bytes [ (1 * 16) + 4 ] | LABEL s | FLABEL s | SLABEL s -> add_lab s (* 0x15 l:32 *) | JMP s -> - add_bytes [ (1 * 16) + 5 ]; + add_code_bytes [ (1 * 16) + 5 ]; add_fixup s; - add_ints [ 0 ] + add_code_ints [ 0 ] (* 0x16 *) - | END -> add_bytes [ (1 * 16) + 6 ] + | END -> add_code_bytes [ (1 * 16) + 6 ] (* 0x17 *) - | RET -> add_bytes [ (1 * 16) + 7 ] + | RET -> add_code_bytes [ (1 * 16) + 7 ] (* 0x18 *) - | DROP -> add_bytes [ (1 * 16) + 8 ] + | DROP -> add_code_bytes [ (1 * 16) + 8 ] (* 0x19 *) - | DUP -> add_bytes [ (1 * 16) + 9 ] + | DUP -> add_code_bytes [ (1 * 16) + 9 ] (* 0x1a *) - | SWAP -> add_bytes [ (1 * 16) + 10 ] + | SWAP -> add_code_bytes [ (1 * 16) + 10 ] (* 0x1b *) - | ELEM -> add_bytes [ (1 * 16) + 11 ] + | ELEM -> add_code_bytes [ (1 * 16) + 11 ] (* 0x2d n:32 *) | LD d -> add_designations (Some 2) [ d ] (* 0x3d n:32 *) @@ -311,60 +314,64 @@ module ByteCode = struct | ST d -> add_designations (Some 4) [ d ] (* 0x50 l:32 *) | CJMP ("z", s) -> - add_bytes [ (5 * 16) + 0 ]; + add_code_bytes [ (5 * 16) + 0 ]; add_fixup s; - add_ints [ 0 ] + add_code_ints [ 0 ] (* 0x51 l:32 *) | CJMP ("nz", s) -> - add_bytes [ (5 * 16) + 1 ]; + add_code_bytes [ (5 * 16) + 1 ]; add_fixup s; - add_ints [ 0 ] + add_code_ints [ 0 ] (* 0x74 *) | CALL (".array", n, _) -> - add_bytes [ (7 * 16) + 4 ]; - add_ints [ n ] + add_code_bytes [ (7 * 16) + 4 ]; + add_code_ints [ n ] (* 0x52 n:32 n:32 *) | BEGIN (_, a, l, [], _, _) -> - add_bytes [ (5 * 
16) + 2 ]; - add_ints [ a; l ] (* with no closure *) + add_code_bytes [ (5 * 16) + 2 ]; + add_code_ints [ a; l ] (* with no closure *) (* 0x53 n:32 n:32 *) | BEGIN (_, a, l, _, _, _) -> - add_bytes [ (5 * 16) + 3 ]; - add_ints [ a; l ] (* with a closure *) + add_code_bytes [ (5 * 16) + 3 ]; + add_code_ints [ a; l ] (* with a closure *) (* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> - add_bytes [ (5 * 16) + 4 ]; + add_code_bytes [ (5 * 16) + 4 ]; add_func_fixup s; - add_ints [ 0; List.length ds ]; + add_code_ints [ 0; List.length ds ]; add_designations None ds (* 0x55 n:32 *) | CALLC (n, _) -> - add_bytes [ (5 * 16) + 5 ]; - add_ints [ n ] + add_code_bytes [ (5 * 16) + 5 ]; + add_code_ints [ n ] (* 0x56 l:32 n:32 *) | CALL (fn, n, _) -> - add_bytes [ (5 * 16) + 6 ]; + add_code_bytes [ (5 * 16) + 6 ]; add_func_fixup fn; - add_ints [ 0; n ] + add_code_ints [ 0; n ] (* 0x57 s:32 n:32 *) | TAG (s, n) -> - add_bytes [ (5 * 16) + 7 ]; + add_code_bytes [ (5 * 16) + 7 ]; add_strings [ s ]; - add_ints [ n ] + add_code_ints [ n ] (* 0x58 n:32 *) | ARRAY n -> - add_bytes [ (5 * 16) + 8 ]; - add_ints [ n ] - (* 0x59/0x5a n:32 n:32 *) - | FAIL ((l, c), value) -> - add_bytes [ if value then (5 * 16) + 10 else (5 * 16) + 9 ]; - add_ints [ l; c ] + add_code_bytes [ (5 * 16) + 8 ]; + add_code_ints [ n ] + (* 0x59 n:32 n:32 *) + | FAIL ((l, c), false) -> + add_code_bytes [ (5 * 16) + 9 ]; + add_code_ints [ l; c ] + (* 0x5a n:32 n:32 *) + | FAIL ((l, c), true) -> + add_code_bytes [ (5 * 16) + 10 ]; + add_code_ints [ l; c ] (* 0x5b n:32 *) | LINE n -> - add_bytes [ (5 * 16) + 11 ]; - add_ints [ n ] + add_code_bytes [ (5 * 16) + 11 ]; + add_code_ints [ n ] (* 0x6p *) - | PATT p -> add_bytes [ (6 * 16) + enum patt p ] + | PATT p -> add_code_bytes [ (6 * 16) + enum patt p ] | EXTERN (name, is_fun) -> add_extern name is_fun | PUBLIC (name, is_fun) -> add_public name is_fun | IMPORT s -> add_import s @@ -373,7 +380,7 @@ module ByteCode = struct (Printf.sprintf "Unexpected pattern: %s: %d" 
__FILE__ __LINE__) in List.iter insn_code insns; - add_bytes [ 255 ]; + add_code_bytes [ 255 ]; let code = Buffer.to_bytes code in List.iter (fun (addr_ofs, l) -> @@ -410,25 +417,26 @@ module ByteCode = struct with Not_found -> failwith (Printf.sprintf "ERROR: undefined label of public '%s'" name) in - (Int32.of_int @@ StringTab.add st name, Int32.of_int pos, flag)) + (StringTab.add st name, pos, flag)) !pubs in - let imports = - List.rev_map (fun l -> Int32.of_int @@ StringTab.add st l) !imports - in + let imports = List.rev_map (fun l -> StringTab.add st l) !imports in let st = Buffer.to_bytes st.StringTab.buffer in let file = Buffer.create 1024 in - Buffer.add_int32_le file (Int32.of_int @@ Bytes.length st); - Buffer.add_int32_le file (Int32.of_int @@ Hashtbl.length globals); - Buffer.add_int32_le file (Int32.of_int @@ List.length imports); - Buffer.add_int32_le file (Int32.of_int @@ List.length pubs_resolved); + let add_file_bytes = add_bytes file in + let add_file_ints = add_ints file in + add_file_ints + [ Bytes.length st + ; Hashtbl.length globals + ; List.length imports + ; List.length pubs_resolved + ]; Buffer.add_bytes file st; - List.iter (fun n -> Buffer.add_int32_le file n) imports; + add_file_ints imports; List.iter (fun (n, o, f) -> - Buffer.add_int32_le file n; - Buffer.add_int32_le file o; - Buffer.add_uint8 file f) + add_file_ints [ n; o ]; + add_file_bytes [ f ]) pubs_resolved; Buffer.add_bytes file code; let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in From ee9b43ccf633da17317b5e984d9a9f3b12d8d69b Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 21:31:56 +0300 Subject: [PATCH 104/141] reversed order `is_loading` --- virtual_machine/loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 87559b082..238aa4848 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -29,7 +29,7 @@ typedef struct { } name_array; static bool 
is_loading(const name_array *stack, const char *name) { - for (size_t i = 0; i < stack->len; i++) { + for (size_t i = stack->len; i-- > 0; ) { if (strcmp(stack->data[i], name) == 0) return true; } From 30a3f2aa1099e652e0c4727566a2f848faced974 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 21:57:08 +0300 Subject: [PATCH 105/141] better --- virtual_machine/opcodes.c | 4 ++-- virtual_machine/ops.c | 22 ++-------------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 8f19b34a9..522c913cf 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -86,10 +86,10 @@ const char *opcode_to_string(uint8_t opcode) { return "ARRAY"; case OP_FAIL: return "FAIL"; - case OP_LINE: - return "LINE"; case OP_FAIL_KEEP: return "FAIL.KEEP"; + case OP_LINE: + return "LINE"; case OP_PATT_STR_CMP: return "PATT.STRCMP"; case OP_PATT_STRING: diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index a1c8f96f9..0f1b69d39 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -98,6 +98,8 @@ extern aint Bsexp_tag_patt(void *x); DEFINE_BINOP(op_add, Ls__Infix_43, "ADD") DEFINE_BINOP(op_sub, Ls__Infix_45, "SUB") DEFINE_BINOP(op_mul, Ls__Infix_42, "MUL") +DEFINE_BINOP(op_div, Ls__Infix_47, "DIV") +DEFINE_BINOP(op_mod, Ls__Infix_37, "MOD") DEFINE_BINOP(op_lt, Ls__Infix_60, "LT") DEFINE_BINOP(op_le, Ls__Infix_6061, "LE") DEFINE_BINOP(op_gt, Ls__Infix_62, "GT") @@ -117,26 +119,6 @@ void op_const(DECL_STATE) { DISPATCH(); } -void op_div(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("DIV: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - aint res = Ls__Infix_47((void *)x, (void *)y); - VM_DEBUG("DIV result=%ld\n", (long)UNBOX(res)); - STACK_PUSH(sp, res); - DISPATCH(); -} - -void op_mod(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("MOD: x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); - aint res = 
Ls__Infix_37((void *)x, (void *)y); - VM_DEBUG("MOD result=%ld\n", (long)UNBOX(res)); - STACK_PUSH(sp, res); - DISPATCH(); -} - void op_drop(DECL_STATE) { VM_DEBUG("DROP\n"); *++sp = 0; From 3036a700614c0323a960f933e10749331494bd9f Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 8 Apr 2026 22:08:08 +0300 Subject: [PATCH 106/141] change signature --- virtual_machine/ops.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 0f1b69d39..9cb3588b9 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -12,19 +12,19 @@ extern size_t __gc_stack_top; /* * External runtime functions (runtime.c) */ -extern aint Ls__Infix_43(void *p, void *q); // + -extern aint Ls__Infix_45(void *p, void *q); // - -extern aint Ls__Infix_42(void *p, void *q); // * -extern aint Ls__Infix_47(void *p, void *q); // / -extern aint Ls__Infix_37(void *p, void *q); // % -extern aint Ls__Infix_60(void *p, void *q); // < -extern aint Ls__Infix_6061(void *p, void *q); // <= -extern aint Ls__Infix_62(void *p, void *q); // > -extern aint Ls__Infix_6261(void *p, void *q); // >= -extern aint Ls__Infix_6161(void *p, void *q); // == -extern aint Ls__Infix_3361(void *p, void *q); // != -extern aint Ls__Infix_3838(void *p, void *q); // && -extern aint Ls__Infix_3333(void *p, void *q); // || +extern aint Ls__Infix_43(aint p, aint q); // + +extern aint Ls__Infix_45(aint p, aint q); // - +extern aint Ls__Infix_42(aint p, aint q); // * +extern aint Ls__Infix_47(aint p, aint q); // / +extern aint Ls__Infix_37(aint p, aint q); // % +extern aint Ls__Infix_60(aint p, aint q); // < +extern aint Ls__Infix_6061(aint p, aint q); // <= +extern aint Ls__Infix_62(aint p, aint q); // > +extern aint Ls__Infix_6261(aint p, aint q); // >= +extern aint Ls__Infix_6161(aint p, aint q); // == +extern aint Ls__Infix_3361(aint p, aint q); // != +extern aint Ls__Infix_3838(aint p, aint q); // && +extern aint Ls__Infix_3333(aint 
p, aint q); // || extern void *Barray(aint *args, aint bn); extern void *Bsexp(aint *args, aint bn); @@ -89,7 +89,7 @@ extern aint Bsexp_tag_patt(void *x); aint y = STACK_POP(sp); \ aint x = STACK_POP(sp); \ VM_DEBUG(opname ": x=%ld, y=%ld\n", (long)UNBOX(x), (long)UNBOX(y)); \ - aint res = fn((void *)x, (void *)y); \ + aint res = fn(x, y); \ VM_DEBUG(opname " result=%ld\n", (long)UNBOX(res)); \ STACK_PUSH(sp, res); \ DISPATCH(); \ From ff6a9698b373ff2c83ebe40d23eca1cd1913f7f0 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 02:04:56 +0300 Subject: [PATCH 107/141] add macros for `op_patt` --- virtual_machine/ops.c | 146 +++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 95 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 9cb3588b9..070945bf3 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -34,14 +34,14 @@ extern void *Belem(void *p, aint i); extern void *Bsta(void *x, aint i, void *v); extern aint Btag(void *d, aint t, aint n); -extern aint Barray_patt(void *d, aint n); -extern aint Bstring_patt(void *x, void *y); -extern aint Bclosure_tag_patt(void *x); -extern aint Bboxed_patt(void *x); -extern aint Bunboxed_patt(void *x); -extern aint Barray_tag_patt(void *x); -extern aint Bstring_tag_patt(void *x); -extern aint Bsexp_tag_patt(void *x); +extern aint Barray_patt(aint d, aint n); +extern aint Bstring_patt(aint x, aint y); +extern aint Bclosure_tag_patt(aint x); +extern aint Bboxed_patt(aint x); +extern aint Bunboxed_patt(aint x); +extern aint Barray_tag_patt(aint x); +extern aint Bstring_tag_patt(aint x); +extern aint Bsexp_tag_patt(aint x); #define DISPATCH() \ do { \ @@ -84,32 +84,55 @@ extern aint Bsexp_tag_patt(void *x); bp = new_bp; \ } while (0) -#define DEFINE_BINOP(name, fn, opname) \ +#define DEFINE_BINARY_OP(name, fn, opname) \ void name(DECL_STATE) { \ aint y = STACK_POP(sp); \ aint x = STACK_POP(sp); \ - VM_DEBUG(opname ": x=%ld, y=%ld\n", (long)UNBOX(x), 
(long)UNBOX(y)); \ + VM_DEBUG(opname ": x=0x%lx, y=0x%lx\n", (unsigned long)x, \ + (unsigned long)y); \ aint res = fn(x, y); \ VM_DEBUG(opname " result=%ld\n", (long)UNBOX(res)); \ STACK_PUSH(sp, res); \ DISPATCH(); \ } -DEFINE_BINOP(op_add, Ls__Infix_43, "ADD") -DEFINE_BINOP(op_sub, Ls__Infix_45, "SUB") -DEFINE_BINOP(op_mul, Ls__Infix_42, "MUL") -DEFINE_BINOP(op_div, Ls__Infix_47, "DIV") -DEFINE_BINOP(op_mod, Ls__Infix_37, "MOD") -DEFINE_BINOP(op_lt, Ls__Infix_60, "LT") -DEFINE_BINOP(op_le, Ls__Infix_6061, "LE") -DEFINE_BINOP(op_gt, Ls__Infix_62, "GT") -DEFINE_BINOP(op_ge, Ls__Infix_6261, "GE") -DEFINE_BINOP(op_eq, Ls__Infix_6161, "EQ") -DEFINE_BINOP(op_ne, Ls__Infix_3361, "NE") -DEFINE_BINOP(op_and, Ls__Infix_3838, "AND") -DEFINE_BINOP(op_or, Ls__Infix_3333, "OR") - -#undef DEFINE_BINOP +#define DEFINE_UNARY_OP(name, fn, opname) \ + void name(DECL_STATE) { \ + aint val = STACK_POP(sp); \ + VM_DEBUG(opname ": val=0x%lx\n", (unsigned long)val); \ + aint result = fn(val); \ + VM_DEBUG(opname " result=%ld\n", (long)UNBOX(result)); \ + STACK_PUSH(sp, result); \ + DISPATCH(); \ + } + +DEFINE_BINARY_OP(op_add, Ls__Infix_43, "ADD") +DEFINE_BINARY_OP(op_sub, Ls__Infix_45, "SUB") +DEFINE_BINARY_OP(op_mul, Ls__Infix_42, "MUL") +DEFINE_BINARY_OP(op_div, Ls__Infix_47, "DIV") +DEFINE_BINARY_OP(op_mod, Ls__Infix_37, "MOD") +DEFINE_BINARY_OP(op_lt, Ls__Infix_60, "LT") +DEFINE_BINARY_OP(op_le, Ls__Infix_6061, "LE") +DEFINE_BINARY_OP(op_gt, Ls__Infix_62, "GT") +DEFINE_BINARY_OP(op_ge, Ls__Infix_6261, "GE") +DEFINE_BINARY_OP(op_eq, Ls__Infix_6161, "EQ") +DEFINE_BINARY_OP(op_ne, Ls__Infix_3361, "NE") +DEFINE_BINARY_OP(op_and, Ls__Infix_3838, "AND") +DEFINE_BINARY_OP(op_or, Ls__Infix_3333, "OR") + +/* + * Pattern matching operations + */ +DEFINE_BINARY_OP(op_patt_str_cmp, Bstring_patt, "PATT_STR_CMP") +DEFINE_UNARY_OP(op_patt_string, Bstring_tag_patt, "PATT_STRING") +DEFINE_UNARY_OP(op_patt_array, Barray_tag_patt, "PATT_ARRAY") +DEFINE_UNARY_OP(op_patt_sexp, Bsexp_tag_patt, 
"PATT_SEXP") +DEFINE_UNARY_OP(op_patt_boxed, Bboxed_patt, "PATT_BOXED") +DEFINE_UNARY_OP(op_patt_unboxed, Bunboxed_patt, "PATT_UNBOXED") +DEFINE_UNARY_OP(op_patt_closure, Bclosure_tag_patt, "PATT_CLOSURE") + +#undef DEFINE_BINARY_OP +#undef DEFINE_UNARY_OP void op_const(DECL_STATE) { ip++; @@ -246,7 +269,7 @@ void op_tag(DECL_STATE) { int32_t n_fields = ip->num; aint val = STACK_POP(sp); - VM_DEBUG("TAG: tag_hash=0x%lx n_fields=%d val=0x%lx\n", (long)tag_hash, + VM_DEBUG("TAG: tag_hash=0x%lx n_fields=%d val=0x%lx\n", (unsigned long)tag_hash, n_fields, (long)val); aint result = Btag((void *)val, tag_hash, BOX(n_fields)); VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); @@ -259,7 +282,7 @@ void op_array(DECL_STATE) { int32_t n = ip->num; aint val = STACK_POP(sp); VM_DEBUG("ARRAY: n=%d, val=%p\n", n, (void *)val); - aint result = Barray_patt((void *)val, BOX(n)); + aint result = Barray_patt(val, BOX(n)); STACK_PUSH(sp, result); DISPATCH(); } @@ -276,73 +299,6 @@ void op_fail(DECL_STATE) { exit(1); } -/* - * Pattern matching operations - */ -void op_patt_str_cmp(DECL_STATE) { - aint y = STACK_POP(sp); - aint x = STACK_POP(sp); - VM_DEBUG("PATT_STR_CMP: x=%p, y=%p\n", (void *)x, (void *)y); - aint result = Bstring_patt((void *)x, (void *)y); - VM_DEBUG("PATT_STR_CMP result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_string(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_STRING: val=%p\n", (void *)val); - aint result = Bstring_tag_patt((void *)val); - VM_DEBUG("PATT_STRING result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_array(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_ARRAY: val=%p\n", (void *)val); - aint result = Barray_tag_patt((void *)val); - VM_DEBUG("PATT_ARRAY result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_sexp(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_SEXP: val=%p\n", (void 
*)val); - aint result = Bsexp_tag_patt((void *)val); - VM_DEBUG("PATT_SEXP result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_boxed(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_BOXED: val=%p\n", (void *)val); - aint result = Bboxed_patt((void *)val); - VM_DEBUG("PATT_BOXED result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_unboxed(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_UNBOXED: val=%ld\n", (long)val); - aint result = Bunboxed_patt((void *)val); - VM_DEBUG("PATT_UNBOXED result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - -void op_patt_closure(DECL_STATE) { - aint val = STACK_POP(sp); - VM_DEBUG("PATT_CLOSURE: val=%p\n", (void *)val); - aint result = Bclosure_tag_patt((void *)val); - VM_DEBUG("PATT_CLOSURE result=%ld\n", (long)UNBOX(result)); - STACK_PUSH(sp, result); - DISPATCH(); -} - /* * Load / store global variables (by pointer) */ @@ -577,7 +533,7 @@ void op_eof(DECL_STATE) { (void)ip; (void)bp; // Pop the result to keep stack consistent between runs - STACK_POP(sp); + (void)STACK_POP(sp); return; } From 97d31f2ed5f4a826c11eb06dc058183868419362 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 02:18:01 +0300 Subject: [PATCH 108/141] move `opcode_is_func_begin` --- virtual_machine/converter.c | 10 +++------- virtual_machine/opcodes.h | 5 +++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index a9e419a96..320a9d0aa 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -207,10 +207,6 @@ static fixup_node *add_fixup(meta_info *meta, size_t target_off, return node; } -static inline bool opcode_is_func_entry(uint8_t opcode) { - return opcode == OP_BEGIN || opcode == OP_BEGIN_CLOSURE; -} - /* * Validate that an internal target is valid: in range, and has a correct * opcode. 
@@ -227,13 +223,13 @@ static bool validate_target_off(const bytecode *bc, int32_t target_off, bool bad; switch (kind) { case TARGET_JUMP: - bad = opcode_is_func_entry(got) || got == OP_EOF; + bad = opcode_is_func_begin(got) || got == OP_EOF; break; case TARGET_CALL: bad = got != OP_BEGIN; break; case TARGET_CLOSURE: - bad = !opcode_is_func_entry(got); + bad = !opcode_is_func_begin(got); break; } if (bad) { @@ -452,7 +448,7 @@ static bool decode_internal(decode_ctx *ctx) { ctx->sv.state == BARRIER ? " [barrier]" : ""); // Validate no nested function - if (opcode_is_func_entry(opcode)) { + if (opcode_is_func_begin(opcode)) { if (ctx->func_idx != -1) { fprintf(stderr, "Error: nested function at bc_off=%zu\n", current_bc_off); diff --git a/virtual_machine/opcodes.h b/virtual_machine/opcodes.h index 56a738779..d51ca43a5 100755 --- a/virtual_machine/opcodes.h +++ b/virtual_machine/opcodes.h @@ -1,6 +1,7 @@ #ifndef OPCODES_H #define OPCODES_H +#include #include typedef enum { @@ -60,4 +61,8 @@ typedef enum { const char *opcode_to_string(uint8_t opcode); +static inline bool opcode_is_func_begin(uint8_t opcode) { + return opcode == OP_BEGIN || opcode == OP_BEGIN_CLOSURE; +} + #endif From 34470917f5fbc25ed2f7602ae432eabad7d31e18 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 02:27:12 +0300 Subject: [PATCH 109/141] better checks --- virtual_machine/converter.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 320a9d0aa..52b8e710c 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -356,6 +356,8 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, meta_info *tm = &meta[target_off]; if (target_off < (int32_t)current_bc_off) { // Backward jump — target was already visited by sequential decode + assert(tm->resolved_idx != -1 && + "backward jump target must have been visited"); if (tm->func_idx != ctx->func_idx) { fprintf( stderr, @@ -363,8 
+365,6 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, current_bc_off, target_off); return false; } - assert(tm->resolved_idx != -1 && - "backward jump target must have been visited"); ctx->code.data[my_idx].num = tm->resolved_idx; add_reloc(ctx, my_idx, NULL, INTERNAL); @@ -392,9 +392,6 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, "%d)\n", current_bc_off, depth, tm->stack_depth); return false; - } else { - VM_DEBUG(" JUMP: forward to bc_off=%d, (depth=%d, target_depth=%d)\n", - target_off, depth, tm->stack_depth); } } return true; @@ -906,15 +903,16 @@ static bool decode_internal(decode_ctx *ctx) { // Validate CLOSURE target's n_captured consistency if (!IS_EXT_REF(target_off)) { - if (meta[target_off].n_captured != -1 && - meta[target_off].n_captured != n_captured) { + int32_t *target_n_captured = &meta[target_off].n_captured; + if (meta[target_off].n_captured == -1) { + *target_n_captured = n_captured; + } else if (*target_n_captured != n_captured) { fprintf(stderr, "Error: mismatched CLOSURE arity at target=%d " "(expected %d, got %d)\n", target_off, meta[target_off].n_captured, n_captured); goto cleanup; } - meta[target_off].n_captured = n_captured; } break; } From 98fde8929269d83efbb793a879dfef3713d11541 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:02:45 +0300 Subject: [PATCH 110/141] point `sp` to the last pushed value, not the first empty slot --- virtual_machine/ops.c | 32 ++++++++++++++++---------------- virtual_machine/vm.c | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 070945bf3..e09a21151 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -57,9 +57,9 @@ extern aint Bsexp_tag_patt(aint x); /* * Stack manipulation macros (stack grows downwards) */ -#define STACK_PUSH(sp, val) (*sp-- = (val)) -#define STACK_POP(sp) (*++sp) -#define STACK_PEEK(sp) (*(sp + 1)) +#define STACK_PUSH(sp, val) (*--(sp) = (val)) +#define 
STACK_POP(sp) (*sp++) +#define STACK_PEEK(sp) (*sp) #define STACK_REVERSE(base, n) \ do { \ for (int32_t _i = 0; _i < (n) / 2; _i++) { \ @@ -77,7 +77,7 @@ extern aint Bsexp_tag_patt(aint x); #define PUSH_FRAME(n_args_val, saved_bp, saved_ip, caller_sp_val) \ do { \ STACK_PUSH(sp, n_args_val); \ - aint *new_bp = sp + 1; \ + aint *new_bp = sp; \ STACK_PUSH(sp, (aint)(saved_bp)); \ STACK_PUSH(sp, (aint)(saved_ip)); \ STACK_PUSH(sp, (aint)(caller_sp_val)); \ @@ -144,7 +144,7 @@ void op_const(DECL_STATE) { void op_drop(DECL_STATE) { VM_DEBUG("DROP\n"); - *++sp = 0; + (void)STACK_POP(sp); DISPATCH(); } @@ -236,7 +236,7 @@ void op_barray(DECL_STATE) { ip++; int32_t n = ip->num; VM_DEBUG("BARRAY: n=%d\n", n); - aint *args = sp + 1; + aint *args = sp; STACK_REVERSE(args, n); sp += n; void *arr = Barray(args, BOX(n)); @@ -251,10 +251,10 @@ void op_sexp(DECL_STATE) { int32_t n_fields = ip->num; VM_DEBUG("SEXP: tag_hash=0x%lx, n_fields=%d\n", tag_hash, n_fields); - // Use the free slot at sp for tag_hash, reverse the whole range in-place - *sp = tag_hash; - STACK_REVERSE(sp, n_fields + 1); - aint *args = sp; + // Use the free slot below the current top for tag_hash. 
+ aint *args = sp - 1; + args[0] = tag_hash; + STACK_REVERSE(args, n_fields + 1); sp += n_fields; void *s = Bsexp(args, BOX(n_fields + 1)); @@ -401,8 +401,8 @@ void op_st_clo(DECL_STATE) { } \ \ aint *offset = sp - max_depth; \ - memset(offset + 1, 0, max_depth * sizeof(aint)); \ - __gc_stack_top = (size_t)offset; \ + memset(offset, 0, max_depth * sizeof(aint)); \ + __gc_stack_top = (size_t)(offset - 1); \ \ DISPATCH(); \ } @@ -436,7 +436,7 @@ void op_callc(DECL_STATE) { int32_t n_args = ip->num; ip++; // sort of a return address - aint closure_val = *(sp + 1 + n_args); + aint closure_val = *(sp + n_args); aint *closure = (aint *)closure_val; aint entry = closure[0]; insn *target = (insn *)entry; @@ -501,9 +501,9 @@ void op_closure(DECL_STATE) { VM_DEBUG("CLOSURE: target=%p n_captured=%d\n", (void *)target, n_captured); - *sp = (aint)target; - STACK_REVERSE(sp + 1, n_captured); - aint *args = sp; + aint *args = sp - 1; + args[0] = (aint)target; + STACK_REVERSE(args + 1, n_captured); sp += n_captured; void *closure = Bclosure(args, BOX(n_captured)); diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index f36991b26..d05088134 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -106,11 +106,11 @@ void vm_run(virtual_machine *vm) { __init(); set_args(vm->argc, vm->argv); - aint *sp = vm->globals - 1; + aint *sp = vm->globals; __gc_stack_bottom = (size_t)vm->stack_base; - aint *bp; + aint *bp = NULL; for (size_t i = 0; i < vm->bc_len; i++) { insn *ip = vm->entry_points[i]; ip->func(ip, sp, bp); From b81176f147c7726ac5b221f3f08bf93bdf2424d8 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:17:05 +0300 Subject: [PATCH 111/141] correct `op_fail` --- virtual_machine/converter.c | 5 ++++- virtual_machine/ops.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 52b8e710c..5ca050354 100644 --- a/virtual_machine/converter.c +++ 
b/virtual_machine/converter.c @@ -777,12 +777,15 @@ static bool decode_internal(decode_ctx *ctx) { case OP_FAIL_KEEP: { int32_t line = reader_i32(&ctx->reader); int32_t col = reader_i32(&ctx->reader); - if (opcode == OP_FAIL) { + bool drop_value = opcode == OP_FAIL; + if (drop_value) { DEPTH_POP(); } EMIT_FUNC(op_fail); EMIT_NUM(line); EMIT_NUM(col); + EMIT_NUM(drop_value); + EMIT_STR(ctx->bc->name); break; } diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index e09a21151..9458c0588 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -3,6 +3,7 @@ #include "debug.h" #include "ffi.h" #include "insn.h" +#include #include #include #include @@ -42,6 +43,7 @@ extern aint Bunboxed_patt(aint x); extern aint Barray_tag_patt(aint x); extern aint Bstring_tag_patt(aint x); extern aint Bsexp_tag_patt(aint x); +extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); #define DISPATCH() \ do { \ @@ -288,15 +290,18 @@ void op_array(DECL_STATE) { } void op_fail(DECL_STATE) { - (void)sp; (void)bp; ip++; int32_t line = ip->num; ip++; int32_t col = ip->num; - VM_DEBUG("FAIL: line=%d, col=%d\n", line, col); - fprintf(stderr, "Match failure at line %d, column %d\n", line, col); - exit(1); + ip++; + bool drop_value = ip->num; + ip++; + const char *module_name = ip->str; + + aint val = drop_value ? 
STACK_POP(sp) : STACK_PEEK(sp); + Bmatch_failure(val, module_name, BOX(line), BOX(col)); } /* From 933162067c37663eed5201bc50c167c484208cf0 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:25:13 +0300 Subject: [PATCH 112/141] do not include main unit's path as a search path --- virtual_machine/lama.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index cea3891f1..9e54b7441 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -24,7 +24,7 @@ static void print_usage(FILE *dest, const char *prog_name) { int main(int argc, char *argv[]) { char *include_paths[MAX_INCLUDE_PATHS]; - int include_path_count = 1; // Reserve index 0 for bytecode file's directory + int include_path_count = 0; // TODO: better error handling in general int exit_code = 0; char *bytecode_dir = NULL; @@ -64,13 +64,6 @@ int main(int argc, char *argv[]) { } char *bytecode_file = argv[optind]; - - // Include main unit's directory by default - char *tmp = ESTRDUP(bytecode_file); - bytecode_dir = ESTRDUP(dirname(tmp)); - free(tmp); - include_paths[0] = bytecode_dir; - virtual_machine *vm = vm_create(bytecode_file, (const char **)include_paths, include_path_count); if (!vm) { From 48cc6c7460c4247329fab356511e0cdd0de6e23f Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:28:50 +0300 Subject: [PATCH 113/141] remove `ifdef debug` for op_line --- virtual_machine/ops.c | 2 -- virtual_machine/ops.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 9458c0588..a89c999e3 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -542,7 +542,6 @@ void op_eof(DECL_STATE) { return; } -#ifdef DEBUG_PRINT void op_line(DECL_STATE) { ip++; int32_t line = ip->num; @@ -550,4 +549,3 @@ void op_line(DECL_STATE) { (void)line; DISPATCH(); } -#endif diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index 732321465..0e20224df 100644 --- 
a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -64,8 +64,6 @@ void op_ffi_call(DECL_STATE); void op_init(DECL_STATE); void op_eof(DECL_STATE); -#ifdef DEBUG_PRINT void op_line(DECL_STATE); -#endif #endif // OPS_H From cac7809b413ed7a1cdd605799d86a477501284bb Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:37:53 +0300 Subject: [PATCH 114/141] group together manipulations with GC before pseudo-registers initialization --- virtual_machine/vm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index d05088134..e74983086 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -104,12 +104,10 @@ void vm_set_args(virtual_machine *vm, int argc, char *argv[]) { void vm_run(virtual_machine *vm) { __init(); + __gc_stack_bottom = (size_t)vm->stack_base; set_args(vm->argc, vm->argv); aint *sp = vm->globals; - - __gc_stack_bottom = (size_t)vm->stack_base; - aint *bp = NULL; for (size_t i = 0; i < vm->bc_len; i++) { insn *ip = vm->entry_points[i]; From 68778f2000cd33b3000ad909b4be895f9565f638 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 9 Apr 2026 04:57:03 +0300 Subject: [PATCH 115/141] chore --- virtual_machine/converter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 5ca050354..a43d0b394 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -1041,7 +1041,7 @@ static bool register_public_symbols(symbol_table *st, const bytecode *bc, if (pub.code_offset < 0 || (size_t)pub.code_offset >= bc->code_size) { fprintf(stderr, "Error: public symbol '%s' has out-of-range code_offset %d\n", - pub.name, pub.code_offset, bc->code_size); + pub.name, pub.code_offset); return false; } if (pub.flag == PUB_FLAG_FUNCTION) { From bd7e9b1df60516f196f07fa8075d89b068c3c5ad Mon Sep 17 00:00:00 2001 From: ancavar Date: Fri, 10 Apr 2026 00:04:37 +0300 Subject: [PATCH 116/141] remove `op_init`, use 
`op_call` for main unit's begin --- virtual_machine/converter.c | 22 +++++++++++++++------- virtual_machine/converter.h | 3 +-- virtual_machine/ops.c | 24 +++--------------------- virtual_machine/ops.h | 1 - virtual_machine/vm.c | 11 +++++------ 5 files changed, 24 insertions(+), 37 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index a43d0b394..464ebbc82 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -285,6 +285,17 @@ static aint *resolve_global_ptr(decode_ctx *ctx, int32_t idx, return (aint *)resolve_ext_global_ptr(ctx->ext_globals, glob_name); } +#define ENTRY_STEP_SLOTS 4 + +static void emit_entry_step(insn *slot, insn *main_begin) { + slot[0].func = op_call; + slot[1].target = main_begin; + slot[2].num = 0; + // Pop the result of the main unit's BEGIN since we don't do anything + // with it. + slot[3].func = op_drop; +} + static bool emit_glo(decode_ctx *ctx, int32_t idx, size_t global_base, fn op) { aint *ptr = resolve_global_ptr(ctx, idx, global_base); if (!ptr) { @@ -431,9 +442,6 @@ static bool decode_internal(decode_ctx *ctx) { meta[i].fixups = NULL; } - EMIT_FUNC(op_init); - EMIT_NUM(0); // placeholder for op_eof - bool ok = false; while (!reader_eof(&ctx->reader)) { @@ -1120,7 +1128,7 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, size_t all_code_len = ffi_call_offset + ffi_call_len * FFI_STUB_SIZE; insn *all_code = ALLOC_ARRAY(insn, all_code_len); - insn **entry_points = ALLOC_ARRAY(insn *, n); + insn *entry_points = ALLOC_ARRAY(insn, ENTRY_STEP_SLOTS * n + 1); // Copy code and resolve relocations size_t code_offset = 0; for (size_t i = 0; i < n; i++) { @@ -1128,17 +1136,17 @@ static program *link_program(decoded *dec_arr, size_t n, size_t total_code_len, // Move instructions into final code array memcpy(all_code + code_offset, dec->code, dec->code_len * sizeof(insn)); - entry_points[i] = &all_code[code_offset]; + 
emit_entry_step(&entry_points[ENTRY_STEP_SLOTS * i], + &all_code[code_offset]); if (!resolve_relocs(all_code, dec, code_offset, ffi_call_offset)) { free(all_code); free(entry_points); return NULL; } - all_code[code_offset + 1].target = &eof_ip; - code_offset += dec->code_len; } + entry_points[ENTRY_STEP_SLOTS * n] = eof_ip; ffi_call_iterator ffi_iter; ffi_call_table_emit_init(&ffi_iter, ffi); diff --git a/virtual_machine/converter.h b/virtual_machine/converter.h index 26b374678..4615fcd8d 100644 --- a/virtual_machine/converter.h +++ b/virtual_machine/converter.h @@ -4,14 +4,13 @@ #include "../runtime/runtime_common.h" #include "bytecode.h" #include "insn.h" -#include "reader.h" #include #include typedef struct { insn *code; size_t code_len; - insn **entry_points; + insn *entry_points; void *ffi_data; size_t ffi_len; } program; diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index a89c999e3..18f0e0272 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -271,8 +271,8 @@ void op_tag(DECL_STATE) { int32_t n_fields = ip->num; aint val = STACK_POP(sp); - VM_DEBUG("TAG: tag_hash=0x%lx n_fields=%d val=0x%lx\n", (unsigned long)tag_hash, - n_fields, (long)val); + VM_DEBUG("TAG: tag_hash=0x%lx n_fields=%d val=0x%lx\n", + (unsigned long)tag_hash, n_fields, (long)val); aint result = Btag((void *)val, tag_hash, BOX(n_fields)); VM_DEBUG("TAG: result=%ld\n", (long)UNBOX(result)); STACK_PUSH(sp, result); @@ -517,28 +517,10 @@ void op_closure(DECL_STATE) { DISPATCH(); } -/* - * op_init is a setup for the main op_begin of the entry point unit. It pushes a - * fake frame with 0 args and no saved state, so that the main function can use - * the normal CALL/END sequence without worrying about the initial case. The - * fake return address points to a special op_eof which just returns, causing - * the whole program to exit when the main function returns. 
- */ -void op_init(DECL_STATE) { - ip++; - insn *eof_ip = ip->target; - - aint *caller_sp = sp; - PUSH_FRAME(0, 0, eof_ip, caller_sp); - - DISPATCH(); -} - void op_eof(DECL_STATE) { (void)ip; (void)bp; - // Pop the result to keep stack consistent between runs - (void)STACK_POP(sp); + (void)sp; return; } diff --git a/virtual_machine/ops.h b/virtual_machine/ops.h index 0e20224df..de04113e3 100644 --- a/virtual_machine/ops.h +++ b/virtual_machine/ops.h @@ -62,7 +62,6 @@ void op_end(DECL_STATE); void op_closure(DECL_STATE); void op_ffi_call(DECL_STATE); -void op_init(DECL_STATE); void op_eof(DECL_STATE); void op_line(DECL_STATE); diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index e74983086..0188129ef 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -16,8 +16,8 @@ extern void set_args(aint argc, char *argv[]); struct virtual_machine { bytecode **bc_arr; // Array of unique loaded bytecode units size_t bc_len; - insn *code; // Contiguous code array - insn **entry_points; // Entry point for each unique unit + insn *code; // Contiguous code array + insn *entry_points; // Entry point for each unique unit size_t entry_points_len; size_t total_globals; aint *globals; // Globals array (at the top of the stack) @@ -107,12 +107,11 @@ void vm_run(virtual_machine *vm) { __gc_stack_bottom = (size_t)vm->stack_base; set_args(vm->argc, vm->argv); + insn *ip = vm->entry_points; aint *sp = vm->globals; aint *bp = NULL; - for (size_t i = 0; i < vm->bc_len; i++) { - insn *ip = vm->entry_points[i]; - ip->func(ip, sp, bp); - } + + ip->func(ip, sp, bp); __shutdown(); } From b012ef77d77ea06a6f9f011dd232d5c384b257f8 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 16 Apr 2026 17:02:59 +0300 Subject: [PATCH 117/141] change path loading --- virtual_machine/lama.c | 52 ++++++++++++++++++++++++++++++-- virtual_machine/loader.c | 65 +++++++++++++--------------------------- virtual_machine/loader.h | 3 +- virtual_machine/vm.c | 5 ++-- virtual_machine/vm.h | 4 +-- 5 files 
changed, 77 insertions(+), 52 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 9e54b7441..8eef7f460 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -4,12 +4,38 @@ #include "vm.h" #include #include +#include #include #include #include #define MAX_INCLUDE_PATHS 64 +/* + * Check if a string looks like a file (ends with '.bc') + */ +static bool is_filepath(const char *str) { + size_t len = strlen(str); + return len > 3 && strcmp(str + len - 3, ".bc") == 0; +} + +/* + * Extract name from filename (without path and extension .bc) + */ +static char *extract_unit_name(const char *filename) { + char *path_copy = ESTRDUP(filename); + char *base = basename(path_copy); + + char *dot = strrchr(base, '.'); + if (dot && strcmp(dot, ".bc") == 0) { + *dot = '\0'; + } + + char *result = ESTRDUP(base); + free(path_copy); + return result; +} + static void print_usage(FILE *dest, const char *prog_name) { fprintf(dest, "Usage: %s [options] [args]\n", prog_name); fprintf(dest, @@ -28,6 +54,8 @@ int main(int argc, char *argv[]) { // TODO: better error handling in general int exit_code = 0; char *bytecode_dir = NULL; + char *main_unit_name_alloc = NULL; + const char *main_unit_dir = NULL; static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"include", required_argument, 0, 'I'}, @@ -63,9 +91,26 @@ int main(int argc, char *argv[]) { return 1; } - char *bytecode_file = argv[optind]; - virtual_machine *vm = vm_create(bytecode_file, (const char **)include_paths, - include_path_count); + char *name = argv[optind]; + if (is_filepath(name)) { + if (include_path_count >= MAX_INCLUDE_PATHS) { + fprintf(stderr, "Maximum number of include paths (%d) exceeded\n", + MAX_INCLUDE_PATHS); + return 1; + } + + char *tmp = ESTRDUP(name); + bytecode_dir = ESTRDUP(dirname(tmp)); + free(tmp); + + main_unit_name_alloc = extract_unit_name(name); + name = main_unit_name_alloc; + main_unit_dir = bytecode_dir; + include_paths[include_path_count++] 
= bytecode_dir; + } + + virtual_machine *vm = vm_create( + name, main_unit_dir, (const char **)include_paths, include_path_count); if (!vm) { exit_code = 1; goto cleanup; @@ -78,6 +123,7 @@ int main(int argc, char *argv[]) { cleanup: vm_destroy(vm); + free(main_unit_name_alloc); free(bytecode_dir); return exit_code; } diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 238aa4848..21dcb0d69 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -10,7 +10,6 @@ #include "da.h" #include "memory.h" #include -#include #include #include #include @@ -43,34 +42,35 @@ static void free_loaded_units(bytecode_array *units) { da_free(*units); } +static bytecode *load_unit_from_dir(const char *unit_name, const char *dir) { + char path[MAX_PATH_LEN]; + const char *base_dir = dir ? dir : "."; + + snprintf(path, MAX_PATH_LEN, "%s/%s.bc", base_dir, unit_name); + int fd = open(path, O_RDONLY); + if (fd >= 0) { + return bytecode_load_fd(fd); + } + + return NULL; +} + /* * Resolve a unit name against the search paths and load the first * bytecode file. 
*/ static bytecode *load_unit_from_paths(const char *unit_name, const search_paths *paths) { - static char path[MAX_PATH_LEN]; for (size_t i = 0; i < paths->len; i++) { - snprintf(path, MAX_PATH_LEN, "%s/%s.bc", paths->paths[i], unit_name); - int fd = open(path, O_RDONLY); - if (fd >= 0) { - return bytecode_load_fd(fd); + bytecode *bc = load_unit_from_dir(unit_name, paths->paths[i]); + if (bc) { + return bc; } } return NULL; } -/* - * Check if a string looks like a file path (contains '/' or ends with '.bc') - */ -static bool is_filepath(const char *str) { - if (strchr(str, '/') != NULL) - return true; - size_t len = strlen(str); - return len > 3 && strcmp(str + len - 3, ".bc") == 0; -} - static bool find_loaded(bytecode_array *units, const char *name) { for (size_t i = 0; i < units->len; i++) { if (strcmp(units->data[i]->name, name) == 0) { @@ -80,23 +80,6 @@ static bool find_loaded(bytecode_array *units, const char *name) { return false; } -/* - * Extract name from filename (without path and extension .bc) - */ -static char *extract_unit_name(const char *filename) { - char *path_copy = ESTRDUP(filename); - char *base = basename(path_copy); - - char *dot = strrchr(base, '.'); - if (dot && strcmp(dot, ".bc") == 0) { - *dot = '\0'; - } - - char *result = ESTRDUP(base); - free(path_copy); - return result; -} - /* * Load a single unit and its dependencies recursively. */ @@ -149,29 +132,23 @@ static bool load_unit_recursive(bytecode_array *units, name_array *loading, return false; } -load_result load(const char *main_unit_path, const search_paths *paths) { +load_result load(const char *main_unit_name, const char *main_unit_dir, + const search_paths *paths) { bytecode_array m; da_init(m); name_array loading; da_init(loading); - bool is_path = is_filepath(main_unit_path); - bytecode *bc = is_path ? 
bytecode_load(main_unit_path) - : load_unit_from_paths(main_unit_path, paths); + bytecode *bc = load_unit_from_dir(main_unit_name, main_unit_dir); if (!bc) { - fprintf(stderr, "Failed to load unit '%s'\n", main_unit_path); + fprintf(stderr, "Failed to load unit '%s'\n", main_unit_name); goto cleanup; } - char *unit_name = - is_path ? extract_unit_name(main_unit_path) : ESTRDUP(main_unit_path); - - if (!load_unit_recursive(&m, &loading, unit_name, bc, paths)) { - free(unit_name); + if (!load_unit_recursive(&m, &loading, main_unit_name, bc, paths)) { goto cleanup; } - free(unit_name); da_free(loading); return (load_result){ diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h index 987735ad6..3c5522cf1 100644 --- a/virtual_machine/loader.h +++ b/virtual_machine/loader.h @@ -17,6 +17,7 @@ typedef struct { size_t units_len; // Number of unique units } load_result; -load_result load(const char *main_unit_path, const search_paths *paths); +load_result load(const char *main_unit_name, const char *main_unit_dir, + const search_paths *paths); #endif // LOADER_H diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 0188129ef..21975ec8b 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -29,7 +29,8 @@ struct virtual_machine { char **argv; }; -virtual_machine *vm_create(const char *main_unit_path, const char **paths, +virtual_machine *vm_create(const char *main_unit_name, + const char *main_unit_dir, const char **paths, size_t total_paths_len) { search_paths search_paths = {.paths = paths, .len = total_paths_len}; @@ -37,7 +38,7 @@ virtual_machine *vm_create(const char *main_unit_path, const char **paths, memset(vm, 0, sizeof(virtual_machine)); vm->stack_base = MAP_FAILED; - load_result lr = load(main_unit_path, &search_paths); + load_result lr = load(main_unit_name, main_unit_dir, &search_paths); if (!lr.units) { vm_destroy(vm); return NULL; diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index bebae9ca0..76f3daccd 100644 --- 
a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -1,12 +1,12 @@ #ifndef VM_H #define VM_H -#include "../runtime/runtime_common.h" #include typedef struct virtual_machine virtual_machine; -virtual_machine *vm_create(const char *main_unit_path, const char **paths, +virtual_machine *vm_create(const char *main_unit_name, + const char *main_unit_dir, const char **paths, size_t total_paths_len); void vm_destroy(virtual_machine *vm); From b5299937d05e9ae8d1726e7f7cfd8cb7cd151d47 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 16 Apr 2026 17:11:15 +0300 Subject: [PATCH 118/141] chore --- virtual_machine/converter.c | 41 +++++++++++++++++++------------------ virtual_machine/loader.c | 2 +- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 464ebbc82..5251bbfa3 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -27,26 +27,6 @@ extern aint LtagHash(const char *s); #define GLOBAL_PREFIX "global_" -/* - * Code emission macros - append to code array in context - */ -#define EMIT_FUNC(f) da_append(ctx->code, ((insn){.func = (f)})) -#define EMIT_NUM(n) da_append(ctx->code, ((insn){.num = (n)})) -#define EMIT_ANUM(n) da_append(ctx->code, ((insn){.anum = (n)})) -#define EMIT_STR(s) da_append(ctx->code, ((insn){.str = (s)})) -#define EMIT_TARGET(t) da_append(ctx->code, ((insn){.target = (t)})) -#define EMIT_GLOBAL_PTR(p) da_append(ctx->code, ((insn){.global_ptr = (p)})) -#define EMIT_PTR(p) da_append(ctx->code, ((insn){.ptr = (p)})) - -#define CHECK_IDX(idx, limit, name) \ - do { \ - if ((idx) < 0 || (idx) >= (limit)) { \ - fprintf(stderr, "%s: index %d out of range [0, %d) at bc_off=%zu\n", \ - name, (int)(idx), (int)(limit), current_bc_off); \ - goto cleanup; \ - } \ - } while (0) - #define FFI_STUB_SIZE 2 typedef enum { @@ -285,6 +265,17 @@ static aint *resolve_global_ptr(decode_ctx *ctx, int32_t idx, return (aint *)resolve_ext_global_ptr(ctx->ext_globals, glob_name); } 
+/* + * Code emission macros - append to code array in context + */ +#define EMIT_FUNC(f) da_append(ctx->code, ((insn){.func = (f)})) +#define EMIT_NUM(n) da_append(ctx->code, ((insn){.num = (n)})) +#define EMIT_ANUM(n) da_append(ctx->code, ((insn){.anum = (n)})) +#define EMIT_STR(s) da_append(ctx->code, ((insn){.str = (s)})) +#define EMIT_TARGET(t) da_append(ctx->code, ((insn){.target = (t)})) +#define EMIT_GLOBAL_PTR(p) da_append(ctx->code, ((insn){.global_ptr = (p)})) +#define EMIT_PTR(p) da_append(ctx->code, ((insn){.ptr = (p)})) + #define ENTRY_STEP_SLOTS 4 static void emit_entry_step(insn *slot, insn *main_begin) { @@ -426,6 +417,15 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, #define DEPTH_PUSH() DEPTH_INC(1) #define DEPTH_POP() DEPTH_DEC(1) +#define CHECK_IDX(idx, limit, name) \ + do { \ + if ((idx) < 0 || (idx) >= (limit)) { \ + fprintf(stderr, "%s: index %d out of range [0, %d) at bc_off=%zu\n", \ + name, (int)(idx), (int)(limit), current_bc_off); \ + goto cleanup; \ + } \ + } while (0) + static bool decode_internal(decode_ctx *ctx) { const bytecode *bc = ctx->bc; @@ -1026,6 +1026,7 @@ static bool decode_internal(decode_ctx *ctx) { return ok; } +#undef CHECK_IDX #undef DEPTH_INC #undef DEPTH_DEC #undef DEPTH_PUSH diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 21dcb0d69..d137f288a 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -28,7 +28,7 @@ typedef struct { } name_array; static bool is_loading(const name_array *stack, const char *name) { - for (size_t i = stack->len; i-- > 0; ) { + for (size_t i = stack->len - 1; ~i; --i) { if (strcmp(stack->data[i], name) == 0) return true; } From af41961c2318b9a75a5e2e2254a818cf04908a50 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 16 Apr 2026 22:05:24 +0300 Subject: [PATCH 119/141] proper casts for reading bytes --- virtual_machine/reader.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virtual_machine/reader.h 
b/virtual_machine/reader.h index 15fb9f58b..2c8547830 100644 --- a/virtual_machine/reader.h +++ b/virtual_machine/reader.h @@ -26,7 +26,8 @@ static inline int32_t reader_i32(byte_reader *r) { assert(r->pos + 4 <= r->size); const uint8_t *p = r->data + r->pos; r->pos += 4; - return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); + return (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) | + ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24)); } static inline uint8_t reader_u8(byte_reader *r) { From 8203fdd252e0207b297f3fa0c8427820f5806181 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 19 Apr 2026 21:08:29 +0300 Subject: [PATCH 120/141] change path loading --- virtual_machine/lama.c | 70 ++++++++++++++++++++++++---------------- virtual_machine/loader.c | 29 ++++++----------- virtual_machine/loader.h | 3 +- virtual_machine/vm.c | 5 ++- virtual_machine/vm.h | 3 +- 5 files changed, 57 insertions(+), 53 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 8eef7f460..996e7c76a 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -9,14 +9,17 @@ #include #include -#define MAX_INCLUDE_PATHS 64 +// TODO: think about unifying with loader.c +static const char bytecode_suffix[] = ".bc"; /* - * Check if a string looks like a file (ends with '.bc') + * Check if a string looks like a file path (ends with '.bc') */ static bool is_filepath(const char *str) { size_t len = strlen(str); - return len > 3 && strcmp(str + len - 3, ".bc") == 0; + size_t suffix_len = sizeof(bytecode_suffix) - 1; + return len > suffix_len && + strcmp(str + len - suffix_len, bytecode_suffix) == 0; } /* @@ -27,7 +30,7 @@ static char *extract_unit_name(const char *filename) { char *base = basename(path_copy); char *dot = strrchr(base, '.'); - if (dot && strcmp(dot, ".bc") == 0) { + if (dot && strcmp(dot, bytecode_suffix) == 0) { *dot = '\0'; } @@ -36,11 +39,28 @@ static char *extract_unit_name(const char *filename) { return result; } +/* + * Extract path from 
filename + */ +static char *extract_unit_dir(const char *filename) { + char *path_copy = ESTRDUP(filename); + char *dir = dirname(path_copy); + char *result = ESTRDUP(dir); + free(path_copy); + return result; +} + +#define MAX_INCLUDE_PATHS 64 + static void print_usage(FILE *dest, const char *prog_name) { - fprintf(dest, "Usage: %s [options] [args]\n", prog_name); + fprintf(dest, "Usage: %s [options] [args]\n", + prog_name); fprintf(dest, "\nWhen no options are specified, the VM will run the bytecode file " "and look for units in the same directory.\n"); + fprintf(dest, + "You can also specify unit name instead of a bytecode file, but " + "you need to manually include relevant search paths.\n"); fprintf(dest, "Options:\n"); fprintf(dest, " -h, --help Show this help message\n"); fprintf(dest, @@ -50,12 +70,12 @@ static void print_usage(FILE *dest, const char *prog_name) { int main(int argc, char *argv[]) { char *include_paths[MAX_INCLUDE_PATHS]; - int include_path_count = 0; + int include_path_count = 1; // TODO: better error handling in general int exit_code = 0; char *bytecode_dir = NULL; - char *main_unit_name_alloc = NULL; - const char *main_unit_dir = NULL; + char *main_unit_name = NULL; + bool is_path = false; static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"include", required_argument, 0, 'I'}, @@ -91,26 +111,20 @@ int main(int argc, char *argv[]) { return 1; } - char *name = argv[optind]; - if (is_filepath(name)) { - if (include_path_count >= MAX_INCLUDE_PATHS) { - fprintf(stderr, "Maximum number of include paths (%d) exceeded\n", - MAX_INCLUDE_PATHS); - return 1; - } - - char *tmp = ESTRDUP(name); - bytecode_dir = ESTRDUP(dirname(tmp)); - free(tmp); - - main_unit_name_alloc = extract_unit_name(name); - name = main_unit_name_alloc; - main_unit_dir = bytecode_dir; - include_paths[include_path_count++] = bytecode_dir; + char *entry_arg = argv[optind]; + is_path = is_filepath(entry_arg); + if (is_path) { + bytecode_dir = 
extract_unit_dir(entry_arg); + main_unit_name = extract_unit_name(entry_arg); + include_paths[0] = bytecode_dir; + } else { + main_unit_name = entry_arg; } - virtual_machine *vm = vm_create( - name, main_unit_dir, (const char **)include_paths, include_path_count); + virtual_machine *vm = + vm_create(main_unit_name, + (const char **)(is_path ? include_paths : include_paths + 1), + is_path ? include_path_count : include_path_count - 1); if (!vm) { exit_code = 1; goto cleanup; @@ -123,7 +137,9 @@ int main(int argc, char *argv[]) { cleanup: vm_destroy(vm); - free(main_unit_name_alloc); + if (is_path) { + free(main_unit_name); + } free(bytecode_dir); return exit_code; } diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index d137f288a..3f0b81202 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -15,6 +15,8 @@ #include #include +static const char bytecode_suffix[] = ".bc"; + typedef struct { bytecode **data; size_t len; @@ -42,29 +44,19 @@ static void free_loaded_units(bytecode_array *units) { da_free(*units); } -static bytecode *load_unit_from_dir(const char *unit_name, const char *dir) { - char path[MAX_PATH_LEN]; - const char *base_dir = dir ? dir : "."; - - snprintf(path, MAX_PATH_LEN, "%s/%s.bc", base_dir, unit_name); - int fd = open(path, O_RDONLY); - if (fd >= 0) { - return bytecode_load_fd(fd); - } - - return NULL; -} - /* * Resolve a unit name against the search paths and load the first * bytecode file. 
*/ static bytecode *load_unit_from_paths(const char *unit_name, const search_paths *paths) { + static char path[MAX_PATH_LEN]; for (size_t i = 0; i < paths->len; i++) { - bytecode *bc = load_unit_from_dir(unit_name, paths->paths[i]); - if (bc) { - return bc; + snprintf(path, MAX_PATH_LEN, "%s/%s%s", paths->paths[i], unit_name, + bytecode_suffix); + int fd = open(path, O_RDONLY); + if (fd >= 0) { + return bytecode_load_fd(fd); } } @@ -132,15 +124,14 @@ static bool load_unit_recursive(bytecode_array *units, name_array *loading, return false; } -load_result load(const char *main_unit_name, const char *main_unit_dir, - const search_paths *paths) { +load_result load(const char *main_unit_name, const search_paths *paths) { bytecode_array m; da_init(m); name_array loading; da_init(loading); - bytecode *bc = load_unit_from_dir(main_unit_name, main_unit_dir); + bytecode *bc = load_unit_from_paths(main_unit_name, paths); if (!bc) { fprintf(stderr, "Failed to load unit '%s'\n", main_unit_name); goto cleanup; diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h index 3c5522cf1..94fa1a119 100644 --- a/virtual_machine/loader.h +++ b/virtual_machine/loader.h @@ -17,7 +17,6 @@ typedef struct { size_t units_len; // Number of unique units } load_result; -load_result load(const char *main_unit_name, const char *main_unit_dir, - const search_paths *paths); +load_result load(const char *main_unit_name, const search_paths *paths); #endif // LOADER_H diff --git a/virtual_machine/vm.c b/virtual_machine/vm.c index 21975ec8b..b2075cbd2 100644 --- a/virtual_machine/vm.c +++ b/virtual_machine/vm.c @@ -29,8 +29,7 @@ struct virtual_machine { char **argv; }; -virtual_machine *vm_create(const char *main_unit_name, - const char *main_unit_dir, const char **paths, +virtual_machine *vm_create(const char *main_unit_name, const char **paths, size_t total_paths_len) { search_paths search_paths = {.paths = paths, .len = total_paths_len}; @@ -38,7 +37,7 @@ virtual_machine *vm_create(const 
char *main_unit_name, memset(vm, 0, sizeof(virtual_machine)); vm->stack_base = MAP_FAILED; - load_result lr = load(main_unit_name, main_unit_dir, &search_paths); + load_result lr = load(main_unit_name, &search_paths); if (!lr.units) { vm_destroy(vm); return NULL; diff --git a/virtual_machine/vm.h b/virtual_machine/vm.h index 76f3daccd..8b3f1ff50 100644 --- a/virtual_machine/vm.h +++ b/virtual_machine/vm.h @@ -5,8 +5,7 @@ typedef struct virtual_machine virtual_machine; -virtual_machine *vm_create(const char *main_unit_name, - const char *main_unit_dir, const char **paths, +virtual_machine *vm_create(const char *main_unit_name, const char **paths, size_t total_paths_len); void vm_destroy(virtual_machine *vm); From 79ea67c9da7d81b6bb15a06813d950c349385ea6 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 19 Apr 2026 21:34:55 +0300 Subject: [PATCH 121/141] validate closure captures --- src/SM.ml | 6 ++-- virtual_machine/converter.c | 67 ++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index b49fc1505..a1df79eb7 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -330,10 +330,10 @@ module ByteCode = struct | BEGIN (_, a, l, [], _, _) -> add_code_bytes [ (5 * 16) + 2 ]; add_code_ints [ a; l ] (* with no closure *) - (* 0x53 n:32 n:32 *) - | BEGIN (_, a, l, _, _, _) -> + (* 0x53 n:32 n:32 n:32 *) + | BEGIN (_, a, l, ds, _, _) -> add_code_bytes [ (5 * 16) + 3 ]; - add_code_ints [ a; l ] (* with a closure *) + add_code_ints [ a; l; List.length ds ] (* with a closure *) (* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> add_code_bytes [ (5 * 16) + 4 ]; diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 5251bbfa3..9a3bd2313 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -399,6 +399,24 @@ static bool handle_jump(decode_ctx *ctx, meta_info *meta, return true; } +static bool validate_closure_captures(meta_info *meta, int32_t target_off, + int32_t n_captured, + 
size_t current_bc_off) { + int32_t *expected = &meta[target_off].n_captured; + if (*expected == -1) { + *expected = n_captured; + return true; + } + if (*expected != n_captured) { + fprintf(stderr, + "Error: mismatched closure arity at bc_off=%zu, target=%d " + "(expected %d, got %d)\n", + current_bc_off, target_off, *expected, n_captured); + return false; + } + return true; +} + #define DEPTH_INC(n) \ do { \ VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", ctx->sv.depth, ctx->sv.depth + (n), \ @@ -717,8 +735,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_LD_CLO: { DEPTH_PUSH(); int32_t idx = reader_i32(&ctx->reader); - if (ctx->func.n_captured != -1) - CHECK_IDX(idx, ctx->func.n_captured, "LD_CLO"); + CHECK_IDX(idx, ctx->func.n_captured, "LD_CLO"); EMIT_FUNC(op_ld_clo); EMIT_NUM(idx); break; @@ -726,8 +743,7 @@ static bool decode_internal(decode_ctx *ctx) { case OP_ST_CLO: { int32_t idx = reader_i32(&ctx->reader); - if (ctx->func.n_captured != -1) - CHECK_IDX(idx, ctx->func.n_captured, "ST_CLO"); + CHECK_IDX(idx, ctx->func.n_captured, "ST_CLO"); EMIT_FUNC(op_st_clo); EMIT_NUM(idx); break; @@ -839,20 +855,39 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_FUNC(op_patt_closure); break; - case OP_BEGIN: + case OP_BEGIN: { + int32_t n_args = reader_i32(&ctx->reader); + int32_t n_locals = reader_i32(&ctx->reader); + ctx->sv.depth = 0; + ctx->sv.max_depth = 0; + + ctx->func = + (func_ctx){.n_args = n_args, .n_locals = n_locals, .n_captured = 0}; + + EMIT_FUNC(op_begin); + EMIT_NUM(n_args); + EMIT_NUM(n_locals); + ctx->sv.max_depth_pos = ctx->code.len; + EMIT_NUM(0); // placeholder for max depth, will be patched + + break; + } + case OP_BEGIN_CLOSURE: { int32_t n_args = reader_i32(&ctx->reader); int32_t n_locals = reader_i32(&ctx->reader); + int32_t n_captured = reader_i32(&ctx->reader); + if (!validate_closure_captures(meta, (int32_t)current_bc_off, n_captured, + current_bc_off)) { + goto cleanup; + } ctx->sv.depth = 0; ctx->sv.max_depth = 0; - ctx->func = 
(func_ctx){.n_args = n_args, - .n_locals = n_locals, - .n_captured = (opcode == OP_BEGIN_CLOSURE) - ? meta[current_bc_off].n_captured - : 0}; + ctx->func = (func_ctx){ + .n_args = n_args, .n_locals = n_locals, .n_captured = n_captured}; - EMIT_FUNC(opcode == OP_BEGIN_CLOSURE ? op_begin_closure : op_begin); + EMIT_FUNC(op_begin_closure); EMIT_NUM(n_args); EMIT_NUM(n_locals); ctx->sv.max_depth_pos = ctx->code.len; @@ -914,14 +949,8 @@ static bool decode_internal(decode_ctx *ctx) { // Validate CLOSURE target's n_captured consistency if (!IS_EXT_REF(target_off)) { - int32_t *target_n_captured = &meta[target_off].n_captured; - if (meta[target_off].n_captured == -1) { - *target_n_captured = n_captured; - } else if (*target_n_captured != n_captured) { - fprintf(stderr, - "Error: mismatched CLOSURE arity at target=%d " - "(expected %d, got %d)\n", - target_off, meta[target_off].n_captured, n_captured); + if (!validate_closure_captures(meta, target_off, n_captured, + current_bc_off)) { goto cleanup; } } From b25464a99f153b8160d2b85f788bec360d10828e Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 19 Apr 2026 21:57:50 +0300 Subject: [PATCH 122/141] add explanation for setting current depth to instruction after `BARRIER` --- virtual_machine/converter.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 9a3bd2313..a0da45191 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -491,7 +491,16 @@ static bool decode_internal(decode_ctx *ctx) { VM_DEBUG(" DEPTH: %d -> %d", ctx->sv.depth, m->stack_depth); ctx->sv.depth = m->stack_depth; } else { - // No forward jump + // No forward jump has targeted this instruction yet. We are starting a + // new "region" after JMP/END, so there is no previous instruction to + // validate against. So set the current decode depth which will be + // checked by some backward jump. Example (while loop): + // JMP cond + // body: + // ... 
+ // cond: + // ... + // CJMP_NZ body VM_DEBUG(" DEPTH: barrier, keeping stale depth=%d at bc_off=%zu\n", ctx->sv.depth, current_bc_off); m->stack_depth = ctx->sv.depth; From 837743195009f1eaffaaf5091bea535270ed8370 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 20 Apr 2026 22:04:01 +0300 Subject: [PATCH 123/141] fix stack operations --- virtual_machine/ops.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index 18f0e0272..d6efe7973 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -74,7 +74,8 @@ extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); #define FRAME_SAVED_BP (-1) #define FRAME_SAVED_IP (-2) #define FRAME_SAVED_SP (-3) -#define FRAME_LOCALS (-4) +#define FRAME_SAVED_GC_TOP (-4) +#define FRAME_LOCALS (-5) #define PUSH_FRAME(n_args_val, saved_bp, saved_ip, caller_sp_val) \ do { \ @@ -83,6 +84,12 @@ extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); STACK_PUSH(sp, (aint)(saved_bp)); \ STACK_PUSH(sp, (aint)(saved_ip)); \ STACK_PUSH(sp, (aint)(caller_sp_val)); \ + /* \ + * If we don't restore it, we might end up with a smaller (therefore \ + * incorrect) __gc_stack_top. \ + * See DEFINE_BEGIN. \ + */ \ + STACK_PUSH(sp, (aint)__gc_stack_top); \ bp = new_bp; \ } while (0) @@ -259,7 +266,17 @@ void op_sexp(DECL_STATE) { STACK_REVERSE(args, n_fields + 1); sp += n_fields; + // Ugly corner case due to using sp - 1. + // When we reverse, some heap object might occupy sp - 1 + // when sp - 1 == __gc_stack_top and GC can trigger. Therefore, we need to + // "guard" against it. 
+ size_t saved_gc_stack_top = __gc_stack_top; + size_t sexp_gc_stack_top = (size_t)(args - 1); + if (__gc_stack_top == 0 || sexp_gc_stack_top < __gc_stack_top) { + __gc_stack_top = sexp_gc_stack_top; + } void *s = Bsexp(args, BOX(n_fields + 1)); + __gc_stack_top = saved_gc_stack_top; STACK_PUSH(sp, (aint)s); DISPATCH(); } @@ -407,7 +424,16 @@ void op_st_clo(DECL_STATE) { \ aint *offset = sp - max_depth; \ memset(offset, 0, max_depth * sizeof(aint)); \ - __gc_stack_top = (size_t)(offset - 1); \ + size_t new_gc_stack_top = (size_t)(offset - 1); \ + /* \ + * A caller may have stack depth lower than caller's. \ + * Example: \ + * caller: sp = 100, max_depth = 10 -> __gc_stack_top = 89 \ + * callee: sp = 96, max_depth = 2 -> __gc_stack_top = 93 \ + */ \ + if (__gc_stack_top == 0 || new_gc_stack_top < __gc_stack_top) { \ + __gc_stack_top = new_gc_stack_top; \ + } \ \ DISPATCH(); \ } @@ -465,6 +491,7 @@ void op_end(DECL_STATE) { // Restore caller's state from frame sp = (aint *)bp[FRAME_SAVED_SP]; ip = (insn *)bp[FRAME_SAVED_IP]; + __gc_stack_top = (size_t)bp[FRAME_SAVED_GC_TOP]; bp = (aint *)bp[FRAME_SAVED_BP]; STACK_PUSH(sp, ret_val); @@ -492,6 +519,7 @@ void op_ffi_call(DECL_STATE) { // Same as op_end sp = (aint *)bp[FRAME_SAVED_SP]; ip = (insn *)bp[FRAME_SAVED_IP]; + __gc_stack_top = (size_t)bp[FRAME_SAVED_GC_TOP]; bp = (aint *)bp[FRAME_SAVED_BP]; STACK_PUSH(sp, result); From 242347bdb0ffb97089531ff0f8e857ea15eb60ef Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 22 Apr 2026 16:21:38 +0300 Subject: [PATCH 124/141] unify `is_filepath` and `extract_unit_name` --- virtual_machine/lama.c | 52 +++++++++++++++------------------------- virtual_machine/loader.c | 4 +--- virtual_machine/loader.h | 1 + 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 996e7c76a..f2a4a98b3 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -1,5 +1,6 @@ #define _POSIX_C_SOURCE 200809L +#include "loader.h" 
#include "memory.h" #include "vm.h" #include @@ -9,45 +10,32 @@ #include #include -// TODO: think about unifying with loader.c -static const char bytecode_suffix[] = ".bc"; - /* - * Check if a string looks like a file path (ends with '.bc') + * Extract directory and unit name from a path. Returns false if the suffix does + * not match. */ -static bool is_filepath(const char *str) { - size_t len = strlen(str); - size_t suffix_len = sizeof(bytecode_suffix) - 1; - return len > suffix_len && - strcmp(str + len - suffix_len, bytecode_suffix) == 0; -} +static bool parse_bytecode_path(const char *path, char **unit_name_out, + char **dir_out) { + size_t len = strlen(path); + size_t suffix_len = sizeof(BYTECODE_SUFFIX) - 1; + if (len <= suffix_len || + strcmp(path + len - suffix_len, BYTECODE_SUFFIX) != 0) { + return false; + } -/* - * Extract name from filename (without path and extension .bc) - */ -static char *extract_unit_name(const char *filename) { - char *path_copy = ESTRDUP(filename); + char *path_copy = ESTRDUP(path); char *base = basename(path_copy); - char *dot = strrchr(base, '.'); - if (dot && strcmp(dot, bytecode_suffix) == 0) { + if (dot) { *dot = '\0'; } - char *result = ESTRDUP(base); - free(path_copy); - return result; -} - -/* - * Extract path from filename - */ -static char *extract_unit_dir(const char *filename) { - char *path_copy = ESTRDUP(filename); - char *dir = dirname(path_copy); - char *result = ESTRDUP(dir); + char *dir_copy = ESTRDUP(path); + *unit_name_out = ESTRDUP(base); + *dir_out = ESTRDUP(dirname(dir_copy)); + free(dir_copy); free(path_copy); - return result; + return true; } #define MAX_INCLUDE_PATHS 64 @@ -112,10 +100,8 @@ int main(int argc, char *argv[]) { } char *entry_arg = argv[optind]; - is_path = is_filepath(entry_arg); + is_path = parse_bytecode_path(entry_arg, &main_unit_name, &bytecode_dir); if (is_path) { - bytecode_dir = extract_unit_dir(entry_arg); - main_unit_name = extract_unit_name(entry_arg); include_paths[0] = 
bytecode_dir; } else { main_unit_name = entry_arg; diff --git a/virtual_machine/loader.c b/virtual_machine/loader.c index 3f0b81202..baa284d9b 100644 --- a/virtual_machine/loader.c +++ b/virtual_machine/loader.c @@ -15,8 +15,6 @@ #include #include -static const char bytecode_suffix[] = ".bc"; - typedef struct { bytecode **data; size_t len; @@ -53,7 +51,7 @@ static bytecode *load_unit_from_paths(const char *unit_name, static char path[MAX_PATH_LEN]; for (size_t i = 0; i < paths->len; i++) { snprintf(path, MAX_PATH_LEN, "%s/%s%s", paths->paths[i], unit_name, - bytecode_suffix); + BYTECODE_SUFFIX); int fd = open(path, O_RDONLY); if (fd >= 0) { return bytecode_load_fd(fd); diff --git a/virtual_machine/loader.h b/virtual_machine/loader.h index 94fa1a119..f5a8f0b7b 100644 --- a/virtual_machine/loader.h +++ b/virtual_machine/loader.h @@ -5,6 +5,7 @@ #include #include +#define BYTECODE_SUFFIX ".bc" #define MAX_PATH_LEN 1024 typedef struct { From 78923ececde273ba11932184e0df774c7386269b Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 22 Apr 2026 16:24:42 +0300 Subject: [PATCH 125/141] cleanup redundant check for closure var --- virtual_machine/converter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index a0da45191..500bf1450 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -936,8 +936,7 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_NUM(idx); break; case 3: // Closure var - if (ctx->func.n_captured != -1) - CHECK_IDX(idx, ctx->func.n_captured, "CLOSURE desig closure"); + CHECK_IDX(idx, ctx->func.n_captured, "CLOSURE desig closure"); DEPTH_PUSH(); EMIT_FUNC(op_ld_clo); EMIT_NUM(idx); From 03cbacb156cae741deb1abd98dcb4b800f3315d5 Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 22 Apr 2026 17:56:20 +0300 Subject: [PATCH 126/141] change stack mechanics, remove max_depth --- virtual_machine/converter.c | 15 ----------- virtual_machine/ops.c | 54 
+++++++++---------------------------- 2 files changed, 12 insertions(+), 57 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 500bf1450..75462abb2 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -99,8 +99,6 @@ typedef enum { LIVE, BARRIER } reach_state; typedef struct { int32_t depth; reach_state state; - int32_t max_depth; - size_t max_depth_pos; } stack_validation; typedef struct { @@ -422,8 +420,6 @@ static bool validate_closure_captures(meta_info *meta, int32_t target_off, VM_DEBUG(" DEPTH: %d -> %d (+%d)\n", ctx->sv.depth, ctx->sv.depth + (n), \ (n)); \ ctx->sv.depth += (n); \ - if (ctx->sv.depth > ctx->sv.max_depth) \ - ctx->sv.max_depth = ctx->sv.depth; \ } while (0) #define DEPTH_DEC(n) \ do { \ @@ -868,7 +864,6 @@ static bool decode_internal(decode_ctx *ctx) { int32_t n_args = reader_i32(&ctx->reader); int32_t n_locals = reader_i32(&ctx->reader); ctx->sv.depth = 0; - ctx->sv.max_depth = 0; ctx->func = (func_ctx){.n_args = n_args, .n_locals = n_locals, .n_captured = 0}; @@ -876,8 +871,6 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_FUNC(op_begin); EMIT_NUM(n_args); EMIT_NUM(n_locals); - ctx->sv.max_depth_pos = ctx->code.len; - EMIT_NUM(0); // placeholder for max depth, will be patched break; } @@ -891,7 +884,6 @@ static bool decode_internal(decode_ctx *ctx) { goto cleanup; } ctx->sv.depth = 0; - ctx->sv.max_depth = 0; ctx->func = (func_ctx){ .n_args = n_args, .n_locals = n_locals, .n_captured = n_captured}; @@ -899,8 +891,6 @@ static bool decode_internal(decode_ctx *ctx) { EMIT_FUNC(op_begin_closure); EMIT_NUM(n_args); EMIT_NUM(n_locals); - ctx->sv.max_depth_pos = ctx->code.len; - EMIT_NUM(0); // placeholder for max depth, will be patched break; } @@ -999,7 +989,6 @@ static bool decode_internal(decode_ctx *ctx) { goto cleanup; } EMIT_FUNC(op_end); - ctx->code.data[ctx->sv.max_depth_pos].num = ctx->sv.max_depth; ctx->sv.state = BARRIER; ctx->func = (func_ctx){.n_captured = -1}; 
ctx->func_idx = -1; @@ -1035,10 +1024,6 @@ static bool decode_internal(decode_ctx *ctx) { reader_pos(&ctx->reader) - 1); goto cleanup; } - - if (ctx->sv.depth > ctx->sv.max_depth) { - ctx->sv.max_depth = ctx->sv.depth; - } } // Extract mapping diff --git a/virtual_machine/ops.c b/virtual_machine/ops.c index d6efe7973..1e129b140 100644 --- a/virtual_machine/ops.c +++ b/virtual_machine/ops.c @@ -62,6 +62,7 @@ extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); #define STACK_PUSH(sp, val) (*--(sp) = (val)) #define STACK_POP(sp) (*sp++) #define STACK_PEEK(sp) (*sp) +#define SYNC_GC_STACK(sp) (__gc_stack_top = (size_t)((sp) - 1)) #define STACK_REVERSE(base, n) \ do { \ for (int32_t _i = 0; _i < (n) / 2; _i++) { \ @@ -74,8 +75,7 @@ extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); #define FRAME_SAVED_BP (-1) #define FRAME_SAVED_IP (-2) #define FRAME_SAVED_SP (-3) -#define FRAME_SAVED_GC_TOP (-4) -#define FRAME_LOCALS (-5) +#define FRAME_LOCALS (-4) #define PUSH_FRAME(n_args_val, saved_bp, saved_ip, caller_sp_val) \ do { \ @@ -84,12 +84,6 @@ extern void Bmatch_failure(aint v, const char *fname, aint line, aint col); STACK_PUSH(sp, (aint)(saved_bp)); \ STACK_PUSH(sp, (aint)(saved_ip)); \ STACK_PUSH(sp, (aint)(caller_sp_val)); \ - /* \ - * If we don't restore it, we might end up with a smaller (therefore \ - * incorrect) __gc_stack_top. \ - * See DEFINE_BEGIN. 
\ - */ \ - STACK_PUSH(sp, (aint)__gc_stack_top); \ bp = new_bp; \ } while (0) @@ -235,6 +229,7 @@ void op_cjmp_nz(DECL_STATE) { void op_string(DECL_STATE) { ip++; const char *str = ip->str; + SYNC_GC_STACK(sp); void *result = Bstring((void *)&str); VM_DEBUG("STRING literal: \"%s\" -> %p\n", str, result); STACK_PUSH(sp, (aint)result); @@ -247,8 +242,9 @@ void op_barray(DECL_STATE) { VM_DEBUG("BARRAY: n=%d\n", n); aint *args = sp; STACK_REVERSE(args, n); - sp += n; + SYNC_GC_STACK(sp); void *arr = Barray(args, BOX(n)); + sp = args + n; STACK_PUSH(sp, (aint)arr); DISPATCH(); } @@ -264,19 +260,10 @@ void op_sexp(DECL_STATE) { aint *args = sp - 1; args[0] = tag_hash; STACK_REVERSE(args, n_fields + 1); - sp += n_fields; - // Ugly corner case due to using sp - 1. - // When we reverse, some heap object might occupy sp - 1 - // when sp - 1 == __gc_stack_top and GC can trigger. Therefore, we need to - // "guard" against it. - size_t saved_gc_stack_top = __gc_stack_top; - size_t sexp_gc_stack_top = (size_t)(args - 1); - if (__gc_stack_top == 0 || sexp_gc_stack_top < __gc_stack_top) { - __gc_stack_top = sexp_gc_stack_top; - } + SYNC_GC_STACK(args); void *s = Bsexp(args, BOX(n_fields + 1)); - __gc_stack_top = saved_gc_stack_top; + sp += n_fields; STACK_PUSH(sp, (aint)s); DISPATCH(); } @@ -412,29 +399,14 @@ void op_st_clo(DECL_STATE) { (void)n_args; \ ip++; \ int32_t n_locals = ip->num; \ - ip++; \ - int32_t max_depth = ip->num; \ \ - VM_DEBUG("BEGIN n_args=%d n_locals=%d max_depth=%d bp=%p sp=%p\n", n_args, \ - n_locals, max_depth, (void *)bp, (void *)sp); \ + VM_DEBUG("BEGIN n_args=%d n_locals=%d bp=%p sp=%p\n", n_args, n_locals, \ + (void *)bp, (void *)sp); \ \ for (int32_t i = 0; i < n_locals; i++) { \ STACK_PUSH(sp, BOX(0)); \ } \ \ - aint *offset = sp - max_depth; \ - memset(offset, 0, max_depth * sizeof(aint)); \ - size_t new_gc_stack_top = (size_t)(offset - 1); \ - /* \ - * A caller may have stack depth lower than caller's. 
\ - * Example: \ - * caller: sp = 100, max_depth = 10 -> __gc_stack_top = 89 \ - * callee: sp = 96, max_depth = 2 -> __gc_stack_top = 93 \ - */ \ - if (__gc_stack_top == 0 || new_gc_stack_top < __gc_stack_top) { \ - __gc_stack_top = new_gc_stack_top; \ - } \ - \ DISPATCH(); \ } @@ -482,7 +454,6 @@ void op_callc(DECL_STATE) { } void op_end(DECL_STATE) { - (void)sp; aint ret_val = STACK_POP(sp); VM_DEBUG("END ret_val=%ld bp=%p sp=%p\n", (long)ret_val, (void *)bp, @@ -491,7 +462,6 @@ void op_end(DECL_STATE) { // Restore caller's state from frame sp = (aint *)bp[FRAME_SAVED_SP]; ip = (insn *)bp[FRAME_SAVED_IP]; - __gc_stack_top = (size_t)bp[FRAME_SAVED_GC_TOP]; bp = (aint *)bp[FRAME_SAVED_BP]; STACK_PUSH(sp, ret_val); @@ -502,7 +472,6 @@ void op_end(DECL_STATE) { * FFI call — dispatches via pre-resolved ffi_resolved struct */ void op_ffi_call(DECL_STATE) { - (void)sp; ip++; const ffi_resolved *res = (const ffi_resolved *)ip->ptr; @@ -513,13 +482,13 @@ void op_ffi_call(DECL_STATE) { // args at bp[1..n_args] STACK_REVERSE(bp + 1, n_args); + SYNC_GC_STACK(sp); aint result = ffi_call_c(res, bp + 1, n_args); VM_DEBUG("FFI_CALL: result=%ld\n", (long)result); // Same as op_end sp = (aint *)bp[FRAME_SAVED_SP]; ip = (insn *)bp[FRAME_SAVED_IP]; - __gc_stack_top = (size_t)bp[FRAME_SAVED_GC_TOP]; bp = (aint *)bp[FRAME_SAVED_BP]; STACK_PUSH(sp, result); @@ -537,9 +506,10 @@ void op_closure(DECL_STATE) { aint *args = sp - 1; args[0] = (aint)target; STACK_REVERSE(args + 1, n_captured); - sp += n_captured; + SYNC_GC_STACK(args); void *closure = Bclosure(args, BOX(n_captured)); + sp += n_captured; VM_DEBUG("CLOSURE: created=%p\n", (void *)closure); STACK_PUSH(sp, (aint)closure); DISPATCH(); From f2e99689503a8d823da0489fd219b0120afd2573 Mon Sep 17 00:00:00 2001 From: ancavar Date: Sun, 26 Apr 2026 19:41:50 +0300 Subject: [PATCH 127/141] fix --- virtual_machine/lama.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c 
index f2a4a98b3..a4c735271 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -25,10 +25,7 @@ static bool parse_bytecode_path(const char *path, char **unit_name_out, char *path_copy = ESTRDUP(path); char *base = basename(path_copy); - char *dot = strrchr(base, '.'); - if (dot) { - *dot = '\0'; - } + base[strlen(base) - suffix_len] = '\0'; char *dir_copy = ESTRDUP(path); *unit_name_out = ESTRDUP(base); From efec8f9e4befa443f723145f6aba98085fc0db27 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 18 May 2026 01:03:00 +0300 Subject: [PATCH 128/141] better build --- Makefile | 2 +- runtime/Makefile | 23 +++++++++++++---------- stdlib/Makefile | 14 +++++++++++--- stdlib/x64/dune | 18 +++++++++++++++++- virtual_machine/Makefile | 23 +++++++++++++---------- virtual_machine/dune | 30 ++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+), 25 deletions(-) create mode 100644 virtual_machine/dune diff --git a/Makefile b/Makefile index d4cb1a599..078709e0a 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ BUILDDIR = _build all: build test build: - dune b src runtime runtime32 stdlib tutorial + dune b src runtime runtime32 stdlib tutorial virtual_machine install: all dune b @install --profile=release diff --git a/runtime/Makefile b/runtime/Makefile index 64a6a816b..4d4a4c618 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -8,6 +8,7 @@ else ifeq ($(UNAME_S),Darwin) endif LAMA_ENV ?= 1 +BUILD_DIR ?= . 
# Virtual machine doesn't work well with this parameter ifeq ($(LAMA_ENV), 1) @@ -23,18 +24,20 @@ TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION UNIT_TESTS_FLAGS=$(TEST_FLAGS) INVARIANTS_CHECK_FLAGS=$(TEST_FLAGS) -DFULL_INVARIANT_CHECKS -all: gc.o runtime.o printf.o - ar rc runtime.a runtime.o gc.o printf.o +all: $(BUILD_DIR)/gc.o $(BUILD_DIR)/runtime.o $(BUILD_DIR)/printf.o + ar rc $(BUILD_DIR)/runtime.a $(BUILD_DIR)/runtime.o $(BUILD_DIR)/gc.o $(BUILD_DIR)/printf.o -gc.o: gc.c gc.h - $(CC) $(PROD_FLAGS) -c gc.c -o gc.o +$(BUILD_DIR): + mkdir -p $@ -runtime.o: runtime.c runtime.h - $(CC) $(PROD_FLAGS) -c runtime.c -o runtime.o +$(BUILD_DIR)/gc.o: gc.c gc.h | $(BUILD_DIR) + $(CC) $(PROD_FLAGS) -c gc.c -o $(BUILD_DIR)/gc.o -printf.o: printf.S - $(CC) $(PROD_FLAGS) -Wa,--noexecstack -x assembler-with-cpp -c -g printf.S -o printf.o +$(BUILD_DIR)/runtime.o: runtime.c runtime.h | $(BUILD_DIR) + $(CC) $(PROD_FLAGS) -c runtime.c -o $(BUILD_DIR)/runtime.o -clean: - $(RM) *.a *.o *~ negative_scenarios/*.err +$(BUILD_DIR)/printf.o: printf.S | $(BUILD_DIR) + $(CC) $(PROD_FLAGS) -Wa,--noexecstack -x assembler-with-cpp -c -g printf.S -o $(BUILD_DIR)/printf.o +clean: + $(RM) $(BUILD_DIR)/*.a $(BUILD_DIR)/*.o *~ negative_scenarios/*.err diff --git a/stdlib/Makefile b/stdlib/Makefile index 5d4766162..52efebbc7 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -1,4 +1,4 @@ -.PHONY: all +.PHONY: all obj bytecode SHELL := /bin/bash SRCDIR ?= . @@ -8,12 +8,17 @@ $(info FILES = $(FILES)) OFILES = $(FILES:$(SRCDIR)/%=%) OFILES := $(OFILES:.lama=.o) $(info OFILES = $(OFILES)) +BCFILES = $(OFILES:.o=.bc) ALL := $(sort $(OFILES)) LAMAC ?= ../src/lamac BDIR ?= . 
-all: $(addprefix $(BDIR)/,$(ALL)) +all: obj + +obj: $(addprefix $(BDIR)/,$(ALL)) $(info ALL = $(ALL), SRCDIR = $(SRCDIR)) + +bytecode: obj $(addprefix $(BDIR)/,$(BCFILES)) $(BDIR)/Fun.o: $(BDIR)/Ref.o $(BDIR)/Data.o: $(BDIR)/Ref.o $(BDIR)/Collection.o @@ -33,7 +38,10 @@ $(BDIR)/STM.o: $(BDIR)/List.o $(BDIR)/Fun.o $(BDIR)/%.o: $(SRCDIR)/%.lama $(LAMAC) -g -I . -c $< #-o $@ +$(BDIR)/%.bc: $(SRCDIR)/%.lama obj + $(LAMAC) -I . -b $< + clean: - $(RM) -r *.s *.o *.i *~ + $(RM) -r *.s *.o *.i *.bc *~ pushd regression && make clean && popd diff --git a/stdlib/x64/dune b/stdlib/x64/dune index de85adfc9..bcb4009ea 100644 --- a/stdlib/x64/dune +++ b/stdlib/x64/dune @@ -18,30 +18,43 @@ %{project_root}/src/Driver.exe) (targets Array.i + Array.bc Array.o Buffer.i + Buffer.bc Buffer.o Collection.i + Collection.bc Collection.o Data.i + Data.bc Data.o Fun.i + Fun.bc Fun.o Lazy.i + Lazy.bc Lazy.o List.i + List.bc List.o Matcher.i + Matcher.bc Matcher.o Ostap.i + Ostap.bc Ostap.o Random.i + Random.bc Random.o Ref.i + Ref.bc Ref.o STM.i + STM.bc STM.o Timer.i + Timer.bc Timer.o) (mode (promote (until-clean))) @@ -56,10 +69,13 @@ (setenv LAMAC "../../src/Driver.exe -64 -I ../../runtime" - (run make -j2 -f ../Makefile all))))))) + (progn + (run make -j2 -f ../Makefile obj) + (run make -j2 -f ../Makefile bytecode)))))))) (install (section share) (files (glob_files (*.i with_prefix x64)) + (glob_files (*.bc with_prefix x64)) (glob_files (*.o with_prefix x64)))) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 4a9e61fb5..e7d174543 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -1,31 +1,34 @@ CC = gcc -#TODO: O2 doesn't work -CFLAGS = -Wall -Wextra -std=c99 -O0 +CFLAGS = -Wall -Wextra -std=c99 -O3 LIBS = -lffi -ldl LDFLAGS = -rdynamic +BUILD_DIR = .vm-build TARGET = lama.exe SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c memory.c opcodes.c -OBJECTS = $(SOURCES:.c=.o) +OBJECTS = $(addprefix 
$(BUILD_DIR)/,$(SOURCES:.c=.o)) RUNTIME_DIR = ../runtime -RUNTIME_LIB = $(RUNTIME_DIR)/runtime.a +RUNTIME_LIB = $(BUILD_DIR)/runtime/runtime.a all: $(TARGET) debug: CFLAGS += -DDEBUG_PRINT -g3 -Og -O0 -fsanitize=address debug: $(TARGET) -$(TARGET): $(OBJECTS) $(RUNTIME_LIB) +$(TARGET): $(OBJECTS) $(RUNTIME_LIB) | $(BUILD_DIR) $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) -Wl,--whole-archive $(RUNTIME_LIB) -Wl,--no-whole-archive $(LIBS) $(RUNTIME_LIB): - $(MAKE) LAMA_ENV=0 -C $(RUNTIME_DIR) + $(MAKE) -C $(RUNTIME_DIR) LAMA_ENV=0 BUILD_DIR=$(abspath $(BUILD_DIR)/runtime) -%.o: %.c - $(CC) $(CFLAGS) $(DEPFLAGS) -c -o $@ $< +$(BUILD_DIR): + mkdir -p $@ + +$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR) + $(CC) $(CFLAGS) -c -o $@ $< clean: - rm -f $(OBJECTS) $(TARGET) $(OBJECTS:.o=.d) - $(MAKE) -C $(RUNTIME_DIR) clean + rm -f $(OBJECTS) $(TARGET) + $(MAKE) -C $(RUNTIME_DIR) BUILD_DIR=$(abspath $(BUILD_DIR)/runtime) clean .PHONY: all debug clean diff --git a/virtual_machine/dune b/virtual_machine/dune new file mode 100644 index 000000000..27d192bb5 --- /dev/null +++ b/virtual_machine/dune @@ -0,0 +1,30 @@ +(rule + (target lama.exe) + (deps + Makefile + lama.c + converter.c + vm.c + bytecode.c + ffi.c + loader.c + symbols.c + ops.c + memory.c + opcodes.c + (glob_files *.h) + ../runtime/Makefile + ../runtime/gc.c + ../runtime/gc.h + ../runtime/runtime.c + ../runtime/runtime.h + ../runtime/runtime_common.h + ../runtime/printf.S) + (action + (run + make + TARGET=%{target}))) + +(alias + (name default) + (deps lama.exe)) From 1640d07e22adae96115378689b4fa99fdbe674a1 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 18 May 2026 17:10:39 +0300 Subject: [PATCH 129/141] add O3 runtime (?) 
for bench --- runtime/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Makefile b/runtime/Makefile index 4d4a4c618..5876c8568 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -18,7 +18,7 @@ else endif DISABLE_WARNINGS=-Wno-shift-negative-value -COMMON_FLAGS=$(DISABLE_WARNINGS) -g -fstack-protector-all $(ARCH) --std=c11 +COMMON_FLAGS=$(DISABLE_WARNINGS) -O3 -g -fstack-protector-all $(ARCH) --std=c11 PROD_FLAGS=$(COMMON_FLAGS) $(ENV_FLAGS) TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION UNIT_TESTS_FLAGS=$(TEST_FLAGS) From 2863794e81a382b28e67ac5b79345602de64a8cc Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 18 May 2026 20:06:53 +0300 Subject: [PATCH 130/141] first version specification --- virtual_machine/README.md | 56 +- virtual_machine/SPEC.md | 1153 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1165 insertions(+), 44 deletions(-) create mode 100644 virtual_machine/SPEC.md diff --git a/virtual_machine/README.md b/virtual_machine/README.md index 3dbb459d7..a8e72e8ae 100644 --- a/virtual_machine/README.md +++ b/virtual_machine/README.md @@ -2,60 +2,28 @@ This directory contains the implementation of the virtual machine for the Lama programming language. The VM is a stack-based execution engine designed to run Lama bytecode. +Documentation is split as follows: + +* [`SPEC.md`](SPEC.md) - bytecode file format and instruction reference +* `README.md` - architectural overview of the VM implementation + ## Architecture overview (work in progress) The Lama VM follows a stack-based architecture where operands are pushed onto a data stack, and operations consume these operands and push results back. ![Architecture](arch.png) (work in progress, each iteration the architecture will change) + ### Key Components -* **Interpreter (`interpreter.c`)**: The core execution loop that fetches, decodes, and executes bytecode instructions. 
-* **Data stack (`stack.c`, `stack.h`)**: A growable stack used for evaluating expressions, passing function arguments, and storing local variables. -* **Call stack (`call_stack.c`, `call_stack.h`)**: Manages function activation records (frames), tracking return addresses and stack base pointers. -* **Instruction set (`opcodes.h`)**: Defines the bytecode opcodes +* **Interpreter (`interpreter.c`)**: The core execution loop that fetches, decodes, and executes bytecode instructions. +* **Data stack (`stack.c`, `stack.h`)**: A growable stack used for evaluating expressions, passing function arguments, and storing local variables. +* **Call stack (`call_stack.c`, `call_stack.h`)**: Manages function activation records (frames), tracking return addresses and stack base pointers. +* **Instruction set (`opcodes.h`)**: Defines the bytecode opcodes ### Interaction with Runtime The VM is tightly integrated with the Lama runtime (`../runtime/`). It relies on the runtime for: -* **Memory management**: Automatic garbage collection for heap-allocated objects. -* **Built-in functions**: IO operations (read/write), array/S-expression/string handling. - -## Bytecode format - -### Layout -Multi-byte integers are laid out in little-endian order. -1. Header (16 bytes) -2. String table (variable) -3. Imports (number of imports * 4 bytes) -4. Public symbols (number of public symbols * 9 bytes) -5. 
Code section (until 0xFF) - -### Header -The header is 16 bytes: -* `string_table_size` (int32): size of the string table -* `globals_count` (int32): number of global slots (stored on the stack) -* `imports_count` (int32): number of imports -* `public_symbols_count` (int32): number of public symbols - -### Imports -Each entry is 4 bytes: -- `name_offset` (int32): offset into string table for module name - -### Public symbols -Each entry is 9 bytes: -- `name_offset` (int32): offset into string table -- `code_offset` (int32): for functions: bytecode offset; for globals: global index -- `flag` (uint8): 0 = function, 1 = global - -### External references -CALL (0x56) and CLOSURE (0x54) instructions use negative values for external function references. -LD (0x20) and ST (0x40) instructions use negative values for external global references. - -The encoding is the same for both: -- Non-negative values: local references (bytecode offset for functions, global index for globals) -- Negative values: `string_table_offset = -value -1` - -The string at that offset is looked up to resolve the external symbol at load time. - +* **Memory management**: Automatic garbage collection for heap-allocated objects. +* **Built-in functions**: IO operations (read/write), array/S-expression/string handling. diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md new file mode 100644 index 000000000..26c957165 --- /dev/null +++ b/virtual_machine/SPEC.md @@ -0,0 +1,1153 @@ +# ![Lama](lama.svg) Bytecode and VM Instruction Reference + +This document describes two representations: + + 1. __Bytecode format__: the serialized instructions stored in `.bc` files. + 2. __VM representation__: the decoded (threaded code) representation executed by the VM. 
+ +## Table of contents + +* [Data types](#data-types) +* [Bytecode file layout](#bytecode-file-layout) +* [External references](#external-references) +* [Instruction reference conventions](#instruction-reference-conventions) +* [BINOP](#binop) +* [CONST](#const) +* [STRING](#string) +* [SEXP](#sexp) +* [STA](#sta) +* [JMP](#jmp) +* [END](#end) +* [DROP](#drop) +* [DUP](#dup) +* [SWAP](#swap) +* [ELEM](#elem) +* [LD](#ld) +* [ST](#st) +* [CJMP](#cjmp) +* [BEGIN](#begin) +* [BEGIN_CLOSURE](#begin_closure) +* [CLOSURE](#closure) +* [CALLC](#callc) +* [CALL](#call) +* [TAG](#tag) +* [ARRAY](#array) +* [FAIL](#fail) +* [LINE](#line) +* [PATT](#patt) +* [BARRAY](#barray) +* [EOF](#eof) + +## Data types + +All integers are fixed-width and little-endian; signed integers use two's complement. The following types are used in this document: + +| Type | Width | Range | +|:--|:--|:--| +| `uint8` | 1 byte | 0 to 255 | +| `int32` | 4 bytes | −2<sup>31</sup> to 2<sup>31</sup>−1 | + +The `opcode` field of every bytecode instruction is `uint8`. + +## Bytecode file layout + +1. Header (16 bytes) +2. String table (`string_table_size` bytes) +3. Imports (`imports_count * 4` bytes) +4. Public symbols (`public_symbols_count * 9` bytes) +5. 
Code section (until `0xFF`) + +### Header + +| Field | Type | Description | +|:--|:--|:--| +| `string_table_size` | `int32` | size of the string table (in bytes) | +| `globals_count` | `int32` | number of global variables (stored on the stack) | +| `imports_count` | `int32` | number of imports | +| `public_symbols_count` | `int32` | number of public symbols | + +### Imports + +| Field | Type | Description | +|:--|:--|:--| +| `name_offset` | `int32` | offset into the string table for the module name | + +### Public symbols + +| Field | Type | Description | +|:--|:--|:--| +| `name_offset` | `int32` | offset into the string table | +| `code_offset` | `int32` | bytecode offset for functions, global index for globals | +| `flag` | `uint8` | `0` = function, `1` = global | + +## External references + +`CALL` (`0x56`) and `CLOSURE` (`0x54`) instructions use negative values for external function references. `LD_GLO` (`0x20`) and `ST_GLO` (`0x40`) instructions use negative values for external global references. + +The encoding is the same for both: + +* `value >= 0`: local reference (bytecode offset for functions, global index for globals) +* `value < 0`: external reference, with `string_table_offset = -value - 1` + +The string at that offset is looked up to resolve the external symbol at load time. + +## Instruction reference conventions + +* __Operation__ - a short description of what the instruction does. +* __Format__ - the bytecode mnemonic and operand types. +* __Forms__ - concrete opcode variants and their opcode values. +* __Operand stack__ - written as `before` → `after`. The top of stack is rightmost. +* __Description__ - bytecode-level meaning of the instruction and its operands. +* __Implementation notes__ - VM-internal representation after decoding, and the effect on registers `<ip, sp, bp>`. + +The VM uses the following virtual registers: + +* `ip` - instruction pointer into the decoded (threaded-code) stream +* `sp` - operand stack pointer. 
The operand stack grows downwards: a push decrements `sp`, and a pop increments it. +* `bp` - base pointer of the current call frame + +## BINOP + +### Operation + +Apply a binary operator to the top two stack values. + +### Format + +```text +BINOP +``` + +### Forms + +```text +ADD = 0x01 +SUB = 0x02 +MUL = 0x03 +DIV = 0x04 +MOD = 0x05 +LT = 0x06 +LE = 0x07 +GT = 0x08 +GE = 0x09 +EQ = 0x0A +NE = 0x0B +AND = 0x0C +OR = 0x0D +``` + +### Operand stack + +`..., x, y` → `..., result` + +### Description + +Each binary operator pops its two operands from the stack, applies the corresponding runtime operation, and pushes the result. + +The operand order is `x` then `y`, so the topmost stack value is the right operand. + +### Implementation notes + +```text +ADD -> [op_add] +SUB -> [op_sub] +MUL -> [op_mul] +DIV -> [op_div] +MOD -> [op_mod] +LT -> [op_lt] +LE -> [op_le] +GT -> [op_gt] +GE -> [op_ge] +EQ -> [op_eq] +NE -> [op_ne] +AND -> [op_and] +OR -> [op_or] +``` + +All binary operator handlers pop `y`, then `x`, call `runtime_fn(x, y)`, and push the resulting value. + +`<ip, sp, bp>` → `<ip + 1, sp + 1, bp>` + +## CONST + +### Operation + +Push a constant onto the stack. + +### Format + +```text +CONST value:int32 +``` + +### Forms + +```text +CONST = 0x10 +``` + +### Operand stack + +`...` → `..., value` + +### Description + +`value` is a signed 32-bit integer literal encoded directly in the bytecode. + +### Implementation notes + +```text +[op_const][value] +``` + +Pushes `BOX(value)` onto the stack. + +`<ip, sp, bp>` → `<ip + 2, sp - 1, bp>` + +## STRING + +### Operation + +Push a string onto the stack. + +### Format + +```text +STRING string_offset:int32 +``` + +### Forms + +```text +STRING = 0x11 +``` + +### Operand stack + +`...` → `..., value` + +### Description + +`string_offset` is an offset into the string table pointing to the literal bytes of the string. + +### Implementation notes + +```text +[op_string][str] +``` + +`string_offset` is resolved during decoding to a direct C string pointer `str`. 
+ +`<ip, sp, bp>` → `<ip + 2, sp - 1, bp>` + +## SEXP + +### Operation + +Create an S-expression. + +### Format + +```text +SEXP tag_offset:int32 n_fields:int32 +``` + +### Forms + +```text +SEXP = 0x12 +``` + +### Operand stack + +`..., field_1, ..., field_n` → `..., sexp` + +### Description + +`tag_offset` is an offset into the string table pointing to the S-expression tag name. `n_fields` is the number of fields consumed from the top of the operand stack. + +The fields are taken from the stack and used as the contents of the constructed S-expression. + +### Implementation notes + +```text +[op_sexp][tag_hash][n_fields] +``` + +`tag_offset` is resolved beforehand during decoding to `tag_hash` (using runtime `LtagHash`). + +`<ip, sp, bp>` → `<ip + 3, sp + n_fields - 1, bp>` + +## STA + +### Operation + +Store a value into an aggregate. + +### Format + +```text +STA +``` + +### Forms + +```text +STA = 0x14 +``` + +### Operand stack + +`..., aggregate, index, value` → `..., value` + +### Description + +`aggregate` represents an array, S-expression, or string. + +`STA` stores `value` into `aggregate[index]`. + +The target aggregate and index are taken from the operand stack. The stored value remains on the stack after the store. + +### Implementation notes + +```text +[op_sta] +``` + +Calls `Bsta(aggregate, index, value)` and leaves `value` on the stack. + +`<ip, sp, bp>` → `<ip + 1, sp + 2, bp>` + +## JMP + +### Operation + +Jump unconditionally to a target instruction. + +### Format + +```text +JMP target:int32 +``` + +### Forms + +```text +JMP = 0x15 +``` + +### Operand stack + +`...` → `...` + +### Description + +`target` is a bytecode offset naming the jump destination within the code section. + +### Implementation notes + +```text +[op_jmp][target] +``` + +`target` is resolved during decoding to a threaded-code instruction pointer. + +`<ip, sp, bp>` → `<target, sp, bp>` + +## END + +### Operation + +Return from the current function. 
+ +### Format + +```text +END +``` + +### Forms + +```text +END = 0x16 +``` + +### Operand stack + +`..., return_value` → `..., return_value` + +### Description + +`END` terminates the current function body and returns the top stack value to the caller. + +### Implementation notes + +```text +[op_end] +``` + +Restores the caller's `ip`, `sp`, and `bp` from the values saved in the current frame (`saved_ip`, `saved_sp`, `saved_bp`) and pushes the return value onto the caller stack. + +`<ip, sp, bp>` → `<saved_ip, saved_sp - 1, saved_bp>` + +## DROP + +### Operation + +Drop the top stack value. + +### Format + +```text +DROP +``` + +### Forms + +```text +DROP = 0x18 +``` + +### Operand stack + +`..., value` → `...` + +### Description + +`DROP` removes the top value from the operand stack. + +### Implementation notes + +```text +[op_drop] +``` + +`<ip, sp, bp>` → `<ip + 1, sp + 1, bp>` + +## DUP + +### Operation + +Duplicate the top stack value. + +### Format + +```text +DUP +``` + +### Forms + +```text +DUP = 0x19 +``` + +### Operand stack + +`..., value` → `..., value, value` + +### Description + +`DUP` copies the top value of the operand stack. + +### Implementation notes + +```text +[op_dup] +``` + +`<ip, sp, bp>` → `<ip + 1, sp - 1, bp>` + +## SWAP + +### Operation + +Swap the top two stack values. + +### Format + +```text +SWAP +``` + +### Forms + +```text +SWAP = 0x1A +``` + +### Operand stack + +`..., x, y` → `..., y, x` + +### Description + +`SWAP` exchanges the top two values on the operand stack. + +### Implementation notes + +```text +[op_swap] +``` + +`<ip, sp, bp>` → `<ip + 1, sp, bp>` + +## ELEM + +### Operation + +Load a value from an aggregate. + +### Format + +```text +ELEM +``` + +### Forms + +```text +ELEM = 0x1B +``` + +### Operand stack + +`..., aggregate, index` → `..., value` + +### Description + +`aggregate` represents an array, S-expression, or string. + +`ELEM` loads the value stored at `aggregate[index]`. + +### Implementation notes + +```text +[op_elem] +``` + +Calls `Belem(aggregate, index)` and pushes the loaded value. + +`<ip, sp, bp>` → `<ip + 1, sp + 1, bp>` + +## LD + +### Operation + +Load a variable onto the stack. 
+ +### Format + +```text +LD operand:int32 +``` + +### Forms + +```text +LD_GLO = 0x20 +LD_LOC = 0x21 +LD_ARG = 0x22 +LD_CLO = 0x23 +``` + +### Operand stack + +`...` → `..., value` + +### Description + +`LD_GLO`: + +* `operand` field uses the same external global reference encoding described in [external references](#external-references). + +`LD_LOC`: + +* `operand` is a local slot index. + +`LD_ARG`: + +* `operand` is an argument slot index. + +`LD_CLO`: + +* `operand` is a closure capture index. + +### Implementation notes + +```text +LD_GLO -> [op_ld_glo][global_ptr] +LD_LOC -> [op_ld_loc][local] +LD_ARG -> [op_ld_arg][arg] +LD_CLO -> [op_ld_clo][capture] +``` + +`LD_GLO` resolves `operand` during decoding to `global_ptr`. +`LD_CLO` reads captured value `operand` from `closure[operand + 1]`. + +`<ip, sp, bp>` → `<ip + 2, sp - 1, bp>` + +## ST + +### Operation + +Store a value into a variable. + +### Format + +```text +ST operand:int32 +``` + +### Forms + +```text +ST_GLO = 0x40 +ST_LOC = 0x41 +ST_ARG = 0x42 +ST_CLO = 0x43 +``` + +### Operand stack + +`..., value` → `..., value` + +### Description + +`ST_GLO`: + +* `operand` field uses the same external global reference encoding described in [external references](#external-references). + +`ST_LOC`: + +* `operand` is a local slot index. + +`ST_ARG`: + +* `operand` is an argument slot index. + +`ST_CLO`: + +* `operand` is a closure capture index. + +### Implementation notes + +```text +ST_GLO -> [op_st_glo][global_ptr] +ST_LOC -> [op_st_loc][local] +ST_ARG -> [op_st_arg][arg] +ST_CLO -> [op_st_clo][capture] +``` + +`ST_GLO` resolves `operand` during decoding to `global_ptr`. +`ST_CLO` stores captured value `operand` in `closure[operand + 1]`. + +`<ip, sp, bp>` → `<ip + 2, sp, bp>` + +## CJMP + +### Operation + +Jump conditionally to a target instruction.
+ +### Format + +```text +CJMP target:int32 +``` + +### Forms + +```text +CJMP_Z = 0x50 +CJMP_NZ = 0x51 +``` + +### Operand stack + +`..., value` → `...` + +### Description + +`target` is a bytecode offset naming the jump destination within the code section. + +`CJMP_Z` jumps when `value == 0`. `CJMP_NZ` jumps when `value != 0`. + +### Implementation notes + +```text +CJMP_Z -> [op_cjmp_z][target] +CJMP_NZ -> [op_cjmp_nz][target] +``` + +`target` is resolved during decoding to a threaded-code instruction pointer. + +`<ip, sp, bp>` → `<(target | ip + 2), sp + 1, bp>` + +## BEGIN + +### Operation + +Enter a function body and allocate local slots. + +### Format + +```text +BEGIN n_args:int32 n_locals:int32 +``` + +### Forms + +```text +BEGIN = 0x52 +``` + +### Operand stack + +`..., arg_1, ..., arg_n` → `..., arg_1, ..., arg_n, local_1, ..., local_m` + +### Description + +`n_args` is the number of function arguments and `n_locals` is the number of local slots allocated for the function body. + +Each local slot is initialized to `BOX(0)`. + +### Implementation notes + +```text +[op_begin][n_args][n_locals] +``` + +`` → `` + +## BEGIN_CLOSURE + +### Operation + +Enter a closure body and allocate local slots. + +### Format + +```text +BEGIN_CLOSURE n_args:int32 n_locals:int32 n_captured:int32 +``` + +### Forms + +```text +BEGIN_CLOSURE = 0x53 +``` + +### Operand stack + +`..., arg_1, ..., arg_n, closure` → `..., arg_1, ..., arg_n, closure, local_1, ..., local_m` + +### Description + +`n_args` is the number of call arguments, `n_locals` is the number of local slots allocated for the closure body, and `n_captured` is the number of captured values expected by the closure entry point. + +Each local slot is initialized to `BOX(0)`. + +### Implementation notes + +```text +[op_begin_closure][n_args][n_locals] +``` + +`n_captured` is validated during decoding and is not carried into the threaded representation.
+ +`` → `` + +## CLOSURE + +### Operation + +Create a closure object and push it on the stack. + +### Format + +```text +CLOSURE target:int32 n_captured:int32 (kind:uint8 index:int32)* +``` + +### Forms + +```text +CLOSURE = 0x54 +``` + +### Operand stack + +`...` → `..., closure` + +### Description + +`target` is a bytecode offset naming the closure entry point. `n_captured` is the number of captured values stored in the closure. + +Each capture designation is encoded as a `(kind:uint8 index:int32)` pair, where: + +* `kind = 0` denotes a global variable +* `kind = 1` denotes a local variable +* `kind = 2` denotes a function argument +* `kind = 3` denotes a captured closure variable + +The `target` operand uses the same external function reference encoding described in [external references](#external-references). + +### Implementation notes + +```text +capture_* -> LD +[op_closure][target][n_captured] +``` + +Each capture designation is translated during decoding into a corresponding [LD](#ld) instruction. `target` is then resolved to a threaded-code instruction pointer. + +`op_closure` consumes the already loaded captured values from the stack. The runtime closure layout is: + +* `closure[0]` = entry point +* `closure[i + 1]` = captured value `i` + +`` → `` + +## CALLC + +### Operation + +Call a closure value. + +### Format + +```text +CALLC n_args:int32 +``` + +### Forms + +```text +CALLC = 0x55 +``` + +### Operand stack + +`..., arg_1, ..., arg_n, closure` → `..., result` + +### Description + +`n_args` is the number of call arguments. The closure value is taken from the top of the stack. + +### Implementation notes + +```text +[op_callc][n_args] +``` + +Reads the entry point from `closure[0]`, pushes a new call frame, and transfers control to it. + +`` → `` + +## CALL + +### Operation + +Call a function. 
+ +### Format + +```text +CALL target:int32 n_args:int32 +``` + +### Forms + +```text +CALL = 0x56 +``` + +### Operand stack + +`..., arg_1, ..., arg_n` → `..., result` + +### Description + +`target` is a bytecode offset naming the call target. `n_args` is the number of call arguments. + +The `target` operand uses the same external function reference encoding described in [external references](#external-references). + +### Implementation notes + +```text +[op_call][target][n_args] +``` + +`target` is resolved during decoding to a threaded-code instruction pointer. + +`` → `` + +## TAG + +### Operation + +Check whether a value is an S-expression with a given tag and arity. + +### Format + +```text +TAG tag_offset:int32 n_fields:int32 +``` + +### Forms + +```text +TAG = 0x57 +``` + +### Operand stack + +`..., value` → `..., result` + +### Description + +`tag_offset` is an offset into the string table naming the expected S-expression tag. `n_fields` is the expected number of fields. + +### Implementation notes + +```text +[op_tag][tag_hash][n_fields] +``` + +`tag_offset` is resolved during decoding to `tag_hash` (using runtime `LtagHash`). + +`` → `` + +## ARRAY + +### Operation + +Check whether a value is an array of a given length. + +### Format + +```text +ARRAY n:int32 +``` + +### Forms + +```text +ARRAY = 0x58 +``` + +### Operand stack + +`..., value` → `..., result` + +### Description + +`n` is the expected array length. + +### Implementation notes + +```text +[op_array][n] +``` + +`` → `` + +## FAIL + +### Operation + +Raise a pattern-match failure. + +### Format + +```text +FAIL line:int32 col:int32 +``` + +### Forms + +```text +FAIL = 0x59 +FAIL_KEEP = 0x5A +``` + +### Operand stack + +`..., value` → `...` for `FAIL` + +`..., value` → `..., value` for `FAIL_KEEP` + +### Description + +`line` and `col` identify the source position reported for the match failure. + +`FAIL` consumes the top value before reporting the failure. 
`FAIL_KEEP` reports the failure while keeping the top value. + +### Implementation notes + +```text +[op_fail][line][col][drop_value][module_name] +``` + +`<ip, sp, bp>` → `⊥` + +## LINE + +### Operation + +Emit a line marker. + +### Format + +```text +LINE line:int32 +``` + +### Forms + +```text +LINE = 0x5B +``` + +### Operand stack + +`...` → `...` + +### Description + +`line` is a source line number associated with the following bytecode position. + +### Implementation notes + +```text +[op_line][line] +``` + +In non-`DEBUG_PRINT` builds, `LINE` is skipped during decoding and does not appear in the threaded representation. + +`<ip, sp, bp>` → `<ip + 2, sp, bp>` + +## PATT + +### Operation + +Apply a pattern predicate. + +### Format + +```text +PATT +``` + +### Forms + +```text +PATT_STR_CMP = 0x60 +PATT_STRING = 0x61 +PATT_ARRAY = 0x62 +PATT_SEXP = 0x63 +PATT_BOXED = 0x64 +PATT_UNBOXED = 0x65 +PATT_CLOSURE = 0x66 +``` + +### Operand stack + +`..., x, y` → `..., result` for `PATT_STR_CMP` + +`..., value` → `..., result` for all other forms + +### Description + +Each `PATT_*` instruction applies a runtime predicate used by pattern matching and pushes a boolean(-like) result. + +`PATT_STR_CMP` compares two values. The remaining forms test whether a single value matches the corresponding runtime shape. + +### Implementation notes + +```text +PATT_STR_CMP -> [op_patt_str_cmp] +PATT_STRING -> [op_patt_string] +PATT_ARRAY -> [op_patt_array] +PATT_SEXP -> [op_patt_sexp] +PATT_BOXED -> [op_patt_boxed] +PATT_UNBOXED -> [op_patt_unboxed] +PATT_CLOSURE -> [op_patt_closure] +``` + +`PATT_STR_CMP` behaves like a binary operator. All other `PATT_*` forms behave like unary operators. + +`<ip, sp, bp>` → `<ip + 1, sp + 1, bp>` for `PATT_STR_CMP` + +`<ip, sp, bp>` → `<ip + 1, sp, bp>` for all other forms + +## BARRAY + +### Operation + +Construct an array and push it onto the stack.
+ +### Format + +```text +BARRAY n:int32 +``` + +### Forms + +```text +BARRAY = 0x74 +``` + +### Operand stack + +`..., elem_1, ..., elem_n` → `..., array` + +### Description + +`n` is the number of array elements consumed from the top of the operand stack. + +### Implementation notes + +```text +[op_barray][n] +``` + +`op_barray` consumes `n` stack values, reverses them into array order, allocates a runtime array with `Barray`, and pushes the resulting array value. + +`` → `` + +## EOF + +### Operation + +Mark the end of the bytecode stream. + +### Format + +```text +EOF +``` + +### Forms + +```text +EOF = 0xFF +``` + +### Operand stack + +`...` → `...` + +### Description + +`EOF` terminates the bytecode stream. It must appear at the end of the code section and outside any function body. + +### Implementation notes + +```text +[op_eof] +``` + +`` → `⊥` From 63dd7c8119a5a5729a82595815c172df1f3d3965 Mon Sep 17 00:00:00 2001 From: Danil P Date: Tue, 19 May 2026 02:06:19 +0300 Subject: [PATCH 131/141] lama svg --- virtual_machine/SPEC.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md index 26c957165..4a61ccf85 100644 --- a/virtual_machine/SPEC.md +++ b/virtual_machine/SPEC.md @@ -1,4 +1,4 @@ -# ![Lama](lama.svg) Bytecode and VM Instruction Reference +# ![Lama](../lama.svg) Bytecode and VM Instruction Reference This document describes two representations: From 417a39dc9e0e6227eeeb409f0a44e18f17713b1d Mon Sep 17 00:00:00 2001 From: ancavar Date: Wed, 20 May 2026 17:41:51 +0300 Subject: [PATCH 132/141] add magic number --- src/SM.ml | 2 ++ virtual_machine/SPEC.md | 3 ++- virtual_machine/bytecode.c | 13 +++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index a1df79eb7..b6f224d9e 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -165,6 +165,7 @@ module ByteCode = struct (* Public symbol flags *) let pub_flag_function = 0 let pub_flag_global = 1 + let magic = "LaMa" 
let compile cmd insns = let code = Buffer.create 256 in @@ -425,6 +426,7 @@ module ByteCode = struct let file = Buffer.create 1024 in let add_file_bytes = add_bytes file in let add_file_ints = add_ints file in + Buffer.add_string file magic; add_file_ints [ Bytes.length st ; Hashtbl.length globals diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md index 4a61ccf85..e6f014c13 100644 --- a/virtual_machine/SPEC.md +++ b/virtual_machine/SPEC.md @@ -51,7 +51,7 @@ The `opcode` field of every bytecode instruction is `uint8`. # Bytecode file layout -1. Header (16 bytes) +1. Header (20 bytes) 2. String table (`string_table_size` bytes) 3. Imports (`imports_count * 4` bytes) 4. Public symbols (`public_symbols_count * 9` bytes) @@ -61,6 +61,7 @@ The `opcode` field of every bytecode instruction is `uint8`. | Field | Type | Description | |:--|:--|:--| +| `magic` | `uint8[4]` | ASCII bytes `LaMa` | | `string_table_size` | `int32` | size of the string table (in bytes) | | `globals_count` | `int32` | number of global variables (stored on the stack) | | `imports_count` | `int32` | number of imports | diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index de9fb70c6..c309104b9 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -11,7 +11,9 @@ #include #include -#define HEADER_SIZE 16 +#define MAGIC "LaMa" +#define MAGIC_SIZE 4 +#define HEADER_SIZE (MAGIC_SIZE + 16) #define PUB_ENTRY_SIZE 9 #define IMPORT_ENTRY_SIZE 4 @@ -47,6 +49,13 @@ bytecode *bytecode_load_fd(int fd) { goto out; } + if (memcmp(data, MAGIC, MAGIC_SIZE) != 0) { + fprintf(stderr, "bytecode_load: invalid magic number\n"); + goto out; + } + + reader_skip(&reader, MAGIC_SIZE); + int32_t string_table_size = reader_i32(&reader); int32_t globals_count = reader_i32(&reader); int32_t num_imports = reader_i32(&reader); @@ -73,7 +82,7 @@ bytecode *bytecode_load_fd(int fd) { size_t code_size = file_size - code_offset; - if (data[code_offset + code_size - 1] != OP_EOF) { + 
if (code_size == 0 || data[code_offset + code_size - 1] != OP_EOF) { fprintf(stderr, "bytecode_load: bytecode must end with EOF opcode\n"); goto out; } From 574e3b08fbf542b068e53af61eeb6c87b4ce111f Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 21 May 2026 13:54:22 +0300 Subject: [PATCH 133/141] add version number to bytecode format --- src/SM.ml | 4 +++- virtual_machine/SPEC.md | 3 ++- virtual_machine/bytecode.c | 10 +++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/SM.ml b/src/SM.ml index b6f224d9e..884e7a0d4 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -166,6 +166,7 @@ module ByteCode = struct let pub_flag_function = 0 let pub_flag_global = 1 let magic = "LaMa" + let format_version = 1 let compile cmd insns = let code = Buffer.create 256 in @@ -428,7 +429,8 @@ module ByteCode = struct let add_file_ints = add_ints file in Buffer.add_string file magic; add_file_ints - [ Bytes.length st + [ format_version + ; Bytes.length st ; Hashtbl.length globals ; List.length imports ; List.length pubs_resolved diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md index e6f014c13..1c0873907 100644 --- a/virtual_machine/SPEC.md +++ b/virtual_machine/SPEC.md @@ -51,7 +51,7 @@ The `opcode` field of every bytecode instruction is `uint8`. # Bytecode file layout -1. Header (20 bytes) +1. Header (24 bytes) 2. String table (`string_table_size` bytes) 3. Imports (`imports_count * 4` bytes) 4. Public symbols (`public_symbols_count * 9` bytes) @@ -62,6 +62,7 @@ The `opcode` field of every bytecode instruction is `uint8`. 
| Field | Type | Description | |:--|:--|:--| | `magic` | `uint8[4]` | ASCII bytes `LaMa` | +| `version` | `int32` | bytecode format version, currently `1` | | `string_table_size` | `int32` | size of the string table (in bytes) | | `globals_count` | `int32` | number of global variables (stored on the stack) | | `imports_count` | `int32` | number of imports | diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index c309104b9..b5d711902 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -13,7 +13,8 @@ #define MAGIC "LaMa" #define MAGIC_SIZE 4 -#define HEADER_SIZE (MAGIC_SIZE + 16) +#define BYTECODE_VERSION 1 +#define HEADER_SIZE (MAGIC_SIZE + 20) #define PUB_ENTRY_SIZE 9 #define IMPORT_ENTRY_SIZE 4 @@ -56,11 +57,18 @@ bytecode *bytecode_load_fd(int fd) { reader_skip(&reader, MAGIC_SIZE); + int32_t version = reader_i32(&reader); int32_t string_table_size = reader_i32(&reader); int32_t globals_count = reader_i32(&reader); int32_t num_imports = reader_i32(&reader); int32_t num_pubs = reader_i32(&reader); + if (version != BYTECODE_VERSION) { + fprintf(stderr, "bytecode_load: unsupported bytecode version %d\n", + version); + goto out; + } + if (string_table_size < 0 || globals_count < 0 || num_imports < 0 || num_pubs < 0) { fprintf(stderr, "bytecode_load: negative header field\n"); From 71c37495f6a9e4d28a47dbcf9c280a63db02b8f9 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 21 May 2026 21:26:48 +0300 Subject: [PATCH 134/141] clarify `SPEC.md` --- virtual_machine/SPEC.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md index 1c0873907..98747c580 100644 --- a/virtual_machine/SPEC.md +++ b/virtual_machine/SPEC.md @@ -64,7 +64,7 @@ The `opcode` field of every bytecode instruction is `uint8`. 
| `magic` | `uint8[4]` | ASCII bytes `LaMa` | | `version` | `int32` | bytecode format version, currently `1` | | `string_table_size` | `int32` | size of the string table (in bytes) | -| `globals_count` | `int32` | number of global variables (stored on the stack) | +| `globals_count` | `int32` | number of global variables | | `imports_count` | `int32` | number of imports | | `public_symbols_count` | `int32` | number of public symbols | @@ -78,7 +78,7 @@ The `opcode` field of every bytecode instruction is `uint8`. | Field | Type | Description | |:--|:--|:--| -| `name_offset` | `int32` | offset into the string table | +| `name_offset` | `int32` | offset into the string table for the name of a public symbol | | `code_offset` | `int32` | bytecode offset for functions, global index for globals | | `flag` | `uint8` | `0` = function, `1` = global | From 34dd5c3e4d53d48da16b31e36541ca3f2510d83b Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 21 May 2026 23:22:15 +0300 Subject: [PATCH 135/141] add well-formedish criteria for instructions --- virtual_machine/SPEC.md | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/virtual_machine/SPEC.md b/virtual_machine/SPEC.md index 98747c580..f37e69a3b 100644 --- a/virtual_machine/SPEC.md +++ b/virtual_machine/SPEC.md @@ -344,7 +344,7 @@ JMP = 0x15 ### Description -`target` is a bytecode offset naming the jump destination within the code section. +`target` is a bytecode offset naming the jump destination within the code section. `target` must not be outside of the function's body. All control flow paths reaching the same jump target must agree on operand stack's depth. ### Implementation notes @@ -380,7 +380,7 @@ END = 0x16 ### Description -`END` terminates the current function body and returns the top stack value to the caller. +`END` terminates the current function body and returns the top stack value to the caller. 
`END` is only valid when the current operand-stack depth is exactly `1`, i.e. the function body leaves exactly one return value. ### Implementation notes @@ -566,14 +566,17 @@ LD_CLO = 0x23 `LD_LOC`: * `operand` is a local slot index. +* `operand` must satisfy `0 <= operand < n_locals`. `LD_ARG`: * `operand` is an argument slot index. +* `operand` must satisfy `0 <= operand < n_args`. `LD_CLO`: * `operand` is a closure capture index. +* `operand` must satisfy `0 <= operand < n_captured`. ### Implementation notes @@ -623,14 +626,17 @@ ST_CLO = 0x43 `ST_LOC`: * `operand` is a local slot index. +* `operand` must satisfy `0 <= operand < n_locals`. `ST_ARG`: * `operand` is an argument slot index. +* `operand` must satisfy `0 <= operand < n_args`. `ST_CLO`: * `operand` is a closure capture index. +* `operand` must satisfy `0 <= operand < n_captured`. ### Implementation notes @@ -671,9 +677,7 @@ CJMP_NZ = 0x51 ### Description -`target` is a bytecode offset naming the jump destination within the code section. - -`CJMP_Z` jumps when `value == 0`. `CJMP_NZ` jumps when `value != 0`. +`CJMP_Z` jumps when `value == 0`. `CJMP_NZ` jumps when `value != 0`. `target` is a bytecode offset naming the jump destination within the code section. `target` must not be outside of the function's body. All control flow paths reaching the same jump target must agree on operand stack's depth. ### Implementation notes @@ -748,7 +752,7 @@ BEGIN_CLOSURE = 0x53 `n_args` is the number of call arguments, `n_locals` is the number of local slots allocated for the closure body, and `n_captured` is the number of captured values expected by the closure entry point. -Each local slot is initialized to `BOX(0)`. +Each local slot is initialized to `BOX(0)`. Every `CLOSURE` that targets that entry point must use the same `n_captured` value. 
### Implementation notes @@ -793,7 +797,12 @@ Each capture designation is encoded as a `(kind:uint8 index:int32)` pair, where: * `kind = 2` denotes a function argument * `kind = 3` denotes a captured closure variable -The `target` operand uses the same external function reference encoding described in [external references](#external-references). +The `target` operand uses the same external function reference encoding described in [external references](#external-references). Internal `target` values must be in range. Internal and inter-unit `target` must point to a function entry `BEGIN` or `BEGIN_CLOSURE`. For each capture designation, the referenced index must be valid for its kind: +* `0 <=` local index `< n_locals`. +* `0 <=` argument index `< n_args`. +* `0 <=` closure capture index `< current function n_captured`. +For a given internal closure entry point, all `CLOSURE` instructions targeting it must agree on `n_captured`. + ### Implementation notes @@ -873,7 +882,7 @@ CALL = 0x56 `target` is a bytecode offset naming the call target. `n_args` is the number of call arguments. -The `target` operand uses the same external function reference encoding described in [external references](#external-references). +The `target` operand uses the same external function reference encoding described in [external references](#external-references). Internal `target` values must be in range. Internal and inter-unit `CALL` must point to a function entry `BEGIN`. ### Implementation notes From 42b07c2190b5b8e1fd03b1505c5559069e47ee9e Mon Sep 17 00:00:00 2001 From: ancavar Date: Sat, 23 May 2026 23:38:49 +0300 Subject: [PATCH 136/141] fix make error? 
--- virtual_machine/Makefile | 2 +- virtual_machine/dune | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index e7d174543..94c82c944 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -28,7 +28,7 @@ $(BUILD_DIR)/%.o: %.c | $(BUILD_DIR) $(CC) $(CFLAGS) -c -o $@ $< clean: - rm -f $(OBJECTS) $(TARGET) $(MAKE) -C $(RUNTIME_DIR) BUILD_DIR=$(abspath $(BUILD_DIR)/runtime) clean + $(RM) -r $(BUILD_DIR) $(TARGET) .PHONY: all debug clean diff --git a/virtual_machine/dune b/virtual_machine/dune index 27d192bb5..25a0d7a72 100644 --- a/virtual_machine/dune +++ b/virtual_machine/dune @@ -1,5 +1,7 @@ (rule (target lama.exe) + (mode + (promote (until-clean))) (deps Makefile lama.c From 0db93081291c503c63dca6b11156a9e755368d6d Mon Sep 17 00:00:00 2001 From: ancavar Date: Sat, 13 Jun 2026 22:49:41 +0300 Subject: [PATCH 137/141] add disassembly for bytecode, remove `byterun/` --- byterun/.gitignore | 1 - byterun/byterun.c | 351 ------------------------------------- byterun/dune | 31 ---- virtual_machine/Makefile | 2 +- virtual_machine/bytecode.c | 1 + virtual_machine/bytecode.h | 2 + virtual_machine/disasm.c | 227 ++++++++++++++++++++++++ virtual_machine/lama.c | 37 +++- virtual_machine/opcodes.c | 47 ++--- 9 files changed, 289 insertions(+), 410 deletions(-) delete mode 100644 byterun/.gitignore delete mode 100644 byterun/byterun.c delete mode 100644 byterun/dune create mode 100644 virtual_machine/disasm.c diff --git a/byterun/.gitignore b/byterun/.gitignore deleted file mode 100644 index 68995d9a0..000000000 --- a/byterun/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/byterun.exe \ No newline at end of file diff --git a/byterun/byterun.c b/byterun/byterun.c deleted file mode 100644 index adb86afb9..000000000 --- a/byterun/byterun.c +++ /dev/null @@ -1,351 +0,0 @@ -/* Lama SM Bytecode interpreter */ - -#include -#include -#include -#include -#include "../runtime32/runtime.h" - -void 
*__start_custom_data; -void *__stop_custom_data; - -/* The unpacked representation of bytecode file */ -typedef struct -{ - char *string_ptr; /* A pointer to the beginning of the string table */ - char *public_ptr; /* A pointer to the beginning of publics table */ - char *code_ptr; /* A pointer to the bytecode itself */ - int *global_ptr; /* A pointer to the global area */ - int stringtab_size; /* The size (in bytes) of the string table */ - int global_area_size; /* The size (in words) of global area */ - int public_symbols_number; /* The number of public symbols */ - char buffer[0]; -} bytefile; - -/* Gets a string from a string table by an index */ -char *get_string(bytefile *f, int pos) -{ - return &f->string_ptr[pos]; -} - -/* Each public symbol entry: int32 name_offset, int32 code_offset, uint8 flag */ -#define PUBLIC_ENTRY_SIZE 9 - -/* Gets a name for a public symbol */ -char *get_public_name(bytefile *f, int i) -{ - return get_string(f, *(int *)(f->public_ptr + i * PUBLIC_ENTRY_SIZE)); -} - -/* Gets an offset for a public symbol */ -int get_public_offset(bytefile *f, int i) -{ - return *(int *)(f->public_ptr + i * PUBLIC_ENTRY_SIZE + sizeof(int)); -} - -/* Gets a flag for a public symbol (0 = function, 1 = global) */ -char get_public_flag(bytefile *f, int i) -{ - return f->public_ptr[i * PUBLIC_ENTRY_SIZE + 2 * sizeof(int)]; -} - -/* Reads a binary bytecode file by name and unpacks it */ -bytefile *read_file(char *fname) -{ - FILE *f = fopen(fname, "rb"); - long size; - bytefile *file; - - if (f == 0) - { - failure("%s\n", strerror(errno)); - } - - if (fseek(f, 0, SEEK_END) == -1) - { - failure("%s\n", strerror(errno)); - } - - file = (bytefile *)malloc(sizeof(void *) * 4 + (size = ftell(f))); - - if (file == 0) - { - failure("*** FAILURE: unable to allocate memory.\n"); - } - - rewind(f); - - if (size != fread(&file->stringtab_size, 1, size, f)) - { - failure("%s\n", strerror(errno)); - } - - fclose(f); - - file->string_ptr = 
&file->buffer[file->public_symbols_number * PUBLIC_ENTRY_SIZE]; - file->public_ptr = file->buffer; - file->code_ptr = &file->string_ptr[file->stringtab_size]; - file->global_ptr = (int *)malloc(file->global_area_size * sizeof(int)); - - return file; -} - -/* Disassembles the bytecode pool */ -void disassemble(FILE *f, bytefile *bf) -{ - -#define INT (ip += sizeof(int), *(int *)(ip - sizeof(int))) -#define BYTE *ip++ -#define STRING get_string(bf, INT) -#define FAIL failure("ERROR: invalid opcode %d-%d\n", h, l) - - char *ip = bf->code_ptr; - char *ops[] = {"+", "-", "*", "/", "%", "<", "<=", ">", ">=", "==", "!=", "&&", "!!"}; - char *pats[] = {"=str", "#string", "#array", "#sexp", "#ref", "#val", "#fun"}; - char *lds[] = {"LD", "LDA", "ST"}; - do - { - char x = BYTE, - h = (x & 0xF0) >> 4, - l = x & 0x0F; - - fprintf(f, "0x%.8x:\t", ip - bf->code_ptr - 1); - - switch (h) - { - case 15: - goto stop; - - /* BINOP */ - case 0: - fprintf(f, "BINOP\t%s", ops[l - 1]); - break; - - case 1: - switch (l) - { - case 0: - fprintf(f, "CONST\t%d", INT); - break; - - case 1: - fprintf(f, "STRING\t%s", STRING); - break; - - case 2: - fprintf(f, "SEXP\t%s ", STRING); - fprintf(f, "%d", INT); - break; - - case 3: - fprintf(f, "STI"); - break; - - case 4: - fprintf(f, "STA"); - break; - - case 5: - fprintf(f, "JMP\t0x%.8x", INT); - break; - - case 6: - fprintf(f, "END"); - break; - - case 7: - fprintf(f, "RET"); - break; - - case 8: - fprintf(f, "DROP"); - break; - - case 9: - fprintf(f, "DUP"); - break; - - case 10: - fprintf(f, "SWAP"); - break; - - case 11: - fprintf(f, "ELEM"); - break; - - default: - FAIL; - } - break; - - case 2: - case 3: - case 4: - fprintf(f, "%s\t", lds[h - 2]); - switch (l) - { - case 0: - fprintf(f, "G(%d)", INT); - break; - case 1: - fprintf(f, "L(%d)", INT); - break; - case 2: - fprintf(f, "A(%d)", INT); - break; - case 3: - fprintf(f, "C(%d)", INT); - break; - default: - FAIL; - } - break; - - case 5: - switch (l) - { - case 0: - fprintf(f, 
"CJMPz\t0x%.8x", INT); - break; - - case 1: - fprintf(f, "CJMPnz\t0x%.8x", INT); - break; - - case 2: - fprintf(f, "BEGIN\t%d ", INT); - fprintf(f, "%d", INT); - break; - - case 3: - fprintf(f, "CBEGIN\t%d ", INT); - fprintf(f, "%d", INT); - break; - - case 4: - fprintf(f, "CLOSURE\t0x%.8x", INT); - { - int n = INT; - for (int i = 0; i < n; i++) - { - switch (BYTE) - { - case 0: - fprintf(f, "G(%d)", INT); - break; - case 1: - fprintf(f, "L(%d)", INT); - break; - case 2: - fprintf(f, "A(%d)", INT); - break; - case 3: - fprintf(f, "C(%d)", INT); - break; - default: - FAIL; - } - } - }; - break; - - case 5: - fprintf(f, "CALLC\t%d", INT); - break; - - case 6: - fprintf(f, "CALL\t0x%.8x ", INT); - fprintf(f, "%d", INT); - break; - - case 7: - fprintf(f, "TAG\t%s ", STRING); - fprintf(f, "%d", INT); - break; - - case 8: - fprintf(f, "ARRAY\t%d", INT); - break; - - case 9: - fprintf(f, "FAIL\t%d", INT); - fprintf(f, "%d", INT); - break; - - case 10: - fprintf(f, "LINE\t%d", INT); - break; - - default: - FAIL; - } - break; - - case 6: - fprintf(f, "PATT\t%s", pats[l]); - break; - - case 7: - { - switch (l) - { - case 0: - fprintf(f, "CALL\tread"); - break; - - case 1: - fprintf(f, "CALL\twrite"); - break; - - case 2: - fprintf(f, "CALL\tlength"); - break; - - case 3: - fprintf(f, "CALL\tstring"); - break; - - case 4: - fprintf(f, "CALL\t.array\t%d", INT); - break; - - default: - FAIL; - } - } - break; - - default: - FAIL; - } - - fprintf(f, "\n"); - } while (1); -stop: - fprintf(f, "\n"); -} - -/* Dumps the contents of the file */ -void dump_file(FILE *f, bytefile *bf) -{ - int i; - - fprintf(f, "String table size : %d\n", bf->stringtab_size); - fprintf(f, "Global area size : %d\n", bf->global_area_size); - fprintf(f, "Number of public symbols: %d\n", bf->public_symbols_number); - fprintf(f, "Public symbols :\n"); - - for (i = 0; i < bf->public_symbols_number; i++) - fprintf(f, " 0x%.8x: %s (%s)\n", get_public_offset(bf, i), get_public_name(bf, i), - get_public_flag(bf, 
i) == 0 ? "function" : "global"); - - fprintf(f, "Code:\n"); - disassemble(f, bf); -} - -int main(int argc, char *argv[]) -{ - bytefile *f = read_file(argv[1]); - dump_file(stdout, f); - return 0; -} diff --git a/byterun/dune b/byterun/dune deleted file mode 100644 index 6b8730c28..000000000 --- a/byterun/dune +++ /dev/null @@ -1,31 +0,0 @@ -(rule - (target byterun.exe) - (deps - (:main byterun.c) - (:runtime ../runtime/runtime.a) - mac-specific-flags.txt) - (mode - (promote (until-clean))) - (action - (run - gcc - %{read-lines:mac-specific-flags.txt} - -g - %{main} - %{runtime} - -o - %{target}))) - -(rule - (target mac-specific-flags.txt) - (enabled_if - (= %{system} "linux")) - (action - (write-file %{target} ""))) - -(rule - (target mac-specific-flags.txt) - (enabled_if - (= %{ocaml-config:system} macosx)) - (action - (write-file %{target} "-arch\nx86_64\n-ld_classic"))) diff --git a/virtual_machine/Makefile b/virtual_machine/Makefile index 94c82c944..a7a9e0237 100644 --- a/virtual_machine/Makefile +++ b/virtual_machine/Makefile @@ -4,7 +4,7 @@ LIBS = -lffi -ldl LDFLAGS = -rdynamic BUILD_DIR = .vm-build TARGET = lama.exe -SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c memory.c opcodes.c +SOURCES = lama.c converter.c vm.c bytecode.c ffi.c loader.c symbols.c ops.c memory.c opcodes.c disasm.c OBJECTS = $(addprefix $(BUILD_DIR)/,$(SOURCES:.c=.o)) RUNTIME_DIR = ../runtime diff --git a/virtual_machine/bytecode.c b/virtual_machine/bytecode.c index b5d711902..a7023965d 100644 --- a/virtual_machine/bytecode.c +++ b/virtual_machine/bytecode.c @@ -101,6 +101,7 @@ bytecode *bytecode_load_fd(int fd) { bc->map_base = data; bc->map_size = file_size; + bc->version = version; bc->string_table = string_table; bc->string_table_size = (size_t)string_table_size; diff --git a/virtual_machine/bytecode.h b/virtual_machine/bytecode.h index 5b2d4ed60..defb82b67 100644 --- a/virtual_machine/bytecode.h +++ b/virtual_machine/bytecode.h @@ -21,6 +21,8 @@ typedef 
struct { const uint8_t *map_base; size_t map_size; + int32_t version; + const char *string_table; size_t string_table_size; diff --git a/virtual_machine/disasm.c b/virtual_machine/disasm.c new file mode 100644 index 000000000..e6ef984c8 --- /dev/null +++ b/virtual_machine/disasm.c @@ -0,0 +1,249 @@ +#include "disasm.h" +#include "bytecode.h" +#include "opcodes.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +/* + * Write the NUL-terminated string s to f in escaped form: '"' becomes \", + * a backslash followed by a raw LF, CR or TAB byte becomes \n, \r or \t + * (both bytes are consumed), any other backslash becomes \\, printable + * ASCII (0x20..0x7e) is copied as is, and every other byte becomes \xNN. + * NOTE(review): a bare LF/CR/TAB without a preceding backslash is written + * as \x0a, \x0d or \x09 -- confirm that this asymmetry is intended. + */ +static void print_escaped_string(FILE *f, const char *s) { + for (const uint8_t *p = (const uint8_t *)s; *p; p++) { + switch (*p) { + case '"': + fprintf(f, "\\\""); + break; + case '\\': + switch (p[1]) { + case '\n': + fprintf(f, "\\n"); + p++; + break; + case '\r': + fprintf(f, "\\r"); + p++; + break; + case '\t': + fprintf(f, "\\t"); + p++; + break; + default: + fprintf(f, "\\\\"); + break; + } + break; + default: + if (*p >= 0x20 && *p <= 0x7e) { + fputc(*p, f); + } else { + fprintf(f, "\\x%02x", *p); + } + break; + } + } +} + +/* + * Print the code section of bc to f, one instruction per line, each + * prefixed with the reader position at which the instruction starts. + * Decoding stops after the first OP_EOF instruction has been printed. + * NOTE(review): the loop does no end-of-code check of its own; presumably + * byte_reader guards reads past code_size -- confirm. + */ +static void disassemble(FILE *f, const bytecode *bc) { + byte_reader reader; + reader_init(&reader, bc->code, bc->code_size); + + while (true) { + size_t offset = reader_pos(&reader); + uint8_t x = reader_u8(&reader); + + fprintf(f, "0x%.8x:\t", (unsigned int)offset); + + switch (x) { + case OP_BINOP_ADD: + case OP_BINOP_SUB: + case OP_BINOP_MUL: + case OP_BINOP_DIV: + case OP_BINOP_MOD: + case OP_BINOP_LT: + case OP_BINOP_LE: + case OP_BINOP_GT: + case OP_BINOP_GE: + case OP_BINOP_EQ: + case OP_BINOP_NE: + case OP_BINOP_AND: + case OP_BINOP_OR: + fprintf(f, "%s", opcode_to_string(x)); + break; + + case OP_CONST: + fprintf(f, "%s\t%d", opcode_to_string(x), reader_i32(&reader)); + break; + + case OP_STRING: + fprintf(f, "%s\t", opcode_to_string(x)); + print_escaped_string(f, bytecode_get_string(bc, reader_i32(&reader))); + break; + + case OP_SEXP: + fprintf(f, "%s\t", opcode_to_string(x)); + print_escaped_string(f, bytecode_get_string(bc, reader_i32(&reader))); + fprintf(f, " %d", reader_i32(&reader)); + break; + + case OP_END: + case
OP_STA: + case OP_DROP: + case OP_DUP: + case OP_SWAP: + case OP_ELEM: + fprintf(f, "%s", opcode_to_string(x)); + break; + + case OP_JMP: + case OP_CJMP_Z: + case OP_CJMP_NZ: + fprintf(f, "%s\t0x%.8x", opcode_to_string(x), + (unsigned int)reader_i32(&reader)); + break; + + case OP_LD_GLO: + case OP_LD_LOC: + case OP_LD_ARG: + case OP_LD_CLO: + case OP_ST_GLO: + case OP_ST_LOC: + case OP_ST_ARG: + case OP_ST_CLO: + fprintf(f, "%s\t%d", opcode_to_string(x), reader_i32(&reader)); + break; + + case OP_BEGIN: + fprintf(f, "%s\t%d ", opcode_to_string(x), reader_i32(&reader)); + fprintf(f, "%d", reader_i32(&reader)); + break; + + case OP_BEGIN_CLOSURE: + fprintf(f, "%s\t%d ", opcode_to_string(x), reader_i32(&reader)); + fprintf(f, "%d ", reader_i32(&reader)); + fprintf(f, "%d", reader_i32(&reader)); + break; + + case OP_CLOSURE: { + int32_t n; + fprintf(f, "%s\t0x%.8x", opcode_to_string(x), + (unsigned int)reader_i32(&reader)); + n = reader_i32(&reader); + fprintf(f, " %d", n); + for (int32_t i = 0; i < n; i++) { + uint8_t ref = reader_u8(&reader); + fprintf(f, " %u %d", (unsigned int)ref, reader_i32(&reader)); + } + break; + } + + case OP_CALLC: + case OP_ARRAY: + case OP_LINE: + fprintf(f, "%s\t%d", opcode_to_string(x), reader_i32(&reader)); + break; + + case OP_CALL: + fprintf(f, "%s\t0x%.8x ", opcode_to_string(x), + (unsigned int)reader_i32(&reader)); + fprintf(f, "%d", reader_i32(&reader)); + break; + + case OP_TAG: + fprintf(f, "%s\t", opcode_to_string(x)); + print_escaped_string(f, bytecode_get_string(bc, reader_i32(&reader))); + fprintf(f, " %d", reader_i32(&reader)); + break; + + case OP_FAIL: + case OP_FAIL_KEEP: + /* Two operands; the trailing space separates them, as for OP_BEGIN. */ + fprintf(f, "%s\t%d ", opcode_to_string(x), reader_i32(&reader)); + fprintf(f, "%d", reader_i32(&reader)); + break; + + case OP_PATT_STR_CMP: + case OP_PATT_STRING: + case OP_PATT_ARRAY: + case OP_PATT_SEXP: + case OP_PATT_BOXED: + case OP_PATT_UNBOXED: + case OP_PATT_CLOSURE: + fprintf(f, "%s", opcode_to_string(x)); + break; + + case OP_BARRAY: +
fprintf(f, "%s\t%d", opcode_to_string(x), reader_i32(&reader)); + break; + + case OP_EOF: + fprintf(f, "%s\n", opcode_to_string(x)); + return; + + default: + /* No newline here: it is written once, after the switch. */ + fprintf(f, "%s", opcode_to_string(x)); + break; + } + fprintf(f, "\n"); + } +} + +/* + * Print a human-readable dump of bc to f: the header fields, the imports, + * the public functions, the public globals and the disassembled code. + */ +void dump_bytecode(FILE *f, const bytecode *bc) { + bytecode_iterator pubs, imports; + public_symbol pub; + const char *import_name; + + fprintf(f, "Version: %d\n", bc->version); + fprintf(f, "Size of the string table (in bytes): %zu\n", + bc->string_table_size); + fprintf(f, "Number of global variables: %zu\n", bc->globals_count); + fprintf(f, "Number of imports: %zu\n", bc->imports_len); + fprintf(f, "Number of public symbols: %zu\n", bc->pubs_len); + fprintf(f, "Code size: %zu\n", bc->code_size); + fprintf(f, "Imports: \n"); + + bytecode_imports_init(&imports, bc); + while (bytecode_imports_next(&imports, &import_name)) { + fprintf(f, " %s\n", import_name); + } + + fprintf(f, "Public functions: \n"); + + bytecode_pubs_init(&pubs, bc); + while (bytecode_pubs_next(&pubs, &pub)) { + if (pub.flag == PUB_FLAG_FUNCTION) { + fprintf(f, " 0x%.8x: %s\n", (unsigned int)pub.code_offset, pub.name); + } + } + + fprintf(f, "Public globals: \n"); + + bytecode_pubs_init(&pubs, bc); + while (bytecode_pubs_next(&pubs, &pub)) { + if (pub.flag == PUB_FLAG_GLOBAL) { + fprintf(f, " 0x%.8x: %s\n", (unsigned int)pub.code_offset, pub.name); + } + } + + fprintf(f, "Code:\n"); + disassemble(f, bc); +} diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index a4c735271..007322e25 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -1,5 +1,6 @@ #define _POSIX_C_SOURCE 200809L +#include "disasm.h" #include "loader.h" #include "memory.h" #include "vm.h" @@ -48,6 +49,7 @@ static void print_usage(FILE *dest, const char *prog_name) { "you need to manually include relevant search paths.\n"); fprintf(dest, "Options:\n"); fprintf(dest, " -h, --help Show this help message\n"); + fprintf(dest, " -d, --disassemble Disassemble the bytecode unit\n");
fprintf(dest, " -I, --include PATH Add PATH to unit search paths (can be " "used multiple times)\n"); @@ -60,21 +62,27 @@ int main(int argc, char *argv[]) { int exit_code = 0; char *bytecode_dir = NULL; char *main_unit_name = NULL; + virtual_machine *vm = NULL; + bool disassemble = false; bool is_path = false; static struct option long_options[] = {{"help", no_argument, 0, 'h'}, + {"disassemble", no_argument, 0, 'd'}, {"include", required_argument, 0, 'I'}, {0, 0, 0, 0}}; int opt; int option_index = 0; - while ((opt = getopt_long(argc, argv, "hI:", long_options, &option_index)) != + while ((opt = getopt_long(argc, argv, "hdI:", long_options, &option_index)) != -1) { switch (opt) { case 'h': print_usage(stdout, argv[0]); return 0; + case 'd': + disassemble = true; + break; case 'I': if (include_path_count < MAX_INCLUDE_PATHS) { include_paths[include_path_count++] = optarg; @@ -104,10 +112,29 @@ int main(int argc, char *argv[]) { main_unit_name = entry_arg; } - virtual_machine *vm = - vm_create(main_unit_name, - (const char **)(is_path ? include_paths : include_paths + 1), - is_path ? include_path_count : include_path_count - 1); + if (disassemble) { + bytecode *bc = NULL; + + if (!is_path) { + fprintf(stderr, "Disassembly requires a .bc file path\n"); + exit_code = 1; + goto cleanup; + } + + bc = bytecode_load(entry_arg); + if (!bc) { + exit_code = 1; + goto cleanup; + } + + dump_bytecode(stdout, bc); + bytecode_free(bc); + goto cleanup; + } + + vm = vm_create(main_unit_name, + (const char **)(is_path ? include_paths : include_paths + 1), + is_path ? 
include_path_count : include_path_count - 1); if (!vm) { exit_code = 1; goto cleanup; diff --git a/virtual_machine/opcodes.c b/virtual_machine/opcodes.c index 522c913cf..0624bb1bc 100644 --- a/virtual_machine/opcodes.c +++ b/virtual_machine/opcodes.c @@ -2,6 +2,11 @@ #include #include +static void unknown_opcode(uint8_t opcode) { + fprintf(stderr, "Unknown opcode: %d\n", opcode); + exit(EXIT_FAILURE); +} + const char *opcode_to_string(uint8_t opcode) { switch ((opcode_t)opcode) { case OP_BINOP_ADD: @@ -51,29 +56,29 @@ const char *opcode_to_string(uint8_t opcode) { case OP_ELEM: return "ELEM"; case OP_LD_GLO: - return "LD.GLO"; + return "LD_GLO"; case OP_LD_LOC: - return "LD.LOC"; + return "LD_LOC"; case OP_LD_ARG: - return "LD.ARG"; + return "LD_ARG"; case OP_LD_CLO: - return "LD.CLO"; + return "LD_CLO"; case OP_ST_GLO: - return "ST.GLO"; + return "ST_GLO"; case OP_ST_LOC: - return "ST.LOC"; + return "ST_LOC"; case OP_ST_ARG: - return "ST.ARG"; + return "ST_ARG"; case OP_ST_CLO: - return "ST.CLO"; + return "ST_CLO"; case OP_CJMP_Z: - return "CJMP.Z"; + return "CJMP_Z"; case OP_CJMP_NZ: - return "CJMP.NZ"; + return "CJMP_NZ"; case OP_BEGIN: return "BEGIN"; case OP_BEGIN_CLOSURE: - return "BEGIN.CLO"; + return "BEGIN_CLOSURE"; case OP_CLOSURE: return "CLOSURE"; case OP_CALLC: @@ -87,29 +92,29 @@ const char *opcode_to_string(uint8_t opcode) { case OP_FAIL: return "FAIL"; case OP_FAIL_KEEP: - return "FAIL.KEEP"; + return "FAIL_KEEP"; case OP_LINE: return "LINE"; case OP_PATT_STR_CMP: - return "PATT.STRCMP"; + return "PATT_STR_CMP"; case OP_PATT_STRING: - return "PATT.STRING"; + return "PATT_STRING"; case OP_PATT_ARRAY: - return "PATT.ARRAY"; + return "PATT_ARRAY"; case OP_PATT_SEXP: - return "PATT.SEXP"; + return "PATT_SEXP"; case OP_PATT_BOXED: - return "PATT.BOXED"; + return "PATT_BOXED"; case OP_PATT_UNBOXED: - return "PATT.UNBOXED"; + return "PATT_UNBOXED"; case OP_PATT_CLOSURE: - return "PATT.CLOSURE"; + return "PATT_CLOSURE"; case OP_BARRAY: return "BARRAY"; case 
OP_EOF: return "EOF"; default: - fprintf(stderr, "Unknown opcode: %d\n", opcode); - exit(EXIT_FAILURE); + unknown_opcode(opcode); + return NULL; } } From 2d152dccaea6d459f8dedae42b2f8cb8a199d0ba Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 15 Jun 2026 01:10:24 +0300 Subject: [PATCH 138/141] better `README.md` --- README.md | 9 +- virtual_machine/README.md | 72 ++++++-- virtual_machine/arch.png | Bin 16505 -> 0 bytes virtual_machine/arch.svg | 337 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 397 insertions(+), 21 deletions(-) delete mode 100644 virtual_machine/arch.png create mode 100644 virtual_machine/arch.svg diff --git a/README.md b/README.md index d6faee5c2..63727e31c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ # Lama -![lama](lama.svg) is a programming language (initualy developed by JetBrains Research) for educational purposes as an exemplary language to introduce the domain of programming languages, compilers, and tools. +![lama](lama.svg) is a programming language (initially developed by JetBrains Research) for educational purposes as an exemplary language to introduce the domain of programming languages, compilers, and tools. Its general characteristics are: * procedural with first-class functions - functions can be passed as arguments, placed in data structures, @@ -27,10 +27,11 @@ The lack of a type system (a vital feature for a real-world language for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent. On the other hand the language can be used in the future as a raw substrate to apply various ways of software verification (including type systems). -The current implementation contains a native code compiler for **x86-64**, written in **OCaml**, a runtime library with garbage-collection support, written in **C**, and a small standard library, written in ![lama](lama.svg) itself. 
+The current implementation contains a native code compiler for **x86-64**, written in **OCaml**, a runtime library with garbage-collection support and a bytecode virtual machine written in **C**, and a small standard library, written in ![lama](lama.svg) itself. -In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine. -The stack machine code can in turn be either interpreted on a stack machine interpreter, or used as an intermediate representation by the native code compiler. +In addition, a source-level reference interpreter is implemented as well as a compiler to a stack machine bytecode. +While the stack machine representation is used as an intermediate representation by the native code compiler, the generated stack machine +bytecode can be interpreted by the virtual machine. ## Language Specification diff --git a/virtual_machine/README.md b/virtual_machine/README.md index a8e72e8ae..c8a42156f 100644 --- a/virtual_machine/README.md +++ b/virtual_machine/README.md @@ -1,29 +1,67 @@ -# Lama virtual machine +# ![Lama](../lama.svg) virtual machine -This directory contains the implementation of the virtual machine for the Lama programming language. The VM is a stack-based execution engine designed to run Lama bytecode. +This directory contains the implementation of the virtual machine for the ![Lama](../lama.svg) programming language. Documentation is split as follows: -* [`SPEC.md`](SPEC.md) - bytecode file format and instruction reference -* `README.md` - architectural overview of the VM implementation +* [`SPEC.md`](SPEC.md) - bytecode file format and instruction reference. +* [`README.md`](README.md) - implementation overview, build instructions, and command-line usage. -## Architecture overview (work in progress) +## Build -The Lama VM follows a stack-based architecture where operands are pushed onto a data stack, and operations consume these operands and push results back. 
+```bash +# Release +make -![Architecture](arch.png) -(work in progress, each iteration the architecture will change) +# Debug +make debug -### Key Components +# Remove build artifacts +make clean +``` -* **Interpreter (`interpreter.c`)**: The core execution loop that fetches, decodes, and executes bytecode instructions. -* **Data stack (`stack.c`, `stack.h`)**: A growable stack used for evaluating expressions, passing function arguments, and storing local variables. -* **Call stack (`call_stack.c`, `call_stack.h`)**: Manages function activation records (frames), tracking return addresses and stack base pointers. -* **Instruction set (`opcodes.h`)**: Defines the bytecode opcodes +## Usage -### Interaction with Runtime +The input can be a path to the main `.bc` file: -The VM is tightly integrated with the Lama runtime (`../runtime/`). It relies on the runtime for: +```bash +./lama.exe Main.bc +``` -* **Memory management**: Automatic garbage collection for heap-allocated objects. -* **Built-in functions**: IO operations (read/write), array/S-expression/string handling. +or a unit name: + +```bash +./lama.exe Main +``` + +When a `.bc` path is used, its directory is added as the first unit search path. +When a unit name is used, the VM searches only the paths passed with `-I`. + +You can also add directories to the list of searched paths for imported modules: + +```bash +./lama.exe -I stdlib/ -I lib/ Main +``` + +Program arguments are passed after the unit name or `.bc` path: + +```bash +./lama.exe -I stdlib/ Main arg1 arg2 +./lama.exe Main.bc arg1 arg2 +``` + +To print bytecode metadata and instructions without executing the program: + +```bash +./lama.exe --disassemble Main.bc +``` + +Run `./lama.exe --help` for the full list of command-line options. + +## Architecture + +![VM Architecture](arch.svg) + +The figure shows the main components of the virtual machine and the relationships between them. 
The command-line interface (CLI) is the external entry point: it receives the parameters and passes control to the virtual machine facade. The facade coordinates the remaining components: it loads bytecode files, decodes and links them and prepares its garbage collector. + +The virtual machine follows a stack-based architecture where operands are pushed onto the operand stack, and operations consume these operands and push their results back onto it. diff --git a/virtual_machine/arch.png b/virtual_machine/arch.png deleted file mode 100644 index 51400b6284fdd9ed4fa9f25edf49674f89ca49d3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16505 zcma)jV{~QB*JsR5$7aV?$L!eX*tTuk=-9TCPHwF3*yfFG8*`uM|IW;MXXeA)4|lDz ztLk@l?b=mm*G8nGyaW;g9s(E`7?PBvs1g_$1ON;Sya^5xM1kO$ngsp(E6S>ffgWLC z;bGz6;ox8p5#hdkML|ZzKt)Bu#6&^I!o$KN#KA_x$HT1dL9tK{}PgIUyLQzmsNl1`YLX1;FLPA(pO+-#ZLP<|joKs4IOIDgkPEJlrMPFLY zP)^fSQJ!B(QBYM`NL^iBQO8o*z*g1BK~r5+TT@(5M^Z~mOViv{%fe0H*2h3k+SpLm zz`(%J-rvMI$lNc@!c5WHLdo9V-qJJ5&M(0};Fm>UrfqnEeGoudlCXOrB4CkzZnoPjZcCYLiDsn@?`9Uw&VJuTfaA zd3cCLaBy%yQdvN9c}RLqSXO;#QD0QJRa~@fTwGjKUUPJQOMFR3RB3-)#ow6Psl+(@ zU-1sfiB7+M{Yoh5Oe*jGRrNQeZunQjbXJ;sUY1u@R#tl5VD|6vJiv5b>r7FeZ&^`5 zQBhGr%UEIid}+^0S?_8^Nl;x?ctu4;RrhR7?`*{%VD->W-RMEx_(^?rctcGDpeYsr z0Mz%*0Y-uKV}}jnM=i4#fVr!-=C}?(LU;SGo}Qkzv5nUG>%Pw9f!?&C{*2z<-od%8 z;Xj#Eqj^(PQ^WJyGvft|GsR0wODpqbYm4Q;<;sl$s!@WQDeGvXbB*pz@=i_zv{r&yz{nO*i+e^FMOQ+*Ycfk8<=G#i$`{v-&P0!my z|L61H&zFbK&(En8$R8jTq;(O~a51)b@U$^Ca{-euwKH`xbTKs{G4dp_aB*>P=3!!T zuraiAakaH!G`6>On;azuDR8i*iiXSoq=SKja6GclmE{!HexP(+)}_N#1O+Q+hoSkD zlMt7N=97|SCsf~i8LKt8*8tgVwZ7d0?#k}dTx{&Kf0WN}T=!{BdFSS2a9%EJRN7{k z@5%Ka3OnJ*o1-ru=MkJMc-cCs53h7&_w;VQLAHq)B2DrL-`RwGrq^t%REH!l7tFYdzHxIHR9pL9owl?+YbM(*0FSwHKBh-BQd2G>E%kc}NlC3x^xZ~KX5B8> zWA;uBc&^{)8}y*nH|2V^L6cIe!C*f*&bUM~45WM`B|nr<_ico1VM_w95Uw7{aI60{{YPxGb%I1 z<#H$0CC6L~xVs4(BH2Wh@;%tZat9a1CauGGd3`y5araWpF0v4vJ~}&%5-^!E)aey! 
z8D62?t9lV@14#npo1wpSwu-Elt0TtnE$J%~N4#47Tv7pR%?~^4auvg&b`T675_HJTlnfG^KhK}TTY~ev@?~021Wg>8HtYXbw zQnbWhN6qOn@%zhyF70C+$XT-I7qzeuO4?fReB&rCAKotU#ibJ_Z)9ID@W#99x}bq z_AZn^(vYx30PL)7bB5Jguc2JXh$kAyR!8aS+E(;(%qSME*Vt8DWXvD8(doX|D0(({ zlct4{;BD2c^$>?1!mfMX4HO@w4s}3w@HMhGTkT;4h@~9&P%y~`Loe=*6xkcOZKErY zf4E(K^A=H(BF*~GSP5w@V#vpv?$COd=SDR%_?BP4XDOE414q;x1OW{wTYIc}NjgUN zqhlKyoKAYXG4b}hvnZmztI||!Q*ux^yk&zE^xLa!M@!dLj0xB0*D&?y+%WK&N{o)Z zrVbP4=p-y_lV#r`UY@3H2~m925wp97>HB2KGf%*CYdUynHpHso-rIGIcmu{((R|l9 z7<}-8%HxrNr6Q_WDneFhO(A%s*V*v$sXeI()>Z=#?WZQZnykHCYZ?&Vh)P>; zqRZA69ae;Lq5715i_rSZ;&_g=dD2!w%c$IHQ!On=k=A@jE^bS@D=bIs7&YVV7Q z;(m2EUU+WDZ>&gZ^DZBV`j{^~!XZ!ZyMybRuw3k@WchNE+BcJ$y8T^0OKEm)eB3ny zppjno7yPp(yy-l>ncuQBCawoR_GSUJ6PCy1-)!pQ@;GC(vY1xQ(9E0R)Ukqhg@e1= zGxXMM<(p1?DOsRSu>9UzhPY%hX+rqnaSt2w6znRh$6EAQ?>l?j3!qX%-x9Zjbw#R*RK11 zKawpdu*Ba9YZ^vei7;D->y#p} zJzZ2u=2|1}<|Z%*p_XE?So1!nyYCr)tc!^CQ1SQI-Zx!`d?A{ds+092`jOg9*qH2l=1KHCDFgdHLGhvx> z)aCT$_pKdMcvm1)5_MzorLG;YU1bt zYG>UQVsMJw8Rob58*LR1V~&G!!g_IZ0aHGUm)+&hne52;2QSC^mxZq^%;HPud1big z^5cTbD=FbKuJ~P$PaO&yC>h9aq!7U;i`^{TtD8U0+-$u1ud;r&&*CTnI+l1PfTivf zzbU;}+14bQfY#THatM$oZX=VqbH+vJlG50wbdlh$S>Enn-WfCef~vmqej)u*uI;O@ zJ00G>!i;A>2c^Tx|CexSLqB1iy_}!Y9geYw%2beMZFATKM{w)tW&$Z=#7muANpX5( zbqY4EXS60iw>Ex*HT;DN3lqf`7{$QOGB0uyKkFB?t3&#Hzc1^s)B*^oVF_0tOV-`g zb1BGKz%9`ASkul!(yG`*Y(o`(JaRBB4-y7qrI@Kv*)#w7+c|5pR%si4ngmE+X7d)( zZLtmAn)H9&xf{(;7U3SB{r2-gLl*bD-7)rQnn;0NbmT*m0pZM7f;BTOes42)okvfB zEVb#TkS9NE6M@bZ9lzHI#X?)WBcw0~#d=4_jr3)=IGXvIlCv_N$8S4-_$gbrF$)UYxt&U(xHAWn4mDVQgS#t4N2Wuas{?PF2rU;X1p$f| zKb9-Bjjr1?;F8ZW?cz{-!U9^!zFlx#fi*j{cMZX|vX$|QtsATgXNrTv_x3eE;IcfZ z*WR@SujpqkiTm`|!oR<+;oaAYq(}1U%XyMk@>&>O{1RK`E1PVKAJm#Fpk#55#xguV zzYsG%FvR9E$)4+&h(f}b5ItV(zIw}Id5kTiJUHhiI)2cg*XfU#J)hK$!KI-I!r^v0 zy~`b#v3HN528Qiniu~O>v|s2zFeE8-i7pqW_puaP^?(Q({>JK}eI2vG9wE$;a;yu9 z6bT2k^R)=sP>m3N;bSNBI+TqL_^7d-i3AjV!f?Iq*{txGj_dy9!K?N6w$97GBN|mh zzu9z^kC>ed`Zzz2!R4~AQAKB=Rb}NeN|TPR>V-1tMzLKzARzi2|VKzzNXSqUMzN?(H5?G 
z^q(lOK=^26xZP$?&U|mrPqRBztBJgyw%Mk9nb*UCY|B;xPz#&PiE)F|Tl%?rc;yKL3qy6qckL;a&YA$FlT^I-noW5)YF zeemOaKX8NR;#IIq0e55rOyN`)HZ-KI%T<;sqMrbph-Y-nCHKSt@U$pR(cE#Xddi29A zB!V_D6=MfsP~fricuB&x8p9~JX#`DNbPheXWcCeuPN|cuJFKUzAnM13RP+j#jhQj0G4cu`=C}aWoyf z9}r10*a6iF-Caw##{61Q+-X9}@&XzcvhoDj&OPtqS|!B!VPFy6Gol!#U$)wNeouNB z;9;O;t+;S?vO{hNR=C@i9QFD-B{dEler4(7ryRBw0N&gdN)KCnOqg&x?a9tPUX2}h zz)f5P({d0}qUMA+?%vhb5GVo~e!$LFUg=}glM5WaVVBOY|FUxu7SS$L$bxXll?#{c zqNBy1w>_H%PgVYtsJiKE8l*2DP|-#ie8$;$EH1outv$rkHHEz0@b*E2-J7_`p$Pl% z2wt657nVBU!;^EGg?(RJM)2F#{&2ne@%bBs+ISn|zVM*iSgW8pTkH#r!GbQY!T6&N zPtI{i{70cXzbT44`CmdVzlpUgWjdBljMopP}b{ePES}`{!;AlbFl%h ztT$z;=jU;sk6{~Bkr6C2tfqMke_gT4@L`c&0hknYF>dTAMfsp3I1TrL5t9W0?*W~| zz&hF@H0+#Vl*5U8VyMc~z`d8g6Ma_V02Tsf^wrEYR^q2pcp`E)Hg3}T7~(l{26)qz z{P4-`FV9buBcZG!hvu;&>WV+Zuz6_%I1k^Yba$;jln-0UgIgHl%m>;1wLIoZ!e1U|}< z6(8F{6TRNwM%1xKMnYX)JUJmhmOA623{BP+gCN?Rb;u zjhVux2aTM$=5WN`W%s z$~*~GH8Zx)1J(5tk-BD7JDZ9M!;67jdX%_`S+V93e_S2jbU`#XPRIMV0x|DXj~jLZ znF#Wo14enzm=_aI^J9D(1tjCywho%rLpba;N=O~b^(03=h=k2NTn^SqcmAm8*FF2s zaTMEiZP7QXP{oJcci5?3fNe&&v+U2uxN7c%*(`Y@cXahBVyz>$$@ix)5fhe+uMc`u zmo=&W<8PznzNd2q&n{F3`j-?<^>GpbyL+p)DwGAMi8OIfE;bi~n~|JfU*n#qy`H1D z!jR5HOiRnF1=a+zg^9y9W0{!1@b1tTOALNbO9n4g?5bPLg==K`CwoON%~}h^8h>jN zHUq6Hm^MLunHgA$)Rvb?G-Ul|S2>l37Afuxhy6w;mY&&WkwLWA(?XuTp;iCIU>Y{u z{n}a-GKYoxJF01R$LZ|ecZEy?yX^&ayN8Xp+3YMOv#By~1Qo2sYq+INzvJi7oN^?H zB{FS--m(kNTQH6X3{BD!!|3*h*$>=t!)4az8;F9y_?b9kc+?<3JgQceHV^qL(DD)e z723Lb7gftoZvXCY2N6XSqD`bpl#?!?q3qBuu&OL49@P}T@2~0en%C8lDpe{bymG4r zf-TQ&WasJ6!q(nY$@cO_c1`{+w_AOF@TYa~?UA>=087=+D*|iNS9XL4aUdyQ4tB_; z#^?sk4}Byf(fNfMxyw1BJ<@t2uQ2N`XVP~+W}}C>vH7EsXWFR6Re`IQ@j)3a#!!J| zGv9TJkedqai#sDaooAY!P;#P;R2I%VNqxRk%{mfRS-CP#L2J?4-j!vNxdiWWxkdS! 
z7ziqj2=ZGr6FXn=X4`#J8otL%iVOl@0hmaxL0)G zyHX03?aj%$?GqzhMyj%&^FQ1fOvs&gX`*qLjU^l+P`=h zI2ckyC(Y!w&3g5ai0A$$I*G!flVYVFiOwt*yM?f{59kMf)tC%#%0%!tmpagHwtPV> z6#}nGdSmnnI;9ew8KRJ)9+U?hQOwcKdi$S8J2ex9PiOQEYR@=qJnEEod zY9~pU+IMw~IGS=TVZ3$GES;(>CXH$vPoExaie!IHyyAq)4|v@eY9Q!r$wg&b|2%Vd z5u%DG)WG>9i$*>q)$Z(V!|8OK308C&!$%2G3ppFueigGA{+k;Q^Z!m;5U16N_evrB@Rj!+!N|HS$mYeISZTwfDh42wmt_kDZh@MM@%Zkh31X$Z& zuaUrz`gb$|8K5_v24Zkw2Xiudp+woqoMt@ZjmC==U~gZUIX|D1Rg3B)BP?Q~pAu*G zJxpMmaM}!(8*x2d*`3hxCh33cF~}Z-s~Vsko3-w1`ZUdsS{zHGxAnb{E@S{YrG>g+ zwFL4cy}!rpaY)==dWd{$m}@jq7olTya}pzxs5Lx2;1nK5FIjk8Ur4&mwE(* zBLV@_Z45pBRu%AZZy&#}xcWex!7BJ}vVKMDA6Sjx~_B-wtHPZ0XRyNn9hlMsEl|%4}iH~Q{fP^*g>$1{Wmkd6Mjcp+2F$hvl zu`#-US!OfltwxDWztb1sHCCJ6KaF0lSdP?eq!jb}N{uXH*lrQtO44JTzB4d6Kmt<1 zkYa6XGgt6&>5CE79(o$D4eB)~fT=r@2`jCIkH0-Pw6E%yFCS!)tNUpk3 z_o&q=$WjU>r!FfrhG=fpEcr1xnjLKbg5bwd;@`o^$*E*2pG~#&Y+*6?|r^-Fncef~yBx{k!3YGY$ zz4_dG2+HOht{(Ep@nwVc6gk$ZFGr=&(R`}-M`5yhmg`Qn8tqK-M7(!K6PZrSjBG?ZAu%%J z#r5(!^v^rQGxKW4n))q!0oUnOOFFsoj}UxAv{%O@c3FG!(9i+Yb^QfI|Jw@di{B}O z)_mJpPMWWpt87gdb?^LdPrXjHDXQ7TCTi3kJTZ&q21uvl=sThN!eP0S7LYY%45C46 zQsmiIorOqwWr+xQqh{cYQRODd3WhO?h7r?l*vB{pePM7Zp=EaQpD(bUylzJWem5O| zkfpl1&=D!6d1*jcpGgUKrd@^9OCbQ3MWO!3Q z&yOOHG5%f~&z;TT4yXA;f7oP7@O#!w)4q(s_&2aw9%Gq<#?BWn%WKxO21W z+X**IKSTUWw;*4N@NGn{;KEx!P>?Cz&GjI$v%Y)l4WKiu=%@W0UaD?BMQ_<=$Z1}P z35A47`6dB!Xm0;DjcLLC&7@v2r-oPeQ8Ow+eR(WWm39DpvXm!Wxt_tt_3JT8=jY-WE20Ac-tOs=f12`WY`dSKo4md95!1g)Z-M@ zqtpVAF-)E6C%-b&{FK)ow5W;$$)Ib;Z@H=r9AlNhfM)IYsxO?)f;;LdkE*z0%`QMq zB|XbO_~QGe*Rj2s9ng*}4=%I#77y(5^@(m1wjojt4E;E1N|;Jg>=|PS4l^9C#3%%Y zm{bMJs~K=_P`dd{@*YH!GQGSg>T7x|%g-nFcDC~6s_YjL@cG`W{%OQov{Uvs1kNA& zyij>+WQ$Y~g6fbZU94Wa9OdSFd9YixK6BfOWtcOMYFHV+xoWmRwac7`9k8#nQrOQ` z?`qVV#0~8;X@s}_tgcc?cJ3&IXbYQL9z-y!W zVY!wFq-N74^r7HdFtD5p;@T1LlmeYD8tOgx(Pow@H zt>%Kgo&6j9E%j@K(2I6!m+lCNN^=&ckP5`RS&*s%y*-C7t2_CxG1>all$MApUBVD? 
zaVe4*T9~1y@|^o7zY+C+UXSC4@i;$q*Aoc6;E>JQoraoS5AOH7d~MVB3DKD1-!*MI zmF2JbxWUV6b=fx!K@@!dCql5{KkF=bodSGJ&2O-Gm8S3}Dmal1aIz{}tWYaMq3hb( zB{{)2=}thMv||*sDBp1?vf7aVWL4#PjniGfiRrKS+QZ{nPepzwCs56m(CDQ4!7YbW z5b}c9HVc@#_frBk8>8GKvFZ=QnR6m@rL-;St$WpPi!c0j7c5v@j8t*mg+EFKdqcCL zYZ|_k_IAxo(WtXsQk;o>_A_j0J$KDOm{{Uw!j_fF+9m`<3J4PxqvloR?`QL~5U!^% zXjTOMX~e9l8ZP{xUOCT~UwBjTnn>p7KiH2zx3`thD~iVaYj`SpU&5qg+krSwGh=;6 zS>ZEkb!jWRXU<8S1-BQyU;A~OXP;v)JHjZ5tVw-rM24pcnN@|I3MSAV`0-B7C%83P z#6-ys;M*WvrXX0!)M9r+ps$quWbib^qpv4YAd*|FOlL?LV=P{jM_92GOKK|3x7^*3THi3WsH-l^|WAi*Jvp$8Jk<@kMLbSC$ChQP$*ea;MPl^ z|8VwpJF>0MAG_W=nQ{LRQPF?tXBe)|myguy__aZvqM&)+6%!e>GEpXwFf{iU281aMDG_pSSI64`x+GQC2-@!<}Re>u?<; z4>y-`-%*bIBXLXFI}P)l5D09+WLMHCxwt#Mj3A3bo7S1kq*YxNONbb%%R-Z?nI(a@eqg^AFuNI%lzh-sJexZQS|^S)TU z#l^M$RCYc5$;_!v&Jw;)$A`6Z`9ueJf7~1KP^%G*Rx6}=uHjH?;Po`1)#;eD#9VxQ z91k^?kJLd|TcFDmJr53ez>d3ZuoG-fjY3DT`9Z_FcX(P$Xg@TXUutTy9ZF zR&f|E>xRS07$QwW8en%iEJ=aI<(;UhRk2N?%+IIo?yAIh9DmZgUjmp9l7RY!0*RV} z%_hVri~G(bgy(F7!(Zm6Kf9o!#GVjX!NaL%@^wtL)s|M3(~Y-nqRL9l7;=e>x;qi$xkSr;-6lM`%xyaI=EaDAZy7=Z$Bj?Da} zx$vR$+h^m3D+v0FI%(o>H*M!Gda-2&z!x99zY4hU_Ais9)$v!!e4L&K%4*&U#-xHB z-0=8R$svyTApMI}db%pgZ{cQvgLp!(CgkL3+iWyzkO_Eqdn$f-k%`Py_Zv8PIrO@r z>Kd@}?LwVYp{Z7TPA*+%*XD!8I3hzKw%br;$0NDN=G5q@ZiAECY+nKG>hsj=uWV>? 
zSRFalMLepv6LE(HYmJXS5_MRMOQ*?Dswu-|^$)p&qJD19Xfk!s_gjSL;MC@H^~rL9A8lbpoV=LZQq~hasF3KZeEh8sib_b+YM<> z=DqBpw&;5T6Oe?Nj6X@1?L`B|)OoZe7@0h9){QVjID{s8~=94ax=tQX% z z^)^QyQ(-H}8gf&u#1byyM_ml>vDnD^`<%Rx8@V?E4Td=hn%z5VMr$rnQi}K40ARi# z3|uwl+ukG4SLVZ1X6HIWlB%Dm@u`Mm}?oX=Q^iyjEXRzG(g=Xy}S%X!VHM;>vRIB#78$X)Hj;wfJK+0nlB03xCQR zZo@jZVRva2V-bQe=bKNFF|roN)=pYh$-z^&q-CEDYmbE{-(hKV&O@w;l06c2V;Zb) z^&(zRQv!dK>kf8gU|rdh3%(HK+D)CYvDBG3iuuPG+vO$LMR&rXmKZTKJmJe&Ps>_> zo`IE?^Wh_8Jg1Z{@djY26{F1Rw48p2F}LAohHVXIWjBuE?_@735!@|J273jt1gP+d z+g&&d5Ru{YB#s=jzjLE2&5&jN+KSn)Q0r!MEFPA^Erg%vzqYaGWupwr%$`ux_z~Vj zh>^OLgr1X=IhOixA`P7eN@M##nYtgoRGl;m#WaM&ER8!D> zetsiOf6Weu&l#>o_!5m#ZJjvr&#=ut+)dmwUq#RmB|GU%l*4GzyA*x5#C<+PhCR|ns@$P1lHcB%vjnc_T9rvPN>S9q31(TMpkZeBhK5(&4GpKH9Uc{lK7AVjkbhfPD)_7Fhu+K$!z z6G^m#Cgyw4d&pJeB>%+Ypo5D8?knlMTA8je1v9bau0h!s`NxSnxuiRKXxp?s~G>MU!R}wa(zlN zluxywNgBtHS?Fw7cJ8h3Y*8zo7FO^UWaJRRwADD-ga&2>c{c_tS+L5M1g?QMKBG=k z9uk3y)G4aYWOwUHEg^H5k6ifc57HJ~wF>6=-w;)W`qM)=j8XIhtWI(s@&j=}$?=KJ zOf4V%q9%I+ig31Jtc46##vr-A;QSf=+EG11{$e%Pw{M6KXr0U@rVil{!JWZs{!RNL zSl^-!Z&*K;4QYCm=C#BI^(Jb6hJ}4o>(}?dLEjp6FcOWk`d+`GiUe+39rZ7<`iRzf z7T4J@*Q0Jc} zliB~-^a?OE*d~7thRSqtkdi4#a>G^f8)%3O>WyLamOeO^-+M<#H%x5iK`_gEh?y_v zE!RF@@e+VbOXCKnG6(|bEo8RYU7kco>ss#ueHfW{6DxAo)i2k@;^JpMgcnb!vwc^| zl!`v`?bwGtZe9!;k>7-DfwO+1=;#88cXW)v9!e6!vca=ioIadS}>#q+fJr^+R^Ll zZhvV${o8LtUbL&vFKBQ)1N+SWEt6QK7Q&bYBOMl6!;7KZk={gYn7(@9C)ew50Qyh+ z`mTjkSG5e}QI9a2F?GP#n38FUr@v_j>&dL%0zl>bMu$W^=q3ekZq$1i-ABmeKi4N!Osysdu6 zog^`^%18tX(+Q!W-(rb$O+}II(uIIlLtou3&Pypah-UrzNSL6=_N_&UjcvB+pRFUj zzf#Y2I^75}D|L(OmrCgNebks$6QQ1*FC^sj@#B4!v`&e>qo2JMy=}k=JrJEqo*5_l zUFS&8jZW4?cj-5eJ71%w;`6Ro-l5m50<3(~a=36>y8$2Po6&(259Dr8? 
z;<^v?af`C_)3WLUaWsIAC`@$yJAj`WQIq69zUSN=s!YT_YG$fjGzDDS!bhCmNrH)s zQ3aVA&gGtk2svDYLD5^4^Q~0{%psxdNKkL2K58~9FIn7)v%a)mITnVv@^a$k4XfbW ziF<^HZo9fnLLF>=WL#c_JO6Z)7-8qaq2w9H_>8e9n$iJO%zH@JDkKWyWS_aiTBRmN z!d{^b>4sZT&hOuF#12HSlV`qqAu|%>K4Q#y1;0bdm2*{F@Yv855Zu?_X(=Mt&Vih| zVB899_(4A{;0IAs&_%Y)q|Y& zPaEqYcC~WykJ7=;X1d+G>_5fai8d-@QdP#+Tn0(${G*5p`fZ-`kk7prJUcUqih@VR zs+V}dArHX?cFhKlFG!;Z%MI=gjS$pN9x=_>Yf0{!|B?1a_7OWs*sw#E5ebV}AO+mk zYF9FZKXG#aPp2WlJ$C$t>YF#S45YR1{`#)8l7tvV*~JtFGd-326F_TRrE*v@D1eZs zlR$pr!@vgh3Qya_79#blWY}%Q6P(^m2v{j2R-*OwFJE^4U*vLaSIq(?XBig<_Y+^n z#4jgms`J)%1;}w*FW|0Kv~^hJYAS9FZVtY|?W*Xn+xYWzUqBn-2e5Kvld=M**&*Tt zonI8STpQEz0>8${^8M?vuK+3tMeKt`cw%}GSk(}GiauZC)!MRNW97NrcMqC1fA^>| zrxVtHK;!REkb-_1z%Fr?YcEjSkNyPoUMw(nP0<*E7#++Uu}VvKR=RYK(oh6gZ7bep zKu!=}Q+(G4$#Q2GwQ?$X-o9>w3bWy#XUO{^Tw~ZJs&efVYEDb$V~t6vE(JOz5YFG1 zNIVQ46y8<=5a+?xp9VDAnj$qJIwtq-Py zKT$)Hey#Y3Ni*#A>zZO<1&!-__^FlR^9TID9s^G6#Q~>QLeqOvS+E$S`l4MjZo{v%a7gMrNdkh*%hn>&vD9k)$}oG8AZV8vtF~tgwhvHb^(V{ zQ2ZG}Gdd_Y1Y>2g@;r-4_OJAHZjeua5n_X@E=%4Y0ZSE@q{PWNu2#rFjds_F#hou=$#mMAO zOPUm)IQ|hLCX0DUM?4w_@PKSQpbu?!8nZ`zm+E*Oe=|dmy=5=`c~+!ZW1L3NQ5dcJ zALKdn=`NVpg2&R>OZwq!l?@Ze)*+MpQ43gyY>lcD)NVu^gmUeHxB0b0N-Le;27tH5 zh)%tgaF~zLmOH&I7d|lt-QJ0WlX}RzZ>a}=z|v5D{kBItpdQH@j?5l6oX~@~(m^id z-q;BMvaddEsqX$d_FC$_>f8iW)yeJ7MYKFE@jbssEO|YWe?-oqsY7=Ih#QSZ!si71 znGJ^*DK5KNI?;11dW?akJyY+HcUtc|=8p@E%WoeOJsa?{aR69d$D;EAL5ex<5rWVSS@+Mte#BVbpjkohC1i!t(aKGW32fdL=PElbwGbLv>$aBPyM8M5uy zElmTfe`XfmHv=a{soND`1;I)l&(Y+iYqYIpCN852glkoL(s?8?`t3s*KgU@Md)rp4 zW4x!4o@Jx&g@>1pi<6K*0(^_=+Ses2dzKZdfh_s1ejnuTe=5$7Xd(HpMr-ep_=3Ez z%Jitx6n7yDTXeZnO&?yipxu?rgy7Iqgn+aIQi$7E;lli&G`>}SZ$5s;QOfFE{wAo} zS3dv$3Np5f8HXva%&6j-ZKaqV@otc{e&w-5Id8{hw>t!%qkHG)h@%YylQcWSULga3 zj*JqWdOGpq!J1IF~*B zHd&+gk=6cccv+;yF~@D0B@|@YJwF)Wnl|35aY;uDJ71$TX|9NP-4y;%jPpS!pI__h zBf^iu`7zy;A#E0}oH3&cGt*0VJ&|v8AzS=f&}a|EPUyLa1{!-d@^<(HLjrs8 zK1-yg0Y#@$WbhpV;0mSlNiRU&Py2^Yp%J?UL-%asNqA;w|Vtj;47|KJFj{ z(;&>WU_xC}16Vyx@epPqDTBH5J)n{|&flo$^BSd-*{3N5Ssb`WI6cL5_2rHstSLgZ 
z@#oD4Y#v*4H8WJYQ>URrbGChF7J*5ir{zt?^9OHhU~7B_#U5&w-GKEOkuTi?eL@ufE-kj=E+@;1WWUV~lh%uMQV;`qUr?F_OQo3c9)BlcyxrvGU;WV4; zSiyGUt)4l>&|^jt?845`<7dyw7psy&hQ!E?;FWm!8yvSda?s9|R{rH6w}|sgs$8mR ztE|qbn76h17)!KPyRB3^;5?cpk72i?)BNm;h3RJ-9+t@}9jdc~<*+pAvFW*}B$+P^9p zIm~_fgCwoXE#95fx*fXU-%j@{josR#A*YrDT@Y4+9&iSS6r3l%IRjfyHApDk)82?<;EwIa~NHwWsMu`@S5YaC?(P z7BH@#9U@Am-CV9+%%Nko^9Ra+hHaV zl)_;1wn8WZqDFYIb9aX$g7JC(p<|XN-1v4cy~>io&DR)t5g<-EHtH_-?SrsN$EFN< zP~03mr;!hkC1PV_596O(I{#MT9)T{5f0D7$DDv&z>t8T6SJ0?lHc~%0?+-4{n}jYA z(YW*Ne0P0JH3GAJ;-{`~bw{4>x=x;38+9iw>L)89P8KAUD#UOQp=Z1^i$zUZ}c@^g_eJP+&QwLFdxp7=yO2J;6t)S75>`!St&N8O(eQ zgc@?5EMr{G^SFC1&4hl1Tj$v-Grv-tk0Jo!sqKZl&4a@==8})Q)O~^1KreKojDTj# zSCb_7c|V>|J%aN9GS|Z0!T{0Gt~eKJ}k;S66I@!&*)-!xtHs zm8EMJ3tT}m!R<)0E#{(y8*K^dx`+oJ9C2#s+$%#1z;6wXB)CphC)B-5#op2FpTxIO zBFz@sTtQ<2kEVgggT6*)@`Yv*7;zVrNpWcyH_8$mnPcA@dARih{beIojUMWq>ixrg+Wu=O@ir_Y7F(kV*%|cQX3cV=r*xCzXgcf z?W8sOs}bl7{BCC3}Q+erLf_ zti~nzhC=~#JNNeuW3%+0i%|$g09ZjIrI&tc1lXGPipF~kLd=!dto3M%)8-L+m8$pT0`i}=pCIeB zAZ;_BAlR@-3zn3p)ho|hG@xA-S~;%g>i0o6afMrPcG7 zDgsWXH!mRr`)rr0T=RwWPkk#~N*qFQz~xwlRd5>JbL~9VB4|Z;IJ^PxN*J}NyC)(0bIIfWsbp2V? z1zcteebQW2klzSvra;N*7X!Xf!h4M%uO&7KRPgqKrYNS#1T$tdN?KSgC$b-EdZd3f zyIg)o3-U^8#jHSVtrs1CD}OTJrqbQ=P_<)Ae9f$K>1JF7%`>`>7u?O#Cx)Wf$+>c? 
z&7{qne8ewnL*pMiz3mVDL9OXkX-jxROow+ZB8;D&+NzwV;UJ7_70f{`K!Gu@wYyVq zch^L)!+gF}NMG6 zU*4GHcgg?nh-QwGgDCJA{}DRApbuL-u|x&I1tB5u3=9NKe~1 zMu;X=w8rI~gX(cYDey4nZ7u$d(Z1XDkvIsn^B`flaOdjqzfAhipYtbkU#=7Y80vE+ z-_Aif6Jf=_M9Xyu(sXW7Y%^+f+`cut&B_%J-LIu70r|Uq*@YBi`q5$$PB=v68D${- z1})a2D|k1EUbhnQ?TVJF)b$<)+Y@i`%zb>c=PEyG0+<%>D6}_9_cg`fFTxb#kNw^U zY2;)?2dL?1>J2_QL}4N~Hwd>d)mhzb-p*W$2wUi44Uxow0CN=-8v8D5*%X<9Rd)-nf!B2rdl$fU~Ml-974(>PD# zFvd?71Q|B^a&1sL16Ve4G7UOH&6~N{_bXElb<;kn|FkYEC=` zYr2vQfkmQoIx0^39bw1c{F|}svS;kly9l*o5Ge34W0EfM>?+# zk9&aw3po?gdjNt~P=GyG0@z@g(~8qW;gu`$JK($q4Q+4W79)yRPAaCDiMF=>9Ow9F zOyNJ$UzF77+03}vLg}(bh0nK(6$OAyJ)z=&UHJc_y7Ip(TaAyb`8;ET{GhUtvMw`) z_~BW?21vu5=L+)4G1A+o#7DVKu=@z^MPp>&1E$7;3=YEgD^t_j73FI^L1by}#K$7A zd0Xa~xPl^aj%9@(|35W&|GPrd7ro9MS|$G*?PNdxe2x98NXdq(;`nOI3@*=?cZ2Q* z6Q(BeV_kSLwrZfh$!rH&W`N`Qo1#Fs<|Qq4@%ea#m%0$XBH+V875B9(?Wy|6#Aa@aNi diff --git a/virtual_machine/arch.svg b/virtual_machine/arch.svg new file mode 100644 index 000000000..9b238f7b2 --- /dev/null +++ b/virtual_machine/arch.svg @@ -0,0 +1,337 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 4c9d0b497a0d539edb2338826e8c7b0f6f1acf67 Mon Sep 17 00:00:00 2001 From: ancavar Date: Mon, 15 Jun 2026 02:00:53 +0300 Subject: [PATCH 139/141] resolve TODOs --- virtual_machine/converter.c | 1 - virtual_machine/lama.c | 13 ++++++------- 
virtual_machine/memory.h | 1 - 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/virtual_machine/converter.c b/virtual_machine/converter.c index 75462abb2..ba9e312a6 100644 --- a/virtual_machine/converter.c +++ b/virtual_machine/converter.c @@ -682,7 +682,6 @@ static bool decode_internal(decode_ctx *ctx) { break; case OP_STA: - // TODO: DEPTH_DEC(3); DEPTH_PUSH(); EMIT_FUNC(op_sta); diff --git a/virtual_machine/lama.c b/virtual_machine/lama.c index 007322e25..5332d88b0 100644 --- a/virtual_machine/lama.c +++ b/virtual_machine/lama.c @@ -58,7 +58,6 @@ static void print_usage(FILE *dest, const char *prog_name) { int main(int argc, char *argv[]) { char *include_paths[MAX_INCLUDE_PATHS]; int include_path_count = 1; - // TODO: better error handling in general int exit_code = 0; char *bytecode_dir = NULL; char *main_unit_name = NULL; @@ -89,19 +88,19 @@ int main(int argc, char *argv[]) { } else { fprintf(stderr, "Maximum number of include paths (%d) exceeded\n", MAX_INCLUDE_PATHS); - return 1; + return EXIT_FAILURE; } break; default: print_usage(stderr, argv[0]); - return 1; + return EXIT_FAILURE; } } if (optind >= argc) { fprintf(stderr, "No bytecode file specified\n\n"); print_usage(stderr, argv[0]); - return 1; + return EXIT_FAILURE; } char *entry_arg = argv[optind]; @@ -117,13 +116,13 @@ int main(int argc, char *argv[]) { if (!is_path) { fprintf(stderr, "Disassembly requires a .bc file path\n"); - exit_code = 1; + exit_code = EXIT_FAILURE; goto cleanup; } bc = bytecode_load(entry_arg); if (!bc) { - exit_code = 1; + exit_code = EXIT_FAILURE; goto cleanup; } @@ -136,7 +135,7 @@ int main(int argc, char *argv[]) { (const char **)(is_path ? include_paths : include_paths + 1), is_path ? 
include_path_count : include_path_count - 1); if (!vm) { - exit_code = 1; + exit_code = EXIT_FAILURE; goto cleanup; } diff --git a/virtual_machine/memory.h b/virtual_machine/memory.h index 9a1535b9f..c800cd67f 100644 --- a/virtual_machine/memory.h +++ b/virtual_machine/memory.h @@ -7,7 +7,6 @@ void *emalloc(size_t size, const char *file, int line); void *erealloc(void *ptr, size_t size, const char *file, int line); char *estrdup(const char *s, const char *file, int line); -// TODO: #define EMALLOC(size) emalloc((size), __FILE__, __LINE__) #define EREALLOC(ptr, size) erealloc((ptr), (size), __FILE__, __LINE__) #define ESTRDUP(s) estrdup((s), __FILE__, __LINE__) From 8f2508f6501118616c3d8fc2f44c7326f1f805a3 Mon Sep 17 00:00:00 2001 From: ancavar Date: Thu, 25 Jun 2026 05:22:04 +0300 Subject: [PATCH 140/141] fix wording to abstract stack machine --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 63727e31c..2ef2146ce 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,8 @@ On the other hand the language can be used in the future as a raw substrate to a The current implementation contains a native code compiler for **x86-64**, written in **OCaml**, a runtime library with garbage-collection support and a bytecode virtual machine written in **C**, and a small standard library, written in ![lama](lama.svg) itself. -In addition, a source-level reference interpreter is implemented as well as a compiler to a stack machine bytecode. -While the stack machine representation is used as an intermediate representation by the native code compiler, the generated stack machine -bytecode can be interpreted by the virtual machine. +In addition, a source-level reference interpreter is implemented as well as a compiler to a small abstract stack machine. 
+This abstract stack machine code can in turn be serialized as bytecode for execution by the virtual machine, or used as an intermediate representation by the native code compiler. ## Language Specification From 9e16c630694ea4850bb900fc9e750d39bf3083a0 Mon Sep 17 00:00:00 2001 From: Danil P Date: Thu, 25 Jun 2026 05:27:14 +0300 Subject: [PATCH 141/141] add cram tests to vm --- regression/gen.ml | 28 +++ regression/vm/dune | 160 ++++++++++++++ regression/vm/test001.t | 3 + regression/vm/test002.t | 3 + regression/vm/test003.t | 5 + regression/vm/test004.t | 3 + regression/vm/test005.t | 3 + regression/vm/test006.t | 8 + regression/vm/test007.t | 3 + regression/vm/test008.t | 3 + regression/vm/test009.t | 3 + regression/vm/test010.t | 3 + regression/vm/test011.t | 3 + regression/vm/test012.t | 10 + regression/vm/test013.t | 10 + regression/vm/test014.t | 33 +++ regression/vm/test015.t | 3 + regression/vm/test016.t | 3 + regression/vm/test017.t | 3 + regression/vm/test018.t | 16 ++ regression/vm/test019.t | 3 + regression/vm/test020.t | 3 + regression/vm/test021.t | 3 + regression/vm/test022.t | 3 + regression/vm/test023.t | 3 + regression/vm/test024.t | 4 + regression/vm/test025.t | 14 ++ regression/vm/test026.t | 23 ++ regression/vm/test027.t | 37 ++++ regression/vm/test028.t | 16 ++ regression/vm/test029.t | 20 ++ regression/vm/test034.t | 18 ++ regression/vm/test036.t | 18 ++ regression/vm/test040.t | 6 + regression/vm/test041.t | 4 + regression/vm/test042.t | 12 ++ regression/vm/test045.t | 42 ++++ regression/vm/test046.t | 15 ++ regression/vm/test050.t | 3 + regression/vm/test054.t | 6 + regression/vm/test059.t | 5 + regression/vm/test063.t | 4 + regression/vm/test072.t | 20 ++ regression/vm/test073.t | 16 ++ regression/vm/test074.t | 38 ++++ regression/vm/test077.t | 8 + regression/vm/test078.t | 14 ++ regression/vm/test079.t | 8 + regression/vm/test080.t | 5 + regression/vm/test081.t | 8 + regression/vm/test082.t | 19 ++ regression/vm/test083.t | 5 + 
regression/vm/test084.t | 5 + regression/vm/test085.t | 10 + regression/vm/test086.t | 5 + regression/vm/test088.t | 4 + regression/vm/test089.t | 3 + regression/vm/test090.t | 5 + regression/vm/test091.t | 11 + regression/vm/test092.t | 7 + regression/vm/test093.t | 4 + regression/vm/test094.t | 6 + regression/vm/test095.t | 3 + regression/vm/test096.t | 4 + regression/vm/test097.t | 3 + regression/vm/test098.t | 3 + regression/vm/test099.t | 5 + regression/vm/test100.t | 3 + regression/vm/test101.t | 3 + regression/vm/test102.t | 3 + regression/vm/test103.t | 3 + regression/vm/test104.t | 12 ++ regression/vm/test105.t | 3 + regression/vm/test106.t | 4 + regression/vm/test107.t | 3 + regression/vm/test110.t | 6 + regression/vm/test111.t | 8 + regression/vm/test112.t | 13 ++ regression/vm/test801.t | 7 + regression/vm/test802.t | 12 ++ runtime/runtime.c | 2 +- runtime32/runtime.c | 2 +- stdlib/regression/gen.ml | 33 +++ stdlib/regression/vm/dune | 68 ++++++ stdlib/regression/vm/test01.t | 312 ++++++++++++++++++++++++++++ stdlib/regression/vm/test02.t | 4 + stdlib/regression/vm/test03.t | 21 ++ stdlib/regression/vm/test04.t | 308 +++++++++++++++++++++++++++ stdlib/regression/vm/test05.t | 12 ++ stdlib/regression/vm/test06.t | 10 + stdlib/regression/vm/test07.t | 9 + stdlib/regression/vm/test08.t | 6 + stdlib/regression/vm/test09.t | 8 + stdlib/regression/vm/test10.t | 5 + stdlib/regression/vm/test11.t | 9 + stdlib/regression/vm/test12.t | 4 + stdlib/regression/vm/test13.t | 6 + stdlib/regression/vm/test14.t | 6 + stdlib/regression/vm/test15.t | 7 + stdlib/regression/vm/test16.t | 9 + stdlib/regression/vm/test17.t | 15 ++ stdlib/regression/vm/test18.t | 35 ++++ stdlib/regression/vm/test20.t | 15 ++ stdlib/regression/vm/test21.t | 12 ++ stdlib/regression/vm/test22.t | 7 + stdlib/regression/vm/test23.t | 6 + stdlib/regression/vm/test24.t | 6 + stdlib/regression/vm/test25.t | 8 + stdlib/regression/vm/test26.t | 5 + stdlib/regression/vm/test27.t | 4 + 
stdlib/regression/vm/test28.t | 7 + stdlib/regression/vm/test29.t | 7 + stdlib/regression/vm/test30.t | 203 ++++++++++++++++++ stdlib/regression/vm/test32.t | 5 + stdlib/regression/vm/test33.t | 5 + stdlib/regression/vm/test34.t | 5 + virtual_machine/regression_check.sh | 28 --- 117 files changed, 2051 insertions(+), 30 deletions(-) create mode 100644 regression/vm/dune create mode 100644 regression/vm/test001.t create mode 100644 regression/vm/test002.t create mode 100644 regression/vm/test003.t create mode 100644 regression/vm/test004.t create mode 100644 regression/vm/test005.t create mode 100644 regression/vm/test006.t create mode 100644 regression/vm/test007.t create mode 100644 regression/vm/test008.t create mode 100644 regression/vm/test009.t create mode 100644 regression/vm/test010.t create mode 100644 regression/vm/test011.t create mode 100644 regression/vm/test012.t create mode 100644 regression/vm/test013.t create mode 100644 regression/vm/test014.t create mode 100644 regression/vm/test015.t create mode 100644 regression/vm/test016.t create mode 100644 regression/vm/test017.t create mode 100644 regression/vm/test018.t create mode 100644 regression/vm/test019.t create mode 100644 regression/vm/test020.t create mode 100644 regression/vm/test021.t create mode 100644 regression/vm/test022.t create mode 100644 regression/vm/test023.t create mode 100644 regression/vm/test024.t create mode 100644 regression/vm/test025.t create mode 100644 regression/vm/test026.t create mode 100644 regression/vm/test027.t create mode 100644 regression/vm/test028.t create mode 100644 regression/vm/test029.t create mode 100644 regression/vm/test034.t create mode 100644 regression/vm/test036.t create mode 100644 regression/vm/test040.t create mode 100644 regression/vm/test041.t create mode 100644 regression/vm/test042.t create mode 100644 regression/vm/test045.t create mode 100644 regression/vm/test046.t create mode 100644 regression/vm/test050.t create mode 100644 
regression/vm/test054.t create mode 100644 regression/vm/test059.t create mode 100644 regression/vm/test063.t create mode 100644 regression/vm/test072.t create mode 100644 regression/vm/test073.t create mode 100644 regression/vm/test074.t create mode 100644 regression/vm/test077.t create mode 100644 regression/vm/test078.t create mode 100644 regression/vm/test079.t create mode 100644 regression/vm/test080.t create mode 100644 regression/vm/test081.t create mode 100644 regression/vm/test082.t create mode 100644 regression/vm/test083.t create mode 100644 regression/vm/test084.t create mode 100644 regression/vm/test085.t create mode 100644 regression/vm/test086.t create mode 100644 regression/vm/test088.t create mode 100644 regression/vm/test089.t create mode 100644 regression/vm/test090.t create mode 100644 regression/vm/test091.t create mode 100644 regression/vm/test092.t create mode 100644 regression/vm/test093.t create mode 100644 regression/vm/test094.t create mode 100644 regression/vm/test095.t create mode 100644 regression/vm/test096.t create mode 100644 regression/vm/test097.t create mode 100644 regression/vm/test098.t create mode 100644 regression/vm/test099.t create mode 100644 regression/vm/test100.t create mode 100644 regression/vm/test101.t create mode 100644 regression/vm/test102.t create mode 100644 regression/vm/test103.t create mode 100644 regression/vm/test104.t create mode 100644 regression/vm/test105.t create mode 100644 regression/vm/test106.t create mode 100644 regression/vm/test107.t create mode 100644 regression/vm/test110.t create mode 100644 regression/vm/test111.t create mode 100644 regression/vm/test112.t create mode 100644 regression/vm/test801.t create mode 100644 regression/vm/test802.t create mode 100644 stdlib/regression/vm/dune create mode 100644 stdlib/regression/vm/test01.t create mode 100644 stdlib/regression/vm/test02.t create mode 100644 stdlib/regression/vm/test03.t create mode 100644 stdlib/regression/vm/test04.t create mode 
100644 stdlib/regression/vm/test05.t create mode 100644 stdlib/regression/vm/test06.t create mode 100644 stdlib/regression/vm/test07.t create mode 100644 stdlib/regression/vm/test08.t create mode 100644 stdlib/regression/vm/test09.t create mode 100644 stdlib/regression/vm/test10.t create mode 100644 stdlib/regression/vm/test11.t create mode 100644 stdlib/regression/vm/test12.t create mode 100644 stdlib/regression/vm/test13.t create mode 100644 stdlib/regression/vm/test14.t create mode 100644 stdlib/regression/vm/test15.t create mode 100644 stdlib/regression/vm/test16.t create mode 100644 stdlib/regression/vm/test17.t create mode 100644 stdlib/regression/vm/test18.t create mode 100644 stdlib/regression/vm/test20.t create mode 100644 stdlib/regression/vm/test21.t create mode 100644 stdlib/regression/vm/test22.t create mode 100644 stdlib/regression/vm/test23.t create mode 100644 stdlib/regression/vm/test24.t create mode 100644 stdlib/regression/vm/test25.t create mode 100644 stdlib/regression/vm/test26.t create mode 100644 stdlib/regression/vm/test27.t create mode 100644 stdlib/regression/vm/test28.t create mode 100644 stdlib/regression/vm/test29.t create mode 100644 stdlib/regression/vm/test30.t create mode 100644 stdlib/regression/vm/test32.t create mode 100644 stdlib/regression/vm/test33.t create mode 100644 stdlib/regression/vm/test34.t delete mode 100755 virtual_machine/regression_check.sh diff --git a/regression/gen.ml b/regression/gen.ml index f8e34841c..1eb135326 100644 --- a/regression/gen.ml +++ b/regression/gen.ml @@ -32,4 +32,32 @@ let () = dprintfn " (deps %s %s))" !lama_file !input_file; Out_channel.with_open_text !cram_file (fun ch -> output_string ch (Buffer.contents cram_buf))) + done); + ignore (Sys.command "mkdir -p vm"); + Out_channel.with_open_text "vm/dune" (fun dunech -> + let dprintfn fmt = Format.kasprintf (Printf.fprintf dunech "%s\n") fmt in + dprintfn "; This file was autogenerated\n"; + dprintfn "(cram (deps ../../src/Driver.exe 
../../runtime/Std.i ../../virtual_machine/lama.exe))\n"; + + for i = 0 to count - 1 do + let cram_buf = Buffer.create 100 in + let cram_printfn fmt = + Format.kasprintf (Printf.bprintf cram_buf "%s\n") fmt + in + let cram_file = Printf.sprintf "vm/test%03d.t" i in + let lama_file = Printf.sprintf "test%03d.lama" i in + let input_file = Printf.sprintf "test%03d.input" i in + + if Sys.file_exists lama_file && i <> 803 then ( + cram_printfn + " $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test%03d.lama" + i; + cram_printfn + " $ ../../virtual_machine/lama.exe test%03d.bc < ../test%03d.input" + i i; + + dprintfn "(cram (applies_to test%03d)" i; + dprintfn " (deps ../%s ../%s))" lama_file input_file; + Out_channel.with_open_text cram_file (fun ch -> + output_string ch (Buffer.contents cram_buf))) done) diff --git a/regression/vm/dune b/regression/vm/dune new file mode 100644 index 000000000..89ae6932b --- /dev/null +++ b/regression/vm/dune @@ -0,0 +1,160 @@ +; This file was autogenerated + +(cram (deps ../../src/Driver.exe ../../runtime/Std.i ../../virtual_machine/lama.exe)) + +(cram (applies_to test001) + (deps ../test001.lama ../test001.input)) +(cram (applies_to test002) + (deps ../test002.lama ../test002.input)) +(cram (applies_to test003) + (deps ../test003.lama ../test003.input)) +(cram (applies_to test004) + (deps ../test004.lama ../test004.input)) +(cram (applies_to test005) + (deps ../test005.lama ../test005.input)) +(cram (applies_to test006) + (deps ../test006.lama ../test006.input)) +(cram (applies_to test007) + (deps ../test007.lama ../test007.input)) +(cram (applies_to test008) + (deps ../test008.lama ../test008.input)) +(cram (applies_to test009) + (deps ../test009.lama ../test009.input)) +(cram (applies_to test010) + (deps ../test010.lama ../test010.input)) +(cram (applies_to test011) + (deps ../test011.lama ../test011.input)) +(cram (applies_to test012) + (deps ../test012.lama ../test012.input)) +(cram (applies_to test013) + 
(deps ../test013.lama ../test013.input)) +(cram (applies_to test014) + (deps ../test014.lama ../test014.input)) +(cram (applies_to test015) + (deps ../test015.lama ../test015.input)) +(cram (applies_to test016) + (deps ../test016.lama ../test016.input)) +(cram (applies_to test017) + (deps ../test017.lama ../test017.input)) +(cram (applies_to test018) + (deps ../test018.lama ../test018.input)) +(cram (applies_to test019) + (deps ../test019.lama ../test019.input)) +(cram (applies_to test020) + (deps ../test020.lama ../test020.input)) +(cram (applies_to test021) + (deps ../test021.lama ../test021.input)) +(cram (applies_to test022) + (deps ../test022.lama ../test022.input)) +(cram (applies_to test023) + (deps ../test023.lama ../test023.input)) +(cram (applies_to test024) + (deps ../test024.lama ../test024.input)) +(cram (applies_to test025) + (deps ../test025.lama ../test025.input)) +(cram (applies_to test026) + (deps ../test026.lama ../test026.input)) +(cram (applies_to test027) + (deps ../test027.lama ../test027.input)) +(cram (applies_to test028) + (deps ../test028.lama ../test028.input)) +(cram (applies_to test029) + (deps ../test029.lama ../test029.input)) +(cram (applies_to test034) + (deps ../test034.lama ../test034.input)) +(cram (applies_to test036) + (deps ../test036.lama ../test036.input)) +(cram (applies_to test040) + (deps ../test040.lama ../test040.input)) +(cram (applies_to test041) + (deps ../test041.lama ../test041.input)) +(cram (applies_to test042) + (deps ../test042.lama ../test042.input)) +(cram (applies_to test045) + (deps ../test045.lama ../test045.input)) +(cram (applies_to test046) + (deps ../test046.lama ../test046.input)) +(cram (applies_to test050) + (deps ../test050.lama ../test050.input)) +(cram (applies_to test054) + (deps ../test054.lama ../test054.input)) +(cram (applies_to test059) + (deps ../test059.lama ../test059.input)) +(cram (applies_to test063) + (deps ../test063.lama ../test063.input)) +(cram (applies_to test072) + (deps 
../test072.lama ../test072.input)) +(cram (applies_to test073) + (deps ../test073.lama ../test073.input)) +(cram (applies_to test074) + (deps ../test074.lama ../test074.input)) +(cram (applies_to test077) + (deps ../test077.lama ../test077.input)) +(cram (applies_to test078) + (deps ../test078.lama ../test078.input)) +(cram (applies_to test079) + (deps ../test079.lama ../test079.input)) +(cram (applies_to test080) + (deps ../test080.lama ../test080.input)) +(cram (applies_to test081) + (deps ../test081.lama ../test081.input)) +(cram (applies_to test082) + (deps ../test082.lama ../test082.input)) +(cram (applies_to test083) + (deps ../test083.lama ../test083.input)) +(cram (applies_to test084) + (deps ../test084.lama ../test084.input)) +(cram (applies_to test085) + (deps ../test085.lama ../test085.input)) +(cram (applies_to test086) + (deps ../test086.lama ../test086.input)) +(cram (applies_to test088) + (deps ../test088.lama ../test088.input)) +(cram (applies_to test089) + (deps ../test089.lama ../test089.input)) +(cram (applies_to test090) + (deps ../test090.lama ../test090.input)) +(cram (applies_to test091) + (deps ../test091.lama ../test091.input)) +(cram (applies_to test092) + (deps ../test092.lama ../test092.input)) +(cram (applies_to test093) + (deps ../test093.lama ../test093.input)) +(cram (applies_to test094) + (deps ../test094.lama ../test094.input)) +(cram (applies_to test095) + (deps ../test095.lama ../test095.input)) +(cram (applies_to test096) + (deps ../test096.lama ../test096.input)) +(cram (applies_to test097) + (deps ../test097.lama ../test097.input)) +(cram (applies_to test098) + (deps ../test098.lama ../test098.input)) +(cram (applies_to test099) + (deps ../test099.lama ../test099.input)) +(cram (applies_to test100) + (deps ../test100.lama ../test100.input)) +(cram (applies_to test101) + (deps ../test101.lama ../test101.input)) +(cram (applies_to test102) + (deps ../test102.lama ../test102.input)) +(cram (applies_to test103) + (deps 
../test103.lama ../test103.input)) +(cram (applies_to test104) + (deps ../test104.lama ../test104.input)) +(cram (applies_to test105) + (deps ../test105.lama ../test105.input)) +(cram (applies_to test106) + (deps ../test106.lama ../test106.input)) +(cram (applies_to test107) + (deps ../test107.lama ../test107.input)) +(cram (applies_to test110) + (deps ../test110.lama ../test110.input)) +(cram (applies_to test111) + (deps ../test111.lama ../test111.input)) +(cram (applies_to test112) + (deps ../test112.lama ../test112.input)) +(cram (applies_to test801) + (deps ../test801.lama ../test801.input)) +(cram (applies_to test802) + (deps ../test802.lama ../test802.input)) diff --git a/regression/vm/test001.t b/regression/vm/test001.t new file mode 100644 index 000000000..3ac915720 --- /dev/null +++ b/regression/vm/test001.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test001.lama + $ ../../virtual_machine/lama.exe test001.bc < ../test001.input + > > 90 diff --git a/regression/vm/test002.t b/regression/vm/test002.t new file mode 100644 index 000000000..92af9f60e --- /dev/null +++ b/regression/vm/test002.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test002.lama + $ ../../virtual_machine/lama.exe test002.bc < ../test002.input + > > 41 diff --git a/regression/vm/test003.t b/regression/vm/test003.t new file mode 100644 index 000000000..c38684950 --- /dev/null +++ b/regression/vm/test003.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test003.lama + $ ../../virtual_machine/lama.exe test003.bc < ../test003.input + > > 7 + 3 + 1 diff --git a/regression/vm/test004.t b/regression/vm/test004.t new file mode 100644 index 000000000..ff0a6e401 --- /dev/null +++ b/regression/vm/test004.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test004.lama + $ ../../virtual_machine/lama.exe test004.bc < ../test004.input 
+ > > 10 diff --git a/regression/vm/test005.t b/regression/vm/test005.t new file mode 100644 index 000000000..c5be28154 --- /dev/null +++ b/regression/vm/test005.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test005.lama + $ ../../virtual_machine/lama.exe test005.bc < ../test005.input + > > 11 diff --git a/regression/vm/test006.t b/regression/vm/test006.t new file mode 100644 index 000000000..857cba182 --- /dev/null +++ b/regression/vm/test006.t @@ -0,0 +1,8 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test006.lama + $ ../../virtual_machine/lama.exe test006.bc < ../test006.input + > > 1 + 1 + 0 + 1 + 0 + 0 diff --git a/regression/vm/test007.t b/regression/vm/test007.t new file mode 100644 index 000000000..eb81b7656 --- /dev/null +++ b/regression/vm/test007.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test007.lama + $ ../../virtual_machine/lama.exe test007.bc < ../test007.input + -4 diff --git a/regression/vm/test008.t b/regression/vm/test008.t new file mode 100644 index 000000000..03b109bb8 --- /dev/null +++ b/regression/vm/test008.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test008.lama + $ ../../virtual_machine/lama.exe test008.bc < ../test008.input + -45 diff --git a/regression/vm/test009.t b/regression/vm/test009.t new file mode 100644 index 000000000..d49d231be --- /dev/null +++ b/regression/vm/test009.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test009.lama + $ ../../virtual_machine/lama.exe test009.bc < ../test009.input + 1024 diff --git a/regression/vm/test010.t b/regression/vm/test010.t new file mode 100644 index 000000000..175255f73 --- /dev/null +++ b/regression/vm/test010.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test010.lama + $ ../../virtual_machine/lama.exe test010.bc < 
../test010.input + 499950 diff --git a/regression/vm/test011.t b/regression/vm/test011.t new file mode 100644 index 000000000..5c925cd4c --- /dev/null +++ b/regression/vm/test011.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test011.lama + $ ../../virtual_machine/lama.exe test011.bc < ../test011.input + 2 diff --git a/regression/vm/test012.t b/regression/vm/test012.t new file mode 100644 index 000000000..b25571fec --- /dev/null +++ b/regression/vm/test012.t @@ -0,0 +1,10 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test012.lama + $ ../../virtual_machine/lama.exe test012.bc < ../test012.input + > 0 + 0 + 0 + 1 + 1 + 0 + 1 + 1 diff --git a/regression/vm/test013.t b/regression/vm/test013.t new file mode 100644 index 000000000..7caf4a96a --- /dev/null +++ b/regression/vm/test013.t @@ -0,0 +1,10 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test013.lama + $ ../../virtual_machine/lama.exe test013.bc < ../test013.input + > 10 + 11 + 10 + 11 + 3 + 2 + 1 + 0 diff --git a/regression/vm/test014.t b/regression/vm/test014.t new file mode 100644 index 000000000..f7b04726a --- /dev/null +++ b/regression/vm/test014.t @@ -0,0 +1,33 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test014.lama + $ ../../virtual_machine/lama.exe test014.bc < ../test014.input + > 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 diff --git a/regression/vm/test015.t b/regression/vm/test015.t new file mode 100644 index 000000000..b75dda75f --- /dev/null +++ b/regression/vm/test015.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test015.lama + $ ../../virtual_machine/lama.exe test015.bc < ../test015.input + > 7919 diff --git a/regression/vm/test016.t b/regression/vm/test016.t new file mode 100644 index 000000000..09141d268 --- /dev/null +++ 
b/regression/vm/test016.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test016.lama + $ ../../virtual_machine/lama.exe test016.bc < ../test016.input + > 3628800 diff --git a/regression/vm/test017.t b/regression/vm/test017.t new file mode 100644 index 000000000..88a828a03 --- /dev/null +++ b/regression/vm/test017.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test017.lama + $ ../../virtual_machine/lama.exe test017.bc < ../test017.input + > 6765 diff --git a/regression/vm/test018.t b/regression/vm/test018.t new file mode 100644 index 000000000..b32d6823e --- /dev/null +++ b/regression/vm/test018.t @@ -0,0 +1,16 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test018.lama + $ ../../virtual_machine/lama.exe test018.bc < ../test018.input + > 2 + 0 + 3 + 4 + 5 + 0 + 7 + 0 + 11 + 0 + 13 + 0 + 17 + 2 diff --git a/regression/vm/test019.t b/regression/vm/test019.t new file mode 100644 index 000000000..6f7cc3a8a --- /dev/null +++ b/regression/vm/test019.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test019.lama + $ ../../virtual_machine/lama.exe test019.bc < ../test019.input + 499950 diff --git a/regression/vm/test020.t b/regression/vm/test020.t new file mode 100644 index 000000000..549d2f2c4 --- /dev/null +++ b/regression/vm/test020.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test020.lama + $ ../../virtual_machine/lama.exe test020.bc < ../test020.input + > 7919 diff --git a/regression/vm/test021.t b/regression/vm/test021.t new file mode 100644 index 000000000..ffca90ec5 --- /dev/null +++ b/regression/vm/test021.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test021.lama + $ ../../virtual_machine/lama.exe test021.bc < ../test021.input + > 3628800 diff --git a/regression/vm/test022.t b/regression/vm/test022.t new file mode 
100644 index 000000000..bd46c8a28 --- /dev/null +++ b/regression/vm/test022.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test022.lama + $ ../../virtual_machine/lama.exe test022.bc < ../test022.input + > 6765 diff --git a/regression/vm/test023.t b/regression/vm/test023.t new file mode 100644 index 000000000..66251e008 --- /dev/null +++ b/regression/vm/test023.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test023.lama + $ ../../virtual_machine/lama.exe test023.bc < ../test023.input + > > > > > > 35 diff --git a/regression/vm/test024.t b/regression/vm/test024.t new file mode 100644 index 000000000..d42ee36a8 --- /dev/null +++ b/regression/vm/test024.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test024.lama + $ ../../virtual_machine/lama.exe test024.bc < ../test024.input + > 3 + 8 diff --git a/regression/vm/test025.t b/regression/vm/test025.t new file mode 100644 index 000000000..9d1f05cfa --- /dev/null +++ b/regression/vm/test025.t @@ -0,0 +1,14 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test025.lama + $ ../../virtual_machine/lama.exe test025.bc < ../test025.input + > 1 + 100 + 200 + 300 + 2 + 100 + 200 + 300 + 3 + 100 + 200 + 300 diff --git a/regression/vm/test026.t b/regression/vm/test026.t new file mode 100644 index 000000000..e59a2f833 --- /dev/null +++ b/regression/vm/test026.t @@ -0,0 +1,23 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test026.lama + $ ../../virtual_machine/lama.exe test026.bc < ../test026.input + > 1 + 100 + 200 + 300 + 100 + 200 + 300 + 2 + 100 + 200 + 300 + 100 + 200 + 300 + 3 + 100 + 200 + 300 + 100 + 200 + 300 diff --git a/regression/vm/test027.t b/regression/vm/test027.t new file mode 100644 index 000000000..e908e764a --- /dev/null +++ b/regression/vm/test027.t @@ -0,0 +1,37 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I 
../../stdlib/x64 -b ../test027.lama + $ ../../virtual_machine/lama.exe test027.bc < ../test027.input + > 1 + 100 + 200 + 300 + 1 + 2 + 100 + 200 + 300 + 3 + 100 + 200 + 300 + 3 + 4 + 100 + 200 + 300 + 5 + 100 + 200 + 300 + 5 + 100 + 200 + 300 + 100 + 200 + 300 + 100 + 200 + 300 + 100 + 200 + 300 diff --git a/regression/vm/test028.t b/regression/vm/test028.t new file mode 100644 index 000000000..eb0d81994 --- /dev/null +++ b/regression/vm/test028.t @@ -0,0 +1,16 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test028.lama + $ ../../virtual_machine/lama.exe test028.bc < ../test028.input + > 7 + 5040 + 6 + 720 + 5 + 120 + 4 + 24 + 3 + 6 + 2 + 2 + 1 + 1 diff --git a/regression/vm/test029.t b/regression/vm/test029.t new file mode 100644 index 000000000..037deb913 --- /dev/null +++ b/regression/vm/test029.t @@ -0,0 +1,20 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test029.lama + $ ../../virtual_machine/lama.exe test029.bc < ../test029.input + > 9 + 55 + 8 + 34 + 7 + 21 + 6 + 13 + 5 + 8 + 4 + 5 + 3 + 3 + 2 + 2 + 1 + 1 diff --git a/regression/vm/test034.t b/regression/vm/test034.t new file mode 100644 index 000000000..5e9403398 --- /dev/null +++ b/regression/vm/test034.t @@ -0,0 +1,18 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test034.lama + $ ../../virtual_machine/lama.exe test034.bc < ../test034.input + > 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 diff --git a/regression/vm/test036.t b/regression/vm/test036.t new file mode 100644 index 000000000..05719a59c --- /dev/null +++ b/regression/vm/test036.t @@ -0,0 +1,18 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test036.lama + $ ../../virtual_machine/lama.exe test036.bc < ../test036.input + > 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 97 + 97 + 97 + 97 + 97 + 97 + 97 + 97 diff --git a/regression/vm/test040.t b/regression/vm/test040.t new file mode 
100644 index 000000000..6df032abd --- /dev/null +++ b/regression/vm/test040.t @@ -0,0 +1,6 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test040.lama + $ ../../virtual_machine/lama.exe test040.bc < ../test040.input + > 1 + 2 + 3 + 4 diff --git a/regression/vm/test041.t b/regression/vm/test041.t new file mode 100644 index 000000000..fc90179c5 --- /dev/null +++ b/regression/vm/test041.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test041.lama + $ ../../virtual_machine/lama.exe test041.bc < ../test041.input + > 600 + 1800 diff --git a/regression/vm/test042.t b/regression/vm/test042.t new file mode 100644 index 000000000..5088e77fd --- /dev/null +++ b/regression/vm/test042.t @@ -0,0 +1,12 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test042.lama + $ ../../virtual_machine/lama.exe test042.bc < ../test042.input + > 0 + 1 + 2 + 3 + 4 + 4 + 4 + 4 + 4 + 4 diff --git a/regression/vm/test045.t b/regression/vm/test045.t new file mode 100644 index 000000000..4452aed39 --- /dev/null +++ b/regression/vm/test045.t @@ -0,0 +1,42 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test045.lama + $ ../../virtual_machine/lama.exe test045.bc < ../test045.input + > 49 + 34 + 97 + 98 + 99 + 34 + 91 + 93 + 91 + 49 + 44 + 32 + 50 + 44 + 32 + 51 + 93 + 67 + 111 + 110 + 115 + 32 + 40 + 49 + 44 + 32 + 67 + 111 + 110 + 115 + 32 + 40 + 50 + 44 + 32 + 78 + 105 + 108 + 41 + 41 diff --git a/regression/vm/test046.t b/regression/vm/test046.t new file mode 100644 index 000000000..c3c4cc89c --- /dev/null +++ b/regression/vm/test046.t @@ -0,0 +1,15 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test046.lama + $ ../../virtual_machine/lama.exe test046.bc < ../test046.input + > 3 + 3 + 3 + 1 + 2 + 3 + 5 + 5 + 1 + 2 + 3 + 4 + 5 diff --git a/regression/vm/test050.t b/regression/vm/test050.t new file mode 100644 index 000000000..3b9a2e1c5 --- 
/dev/null +++ b/regression/vm/test050.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test050.lama + $ ../../virtual_machine/lama.exe test050.bc < ../test050.input + > 2 diff --git a/regression/vm/test054.t b/regression/vm/test054.t new file mode 100644 index 000000000..dd8e244cc --- /dev/null +++ b/regression/vm/test054.t @@ -0,0 +1,6 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test054.lama + Fatal error: exception Failure("Indirect assignment is not supported yet: If (Var (\"z\"), Scope ([], Ref (\"x\")), Scope ([], Ref (\"y\")))") + [2] + $ ../../virtual_machine/lama.exe test054.bc < ../test054.input + Failed to load unit 'test054' + [1] diff --git a/regression/vm/test059.t b/regression/vm/test059.t new file mode 100644 index 000000000..d3a19e041 --- /dev/null +++ b/regression/vm/test059.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test059.lama + $ ../../virtual_machine/lama.exe test059.bc < ../test059.input + > 0 + 1 + 2 diff --git a/regression/vm/test063.t b/regression/vm/test063.t new file mode 100644 index 000000000..f496da825 --- /dev/null +++ b/regression/vm/test063.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test063.lama + $ ../../virtual_machine/lama.exe test063.bc < ../test063.input + > 100 + 200 diff --git a/regression/vm/test072.t b/regression/vm/test072.t new file mode 100644 index 000000000..5adde0af4 --- /dev/null +++ b/regression/vm/test072.t @@ -0,0 +1,20 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test072.lama + $ ../../virtual_machine/lama.exe test072.bc < ../test072.input + > 9 + 55 + 8 + 34 + 7 + 21 + 6 + 13 + 5 + 8 + 4 + 5 + 3 + 3 + 2 + 2 + 1 + 1 diff --git a/regression/vm/test073.t b/regression/vm/test073.t new file mode 100644 index 000000000..80f200212 --- /dev/null +++ b/regression/vm/test073.t @@ -0,0 +1,16 @@ + $ ../../src/Driver.exe 
-runtime ../../runtime -I ../../stdlib/x64 -b ../test073.lama + $ ../../virtual_machine/lama.exe test073.bc < ../test073.input + > 7 + 5040 + 6 + 720 + 5 + 120 + 4 + 24 + 3 + 6 + 2 + 2 + 1 + 1 diff --git a/regression/vm/test074.t b/regression/vm/test074.t new file mode 100644 index 000000000..a93eb9666 --- /dev/null +++ b/regression/vm/test074.t @@ -0,0 +1,38 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test074.lama + $ ../../virtual_machine/lama.exe test074.bc < ../test074.input + > 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 3 + 5 + 7 + 9 + 11 + 13 + 15 + 17 + 19 + 5 + 13 + 29 + 61 + 125 + 253 + 509 + 1021 + 2045 diff --git a/regression/vm/test077.t b/regression/vm/test077.t new file mode 100644 index 000000000..aec55f95c --- /dev/null +++ b/regression/vm/test077.t @@ -0,0 +1,8 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test077.lama + $ ../../virtual_machine/lama.exe test077.bc < ../test077.input + > 5 + 6 + 7 + 8 + 9 + 10 diff --git a/regression/vm/test078.t b/regression/vm/test078.t new file mode 100644 index 000000000..225de8ab2 --- /dev/null +++ b/regression/vm/test078.t @@ -0,0 +1,14 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test078.lama + $ ../../virtual_machine/lama.exe test078.bc < ../test078.input + > 1 + 2 + 3 + 4 + 1 + 2 + 3 + 4 + 3 + 4 + 1 + 2 diff --git a/regression/vm/test079.t b/regression/vm/test079.t new file mode 100644 index 000000000..80eae619a --- /dev/null +++ b/regression/vm/test079.t @@ -0,0 +1,8 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test079.lama + $ ../../virtual_machine/lama.exe test079.bc < ../test079.input + > 1 + 1 + 1 + 1 + 0 + 0 diff --git a/regression/vm/test080.t b/regression/vm/test080.t new file mode 100644 index 000000000..1d003313f --- /dev/null +++ b/regression/vm/test080.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b 
../test080.lama + $ ../../virtual_machine/lama.exe test080.bc < ../test080.input + > 0 + 100 + 300 diff --git a/regression/vm/test081.t b/regression/vm/test081.t new file mode 100644 index 000000000..e346cea15 --- /dev/null +++ b/regression/vm/test081.t @@ -0,0 +1,8 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test081.lama + $ ../../virtual_machine/lama.exe test081.bc < ../test081.input + > 1 + 2 + 3 + 100 + 200 + 300 diff --git a/regression/vm/test082.t b/regression/vm/test082.t new file mode 100644 index 000000000..c0ba37558 --- /dev/null +++ b/regression/vm/test082.t @@ -0,0 +1,19 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test082.lama + $ ../../virtual_machine/lama.exe test082.bc < ../test082.input + > 1 + 1 + 1 + 1 + 1 + 2 + 3 + 100 + 3 + 2 + 1 + 6 + 5 + 4 + 3 + 2 + 1 diff --git a/regression/vm/test083.t b/regression/vm/test083.t new file mode 100644 index 000000000..8e6c83398 --- /dev/null +++ b/regression/vm/test083.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test083.lama + $ ../../virtual_machine/lama.exe test083.bc < ../test083.input + > 7 + 7 + 28 diff --git a/regression/vm/test084.t b/regression/vm/test084.t new file mode 100644 index 000000000..d0f4ba91f --- /dev/null +++ b/regression/vm/test084.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test084.lama + $ ../../virtual_machine/lama.exe test084.bc < ../test084.input + > 55 + 310 + 310 diff --git a/regression/vm/test085.t b/regression/vm/test085.t new file mode 100644 index 000000000..f3152ea97 --- /dev/null +++ b/regression/vm/test085.t @@ -0,0 +1,10 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test085.lama + $ ../../virtual_machine/lama.exe test085.bc < ../test085.input + > 0 + 15 + 15 + 1 + 2 + 3 + 4 + 5 diff --git a/regression/vm/test086.t b/regression/vm/test086.t new file mode 100644 index 
000000000..a3a8a4422 --- /dev/null +++ b/regression/vm/test086.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test086.lama + $ ../../virtual_machine/lama.exe test086.bc < ../test086.input + > 1 + 2 + 3 diff --git a/regression/vm/test088.t b/regression/vm/test088.t new file mode 100644 index 000000000..98f1c1828 --- /dev/null +++ b/regression/vm/test088.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test088.lama + $ ../../virtual_machine/lama.exe test088.bc < ../test088.input + 0 + 3 diff --git a/regression/vm/test089.t b/regression/vm/test089.t new file mode 100644 index 000000000..7b866c040 --- /dev/null +++ b/regression/vm/test089.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test089.lama + $ ../../virtual_machine/lama.exe test089.bc < ../test089.input + > > > 8 diff --git a/regression/vm/test090.t b/regression/vm/test090.t new file mode 100644 index 000000000..996aa7bc7 --- /dev/null +++ b/regression/vm/test090.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test090.lama + $ ../../virtual_machine/lama.exe test090.bc < ../test090.input + > 6 + 7 + 8 diff --git a/regression/vm/test091.t b/regression/vm/test091.t new file mode 100644 index 000000000..cb9cd0086 --- /dev/null +++ b/regression/vm/test091.t @@ -0,0 +1,11 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test091.lama + $ ../../virtual_machine/lama.exe test091.bc < ../test091.input + > 1 + 2 + 3 + 2 + 3 + 4 + 3 + 4 + 5 diff --git a/regression/vm/test092.t b/regression/vm/test092.t new file mode 100644 index 000000000..94222ef7a --- /dev/null +++ b/regression/vm/test092.t @@ -0,0 +1,7 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test092.lama + $ ../../virtual_machine/lama.exe test092.bc < ../test092.input + > 1 + 1 + 1 + 1 + 0 diff --git a/regression/vm/test093.t 
b/regression/vm/test093.t new file mode 100644 index 000000000..3f1e0a6fc --- /dev/null +++ b/regression/vm/test093.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test093.lama + $ ../../virtual_machine/lama.exe test093.bc < ../test093.input + > 11 + 18 diff --git a/regression/vm/test094.t b/regression/vm/test094.t new file mode 100644 index 000000000..f7d455aa3 --- /dev/null +++ b/regression/vm/test094.t @@ -0,0 +1,6 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test094.lama + $ ../../virtual_machine/lama.exe test094.bc < ../test094.input + > 5 + 7 + 12 + -2 diff --git a/regression/vm/test095.t b/regression/vm/test095.t new file mode 100644 index 000000000..73ad459b8 --- /dev/null +++ b/regression/vm/test095.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test095.lama + $ ../../virtual_machine/lama.exe test095.bc < ../test095.input + > 5 diff --git a/regression/vm/test096.t b/regression/vm/test096.t new file mode 100644 index 000000000..cedf73ac4 --- /dev/null +++ b/regression/vm/test096.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test096.lama + $ ../../virtual_machine/lama.exe test096.bc < ../test096.input + > 2 + 1 diff --git a/regression/vm/test097.t b/regression/vm/test097.t new file mode 100644 index 000000000..97ff38e0a --- /dev/null +++ b/regression/vm/test097.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test097.lama + $ ../../virtual_machine/lama.exe test097.bc < ../test097.input + > 35 diff --git a/regression/vm/test098.t b/regression/vm/test098.t new file mode 100644 index 000000000..d86a99129 --- /dev/null +++ b/regression/vm/test098.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test098.lama + $ ../../virtual_machine/lama.exe test098.bc < ../test098.input + > 12 diff --git a/regression/vm/test099.t 
b/regression/vm/test099.t new file mode 100644 index 000000000..9ad4bb758 --- /dev/null +++ b/regression/vm/test099.t @@ -0,0 +1,5 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test099.lama + $ ../../virtual_machine/lama.exe test099.bc < ../test099.input + > 1 + 800 + 800 diff --git a/regression/vm/test100.t b/regression/vm/test100.t new file mode 100644 index 000000000..54b0bfae7 --- /dev/null +++ b/regression/vm/test100.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test100.lama + $ ../../virtual_machine/lama.exe test100.bc < ../test100.input + > 0 diff --git a/regression/vm/test101.t b/regression/vm/test101.t new file mode 100644 index 000000000..c4015064e --- /dev/null +++ b/regression/vm/test101.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test101.lama + $ ../../virtual_machine/lama.exe test101.bc < ../test101.input + > 0 diff --git a/regression/vm/test102.t b/regression/vm/test102.t new file mode 100644 index 000000000..8cde8fbdd --- /dev/null +++ b/regression/vm/test102.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test102.lama + $ ../../virtual_machine/lama.exe test102.bc < ../test102.input + > 5 diff --git a/regression/vm/test103.t b/regression/vm/test103.t new file mode 100644 index 000000000..cf1f3796d --- /dev/null +++ b/regression/vm/test103.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test103.lama + $ ../../virtual_machine/lama.exe test103.bc < ../test103.input + > > > 5 diff --git a/regression/vm/test104.t b/regression/vm/test104.t new file mode 100644 index 000000000..ed3b771ad --- /dev/null +++ b/regression/vm/test104.t @@ -0,0 +1,12 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test104.lama + $ ../../virtual_machine/lama.exe test104.bc < ../test104.input + > 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 diff 
--git a/regression/vm/test105.t b/regression/vm/test105.t new file mode 100644 index 000000000..763962afa --- /dev/null +++ b/regression/vm/test105.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test105.lama + $ ../../virtual_machine/lama.exe test105.bc < ../test105.input + > 3 diff --git a/regression/vm/test106.t b/regression/vm/test106.t new file mode 100644 index 000000000..479d4716f --- /dev/null +++ b/regression/vm/test106.t @@ -0,0 +1,4 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test106.lama + $ ../../virtual_machine/lama.exe test106.bc < ../test106.input + > 1 + 2 diff --git a/regression/vm/test107.t b/regression/vm/test107.t new file mode 100644 index 000000000..35a7b89fa --- /dev/null +++ b/regression/vm/test107.t @@ -0,0 +1,3 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test107.lama + $ ../../virtual_machine/lama.exe test107.bc < ../test107.input + > 0 diff --git a/regression/vm/test110.t b/regression/vm/test110.t new file mode 100644 index 000000000..847aaeed3 --- /dev/null +++ b/regression/vm/test110.t @@ -0,0 +1,6 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test110.lama + Fatal error: exception Failure("Indirect assignment is not supported yet: If (Const (1), Scope ([], ElemRef (Var (\"x\"), Const (0))), Scope ([], ElemRef (Var (\"y\"), Const (0))))") + [2] + $ ../../virtual_machine/lama.exe test110.bc < ../test110.input + Failed to load unit 'test110' + [1] diff --git a/regression/vm/test111.t b/regression/vm/test111.t new file mode 100644 index 000000000..4a1e8d780 --- /dev/null +++ b/regression/vm/test111.t @@ -0,0 +1,8 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test111.lama + $ ../../virtual_machine/lama.exe test111.bc < ../test111.input + 97 + 98 + 99 + 100 + 97 + 98 diff --git a/regression/vm/test112.t b/regression/vm/test112.t new file mode 100644 index 
000000000..a2795af36 --- /dev/null +++ b/regression/vm/test112.t @@ -0,0 +1,13 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test112.lama + $ ../../virtual_machine/lama.exe test112.bc < ../test112.input + 1 + 2 + 5 + 6 + 7 + 8 + 5 + 6 + 7 + 8 + 3 diff --git a/regression/vm/test801.t b/regression/vm/test801.t new file mode 100644 index 000000000..3b3ff3368 --- /dev/null +++ b/regression/vm/test801.t @@ -0,0 +1,7 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test801.lama + $ ../../virtual_machine/lama.exe test801.bc < ../test801.input + 1 + 2 + 3 + 4 + 5 diff --git a/regression/vm/test802.t b/regression/vm/test802.t new file mode 100644 index 000000000..e1fc5266a --- /dev/null +++ b/regression/vm/test802.t @@ -0,0 +1,12 @@ + $ ../../src/Driver.exe -runtime ../../runtime -I ../../stdlib/x64 -b ../test802.lama + $ ../../virtual_machine/lama.exe test802.bc < ../test802.input + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 diff --git a/runtime/runtime.c b/runtime/runtime.c index ace5f2411..e7e0e7cc9 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1270,7 +1270,7 @@ extern aint Lread () { // int result = BOX(0); aint result = BOX(0); - printf("> "); + printf(" > "); fflush(stdout); scanf("%" SCNdAI, &result); diff --git a/runtime32/runtime.c b/runtime32/runtime.c index 14dcda540..37dd4a3b7 100644 --- a/runtime32/runtime.c +++ b/runtime32/runtime.c @@ -1474,7 +1474,7 @@ extern void* Ltl (void *v) { extern int Lread () { int result = BOX(0); - printf ("> "); + printf (" > "); fflush (stdout); scanf ("%d", &result); diff --git a/stdlib/regression/gen.ml b/stdlib/regression/gen.ml index 824fef158..abca85cb1 100644 --- a/stdlib/regression/gen.ml +++ b/stdlib/regression/gen.ml @@ -53,4 +53,37 @@ let () = Out_channel.with_open_text !cram_file (fun ch -> output_string ch "This file was autogenerated.\n"; output_string ch (Buffer.contents cram_buf))) + done); + ignore (Sys.command "mkdir -p vm"); + 
Out_channel.with_open_text "vm/dune" (fun dunech -> + let dprintfn fmt = Format.kasprintf (Printf.fprintf dunech "%s\n") fmt in + dprintfn "; This file was autogenerated\n"; + dprintfn "(cram (deps ../../../src/Driver.exe ../../../runtime/Std.i ../../../virtual_machine/lama.exe %s))\n" + (String.concat " " (List.concat_map (fun s -> + [sprintf "../../../stdlib/x64/%s.i" s + ;sprintf "../../../stdlib/x64/%s.bc" s + ]) + stdlib)); + + for i = 0 to count - 1 do + let cram_buf = Buffer.create 100 in + let cram_printfn fmt = + Format.kasprintf (Printf.bprintf cram_buf "%s\n") fmt + in + let cram_file = Printf.sprintf "vm/test%02d.t" i in + let lama_file = Printf.sprintf "test%02d.lama" i in + + if Sys.file_exists lama_file then ( + cram_printfn + " $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test%02d.lama" + i; + cram_printfn + " $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test%02d.bc" + i; + + dprintfn "(cram (applies_to test%02d)" i; + dprintfn " (deps ../%s))" lama_file; + Out_channel.with_open_text cram_file (fun ch -> + output_string ch "This file was autogenerated.\n"; + output_string ch (Buffer.contents cram_buf))) done) diff --git a/stdlib/regression/vm/dune b/stdlib/regression/vm/dune new file mode 100644 index 000000000..792e8e7e3 --- /dev/null +++ b/stdlib/regression/vm/dune @@ -0,0 +1,68 @@ +; This file was autogenerated + +(cram (deps ../../../src/Driver.exe ../../../runtime/Std.i ../../../virtual_machine/lama.exe ../../../stdlib/x64/Array.i ../../../stdlib/x64/Array.bc ../../../stdlib/x64/Buffer.i ../../../stdlib/x64/Buffer.bc ../../../stdlib/x64/Collection.i ../../../stdlib/x64/Collection.bc ../../../stdlib/x64/Data.i ../../../stdlib/x64/Data.bc ../../../stdlib/x64/Fun.i ../../../stdlib/x64/Fun.bc ../../../stdlib/x64/Lazy.i ../../../stdlib/x64/Lazy.bc ../../../stdlib/x64/List.i ../../../stdlib/x64/List.bc ../../../stdlib/x64/Matcher.i ../../../stdlib/x64/Matcher.bc 
../../../stdlib/x64/Ostap.i ../../../stdlib/x64/Ostap.bc ../../../stdlib/x64/Random.i ../../../stdlib/x64/Random.bc ../../../stdlib/x64/Ref.i ../../../stdlib/x64/Ref.bc ../../../stdlib/x64/STM.i ../../../stdlib/x64/STM.bc ../../../stdlib/x64/Timer.i ../../../stdlib/x64/Timer.bc)) + +(cram (applies_to test01) + (deps ../test01.lama)) +(cram (applies_to test02) + (deps ../test02.lama)) +(cram (applies_to test03) + (deps ../test03.lama)) +(cram (applies_to test04) + (deps ../test04.lama)) +(cram (applies_to test05) + (deps ../test05.lama)) +(cram (applies_to test06) + (deps ../test06.lama)) +(cram (applies_to test07) + (deps ../test07.lama)) +(cram (applies_to test08) + (deps ../test08.lama)) +(cram (applies_to test09) + (deps ../test09.lama)) +(cram (applies_to test10) + (deps ../test10.lama)) +(cram (applies_to test11) + (deps ../test11.lama)) +(cram (applies_to test12) + (deps ../test12.lama)) +(cram (applies_to test13) + (deps ../test13.lama)) +(cram (applies_to test14) + (deps ../test14.lama)) +(cram (applies_to test15) + (deps ../test15.lama)) +(cram (applies_to test16) + (deps ../test16.lama)) +(cram (applies_to test17) + (deps ../test17.lama)) +(cram (applies_to test18) + (deps ../test18.lama)) +(cram (applies_to test20) + (deps ../test20.lama)) +(cram (applies_to test21) + (deps ../test21.lama)) +(cram (applies_to test22) + (deps ../test22.lama)) +(cram (applies_to test23) + (deps ../test23.lama)) +(cram (applies_to test24) + (deps ../test24.lama)) +(cram (applies_to test25) + (deps ../test25.lama)) +(cram (applies_to test26) + (deps ../test26.lama)) +(cram (applies_to test27) + (deps ../test27.lama)) +(cram (applies_to test28) + (deps ../test28.lama)) +(cram (applies_to test29) + (deps ../test29.lama)) +(cram (applies_to test30) + (deps ../test30.lama)) +(cram (applies_to test32) + (deps ../test32.lama)) +(cram (applies_to test33) + (deps ../test33.lama)) +(cram (applies_to test34) + (deps ../test34.lama)) diff --git a/stdlib/regression/vm/test01.t 
b/stdlib/regression/vm/test01.t new file mode 100644 index 000000000..9b06ee5f0 --- /dev/null +++ b/stdlib/regression/vm/test01.t @@ -0,0 +1,312 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test01.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test01.bc + Set internal structure: MNode (63, 1, 0, MNode (31, 1, 0, MNode (15, 1, 0, MNode (7, 1, 0, MNode (3, 1, 0, MNode (1, 1, 0, MNode (0, 1, 0, 0, 0), MNode (2, 1, 0, 0, 0)), MNode (5, 1, 0, MNode (4, 1, 0, 0, 0), MNode (6, 1, 0, 0, 0))), MNode (11, 1, 0, MNode (9, 1, 0, MNode (8, 1, 0, 0, 0), MNode (10, 1, 0, 0, 0)), MNode (13, 1, 0, MNode (12, 1, 0, 0, 0), MNode (14, 1, 0, 0, 0)))), MNode (23, 1, 0, MNode (19, 1, 0, MNode (17, 1, 0, MNode (16, 1, 0, 0, 0), MNode (18, 1, 0, 0, 0)), MNode (21, 1, 0, MNode (20, 1, 0, 0, 0), MNode (22, 1, 0, 0, 0))), MNode (27, 1, 0, MNode (25, 1, 0, MNode (24, 1, 0, 0, 0), MNode (26, 1, 0, 0, 0)), MNode (29, 1, 0, MNode (28, 1, 0, 0, 0), MNode (30, 1, 0, 0, 0))))), MNode (47, 1, 0, MNode (39, 1, 0, MNode (35, 1, 0, MNode (33, 1, 0, MNode (32, 1, 0, 0, 0), MNode (34, 1, 0, 0, 0)), MNode (37, 1, 0, MNode (36, 1, 0, 0, 0), MNode (38, 1, 0, 0, 0))), MNode (43, 1, 0, MNode (41, 1, 0, MNode (40, 1, 0, 0, 0), MNode (42, 1, 0, 0, 0)), MNode (45, 1, 0, MNode (44, 1, 0, 0, 0), MNode (46, 1, 0, 0, 0)))), MNode (55, 1, 0, MNode (51, 1, 0, MNode (49, 1, 0, MNode (48, 1, 0, 0, 0), MNode (50, 1, 0, 0, 0)), MNode (53, 1, 0, MNode (52, 1, 0, 0, 0), MNode (54, 1, 0, 0, 0))), MNode (59, 1, 0, MNode (57, 1, 0, MNode (56, 1, 0, 0, 0), MNode (58, 1, 0, 0, 0)), MNode (61, 1, 0, MNode (60, 1, 0, 0, 0), MNode (62, 1, 0, 0, 0)))))), MNode (79, 1, -1, MNode (71, 1, 0, MNode (67, 1, 0, MNode (65, 1, 0, MNode (64, 1, 0, 0, 0), MNode (66, 1, 0, 0, 0)), MNode (69, 1, 0, MNode (68, 1, 0, 0, 0), MNode (70, 1, 0, 0, 0))), MNode (75, 1, 0, MNode (73, 1, 0, MNode (72, 1, 0, 0, 0), MNode (74, 1, 0, 0, 0)), MNode 
(77, 1, 0, MNode (76, 1, 0, 0, 0), MNode (78, 1, 0, 0, 0)))), MNode (87, 1, -1, MNode (83, 1, 0, MNode (81, 1, 0, MNode (80, 1, 0, 0, 0), MNode (82, 1, 0, 0, 0)), MNode (85, 1, 0, MNode (84, 1, 0, 0, 0), MNode (86, 1, 0, 0, 0))), MNode (95, 1, 0, MNode (91, 1, 0, MNode (89, 1, 0, MNode (88, 1, 0, 0, 0), MNode (90, 1, 0, 0, 0)), MNode (93, 1, 0, MNode (92, 1, 0, 0, 0), MNode (94, 1, 0, 0, 0))), MNode (97, 1, -1, MNode (96, 1, 0, 0, 0), MNode (98, 1, -1, 0, MNode (99, 1, 0, 0, 0))))))) + Set elements: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99} + Testing 0 => 1 + Testing 100 => 0 + Testing 1 => 1 + Testing 101 => 0 + Testing 2 => 1 + Testing 102 => 0 + Testing 3 => 1 + Testing 103 => 0 + Testing 4 => 1 + Testing 104 => 0 + Testing 5 => 1 + Testing 105 => 0 + Testing 6 => 1 + Testing 106 => 0 + Testing 7 => 1 + Testing 107 => 0 + Testing 8 => 1 + Testing 108 => 0 + Testing 9 => 1 + Testing 109 => 0 + Testing 10 => 1 + Testing 110 => 0 + Testing 11 => 1 + Testing 111 => 0 + Testing 12 => 1 + Testing 112 => 0 + Testing 13 => 1 + Testing 113 => 0 + Testing 14 => 1 + Testing 114 => 0 + Testing 15 => 1 + Testing 115 => 0 + Testing 16 => 1 + Testing 116 => 0 + Testing 17 => 1 + Testing 117 => 0 + Testing 18 => 1 + Testing 118 => 0 + Testing 19 => 1 + Testing 119 => 0 + Testing 20 => 1 + Testing 120 => 0 + Testing 21 => 1 + Testing 121 => 0 + Testing 22 => 1 + Testing 122 => 0 + Testing 23 => 1 + Testing 123 => 0 + Testing 24 => 1 + Testing 124 => 0 + Testing 25 => 1 + Testing 125 => 0 + Testing 26 => 1 + Testing 126 => 0 + Testing 27 => 1 + Testing 127 => 0 + Testing 28 => 1 + Testing 128 => 0 + Testing 29 => 1 + Testing 129 => 0 + 
Testing 30 => 1 + Testing 130 => 0 + Testing 31 => 1 + Testing 131 => 0 + Testing 32 => 1 + Testing 132 => 0 + Testing 33 => 1 + Testing 133 => 0 + Testing 34 => 1 + Testing 134 => 0 + Testing 35 => 1 + Testing 135 => 0 + Testing 36 => 1 + Testing 136 => 0 + Testing 37 => 1 + Testing 137 => 0 + Testing 38 => 1 + Testing 138 => 0 + Testing 39 => 1 + Testing 139 => 0 + Testing 40 => 1 + Testing 140 => 0 + Testing 41 => 1 + Testing 141 => 0 + Testing 42 => 1 + Testing 142 => 0 + Testing 43 => 1 + Testing 143 => 0 + Testing 44 => 1 + Testing 144 => 0 + Testing 45 => 1 + Testing 145 => 0 + Testing 46 => 1 + Testing 146 => 0 + Testing 47 => 1 + Testing 147 => 0 + Testing 48 => 1 + Testing 148 => 0 + Testing 49 => 1 + Testing 149 => 0 + Testing 50 => 1 + Testing 150 => 0 + Testing 51 => 1 + Testing 151 => 0 + Testing 52 => 1 + Testing 152 => 0 + Testing 53 => 1 + Testing 153 => 0 + Testing 54 => 1 + Testing 154 => 0 + Testing 55 => 1 + Testing 155 => 0 + Testing 56 => 1 + Testing 156 => 0 + Testing 57 => 1 + Testing 157 => 0 + Testing 58 => 1 + Testing 158 => 0 + Testing 59 => 1 + Testing 159 => 0 + Testing 60 => 1 + Testing 160 => 0 + Testing 61 => 1 + Testing 161 => 0 + Testing 62 => 1 + Testing 162 => 0 + Testing 63 => 1 + Testing 163 => 0 + Testing 64 => 1 + Testing 164 => 0 + Testing 65 => 1 + Testing 165 => 0 + Testing 66 => 1 + Testing 166 => 0 + Testing 67 => 1 + Testing 167 => 0 + Testing 68 => 1 + Testing 168 => 0 + Testing 69 => 1 + Testing 169 => 0 + Testing 70 => 1 + Testing 170 => 0 + Testing 71 => 1 + Testing 171 => 0 + Testing 72 => 1 + Testing 172 => 0 + Testing 73 => 1 + Testing 173 => 0 + Testing 74 => 1 + Testing 174 => 0 + Testing 75 => 1 + Testing 175 => 0 + Testing 76 => 1 + Testing 176 => 0 + Testing 77 => 1 + Testing 177 => 0 + Testing 78 => 1 + Testing 178 => 0 + Testing 79 => 1 + Testing 179 => 0 + Testing 80 => 1 + Testing 180 => 0 + Testing 81 => 1 + Testing 181 => 0 + Testing 82 => 1 + Testing 182 => 0 + Testing 83 => 1 + Testing 183 => 0 + 
Testing 84 => 1 + Testing 184 => 0 + Testing 85 => 1 + Testing 185 => 0 + Testing 86 => 1 + Testing 186 => 0 + Testing 87 => 1 + Testing 187 => 0 + Testing 88 => 1 + Testing 188 => 0 + Testing 89 => 1 + Testing 189 => 0 + Testing 90 => 1 + Testing 190 => 0 + Testing 91 => 1 + Testing 191 => 0 + Testing 92 => 1 + Testing 192 => 0 + Testing 93 => 1 + Testing 193 => 0 + Testing 94 => 1 + Testing 194 => 0 + Testing 95 => 1 + Testing 195 => 0 + Testing 96 => 1 + Testing 196 => 0 + Testing 97 => 1 + Testing 197 => 0 + Testing 98 => 1 + Testing 198 => 0 + Testing 99 => 1 + Testing 199 => 0 + Set internal structure: MNode (63, 0, 0, MNode (31, 1, 0, MNode (15, 1, 0, MNode (7, 1, 0, MNode (3, 1, 0, MNode (1, 1, 0, MNode (0, 1, 0, 0, 0), MNode (2, 1, 0, 0, 0)), MNode (5, 1, 0, MNode (4, 1, 0, 0, 0), MNode (6, 1, 0, 0, 0))), MNode (11, 1, 0, MNode (9, 1, 0, MNode (8, 1, 0, 0, 0), MNode (10, 1, 0, 0, 0)), MNode (13, 1, 0, MNode (12, 1, 0, 0, 0), MNode (14, 1, 0, 0, 0)))), MNode (23, 1, 0, MNode (19, 1, 0, MNode (17, 1, 0, MNode (16, 1, 0, 0, 0), MNode (18, 1, 0, 0, 0)), MNode (21, 1, 0, MNode (20, 1, 0, 0, 0), MNode (22, 1, 0, 0, 0))), MNode (27, 1, 0, MNode (25, 1, 0, MNode (24, 1, 0, 0, 0), MNode (26, 1, 0, 0, 0)), MNode (29, 1, 0, MNode (28, 1, 0, 0, 0), MNode (30, 1, 0, 0, 0))))), MNode (47, 1, 0, MNode (39, 1, 0, MNode (35, 1, 0, MNode (33, 1, 0, MNode (32, 1, 0, 0, 0), MNode (34, 1, 0, 0, 0)), MNode (37, 1, 0, MNode (36, 1, 0, 0, 0), MNode (38, 1, 0, 0, 0))), MNode (43, 1, 0, MNode (41, 1, 0, MNode (40, 1, 0, 0, 0), MNode (42, 1, 0, 0, 0)), MNode (45, 1, 0, MNode (44, 1, 0, 0, 0), MNode (46, 1, 0, 0, 0)))), MNode (55, 0, 0, MNode (51, 0, 0, MNode (49, 1, 0, MNode (48, 1, 0, 0, 0), MNode (50, 0, 0, 0, 0)), MNode (53, 0, 0, MNode (52, 0, 0, 0, 0), MNode (54, 0, 0, 0, 0))), MNode (59, 0, 0, MNode (57, 0, 0, MNode (56, 0, 0, 0, 0), MNode (58, 0, 0, 0, 0)), MNode (61, 0, 0, MNode (60, 0, 0, 0, 0), MNode (62, 0, 0, 0, 0)))))), MNode (79, 0, -1, MNode (71, 0, 0, MNode (67, 0, 
0, MNode (65, 0, 0, MNode (64, 0, 0, 0, 0), MNode (66, 0, 0, 0, 0)), MNode (69, 0, 0, MNode (68, 0, 0, 0, 0), MNode (70, 0, 0, 0, 0))), MNode (75, 0, 0, MNode (73, 0, 0, MNode (72, 0, 0, 0, 0), MNode (74, 0, 0, 0, 0)), MNode (77, 0, 0, MNode (76, 0, 0, 0, 0), MNode (78, 0, 0, 0, 0)))), MNode (87, 0, -1, MNode (83, 0, 0, MNode (81, 0, 0, MNode (80, 0, 0, 0, 0), MNode (82, 0, 0, 0, 0)), MNode (85, 0, 0, MNode (84, 0, 0, 0, 0), MNode (86, 0, 0, 0, 0))), MNode (95, 0, 0, MNode (91, 0, 0, MNode (89, 0, 0, MNode (88, 0, 0, 0, 0), MNode (90, 0, 0, 0, 0)), MNode (93, 0, 0, MNode (92, 0, 0, 0, 0), MNode (94, 0, 0, 0, 0))), MNode (97, 0, -1, MNode (96, 0, 0, 0, 0), MNode (98, 0, -1, 0, MNode (99, 0, 0, 0, 0))))))) + Set elements: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49} + Testing 0 => 1 + Testing 1 => 1 + Testing 2 => 1 + Testing 3 => 1 + Testing 4 => 1 + Testing 5 => 1 + Testing 6 => 1 + Testing 7 => 1 + Testing 8 => 1 + Testing 9 => 1 + Testing 10 => 1 + Testing 11 => 1 + Testing 12 => 1 + Testing 13 => 1 + Testing 14 => 1 + Testing 15 => 1 + Testing 16 => 1 + Testing 17 => 1 + Testing 18 => 1 + Testing 19 => 1 + Testing 20 => 1 + Testing 21 => 1 + Testing 22 => 1 + Testing 23 => 1 + Testing 24 => 1 + Testing 25 => 1 + Testing 26 => 1 + Testing 27 => 1 + Testing 28 => 1 + Testing 29 => 1 + Testing 30 => 1 + Testing 31 => 1 + Testing 32 => 1 + Testing 33 => 1 + Testing 34 => 1 + Testing 35 => 1 + Testing 36 => 1 + Testing 37 => 1 + Testing 38 => 1 + Testing 39 => 1 + Testing 40 => 1 + Testing 41 => 1 + Testing 42 => 1 + Testing 43 => 1 + Testing 44 => 1 + Testing 45 => 1 + Testing 46 => 1 + Testing 47 => 1 + Testing 48 => 1 + Testing 49 => 1 + Testing 50 => 0 + Testing 51 => 0 + Testing 52 => 0 + Testing 53 => 0 + Testing 54 => 0 + Testing 55 => 0 + Testing 56 => 0 + Testing 57 => 0 + Testing 58 => 0 + Testing 59 => 0 + 
Testing 60 => 0 + Testing 61 => 0 + Testing 62 => 0 + Testing 63 => 0 + Testing 64 => 0 + Testing 65 => 0 + Testing 66 => 0 + Testing 67 => 0 + Testing 68 => 0 + Testing 69 => 0 + Testing 70 => 0 + Testing 71 => 0 + Testing 72 => 0 + Testing 73 => 0 + Testing 74 => 0 + Testing 75 => 0 + Testing 76 => 0 + Testing 77 => 0 + Testing 78 => 0 + Testing 79 => 0 + Testing 80 => 0 + Testing 81 => 0 + Testing 82 => 0 + Testing 83 => 0 + Testing 84 => 0 + Testing 85 => 0 + Testing 86 => 0 + Testing 87 => 0 + Testing 88 => 0 + Testing 89 => 0 + Testing 90 => 0 + Testing 91 => 0 + Testing 92 => 0 + Testing 93 => 0 + Testing 94 => 0 + Testing 95 => 0 + Testing 96 => 0 + Testing 97 => 0 + Testing 98 => 0 + Testing 99 => 0 + List set: MNode (2, 1, -1, MNode (1, 1, 0, 0, 0), MNode (4, 1, 0, MNode (3, 1, 0, 0, 0), MNode (5, 1, 0, 0, 0))) + Set union: MNode (4, 1, -1, MNode (2, 1, 0, MNode (1, 1, 0, 0, 0), MNode (3, 1, 0, 0, 0)), MNode (33, 1, 0, MNode (11, 1, 0, MNode (5, 1, 0, 0, 0), MNode (22, 1, 0, 0, 0)), MNode (44, 1, -1, 0, MNode (55, 1, 0, 0, 0)))) + Elements: {1, 2, 3, 4, 5, 11, 22, 33, 44, 55} + Set difference: MNode (4, 1, -1, MNode (2, 1, 0, MNode (1, 0, 0, 0, 0), MNode (3, 0, 0, 0, 0)), MNode (33, 1, 0, MNode (11, 1, 0, MNode (5, 0, 0, 0, 0), MNode (22, 0, 0, 0, 0)), MNode (44, 0, -1, 0, MNode (55, 1, 0, 0, 0)))) + Elements: {2, 4, 11, 33, 55} diff --git a/stdlib/regression/vm/test02.t b/stdlib/regression/vm/test02.t new file mode 100644 index 000000000..ba88c5a36 --- /dev/null +++ b/stdlib/regression/vm/test02.t @@ -0,0 +1,4 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test02.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test02.bc + Assn ("x", Dec ("3")) diff --git a/stdlib/regression/vm/test03.t b/stdlib/regression/vm/test03.t new file mode 100644 index 000000000..3b9a6efb5 --- /dev/null +++ b/stdlib/regression/vm/test03.t @@ -0,0 +1,21 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test03.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test03.bc + -1 + 1 + 0 + -1 + 1 + 0 + -1 + 1 + 0 + 0 + -1 + 1 + -1 + 1 + 0 + 0 + 1 + -1 diff --git a/stdlib/regression/vm/test04.t b/stdlib/regression/vm/test04.t new file mode 100644 index 000000000..9f3f6bdda --- /dev/null +++ b/stdlib/regression/vm/test04.t @@ -0,0 +1,308 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test04.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test04.bc + Map internal structure: MNode (63, {630}, 0, MNode (31, {310}, 0, MNode (15, {150}, 0, MNode (7, {70}, 0, MNode (3, {30}, 0, MNode (1, {10}, 0, MNode (0, {0}, 0, 0, 0), MNode (2, {20}, 0, 0, 0)), MNode (5, {50}, 0, MNode (4, {40}, 0, 0, 0), MNode (6, {60}, 0, 0, 0))), MNode (11, {110}, 0, MNode (9, {90}, 0, MNode (8, {80}, 0, 0, 0), MNode (10, {100}, 0, 0, 0)), MNode (13, {130}, 0, MNode (12, {120}, 0, 0, 0), MNode (14, {140}, 0, 0, 0)))), MNode (23, {230}, 0, MNode (19, {190}, 0, MNode (17, {170}, 0, MNode (16, {160}, 0, 0, 0), MNode (18, {180}, 0, 0, 0)), MNode (21, {210}, 0, MNode (20, {200}, 0, 0, 0), MNode (22, {220}, 0, 0, 0))), MNode (27, {270}, 0, MNode (25, {250}, 0, MNode (24, {240}, 0, 0, 0), MNode (26, {260}, 0, 0, 0)), MNode (29, {290}, 0, MNode (28, {280}, 0, 0, 0), MNode (30, {300}, 0, 0, 0))))), MNode (47, {470}, 0, MNode (39, {390}, 0, MNode (35, {350}, 0, MNode (33, 
{330}, 0, MNode (32, {320}, 0, 0, 0), MNode (34, {340}, 0, 0, 0)), MNode (37, {370}, 0, MNode (36, {360}, 0, 0, 0), MNode (38, {380}, 0, 0, 0))), MNode (43, {430}, 0, MNode (41, {410}, 0, MNode (40, {400}, 0, 0, 0), MNode (42, {420}, 0, 0, 0)), MNode (45, {450}, 0, MNode (44, {440}, 0, 0, 0), MNode (46, {460}, 0, 0, 0)))), MNode (55, {550}, 0, MNode (51, {510}, 0, MNode (49, {490}, 0, MNode (48, {480}, 0, 0, 0), MNode (50, {500}, 0, 0, 0)), MNode (53, {530}, 0, MNode (52, {520}, 0, 0, 0), MNode (54, {540}, 0, 0, 0))), MNode (59, {590}, 0, MNode (57, {570}, 0, MNode (56, {560}, 0, 0, 0), MNode (58, {580}, 0, 0, 0)), MNode (61, {610}, 0, MNode (60, {600}, 0, 0, 0), MNode (62, {620}, 0, 0, 0)))))), MNode (79, {790}, -1, MNode (71, {710}, 0, MNode (67, {670}, 0, MNode (65, {650}, 0, MNode (64, {640}, 0, 0, 0), MNode (66, {660}, 0, 0, 0)), MNode (69, {690}, 0, MNode (68, {680}, 0, 0, 0), MNode (70, {700}, 0, 0, 0))), MNode (75, {750}, 0, MNode (73, {730}, 0, MNode (72, {720}, 0, 0, 0), MNode (74, {740}, 0, 0, 0)), MNode (77, {770}, 0, MNode (76, {760}, 0, 0, 0), MNode (78, {780}, 0, 0, 0)))), MNode (87, {870}, -1, MNode (83, {830}, 0, MNode (81, {810}, 0, MNode (80, {800}, 0, 0, 0), MNode (82, {820}, 0, 0, 0)), MNode (85, {850}, 0, MNode (84, {840}, 0, 0, 0), MNode (86, {860}, 0, 0, 0))), MNode (95, {950}, 0, MNode (91, {910}, 0, MNode (89, {890}, 0, MNode (88, {880}, 0, 0, 0), MNode (90, {900}, 0, 0, 0)), MNode (93, {930}, 0, MNode (92, {920}, 0, 0, 0), MNode (94, {940}, 0, 0, 0))), MNode (97, {970}, -1, MNode (96, {960}, 0, 0, 0), MNode (98, {980}, -1, 0, MNode (99, {990}, 0, 0, 0))))))) + Map elements: {[0, 0], [1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60], [7, 70], [8, 80], [9, 90], [10, 100], [11, 110], [12, 120], [13, 130], [14, 140], [15, 150], [16, 160], [17, 170], [18, 180], [19, 190], [20, 200], [21, 210], [22, 220], [23, 230], [24, 240], [25, 250], [26, 260], [27, 270], [28, 280], [29, 290], [30, 300], [31, 310], [32, 320], [33, 330], [34, 340], [35, 
350], [36, 360], [37, 370], [38, 380], [39, 390], [40, 400], [41, 410], [42, 420], [43, 430], [44, 440], [45, 450], [46, 460], [47, 470], [48, 480], [49, 490], [50, 500], [51, 510], [52, 520], [53, 530], [54, 540], [55, 550], [56, 560], [57, 570], [58, 580], [59, 590], [60, 600], [61, 610], [62, 620], [63, 630], [64, 640], [65, 650], [66, 660], [67, 670], [68, 680], [69, 690], [70, 700], [71, 710], [72, 720], [73, 730], [74, 740], [75, 750], [76, 760], [77, 770], [78, 780], [79, 790], [80, 800], [81, 810], [82, 820], [83, 830], [84, 840], [85, 850], [86, 860], [87, 870], [88, 880], [89, 890], [90, 900], [91, 910], [92, 920], [93, 930], [94, 940], [95, 950], [96, 960], [97, 970], [98, 980], [99, 990]} + Testing 0 => Some (0) + Testing 100 => None + Testing 1 => Some (10) + Testing 101 => None + Testing 2 => Some (20) + Testing 102 => None + Testing 3 => Some (30) + Testing 103 => None + Testing 4 => Some (40) + Testing 104 => None + Testing 5 => Some (50) + Testing 105 => None + Testing 6 => Some (60) + Testing 106 => None + Testing 7 => Some (70) + Testing 107 => None + Testing 8 => Some (80) + Testing 108 => None + Testing 9 => Some (90) + Testing 109 => None + Testing 10 => Some (100) + Testing 110 => None + Testing 11 => Some (110) + Testing 111 => None + Testing 12 => Some (120) + Testing 112 => None + Testing 13 => Some (130) + Testing 113 => None + Testing 14 => Some (140) + Testing 114 => None + Testing 15 => Some (150) + Testing 115 => None + Testing 16 => Some (160) + Testing 116 => None + Testing 17 => Some (170) + Testing 117 => None + Testing 18 => Some (180) + Testing 118 => None + Testing 19 => Some (190) + Testing 119 => None + Testing 20 => Some (200) + Testing 120 => None + Testing 21 => Some (210) + Testing 121 => None + Testing 22 => Some (220) + Testing 122 => None + Testing 23 => Some (230) + Testing 123 => None + Testing 24 => Some (240) + Testing 124 => None + Testing 25 => Some (250) + Testing 125 => None + Testing 26 => Some (260) + Testing 
126 => None + Testing 27 => Some (270) + Testing 127 => None + Testing 28 => Some (280) + Testing 128 => None + Testing 29 => Some (290) + Testing 129 => None + Testing 30 => Some (300) + Testing 130 => None + Testing 31 => Some (310) + Testing 131 => None + Testing 32 => Some (320) + Testing 132 => None + Testing 33 => Some (330) + Testing 133 => None + Testing 34 => Some (340) + Testing 134 => None + Testing 35 => Some (350) + Testing 135 => None + Testing 36 => Some (360) + Testing 136 => None + Testing 37 => Some (370) + Testing 137 => None + Testing 38 => Some (380) + Testing 138 => None + Testing 39 => Some (390) + Testing 139 => None + Testing 40 => Some (400) + Testing 140 => None + Testing 41 => Some (410) + Testing 141 => None + Testing 42 => Some (420) + Testing 142 => None + Testing 43 => Some (430) + Testing 143 => None + Testing 44 => Some (440) + Testing 144 => None + Testing 45 => Some (450) + Testing 145 => None + Testing 46 => Some (460) + Testing 146 => None + Testing 47 => Some (470) + Testing 147 => None + Testing 48 => Some (480) + Testing 148 => None + Testing 49 => Some (490) + Testing 149 => None + Testing 50 => Some (500) + Testing 150 => None + Testing 51 => Some (510) + Testing 151 => None + Testing 52 => Some (520) + Testing 152 => None + Testing 53 => Some (530) + Testing 153 => None + Testing 54 => Some (540) + Testing 154 => None + Testing 55 => Some (550) + Testing 155 => None + Testing 56 => Some (560) + Testing 156 => None + Testing 57 => Some (570) + Testing 157 => None + Testing 58 => Some (580) + Testing 158 => None + Testing 59 => Some (590) + Testing 159 => None + Testing 60 => Some (600) + Testing 160 => None + Testing 61 => Some (610) + Testing 161 => None + Testing 62 => Some (620) + Testing 162 => None + Testing 63 => Some (630) + Testing 163 => None + Testing 64 => Some (640) + Testing 164 => None + Testing 65 => Some (650) + Testing 165 => None + Testing 66 => Some (660) + Testing 166 => None + Testing 67 => Some (670) 
+ Testing 167 => None + Testing 68 => Some (680) + Testing 168 => None + Testing 69 => Some (690) + Testing 169 => None + Testing 70 => Some (700) + Testing 170 => None + Testing 71 => Some (710) + Testing 171 => None + Testing 72 => Some (720) + Testing 172 => None + Testing 73 => Some (730) + Testing 173 => None + Testing 74 => Some (740) + Testing 174 => None + Testing 75 => Some (750) + Testing 175 => None + Testing 76 => Some (760) + Testing 176 => None + Testing 77 => Some (770) + Testing 177 => None + Testing 78 => Some (780) + Testing 178 => None + Testing 79 => Some (790) + Testing 179 => None + Testing 80 => Some (800) + Testing 180 => None + Testing 81 => Some (810) + Testing 181 => None + Testing 82 => Some (820) + Testing 182 => None + Testing 83 => Some (830) + Testing 183 => None + Testing 84 => Some (840) + Testing 184 => None + Testing 85 => Some (850) + Testing 185 => None + Testing 86 => Some (860) + Testing 186 => None + Testing 87 => Some (870) + Testing 187 => None + Testing 88 => Some (880) + Testing 188 => None + Testing 89 => Some (890) + Testing 189 => None + Testing 90 => Some (900) + Testing 190 => None + Testing 91 => Some (910) + Testing 191 => None + Testing 92 => Some (920) + Testing 192 => None + Testing 93 => Some (930) + Testing 193 => None + Testing 94 => Some (940) + Testing 194 => None + Testing 95 => Some (950) + Testing 195 => None + Testing 96 => Some (960) + Testing 196 => None + Testing 97 => Some (970) + Testing 197 => None + Testing 98 => Some (980) + Testing 198 => None + Testing 99 => Some (990) + Testing 199 => None + Map internal structure: MNode (63, 0, 0, MNode (31, {310}, 0, MNode (15, {150}, 0, MNode (7, {70}, 0, MNode (3, {30}, 0, MNode (1, {10}, 0, MNode (0, {0}, 0, 0, 0), MNode (2, {20}, 0, 0, 0)), MNode (5, {50}, 0, MNode (4, {40}, 0, 0, 0), MNode (6, {60}, 0, 0, 0))), MNode (11, {110}, 0, MNode (9, {90}, 0, MNode (8, {80}, 0, 0, 0), MNode (10, {100}, 0, 0, 0)), MNode (13, {130}, 0, MNode (12, {120}, 0, 0, 
0), MNode (14, {140}, 0, 0, 0)))), MNode (23, {230}, 0, MNode (19, {190}, 0, MNode (17, {170}, 0, MNode (16, {160}, 0, 0, 0), MNode (18, {180}, 0, 0, 0)), MNode (21, {210}, 0, MNode (20, {200}, 0, 0, 0), MNode (22, {220}, 0, 0, 0))), MNode (27, {270}, 0, MNode (25, {250}, 0, MNode (24, {240}, 0, 0, 0), MNode (26, {260}, 0, 0, 0)), MNode (29, {290}, 0, MNode (28, {280}, 0, 0, 0), MNode (30, {300}, 0, 0, 0))))), MNode (47, {470}, 0, MNode (39, {390}, 0, MNode (35, {350}, 0, MNode (33, {330}, 0, MNode (32, {320}, 0, 0, 0), MNode (34, {340}, 0, 0, 0)), MNode (37, {370}, 0, MNode (36, {360}, 0, 0, 0), MNode (38, {380}, 0, 0, 0))), MNode (43, {430}, 0, MNode (41, {410}, 0, MNode (40, {400}, 0, 0, 0), MNode (42, {420}, 0, 0, 0)), MNode (45, {450}, 0, MNode (44, {440}, 0, 0, 0), MNode (46, {460}, 0, 0, 0)))), MNode (55, 0, 0, MNode (51, 0, 0, MNode (49, {490}, 0, MNode (48, {480}, 0, 0, 0), MNode (50, 0, 0, 0, 0)), MNode (53, 0, 0, MNode (52, 0, 0, 0, 0), MNode (54, 0, 0, 0, 0))), MNode (59, 0, 0, MNode (57, 0, 0, MNode (56, 0, 0, 0, 0), MNode (58, 0, 0, 0, 0)), MNode (61, 0, 0, MNode (60, 0, 0, 0, 0), MNode (62, 0, 0, 0, 0)))))), MNode (79, 0, -1, MNode (71, 0, 0, MNode (67, 0, 0, MNode (65, 0, 0, MNode (64, 0, 0, 0, 0), MNode (66, 0, 0, 0, 0)), MNode (69, 0, 0, MNode (68, 0, 0, 0, 0), MNode (70, 0, 0, 0, 0))), MNode (75, 0, 0, MNode (73, 0, 0, MNode (72, 0, 0, 0, 0), MNode (74, 0, 0, 0, 0)), MNode (77, 0, 0, MNode (76, 0, 0, 0, 0), MNode (78, 0, 0, 0, 0)))), MNode (87, 0, -1, MNode (83, 0, 0, MNode (81, 0, 0, MNode (80, 0, 0, 0, 0), MNode (82, 0, 0, 0, 0)), MNode (85, 0, 0, MNode (84, 0, 0, 0, 0), MNode (86, 0, 0, 0, 0))), MNode (95, 0, 0, MNode (91, 0, 0, MNode (89, 0, 0, MNode (88, 0, 0, 0, 0), MNode (90, 0, 0, 0, 0)), MNode (93, 0, 0, MNode (92, 0, 0, 0, 0), MNode (94, 0, 0, 0, 0))), MNode (97, 0, -1, MNode (96, 0, 0, 0, 0), MNode (98, 0, -1, 0, MNode (99, 0, 0, 0, 0))))))) + Map elements: {[0, 0], [1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60], [7, 70], [8, 
80], [9, 90], [10, 100], [11, 110], [12, 120], [13, 130], [14, 140], [15, 150], [16, 160], [17, 170], [18, 180], [19, 190], [20, 200], [21, 210], [22, 220], [23, 230], [24, 240], [25, 250], [26, 260], [27, 270], [28, 280], [29, 290], [30, 300], [31, 310], [32, 320], [33, 330], [34, 340], [35, 350], [36, 360], [37, 370], [38, 380], [39, 390], [40, 400], [41, 410], [42, 420], [43, 430], [44, 440], [45, 450], [46, 460], [47, 470], [48, 480], [49, 490]} + Testing 0 => Some (0) + Testing 1 => Some (10) + Testing 2 => Some (20) + Testing 3 => Some (30) + Testing 4 => Some (40) + Testing 5 => Some (50) + Testing 6 => Some (60) + Testing 7 => Some (70) + Testing 8 => Some (80) + Testing 9 => Some (90) + Testing 10 => Some (100) + Testing 11 => Some (110) + Testing 12 => Some (120) + Testing 13 => Some (130) + Testing 14 => Some (140) + Testing 15 => Some (150) + Testing 16 => Some (160) + Testing 17 => Some (170) + Testing 18 => Some (180) + Testing 19 => Some (190) + Testing 20 => Some (200) + Testing 21 => Some (210) + Testing 22 => Some (220) + Testing 23 => Some (230) + Testing 24 => Some (240) + Testing 25 => Some (250) + Testing 26 => Some (260) + Testing 27 => Some (270) + Testing 28 => Some (280) + Testing 29 => Some (290) + Testing 30 => Some (300) + Testing 31 => Some (310) + Testing 32 => Some (320) + Testing 33 => Some (330) + Testing 34 => Some (340) + Testing 35 => Some (350) + Testing 36 => Some (360) + Testing 37 => Some (370) + Testing 38 => Some (380) + Testing 39 => Some (390) + Testing 40 => Some (400) + Testing 41 => Some (410) + Testing 42 => Some (420) + Testing 43 => Some (430) + Testing 44 => Some (440) + Testing 45 => Some (450) + Testing 46 => Some (460) + Testing 47 => Some (470) + Testing 48 => Some (480) + Testing 49 => Some (490) + Testing 50 => None + Testing 51 => None + Testing 52 => None + Testing 53 => None + Testing 54 => None + Testing 55 => None + Testing 56 => None + Testing 57 => None + Testing 58 => None + Testing 59 => None + 
Testing 60 => None + Testing 61 => None + Testing 62 => None + Testing 63 => None + Testing 64 => None + Testing 65 => None + Testing 66 => None + Testing 67 => None + Testing 68 => None + Testing 69 => None + Testing 70 => None + Testing 71 => None + Testing 72 => None + Testing 73 => None + Testing 74 => None + Testing 75 => None + Testing 76 => None + Testing 77 => None + Testing 78 => None + Testing 79 => None + Testing 80 => None + Testing 81 => None + Testing 82 => None + Testing 83 => None + Testing 84 => None + Testing 85 => None + Testing 86 => None + Testing 87 => None + Testing 88 => None + Testing 89 => None + Testing 90 => None + Testing 91 => None + Testing 92 => None + Testing 93 => None + Testing 94 => None + Testing 95 => None + Testing 96 => None + Testing 97 => None + Testing 98 => None + Testing 99 => None + List map: MNode (2, {20}, -1, MNode (1, {10}, 0, 0, 0), MNode (4, {40}, 0, MNode (3, {30}, 0, 0, 0), MNode (5, {50}, 0, 0, 0))) diff --git a/stdlib/regression/vm/test05.t b/stdlib/regression/vm/test05.t new file mode 100644 index 000000000..fcb0eb29e --- /dev/null +++ b/stdlib/regression/vm/test05.t @@ -0,0 +1,12 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test05.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test05.bc + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 + Cached: 1 diff --git a/stdlib/regression/vm/test06.t b/stdlib/regression/vm/test06.t new file mode 100644 index 000000000..640d06b25 --- /dev/null +++ b/stdlib/regression/vm/test06.t @@ -0,0 +1,10 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test06.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test06.bc + Flattening: 0 + Flattening: {0, 0, 0, 0} + Flattening: 0 + Flattening: {1, 2, 3} + Flattening: {1, 2, 3, 4, 5, 6, 7, 8, 9} + List to array: [1, 2, 3, 4, 5] + Array to list: {1, 2, 3, 4, 5} diff --git a/stdlib/regression/vm/test07.t b/stdlib/regression/vm/test07.t new file mode 100644 index 000000000..b4ae234bc --- /dev/null +++ b/stdlib/regression/vm/test07.t @@ -0,0 +1,9 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test07.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test07.bc + HashTab internal structure: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {[{1, 2, 3}, 100]}, 0, 0, 0] + HashTab internal structure: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {[{1, 2, 3}, 200], [{1, 2, 3}, 100]}, 0, 0, 0] + Searching: Some (200) + Searching: Some (200) + Replaced: Some (800) + Restored: Some (200) diff --git a/stdlib/regression/vm/test08.t b/stdlib/regression/vm/test08.t new file mode 100644 index 000000000..e8300dc25 --- /dev/null +++ b/stdlib/regression/vm/test08.t @@ -0,0 +1,6 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test08.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test08.bc + 6 + 120 + 5040 diff --git a/stdlib/regression/vm/test09.t b/stdlib/regression/vm/test09.t new file mode 100644 index 000000000..6d70f7c2a --- /dev/null +++ b/stdlib/regression/vm/test09.t @@ -0,0 +1,8 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test09.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test09.bc + Parsing a*| against "aa"... Succ ({"a", "a"}) + Parsing a+| against "aa"... 
Succ ({"a", "a"}) + Parsing list(a)| against "a"... Succ ({"a"}) + Parsing list(a)| against "a,a"... Succ ({"a", "a"}) + Parsing list0(a)| against ""... Succ (0) diff --git a/stdlib/regression/vm/test10.t b/stdlib/regression/vm/test10.t new file mode 100644 index 000000000..86eccc486 --- /dev/null +++ b/stdlib/regression/vm/test10.t @@ -0,0 +1,5 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test10.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test10.bc + Parsing "aaa" with many ... Succ ({"a", "a", "a"}) + Parsing "ab" with bad_alter ... Succ ("ab") diff --git a/stdlib/regression/vm/test11.t b/stdlib/regression/vm/test11.t new file mode 100644 index 000000000..774d82b7e --- /dev/null +++ b/stdlib/regression/vm/test11.t @@ -0,0 +1,9 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test11.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test11.bc + Succ ("a") + Succ (Add ("a", "a")) + Succ (Sub ("a", "a")) + Succ (Sub (Add ("a", "a"), "a")) + Succ (Add ("a", Mul ("a", "a"))) + Succ (Sub (Mul ("a", "a"), Div ("a", "a"))) diff --git a/stdlib/regression/vm/test12.t b/stdlib/regression/vm/test12.t new file mode 100644 index 000000000..71783661d --- /dev/null +++ b/stdlib/regression/vm/test12.t @@ -0,0 +1,4 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test12.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test12.bc + Succ (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul (Mul ("a", "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a"), "a")) diff --git a/stdlib/regression/vm/test13.t b/stdlib/regression/vm/test13.t new file mode 100644 index 000000000..02be8d6c1 --- /dev/null +++ b/stdlib/regression/vm/test13.t @@ -0,0 +1,6 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test13.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test13.bc + Succ (Add ("a", Sub ("a", "a"))) + Succ (Mul (Div (Mul ("a", "a"), "a"), "a")) + Succ (Add (Mul ("a", "a"), Sub (Div ("a", "a"), Mul ("a", "a")))) diff --git a/stdlib/regression/vm/test14.t b/stdlib/regression/vm/test14.t new file mode 100644 index 000000000..237b053b1 --- /dev/null +++ b/stdlib/regression/vm/test14.t @@ -0,0 +1,6 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test14.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test14.bc + Succ (Add ("a", Sub ("a", "a"))) + Succ (Mul (Div (Mul ("a", "a"), "a"), "a")) + Succ (Add (Mul ("a", "a"), Sub (Div ("a", "a"), Mul ("a", "a")))) diff --git a/stdlib/regression/vm/test15.t b/stdlib/regression/vm/test15.t new file mode 100644 index 000000000..d355cac35 --- /dev/null +++ b/stdlib/regression/vm/test15.t @@ -0,0 +1,7 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test15.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test15.bc + Succ (Eq ("a", "a")) + Succ (Eq (Mul ("a", "a"), Mul ("a", "a"))) + Succ (Eq (Add (Mul ("a", "a"), Sub (Div ("a", "a"), Mul ("a", "a"))), Sub (Mul ("a", "a"), "a"))) + Fail ({""*" expected at"}, 1, 2) diff --git a/stdlib/regression/vm/test16.t b/stdlib/regression/vm/test16.t new file mode 100644 index 000000000..fcfc67646 --- /dev/null +++ b/stdlib/regression/vm/test16.t @@ -0,0 +1,9 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test16.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test16.bc + Succ (Eq ("a", "a")) + Succ (Eq ("b", "b")) + Succ (Eq (Mul ("a", "a"), Mul ("a", "a"))) + Succ (Eq (Mul ("b", "b"), Mul ("b", "b"))) + Succ (Eq (Add (Mul ("a", "a"), Sub (Div ("a", "a"), Mul ("a", "a"))), Sub (Mul ("a", "a"), "a"))) + Succ (Eq (Add (Mul ("b", "b"), Sub (Div ("b", "b"), Mul ("b", "b"))), Sub (Mul ("b", "b"), "b"))) diff --git a/stdlib/regression/vm/test17.t b/stdlib/regression/vm/test17.t new file mode 100644 index 000000000..c15e2d648 --- /dev/null +++ b/stdlib/regression/vm/test17.t @@ -0,0 +1,15 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test17.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test17.bc + Lazy body: 0 + Lazy body: 1 + Lazy body: 2 + Lazy body: 3 + Lazy body: 4 + Lazy body: 5 + Lazy body: 6 + Lazy body: 7 + Lazy body: 8 + Lazy body: 9 + First force: 100 + Second force: 100 diff --git a/stdlib/regression/vm/test18.t b/stdlib/regression/vm/test18.t new file mode 100644 index 000000000..fe12ada17 --- /dev/null +++ b/stdlib/regression/vm/test18.t @@ -0,0 +1,35 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test18.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test18.bc + 1 =?= 1 = 0 + symmetricity: ok + 1 =?= 10 = -1 + symmetricity: ok + "abc" =?= "abc" = 0 + symmetricity: ok + "abc" =?= "def" = -1 + symmetricity: ok + 1 =?= "abc" = 1 + symmetricity: ok + S (1) =?= S (1) = 0 + symmetricity: ok + S (2) =?= S (1) = 1 + symmetricity: ok + S (1, 2, 3) =?= S (1, 3, 2) = -1 + symmetricity: ok + S (1, 2, 3) =?= D (5, 6) = 1 + symmetricity: ok + 1 =?= S (5) = 1 + symmetricity: ok + "abs" =?= S (5, 6) = -1 + symmetricity: ok + [1, 2, 3] =?= S (1, 2, 3) = -1 + symmetricity: ok + "abc" =?= [1, 2, 3] = -1 + symmetricity: ok + 1 =?= [1, 2, 3] = 1 + symmetricity: ok + 0 + 0 + 0 + 0 diff --git a/stdlib/regression/vm/test20.t b/stdlib/regression/vm/test20.t new file mode 100644 index 000000000..0e712c562 --- /dev/null +++ b/stdlib/regression/vm/test20.t @@ -0,0 +1,15 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test20.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test20.bc + Empty + Node (0, Empty, Empty) + Node (0, Empty, Node (1, Empty, Empty)) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Empty))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Empty)))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Empty))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Empty)))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Node (6, Empty, Empty))))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Node (6, Empty, Node (7, Empty, Empty)))))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Node (6, Empty, Node (7, Empty, Node (8, Empty, Empty))))))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Node (6, Empty, Node (7, Empty, Node (8, Empty, Node (9, Empty, Empty)))))))))) + Node (0, Empty, Node (1, Empty, Node (2, Empty, Node (3, Empty, Node (4, Empty, Node (5, Empty, Node (6, Empty, Node (7, Empty, Node (8, Empty, Node (9, Empty, Node (10, Empty, Empty))))))))))) diff --git a/stdlib/regression/vm/test21.t b/stdlib/regression/vm/test21.t new file mode 100644 index 000000000..5d06fb611 --- /dev/null +++ b/stdlib/regression/vm/test21.t @@ -0,0 +1,12 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test21.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test21.bc + 1 + 1 + 1 + 1 + 1 + 2 + 3 + 100 + Cons (3, Cons (2, Cons (1, Cons (6, Cons (5, Cons (4, Cons (3, Cons (2, Cons (1, Nil))))))))) diff --git a/stdlib/regression/vm/test22.t b/stdlib/regression/vm/test22.t new file mode 100644 index 000000000..3ddad17db --- /dev/null +++ b/stdlib/regression/vm/test22.t @@ -0,0 +1,7 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test22.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test22.bc + 0 + {1, 2, 3, 4} + {{1}, {2, 3}, {4, {5, 6}}} + {1, 2, 3, 4} diff --git a/stdlib/regression/vm/test23.t b/stdlib/regression/vm/test23.t new file mode 100644 index 000000000..69f308513 --- /dev/null +++ b/stdlib/regression/vm/test23.t @@ -0,0 +1,6 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test23.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test23.bc + 1 + {2, 3, 4} + 2 diff --git a/stdlib/regression/vm/test24.t b/stdlib/regression/vm/test24.t new file mode 100644 index 000000000..e75c84dff --- /dev/null +++ b/stdlib/regression/vm/test24.t @@ -0,0 +1,6 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test24.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test24.bc + 3 + {1} + {1} diff --git a/stdlib/regression/vm/test25.t b/stdlib/regression/vm/test25.t new file mode 100644 index 000000000..5f1c27f29 --- /dev/null +++ b/stdlib/regression/vm/test25.t @@ -0,0 +1,8 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test25.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test25.bc + Cloning int: 5 + Cloning string: abc + Cloning array: [1, 2, 3, 4, 5] + Cloning sexp: A (1, 2, 3, 4, 5) + Cloning closure: address ok, 5, 6 diff --git a/stdlib/regression/vm/test26.t b/stdlib/regression/vm/test26.t new file mode 100644 index 000000000..a6eb4d104 --- /dev/null +++ b/stdlib/regression/vm/test26.t @@ -0,0 +1,5 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test26.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test26.bc + Number of commands-line arguments: 1 + arg [0 ] = "test26.bc" diff --git a/stdlib/regression/vm/test27.t b/stdlib/regression/vm/test27.t new file mode 100644 index 000000000..81874487e --- /dev/null +++ b/stdlib/regression/vm/test27.t @@ -0,0 +1,4 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test27.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test27.bc + Yes diff --git a/stdlib/regression/vm/test28.t b/stdlib/regression/vm/test28.t new file mode 100644 index 000000000..c03f15938 --- /dev/null +++ b/stdlib/regression/vm/test28.t @@ -0,0 +1,7 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test28.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test28.bc + Succ (Seq ("a", "b")) + Succ (Alt ("a")) + Succ (Alt ("b")) + Succ (Rep ({"a", "a", "a"})) diff --git a/stdlib/regression/vm/test29.t b/stdlib/regression/vm/test29.t new file mode 100644 index 000000000..dc7e55d9e --- /dev/null +++ b/stdlib/regression/vm/test29.t @@ -0,0 +1,7 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test29.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test29.bc + Succ (Seq ("a", "b")) + Succ (Alt ("a")) + Succ (Alt ("b")) + Succ (Rep ({"a", "a", "a"})) diff --git a/stdlib/regression/vm/test30.t b/stdlib/regression/vm/test30.t new file mode 100644 index 000000000..23ee7e0c1 --- /dev/null +++ b/stdlib/regression/vm/test30.t @@ -0,0 +1,203 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test30.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test30.bc + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 diff --git a/stdlib/regression/vm/test32.t b/stdlib/regression/vm/test32.t new file mode 100644 index 000000000..c71cd6114 --- /dev/null +++ b/stdlib/regression/vm/test32.t @@ -0,0 +1,5 @@ +This file was autogenerated. 
+ $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test32.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test32.bc + Flattening: 0 + Flattening: {A, B, C, D} diff --git a/stdlib/regression/vm/test33.t b/stdlib/regression/vm/test33.t new file mode 100644 index 000000000..fab652baf --- /dev/null +++ b/stdlib/regression/vm/test33.t @@ -0,0 +1,5 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test33.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test33.bc + {}.string: 0 + {}.stringcat: diff --git a/stdlib/regression/vm/test34.t b/stdlib/regression/vm/test34.t new file mode 100644 index 000000000..46746af93 --- /dev/null +++ b/stdlib/regression/vm/test34.t @@ -0,0 +1,5 @@ +This file was autogenerated. + $ ../../../src/Driver.exe -runtime ../../../runtime -I ../../../runtime -I ../../../stdlib/x64 -b ../test34.lama + $ ../../../virtual_machine/lama.exe -I ../../../stdlib/x64 test34.bc + ' " ` % \ + \h @ $ # ; [ ] diff --git a/virtual_machine/regression_check.sh b/virtual_machine/regression_check.sh deleted file mode 100755 index 4628bf402..000000000 --- a/virtual_machine/regression_check.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# credit: ProgramSnail - -prefix="../regression/" -suffix=".lama" - -compiler=../_build/default/src/Driver.exe - -echo "Used compiler path:" -echo $compiler - -for test in ../regression/*.lama; do - echo $test - $compiler -b $test >/dev/null - test_path="${test%.*}" - test_file="${test_path##*/}" - echo $test_path: $test_file - cat $test_path.input | ./lama.exe $test_file.bc >test.log 2>&1 - sed -E '1d;s/^//' $test_path.t >test_orig.log - diff -w test.log test_orig.log - - rm $test_file.bc - rm test.log test_orig.log - echo "done" -done - -rm *.o