From e0796d5ffe098d59d3c5e2423033bc2af78f256a Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 12:36:15 +0100 Subject: [PATCH 01/10] docs: ignore Sphinx build and venv directories --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 83bf7c2..829c876 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ a.out simout* simin* logs/ + +docs/_build/ +docs/_venv/ From 7519f9aa06051bd2bdee10e9ec8c291d18b8c2c7 Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 12:36:34 +0100 Subject: [PATCH 02/10] docs: configure todo extension, plain-text highlighting, numfig --- docs/conf.py | 12 +++++++++++- docs/requirements.txt | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 17c27fd..0778865 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,8 +31,18 @@ # ones. extensions = [ "sphinx_rtd_theme", + "sphinx.ext.todo", ] +# X and Hex assembly have no Pygments lexer; default code blocks to plain text. +highlight_language = "text" + +# Number figures, tables, and code blocks for cross-referencing. +numfig = True + +# Render .. todo:: admonitions in the built docs. +todo_include_todos = True + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -42,7 +52,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = [] +exclude_patterns = ["_build", "_venv"] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/requirements.txt b/docs/requirements.txt index 9364148..43a44e0 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ -sphinx==4.4.0 -sphinx_rtd_theme==1.0.0 +sphinx>=8.0 +sphinx_rtd_theme>=2.0 From 9daa3f0bcf1ab378c1211f41806b41ec021f0800 Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 12:48:34 +0100 Subject: [PATCH 03/10] docs: add front matter and full toctree with page placeholders --- docs/architecture/channels.rst | 4 ++ docs/architecture/execution.rst | 4 ++ docs/architecture/instruction-encoding.rst | 4 ++ docs/architecture/instruction-set.rst | 4 ++ docs/architecture/overview.rst | 4 ++ docs/architecture/registers.rst | 4 ++ docs/architecture/simulator-model.rst | 4 ++ docs/architecture/syscalls.rst | 4 ++ docs/compiler/bootstrapping.rst | 4 ++ docs/compiler/codebuffer.rst | 4 ++ docs/compiler/codegen-idioms.rst | 4 ++ docs/compiler/lexical-analyser.rst | 4 ++ docs/compiler/memory-and-calling.rst | 4 ++ docs/compiler/networks.rst | 4 ++ docs/compiler/overview.rst | 4 ++ docs/compiler/syntax-analyser.rst | 4 ++ docs/compiler/translator.rst | 4 ++ docs/hardware/core.rst | 4 ++ docs/hardware/memory-and-links.rst | 4 ++ docs/hardware/network.rst | 4 ++ docs/hardware/overview.rst | 4 ++ docs/hardware/testbench.rst | 4 ++ docs/index.rst | 84 ++++++++++++++++++++++ docs/language/concurrency.rst | 4 ++ docs/language/examples.rst | 4 ++ docs/language/expressions.rst | 4 ++ docs/language/grammar.rst | 4 ++ docs/language/lexical.rst | 4 ++ docs/language/overview.rst | 4 ++ docs/language/procedures-functions.rst | 4 ++ docs/language/program-structure.rst | 4 ++ docs/language/statements.rst | 4 ++ docs/reference/further-reading.rst | 4 ++ docs/reference/instruction-quick-ref.rst | 4 ++ docs/reference/primary-sources.rst | 4 ++ docs/reference/syscall-reference.rst | 4 
++ docs/tools/building.rst | 4 ++ docs/tools/formats.rst | 4 ++ docs/tools/index.rst | 4 ++ docs/tools/testing.rst | 4 ++ 40 files changed, 240 insertions(+) create mode 100644 docs/architecture/channels.rst create mode 100644 docs/architecture/execution.rst create mode 100644 docs/architecture/instruction-encoding.rst create mode 100644 docs/architecture/instruction-set.rst create mode 100644 docs/architecture/overview.rst create mode 100644 docs/architecture/registers.rst create mode 100644 docs/architecture/simulator-model.rst create mode 100644 docs/architecture/syscalls.rst create mode 100644 docs/compiler/bootstrapping.rst create mode 100644 docs/compiler/codebuffer.rst create mode 100644 docs/compiler/codegen-idioms.rst create mode 100644 docs/compiler/lexical-analyser.rst create mode 100644 docs/compiler/memory-and-calling.rst create mode 100644 docs/compiler/networks.rst create mode 100644 docs/compiler/overview.rst create mode 100644 docs/compiler/syntax-analyser.rst create mode 100644 docs/compiler/translator.rst create mode 100644 docs/hardware/core.rst create mode 100644 docs/hardware/memory-and-links.rst create mode 100644 docs/hardware/network.rst create mode 100644 docs/hardware/overview.rst create mode 100644 docs/hardware/testbench.rst create mode 100644 docs/language/concurrency.rst create mode 100644 docs/language/examples.rst create mode 100644 docs/language/expressions.rst create mode 100644 docs/language/grammar.rst create mode 100644 docs/language/lexical.rst create mode 100644 docs/language/overview.rst create mode 100644 docs/language/procedures-functions.rst create mode 100644 docs/language/program-structure.rst create mode 100644 docs/language/statements.rst create mode 100644 docs/reference/further-reading.rst create mode 100644 docs/reference/instruction-quick-ref.rst create mode 100644 docs/reference/primary-sources.rst create mode 100644 docs/reference/syscall-reference.rst create mode 100644 docs/tools/building.rst create mode 
100644 docs/tools/formats.rst create mode 100644 docs/tools/index.rst create mode 100644 docs/tools/testing.rst diff --git a/docs/architecture/channels.rst b/docs/architecture/channels.rst new file mode 100644 index 0000000..534d4b3 --- /dev/null +++ b/docs/architecture/channels.rst @@ -0,0 +1,4 @@ +Channels +======== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/execution.rst b/docs/architecture/execution.rst new file mode 100644 index 0000000..4aec83b --- /dev/null +++ b/docs/architecture/execution.rst @@ -0,0 +1,4 @@ +Instruction execution +===================== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/instruction-encoding.rst b/docs/architecture/instruction-encoding.rst new file mode 100644 index 0000000..d9941e0 --- /dev/null +++ b/docs/architecture/instruction-encoding.rst @@ -0,0 +1,4 @@ +Instruction encoding +==================== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/instruction-set.rst b/docs/architecture/instruction-set.rst new file mode 100644 index 0000000..75278b6 --- /dev/null +++ b/docs/architecture/instruction-set.rst @@ -0,0 +1,4 @@ +Instruction set +=============== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/overview.rst b/docs/architecture/overview.rst new file mode 100644 index 0000000..2a4ce60 --- /dev/null +++ b/docs/architecture/overview.rst @@ -0,0 +1,4 @@ +The Hex architecture +==================== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/registers.rst b/docs/architecture/registers.rst new file mode 100644 index 0000000..5d521a3 --- /dev/null +++ b/docs/architecture/registers.rst @@ -0,0 +1,4 @@ +Registers and machine state +=========================== + +.. todo:: Write this page (Stage 2). 
diff --git a/docs/architecture/simulator-model.rst b/docs/architecture/simulator-model.rst new file mode 100644 index 0000000..0cf1176 --- /dev/null +++ b/docs/architecture/simulator-model.rst @@ -0,0 +1,4 @@ +The reference simulator +======================== + +.. todo:: Write this page (Stage 2). diff --git a/docs/architecture/syscalls.rst b/docs/architecture/syscalls.rst new file mode 100644 index 0000000..d9fb198 --- /dev/null +++ b/docs/architecture/syscalls.rst @@ -0,0 +1,4 @@ +System calls +============ + +.. todo:: Write this page (Stage 2). diff --git a/docs/compiler/bootstrapping.rst b/docs/compiler/bootstrapping.rst new file mode 100644 index 0000000..c1788ea --- /dev/null +++ b/docs/compiler/bootstrapping.rst @@ -0,0 +1,4 @@ +Bootstrapping +============= + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/codebuffer.rst b/docs/compiler/codebuffer.rst new file mode 100644 index 0000000..fee5ec1 --- /dev/null +++ b/docs/compiler/codebuffer.rst @@ -0,0 +1,4 @@ +Code buffer +=========== + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/codegen-idioms.rst b/docs/compiler/codegen-idioms.rst new file mode 100644 index 0000000..83f2c6f --- /dev/null +++ b/docs/compiler/codegen-idioms.rst @@ -0,0 +1,4 @@ +Code-generation idioms +====================== + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/lexical-analyser.rst b/docs/compiler/lexical-analyser.rst new file mode 100644 index 0000000..eed0ce6 --- /dev/null +++ b/docs/compiler/lexical-analyser.rst @@ -0,0 +1,4 @@ +Lexical analyser +================ + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/memory-and-calling.rst b/docs/compiler/memory-and-calling.rst new file mode 100644 index 0000000..7812ac3 --- /dev/null +++ b/docs/compiler/memory-and-calling.rst @@ -0,0 +1,4 @@ +Memory layout and calling convention +===================================== + +.. todo:: Write this page (Stage 4). 
diff --git a/docs/compiler/networks.rst b/docs/compiler/networks.rst new file mode 100644 index 0000000..bca7f93 --- /dev/null +++ b/docs/compiler/networks.rst @@ -0,0 +1,4 @@ +Network containers +================== + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/overview.rst b/docs/compiler/overview.rst new file mode 100644 index 0000000..9dc0d91 --- /dev/null +++ b/docs/compiler/overview.rst @@ -0,0 +1,4 @@ +The compiler +============ + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/syntax-analyser.rst b/docs/compiler/syntax-analyser.rst new file mode 100644 index 0000000..edbf84b --- /dev/null +++ b/docs/compiler/syntax-analyser.rst @@ -0,0 +1,4 @@ +Syntax analyser +=============== + +.. todo:: Write this page (Stage 4). diff --git a/docs/compiler/translator.rst b/docs/compiler/translator.rst new file mode 100644 index 0000000..446930e --- /dev/null +++ b/docs/compiler/translator.rst @@ -0,0 +1,4 @@ +Translator +========== + +.. todo:: Write this page (Stage 4). diff --git a/docs/hardware/core.rst b/docs/hardware/core.rst new file mode 100644 index 0000000..b214f98 --- /dev/null +++ b/docs/hardware/core.rst @@ -0,0 +1,4 @@ +Processor core +============== + +.. todo:: Write this page (Stage 5). diff --git a/docs/hardware/memory-and-links.rst b/docs/hardware/memory-and-links.rst new file mode 100644 index 0000000..648ddb6 --- /dev/null +++ b/docs/hardware/memory-and-links.rst @@ -0,0 +1,4 @@ +Memory and links +================ + +.. todo:: Write this page (Stage 5). diff --git a/docs/hardware/network.rst b/docs/hardware/network.rst new file mode 100644 index 0000000..9fa6aaf --- /dev/null +++ b/docs/hardware/network.rst @@ -0,0 +1,4 @@ +The multi-core network +====================== + +.. todo:: Write this page (Stage 5). 
diff --git a/docs/hardware/overview.rst b/docs/hardware/overview.rst new file mode 100644 index 0000000..109b4b2 --- /dev/null +++ b/docs/hardware/overview.rst @@ -0,0 +1,4 @@ +The RTL implementation +====================== + +.. todo:: Write this page (Stage 5). diff --git a/docs/hardware/testbench.rst b/docs/hardware/testbench.rst new file mode 100644 index 0000000..e98eed3 --- /dev/null +++ b/docs/hardware/testbench.rst @@ -0,0 +1,4 @@ +Testbench +========= + +.. todo:: Write this page (Stage 5). diff --git a/docs/index.rst b/docs/index.rst index 73ea333..6e97227 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,10 +2,94 @@ Hex Processor ============= +These documents describe a complete, deliberately minimal computing stack built +from first principles — from logic gates up to a self-hosting programming +language. The project exists as a reference for understanding how programming +languages correspond to processor hardware: every design decision at each layer +is explained in terms of the layer below it, and nothing is hidden behind +abstraction that is not itself documented here. + +The layered stack +----------------- + +The documentation is organised bottom-up, following the dependencies between +layers. Each layer assumes only the one immediately beneath it: + +1. **Hex architecture** — the instruction set, registers, encoding, and + execution model of the Hex processor. See :doc:`architecture/overview`. +2. **X language** — a small, low-level systems language that compiles directly + to Hex instructions. See :doc:`language/overview`. +3. **The compiler** — a single-pass compiler for X, itself written in X + (self-hosting). See :doc:`compiler/overview`. +4. **The RTL** — a synthesisable Verilog implementation of the Hex processor. + See :doc:`hardware/overview`. +5. **The toolchain** — assembler, simulator, linker, and container tools. + See :doc:`tools/index`. 
+ +Historical lineage +------------------ + +The Hex architecture descends from the Transputer family and the "Simple 42" +teaching processor. The X language stands in the tradition of BCPL (Martin +Richards) and Occam (David May), sharing their philosophy of a small, portable +language that maps cleanly onto hardware. The compiler's self-hosting +bootstrap follows the same lineage: an initial implementation in a host +language produces a native compiler that can rebuild itself from source. + +How to read these docs +---------------------- + +The documentation is written as a reference, not a tutorial, and is best read +bottom-up, starting with the architecture. Readers new to the project may +prefer to begin at :doc:`architecture/overview` before moving to the language +and compiler chapters. Working examples can be found in the ``examples/`` +directory of the repository; the repository's ``README`` contains a quickstart +guide for building and running programs. + .. toctree:: :maxdepth: 2 :caption: Contents: + architecture/overview + architecture/registers + architecture/instruction-encoding + architecture/instruction-set + architecture/execution + architecture/channels + architecture/syscalls + architecture/simulator-model + language/overview + language/lexical + language/program-structure + language/statements + language/procedures-functions + language/expressions + language/concurrency + language/examples + language/grammar + compiler/overview + compiler/lexical-analyser + compiler/syntax-analyser + compiler/translator + compiler/codebuffer + compiler/memory-and-calling + compiler/codegen-idioms + compiler/bootstrapping + compiler/networks + hardware/overview + hardware/core + hardware/memory-and-links + hardware/network + hardware/testbench + tools/index + tools/formats + tools/building + tools/testing + reference/instruction-quick-ref + reference/syscall-reference + reference/primary-sources + reference/further-reading + Indices and tables ================== 
diff --git a/docs/language/concurrency.rst b/docs/language/concurrency.rst new file mode 100644 index 0000000..6c4de60 --- /dev/null +++ b/docs/language/concurrency.rst @@ -0,0 +1,4 @@ +Concurrency +=========== + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/examples.rst b/docs/language/examples.rst new file mode 100644 index 0000000..2527be8 --- /dev/null +++ b/docs/language/examples.rst @@ -0,0 +1,4 @@ +Example programs +================ + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/expressions.rst b/docs/language/expressions.rst new file mode 100644 index 0000000..af86ebb --- /dev/null +++ b/docs/language/expressions.rst @@ -0,0 +1,4 @@ +Expressions +=========== + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/grammar.rst b/docs/language/grammar.rst new file mode 100644 index 0000000..543946e --- /dev/null +++ b/docs/language/grammar.rst @@ -0,0 +1,4 @@ +Grammar +======= + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/lexical.rst b/docs/language/lexical.rst new file mode 100644 index 0000000..b99cc90 --- /dev/null +++ b/docs/language/lexical.rst @@ -0,0 +1,4 @@ +Lexical structure +================= + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/overview.rst b/docs/language/overview.rst new file mode 100644 index 0000000..e9cbf01 --- /dev/null +++ b/docs/language/overview.rst @@ -0,0 +1,4 @@ +The X language +============== + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/procedures-functions.rst b/docs/language/procedures-functions.rst new file mode 100644 index 0000000..0b92378 --- /dev/null +++ b/docs/language/procedures-functions.rst @@ -0,0 +1,4 @@ +Procedures and functions +======================== + +.. todo:: Write this page (Stage 3). 
diff --git a/docs/language/program-structure.rst b/docs/language/program-structure.rst new file mode 100644 index 0000000..0cccebe --- /dev/null +++ b/docs/language/program-structure.rst @@ -0,0 +1,4 @@ +Program structure +================= + +.. todo:: Write this page (Stage 3). diff --git a/docs/language/statements.rst b/docs/language/statements.rst new file mode 100644 index 0000000..8addbe3 --- /dev/null +++ b/docs/language/statements.rst @@ -0,0 +1,4 @@ +Statements +========== + +.. todo:: Write this page (Stage 3). diff --git a/docs/reference/further-reading.rst b/docs/reference/further-reading.rst new file mode 100644 index 0000000..33d3744 --- /dev/null +++ b/docs/reference/further-reading.rst @@ -0,0 +1,4 @@ +Further reading +=============== + +.. todo:: Write this page (Stage 6). diff --git a/docs/reference/instruction-quick-ref.rst b/docs/reference/instruction-quick-ref.rst new file mode 100644 index 0000000..1251c0b --- /dev/null +++ b/docs/reference/instruction-quick-ref.rst @@ -0,0 +1,4 @@ +Instruction quick reference +=========================== + +.. todo:: Write this page (Stage 6). diff --git a/docs/reference/primary-sources.rst b/docs/reference/primary-sources.rst new file mode 100644 index 0000000..4b8d9c5 --- /dev/null +++ b/docs/reference/primary-sources.rst @@ -0,0 +1,4 @@ +Primary sources +=============== + +.. todo:: Write this page (Stage 6). diff --git a/docs/reference/syscall-reference.rst b/docs/reference/syscall-reference.rst new file mode 100644 index 0000000..37a6822 --- /dev/null +++ b/docs/reference/syscall-reference.rst @@ -0,0 +1,4 @@ +System-call reference +===================== + +.. todo:: Write this page (Stage 6). diff --git a/docs/tools/building.rst b/docs/tools/building.rst new file mode 100644 index 0000000..d1c7e09 --- /dev/null +++ b/docs/tools/building.rst @@ -0,0 +1,4 @@ +Building +======== + +.. todo:: Write this page (Stage 6). 
diff --git a/docs/tools/formats.rst b/docs/tools/formats.rst new file mode 100644 index 0000000..1632f85 --- /dev/null +++ b/docs/tools/formats.rst @@ -0,0 +1,4 @@ +Binary and container formats +============================ + +.. todo:: Write this page (Stage 6). diff --git a/docs/tools/index.rst b/docs/tools/index.rst new file mode 100644 index 0000000..2d3e98a --- /dev/null +++ b/docs/tools/index.rst @@ -0,0 +1,4 @@ +The toolchain +============= + +.. todo:: Write this page (Stage 6). diff --git a/docs/tools/testing.rst b/docs/tools/testing.rst new file mode 100644 index 0000000..ccce014 --- /dev/null +++ b/docs/tools/testing.rst @@ -0,0 +1,4 @@ +Testing +======= + +.. todo:: Write this page (Stage 6). From afd316c408eb5bf5716895b23a39da082fb854af Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 14:35:49 +0100 Subject: [PATCH 04/10] docs: write the architecture reference pages --- docs/architecture/channels.rst | 73 +++++++- docs/architecture/execution.rst | 51 +++++- docs/architecture/instruction-encoding.rst | 88 +++++++++- docs/architecture/instruction-set.rst | 191 ++++++++++++++++++++- docs/architecture/overview.rst | 67 +++++++- docs/architecture/registers.rst | 71 +++++++- docs/architecture/simulator-model.rst | 96 ++++++++++- docs/architecture/syscalls.rst | 68 +++++++- 8 files changed, 697 insertions(+), 8 deletions(-) diff --git a/docs/architecture/channels.rst b/docs/architecture/channels.rst index 534d4b3..d1ad59a 100644 --- a/docs/architecture/channels.rst +++ b/docs/architecture/channels.rst @@ -1,4 +1,75 @@ Channels ======== -.. todo:: Write this page (Stage 2). +Hex processors communicate with one another through *channels*: point-to-point +links carrying single-word messages. This is the architecture's only mechanism +for inter-processor communication — there is no shared memory between +processors. 
The model follows the discipline of the Transputer and occam, +deliberately kept as simple as possible: one process runs on one physical +processor, with no scheduler and no buffering. + +The ``IN`` and ``OUT`` operations +--------------------------------- + +Channel communication uses two ``OPR`` sub-operations, +``IN`` (operand ``0x4``) and ``OUT`` (operand ``0x5``). Both name a channel and a +data word through the registers, symmetrically with how ``ADD`` and ``SUB`` use +``areg`` and ``breg``: + +* ``breg`` selects one of the processor's **link slots** — the channel to use. +* ``areg`` carries the data word: it is the value *received* by ``IN`` and the + value *sent* by ``OUT``. + +So ``OPR OUT`` writes the word in ``areg`` to the channel on slot ``breg``, and +``OPR IN`` reads a word from the channel on slot ``breg`` into ``areg``. + +Blocking rendezvous +------------------- + +A channel transfer is a **synchronous, unbuffered rendezvous**. The first party +to arrive at the channel blocks until its partner is also ready. When both are +present, exactly one word is copied from the writer to the reader and both +processors continue. There is no queue and no buffering: the communication +itself is the point of synchronisation between the two processors. + +While a processor is blocked on a channel operation, its program counter is not +advanced — the operation simply has not completed yet. When the partner arrives +and the rendezvous occurs, the blocked processor resumes with the transferred +word in ``areg`` and steps past the instruction. No "blocked" state is left in +the architectural registers. + +Link slots and wiring +--------------------- + +Each processor has a fixed, small number of channel link slots — four. A +processor can therefore be wired to at most four channels at once, which matches +the hardware link budget. 
The wiring (which slot on one processor connects to +which slot on another) is fixed when a network is built; see +:doc:`../compiler/networks`. + +Operating on a slot that is not wired to a channel — a slot index outside the +valid range, or a valid slot with no channel attached — is a runtime error. The +simulator reports it, naming the offending processor and program counter, as a +backstop for cases that cannot be ruled out statically. + +If every processor in a network becomes blocked on a channel with no partner +able to proceed, the system is deadlocked. The simulator detects this and +reports it, listing the channel slot each processor is blocked on. + +Mapping to the X language and to networks +----------------------------------------- + +In the :doc:`X language <../language/overview>`, channels appear as the ``chan`` +type and the statement-level operators ``!`` (send) and ``?`` (receive). The +compiler lowers ``c ! e`` to evaluating ``e`` into ``areg``, loading the slot +for ``c`` into ``breg``, and emitting ``OPR OUT``; ``c ? v`` loads the slot into +``breg``, emits ``OPR IN``, and stores ``areg`` into ``v``. The language-level +view of concurrency and message passing is described in +:doc:`../language/concurrency`. + +A ``chan`` value is represented at runtime simply as the integer index of a link +slot on the processor running that code, which is why the same procedure can be +reused on several processors with different wirings. When ``main`` is a top-level +``par``, the compiler emits a *network container* of one image per processor plus +the slot wiring — see :doc:`../compiler/networks` for how the network is built +and :doc:`../hardware/network` for how it is realised in hardware. diff --git a/docs/architecture/execution.rst b/docs/architecture/execution.rst index 4aec83b..db80b09 100644 --- a/docs/architecture/execution.rst +++ b/docs/architecture/execution.rst @@ -1,4 +1,53 @@ Instruction execution ===================== -.. 
todo:: Write this page (Stage 2). +The execution cycle +------------------- + +Hex executes instructions in a simple three-stage cycle: + +#. **Fetch.** Read the instruction byte addressed by ``pc``. Because memory is + word-addressed but instructions are bytes, the word is selected by the high + bits of ``pc`` and the byte within it by the low bits. +#. **Increment.** Advance ``pc`` to the next instruction byte. +#. **Execute.** Fold the instruction's low nibble into ``oreg`` + (``oreg = oreg | (inst & 0xf)``), then perform the operation selected by the + high nibble. Most operations clear ``oreg`` to ``0`` afterwards; the ``PFIX`` + and ``NFIX`` prefixes instead leave a shifted operand for the next cycle. + +The branch instructions complete by overwriting ``pc`` rather than letting the +increment stand, and the channel operations ``IN``/``OUT`` may suspend the cycle +mid-execution until their partner is ready (see :doc:`channels`). + +The datapath +------------ + +The same cycle is realised in hardware by a small datapath. Its components are: + +* **A multiplexor** — selects the left arithmetic input from ``areg``, ``pc``, + ``oreg``, or zero, depending on the instruction. +* **B multiplexor** — selects the right arithmetic input from ``breg``, + ``oreg``, or zero. +* **Arithmetic unit** — adds or subtracts its two inputs to produce an address + or a result. +* **Memory** — addressed by the arithmetic unit's output; its write data comes + from ``areg``. +* **Result multiplexor** — selects what is written back to the registers, either + the memory read data or the arithmetic unit's output. +* **Instruction register, decoder and control matrix** — hold the fetched + instruction byte and derive the multiplexor selects, the arithmetic operation, + and the memory and register write enables for the current instruction. +* **Clock and timing generator** — sequences the fetch, increment, and execute + steps. + +.. todo:: Add a datapath block diagram. 
+ +Relationship to the implementations +----------------------------------- + +This cycle and datapath are mirrored in two places. The C reference +interpreter in :doc:`simulator-model` implements the same fetch-decode-execute +loop in software, and the SystemVerilog core in :doc:`../hardware/core` +realises the datapath above as real hardware. The two are kept behaviourally +identical, so a program produces the same results whether it runs on the +simulator or on the RTL. diff --git a/docs/architecture/instruction-encoding.rst b/docs/architecture/instruction-encoding.rst index d9941e0..e0bed76 100644 --- a/docs/architecture/instruction-encoding.rst +++ b/docs/architecture/instruction-encoding.rst @@ -1,4 +1,90 @@ Instruction encoding ==================== -.. todo:: Write this page (Stage 2). +Every Hex instruction is a single byte. The high nibble selects the operation +and the low nibble carries a 4-bit immediate operand: + +.. code-block:: text + + bit: 7 6 5 4 3 2 1 0 + +-------+-------+ + | oper | imm | + +-------+-------+ + +The immediate field can directly express the values 0–15. Anything outside that +range — larger offsets, larger constants, and all negative values — is built up +using the prefix instructions described below. + +Operand accumulation +-------------------- + +Operands are assembled in the operand register ``oreg``. Before executing, every +instruction folds its own immediate into ``oreg``:: + + oreg = oreg | (inst & 0xf) + +It then uses ``oreg`` as its operand. Most instructions clear ``oreg`` back to +``0`` afterwards, so that an instruction with no preceding prefixes simply sees +its own 4-bit immediate and the next instruction starts clean. + +The prefix instructions ``PFIX`` and ``NFIX`` are the exception: rather than +clearing ``oreg``, they shift the accumulated nibbles up to make room for the +next instruction's nibble. 
+ +``PFIX`` — positive prefix +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``PFIX`` shifts ``oreg`` left by four bits and continues to the next +instruction:: + + oreg = oreg << 4 + +This concatenates the prefix's nibble with whatever nibble the following +instruction contributes, building up a positive value four bits at a time. + +``NFIX`` — negative prefix +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``NFIX`` shifts left by four bits as well, but also fills the high bits with +ones:: + + oreg = 0xFFFFFF00 | (oreg << 4) + +This sign-extends the operand, and is used to build negative or large-magnitude +immediates. + +Worked example: loading a constant larger than 15 +------------------------------------------------- + +Suppose we want to load the constant ``0x2A`` (decimal 42) into ``areg``. It does +not fit in a single nibble, so the assembler emits one ``PFIX`` followed by the +``LDAC``: + +.. list-table:: + :header-rows: 1 + :widths: 16 20 64 + + * - Byte + - Mnemonic + - Effect on ``oreg`` + * - ``0xE2`` + - ``PFIX 2`` + - ``oreg = oreg | 0x2`` → ``0x2``; then ``oreg = 0x2 << 4`` → ``0x20`` + * - ``0x3A`` + - ``LDAC 10`` + - ``oreg = 0x20 | 0xA`` → ``0x2A``; then ``areg = oreg`` → ``0x2A`` + +The first nibble (``2``) is shifted up by ``PFIX`` to occupy bits 7–4, and the +second nibble (``A``) of ``LDAC`` fills bits 3–0, giving ``0x2A``. Larger +constants chain more ``PFIX`` bytes, four bits per prefix; a negative constant +begins the chain with an ``NFIX`` so the high bits are filled with ones. + +Prefixes are inserted automatically +----------------------------------- + +Programmers and code generators do not emit prefixes by hand. The assembler and +compiler compute the minimal prefix chain needed for each operand. Because a +branch distance depends on the size of the instructions between the branch and +its target — which in turn depends on how many prefixes those instructions need — +the encoding is resolved iteratively. 
This minimal-prefix encoding is described +in :doc:`../compiler/codebuffer`. diff --git a/docs/architecture/instruction-set.rst b/docs/architecture/instruction-set.rst index 75278b6..b3a8765 100644 --- a/docs/architecture/instruction-set.rst +++ b/docs/architecture/instruction-set.rst @@ -1,4 +1,193 @@ Instruction set =============== -.. todo:: Write this page (Stage 2). +The instruction set is organised into a handful of groups. Each instruction is +a single byte whose high nibble is the opcode listed below and whose low nibble +contributes to the operand ``oreg`` (see :doc:`instruction-encoding`). In the +effect column, ``oreg`` denotes the fully assembled operand and ``mem[w]`` the +word at word-address ``w``. + +Data access +----------- + +.. list-table:: + :header-rows: 1 + :widths: 12 12 28 48 + + * - Mnemonic + - Opcode + - Effect + - Description + * - ``LDAM`` + - ``0x0`` + - ``areg = mem[oreg]`` + - Load ``areg`` from the memory word at address ``oreg``. + * - ``LDBM`` + - ``0x1`` + - ``breg = mem[oreg]`` + - Load ``breg`` from the memory word at address ``oreg``. + * - ``STAM`` + - ``0x2`` + - ``mem[oreg] = areg`` + - Store ``areg`` to the memory word at address ``oreg``. + +Constants and program addresses +------------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 12 12 28 48 + + * - Mnemonic + - Opcode + - Effect + - Description + * - ``LDAC`` + - ``0x3`` + - ``areg = oreg`` + - Load ``areg`` with the constant operand. + * - ``LDBC`` + - ``0x4`` + - ``breg = oreg`` + - Load ``breg`` with the constant operand. + * - ``LDAP`` + - ``0x5`` + - ``areg = pc + oreg`` + - Load ``areg`` with a PC-relative address (the operand added to the + current ``pc``). + +Indexed data structures +------------------------ + +.. 
list-table:: + :header-rows: 1 + :widths: 12 12 32 44 + + * - Mnemonic + - Opcode + - Effect + - Description + * - ``LDAI`` + - ``0x6`` + - ``areg = mem[areg + oreg]`` + - Load ``areg`` from memory indexed by ``areg`` plus the operand. + * - ``LDBI`` + - ``0x7`` + - ``breg = mem[breg + oreg]`` + - Load ``breg`` from memory indexed by ``breg`` plus the operand. + * - ``STAI`` + - ``0x8`` + - ``mem[breg + oreg] = areg`` + - Store ``areg`` to memory indexed by ``breg`` plus the operand. + +These indexed forms make array and structure access cheap: the base address sits +in a register and the operand supplies the field or element offset. + +Branch, jump and call +--------------------- + +.. list-table:: + :header-rows: 1 + :widths: 12 16 28 44 + + * - Mnemonic + - Opcode + - Effect + - Description + * - ``BR`` + - ``0x9`` + - ``pc = pc + oreg`` + - Unconditional PC-relative branch. + * - ``BRZ`` + - ``0xA`` + - ``if areg == 0: pc = pc + oreg`` + - Branch if ``areg`` is zero. + * - ``BRN`` + - ``0xB`` + - ``if areg < 0: pc = pc + oreg`` + - Branch if ``areg`` is negative (signed). + * - ``BRB`` + - ``OPR 0x0`` + - ``pc = breg`` + - Branch to the absolute address held in ``breg`` (an ``OPR`` sub-op, see + below). + +Procedure calls are built from these primitives rather than from a dedicated +call instruction. The caller uses ``LDAP`` to compute the return address (the +address just past the call) into ``areg`` and a ``BR`` to jump to the callee's +entry point. The callee stores the return address it was handed, runs its body, +loads that return address back into ``breg``, and returns with ``BRB``, which +jumps to the absolute address in ``breg``. The full stack frame layout and +register usage are described in :doc:`../compiler/memory-and-calling`. + +Expression operations (``OPR``) +------------------------------- + +The ``OPR`` opcode (``0xD``) does not take an immediate in the usual sense: +instead its assembled operand selects an inter-register operation. 
The operand +values are: + +.. list-table:: + :header-rows: 1 + :widths: 12 14 30 44 + + * - Sub-op + - Operand + - Effect + - Description + * - ``BRB`` + - ``0x0`` + - ``pc = breg`` + - Branch to the address in ``breg`` (used for returns and computed jumps). + * - ``ADD`` + - ``0x1`` + - ``areg = areg + breg`` + - Add ``breg`` to ``areg``. + * - ``SUB`` + - ``0x2`` + - ``areg = areg - breg`` + - Subtract ``breg`` from ``areg``. + * - ``SVC`` + - ``0x3`` + - supervisor call + - Perform a system call selected by ``areg`` (see :doc:`syscalls`). + * - ``IN`` + - ``0x4`` + - ``areg =`` channel ``breg`` + - Receive a word from the channel on link slot ``breg`` (blocking). + * - ``OUT`` + - ``0x5`` + - channel ``breg`` ``= areg`` + - Send the word in ``areg`` over the channel on link slot ``breg`` + (blocking). + +The channel operations ``IN`` and ``OUT`` perform a synchronous rendezvous and +are described in detail in :doc:`channels`. + +Prefixes +-------- + +.. list-table:: + :header-rows: 1 + :widths: 12 12 36 40 + + * - Mnemonic + - Opcode + - Effect + - Description + * - ``PFIX`` + - ``0xE`` + - ``oreg = oreg << 4`` + - Positive prefix: shift the accumulated operand up by a nibble. + * - ``NFIX`` + - ``0xF`` + - ``oreg = 0xFFFFFF00 | (oreg << 4)`` + - Negative prefix: shift up and sign-extend, for negative or large operands. + +The prefix arithmetic and a worked example are given in +:doc:`instruction-encoding`. + +.. note:: + + For a compact one-page summary of every opcode and ``OPR`` sub-op, see + :doc:`../reference/instruction-quick-ref`. diff --git a/docs/architecture/overview.rst b/docs/architecture/overview.rst index 2a4ce60..8d1a833 100644 --- a/docs/architecture/overview.rst +++ b/docs/architecture/overview.rst @@ -1,4 +1,69 @@ The Hex architecture ==================== -.. todo:: Write this page (Stage 2). +Purpose +------- + +Hex is a deliberately minimal processor designed to explain how a computer +works.
It is small enough to implement in hardware, yet flexible enough to +execute substantial programs — including a self-hosting compiler for the +:doc:`X language <../language/overview>`. The architecture has four 32-bit +registers and a single-byte instruction format, so the whole datapath and +instruction decoder can be understood in their entirety, and the same model +serves both as a pedagogical example and as a working target for real tools. + +Design principles +----------------- + +The architecture follows a few simple rules that keep both the instruction +encoding and the hardware small: + +* **Short instructions** give efficient access to the stack and data regions. + The common operations — loading constants, loading and storing locals, + branching — each fit in a single byte. +* **Word-addressed memory, single-byte instructions.** Data is addressed a word + at a time, but instructions are one byte each. An instruction address is + therefore a byte position *within* a word: the low bits of the program counter + select a byte inside the word the high bits address. +* **Word-length independence.** Because instruction addresses are byte positions + within a word, the same instruction set works for any word length that is a + whole number of bytes. +* **Few registers, some special-purpose.** There are only four registers, and + most have a fixed role (see :doc:`registers`), which keeps the encoding dense + and the decode logic trivial. +* **Easy-to-decode instructions.** Every instruction has the same fixed shape, so + decoding is a matter of splitting one byte into two nibbles. + +The instruction format in brief +------------------------------- + +Every instruction is exactly 8 bits wide: a 4-bit operation in the high nibble +and a 4-bit immediate in the low nibble. The immediate alone covers the values +0–15, which is enough for the great majority of constants, offsets, and branch +distances in real code. 
+ +Two mechanisms extend this compact format: + +* ``OPR`` reinterprets its 4-bit operand not as an immediate but as a selector + for an inter-register operation (addition, subtraction, branch-to-register, + supervisor calls, and channel transfers). +* ``PFIX`` and ``NFIX`` are *prefix* instructions that shift accumulated nibbles + into a wider operand, so any immediate — however large, positive or negative — + can be built from a short chain of bytes. + +The full encoding, including the prefix arithmetic, is described in +:doc:`instruction-encoding`. + +Lineage +------- + +Hex is descended from the Transputer and from the *Simple 42* teaching +processor, inheriting their stack-oriented, prefix-extended instruction style +and their channel-based communication model. + +See also +-------- + +* :doc:`registers` — the machine state and the role of each register. +* :doc:`instruction-set` — the full instruction set, grouped by function. +* :doc:`execution` — the fetch–increment–execute cycle and the datapath. diff --git a/docs/architecture/registers.rst b/docs/architecture/registers.rst index 5d521a3..6dd4b45 100644 --- a/docs/architecture/registers.rst +++ b/docs/architecture/registers.rst @@ -1,4 +1,73 @@ Registers and machine state =========================== -.. todo:: Write this page (Stage 2). +The Hex machine state is intentionally tiny: four 32-bit registers and a flat, +word-addressed memory. There are no condition flags and no general-purpose +register file — every register has a defined role. + +The four registers +------------------- + +.. list-table:: + :header-rows: 1 + :widths: 12 28 60 + + * - Register + - Name + - Role + * - ``pc`` + - Program counter + - Byte address of the next instruction to fetch. + * - ``oreg`` + - Operand register + - Accumulates the immediate operand across ``PFIX``/``NFIX`` prefix chains; + supplies the operand to the instruction being executed. 
+ * - ``areg`` + - A register + - The primary evaluation register: the left operand of an arithmetic + operation and the place its result is left. + * - ``breg`` + - B register + - The secondary evaluation register: the right operand of an arithmetic + operation. + +Why ``oreg`` exists +------------------- + +Because an instruction carries only a 4-bit immediate, larger operands have to +be assembled a nibble at a time. ``oreg`` is the register that holds the +operand under construction. Each instruction first merges its own 4-bit +immediate into ``oreg`` with ``oreg = oreg | (inst & 0xf)``, and then uses +``oreg`` as its operand. Most instructions clear ``oreg`` back to ``0`` once +they have used it, so that the next instruction starts a fresh operand. + +The ``PFIX`` and ``NFIX`` prefixes are the exception: instead of clearing +``oreg``, they shift it left by four bits, leaving it primed for the next +instruction's nibble. A short run of prefixes therefore builds an arbitrarily +wide immediate before a single operative instruction consumes it. The exact +arithmetic is given in :doc:`instruction-encoding`, and the clear-after-use +behaviour is visible in the execution cycle in :doc:`execution`. + +The two evaluation registers +---------------------------- + +``areg`` and ``breg`` are the machine's two evaluation registers. Having two +distinct load targets — the ``LDAx`` family loads ``areg`` and the ``LDBx`` +family loads ``breg`` — lets the compiler set up *both* operands of a binary +operation directly, without first pushing one to the stack. For example, the +two operands of an addition are loaded into ``areg`` and ``breg`` and then +``OPR ADD`` combines them in place. This keeps the common case of a two-operand +expression down to a handful of single-byte instructions. + +Memory +------ + +Memory is word-addressed: each address selects one 32-bit word. 
Two words at the +base of memory have a fixed meaning: + +* **Word 0** holds a branch to the program's entry point, so that execution + beginning at address 0 jumps straight into the program. +* **Word 1** holds the stack pointer. + +The layout of the stack and the calling convention that uses word 1 are +described in :doc:`../compiler/memory-and-calling`. diff --git a/docs/architecture/simulator-model.rst b/docs/architecture/simulator-model.rst index 0cf1176..dbb96b8 100644 --- a/docs/architecture/simulator-model.rst +++ b/docs/architecture/simulator-model.rst @@ -1,4 +1,98 @@ The reference simulator ======================== -.. todo:: Write this page (Stage 2). +The clearest way to understand the Hex architecture is to read a complete +interpreter for it. The whole machine — fetch, decode, execute — fits in a short +loop over an array of memory and four register variables. This page presents +that loop as a pedagogical model and then relates it to the production C++ +simulator, ``hexsim``. + +State +----- + +The model needs only the memory array and the registers: + +.. code-block:: c + + uint32_t mem[MEM_SIZE]; /* word-addressed memory */ + uint32_t pc; /* program counter (byte address) */ + uint32_t areg, breg; /* the two evaluation registers */ + uint32_t oreg; /* the operand register */ + +The fetch–decode–execute loop +----------------------------- + +Each iteration fetches the instruction byte at ``pc``, advances ``pc``, folds the +low nibble into ``oreg``, and switches on the high nibble. Note how most cases +end by clearing ``oreg`` to zero, while ``PFIX``/``NFIX`` instead shift it: + +.. code-block:: c + + for (;;) { + /* Fetch one byte from the word-addressed memory, then advance pc. 
*/ + uint8_t inst = (mem[pc >> 2] >> ((pc & 3) << 3)) & 0xFF; + pc = pc + 1; + oreg = oreg | (inst & 0xF); /* accumulate the operand nibble */ + + switch (inst >> 4) { /* decode on the high nibble */ + case LDAM: areg = mem[oreg]; oreg = 0; break; + case LDBM: breg = mem[oreg]; oreg = 0; break; + case STAM: mem[oreg] = areg; oreg = 0; break; + case LDAC: areg = oreg; oreg = 0; break; + case LDBC: breg = oreg; oreg = 0; break; + case LDAP: areg = pc + oreg; oreg = 0; break; + case LDAI: areg = mem[areg + oreg]; oreg = 0; break; + case LDBI: breg = mem[breg + oreg]; oreg = 0; break; + case STAI: mem[breg + oreg] = areg; oreg = 0; break; + case BR: pc = pc + oreg; oreg = 0; break; + case BRZ: if (areg == 0) pc = pc + oreg; oreg = 0; break; + case BRN: if ((int)areg < 0) pc = pc + oreg; oreg = 0; break; + case PFIX: oreg = oreg << 4; break; + case NFIX: oreg = 0xFFFFFF00 | (oreg << 4); break; + case OPR: + switch (oreg) { + case BRB: pc = breg; oreg = 0; break; + case ADD: areg = areg + breg; oreg = 0; break; + case SUB: areg = areg - breg; oreg = 0; break; + case SVC: syscall(); oreg = 0; break; + } + break; + } + } + +A few points are worth drawing out: + +* **Fetch.** ``mem[pc >> 2]`` selects the word and ``(pc & 3) << 3`` is the bit + offset of the byte within it, so the byte extraction reflects the word-addressed + memory with byte-addressed instructions described in :doc:`overview`. +* **Operand accumulation.** Every instruction folds its nibble into ``oreg`` + before doing anything else. ``PFIX`` and ``NFIX`` leave ``oreg`` shifted rather + than cleared, so the next instruction's nibble extends it — this is the prefix + mechanism of :doc:`instruction-encoding`. +* **OPR dispatch.** ``OPR`` does not use ``oreg`` as a value but as a sub-op selector, branching + to add, subtract, branch-to-``breg``, or a supervisor call.
+ +Helper functions +---------------- + +The loop above defers a few details to small helpers: + +* **A load helper** reads the program image into ``mem`` at reset, including the + word-0 branch to the entry point and the word-1 stack pointer. +* ``syscall`` reads the stack pointer from ``mem[1]`` and dispatches on + ``areg``: exit, write a byte, or read a byte (see :doc:`syscalls`). +* ``simout`` / ``simin`` back the write and read calls, sending a byte to a + stream or fetching one from it. + +The channel operations ``IN``/``OUT`` are omitted from this minimal loop because +they require more than one processor; they are covered in :doc:`channels`. + +Relationship to ``hexsim`` +-------------------------- + +The production simulator ``hexsim`` is this same loop, made into a ``Processor`` +class with its registers and ``memory`` array as members and the ``switch`` as a +``step()`` method. On top of the core model it adds instruction **tracing**, +**multi-core networks** of processors connected by channels, and **deadlock +detection** when every processor is blocked on a channel. See +:doc:`../tools/index` for how to drive it. diff --git a/docs/architecture/syscalls.rst b/docs/architecture/syscalls.rst index d9fb198..c054413 100644 --- a/docs/architecture/syscalls.rst +++ b/docs/architecture/syscalls.rst @@ -1,4 +1,70 @@ System calls ============ -.. todo:: Write this page (Stage 2). +A program running on Hex reaches the outside world — input, output, and +termination — through *system calls*. These are not separate instructions but a +single ``OPR`` sub-operation, ``SVC`` (operand ``0x3``), that dispatches on a +small set of call numbers. + +The ``SVC`` mechanism +--------------------- + +Executing ``OPR SVC`` performs a system call. The call number is taken from +``areg``, and any further arguments are read from, and results written back to, +the running program's stack. + +The stack pointer lives in word 1 of memory (see :doc:`registers`).
On an +``SVC``, the machine reads ``sp = mem[1]`` and then locates the call's arguments +and result relative to ``sp``. The defined calls are: + +.. list-table:: + :header-rows: 1 + :widths: 14 8 78 + + * - Call + - ``areg`` + - Effect + * - ``EXIT`` + - ``0`` + - Halt the processor. The exit code is taken from ``mem[sp + 2]``. + * - ``WRITE`` + - ``1`` + - Write one byte to a stream. The byte value is ``mem[sp + 2]`` and the + stream id is ``mem[sp + 3]``. + * - ``READ`` + - ``2`` + - Read one byte from the stream given by ``mem[sp + 2]`` and store it into + ``mem[sp + 1]``. + +Any other value in ``areg`` is an invalid system call and raises a runtime error. + +By convention, byte input is truncated to its low 8 bits when stored, matching +the behaviour of the X compiler and the hardware; the simulator can optionally +sign-extend instead, which is useful when testing with negative values. + +Streams +------- + +The ``WRITE`` and ``READ`` calls take a stream id that selects where the byte +goes or comes from: + +* Stream ids below 256 use the default console streams — standard output for + ``WRITE`` and standard input for ``READ``. This is the usual case: stream + ``0`` is the console. +* Stream ids of 256 and above are *file-backed*. The file index is taken from + bits [10:8] of the id, selecting one of eight files named ``simout``\ *N* for + output and ``simin``\ *N* for input, opened on first use. + +Exit codes +---------- + +When a processor performs an ``EXIT`` call it halts and reports the exit code +read from its stack. In a multi-processor network the system's exit code is that +of the first processor to exit. The runner and simulator front-ends surface this +exit code to the caller; see :doc:`../tools/index`. + +See also +-------- + +For the full numeric reference of each call's number, arguments, and stack +layout, see :doc:`../reference/syscall-reference`. 
From 4d0cfe9da1fccdffefcd0ff46bf5b2230e3780af Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 15:03:46 +0100 Subject: [PATCH 05/10] docs: write the X language reference pages --- docs/language/concurrency.rst | 88 ++++++++++++++++++- docs/language/examples.rst | 111 +++++++++++++++++++++++- docs/language/expressions.rst | 82 +++++++++++++++++- docs/language/grammar.rst | 105 ++++++++++++++++++++++- docs/language/lexical.rst | 100 +++++++++++++++++++++- docs/language/overview.rst | 69 ++++++++++++++- docs/language/procedures-functions.rst | 112 ++++++++++++++++++++++++- docs/language/program-structure.rst | 57 ++++++++++++- docs/language/statements.rst | 101 +++++++++++++++++++++- 9 files changed, 816 insertions(+), 9 deletions(-) diff --git a/docs/language/concurrency.rst b/docs/language/concurrency.rst index 6c4de60..b0d63cc 100644 --- a/docs/language/concurrency.rst +++ b/docs/language/concurrency.rst @@ -1,4 +1,90 @@ Concurrency =========== -.. todo:: Write this page (Stage 3). +X expresses concurrency in the style of the Transputer and occam, kept as simple +as possible: **one process maps to one physical processor**. There is no +scheduler and no time-slicing — parallelism comes from *placing* sequential +processes on real Hex cores and wiring them together with channels. Parallel +processes share no memory; they communicate only by passing messages. + +This page covers the language constructs (``chan``, ``par`` and the ``!`` / ``?`` +operators). The underlying machine mechanism is described in +:doc:`../architecture/channels`, and the multi-core hardware in +:doc:`../hardware/network`. + +Channels +-------- + +A *channel* is a point-to-point link that carries one word per message. A +channel is declared with ``chan`` (at global scope, or as a local declaration of +a procedure):: + + chan a; + chan b; + +and passed into a process as a ``chan`` formal:: + + proc relay(chan in, chan out) is ... 
+ +A channel connects exactly two processes: one that sends on it and one that +receives. There is no shared memory between parallel processes, so channels are +the *only* way they exchange data. + +Send and receive +----------------- + +Two statements operate on channels: + +* **Send**, ``channel ! expression``, evaluates the expression and sends the + word over the channel. +* **Receive**, ``channel ? element``, receives a word and stores it in the + variable or array element. + +Communication is a synchronous, unbuffered **rendezvous**: the first party to +reach the channel blocks until its partner is ready, the word is exchanged, and +both continue. So a relay that forwards one value is simply:: + + proc relay(chan in, chan out) is + var v; + { in ? v; out ! v } + +``par`` +------- + +A ``par`` block runs its branches **in parallel, each on a separate processor**, +connected by the channels passed to them:: + + par { source(a); relay(a, b); sink(b) } + +Each branch must be a procedure call — it is the entry process for one +processor. The branches are written inside braces and separated by semicolons, +like a sequence, but they execute concurrently rather than in order. The +channels named in the calls (here ``a`` and ``b``) are what wire the processors +together: ``source`` and ``relay`` share channel ``a``, ``relay`` and ``sink`` +share ``b``. The same procedure may be placed on more than one processor with +different channel arguments — ``ring.x`` reuses one ``forwarder`` on two cores. + +``par`` is static and has no join: it is the program's top-level structure, each +branch runs to its own termination, and the machine halts when all branches +have halted. There is no code "after" a ``par``. + +When ``main`` ends in a top-level ``par``, the compiler does not emit a single +processor image. Instead it emits a **network container**: one image per core, +plus a table describing how the cores' channel slots are wired together. 
See +:doc:`../compiler/networks` for how this is built, and +:doc:`../architecture/channels` for the channel mechanism. + +A complete example +------------------ + +The three-stage pipeline ``source -> relay -> sink`` runs on three processors +connected by two channels. The source emits a character, the relay forwards it, +and the sink prints it: + +.. literalinclude:: ../../examples/pipe.x + :language: text + +Other concurrent example programs — ``pingpong.x`` (two cores exchanging a +value), ``ring.x`` (a token ring), ``buffer.x`` (streaming with a sentinel), +``sieve.x`` (a concurrent prime sieve) and more — are surveyed on the +:doc:`examples` page. diff --git a/docs/language/examples.rst b/docs/language/examples.rst index 2527be8..2e998c1 100644 --- a/docs/language/examples.rst +++ b/docs/language/examples.rst @@ -1,4 +1,113 @@ Example programs ================ -.. todo:: Write this page (Stage 3). +The ``examples/`` directory holds runnable X programs. They range from one-line +programs up to ``xhexb.x``, a complete X compiler written in X. This page tours +them; see :doc:`../tools/index` for how to compile and run a program (``xcmp``, +``hexsim``, ``xrun``). + +Sequential programs +------------------- + +These run on a single processor. + +.. list-table:: + :header-rows: 1 + :widths: 24 76 + + * - Program + - Description + * - ``exit.x`` + - The smallest program: ``main`` is ``skip``. + * - ``echo_char.x`` + - Reads a byte and writes it back out. + * - ``hello_putval.x`` + - Prints ``hello world`` one character at a time with ``putval``. + * - ``hello_prints.x`` + - Prints a string literal using a ``prints`` routine that unpacks the + packed string layout. + * - ``strlen.x`` + - Computes the length of a packed string. + * - ``printn.x`` / ``printhex.x`` + - Print a number in decimal / hexadecimal. + * - ``fib.x`` + - Recursive Fibonacci (the introductory example). + * - ``fac.x`` + - Recursive factorial. 
+ * - ``gcd.x`` + - Greatest common divisor by Euclid's algorithm; uses the ``div`` / ``rem`` + helpers. + * - ``collatz.x`` + - Length of the Collatz (3n+1) sequence; ``while`` loops and ``div`` / + ``rem``. + * - ``ackermann.x`` + - The Ackermann function — deep non-primitive recursion; a stress test of + calls and frames. + * - ``primes.x`` + - Prints primes below ``n`` by trial division; nested loops and early + ``return``. + * - ``binsearch.x`` + - Binary search over a sorted array. + * - ``reverse.x`` + - Reverses an array in place with a two-index swap loop. + * - ``bubblesort.x`` + - Bubble sort of an array, with a checker function. + * - ``hanoi.x`` + - Towers of Hanoi: recursion that drives output. + * - ``div.x`` / ``mul.x`` / ``mul2.x`` / ``exp2.x`` + - Software division, multiplication and powers of two (X has no built-in + ``*`` or ``/``). + * - ``globals.x`` + - Exercises global ``var`` / ``val`` / ``array`` declarations and + constant-expression array sizes. + * - ``xhexb.x`` + - A complete X compiler written in X — the largest example program. + +Concurrent programs +------------------- + +These end in a top-level ``par`` and compile to a *network* of cores (each +program's comments note which cores and channels it uses). They mirror the table in the +project README. + +.. list-table:: + :header-rows: 1 + :widths: 22 78 + + * - Program + - What it demonstrates + * - ``pipe.x`` + - Three-stage pipeline (``source -> relay -> sink``). + * - ``pingpong.x`` + - Two cores exchanging a value and echoing it back. + * - ``ring.x`` + - Token ring; reuses one ``forwarder`` process on two cores. + * - ``buffer.x`` + - Streaming a sequence through a buffer with a zero sentinel. + * - ``sieve.x`` + - Concurrent prime sieve (a pipeline of filter processes). + * - ``farm.x`` + - Worker farm: distributor → workers → collector (fan-out / fan-in). + * - ``reduce.x`` + - Parallel sum over a binary tree of cores.
+ * - ``scan.x`` + - Parallel prefix sum (scan) along a line of cores. + * - ``stencil.x`` + - 1D nearest-neighbour halo exchange with bidirectional channels. + * - ``mergesort.x`` + - Divide-and-conquer parallel sort with a stream merge. + * - ``horner.x`` + - Systolic polynomial evaluation (a multiply-accumulate pipeline). + +Two programs in full +-------------------- + +The Fibonacci program, a compact recursive sequential example: + +.. literalinclude:: ../../examples/fib.x + :language: text + +The three-stage pipeline, a minimal concurrent example: + +.. literalinclude:: ../../examples/pipe.x + :language: text diff --git a/docs/language/expressions.rst b/docs/language/expressions.rst index af86ebb..5ce645c 100644 --- a/docs/language/expressions.rst +++ b/docs/language/expressions.rst @@ -1,4 +1,84 @@ Expressions =========== -.. todo:: Write this page (Stage 3). +An *expression* computes a value. Expressions are built from operands combined +by monadic (unary) and diadic (binary) operators. The forms below match the +expression parser in ``src/xcmp.hpp``. + +Operands and elements +---------------------- + +The simplest expressions are *elements*: + +* a **variable** reference, ``name``; +* an **array element**, ``name[subscript]``, where the subscript is itself an + expression and indexing is from zero; +* an **integer literal**, ``42`` or ``#ff``; a **character constant**, ``'P'``; + a **string**, ``"hi"``; or the boolean literals ``true`` and ``false``; +* a **function or syscall call**, ``name(args)`` or ``number(args)``, which is + an expression yielding the call's result; +* a **parenthesised expression**, ``( expression )``, used both for grouping and + (with no operators between) wherever a sub-expression is wanted. + +Array elements are read and written through the same ``name[subscript]`` syntax; +as an expression it reads the element, and as the left side of ``:=`` it writes +it. 
+ +Monadic operators +----------------- + +Two prefix operators apply to a single element: + +* ``-`` negates: ``-n`` is ``0 - n`` (arithmetic is signed). +* ``~`` is logical/bitwise *not*: ``~e`` inverts ``e``. It is written ``~``, not + a ``not`` keyword. It appears in the examples as ``~lsu(x, y)``. + +Diadic operators +---------------- + +The binary operators are: + +.. list-table:: + :header-rows: 1 + :widths: 24 18 58 + + * - Category + - Operators + - Meaning + * - Arithmetic + - ``+`` ``-`` + - Signed addition and subtraction. + * - Logical + - ``and`` ``or`` + - Logical/bitwise conjunction and disjunction (keywords). + * - Relational + - ``=`` ``~=`` ``<`` ``<=`` ``>`` ``>=`` + - Equal, not-equal, and the four orderings. + +Note the spellings: not-equal is ``~=`` (not ``<>``), and the boolean +connectives ``and`` / ``or`` are reserved words while negation uses the ``~`` +operator. There is no division or multiplication operator built into the +language — programs needing them call helper functions such as the ``div`` / +``rem`` pair found in ``gcd.x`` and several other examples. + +No precedence, and associativity +-------------------------------- + +X has **no operator precedence**: no operator binds more tightly than any other. +An expression that mixes *different* operators must be parenthesised to make the +grouping explicit. For instance ``(x < 0) = (y < 0)`` and ``(n + n + n) + 1`` +both appear in the examples with brackets that a precedence-based language would +let you omit. A run of the *same* associative operator, however, may be chained +without brackets — ``1 + 2 + 3`` and ``a and b and c`` are accepted directly. + +Because relational and arithmetic operators sit at the same level, a comparison +used as a value (as in ``return x < y``) and a comparison used as a condition +(``while i < n do ...``) are the same kind of expression: a non-zero result is +true and zero is false. 
+ +Lowering +-------- + +How comparisons and boolean expressions are turned into the Hex processor's +conditional branches (``BRZ`` / ``BRN``) is covered in +:doc:`../compiler/codegen-idioms`. diff --git a/docs/language/grammar.rst b/docs/language/grammar.rst index 543946e..1662ccb 100644 --- a/docs/language/grammar.rst +++ b/docs/language/grammar.rst @@ -1,4 +1,107 @@ Grammar ======= -.. todo:: Write this page (Stage 3). +This page consolidates the grammar of X in one place. It is reconciled with the +recursive-descent parser in ``src/xcmp.hpp`` (and cross-checked against the +compiler written in X, ``examples/xhexb.x``); where it differs from the original +2014 language note, the differences are flagged below and the parser is taken as +authoritative. + +The notation is informal EBNF: ``|`` separates alternatives, ``[ x ]`` means an +optional ``x``, and ``{ x }`` means zero or more repetitions of ``x``. Terminals +are quoted. + +.. code-block:: text + + program := global-decls proc-decls + + global-decls := { global-decl } + global-decl := "val" name "=" expression ";" + | "var" name ";" + | "array" name "[" expression "]" ";" + | "chan" name ";" + + proc-decls := { proc-decl } + proc-decl := ( "proc" | "func" ) name "(" [ formals ] ")" "is" + local-decls statement + + local-decls := { local-decl } + local-decl := "val" name "=" expression ";" + | "var" name ";" + | "chan" name ";" + + formals := formal { "," formal } + formal := "val" name + | "var" name + | "array" name + | "chan" name + | "proc" name + | "func" name + + statement := "skip" + | "stop" + | "return" expression + | "if" expression "then" statement "else" statement + | "while" expression "do" statement + | "{" statement { ";" statement } "}" + | "par" "{" statement { ";" statement } "}" + | element ":=" expression + | element "!" expression + | element "?" 
element + | name "(" [ expr-list ] ")" + | number "(" [ expr-list ] ")" + + expr-list := expression { "," expression } + + expression := "-" element + | "~" element + | element [ binary-op element { binary-op element } ] + + binary-op := "+" | "-" | "and" | "or" + | "=" | "~=" | "<" | "<=" | ">" | ">=" + + element := name + | name "[" expression "]" + | name "(" [ expr-list ] ")" + | number "(" [ expr-list ] ")" + | number + | string + | "true" + | "false" + | "(" expression ")" + + name := alpha { alpha | digit | "_" } + number := digit { digit } | "#" hexdigit { hexdigit } + | "'" character "'" + string := '"' { character } '"' + +Notes on the grammar +-------------------- + +* **No precedence; chaining only for one operator.** ``binary-op`` does not + encode precedence levels. The parser will chain a repeated *associative* + operator (``a + b + c``), but an expression that mixes different operators must + be parenthesised. See :doc:`expressions`. + +* ``par`` **branches are restricted.** Syntactically a ``par`` block is a + brace-delimited, semicolon-separated list of statements, but each branch must + be a procedure call; the compiler rejects any other statement form as a + branch. See :doc:`concurrency`. + +* **Channel input and output** are statements (``element "!" expression`` and + ``element "?" element``), distinguished from assignment by the ``!`` / ``?`` + operator after the leading element. + +* **Calls double as statements and elements.** A ``name(...)`` or + ``number(...)`` call is both a statement (a procedure or syscall call) and an + element (a function or syscall result used in an expression). A statement that + begins with a number must be a syscall. + +* **No** ``valof`` **construct.** The 2014 note expressed function results with a + ``valof`` / ``return`` block. The parser has no ``valof``: a ``func`` body is + an ordinary ``statement`` (preceded by optional local declarations) that + yields its result with ``return``. See :doc:`procedures-functions`.
+ +* **Comments and escapes** follow the lexer, not the original note: ``|`` + introduces a to-end-of-line comment, and string/character escapes are + C-style (``\n``, ``\t``, ``\\`` …). See :doc:`lexical`. diff --git a/docs/language/lexical.rst b/docs/language/lexical.rst index b99cc90..241a49d 100644 --- a/docs/language/lexical.rst +++ b/docs/language/lexical.rst @@ -1,4 +1,102 @@ Lexical structure ================= -.. todo:: Write this page (Stage 3). +This page describes how the source text of an X program is broken into tokens. +The rules below match the lexer in ``src/xcmp.hpp`` (class ``Lexer``), which is +the authority where this description and the original 2014 language note differ. + +Character set +------------- + +Source is plain ASCII text. Whitespace — spaces, tabs and newlines — separates +tokens but is otherwise insignificant; it may be used freely to lay out a +program. There is no line-continuation or significant-indentation rule. + +Names +----- + +A *name* (identifier) begins with an alphabetic character and continues with any +number of alphabetic characters, decimal digits and underscores. Names are +**case sensitive**, so ``foo``, ``Foo`` and ``FOO`` are distinct. Examples drawn +from the example programs include ``div_x``, ``foo_BAR`` and ``bytesperword``. + +A name that matches a reserved word is treated as that keyword rather than as an +identifier. The reserved words are: + +.. code-block:: text + + and array chan do else false func if is + or par proc return skip stop then true val var while + +Literals +-------- + +**Integer literals** are word-sized. A decimal literal is a run of digits, for +example ``0``, ``10`` or ``256``. A hexadecimal literal is written with a +leading ``#`` followed by hexadecimal digits (``0``–``9``, ``a``–``f``, +``A``–``F``), for example ``#ff`` or ``#100``. (Note: the prefix is ``#``, not +``0x``.) + +**Character constants** are written between single quotes, as in ``'P'`` or +``'\n'``. 
A character constant denotes the integer value of that one character, +so it can be used anywhere an integer is expected — ``out ! 'P'`` sends the +code for ``P``, and ``rem(n, 10) + '0'`` converts a digit to its ASCII +character. + +**String literals** are written between double quotes, for example +``"hello world\n"``. A string is stored packed into consecutive words, with the +length of the string held in the low byte of the first word and the characters +following it; the ``prints`` procedure in ``examples/hello_prints.x`` shows how +this layout is read back at run time. Strings are passed to procedures as +``array`` arguments. + +**Boolean literals** ``true`` and ``false`` are reserved words denoting the +usual truth values. + +Comments +-------- + +A comment begins with a vertical bar ``|`` and runs to the end of the line; the +``|`` and everything after it on that line are ignored. (This is a single +end-of-line delimiter — the bar does not need to be closed by a matching bar.) +Comments are used both for whole-line headers and for trailing annotations: + +.. code-block:: text + + | A token ring of three processors. + { out ! 90; | inject the token + in ? v } | wait for it to return + +Escape sequences +---------------- + +Inside character and string literals, a backslash introduces an escape sequence. +The lexer recognises the following escapes; any other character after a +backslash is an error. + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Escape + - Meaning + * - ``\n`` + - Newline (line feed) + * - ``\r`` + - Carriage return + * - ``\t`` + - Horizontal tab + * - ``\'`` + - Single quote + * - ``\"`` + - Double quote + * - ``\\`` + - Backslash + +.. note:: + + The original 2014 language note used the occam-style ``*n`` escape convention + (``*n``, ``*t``, ``*'``, ``**``, ``*#hh``) and ``| ... |`` paired comments. 
+ The current compiler instead uses C-style backslash escapes and ``|`` + to-end-of-line comments as described above, and has no numeric ``*#hh``-style + escape. Where this page and the old note disagree, the compiler wins. diff --git a/docs/language/overview.rst b/docs/language/overview.rst index e9cbf01..aabe7a7 100644 --- a/docs/language/overview.rst +++ b/docs/language/overview.rst @@ -1,4 +1,71 @@ The X language ============== -.. todo:: Write this page (Stage 3). +X is a small imperative programming language designed to be easy to compile. +It has procedures and functions, the data abstractions ``val``, ``var`` and +``array``, the usual control flow (``if``/``then``/``else`` and ``while``/``do``), +and a message-passing form of concurrency built from ``par``, ``chan`` and the +``!`` / ``?`` channel operators. It compiles to the Hex processor's 8-bit +instruction set (see :doc:`../architecture/instruction-set`). + +The language is deliberately minimal. Its compiler, ``xcmp``, is itself +expressible in X — the program :doc:`examples/xhexb.x <examples>` is a working +X compiler written in X — so the whole tool chain can in principle be +bootstrapped on the Hex machine. Keeping the language small is what makes that +practical. + +Design choices +-------------- + +X trades expressive convenience for a grammar and code generator that are easy +to understand and to port: + +* **No operator precedence.** Operators do not bind more tightly than one + another; an expression mixing different operators must be parenthesised, for + example ``(a + b) < c``. A run of the *same* associative operator may be + written without brackets, as in ``1 + 2 + 3``. +* **Mandatory** ``else``. Every ``if`` has both a ``then`` and an ``else`` arm; + there is no one-armed conditional. Use ``else skip`` when the alternative does + nothing. +* **Pass-by-value scalars.** ``val`` parameters are passed by value. Arrays and + channels are passed by reference (the callee shares the caller's storage).
+* **Single-dimension arrays.** Arrays are one-dimensional and indexed from zero. +* **Procedures and functions, including higher-order ones.** A ``proc`` performs + a process and returns nothing; a ``func`` computes and returns a value with + ``return``. Procedures and functions can themselves be passed as ``proc`` and + ``func`` parameters. +* **Three data abstractions.** ``val`` names a compile-time constant, ``var`` + declares a mutable word-sized variable, and ``array`` declares a block of + words. + +A complete program +------------------ + +The Fibonacci program is a complete X program. ``main`` reads a number from +input stream ``0`` with the read syscall (``get``, syscall ``2``), computes its +Fibonacci number recursively, and passes the result to the exit syscall +(``exit``, syscall ``0``): + +.. literalinclude:: ../../examples/fib.x + :language: text + +A program is a sequence of global declarations followed by procedure and +function definitions; execution begins at ``main``. The numeric syscall +identifiers (``0`` for exit, ``1`` for write, ``2`` for read) are conventionally +bound to ``val`` names such as ``exit``, ``put`` and ``get`` at the top of a +program. + +Where to go next +---------------- + +The remaining pages document each part of the language in detail: + +* :doc:`lexical` — the character set, names, literals, comments and escapes. +* :doc:`program-structure` — declarations, abbreviations and scope. +* :doc:`statements` — the process forms (``skip``, assignment, sequence, + ``if``, ``while``). +* :doc:`expressions` — operands, operators and the no-precedence rule. +* :doc:`procedures-functions` — definitions, formals, calls and ``return``. +* :doc:`concurrency` — ``par``, ``chan`` and message passing. +* :doc:`examples` — a tour of the programs in ``examples/``. +* :doc:`grammar` — the consolidated grammar. 
diff --git a/docs/language/procedures-functions.rst b/docs/language/procedures-functions.rst index 0b92378..aadd96b 100644 --- a/docs/language/procedures-functions.rst +++ b/docs/language/procedures-functions.rst @@ -1,4 +1,114 @@ Procedures and functions ======================== -.. todo:: Write this page (Stage 3). +X has two kinds of named, callable definition. A **procedure** (``proc``) +carries out a process and returns no value; a **function** (``func``) computes +and yields a value. They share the same definition form and the same parameter +mechanism, differing only in whether they return a result. + +Definitions +----------- + +A procedure is defined as:: + + proc name(formals) is [local declarations] body + +and a function identically but with ``func``:: + + func name(formals) is [local declarations] body + +The parentheses are always present, even when there are no formals +(``proc main() is ...``). After ``is`` a definition may declare local ``val``, +``var`` and ``chan`` names (each terminated by a semicolon) before its single +body statement; that body is usually a brace-delimited sequence:: + + proc sort(array a, val n) is + var i; + var j; + var tmp; + { i := 0; + while i < n do ... + } + +Formal parameters +----------------- + +Each formal is introduced by a keyword that fixes how the argument is passed: + +.. list-table:: + :header-rows: 1 + :widths: 22 78 + + * - Formal + - Argument + * - ``val name`` + - A value, passed by value (the callee gets a private copy). + * - ``array name`` + - An array, passed by reference (callee and caller share the storage). + * - ``chan name`` + - A channel (see :doc:`concurrency`). + * - ``proc name`` + - A procedure, passed as a higher-order argument. + * - ``func name`` + - A function, passed as a higher-order argument. + +Because ``proc`` and ``func`` may appear as formals, X is higher-order: a +procedure can take another procedure or function as an argument and call it. 
The +``globals.x`` example declares ``proc formal_args(val f1, array f2, proc f3, +proc f4)`` to exercise this. + +Calls +----- + +A call names the callee and supplies a parenthesised, comma-separated list of +actual arguments:: + + sort(data, length) + exit(fib(get(0))) + +A call with no arguments still has empty parentheses: ``newline()``. The number +of actuals must match the formals, and each actual must be compatible with its +formal's kind (a value for ``val``, an array for ``array``, and so on). The +meaning of a call is *substitution*: the body runs with each formal standing for +its actual argument. + +A numeric call such as ``0(code)`` or ``put(c, 0)`` is a **syscall** — the +callee is a syscall number, written literally or through a ``val`` name. The +conventional bindings are ``exit`` = ``0``, ``put`` = ``1`` and ``get`` = ``2`` (see +:doc:`../architecture/syscalls`). + +Functions and ``return`` +------------------------ + +A function returns a value with a ``return`` statement: ``return expression`` +evaluates the expression and yields it as the function's result. Control flow +inside a function body may branch, but every path that completes the function +must end in a ``return`` — the final process executed by a function must be a +``return``. The recursive ``fib`` function shows the pattern, with a ``return`` +on each arm of the conditional:: + + func fib(val n) is + if n = 0 then return 0 + else if n = 1 then return 1 + else return fib(n-1) + fib(n-2) + +A function call is an :doc:`expression <expressions>` and may appear anywhere a +value is expected, including as an argument to another call (``exit(fib(...))``) +or within a larger expression (``fib(n-1) + fib(n-2)``). A procedure call is a +:doc:`statement <statements>` and yields no value. + +.. note:: + + The original language note described function bodies using a ``valof`` / + ``return`` form.
The current compiler has no ``valof`` keyword: a ``func`` + body is an ordinary statement (optionally preceded by local declarations) + that produces its result with ``return``, as shown above. + +Compilation +----------- + +Procedures and functions can be compiled either by *substitution* (inlining the +body at the call site) or as *closed subroutines* (a single shared body entered +by a call and left by a return), with arguments and locals laid out in an +activation frame. How ``xcmp`` lays out frames and links calls is described in +:doc:`../compiler/translator` and :doc:`../compiler/memory-and-calling`. diff --git a/docs/language/program-structure.rst b/docs/language/program-structure.rst index 0cccebe..5b88d00 100644 --- a/docs/language/program-structure.rst +++ b/docs/language/program-structure.rst @@ -1,4 +1,59 @@ Program structure ================= -.. todo:: Write this page (Stage 3). +An X program is a sequence of **global declarations** followed by a sequence of +**procedure and function definitions**:: + + <global declarations> + <procedure and function definitions> + +Execution begins at the procedure named ``main``. There is no separate +"top-level process" — the program *is* its definitions, and ``main`` is the one +that runs. The Fibonacci program in :doc:`overview` is a minimal example: two +``val`` declarations, then ``proc main`` and ``func fib``. + +Declarations +------------ + +A declaration introduces a name and gives its meaning for the rest of the +enclosing scope. + +``val name = expression`` + A *constant abbreviation*: ``name`` stands for the value of the constant + expression. Conventionally used for syscall numbers and other named + constants, for example ``val put = 1;`` or ``val length = 10;``. The + right-hand side is evaluated at compile time, so it may itself refer to + earlier ``val`` names: ``val c2 = 1 + 2 + 3;``. + +``var name`` + Declares a mutable, word-sized variable. A fresh global ``var`` is zero; a + local ``var`` is uninitialised until first assigned.
+ +``array name[expression]`` + Declares a one-dimensional array of the given (constant) number of words, + indexed from zero. The size expression may use earlier ``val`` names, e.g. + ``array data[length];``. Arrays may only be declared at global scope. + +``chan name`` + Declares a channel for message-passing concurrency. See :doc:`concurrency`. + +Each declaration is terminated by a semicolon. Global declarations may be +``val``, ``var``, ``array`` or ``chan``; the ``globals.x`` example exercises all +the constant-expression forms of array sizing. + +Scope and abbreviation +---------------------- + +A declaration is in scope from the point it appears to the end of the program +(for globals) or to the end of the enclosing procedure (for locals). The meaning +of an abbreviation is *substitution*: wherever the abbreviated name appears, it +denotes the thing it was bound to. For a ``val``, every use of the name behaves +as though the constant value had been written in its place; this is why ``val`` +right-hand sides must be constant expressions. + +Procedures and functions are likewise named definitions whose names may be used +(as calls, or passed as ``proc``/``func`` arguments) anywhere in scope. They are +described in full on the :doc:`procedures-functions` page. A procedure may +declare its own local ``val``, ``var`` and ``chan`` declarations between ``is`` +and its body; these are local abbreviations following the same substitution +rule. diff --git a/docs/language/statements.rst b/docs/language/statements.rst index 8addbe3..6b630b5 100644 --- a/docs/language/statements.rst +++ b/docs/language/statements.rst @@ -1,4 +1,103 @@ Statements ========== -.. todo:: Write this page (Stage 3). +A *statement* (or *process*) is a unit of execution. X has a small fixed set of +statement forms, listed here with their meaning. The syntax matches the parser +in ``src/xcmp.hpp`` and the example programs. 
+ +``skip`` +-------- + +``skip`` does nothing and terminates immediately. It is most often used as the +mandatory ``else`` arm of a conditional whose alternative has no effect:: + + if a[j] > a[j+1] then swap() else skip + +``stop`` +-------- + +``stop`` halts the process. Whereas ``skip`` completes successfully and lets +execution continue, ``stop`` does not proceed. + +Assignment +---------- + +:: + + variable := expression + +evaluates the expression and stores the result in the variable. The +left-hand side may be a simple variable or an array element:: + + i := 0 + a[j] := a[j+1] + div_x := div_x - y + +The assignment operator is ``:=`` (a colon immediately followed by an equals +sign); a bare ``=`` is the equality operator in expressions, not assignment. + +Sequence +-------- + +:: + + { p ; q ; ... } + +runs the statements ``p``, ``q``, ... in order. The braces ``{`` and ``}`` +delimit the sequence and the statements are separated by semicolons. By +convention the semicolon may be written at the start of the following line +rather than the end of the preceding one; both styles appear in the examples:: + + { i := 0; + while i < n do step(); + return done() } + +Conditional +----------- + +:: + + if e then p else q + +evaluates ``e``; if it is non-zero (true) the statement ``p`` runs, otherwise +``q`` runs. **Both arms are required** — there is no one-armed ``if``. Multi-way +branching is written by nesting conditionals in the ``else`` arm, exactly as in +``fib``:: + + if n = 0 then return 0 + else if n = 1 then return 1 + else return fib(n-1) + fib(n-2) + +Loop +---- + +:: + + while e do p + +evaluates ``e`` and, while it is non-zero (true), runs the statement ``p`` and +re-tests. 
The loop body is a single statement, usually a brace-delimited +sequence:: + + while i < n do + { sum := sum + a[i]; + i := i + 1 + } + +The ``while`` loop is definable in terms of the conditional: ``while e do p`` is +equivalent to "if ``e`` then { ``p``; while ``e`` do ``p`` } else skip". The +compiler emits it directly as a test-and-branch loop rather than by this +expansion; see :doc:`../compiler/codegen-idioms`. + +Other statement forms +--------------------- + +Three further forms are statements but are documented on their own pages because +they belong with larger features: + +* A **procedure or syscall call**, ``name(args)`` or ``number(args)`` — see + :doc:`procedures-functions`. +* A **return**, ``return expression`` — used in function bodies, see + :doc:`procedures-functions`. +* **Channel input and output**, ``chan ? element`` and ``chan ! expression``, + and the ``par`` block — see :doc:`concurrency`. From 02a5cdba3c31f24d83d1c4af0f62fb3668a25ab3 Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 16:35:40 +0100 Subject: [PATCH 06/10] docs: build strictly (-W) via the venv Sphinx in the CMake target --- cmake/FindSphinx.cmake | 5 ++++- docs/CMakeLists.txt | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake index 353a2c3..896a817 100644 --- a/cmake/FindSphinx.cmake +++ b/cmake/FindSphinx.cmake @@ -1,7 +1,10 @@ -# Look for an executable called sphinx-build +# Look for an executable called sphinx-build. Prefer the project-local +# virtualenv (docs/_venv, created from docs/requirements.txt) if present, so the +# pinned Sphinx is used; otherwise fall back to one on PATH. 
find_program( SPHINX_EXECUTABLE NAMES sphinx-build + HINTS ${CMAKE_SOURCE_DIR}/docs/_venv/bin DOC "Path to sphinx-build executable") include(FindPackageHandleStandardArgs) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index a4c7098..f0209e0 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -5,9 +5,11 @@ set(SPHINX_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sphinx) configure_file(conf.py ${CMAKE_CURRENT_BINARY_DIR}) +# Build strictly: -W turns warnings (orphan pages, broken cross-references) into +# errors and --keep-going reports them all, matching how the docs are verified. add_custom_target( Sphinx ALL - COMMAND ${SPHINX_EXECUTABLE} -b html -c ${CMAKE_CURRENT_BINARY_DIR} - ${SPHINX_SOURCE} ${SPHINX_BUILD} + COMMAND ${SPHINX_EXECUTABLE} -b html -W --keep-going + -c ${CMAKE_CURRENT_BINARY_DIR} ${SPHINX_SOURCE} ${SPHINX_BUILD} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Generating documentation with Sphinx") From 13fdc52eb83464189ae99a77ffac4ca58ec04d5b Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 18:26:05 +0100 Subject: [PATCH 07/10] docs: write the compiler internals reference pages --- docs/compiler/bootstrapping.rst | 58 ++++++++++++- docs/compiler/codebuffer.rst | 85 ++++++++++++++++++- docs/compiler/codegen-idioms.rst | 75 ++++++++++++++++- docs/compiler/lexical-analyser.rst | 75 ++++++++++++++++- docs/compiler/memory-and-calling.rst | 102 ++++++++++++++++++++++- docs/compiler/networks.rst | 74 ++++++++++++++++- docs/compiler/overview.rst | 66 ++++++++++++++- docs/compiler/syntax-analyser.rst | 85 ++++++++++++++++++- docs/compiler/translator.rst | 119 ++++++++++++++++++++++++++- 9 files changed, 730 insertions(+), 9 deletions(-) diff --git a/docs/compiler/bootstrapping.rst b/docs/compiler/bootstrapping.rst index c1788ea..e62fdb9 100644 --- a/docs/compiler/bootstrapping.rst +++ b/docs/compiler/bootstrapping.rst @@ -1,4 +1,60 @@ Bootstrapping ============= -.. todo:: Write this page (Stage 4). 
+The original X compiler, ``examples/xhexb.x``, is written in X itself — about +3,000 lines (3,053 at the time of writing). Because it is an X program, it can be +compiled to a Hex image and then used to compile X programs, including its own +source. This self-hosting property is the practical demonstration that X is +expressive enough to write a non-trivial program: a complete lexer, parser, tree +builder and code generator, all in the same language the compiler accepts. + +The bootstrap chain +------------------- + +Self-hosting raises the usual chicken-and-egg question: you need an X compiler +to compile the X compiler. The project breaks the cycle with a hand-prepared +assembly image of the compiler, ``tests/asm/xhexb.S`` (transcribed from the +original published assembly listing). The chain is: + +#. Assemble ``tests/asm/xhexb.S`` with the assembler ``hexasm`` to produce the + compiler image ``xhexb.bin``. +#. Run ``xhexb.bin`` under the simulator ``hexsim``, feeding an X program on + standard input. The compiler reads the source from stdin and writes the + compiled binary (the integration tests collect it as ``simout2``). +#. In particular, feeding ``examples/xhexb.x`` to ``xhexb.bin`` compiles the + compiler with itself, closing the loop. + +This is exactly what the integration tests in ``tests/tests.py`` do: ``setUp`` +assembles ``xhexb.bin`` from the ``.S`` source, the per-program tests pipe an X +file into it, and ``test_x_compiler_sim`` pipes ``xhexb.x`` itself in and checks +the reported tree size, program size and image size. The same test is repeated +against the RTL via the Verilator testbench ``hextb``. + +In rough form, the steps look like:: + + # Build the seed compiler image from the hand-assembled source. + hexasm tests/asm/xhexb.S -o xhexb.bin + + # Use it to compile an X program (source on stdin; image collected as simout2).
+ hexsim xhexb.bin < examples/xhexb.x + +The original compiler's structure mirrors the stages documented elsewhere in +this section: the lexer procedures (``rdline``, ``rch``, ``readnumber``, +``readstring`` and ``nextsymbol``; see :doc:`lexical-analyser`), the tree +constructors ``cons1`` … ``cons4`` that build the tagged-vector AST (see +:doc:`syntax-analyser`), and the translator that walks that tree to emit Hex +instructions. + +The C++ implementation +---------------------- + +Alongside the self-hosting compiler, the project provides a parallel +reimplementation in C++: ``src/xcmp.hpp`` (with ``src/lexer.hpp``) and the +command-line front end ``tools/xcmp.cpp``, built as the ``xcmp`` tool. It +accepts the same language and produces compatible images, but is an ordinary +host program — it does not require the bootstrap chain — and it is the compiler +these reference pages describe in detail. The two compilers are useful as a +cross-check on each other. + +The tools used above (``hexasm``, ``hexsim``, ``xcmp``, ``hextb`` and the +end-to-end runner ``xrun``) are described in :doc:`../tools/index`. diff --git a/docs/compiler/codebuffer.rst b/docs/compiler/codebuffer.rst index fee5ec1..7b79feb 100644 --- a/docs/compiler/codebuffer.rst +++ b/docs/compiler/codebuffer.rst @@ -1,4 +1,87 @@ Code buffer =========== -.. todo:: Write this page (Stage 4). +The code buffer holds the program as an ordered list of directives and is +responsible for turning that representation into an executable binary. In +``src/xcmp.hpp`` the ``xcmp::CodeBuffer`` class accumulates the directives +emitted by code generation (instructions in ``instrs`` and statically +allocated words in ``data``), and provides the ``gen…`` helpers used +throughout translation. The conversion of the final directive list into bytes +is performed by ``hexasm::CodeGen`` in ``src/hexasm.hpp``, which the compiler +invokes once the directive stream is complete. 
+ +Directives +---------- + +Each entry in the program is a ``hexasm::Directive``: a ``Label`` (zero size), +a ``Data`` word (always four bytes, word-aligned), an ``InstrImm`` (an +instruction with an immediate value), an ``InstrLabel`` (an instruction whose +operand is a label, marked relative or absolute), an ``InstrOp`` (a one-byte +``OPR`` instruction) or ``Padding``. A directive knows its own size and its +assigned byte offset. Crucially, the size of an immediate or label-relative +instruction is *not* fixed: it depends on the magnitude of the value it +encodes, because larger values need more PFIX/NFIX prefix nibbles. + +Minimal-prefix selection +------------------------ + +An operand value is emitted as a single 4-bit nibble in the instruction itself, +preceded by as many ``PFIX`` (positive) or ``NFIX`` (negative) prefix +instructions as are needed to supply the remaining nibbles. ``numNibbles`` +computes how many nibbles a value needs, and ``InstrImm::getSize`` / +``InstrLabel::getSize`` use it to report the encoded length (negative values +that fit in one nibble still need an ``NFIX``, hence a minimum of two). The +compiler always picks the shortest encoding for the value, so small constants +and nearby branches cost a single byte. + +For a *relative* label reference there is a circularity: the offset to the +target depends on the encoded length of the instruction, but that length +depends on the offset. ``instrLen`` resolves it by increasing the length until +it is consistent with the distance it has to span: + +.. literalinclude:: ../../src/hexasm.hpp + :language: cpp + :lines: 301-307 + +Iterative offset resolution +--------------------------- + +Because instruction sizes depend on offsets, and offsets depend on the sizes of +all preceding instructions, the program cannot be laid out in a single pass. 
+``hexasm::CodeGen::resolveLabels`` repeats the layout until it stabilises: on +each iteration it walks the program assigning byte offsets and label values, +and it stops when a full pass leaves the total size unchanged. + +.. literalinclude:: ../../src/hexasm.hpp + :language: cpp + :lines: 731-782 + +Each iteration aligns ``Data`` to a four-byte boundary, records each label's +byte offset, and rewrites each instruction's operand: a relative reference +becomes ``target - here - instrLen`` and an absolute reference becomes the +word-aligned target shifted right by two (an absolute *word* address). As +offsets shrink, some branches need fewer prefix bytes, which shifts later +offsets, so the process re-expands (and contracts) until the load points no +longer move. After resolution the program is padded out to a word boundary. + +Output format +------------- + +The binary image is produced by ``emitImage`` (used directly for a single image +and via ``emitBin`` for a file). Its layout, shared with the readers through +``src/heximage.hpp``, is: + +* a ``uint32`` header giving the program size in **words**; +* the program bytes — ``emitProgramBin`` writes each directive, emitting the + PFIX/NFIX prefix bytes followed by the opcode nibble for instructions, the + raw word for data (with alignment padding), and zero bytes for padding; +* an optional debug-info block: a string table followed by a symbol table + mapping ``func``/``proc`` names to byte offsets. + +The very first instruction of every image is a ``BR`` to the program's +``start`` label, and the second word (the ``SP_VALUE`` slot) holds the initial +stack pointer; this fixed preamble is what lets the simulator boot any image by +jumping to word 0. The memory map behind those first words is described in +:doc:`memory-and-calling`, the instruction encoding in +:doc:`../architecture/instruction-encoding`, and the on-disk image and network +container layouts in :doc:`../tools/formats`. 
diff --git a/docs/compiler/codegen-idioms.rst b/docs/compiler/codegen-idioms.rst index 83f2c6f..3b7fe4f 100644 --- a/docs/compiler/codegen-idioms.rst +++ b/docs/compiler/codegen-idioms.rst @@ -1,4 +1,77 @@ Code-generation idioms ====================== -.. todo:: Write this page (Stage 4). +The Hex instruction set is small: it has add and subtract but no multiply, +divide, remainder or bitwise instructions, and it has no instruction that +produces a boolean result. This page describes the recurring patterns the +compiler emits to bridge that gap. They are produced by the nested +``ExprCodeGen`` and ``StmtCodeGen`` visitors of ``CodeBuffer`` in +``src/xcmp.hpp``. The instructions themselves are described in +:doc:`../architecture/instruction-set`, and the source-level operators in +:doc:`../language/expressions`. + +Arithmetic +---------- + +``+`` and ``-`` map directly onto ``ADD`` and ``SUB``. ``genBinopOperands`` +materialises the left operand in the A register and the right operand in the B +register, then emits the single ``OPR ADD`` or ``OPR SUB``. When the right +operand is itself a non-trivial expression, it is evaluated first and spilled to +a temporary stack word so that generating the left operand cannot overwrite it. +Recall that ``OptimiseExpr`` has already rewritten unary ``-x`` to ``0 - x``, so +negation is just a subtraction. + +Logical ``and`` and ``or`` +-------------------------- + +The logical operators are short-circuiting and are implemented with conditional +branches rather than as values. For ``a and b`` the compiler evaluates ``a`` and +emits ``BRZ end``; if ``a`` is zero the result (still in the A register) is +false, otherwise control falls through and ``b`` is evaluated to give the +result. For ``a or b`` it evaluates ``a``, and if non-zero branches past the +evaluation of ``b``; otherwise ``b`` supplies the result. Unary ``~`` is +generated as a branch that produces ``LDAC 0`` or ``LDAC 1``. 
+ +Comparisons +----------- + +Recall from :doc:`translator` that ``OptimiseExpr`` reduces all six relational +operators to combinations of ``<``, ``=`` and ``~``, so code generation only +implements two comparisons directly. + +For ``a = b`` the compiler computes ``a - b`` and tests it for zero. As a +special case, comparing against zero skips the subtraction. It then materialises +a boolean with a ``BRZ`` and two ``LDAC`` arms: + +.. literalinclude:: ../../src/xcmp.hpp + :language: cpp + :lines: 2627-2651 + +For ``a < b`` it computes ``a - b`` and tests the sign with ``BRN`` (again, +comparing against zero skips the subtraction since the sign of ``a`` is enough): + +.. literalinclude:: ../../src/xcmp.hpp + :language: cpp + :lines: 2652-2676 + +Comparison in a condition +------------------------- + +Materialising a 0/1 boolean is only necessary when the comparison's *value* is +used. When a comparison appears directly as the condition of an ``if`` or +``while``, the surrounding statement generation emits its own ``BRZ`` against +the condition, so the value is consumed without ever building the explicit +boolean. The peephole pass ``OptimiseDirectives`` further cleans up the +generated branches (for example dropping a ``BR`` to the next instruction), so +conditionals are tight in practice. + +Multiplication, division and remainder +-------------------------------------- + +Because there are no multiply, divide or bitwise instructions, these operations +are not generated inline. They are written as ordinary X library procedures +built from add, subtract and the comparison idioms above, and called like any +other function. The example programs include shift-and-add multiplication in +``examples/mul.x`` (and ``examples/mul2.x``) and long division in +``examples/div.x`` (which also yields the remainder). A program that needs these +operations includes the corresponding routine and calls it. 
diff --git a/docs/compiler/lexical-analyser.rst b/docs/compiler/lexical-analyser.rst index eed0ce6..e27efe0 100644 --- a/docs/compiler/lexical-analyser.rst +++ b/docs/compiler/lexical-analyser.rst @@ -1,4 +1,77 @@ Lexical analyser ================ -.. todo:: Write this page (Stage 4). +The lexical analyser turns the source text into a stream of tokens. Each call +to ``getNextToken`` consumes characters from the input and returns the next +``xcmp::Token``, recording any associated value (an identifier name, an integer +value, or a string) so that the parser can retrieve it. The token kinds and the +lexer live in ``src/xcmp.hpp``; the shared character-stream machinery is in +``src/lexer.hpp``. + +The character-stream base +------------------------- + +Both the assembler and the X compiler share a common scanning layer, +``hexlex::LexerBase`` in ``src/lexer.hpp``. It owns the input stream +(opened from a file with ``openFile`` or from memory with ``loadBuffer``), +tracks the current line and column for error reporting, and provides the +low-level helpers ``readChar``, ``skipWhitespace``, ``readIdentifier`` and +``readDecInt``. A language-specific lexer derives from it and supplies the +token-recognition logic by overriding ``readToken``: + +.. literalinclude:: ../../src/lexer.hpp + :language: cpp + :lines: 85-91 + +The name table and keywords +--------------------------- + +``xcmp::Lexer`` keeps a ``TokenTable`` mapping identifier strings to tokens. +The language keywords (``and``, ``array``, ``chan``, ``do``, ``if``, ``proc``, +``while``, and the rest) are pre-loaded into the table by ``declareKeywords`` +when the lexer is constructed. When an alphabetic character is seen, +``readIdentifier`` scans the whole word and ``TokenTable::lookup`` is used to +classify it: a keyword returns its reserved ``Token``, while any other word is +recorded as ``Token::IDENTIFIER`` (and the name is retrievable with +``getIdentifier``). 
There is no separate symbol/name table at this stage — +this single keyword table is all the lexer needs; meaning is attached to names +later, during :doc:`symbol-table construction <translator>`. + +Numbers, characters and strings +------------------------------- + +``readToken`` recognises three flavours of literal: + +* **Numbers.** A leading digit is scanned as a decimal integer by ``readDecInt``; + a ``#`` prefix selects a hexadecimal integer (``readHexInt``). Both yield + ``Token::NUMBER`` with the value available from ``getNumber``. + +* **Character constants.** A single-quoted character is read by + ``readCharConst``, which also handles the escape sequences ``\\``, ``\'``, + ``\"``, ``\t``, ``\r`` and ``\n``. A character constant produces a + ``Token::NUMBER`` carrying the character's code. + +* **Strings.** A double-quoted string is read by ``readString`` (a sequence of + ``readCharConst`` calls) and produces a ``Token::STRING``; the text is + retrievable with ``getString``. + +Punctuation and operators are handled by a ``switch`` on the current character. +Multi-character tokens are recognised by lookahead: ``<`` and ``<=``, ``>`` and +``>=``, ``~`` and ``~=``, and ``:=`` (a bare ``:`` is an error). Comments begin +with ``|`` and run to the end of the line, after which lexing continues +recursively. + +Relationship to the original +----------------------------- + +In the self-hosting compiler ``examples/xhexb.x`` the same job is done by a +group of procedures: ``rdline`` and ``rch`` read the input a line and a +character at a time, ``readnumber`` scans integer literals in a given base, +``readstring`` collects string literals, and ``nextsymbol`` is the main +dispatch that classifies the next symbol (keywords are installed by +``declsyswords``). The C++ ``readToken`` plays the role of ``nextsymbol``, and +``hexlex::LexerBase`` subsumes ``rch``/``rdline``.
+ +The corresponding source-language rules — what constitutes an identifier, a +number, a string and the comment syntax — are described in +:doc:`../language/lexical`. diff --git a/docs/compiler/memory-and-calling.rst b/docs/compiler/memory-and-calling.rst index 7812ac3..95024f4 100644 --- a/docs/compiler/memory-and-calling.rst +++ b/docs/compiler/memory-and-calling.rst @@ -1,4 +1,104 @@ Memory layout and calling convention ===================================== -.. todo:: Write this page (Stage 4). +This page describes how the compiler arranges memory and how it implements +procedure and function calls. It documents what ``src/xcmp.hpp`` actually emits; +the relevant constants are defined near the top of the code-generation section +(``SP_OFFSET``, ``MAX_ADDRESS``, ``SP_LINK_VALUE_OFFSET``, +``SP_RETURN_VALUE_OFFSET``, ``FB_PARAM_OFFSET_FUNC``, ``FB_PARAM_OFFSET_PROC``). + +The memory map +-------------- + +Word addresses run from low to high. The fixed low words and the data regions +are laid out as follows: + +* **Word 0** — a ``BR`` to the program ``start`` label. Booting an image means + jumping to word 0. +* **Word 1** — the stack pointer (``SP_OFFSET`` is ``1``). It is initialised by + the compiler to a word emitted in the ``SP_VALUE`` slot, with the value + ``MAX_ADDRESS - globalsOffset - 1`` (see ``LowerDirectives``), i.e. just below + the global arrays. +* **Globals, constants and strings** — the data section. Each global ``var`` + is a ``DATA 0`` word with its own label (``visitPost(VarDecl&)``); large + constants are pooled (``genConstPool``) and string literals are packed into + words (``genString``), each with a label. +* **Program** — the emitted instructions. +* **Stack** — grows downward from the initial stack pointer. +* **Arrays** — global arrays are allocated at the very top of memory. 
+ ``visitPost(ArrayDecl&)`` allocates ``decl.getSize()`` words ending at + ``MAX_ADDRESS``, and stores the array's base address into the global word that + names it. A global array reference therefore loads a word whose contents are + the array's address. + +Constants that fit in a 16-bit operand are loaded directly with +``LDAC``/``LDBC`` (with PFIX/NFIX prefixes); larger constants are loaded from +the pooled data word with ``LDAM``/``LDBM`` (``genConst``). + +The stack frame +--------------- + +A ``Frame`` (one per procedure/function) tracks the running and maximum frame +size and the procedure's exit label. Storage is addressed as offsets from the +*frame base*, which is the first word above the callee's frame — that is, the +boundary with the caller's frame. The fixed slots at the base are: + +* offset 0 — the saved link (return) address (``SP_LINK_VALUE_OFFSET``); +* offset 1 — the return value slot, for functions (``SP_RETURN_VALUE_OFFSET``); +* the actual parameters, starting at ``FB_PARAM_OFFSET_PROC`` (``1``) for + procedures and ``FB_PARAM_OFFSET_FUNC`` (``2``) for functions, assigned in + order by ``FormalLocations``. + +Local variables and arrays are allocated *below* the frame base (negative +offsets) by ``LocalDeclLocations``, growing the frame size as they go. Because +the prologue extends the stack pointer by the whole frame size, accesses are +emitted against the frame base as ``LDAI_FB``/``LDBI_FB``/``STAI_FB`` and then +lowered by ``LowerDirectives`` to plain ``LDAI``/``LDBI``/``STAI`` with the +offset recomputed as ``frameSize - 1 + fbOffset``. + +Variable access then comes in two forms (``CodeBuffer::genVar``): + +* **Global**: ``LDAM label`` (or ``LDBM label``) — a direct memory load by + label. +* **Local**: ``LDAM 1`` (load the stack pointer from word 1) followed by an + indexed ``LDAI`` at the frame-relative offset. 
+ +The calling sequence +-------------------- + +A call is built by ``genProcCall``/``genFuncCall`` (and ``genSysCall`` for +system calls). The caller evaluates the actual parameters and writes them into +the callee's parameter slots, just above the current stack pointer +(``loadActuals``). Arguments that themselves contain calls are evaluated first +into temporary stack words (``genCallActuals``) so that nested calls do not +clobber the parameter area before it is fully populated. The caller then loads a +link (return) address with ``LDAP`` and branches with ``BR`` to the callee; +``LDAP`` makes the return address PC-relative, which keeps calls position +independent. After a function call the result is read back from the return-value +slot with ``LDAM 1; LDAI 1``. + +On entry, the **prologue** (emitted from the ``PROLOGUE`` directive in +``LowerDirectives``) saves the link address and opens the frame: + +* ``LDBM 1; STAI 0`` — store the link address (in A) into the link slot at offset 0 from the stack pointer; +* if the frame is non-empty, ``LDAC -frameSize; ADD; STAM 1`` — decrement the + stack pointer by the frame size (the stack grows downward). + +On exit, the **epilogue** (from the ``EPILOGUE`` directive) reverses this. For a +function it first stores the result (already in the A register) into the +caller-visible return-value slot with ``LDBM 1; STAI frameSize+1``; then, for +both procedures and functions, if the frame is non-empty it contracts the stack +pointer (``LDAC frameSize; ADD; STAM 1``), reloads the (unadjusted) stack +pointer, and returns with ``LDBI frameSize; OPR BRB`` — ``BRB`` branches to the +return address held in the B register. The procedure's exit label is the target +that ``ReturnStatement`` branches to, so all returns funnel through this single +epilogue.
+ +The program is bootstrapped by ``CodeGen::visitPre(Program&)``, which emits the +word-0 branch, the ``SP_VALUE`` placeholder, and a small entry sequence that +branches-and-links to ``main`` and, on return, performs an exit system call. +The ``stop`` statement compiles to the same exit syscall. + +See :doc:`../architecture/instruction-set` for the instructions used here and +:doc:`../architecture/registers` for the A, B and program-counter registers and +the role of ``BRB``. diff --git a/docs/compiler/networks.rst b/docs/compiler/networks.rst index bca7f93..75a544d 100644 --- a/docs/compiler/networks.rst +++ b/docs/compiler/networks.rst @@ -1,4 +1,76 @@ Network containers ================== -.. todo:: Write this page (Stage 4). +A single X program compiles to one processor image. When the program describes +a *network* of communicating processes — its ``main`` is a top-level ``par`` — +the compiler instead emits a **network container**: one image per processor, +plus a description of how the processors' channel link slots are wired together. +This page documents how the container is built and what it contains. The +language-level model is described in :doc:`../language/concurrency` and the +hardware channels in :doc:`../architecture/channels`. + +When a container is produced +---------------------------- + +In ``xcmp::Driver::run``, the binary stage first checks whether ``main`` is a +top-level ``par`` (``network::getTopLevelPar``). If it is not, the ordinary +single-image pipeline runs and a plain ``.bin`` is written. If it is, the driver +analyses the network and emits a container instead. The parser enforces that +each branch of a top-level ``par`` is a procedure call — the entry process for +one processor. + +Analysing the network +--------------------- + +``network::analyseNetwork`` (in ``src/xcmp.hpp``) turns the ``par`` into a +``Network`` of processors and wiring edges: + +* Each ``par`` branch becomes one processor, running the named entry procedure. 
+ Its channel arguments are assigned **link slots** in argument order (a + processor may use at most four channels — the 4-link limit is checked). +* The body of each entry procedure is scanned (``ChannelDirections``) to see + which channel formals it writes (``!``) and which it reads (``?``). +* Channels are matched by the variable passed as the argument: every channel + must connect **exactly two** processes, with **exactly one writer and one + reader**. A violation raises a ``NetworkError``. Each validated channel + becomes an ``Edge`` recording the two ``(processor, slot)`` endpoints. + +Per-processor images +-------------------- + +For each processor, ``compileProcessorImage`` re-parses the source and rewrites +``main`` to a single call to that processor's entry procedure, passing its +link-slot indices ``0 .. n-1`` as constants, then runs the full single-image +pipeline. Each processor therefore boots straight into its own process, and a +channel formal is simply the integer index of the link slot it uses (a channel +formal is passed by value, like a ``val``). + +The container format +-------------------- + +The container is written by ``emitNetworkContainer`` and read back by +``hexcontainer::read`` in ``src/hexcontainer.hpp``. The reader is shared by the +C++ simulator ``hexsim`` and the Verilator testbench ``hextb`` so the two cannot +drift. The layout is little-endian: + +.. literalinclude:: ../../src/hexcontainer.hpp + :language: cpp + :lines: 16-25 + +That is: the magic word ``0x4E584548`` (the ASCII ``"HEXN"``, defined as +``network::CONTAINER_MAGIC`` in the compiler and ``hexcontainer::MAGIC`` in the +reader), the processor and edge counts, the edges (each four words: ``procA``, +``slotA``, ``procB``, ``slotB``), then each processor's image as a size-prefixed +standard single-image binary. 
+ +Booting and running a network +----------------------------- + +Detection is by the magic word: ``hexcontainer::read`` treats a file that lacks +``"HEXN"`` as a single plain image (the whole file), and one that has it as a +network. ``hexsim`` and ``hextb`` use this to decide whether to boot a single +core or the whole network, wiring up the cores' link slots according to the +edges. ``hexsim`` reports the exit code of the first processor to halt, and +detects deadlock when every core is simultaneously blocked on a channel. The +on-disk single-image and container formats are catalogued in +:doc:`../tools/formats`. diff --git a/docs/compiler/overview.rst b/docs/compiler/overview.rst index 9dc0d91..4e41a79 100644 --- a/docs/compiler/overview.rst +++ b/docs/compiler/overview.rst @@ -1,4 +1,68 @@ The compiler ============ -.. todo:: Write this page (Stage 4). +The X compiler translates a program written in the :doc:`X language +<../language/overview>` into a binary image for the Hex processor's 8-bit +:doc:`instruction set <../architecture/instruction-set>`. It is deliberately +simple: it performs only a handful of local optimisations, so that the object +code tracks the structure of the source closely and is easy to follow when +reading a trace or disassembly. The generated code is also *position +independent* — branches are encoded as PC-relative offsets and the only +absolute addresses are those of statically-allocated data words — so an image +can be loaded at any base address. + +Two implementations +------------------- + +There are two compilers for X, which accept the same language: + +* ``examples/xhexb.x`` — the original *self-hosting* compiler, written in X + itself (around 3,000 lines). Because it is written in X it can compile its + own source; see :doc:`bootstrapping`. + +* ``src/xcmp.hpp`` plus ``src/lexer.hpp`` — a modern C++ reimplementation, + inspired by ``xhexb.x`` and the LLVM Kaleidoscope tutorial. 
The command-line + front end is ``tools/xcmp.cpp``. + +These reference pages document the C++ implementation in ``src/xcmp.hpp``, +which is the primary compiler used in the project, and note where its +structure differs from the original. + +The pipeline +------------ + +Compilation is driven by ``xcmp::Driver::run`` in ``src/xcmp.hpp``, which +threads the source through a sequence of passes. Each stage has its own +reference page: + +#. **Lexical analysis** — ``xcmp::Lexer`` (built on ``hexlex::LexerBase`` in + ``src/lexer.hpp``) turns source text into a stream of ``xcmp::Token`` + values. See :doc:`lexical-analyser`. + +#. **Syntax analysis** — ``xcmp::Parser`` is a recursive-descent parser that + builds an abstract syntax tree of ``xcmp::AstNode`` subclasses. See + :doc:`syntax-analyser`. + +#. **Semantic passes and optimisation over the tree** — ``CreateSymbols`` + populates a ``SymbolTable``, ``ConstProp`` folds constant expressions, and + ``OptimiseExpr`` normalises relational and unary operators. See + :doc:`translator`. + +#. **Translation (code generation)** — ``CodeGen`` walks the tree and emits a + sequence of intermediate directives into a ``CodeBuffer``; ``LowerDirectives`` + resolves frames and calling sequences, and ``OptimiseDirectives`` cleans up + the instruction stream. See :doc:`translator`, :doc:`codegen-idioms` and + :doc:`memory-and-calling`. + +#. **Assembly and emission** — the directive stream is handed to + ``hexasm::CodeGen`` (in ``src/hexasm.hpp``), which iteratively resolves + branch offsets and emits the binary image. See :doc:`codebuffer`. + +The same ``Driver`` can stop after any stage to report the intermediate form +(tokens, tree, intermediate instructions, lowered or optimised directives, or +assembly text); these correspond to the ``DriverAction`` values and the +command-line options of ``tools/xcmp.cpp`` (``--tokens``, ``--tree``, +``--insts``, ``-S`` and so on). 
+ +When ``main`` is a top-level ``par``, the binary stage instead emits a +*network container* with one image per processor; see :doc:`networks`. diff --git a/docs/compiler/syntax-analyser.rst b/docs/compiler/syntax-analyser.rst index edbf84b..673cbcf 100644 --- a/docs/compiler/syntax-analyser.rst +++ b/docs/compiler/syntax-analyser.rst @@ -1,4 +1,87 @@ Syntax analyser =============== -.. todo:: Write this page (Stage 4). +The syntax analyser reads the token stream and builds an abstract syntax tree +(AST). It is a hand-written *recursive-descent* parser, ``xcmp::Parser`` in +``src/xcmp.hpp``: each grammar production corresponds to a parsing method, and +the structure of those methods mirrors the structure of the +:doc:`grammar <../language/grammar>`. + +Recursive descent +----------------- + +``Parser`` holds a reference to the lexer and drives it one token at a time. +The helper ``expect`` checks that the current token is the one required and +advances past it (raising ``UnexpectedTokenError`` otherwise), and +``parseIdentifier`` reads a name. The entry point ``parseProgram`` parses the +global declarations followed by the procedure and function declarations: + +.. literalinclude:: ../../src/xcmp.hpp + :language: cpp + :lines: 1961-1969 + +From there the methods descend through the grammar: +``parseGlobalDecls``/``parseLocalDecls`` and ``parseDecl`` for ``val``, ``var``, +``array`` and ``chan`` declarations; ``parseProcDecl`` for procedure and +function bodies (including their formals and local declarations); +``parseStatement``/``parseStatements`` for the statement forms; and +``parseExpr``/``parseElement`` for expressions. + +Expressions are kept simple: there is no operator precedence. ``parseExpr`` +reads a leading element, optionally negated by a unary ``-`` or ``~``, and at +most one binary operator. 
Chains are only permitted for the associative +operators ``+``, ``and`` and ``or`` (see ``isAssociative`` and +``parseBinOpRHS``); any other combination of operators must be explicitly +bracketed. ``parseElement`` handles the atoms: variable references, array +subscripts, procedure/function calls, numeric system calls, numbers, strings, +``true``/``false`` and parenthesised expressions. + +The AST +------- + +The tree is a hierarchy of C++ classes rooted at ``xcmp::AstNode``. This is a +significant departure from the original ``xhexb.x`` compiler, which represents +the tree as tagged vectors built by the ``cons1`` … ``cons4`` constructor +functions; in the C++ version each node kind is its own class with typed +fields. The main families are: + +* **Expressions** (``Expr`` and subclasses): ``BinaryOpExpr``, ``UnaryOpExpr``, + ``NumberExpr``, ``BooleanExpr``, ``StringExpr``, ``VarRefExpr``, + ``ArraySubscriptExpr`` and ``CallExpr``. Every ``Expr`` carries an optional + constant value, set later by constant propagation (``isConst``/``getValue``). + +* **Declarations** (``Decl``): ``ValDecl``, ``VarDecl``, ``ArrayDecl`` and + ``ChanDecl``. + +* **Formals** (``Formal``): ``ValFormal``, ``VarFormal``, ``ArrayFormal``, + ``ProcFormal``, ``FuncFormal`` and ``ChanFormal``. + +* **Statements** (``Statement``): ``SkipStatement``, ``StopStatement``, + ``ReturnStatement``, ``IfStatement``, ``WhileStatement``, ``SeqStatement``, + ``CallStatement``, ``AssStatement``, ``ParStatement``, ``OutStatement`` and + ``InStatement``. + +* **Top level**: a ``Proc`` (used for both procedures and functions, with an + ``isFunction`` flag) holds its formals, local declarations and body + statement; a ``Program`` holds the global declarations and the list of + ``Proc`` definitions. 
+ +For example, parsing a statement that begins with an identifier produces a +``CallStatement``, an ``OutStatement`` (``!``), an ``InStatement`` (``?``) or an +``AssStatement`` depending on what follows the element: + +.. literalinclude:: ../../src/xcmp.hpp + :language: cpp + :lines: 1871-1895 + +The visitor pattern +------------------- + +Each node implements ``accept(AstVisitor *)``, which calls the visitor's +``visitPre`` method, recurses into children, then calls ``visitPost``. All the +later passes — symbol-table construction, constant propagation, the expression +optimiser, code generation and the AST printer — are written as subclasses of +``AstVisitor``. A visitor can also replace an expression node in place (via +``setExprReplacement``), which is how the optimiser rewrites the tree. The +``--tree`` option of ``tools/xcmp.cpp`` runs the ``AstPrinter`` visitor to dump +the parsed tree. diff --git a/docs/compiler/translator.rst b/docs/compiler/translator.rst index 446930e..8fb2fd8 100644 --- a/docs/compiler/translator.rst +++ b/docs/compiler/translator.rst @@ -1,4 +1,121 @@ Translator ========== -.. todo:: Write this page (Stage 4). +The translator turns the abstract syntax tree into a sequence of Hex +instructions. In ``src/xcmp.hpp`` this is not a single pass but a short +sequence of tree visitors that prepare the tree, followed by the ``CodeGen`` +visitor that emits intermediate directives into a ``CodeBuffer``, and two +lowering passes that turn those directives into concrete instructions. This +page covers scope handling, the optimisations, and how statements, expressions +and control flow are generated. The memory map and the calling sequence are +described separately in :doc:`memory-and-calling`, and the individual +instruction sequences in :doc:`codegen-idioms`. + +Symbols and scope +----------------- + +Names are resolved through a ``SymbolTable``, keyed by a +``(scope, name)`` pair. 
The scope is either the empty string (global) or the +name of the enclosing procedure; X has only two levels of scope, so a flat map +is sufficient. The ``CreateSymbols`` visitor walks the whole tree first and +inserts a ``Symbol`` for every declaration, formal, procedure and function, +recording its ``SymbolType`` (``VAL``, ``VAR``, ``ARRAY``, ``FUNC``, ``PROC`` +or ``CHAN``) and a pointer back to its AST node. ``SymbolTable::lookup`` tries +the current scope first and falls back to the global scope, so a procedure can +refer to globals as well as its own formals and locals. + +Tree optimisations +------------------ + +Two visitors run over the tree before code generation. Both are intentionally +local and simple. + +**Constant propagation** (``ConstProp``). This pass annotates every ``Expr`` +that can be evaluated at compile time with its constant value. ``NumberExpr`` +and ``BooleanExpr`` are constant by definition; a ``VarRefExpr`` or a ``call`` +that resolves to a ``val`` declaration takes that declaration's value; and a +``BinaryOpExpr`` or ``UnaryOpExpr`` whose operands are all constant is folded +to a single value. The fold covers ``+``, ``-``, all six relational operators, +``and``, ``or``, unary minus and unary ``~``: + +.. literalinclude:: ../../src/xcmp.hpp + :language: cpp + :lines: 2153-2196 + +The same pass also validates and resolves system calls: a numeric callee, or a +``val`` whose value names a syscall, is checked against ``hex::Syscall``. + +**Expression normalisation** (``OptimiseExpr``). This pass rewrites the +relational and unary operators so that code generation only has to deal with +``<``, ``=`` and ``~``. The rewrites are: + +* ``a ~= b`` becomes ``~(a = b)`` +* ``a >= b`` becomes ``~(a < b)`` +* ``a > b`` becomes ``b < a`` +* ``a <= b`` becomes ``~(b < a)`` +* unary ``-x`` becomes ``0 - x`` + +Each rewrite installs a replacement node via ``setExprReplacement``, which the +parent node then moves into place. 
There is no general algebraic +simplification, common-subexpression elimination or strength reduction — the +compiler does only what is described here, keeping the object code close to the +source. + +Code generation +--------------- + +``CodeGen`` walks the tree and emits an *intermediate* instruction stream into +a ``CodeBuffer``. Real Hex instructions (``LDAM``, ``LDAC``, ``ADD``, ``BR``, +``OPR`` and so on) are emitted directly through the ``gen…`` helpers, but +frame-relative accesses and the procedure prologue/epilogue are emitted as +*placeholder* directives (``LDAI_FB``/``STAI_FB``, ``PROLOGUE``, ``EPILOGUE``, +``SP_VALUE``) whose concrete form depends on the final frame size, which is not +yet known. These are concrete subclasses of ``hexasm::Directive`` +(``IntermediateDirective``, ``InstrStackOffset``, ``Prologue``, ``Epilogue``, +``SPValue``). + +Expressions are generated by the nested ``ExprCodeGen`` visitor, which targets +the A or B register, and statements by ``StmtCodeGen``. Per-procedure setup in +``CodeGen::visitPre(Proc&)`` creates a ``Frame``, assigns stack offsets to +formals (``FormalLocations``) and locals (``LocalDeclLocations``), and emits the +prologue before the body; ``visitPost(Proc&)`` emits the epilogue. + +Control flow +------------ + +Control flow is threaded with generated labels (``CodeBuffer::getLabel`` +produces ``lab0``, ``lab1``, …) and the conditional branches ``BRZ`` (branch +if A is zero) and ``BRN`` (branch if A is negative): + +* ``IfStatement`` emits the condition, a ``BRZ`` to the else/end label, the + then-branch, a ``BR`` past the else, then the else-branch. A ``skip`` in + either arm is recognised and that arm's code (and branch) is omitted. + +* ``WhileStatement`` emits a begin label, the condition, a ``BRZ`` to the end + label, the body, and a ``BR`` back to the begin label. + +* ``ReturnStatement`` evaluates the result expression and branches to the + current frame's exit label (set up by the prologue/epilogue). 
Tail calls are + not specially handled — the code contains ``TODO`` notes acknowledging this — + so a ``return f(...)`` is an ordinary call followed by a branch to the exit. + +* Procedure and function calls go through ``genProcCall``/``genFuncCall``, + which load actual parameters into the callee's frame, branch with a link + address (``LDAP`` + ``BR``), and (for functions) read the result back. The + details are in :doc:`memory-and-calling`. + +Lowering and directive optimisation +----------------------------------- + +After ``CodeGen``, ``LowerDirectives`` replaces every placeholder directive +with concrete instructions now that frame sizes are known: ``SP_VALUE`` becomes +the initial stack-pointer word followed by the data section, ``PROLOGUE`` and +``EPILOGUE`` expand to the stack-pointer adjustment and return sequence, and the +``*_FB`` accesses are converted to plain ``LDAI``/``LDBI``/``STAI`` with the +offset recomputed from the final frame size. + +Finally ``OptimiseDirectives`` runs a small peephole pass over the lowered +stream, matching three patterns: a ``BR`` to the immediately following label +(dropped), a ``STAM x; LDAM x`` redundant reload (the load dropped), and the +array store/reload sequence ``LDBM 1; STAI x; LDAM 1; LDAI x`` (the reload +dropped). The result is handed to the assembler; see :doc:`codebuffer`. 
From 03b1eeb101b247898d6463ef98583121a10d9a24 Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 18:32:54 +0100 Subject: [PATCH 08/10] docs: write the RTL implementation reference pages --- docs/hardware/core.rst | 138 ++++++++++++++++++++++++++++- docs/hardware/memory-and-links.rst | 94 +++++++++++++++++++- docs/hardware/network.rst | 89 ++++++++++++++++++- docs/hardware/overview.rst | 105 +++++++++++++++++++++- docs/hardware/testbench.rst | 93 ++++++++++++++++++- 5 files changed, 514 insertions(+), 5 deletions(-) diff --git a/docs/hardware/core.rst b/docs/hardware/core.rst index b214f98..de04a44 100644 --- a/docs/hardware/core.rst +++ b/docs/hardware/core.rst @@ -1,4 +1,140 @@ Processor core ============== -.. todo:: Write this page (Stage 5). +The processor datapath lives in ``rtl/processor.sv``. It implements the abstract +execution cycle of :doc:`../architecture/execution` directly: each clock edge +fetches one instruction, decodes it, and updates the architectural registers. +There is no pipeline — every register is updated combinationally from the +current instruction and the current register values, and the whole next state is +committed on the rising clock edge. + +Registers and state +------------------- + +The processor holds exactly the four architectural registers of +:doc:`../architecture/registers`, each as a flip-flop with a combinational +"next value": + +.. list-table:: + :header-rows: 1 + :widths: 20 25 55 + + * - Register + - State / next + - Role + * - PC + - ``pc_q`` / ``pc_d`` + - Program counter (a byte address, ``MEM_ADDR_WIDTH`` = 21 bits). + * - AREG + - ``areg_q`` / ``areg_d`` + - Accumulator and ALU result; also the value sent on ``OUT`` and the + received value on ``IN``. + * - BREG + - ``breg_q`` / ``breg_d`` + - Second ALU operand and index register; its low bits select the channel + slot on a channel op. + * - OREG + - ``oreg_q`` / ``oreg_d`` + - Operand register accumulated by ``PFIX`` / ``NFIX``. 
+ +All four are reset to zero and, on every non-stalled cycle, take their ``_d`` +values: + +.. literalinclude:: ../../rtl/processor.sv + :language: verilog + :lines: 48-59 + +Instruction fetch and decode +---------------------------- + +Fetch is trivial: the processor drives ``o_f_addr = pc_q`` with ``o_f_valid`` +permanently high, and the memory returns the addressed byte on ``i_f_data``. The +byte is typed as ``hex_pkg::instr_t``, a packed struct of a 4-bit ``opcode`` and +a 4-bit ``operand``, so decode is just field extraction: + +.. literalinclude:: ../../rtl/processor.sv + :language: verilog + :lines: 61-69 + +The shared opcode values come from ``rtl/hex_pkg.sv``, which is the single source +of truth for the encoding documented in :doc:`../architecture/instruction-set`: + +.. literalinclude:: ../../rtl/hex_pkg.sv + :language: verilog + :lines: 27-58 + +These enumerators carry exactly the opcode numbers in the instruction-set +reference — ``LDAM = 0x0`` … ``NFIX = 0xF`` — so the RTL ``unique case`` +statements over ``instr.opcode`` correspond directly to the table there. The +register-to-register operations live behind ``OPR`` (``0xD``) and are selected by +the operand via ``opr_opcode_t`` (``ADD``, ``SUB``, ``SVC``, ``IN``, ``OUT``). +``instr_svc`` / ``instr_in`` / ``instr_out`` are decoded as ``OPR`` with the +matching operand. + +The operand register and PFIX/NFIX +---------------------------------- + +Every instruction's effective operand is ``opr_d = oreg_q | instr.operand`` — the +accumulated prefix bits OR'd with the current instruction's 4-bit operand field. +``PFIX`` updates ``oreg_d = opr_d << 4`` to shift the accumulated value up a +nibble, and ``NFIX`` does the same but with the sign bits set +(``32'hFFFFFF00 | (opr_d << 4)``) so that negative immediates can be built. For +any non-prefix instruction ``oreg_d`` is cleared back to zero, so the operand +accumulation is consumed by the instruction that follows the prefix chain. 
This +is the hardware realisation of the PFIX/NFIX scheme in +:doc:`../architecture/instruction-encoding`. + +The A/B multiplexors and the ALU +-------------------------------- + +Each register's next value is a multiplexor over the opcode. ``areg_d`` is the +busiest: it selects between a data-memory word (``LDAM`` / ``LDAI``), the operand +(``LDAC``), a PC-relative address (``LDAP``), the received channel word +(``IN``), and the ALU outputs for ``ADD`` / ``SUB``: + +.. literalinclude:: ../../rtl/processor.sv + :language: verilog + :lines: 108-125 + +The ALU is just the ``areg_q + breg_q`` and ``areg_q - breg_q`` adders inside +this multiplexor — there is no separate ALU block. ``breg_d`` is a smaller mux +(``LDBM`` / ``LDBI`` from memory, ``LDBC`` from the operand), and ``pc_d`` +defaults to ``pc_q + 1`` but is overridden by the branch opcodes (``BR``, +``BRZ``, ``BRN``, and ``OPR BRB`` which jumps to ``breg``). + +Memory and syscall interface +---------------------------- + +The data-memory port is driven combinationally from the decode. ``o_d_valid`` is +asserted for the load/store opcodes, ``o_d_we`` for the two stores +(``STAM`` / ``STAI``), and ``o_d_addr`` is computed per opcode — a direct operand +address for the ``*M`` forms and a base-plus-offset (``areg``/``breg`` plus +operand) for the indexed ``*I`` forms. Stores always present ``areg_q`` on +``o_d_data``. Supervisor calls are surfaced by asserting ``o_syscall_valid`` on +``instr_svc`` with ``o_syscall = syscall_t'(areg_q)``; the testbench services the +call (see :doc:`../architecture/syscalls`). + +Channel operations and the stall path +------------------------------------- + +``IN`` and ``OUT`` are the only instructions that can take more than one cycle. +The processor hands the operation off to the per-core link interface and freezes +while the rendezvous completes: + +.. 
literalinclude:: ../../rtl/processor.sv + :language: verilog + :lines: 71-78 + +The channel slot is taken from the low ``SLOT_W`` bits of ``breg_q`` and the word +to send from ``areg_q``. While the link interface reports ``i_liu_busy``, the +processor asserts ``stall``, which gates the state-register update shown above so +that *all* of PC/AREG/BREG/OREG hold their values. When the link interface +completes, ``stall`` drops; on ``IN`` the received word reaches ``areg`` via the +``areg_d = i_liu_in_word`` mux arm, and the default ``pc_d = pc_q + 1`` then +advances past the instruction. The mechanics of the rendezvous itself are +covered in :doc:`memory-and-links`, and the channel semantics in +:doc:`../architecture/channels`. + +In the single-core ``hex`` top (``rtl/hex.sv``) these channel ports are tied off +— ``i_liu_busy`` is held low and the outputs left unconnected — so a sequential +image never stalls on a channel op. diff --git a/docs/hardware/memory-and-links.rst b/docs/hardware/memory-and-links.rst index 648ddb6..2b30da7 100644 --- a/docs/hardware/memory-and-links.rst +++ b/docs/hardware/memory-and-links.rst @@ -1,4 +1,96 @@ Memory and links ================ -.. todo:: Write this page (Stage 5). +A core couples the processor to two stateful units: the private ``memory`` it +fetches from and stores to, and the ``link_interface`` that turns the +processor's ``IN`` / ``OUT`` hand-off into messages on the network. Both are +instantiated inside ``rtl/core.sv`` alongside the processor. + +Memory +------ + +``rtl/memory.sv`` is a single-port-per-function model: a read-only instruction +fetch port and a read/write data port over one register array. + +.. 
literalinclude:: ../../rtl/memory.sv + :language: verilog + :lines: 1-15 + +The storage is word-addressed — ``memory_q`` is an array of ``MEM_WIDTH`` (32) +bit words, ``MEM_DEPTH`` deep — and writes are word writes: on a clocked +``i_d_valid && i_d_we`` the 32-bit ``i_d_data`` is stored at the word address +``i_d_addr``. The data read port is purely combinational +(``o_d_data = memory_q[i_d_addr]``), so a load returns its word in the same cycle +the processor presents the address. + +Instructions, however, are bytes (see +:doc:`../architecture/instruction-encoding`), so the fetch port extracts one byte +from the addressed word. The fetch address ``i_f_addr`` is a *byte* address; its +top bits select the word and its low two bits select the byte within that word: + +.. literalinclude:: ../../rtl/memory.sv + :language: verilog + :lines: 24-28 + +``fetch_byte_addr`` turns the byte offset ``i_f_addr[1:0]`` into a bit offset +(``<< 3``, i.e. ×8) and the indexed part-select ``[fetch_byte_addr +: 8]`` reads +the corresponding 8-bit instruction out of the 32-bit word. This is why the +processor's program counter is a byte address while its data addresses are word +addresses. + +The link interface +------------------ + +``rtl/link_interface.sv`` (the "LIU") is the per-core messaging engine. It +realises the synchronous channel rendezvous of :doc:`../architecture/channels` in +hardware: the first party to a channel blocks until the other arrives, the word +is exchanged exactly once, and both then continue. Its port list shows the three +faces it presents — the processor side, the reset-time route-table config, and +the DATA and ACK network ports: + +.. literalinclude:: ../../rtl/link_interface.sv + :language: verilog + :lines: 1-34 + +Route table and receive buffers +------------------------------- + +Each core's logical channel slots (``0`` … ``NUM_LINKS-1``) are *addresses*, not +wires. 
The LIU holds a small **route table** mapping each slot to a +``(dst_core, dst_slot)`` pair; it is written at reset over the ``i_cfg_*`` port +from the network container's edges (so the same binary runs unchanged on +``hexsim`` and on the RTL). It also holds **per-slot receive buffers** — one +one-word register per slot (``rx_valid`` / ``rx_src`` / ``rx_word``). Because +each ``(core, slot)`` channel has a single writer with at most one outstanding +message, one buffer per slot can never overflow, and indexing by ``dst_slot`` +stops a message parked for one channel from blocking another. + +The rendezvous handshake +------------------------ + +The processor drives ``i_op_out`` / ``i_op_in`` with the slot in ``i_slot`` and +the word in ``i_areg``, and watches ``o_busy`` (which becomes the processor's +``stall``), ``o_done``, and ``o_in_word``. Internally a four-state FSM +(``IDLE``, ``OUT_SEND``, ``OUT_WAIT``, ``IN_ACK``) runs the protocol against the +two networks: + +* **OUT** — from ``IDLE`` the FSM enters ``OUT_SEND`` and asserts ``o_dnet_valid`` + with a DATA flit ``{dst_core, dst_slot, src_core=i_core_id, word=i_areg}`` taken + from the route table. When the DATA network accepts it (``i_dnet_in_ready``) it + moves to ``OUT_WAIT`` and the processor stays stalled until the matching **ACK** + arrives on ``i_anet_valid``, at which point ``o_done`` pulses and it returns to + ``IDLE``. +* **IN** — the LIU continuously accepts delivered DATA flits into the addressed + slot buffer (``o_dnet_out_ready`` is high whenever that slot is empty), + independent of the FSM. An ``IN`` on a slot blocks in ``IDLE`` until that slot's + ``rx_valid`` is set; it then enters ``IN_ACK``, presents the buffered word on + ``o_in_word``, and emits an ACK addressed back to the sender (``rx_src``). When + the ACK network takes the ACK (``i_anet_in_ready``) it pulses ``o_done``, clears + the slot, and returns to ``IDLE``. 
+ +The decisive rule is **ACK-on-consume**: the writer only unblocks because the +reader executed its ``IN`` and consumed the word — not merely because the word +landed in a buffer. That is exactly the occam/X synchronous channel semantics of +:doc:`../architecture/channels`, preserved despite the buffering in the network. +The split into separate DATA and ACK networks (see :doc:`network`) keeps ACKs +from ever queuing behind DATA, which is what makes the transport deadlock-free. diff --git a/docs/hardware/network.rst b/docs/hardware/network.rst index 9fa6aaf..5b4339a 100644 --- a/docs/hardware/network.rst +++ b/docs/hardware/network.rst @@ -1,4 +1,91 @@ The multi-core network ====================== -.. todo:: Write this page (Stage 5). +To run X programs with ``par`` and channels on the RTL, ``rtl/network_top.sv`` +assembles ``NUM_CORES`` cores around a pair of routers. Each core (see +:doc:`core` and :doc:`memory-and-links`) injects messages into, and is delivered +messages from, a central crossbar. The design follows the transputer +T9000 + C104 model at small scale: a routing switch carrying buffered, +addressed packets, with every message acknowledged end-to-end so that channel +communication stays synchronous. There is deliberately no virtual-channel +multiplexing. + +The router +---------- + +``rtl/router.sv`` is a stateless ``N``×``N`` address forwarder, parameterised by +flit width (``FLIT_W``) and instantiated twice — once per network. Each of the +``NUM_CORES`` input ports has a one-deep registered input buffer; each output +port runs a per-output round-robin arbiter over the input buffers whose +``dst_core`` names that output: + +.. literalinclude:: ../../rtl/router.sv + :language: verilog + :lines: 1-15 + +There is no routing table inside the router: a flit's ``dst_core`` field +*directly* selects the output port. 
The arbiter (``rr[j]`` is the round-robin +pointer for output ``j``) grants one flit per output per cycle and registers it +to that output, freeing the winning input buffer. A stalled output +(``i_out_ready`` low) holds its flit and back-pressures the input, so flits are +never lost or overwritten; one hop costs one cycle, which keeps timing clean and +avoids combinational loops through the crossbar. + +The network top +--------------- + +``rtl/network_top.sv`` wires core ``k`` to router input port ``k`` and router +output port ``k``, for both a **DATA** network and an **ACK** network: + +.. literalinclude:: ../../rtl/network_top.sv + :language: verilog + :lines: 81-103 + +Splitting DATA and ACK into two independent routers means an acknowledgement can +never be stuck behind data — the classic request/response deadlock — so the +transport itself is deadlock-free; the only deadlock that can remain is a genuine +occam-style cycle of processes all blocked on channel ops. + +Configuration vs. wiring +------------------------ + +The physical wiring is *static*: ``NUM_CORES`` cores, two ``N``×``N`` crossbars, +fixed port assignments. The *topology* a particular program needs — which core's +slot talks to which other core's slot — is realised entirely by +**configuration**, not by re-elaborating the RTL. At reset the testbench reads +the network container's edges and programs each core's route table over the +``i_cfg_*`` port (``network_top`` decodes ``i_cfg_core`` to a per-core write +enable). This is the hardware counterpart of the compiler's container wiring +described in :doc:`../compiler/networks`: the compiler emits ``(procA, slotA, +procB, slotB)`` edges, and those same edges become the route-table entries that +tell each LIU where its slots lead. One elaboration runs any topology that fits +in ``NUM_CORES``. + +Flits +----- + +Messages are single-flit, one word per message. The flit structs are defined in +``rtl/hex_pkg.sv``: + +.. 
literalinclude:: ../../rtl/hex_pkg.sv + :language: verilog + :lines: 65-81 + +A **DATA** flit carries ``dst_core`` (selects the router output), ``dst_slot`` +(which receive buffer at the destination), ``src_core`` (so the reader can +address the ACK back), and the 32-bit ``word``. An **ACK** flit is just a +``dst_core`` — the original ``src_core`` — because a writer has at most one +outstanding ``OUT``, so an ACK arriving at a core unambiguously completes it. The +testbench mirrors this packing in ``tests/rtl/flit_layout.hpp``: the DATA flit is +a 38-bit value laid out, MSB to LSB, as ``[37:36] dst_core``, ``[35:34] +dst_slot``, ``[33:32] src_core``, ``[31:0] word``, and the ACK flit is the 2-bit +``dst_core``. + +Putting it together, sending ``c ! x`` from a writer to a reader's ``c ? v`` is: +the writer's LIU injects a DATA flit toward the reader and stalls; the DATA +router delivers it into the reader's slot buffer; the reader's ``IN`` consumes +the word and injects an ACK flit toward the writer; the ACK router delivers it +and the writer unblocks. Both cores have then synchronised on the communication — +exactly the rendezvous of :doc:`../architecture/channels`. + +.. todo:: Add a network topology diagram. diff --git a/docs/hardware/overview.rst b/docs/hardware/overview.rst index 109b4b2..ed74cd0 100644 --- a/docs/hardware/overview.rst +++ b/docs/hardware/overview.rst @@ -1,4 +1,107 @@ The RTL implementation ====================== -.. todo:: Write this page (Stage 5). +The ``rtl/`` directory holds a SystemVerilog implementation of the Hex +processor. It is deliberately small and direct: the processor datapath is a +single combinational decode-and-execute block clocked once per instruction, +with no pipeline, no caches, and no resource virtualisation. 
The whole core +(:doc:`processor <core>` plus :doc:`memory and link interface +<memory-and-links>`) is a few hundred lines of synthesizable RTL, and the +single-instruction-per-cycle model maps one-to-one onto the abstract execution +cycle described in :doc:`../architecture/execution`. + +The implementation comes in two flavours that share the same datapath: + +* a **single-core** top, :doc:`hex.sv <core>`, which is one processor plus its + private memory and is used for running an individual image; and +* a **multi-core network**, :doc:`network_top.sv <network>`, which wraps the + processor in a ``core`` together with a :doc:`link interface + <memory-and-links>` and wires ``NUM_CORES`` of them to a pair of + :doc:`routers <network>` so that ``par`` programs with channels run on real + hardware. + +Module map +---------- + +.. list-table:: + :header-rows: 1 + :widths: 25 75 + + * - File + - Role + * - ``rtl/hex_pkg.sv`` + - Shared package: parameters, the ``opcode_t`` / ``opr_opcode_t`` / + ``syscall_t`` enumerations, address and data typedefs, and the network + flit structs. Every other module imports it. + * - ``rtl/processor.sv`` + - The processor datapath: registers, instruction decode, the A/B + multiplexors and ALU, memory address generation, and the channel hand-off + to the link interface. See :doc:`core`. + * - ``rtl/memory.sv`` + - Word-addressed memory with a read-only byte-wide instruction-fetch port + and a read/write word data port. See :doc:`memory-and-links`. + * - ``rtl/link_interface.sv`` + - The per-core messaging engine that realises the ``IN`` / ``OUT`` + rendezvous in hardware. See :doc:`memory-and-links`. + * - ``rtl/core.sv`` + - A single core: ``processor`` + ``memory`` + ``link_interface``, exposing + the syscall, route-table config, and DATA/ACK network ports. See + :doc:`core`. + * - ``rtl/router.sv`` + - An ``N``×``N`` address-forwarding crossbar, instantiated once for the + DATA network and once for the ACK network. See :doc:`network`.
+ * - ``rtl/network_top.sv`` + - The multi-core top: ``NUM_CORES`` cores plus the DATA and ACK routers. + See :doc:`network`. + * - ``rtl/hex.sv`` + - The single-core top: one ``processor`` + ``memory``, with channel ports + tied off. See :doc:`core`. + * - ``rtl/processor.v`` + - A plain-Verilog (Verilog-2005) variant of ``processor.sv``, generated by + ``sv2v``. See below. + +Module hierarchy +---------------- + +The two tops instantiate the shared leaf modules as follows:: + + hex (single-core top) + |- processor + |- memory + + network_top (multi-core top) + |- core (x NUM_CORES, generate loop g_core[k].u_core) + | |- processor + | |- memory + | `- link_interface + |- router u_dnet (DATA network) + `- router u_anet (ACK network) + +In the single-core ``hex`` top the processor's channel ports are left +unconnected and ``i_liu_busy`` is tied low, so an ``IN`` / ``OUT`` instruction +never stalls — that top is for sequential images only. The multi-core +``network_top`` instead gives each processor a real ``link_interface`` and +connects every core to both routers; this is the configuration the +:doc:`testbench <testbench>` drives. + +The plain-Verilog variant +-------------------------- + +``rtl/processor.v`` is a Verilog-2005 translation of ``processor.sv`` produced +by ``sv2v`` (note the ``// Generated using sv2v`` header). It exists so that the +processor datapath can be fed to tools that do not accept SystemVerilog (older +synthesis flows, for example). It is a derivative of ``processor.sv`` and is not +the authoritative source: the SystemVerilog files are. The translation flattens +the package parameters into local constants and lowers the packed +``instr_t`` struct into explicit bit-slices of an 8-bit ``instr`` wire, but the +behaviour is identical to ``processor.sv``. + +Reading order +------------- + +The remaining hardware pages follow the datapath outwards: + +* :doc:`core` — the processor and its decode/execute cycle.
+* :doc:`memory-and-links` — the memory model and the per-core link interface. +* :doc:`network` — the router and the multi-core network. +* :doc:`testbench` — the Verilator testbench and the RTL unit tests. diff --git a/docs/hardware/testbench.rst b/docs/hardware/testbench.rst index e98eed3..b19b444 100644 --- a/docs/hardware/testbench.rst +++ b/docs/hardware/testbench.rst @@ -1,4 +1,95 @@ Testbench ========= -.. todo:: Write this page (Stage 5). +The RTL is exercised at two levels: ``hextb``, a Verilator front-end that runs a +whole image or network container on the multi-core network, and a set of small +C++ unit testbenches under ``tests/rtl/`` that check individual modules. + +Verilator +--------- + +All of the RTL testing is driven through `Verilator +<https://www.veripool.org/verilator/>`_, which compiles the SystemVerilog into a +cycle-accurate C++ model. The project targets a recent Verilator (the 5.x +series); ``≥ ~4.200`` is the practical floor, and distribution packages older +than that (for example the 4.038 shipped by some apt repositories) are too old. +The build uses a suitable system Verilator if one is found and otherwise fetches +and builds a pinned version automatically — see :doc:`../tools/building` for the +toolchain setup and :doc:`../tools/testing` for running the suite. + +The hextb tool +-------------- + +``tools/hextb.cpp`` is the Verilator testbench for the full system. It elaborates +``network_top`` (as ``Vntb``) and runs either a single image or a multi-core +network container on it. Its flow is: + +#. **Load.** Read the container, fill every core's memory with a quiescent halt + loop, then copy each image's code into its core's ``memory_q`` (reached through + the generate-loop instance names, e.g. ``g_core[k].u_core``). A plain binary is + one image; a ``par`` program is a container of several. +#.
**Configure routes.** Hold reset for a few cycles, then for each container edge + program both endpoints' route tables via the ``i_cfg_*`` port — the same + ``(procA, slotA, procB, slotB)`` edges the compiler emitted (see + :doc:`../compiler/networks`) — then release reset. +#. **Run.** Clock the design, and each cycle service any core asserting + ``o_syscall_valid``. Syscalls (``EXIT`` / ``WRITE`` / ``READ``) read their + arguments from that core's own memory through the shared ``HexSimIO``; the + first ``EXIT`` sets the system exit code. +#. **Detect deadlock.** A channel rendezvous legitimately freezes the + participating cores' PCs for a few cycles. The harness only reports deadlock + after a sustained stretch (``DEADLOCK_THRESHOLD`` cycles) with no PC change and + no syscall on any core; ``--max-cycles`` is a runaway backstop. + +Because the same container runs unchanged on both ``hexsim`` and ``hextb``, and +``hexsim`` is a semantically simple functional model of the channel rendezvous, +``hexsim`` serves as an independent golden model: the two must produce identical +program output and exit code even though their channel *timing* differs. + +Command-line usage:: + + hextb [-t|--trace] [--max-cycles N] <file> + +where ``file`` is a binary or network container, ``-t`` enables per-core PC +tracing, and ``--max-cycles`` bounds the simulation (``0`` = unbounded). Passing +``+trace`` to the underlying Verilated model dumps a VCD waveform. + +RTL unit testbenches +-------------------- + +``tests/rtl/`` holds focused, self-checking testbenches — each instantiates one +Verilated module, drives it directly, and ``assert``\ s on the results: + +..
list-table:: + :header-rows: 1 + :widths: 28 72 + + * - File + - What it exercises + * - ``core_tb.cpp`` + - A whole ``core`` (``Vcore``): loads a tiny program that does ``OUT`` on a + configured slot, checks the injected DATA flit's fields, verifies the + processor's PC stays frozen until the ACK arrives, then confirms it + unblocks once the ACK is returned. + * - ``liu_tb.cpp`` + - The ``link_interface`` in isolation (``Vlink_interface``): an ``OUT`` that + injects the right DATA flit and stalls until an ACK; an ``IN`` that blocks + until its slot buffer fills then delivers the word and emits an ACK; and + per-slot independence (a delivery to one slot must not satisfy an ``IN`` + waiting on another). + * - ``router_tb.cpp`` + - The ``router`` crossbar (``Vrouter``): a flit is forwarded to the output + named by ``dst_core`` and is held under output back-pressure (not lost), + and round-robin arbitration is fair when two inputs target one output (no + starvation). + * - ``flit_layout.hpp`` + - Shared helper header — not a test. It defines the DATA/ACK flit + bit-packing (``make_dnet_flit`` and the ``flit_*`` accessors) in one place + so ``core_tb`` and ``liu_tb`` agree on the layout described in + :doc:`network`. + +Together these check the messaging engine bottom-up: the router forwards and +arbitrates correctly, the link interface implements the OUT/IN/ACK protocol with +independent per-slot buffers, and a full core ties the processor's stall path to +that protocol — while ``hextb`` validates the assembled network end-to-end +against the ``hexsim`` golden model. 
From 0bdff6cf8787dcd60ba87ee7dad590eb772b2a4b Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 19:13:35 +0100 Subject: [PATCH 09/10] docs: write the toolchain and reference pages --- docs/reference/further-reading.rst | 47 ++++- docs/reference/instruction-quick-ref.rst | 108 +++++++++- docs/reference/primary-sources.rst | 36 +++- docs/reference/syscall-reference.rst | 49 ++++- docs/tools/building.rst | 100 +++++++++- docs/tools/formats.rst | 117 ++++++++++- docs/tools/index.rst | 244 ++++++++++++++++++++++- docs/tools/testing.rst | 82 +++++++- 8 files changed, 775 insertions(+), 8 deletions(-) diff --git a/docs/reference/further-reading.rst b/docs/reference/further-reading.rst index 33d3744..b411283 100644 --- a/docs/reference/further-reading.rst +++ b/docs/reference/further-reading.rst @@ -1,4 +1,49 @@ Further reading =============== -.. todo:: Write this page (Stage 6). +The Hex project sits in a long line of work on small processors and small, +portable languages. This page is a short, annotated guide to that lineage and +to the narrative companion to these reference docs. + +Historical lineage +------------------ + +**The Transputer and "Simple 42".** + The Hex architecture descends from the Transputer family — a processor + designed around point-to-point communication and concurrency — and from the + "Simple 42" teaching processor. Hex keeps the Transputer's idea of a core + that communicates over channels (the ``IN``/``OUT`` operations and the + network of link slots) while paring the design back to something small + enough to explain end to end. + +**BCPL and the small-language tradition.** + X stands in the tradition of BCPL, the small systems language created by + Martin Richards. BCPL pioneered the approach of a compact, typeless, + close-to-the-machine language that is easy to retarget — the same approach X + takes in compiling directly to Hex instructions. 
+ +**Occam and channel-based concurrency.** + X's concurrency — ``par`` blocks and the ``!``/``?`` channel operators — + follows Occam, the Transputer's language, also designed by David May. As in + Occam, processes share nothing and communicate only by synchronous message + passing over channels. + +**Bootstrapping and portability.** + The compiler is written in its own language and bootstraps itself: a small + compiler, portable by retargeting its code generator, that can rebuild + itself from source. This is the classic route to a self-hosting toolchain, + and is described in :doc:`../compiler/bootstrapping`. + +The narrative companion +----------------------- + +The author's blog post **"From logic gates to a programming language using the +Hex architecture"** walks the whole stack as a single narrative — from logic +gates up through the processor, the X language and the self-hosting compiler. +It is the readable, story-shaped counterpart to this reference, and the best +starting point for understanding *why* the pieces fit together the way they do. + +.. seealso:: + + :doc:`../index` for the layered overview of the whole stack, and + :doc:`primary-sources` for David May's original notes on Hex and X. diff --git a/docs/reference/instruction-quick-ref.rst b/docs/reference/instruction-quick-ref.rst index 1251c0b..e4f8814 100644 --- a/docs/reference/instruction-quick-ref.rst +++ b/docs/reference/instruction-quick-ref.rst @@ -1,4 +1,110 @@ Instruction quick reference =========================== -.. todo:: Write this page (Stage 6). +A consolidated, at-a-glance table of every Hex instruction and ``OPR`` +sub-operation, with its encoded value (in hex) and a one-line effect. This is +the companion to the prose treatment in :doc:`../architecture/instruction-set`; +the opcode values are those defined in ``src/hex.hpp``. + +Each instruction is one byte: a 4-bit opcode in the high nibble and a 4-bit +operand in the low nibble. 
Operands larger than four bits — and negative ones — +are built up with chains of ``PFIX`` and ``NFIX`` before the instruction they +modify. + +Primary opcodes +--------------- + +.. list-table:: Opcodes + :header-rows: 1 + :widths: 14 12 74 + + * - Mnemonic + - Opcode + - Effect + * - ``LDAM`` + - ``0x0`` + - Load A from memory: ``areg = mem[oreg]``. + * - ``LDBM`` + - ``0x1`` + - Load B from memory: ``breg = mem[oreg]``. + * - ``STAM`` + - ``0x2`` + - Store A to memory: ``mem[oreg] = areg``. + * - ``LDAC`` + - ``0x3`` + - Load A with a constant: ``areg = oreg``. + * - ``LDBC`` + - ``0x4`` + - Load B with a constant: ``breg = oreg``. + * - ``LDAP`` + - ``0x5`` + - Load A with a PC-relative address: ``areg = pc + oreg``. + * - ``LDAI`` + - ``0x6`` + - Load A from indexed memory: ``areg = mem[areg + oreg]``. + * - ``LDBI`` + - ``0x7`` + - Load B from indexed memory: ``breg = mem[breg + oreg]``. + * - ``STAI`` + - ``0x8`` + - Store A to indexed memory: ``mem[breg + oreg] = areg``. + * - ``BR`` + - ``0x9`` + - Branch PC-relative: ``pc = pc + oreg``. + * - ``BRZ`` + - ``0xA`` + - Branch if A is zero: ``pc = pc + oreg`` when ``areg == 0``. + * - ``BRN`` + - ``0xB`` + - Branch if A is negative: ``pc = pc + oreg`` when ``areg < 0``. + * - ``OPR`` + - ``0xD`` + - Operate: select a register-to-register sub-operation by the operand (see + below). + * - ``PFIX`` + - ``0xE`` + - Prefix: ``oreg = oreg << 4``, carrying the operand into the next + instruction. + * - ``NFIX`` + - ``0xF`` + - Negative prefix: ``oreg = 0xFFFFFF00 | (oreg << 4)``, building negative or large + operands. + +OPR sub-operations +------------------ + +When the opcode is ``OPR`` (``0xD``) the operand selects one of the following +register-to-register operations: + +.. list-table:: OPR sub-operations + :header-rows: 1 + :widths: 14 12 74 + + * - Sub-op + - Operand + - Effect + * - ``BRB`` + - ``0x0`` + - Branch to the address in B: ``pc = breg``. + * - ``ADD`` + - ``0x1`` + - ``areg = areg + breg``.
+ * - ``SUB`` + - ``0x2`` + - ``areg = areg - breg``. + * - ``SVC`` + - ``0x3`` + - Supervisor call: invoke the system call selected by A (see + :doc:`syscall-reference`). + * - ``IN`` + - ``0x4`` + - Receive a word into A from the channel link slot selected by B + (blocking). + * - ``OUT`` + - ``0x5`` + - Send the word in A to the channel link slot selected by B (blocking). + +.. seealso:: + + :doc:`../architecture/instruction-set` for the full description of each + instruction, and :doc:`syscall-reference` for the ``SVC`` system calls. diff --git a/docs/reference/primary-sources.rst b/docs/reference/primary-sources.rst index 4b8d9c5..1b36403 100644 --- a/docs/reference/primary-sources.rst +++ b/docs/reference/primary-sources.rst @@ -1,4 +1,38 @@ Primary sources =============== -.. todo:: Write this page (Stage 6). +The Hex architecture and the X language originate in a set of notes by David +May, included here in their original form. These documents are the authoritative +description of the *original* design and a valuable narrative companion to the +present reference. They predate the additions made in this project — channels, +``par`` and the network container, and the current C++ toolchain — so where the +two disagree, these reference pages describe the project as it stands today and +the PDFs describe the design they grew from. + +The original notes +------------------ + +:download:`The Hex Architecture (hexb.pdf) <../PDFs/hexb.pdf>` + David May, 2014. Describes the processor itself: the four registers, the + 8-bit instruction format with its 4-bit opcode and 4-bit operand, the + ``PFIX``/``NFIX`` mechanism for building larger operands, the full + instruction set, and a small C reference simulator. + +:download:`X and Hex (xhexnotes.pdf) <../PDFs/xhexnotes.pdf>` + David May, 2014. 
The X language reference together with the Hex ISA and an + overview of the compiler internals — the lexer, syntax analyser, translator + and code buffer pipeline — along with the memory layout and calling + convention used by the generated code. + +:download:`The X compiler in X (xhexb.pdf) <../PDFs/xhexb.pdf>` + The source listing of the X compiler written in X itself: the self-hosting + bootstrap from which the compiler can rebuild itself. + +:download:`The X compiler in Hex assembly (xhexba.pdf) <../PDFs/xhexba.pdf>` + The same compiler compiled down to Hex assembly: the bootstrap output + listing, i.e. the native form that runs the self-hosting build. + +.. seealso:: + + :doc:`../compiler/bootstrapping` for how the self-hosting compiler is built, + and :doc:`further-reading` for the wider historical context. diff --git a/docs/reference/syscall-reference.rst b/docs/reference/syscall-reference.rst index 37a6822..cc69f4b 100644 --- a/docs/reference/syscall-reference.rst +++ b/docs/reference/syscall-reference.rst @@ -1,4 +1,51 @@ System-call reference ===================== -.. todo:: Write this page (Stage 6). +System calls are made with the ``SVC`` ``OPR`` sub-operation: the A register +holds the call number and the arguments are read from the stack. This page is +a quick reference for the three calls; the calling model is described in prose +in :doc:`../architecture/syscalls`, and the numbers and conventions here are +those implemented in ``src/hexsim.hpp``. + +Calling convention +------------------ + +The stack pointer is held in memory word 1, so ``sp = mem[1]``. Arguments are +passed in the words just above the stack pointer. Writing ``sp`` for that word +index, the arguments to a call occupy ``mem[sp+1]``, ``mem[sp+2]`` and +``mem[sp+3]`` as needed. The call number itself is loaded into A before the +``SVC`` executes. + +.. 
list-table:: System calls + :header-rows: 1 + :widths: 12 8 80 + + * - Call + - A + - Arguments and effect + * - ``EXIT`` + - ``0`` + - Halt the processor. The exit code is taken from ``mem[sp+2]``. In a + network, the first processor to call ``EXIT`` sets the system exit code. + * - ``WRITE`` + - ``1`` + - Write one byte to an output stream. The byte is ``mem[sp+2]`` and the + target stream is ``mem[sp+3]``. + * - ``READ`` + - ``2`` + - Read one byte from an input stream. The source stream is ``mem[sp+2]``, + and the byte read is stored back into ``mem[sp+1]``. + +Stream semantics +---------------- + +The stream argument selects which input or output channel the byte is routed +to. The simulator's I/O layer maps these onto the host's standard input and +output, so ``WRITE`` to the default stream appears on the simulator's +``stdout`` and ``READ`` blocks on the simulator's ``stdin``. Values read in +are masked to a byte before being stored. + +.. seealso:: + + :doc:`../architecture/syscalls` for the full description of the system-call + model, and :doc:`instruction-quick-ref` for the ``SVC`` encoding. diff --git a/docs/tools/building.rst b/docs/tools/building.rst index d1c7e09..c14aa81 100644 --- a/docs/tools/building.rst +++ b/docs/tools/building.rst @@ -1,4 +1,102 @@ Building ======== -.. todo:: Write this page (Stage 6). +The project is built with CMake. A configure step fetches the few external +dependencies, after which a normal build produces the six tools described in +:doc:`index` and installs them under a chosen prefix. + +Dependencies +------------ + +* CMake 3.20 or newer. +* A C++20-capable compiler. +* Python 3 (for the integration tests and the documentation build). +* Verilator 5.0 or newer (optional; only needed for the RTL testbench + ``hextb``). If no suitable Verilator is found, a pinned version is fetched + and built automatically. 
+ +All other libraries — ``fmt`` and Catch2 — are fetched automatically via +CMake's ``FetchContent``, so no manual installation is required. + +On Ubuntu the system packages needed to bootstrap are: + +.. code-block:: bash + + $ apt update && apt install build-essential cmake + +On macOS with Homebrew: + +.. code-block:: bash + + $ brew install cmake + +Configuring and building +------------------------- + +Configure into a ``build`` directory, build, and install: + +.. code-block:: bash + + $ mkdir build + $ cd build + $ cmake .. -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/install \ + -DUSE_VERILATOR=OFF + $ make -j8 + $ make install + +This installs the tools into ``build/install/bin``; add that directory to your +``PATH`` to run them by name: + +.. code-block:: bash + + $ export PATH=$(pwd)/install/bin:$PATH + +Verilator and the RTL +--------------------- + +``-DUSE_VERILATOR`` defaults to ``ON``. Drop the ``-DUSE_VERILATOR=OFF`` from +the configure line above to build the RTL testbench ``hextb`` as well. When +Verilator is enabled, CMake uses a suitable system Verilator (5.0 or newer) if +it can find one — honouring ``-DVERILATOR_ROOT=`` to point at a specific +installation — and otherwise fetches and builds a pinned version automatically: + +.. code-block:: bash + + $ cmake .. -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/install \ + -DVERILATOR_ROOT=/opt/verilator + +A recent Verilator is required for ``hextb``: the project targets the 5.x +series, and roughly 4.200 is the practical floor — older releases (such as the +4.038 shipped by some distributions) are too old to build the testbench. + +Building the documentation +-------------------------- + +These Sphinx documents can be built two ways. + +The first is through CMake. Configure with ``-DBUILD_DOCS=ON`` and build the +``Sphinx`` target: + +.. code-block:: bash + + $ cmake .. 
-DBUILD_DOCS=ON + $ make Sphinx + +The CMake docs target prefers the project's virtual-environment Sphinx (see +below) and builds with ``-W``, so documentation warnings are treated as build +errors. + +The second is to drive Sphinx directly from a Python virtual environment, which +is convenient for iterating on the docs without reconfiguring CMake: + +.. code-block:: bash + + $ python3 -m venv docs/_venv + $ docs/_venv/bin/pip install -r docs/requirements.txt + $ docs/_venv/bin/sphinx-build -b html -W --keep-going docs docs/_build/html + +The ``-W`` flag turns warnings into errors and ``--keep-going`` reports all of +them rather than stopping at the first. The HTML output is written to +``docs/_build/html``. diff --git a/docs/tools/formats.rst b/docs/tools/formats.rst index 1632f85..09a7035 100644 --- a/docs/tools/formats.rst +++ b/docs/tools/formats.rst @@ -1,4 +1,119 @@ Binary and container formats ============================ -.. todo:: Write this page (Stage 6). +The toolchain uses two on-disk formats: a single-image ``.bin`` file holding +one processor's program, and a *network container* holding one image per core +plus the channel wiring between them. Both are little-endian and word-oriented +(a word is 32 bits). The single-image format is defined by +``src/heximage.hpp`` and the container by ``src/hexcontainer.hpp``; the two +share the same serialisation primitives so the writers (``hexasm``, ``xcmp``) +and the readers (``hexsim``, ``hexdis``, ``hextb``) cannot drift. + +The single-image format +----------------------- + +An image is a program-size word, the program itself, and an optional debug-info +block. There is no separate header beyond the size word: the first instruction +of the program *is* the entry point, and the compiler arranges for it to be a +branch (``BR``) over the data area to ``main``. + +.. 
list-table:: Image layout + :header-rows: 1 + :widths: 22 14 64 + + * - Field + - Size + - Description + * - ``programSizeWords`` + - 1 word + - The number of 32-bit words of program that follow (code, constants and + string data). This is the value ``hexdis`` and the simulators use to + know where the program ends and the optional debug block begins. + * - program + - ``programSizeWords`` words + - The instruction bytes, packed four to a word, followed by any constant + and string data the program references. Execution begins at the first + byte; the leading instruction branches over the data to ``main``. + * - debug info + - variable, optional + - A string table and symbol table (see below). Present in images emitted + by ``xcmp``; absent from hand-written assembly that defines no symbols. + +The debug-info block, when present, is laid out as: + +.. list-table:: Debug-info block + :header-rows: 1 + :widths: 22 16 62 + + * - Field + - Size + - Description + * - ``numStrings`` + - 1 word + - The number of names in the string table. + * - strings + - variable + - ``numStrings`` null-terminated strings, one per symbol (no string + pooling). + * - ``numSymbols`` + - 1 word + - The number of symbols in the symbol table. + * - symbols + - ``numSymbols`` × 2 words + - Each symbol is a ``(stringIndex, byteOffset)`` pair: an index into the + string table giving the symbol's name, and the byte offset in the + program that it labels. + +``hexdis`` reads this block to print function labels and ``symbol+offset`` +addresses; ``hexsim`` reads it to annotate its instruction trace. The buffer +that accumulates the program and its symbols during compilation is described in +:doc:`../compiler/codebuffer`. + +The network-container format +---------------------------- + +A network container packages several images together with the point-to-point +channel wiring that connects their link slots. 
It begins with the 32-bit magic +``0x4E584548`` (the ASCII bytes ``"HEXN"``); any file lacking this magic is +treated as a single plain image, which is how ``hexsim`` and ``hextb`` accept +both formats through one code path. + +.. list-table:: Container layout + :header-rows: 1 + :widths: 22 16 62 + + * - Field + - Size + - Description + * - ``magic`` + - 1 word + - ``0x4E584548`` (``"HEXN"``). Identifies the file as a network + container. + * - ``numProcessors`` + - 1 word + - The number of processor images that follow. + * - ``numEdges`` + - 1 word + - The number of channel edges in the wiring table. + * - edges + - ``numEdges`` × 4 words + - Each edge is a ``(procA, slotA, procB, slotB)`` tuple: a bidirectional + channel connecting link slot ``slotA`` of processor ``procA`` to link + slot ``slotB`` of processor ``procB``. + * - images + - variable + - ``numProcessors`` size-prefixed images. Each is a ``uint32`` + ``imageSizeBytes`` followed by exactly that many bytes of a standard + single-image binary (size word + program + optional debug info). + +When loading a container, ``hexsim`` instantiates one processor per image and +builds the channels from the edge list, while ``hextb`` loads each image into a +core's memory and programs the RTL routers from the same edges. The compiler +side of this — how a top-level ``par`` becomes a container, and how slots are +assigned — is described in :doc:`../compiler/networks`. + +.. seealso:: + + :doc:`../compiler/codebuffer` for how images are assembled in memory, and + :doc:`../compiler/networks` for how the container's edges and images are + generated. diff --git a/docs/tools/index.rst b/docs/tools/index.rst index 2d3e98a..92cd268 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -1,4 +1,246 @@ The toolchain ============= -.. todo:: Write this page (Stage 6). 
+The project ships six command-line tools that together cover the whole flow +from source to silicon: assembling and disassembling Hex binaries, compiling +the X language, and executing the result either on the C++ simulator or on the +Verilog RTL. All six are built by CMake (see :doc:`building`) and installed +into ``install/bin``. + +.. list-table:: The six tools + :header-rows: 1 + :widths: 12 88 + + * - Tool + - Purpose + * - ``hexasm`` + - Assembler: translates ``.S`` Hex assembly into a ``.bin`` image. + * - ``hexdis`` + - Disassembler: turns a ``.bin`` image back into a readable listing. + * - ``xcmp`` + - X compiler: compiles a ``.x`` program to a ``.bin`` image (or a network + container when ``main`` is a top-level ``par``). + * - ``hexsim`` + - Simulator: executes a single image or a multi-core network container in + software. + * - ``xrun`` + - Runner: compiles an X program and immediately executes it on the + simulator. + * - ``hextb`` + - Verilator testbench: executes an image or container on the RTL + multi-core network (requires Verilator). + +The image and container file formats produced and consumed by these tools are +documented in :doc:`formats`. + +hexasm — the assembler +---------------------- + +``hexasm`` assembles a single ``.S`` source file into a ``.bin`` image. + +.. code-block:: text + + Usage: hexasm file [options] + + file A source file to assemble + -h, --help Display this message + --tokens Tokenise the input only + --instrs Display the instruction sequence only + -o, --output file Specify a file for binary output (default a.out) + +The default output filename is ``a.out``; pass ``-o`` to choose another. The +``--tokens`` and ``--instrs`` flags stop after lexing or after instruction +selection respectively, which is useful for inspecting the assembler's +intermediate stages. + +Assemble and run the ``hello`` assembly program: + +.. 
code-block:: bash + + $ hexasm tests/asm/hello.S -o hello.bin + $ hexsim hello.bin + hello + +hexdis — the disassembler +------------------------- + +``hexdis`` reads a ``.bin`` image and prints an instruction listing. + +.. code-block:: text + + Usage: hexdis file [options] + + file A binary file to disassemble + -h, --help Display this message + --no-labels Don't display debug labels + +If the image carries a debug-info block (images produced by ``xcmp`` do), the +disassembler prints function labels and resolves addresses to ``symbol+offset`` +form. Each line shows the byte address, the raw instruction byte, the decoded +mnemonic and its operand: + +.. code-block:: bash + + $ hexdis hello.bin | head -10 + 0x0000 97 BR 7 + 0x0001 00 LDAM 0 + ... + 0x0008 51 LDAP 1 + 0x0009 94 BR 4 + 0x000a 11 LDBM 1 + 0x000b 30 LDAC 0 + 0x000c 82 STAI 2 + 0x000d d3 SVC + + main: + +Pass ``--no-labels`` to suppress the debug labels and print a flat listing: + +.. code-block:: bash + + $ hexdis hello.bin --no-labels | head -3 + +xcmp — the X compiler +--------------------- + +``xcmp`` compiles an X program to a ``.bin`` image. When the program's ``main`` +ends in a top-level ``par`` block, the output is a *network container* holding +one image per core (see :doc:`formats` and :doc:`../compiler/networks`). + +.. 
code-block:: text + + Usage: xcmp file [options] + + file A source file to compile + -h, --help Display this message + --tokens Tokenise the input only + --tree Display the syntax tree only + --tree-opt Display the optimised syntax tree only + --insts Display the intermediate instructions only + --insts-lowered Display the lowered instructions only + --insts-optimised Display the lowered optimised instructions only + --insts-asm Display the assembled instructions only + --memory-info Report memory information + -S Emit the assembly program + -o, --output file Specify a file for output (default a.out) + +The intermediate-stage flags expose every phase of the compiler in turn — +tokens, syntax tree, optimised tree, intermediate and lowered instructions, and +the final assembly — mirroring the pipeline described in +:doc:`../compiler/overview`. ``-S`` emits a ``.S`` assembly file that +``hexasm`` could then assemble. + +Compile and run the ``hello`` X program: + +.. code-block:: bash + + $ xcmp examples/hello_putval.x -o hello.bin + $ hexsim hello.bin + hello world + +hexsim — the simulator +---------------------- + +``hexsim`` executes a single image or a network container. It detects the +container magic automatically and boots either one processor or the whole +network. + +.. code-block:: text + + Usage: hexsim file [options] + + file A binary file to simulate + -h, --help Display this message + -d, --dump Dump the binary file contents + -t, --trace Enable instruction tracing + --max-cycles N Limit the number of simulation cycles (default: 0) + +A ``--max-cycles`` value of ``0`` (the default) means run without a cycle +limit. For a network container, ``hexsim`` reports the exit code of the *first +processor to halt* and detects deadlock when every core is simultaneously +blocked on a channel. + +Run with instruction tracing (``-t``) to see each cycle's execution: + +.. 
code-block:: bash + + $ hexsim hello.bin -t | head -5 + 0 0 BR 7 pc = pc + oreg (7) (0x000008) + 1 8 LDAP 1 areg = pc (9) + oreg (1) 10 + 2 9 BR 4 pc = pc + oreg (4) (0x00000e) + 3 14 main+0 LDBM 1 breg = mem[oreg (0x000001)] (65536) + 4 15 main+1 STAI 0 mem[breg (65536) + oreg (0) = 0x010000] = areg (10) + +The trace columns are, left to right: + +.. list-table:: Trace columns + :header-rows: 1 + :widths: 18 82 + + * - Column + - Meaning + * - Cycle + - The simulation cycle number (one instruction per cycle). + * - PC address + - The byte address of the instruction being executed. + * - Symbol+offset + - The nearest debug symbol and the byte offset past it (blank when no + debug info is present, as in the bootstrap entry sequence). + * - Instruction + - The decoded mnemonic and operand. + * - Operation + - The concrete effect on registers or memory, with resolved values. + +xrun — compile and run +---------------------- + +``xrun`` is a convenience front-end that compiles an X program and immediately +executes it on the simulator, without writing a persistent binary. + +.. code-block:: text + + Usage: xrun file [options] + + file A source file to run + -h, --help Display this message + -t, --trace Enable instruction tracing + --max-cycles N Limit the number of simulation cycles (default: 0) + +.. code-block:: bash + + $ xrun examples/hello_putval.x + hello world + +Internally ``xrun`` compiles to a temporary ``a.bin`` and then runs it through +the same simulator engine as ``hexsim``, so the ``-t`` and ``--max-cycles`` +options behave identically. + +hextb — the RTL testbench +------------------------- + +``hextb`` runs an image or a network container on the Verilated multi-core RTL +network rather than the C++ model. It requires the project to have been built +with Verilator (see :doc:`building`); the RTL design is described in +:doc:`../hardware/network`. + +.. 
code-block:: text + + Usage: hextb file [options] + + file A binary or network container to execute + -h, --help Display this message + -t, --trace Enable per-core PC tracing + --max-cycles N Limit simulation cycles (default: 0) + +``hextb`` loads each image into a core's memory, programs the routers from the +container's edge list, fills any unused core with a quiescent halt loop, and +then clocks the network until every active core has exited. Like ``hexsim``, +it returns the exit code of the first core to call the exit syscall and raises a +deadlock error if no core makes progress for a sustained number of cycles. Its +``-t`` flag prints per-core program counters each cycle rather than a decoded +instruction trace. + +.. seealso:: + + :doc:`formats` for the binary and container layouts these tools read and + write, and :doc:`building` for how to compile the tools themselves. diff --git a/docs/tools/testing.rst b/docs/tools/testing.rst index ccce014..4e414f1 100644 --- a/docs/tools/testing.rst +++ b/docs/tools/testing.rst @@ -1,4 +1,84 @@ Testing ======= -.. todo:: Write this page (Stage 6). +The project has three layers of tests: C++ unit tests written with Catch2, a +Python integration-test driver that exercises the tools end to end, and +Verilator testbenches for the RTL. All of them are wired into CTest, so the +quickest way to run everything is from the build directory: + +.. code-block:: bash + + $ ctest --output-on-failure + +``--output-on-failure`` prints the captured output of any test that fails, +which is usually enough to see what went wrong. + +C++ unit tests +-------------- + +The unit tests live in ``tests/unit/`` and are compiled into a single +``UnitTests`` executable. Run them directly for fast feedback: + +.. code-block:: bash + + $ ./UnitTests # run all unit tests + $ ./UnitTests "<test name>" # run a single named test + +The test sources cover the assembler, the disassembler, the X language and its +programs, and the multi-core simulator: + +..
list-table:: Unit-test sources + :header-rows: 1 + :widths: 34 66 + + * - File + - Covers + * - ``AssemblerTests.cpp`` + - The assembler: lexing, instruction selection and encoding. + * - ``AssemblerProgramTests.cpp`` + - Whole assembly programs assembled and run end to end. + * - ``DisassemblerTests.cpp`` + - The disassembler: decoding images back to listings. + * - ``XLanguageTests.cpp`` + - X language front-end features (parsing and compilation). + * - ``XProgramTests.cpp`` + - Whole X programs compiled and executed for their results. + * - ``SimTests.cpp`` + - The simulator, including channels and multi-core message-passing + networks. + +The ``tests/unit/`` directory also contains a collection of ``.x`` programs (for example +``ackermann.x``, ``mergesort.x``, ``sieve.x`` and the self-hosting +``xhexb.x``) that the program-level tests compile and run. ``TestContext.hpp`` +provides the shared fixture used across the suites. + +Python integration tests +------------------------- + +The end-to-end integration tests are driven by ``tests/tests.py``, which +invokes the built tools to compile and execute programs and checks their +output: + +.. code-block:: bash + + $ python3 ../tests/tests.py + +These complement the unit tests by verifying the complete compile-and-run flow +through the installed binaries rather than the library internals. The +assembly programs they reference live in ``tests/asm/`` (``hello.S``, +``hello_procedure.S``, ``exit0.S``, ``exit255.S`` and ``xhexb.S``). + +RTL testbenches +--------------- + +The hardware tests are Verilator testbenches in ``tests/rtl/``: +``core_tb.cpp`` exercises a single processor core, ``router_tb.cpp`` and +``liu_tb.cpp`` exercise the router and link interface, and +``flit_layout.hpp`` holds the shared flit definitions they use. These require +a Verilator build (see :doc:`building`) and are described in more detail in +:doc:`../hardware/testbench`. + +..
seealso:: + + :doc:`../hardware/testbench` for the structure of the RTL testbenches and + how they drive the design. From afaab252da6f822cfd1ce94942706cd982ebef1b Mon Sep 17 00:00:00 2001 From: James Hanlon Date: Sun, 14 Jun 2026 21:17:42 +0100 Subject: [PATCH 10/10] docs: group pages into top-level chapters in the toctree --- docs/architecture/overview.rst | 11 +++++++++++ docs/compiler/overview.rst | 12 ++++++++++++ docs/hardware/overview.rst | 8 ++++++++ docs/index.rst | 35 +--------------------------------- docs/language/overview.rst | 12 ++++++++++++ docs/reference/index.rst | 24 +++++++++++++++++++++++ docs/tools/index.rst | 7 +++++++ 7 files changed, 75 insertions(+), 34 deletions(-) create mode 100644 docs/reference/index.rst diff --git a/docs/architecture/overview.rst b/docs/architecture/overview.rst index 8d1a833..d4d17d9 100644 --- a/docs/architecture/overview.rst +++ b/docs/architecture/overview.rst @@ -67,3 +67,14 @@ See also * :doc:`registers` — the machine state and the role of each register. * :doc:`instruction-set` — the full instruction set, grouped by function. * :doc:`execution` — the fetch–increment–execute cycle and the datapath. + +.. toctree:: + :hidden: + + registers + instruction-encoding + instruction-set + execution + channels + syscalls + simulator-model diff --git a/docs/compiler/overview.rst b/docs/compiler/overview.rst index 4e41a79..b9889af 100644 --- a/docs/compiler/overview.rst +++ b/docs/compiler/overview.rst @@ -66,3 +66,15 @@ command-line options of ``tools/xcmp.cpp`` (``--tokens``, ``--tree``, When ``main`` is a top-level ``par``, the binary stage instead emits a *network container* with one image per processor; see :doc:`networks`. + +.. 
toctree:: + :hidden: + + lexical-analyser + syntax-analyser + translator + codebuffer + memory-and-calling + codegen-idioms + bootstrapping + networks diff --git a/docs/hardware/overview.rst b/docs/hardware/overview.rst index ed74cd0..92a4132 100644 --- a/docs/hardware/overview.rst +++ b/docs/hardware/overview.rst @@ -105,3 +105,11 @@ The remaining hardware pages follow the datapath outwards: * :doc:`memory-and-links` — the memory model and the per-core link interface. * :doc:`network` — the router and the multi-core network. * :doc:`testbench` — the Verilator testbench and the RTL unit tests. + +.. toctree:: + :hidden: + + core + memory-and-links + network + testbench diff --git a/docs/index.rst b/docs/index.rst index 6e97227..18117b4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,44 +51,11 @@ guide for building and running programs. :caption: Contents: architecture/overview - architecture/registers - architecture/instruction-encoding - architecture/instruction-set - architecture/execution - architecture/channels - architecture/syscalls - architecture/simulator-model language/overview - language/lexical - language/program-structure - language/statements - language/procedures-functions - language/expressions - language/concurrency - language/examples - language/grammar compiler/overview - compiler/lexical-analyser - compiler/syntax-analyser - compiler/translator - compiler/codebuffer - compiler/memory-and-calling - compiler/codegen-idioms - compiler/bootstrapping - compiler/networks hardware/overview - hardware/core - hardware/memory-and-links - hardware/network - hardware/testbench tools/index - tools/formats - tools/building - tools/testing - reference/instruction-quick-ref - reference/syscall-reference - reference/primary-sources - reference/further-reading + reference/index Indices and tables diff --git a/docs/language/overview.rst b/docs/language/overview.rst index aabe7a7..ff4a6a6 100644 --- a/docs/language/overview.rst +++ 
b/docs/language/overview.rst @@ -69,3 +69,15 @@ The remaining pages document each part of the language in detail: * :doc:`concurrency` — ``par``, ``chan`` and message passing. * :doc:`examples` — a tour of the programs in ``examples/``. * :doc:`grammar` — the consolidated grammar. + +.. toctree:: + :hidden: + + lexical + program-structure + statements + procedures-functions + expressions + concurrency + examples + grammar diff --git a/docs/reference/index.rst b/docs/reference/index.rst new file mode 100644 index 0000000..d1e2922 --- /dev/null +++ b/docs/reference/index.rst @@ -0,0 +1,24 @@ +Reference +========= + +Quick-reference tables and source material for the Hex processor and the X +language. These pages condense the detail from the rest of the documentation +into at-a-glance form, and point back to the original specifications the project +is built on. + +* :doc:`instruction-quick-ref` — every instruction and ``OPR`` sub-op with its + opcode and effect, the companion to :doc:`../architecture/instruction-set`. +* :doc:`syscall-reference` — the supervisor-call numbers and their stack + conventions, the companion to :doc:`../architecture/syscalls`. +* :doc:`primary-sources` — the original 2014 design notes (the four PDFs under + ``docs/PDFs/``) that this documentation rewrites and modernises. +* :doc:`further-reading` — the historical lineage and the narrative blog post + that accompanies these docs. + +.. toctree:: + :hidden: + + instruction-quick-ref + syscall-reference + primary-sources + further-reading diff --git a/docs/tools/index.rst b/docs/tools/index.rst index 92cd268..a3330b9 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -244,3 +244,10 @@ instruction trace. :doc:`formats` for the binary and container layouts these tools read and write, and :doc:`building` for how to compile the tools themselves. + +.. toctree:: + :hidden: + + formats + building + testing