diff --git a/Bender.yml b/Bender.yml index 8a0199c..077c2ec 100644 --- a/Bender.yml +++ b/Bender.yml @@ -1,38 +1,63 @@ package: name: ace + # Authors in alphabetical order (surname) authors: - # Alphabetically ordered by last name (maintainers first) + - "Aleksi Korsman " + - "Riccardo Tedeschi " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.0-beta.2 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.9 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.39.0 } + axi_riscv_atomics: { git: "https://github.com/pulp-platform/axi_riscv_atomics.git", rev: 6d3c8b4 } # branch: master + apb: { git: "https://github.com/pulp-platform/apb.git", version: 0.2.4 } export_include_dirs: - include sources: - # Source files grouped in levels. Files in level 0 have no dependencies on files in this - # package. Files in level 1 only depend on files in level 0, files in level 2 on files in - # levels 1 and 0, etc. Files within a level are ordered alphabetically. 
- # Level 0 + # Generic ACE package and interfaces - src/ace_pkg.sv - - src/snoop_pkg.sv - # Level 1 - src/ace_intf.sv - src/snoop_intf.sv - # Level 2 - - src/ace_trs_dec.sv - - src/ccu_fsm.sv - # Level 3 - - src/ace_ccu_top.sv + # CCU package + - src/ccu/ccu_pkg.sv + # CCU generated regs + - src/ccu/regs/generated/ccu_csr_pkg.sv + - src/ccu/regs/generated/ccu_csr.sv + # CCU source files + - src/ccu/ccu_csr_wrap.sv + - src/ccu/ccu_exclusive_monitor.sv + - src/ccu/ccu_frontend_arbiter.sv + - src/ccu/ccu_frontend.sv + - src/ccu/ccu_read_engine.sv + - src/ccu/ccu_replay.sv + - src/ccu/ccu_scoreboard.sv + - src/ccu/ccu_snoop_pipeline.sv + - src/ccu/ccu_top.sv + - src/ccu/ccu_write_engine.sv - - target: simulation - files: - - src/ace_test.sv - - src/snoop_test.sv + #- target: simulation + # files: + # - src/ace_test.sv + # - src/snoop_test.sv - - target: test - files: - # Level 0 - - test/tb_ace_ccu_pkg.sv - # Level 1 - - test/tb_ace_ccu_top.sv + #- target: test + # files: + # # Level 0 + # - test/tb_ace_ccu_pkg.sv + # # Level 1 + # - test/tb_ace_ccu_top.sv + + #- target: vscode + # files: + # - src/ccu/ccu_ctrl_wr_snoop.sv + + # - target: test + # files: + # # Level 0 + # - test/vip/ace_test_pkg.sv + # - test/vip/snoop_test_pkg.sv + # # Level 1 + # - test/vip/cache_test_pkg.sv + # # Level 2 + # - test/tb_ace_ccu_top.sv diff --git a/Makefile b/Makefile index 8d0f048..3be7a62 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,44 @@ TBS ?= ace_ccu_top \ SIM_TARGETS := $(addsuffix .log,$(addprefix sim-,$(TBS))) +####### Simulation parameters ####### +# Address width +ADDR_WIDTH ?= 32 +# AXI/ACE data width +DATA_WIDTH ?= 64 +# Cache line word width +WORD_WIDTH ?= 64 +# Number of words in a cache line +CACHELINE_WORDS ?= 4 +# Number of ways in the cache model +WAYS ?= 2 +# Number of sets in the cache model +SETS ?= 16 +# Number of cached masters +NMASTERS ?= 4 +# Number of master groups +NGROUPS ?= 2 +# Number of transactions to be generated per master +NTRANSACTIONS ?= 100 
+# Location of the generated files +MEM_DIR ?= $(PWD)/build/mem +# Seed for initial state generation. If empty, no seed +SEED ?= 10 +# Run coherency check after simulation +CHECK ?= 0 +# Debug mode for coherency checking +DEBUG ?= 1 + +export ADDR_WIDTH +export DATA_WIDTH +export WORD_WIDTH +export CACHELINE_WORDS +export WAYS +export SETS +export NMASTERS +export NGROUPS +export NTRANSACTIONS +export MEM_DIR .SHELL: bash @@ -51,6 +89,24 @@ sim_all: $(SIM_TARGETS) build: mkdir -p $@ +build/mem: build + mkdir -p $@ + +init_mem: build/mem + python3 test/vip/python/cache_coherency_test.py \ + --addr_width ${ADDR_WIDTH} \ + --data_width ${DATA_WIDTH} \ + --word_width ${WORD_WIDTH} \ + --cacheline_words ${CACHELINE_WORDS} \ + --ways ${WAYS} \ + --sets ${SETS} \ + --n_caches ${NMASTERS} \ + --n_transactions ${NTRANSACTIONS} \ + --target_dir $(MEM_DIR) \ + --seed $(SEED) \ + $(if $(filter 1, $(CHECK)),--check) \ + $(if $(filter 1, $(DEBUG)),--debug) + elab.log: Bender.yml | build export SYNOPSYS_DC="$(SYNOPSYS_DC)"; cd build && ../scripts/synth.sh | tee ../$@ diff --git a/README.md b/README.md index e66e687..03832ce 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,56 @@ This repository provides modules to implement cache coherence SoC's. |------------------------------------------------------|--------------------------------------------------------------------------------------------------------------|--------------------------------| | [`ace_ccu_top`](src/ace_ccu_top.sv) | ACE interconnector, broadcasts snooping messages to the cache controllers and AXI transactions to the slave | [Doc](doc/ace_ccu_top.md) | +## Verification + +Generate the initial cache and memory states, as well as the transaction streams, with the following command: + +``` +make init_mem +``` + +You can control simulation parameters, such as the memory and cache sizes and structures, number of caches, and number of transactions, in `Makefile`. 
+ +You can simulate the top level design with +``` +make -B sim-ace_ccu_top.log +``` + +### Coherency check + +To run the coherency check, run +``` +make init_mem CHECK=1 +``` +It will generate the initial cache and memory states, and then wait at a prompt until you press enter. + +Next, open another terminal and simulate the top level design with +``` +make -B sim-ace_ccu_top.log +``` +Once the simulation finishes, press enter on the coherency check prompt. A coherency check will be run. A log file called `cache_python.log` is generated. Search it for the keyword `ERROR` to find out whether coherency was broken during the simulation. When run with `DEBUG=1` (the default option), a pdb session is opened the moment a coherency problem is found. + ## License -The ACE repository is released under Solderpad v0.51 (SHL-0.51) see [LICENSE](LICENSE) \ No newline at end of file +The ACE repository is released under Solderpad v0.51 (SHL-0.51) see [LICENSE](LICENSE) + +## Publication + +If you use ACE/Culsans in your work, you can cite us: + +``` +@article{tedeschi2024culsans, + title={Culsans: An Efficient Snoop-based Coherency Unit + for the CVA6 Open Source RISC-V application processor}, + volume={10}, + number={2}, + journal={WiPiEC Journal - Works in Progress in Embedded Computing Journal}, + author={Tedeschi, Riccardo and Valente, Luca and Ottavi, Gianmarco and + Zelioli, Enrico and Wistoff, Nils and + Giacometti, Massimiliano and Basit Sajjad, Abdul and + Benini, Luca and Rossi, Davide}, + year={2024}, + month={Aug.} +} + +``` diff --git a/include/ace/assign.svh b/include/ace/assign.svh index d37943c..f2d6cdc 100644 --- a/include/ace/assign.svh +++ b/include/ace/assign.svh @@ -17,6 +17,7 @@ `define ACE_ASSIGN_SVH_ `include "axi/assign.svh" +`include "ace/assign.svh" //////////////////////////////////////////////////////////////////////////////////////////////////// // Internal implementation for assigning one ACE struct or interface to another struct or interface. 
@@ -71,9 +72,7 @@ __opt_as __lhs.b_ready = __rhs.b_ready; \ `__ACE_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ __opt_as __lhs.ar_valid = __rhs.ar_valid; \ - __opt_as __lhs.r_ready = __rhs.r_ready; \ - __opt_as __lhs.wack = __rhs.wack; \ - __opt_as __lhs.rack = __rhs.rack; + __opt_as __lhs.r_ready = __rhs.r_ready; `define __ACE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ __opt_as __lhs.aw_ready = __rhs.aw_ready; \ __opt_as __lhs.ar_ready = __rhs.ar_ready; \ @@ -116,9 +115,7 @@ `AXI_ASSIGN_W(slv, mst) \ `AXI_ASSIGN_B(mst, slv) \ `ACE_ASSIGN_AR(slv, mst) \ - `ACE_ASSIGN_R(mst, slv) \ - assign slv.wack = mst.wack; \ - assign slv.rack = mst.rack; + `ACE_ASSIGN_R(mst, slv) //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -146,9 +143,7 @@ assign mon_dv.ar_ready = axi_if.ar_ready; \ `__ACE_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ assign mon_dv.r_valid = axi_if.r_valid; \ - assign mon_dv.r_ready = axi_if.r_ready; \ - assign mon_dv.wack = axi_if.wack; \ - assign mon_dv.rack = axi_if.rack; + assign mon_dv.r_ready = axi_if.r_ready; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -317,7 +312,7 @@ __opt_as __lhs.cd_valid = __rhs.cd_valid; \ `__SNOOP_TO_CD(__opt_as, __lhs.cd, __lhs_sep, __rhs.cd, __rhs_sep) \ __opt_as __lhs.cr_valid = __rhs.cr_valid; \ - __opt_as __lhs.cr_resp = __rhs.cr_resp; + __opt_as __lhs.cr = __rhs.cr; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/include/ace/convert.svh b/include/ace/convert.svh new file mode 100644 index 0000000..0c771cd --- /dev/null +++ b/include/ace/convert.svh @@ -0,0 +1,128 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + + +`ifndef ACE_CONVERT_SVH_ +`define ACE_CONVERT_SVH_ + +`include "axi/assign.svh" + +`define __ACE_TO_AXI_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp[1:0]; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_ACE_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``atop = __rhs``__rhs_sep``atop; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; \ + __opt_as __lhs``__lhs_sep``snoop = '0; \ + __opt_as __lhs``__lhs_sep``bar = '0; \ + __opt_as __lhs``__lhs_sep``domain = '0; \ + __opt_as __lhs``__lhs_sep``awunique = '0; +`define __AXI_TO_ACE_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = 
__rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; \ + __opt_as __lhs``__lhs_sep``snoop = '0; \ + __opt_as __lhs``__lhs_sep``bar = '0; \ + __opt_as __lhs``__lhs_sep``domain = '0; +`define __AXI_TO_ACE_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = {2'b00, __rhs``__rhs_sep``resp}; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; + +`define ACE_TO_AXI_ASSIGN_R_STRUCT(dst, src) \ + `__ACE_TO_AXI_R(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_AW_STRUCT(dst, src) \ + `__AXI_TO_ACE_AW(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_AR_STRUCT(dst, src) \ + `__AXI_TO_ACE_AR(assign, dst, ., src, .) + +`define AXI_TO_ACE_ASSIGN_R_STRUCT(dst, src) \ + `__AXI_TO_ACE_R(assign, dst, ., src, .) + +`define ACE_TO_AXI_SET_R_STRUCT(dst, src) \ + `__ACE_TO_AXI_R(, dst, ., src, .) + +`define AXI_TO_ACE_SET_AW_STRUCT(dst, src) \ + `__AXI_TO_ACE_AW(, dst, ., src, .) + +`define AXI_TO_ACE_SET_AR_STRUCT(dst, src) \ + `__AXI_TO_ACE_AR(, dst, ., src, .) + +`define AXI_TO_ACE_SET_R_STRUCT(dst, src) \ + `__AXI_TO_ACE_R(, dst, ., src, .) 
+ + +`define ACE_TO_AXI_ASSIGN_REQ(dst, src) \ + `AXI_ASSIGN_AW_STRUCT(dst.aw, src.aw) \ + `AXI_ASSIGN_AR_STRUCT(dst.ar, src.ar) \ + `AXI_ASSIGN_W_STRUCT(dst.w, src.w) \ + assign dst.aw_valid = src.aw_valid; \ + assign dst.ar_valid = src.ar_valid; \ + assign dst.w_valid = src.w_valid; \ + assign dst.b_ready = src.b_ready; \ + assign dst.r_ready = src.r_ready; + +`define ACE_TO_AXI_ASSIGN_RESP(dst, src) \ + `ACE_TO_AXI_ASSIGN_R_STRUCT(dst.r, src.r) \ + `AXI_ASSIGN_B_STRUCT(dst.b, src.b) \ + assign dst.aw_ready = src.aw_ready; \ + assign dst.ar_ready = src.ar_ready; \ + assign dst.w_ready = src.w_ready; \ + assign dst.b_valid = src.b_valid; \ + assign dst.r_valid = src.r_valid; + +`define AXI_TO_ACE_ASSIGN_REQ(dst, src) \ + `AXI_TO_ACE_ASSIGN_AW_STRUCT(dst.aw, src.aw) \ + `AXI_TO_ACE_ASSIGN_AR_STRUCT(dst.ar, src.ar) \ + `AXI_ASSIGN_W_STRUCT(dst.w, src.w) \ + assign dst.aw_valid = src.aw_valid; \ + assign dst.ar_valid = src.ar_valid; \ + assign dst.w_valid = src.w_valid; \ + assign dst.b_ready = src.b_ready; \ + assign dst.r_ready = src.r_ready; + + +`define AXI_TO_ACE_ASSIGN_RESP(dst, src) \ + `AXI_TO_ACE_ASSIGN_R_STRUCT(dst.r, src.r) \ + `AXI_ASSIGN_B_STRUCT(dst.b, src.b) \ + assign dst.aw_ready = src.aw_ready; \ + assign dst.ar_ready = src.ar_ready; \ + assign dst.w_ready = src.w_ready; \ + assign dst.b_valid = src.b_valid; \ + assign dst.r_valid = src.r_valid; + + +`endif // ACE_CONVERT_SVH_ diff --git a/include/ace/domain.svh b/include/ace/domain.svh new file mode 100644 index 0000000..9b1ec81 --- /dev/null +++ b/include/ace/domain.svh @@ -0,0 +1,30 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + + +`ifndef ACE_DOMAIN_SVH_ +`define ACE_DOMAIN_SVH_ + + ////////////////// + // Domain types // + ////////////////// + +`define ACE_DECLARE_DOMAIN_MAP_T(__num_subordinates) \ + struct packed { \ + logic [__num_subordinates-1:0] initiator; \ + logic [__num_subordinates-1:0] inner; \ + logic [__num_subordinates-1:0] outer; \ + } + +`define ACE_TYPEDEF_DOMAIN_TYPEDEF_MAP_T(__num_subordinates, __map_t) \ + typedef `ACE_DECLARE_DOMAIN_MAP_T(__num_subordinates) __map_t; + +`endif // ACE_DOMAIN_SVH_ diff --git a/include/ace/typedef.svh b/include/ace/typedef.svh index 10d95a6..54fffa9 100644 --- a/include/ace/typedef.svh +++ b/include/ace/typedef.svh @@ -19,7 +19,7 @@ `include "axi/typedef.svh" //////////////////////////////////////////////////////////////////////////////////////////////////// -// AXI4+ATOP Channel and Request/Response Structs (with snoop support) +// ACE4+ATOP Channel and Request/Response Structs // // Usage Example: // `ACE_TYPEDEF_AW_CHAN_T(axi_aw_t, axi_addr_t, axi_id_t, axi_user_t) @@ -42,8 +42,8 @@ axi_pkg::atop_t atop; \ user_t user; \ ace_pkg::awsnoop_t snoop; \ - ace_pkg::bar_t bar; \ - ace_pkg::domain_t domain; \ + ace_pkg::axbar_t bar; \ + ace_pkg::axdomain_t domain; \ ace_pkg::awunique_t awunique; \ } aw_chan_t; `define ACE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) \ @@ -60,8 +60,8 @@ axi_pkg::region_t region; \ user_t user; \ ace_pkg::arsnoop_t snoop; \ - ace_pkg::bar_t bar; \ - ace_pkg::domain_t domain; \ + ace_pkg::axbar_t bar; \ + ace_pkg::axdomain_t domain; \ } ar_chan_t; `define ACE_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) \ typedef struct packed { \ @@ -73,16 +73,14 @@ } 
r_chan_t; `define ACE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) \ typedef struct packed { \ - aw_chan_t aw; \ + aw_chan_t aw; \ logic aw_valid; \ w_chan_t w; \ logic w_valid; \ logic b_ready; \ - ar_chan_t ar; \ + ar_chan_t ar; \ logic ar_valid; \ logic r_ready; \ - logic wack; \ - logic rack; \ } req_t; `define ACE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ typedef struct packed { \ @@ -97,25 +95,43 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -// All AXI4+ATOP Channels and Request/Response Structs in One Macro (with snoop support) +// All ACE4+ATOP Channels and Request/Response Structs in One Macro - Custom Type Name Version +// +// This can be used whenever the user is interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `ACE_TYPEDEF_ALL_CT(axi, axi_req_t, axi_rsp_t, addr_t, id_t, data_t, strb_t, user_t) +// +// This defines `axi_req_t` and `axi_rsp_t` request/response structs as well as `axi_aw_chan_t`, +// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
+`define ACE_TYPEDEF_ALL_CT(__name, __req, __rsp, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `ACE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ + `ACE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ + `ACE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ + `ACE_TYPEDEF_REQ_T(__req, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `ACE_TYPEDEF_RESP_T(__rsp, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All ACE4+ATOP Channels and Request/Response Structs in One Macro // // This can be used whenever the user is not interested in "precise" control of the naming of the // individual channels. // // Usage Example: -// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) +// `ACE_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) // // This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`, // `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
-`define ACE_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ - `ACE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ - `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ - `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ - `ACE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ - `ACE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ - `ACE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ - `ACE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +`define ACE_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `ACE_TYPEDEF_ALL_CT(__name, __name``_req_t, __name``_resp_t, __addr_t, __id_t, __data_t, __strb_t, __user_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////// +// ACE4 Snoop Channel and Request/Response Structs // Usage Example: // `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, snoop_addr_t) // 'SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, snoop_data_t) @@ -124,8 +140,8 @@ `define SNOOP_TYPEDEF_AC_CHAN_T(ac_chan_t, addr_t) \ typedef struct packed { \ addr_t addr; \ - snoop_pkg::acsnoop_t snoop; \ - snoop_pkg::acprot_t prot; \ + ace_pkg::acsnoop_t snoop; \ + ace_pkg::acprot_t prot; \ } ac_chan_t; `define SNOOP_TYPEDEF_CD_CHAN_T(cd_chan_t, data_t) \ typedef struct packed { \ @@ -133,7 +149,9 @@ logic last; \ } cd_chan_t; `define SNOOP_TYPEDEF_CR_CHAN_T(cr_chan_t) \ - typedef snoop_pkg::crresp_t cr_chan_t; + typedef struct packed { \ + ace_pkg::crresp_t resp; \ + } cr_chan_t; `define SNOOP_TYPEDEF_REQ_T(req_t, ac_chan_t) \ typedef struct packed { \ logic ac_valid; \ @@ -147,20 +165,32 @@ logic cd_valid; \ cd_chan_t cd; \ logic cr_valid; \ - cr_chan_t cr_resp; \ + cr_chan_t cr; \ } resp_t; 
//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// // Usage Example: -// `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) +// `SNOOP_TYPEDEF_ALL_CT(snoop, snoop_req_t, snoop_resp_t, addr_t, data_t) // // This defines `snoop_req_t` and `snoop_resp_t` request/response structs as well as `snoop_ac_chan_t`, // `snoop_cd_chan_t` and `snoop_cr_chan_t` channel structs. - `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ - `SNOOP_TYPEDEF_AC_CHAN_T(__name``_aw_chan_t, __addr_t) \ + `define SNOOP_TYPEDEF_ALL_CT(__name, __req, __rsp, __addr_t, __data_t) \ + `SNOOP_TYPEDEF_AC_CHAN_T(__name``_ac_chan_t, __addr_t) \ `SNOOP_TYPEDEF_CR_CHAN_T(__name``_cr_chan_t) \ - `SNOOP_TYPEDEF_REQ_T(__name``_req_t, __name``_ac_chan_t) \ - `SNOOP_TYPEDEF_RESP_T(__name``_resp_t, __name``_cd_chan_t, __name``_cr_chan_t) + `SNOOP_TYPEDEF_CD_CHAN_T(__name``_cd_chan_t, __data_t) \ + `SNOOP_TYPEDEF_REQ_T(__req, __name``_ac_chan_t) \ + `SNOOP_TYPEDEF_RESP_T(__rsp, __name``_cd_chan_t, __name``_cr_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Usage Example: +// `SNOOP_TYPEDEF_ALL(snoop, addr_t, data_t) +// +// This defines `snoop_req_t` and `snoop_resp_t` request/response structs as well as `snoop_ac_chan_t`, +// `snoop_cd_chan_t` and `snoop_cr_chan_t` channel structs. 
+ `define SNOOP_TYPEDEF_ALL(__name, __addr_t, __data_t) \ + `SNOOP_TYPEDEF_ALL_CT(__name, __name``_req_t, __name``_resp_t, __addr_t, __data_t) //////////////////////////////////////////////////////////////////////////////////////////////////// `endif diff --git a/lint/Makefile b/lint/Makefile new file mode 100644 index 0000000..240869c --- /dev/null +++ b/lint/Makefile @@ -0,0 +1,28 @@ +BENDER ?= bender +VERIBLE_FORMAT ?= verible-verilog-format +VERIBLE_LINT ?= verible-verilog-lint +PROJ_ROOT = $(abspath ..) + +format: + $(VERIBLE_FORMAT) \ + --inplace \ + --indentation_spaces=4 \ + --column_limit=100 \ + --assignment_statement_alignment=align \ + --case_items_alignment=align \ + --module_net_variable_alignment=align \ + --port_declarations_alignment=align \ + --named_parameter_alignment=align \ + --named_port_alignment=align \ + --formal_parameters_alignment=align \ + --struct_union_members_alignment=align \ + --compact_indexing_and_selections=true \ + --port_declarations_right_align_packed_dimensions=true \ + --port_declarations_right_align_unpacked_dimensions=true \ + $$($(BENDER) script --no-deps -d $(PROJ_ROOT) flist) + +lint: + $(VERIBLE_LINT) \ + --rules_config verible_rules.cfg \ + --waiver_files verible.waiver \ + $$($(BENDER) script --no-deps -d $(PROJ_ROOT) flist) diff --git a/lint/verible.waiver b/lint/verible.waiver new file mode 100644 index 0000000..97681e3 --- /dev/null +++ b/lint/verible.waiver @@ -0,0 +1,3 @@ +waive --rule=interface-name-style --location=".*ace_intf\.sv" +waive --rule=line-length --location=".*ace_intf\.sv" +waive --rule=interface-name-style --location=".*snoop_intf\.sv" diff --git a/lint/verible_rules.cfg b/lint/verible_rules.cfg new file mode 100644 index 0000000..66353f2 --- /dev/null +++ b/lint/verible_rules.cfg @@ -0,0 +1,5 @@ +line-length=length:100 +parameter-name-style=localparam_style:CamelCase|ALL_CAPS;parameter_style:CamelCase|ALL_CAPS +no-tabs +no-trailing-spaces +signal-name-style diff --git a/scripts/run_vsim.sh 
b/scripts/run_vsim.sh index 9ed7409..7228e95 100755 --- a/scripts/run_vsim.sh +++ b/scripts/run_vsim.sh @@ -26,9 +26,16 @@ fi # regression-consistent. SEEDS=(0) +# $VSIM -do ${ROOT}/scripts/dofile.do -sv_seed $seed "$@" | tee vsim.log 2>&1 +# echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 call_vsim() { for seed in ${SEEDS[@]}; do - echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + #echo "run -all" | $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + if [ -f ${ROOT}/scripts/$1.do ]; then + $VSIM -do ${ROOT}/scripts/$1.do -sv_seed $seed "$@" | tee vsim.log 2>&1 + else + $VSIM -sv_seed $seed "$@" | tee vsim.log 2>&1 + fi grep "Errors: 0," vsim.log done } @@ -40,37 +47,30 @@ exec_test() { fi case "$1" in ace_ccu_top) - for NumMst in 2 4 6; do - for NumSlv in 1; do - for Atop in 0 1 ; do - for Exclusive in 0 1; do - for UniqueIds in 0 1 ; do - call_vsim tb_ace_ccu_top -gTbNumMst=$NumMst -gTbNumSlv=$NumSlv \ - -gTbEnAtop=$Atop -gTbEnExcl=$Exclusive \ - -gTbUniqueIds=$UniqueIds - done - done - done - done - done + call_vsim tb_ace_ccu_top -t 1ns -classdebug -coverage -voptargs="+acc" \ + -gAddrWidth=$ADDR_WIDTH \ + -gDataWidth=$DATA_WIDTH \ + -gWordWidth=$WORD_WIDTH \ + -gCachelineWords=$CACHELINE_WORDS \ + -gWays=$WAYS \ + -gSets=$SETS \ + -gTbNumMst=$NMASTERS \ + -gNoMstGroups=$NGROUPS \ + -gMemDir=$MEM_DIR ;; - ace_ccu_top_sanity) - for NumMst in 2; do - for NumSlv in 1; do - for Atop in 0; do - for Exclusive in 0; do - for UniqueIds in 0; do - call_vsim tb_ace_ccu_top -gTbNumMst=$NumMst -gTbNumSlv=$NumSlv \ - -gTbEnAtop=$Atop -gTbEnExcl=$Exclusive \ - -gTbUniqueIds=$UniqueIds - done - done - done - done - done + ccu_ctrl_r_snoop) + call_vsim tb_ccu_ctrl_r_snoop -t 1ns -coverage -voptargs="+acc" \ + -gAddrWidth=$ADDR_WIDTH \ + -gDataWidth=$DATA_WIDTH \ + -gWordWidth=$WORD_WIDTH \ + -gCachelineWords=$CACHELINE_WORDS \ + -gWays=$WAYS \ + -gSets=$SETS \ + -gTbNumMst=$NMASTERS \ + -gMemDir=$MEM_DIR ;; *) - call_vsim tb_$1 -t 1ns -coverage 
-voptargs="+acc +cover=bcesfx" + call_vsim tb_$1 -t 1ns -coverage -voptargs="+acc" ;; esac } diff --git a/scripts/snoop_types.do b/scripts/snoop_types.do new file mode 100644 index 0000000..50b4363 --- /dev/null +++ b/scripts/snoop_types.do @@ -0,0 +1,23 @@ +radix define WriteSnoop { + 3'b000 "WrNoSnp/WrUnq/Br" + 3'b001 "WriteLineUnique" + 3'b010 "WriteClean" + 3'b011 "WriteBack" + 3'b100 "Evict" + 3'b101 "WriteEvict" +} + +radix define ReadSnoop { + 4'b0000 "RdNoSnp/RdOnce/Br" + 4'b0001 "ReadShared" + 4'b0010 "ReadClean" + 4'b0011 "ReadNotSharedDirty" + 4'b0111 "ReadUnique" + 4'b1011 "CleanUnique" + 4'b1100 "MakeUnique" + 4'b1000 "CleanShared" + 4'b1001 "CleanInvalid" + 4'b1101 "MakeInvalid" + 4'b1110 "DVMComplete" + 4'b1111 "DVMMessage" +} \ No newline at end of file diff --git a/scripts/tb_ace_ccu_top.do b/scripts/tb_ace_ccu_top.do new file mode 100644 index 0000000..c6bc62e --- /dev/null +++ b/scripts/tb_ace_ccu_top.do @@ -0,0 +1,26 @@ +log -r * + +log -class cache_test_pkg::cache_scoreboard::cache_scoreboard__1 + +do snoop_types.do + +# Figure out number of masters from number of ACE interfaces +set n_masters [examine -radix unsigned sim:/tb_ace_ccu_top/TbNumMst] + +add wave -divider "Clock and Reset" +add wave sim:/tb_ace_ccu_top/clk +add wave sim:/tb_ace_ccu_top/rst_n + +add wave -divider "Towards memory" +add wave sim:/tb_ace_ccu_top/axi_intf/* + +for {set n 0} {$n < $n_masters} {incr n 1} { + add wave -divider "Towards cached master m$n" + add wave sim:/tb_ace_ccu_top/ace_intf[$n]/* + add wave -divider "Towards snooped cache m$n" + add wave sim:/tb_ace_ccu_top/snoop_intf[$n]/* +} + +onfinish stop +run -all +view wave diff --git a/src/ace_ccu_top.sv b/src/ace_ccu_top.sv deleted file mode 100644 index 8ba1fff..0000000 --- a/src/ace_ccu_top.sv +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2023 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, 
Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// ace_ccu_top: Top level module for closely coupled cache coherency protocol -`include "ace/assign.svh" -`include "ace/typedef.svh" - -module ace_ccu_top - import cf_math_pkg::idx_width; -#( - parameter ace_pkg::ccu_cfg_t Cfg = '0, - parameter bit ATOPs = 1'b1, - parameter type slv_aw_chan_t = logic, - parameter type mst_aw_chan_t = logic, - parameter type mst_stg_aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type slv_b_chan_t = logic, - parameter type mst_b_chan_t = logic, - parameter type mst_stg_b_chan_t = logic, - parameter type slv_ar_chan_t = logic, - parameter type mst_ar_chan_t = logic, - parameter type mst_stg_ar_chan_t = logic, - parameter type slv_r_chan_t = logic, - parameter type mst_r_chan_t = logic, - parameter type mst_stg_r_chan_t = logic, - parameter type slv_req_t = logic, - parameter type slv_resp_t = logic, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type mst_stg_req_t = logic, - parameter type mst_stg_resp_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic - -) ( - input logic clk_i, - input logic rst_ni, - input logic test_i, - input slv_req_t [Cfg.NoSlvPorts-1:0] slv_ports_req_i, - output slv_resp_t [Cfg.NoSlvPorts-1:0] slv_ports_resp_o, - output snoop_req_t [Cfg.NoSlvPorts-1:0] slv_snp_req_o, - input snoop_resp_t [Cfg.NoSlvPorts-1:0] slv_snp_resp_i, - output mst_req_t mst_ports_req_o, - input mst_resp_t mst_ports_resp_i 
-); - -// signals from the ace_demuxes -slv_req_t [Cfg.NoSlvPorts-1:0] [1:0] slv_reqs; // one for non-shareable and one for shareable req -slv_resp_t [Cfg.NoSlvPorts-1:0] [1:0] slv_resps; -// signals into the ace_muxes -mst_stg_req_t [Cfg.NoSlvPorts:0] mst_reqs; // one extra port for CCU -mst_stg_resp_t [Cfg.NoSlvPorts:0] mst_resps; -mst_stg_req_t [Cfg.NoSlvPorts:0] mst_reqs_tmp; -// signals into the CCU -slv_req_t [Cfg.NoSlvPorts-1:0] ccu_reqs_i; -slv_resp_t [Cfg.NoSlvPorts-1:0] ccu_resps_o; -// signals from the CCU -mst_stg_req_t ccu_reqs_mux_o; -mst_stg_resp_t ccu_resps_mux_i; -mst_stg_req_t ccu_reqs_o; -mst_stg_resp_t ccu_resps_i; - -// selection lines for mux and demuxes -logic [Cfg.NoSlvPorts-1:0] slv_aw_select, slv_ar_select; - - -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux - - // routing of incoming request through transaction type - ace_trs_dec #( - .slv_ace_req_t ( slv_req_t ) - ) i_ace_trs_dec ( - .slv_reqs_i ( slv_ports_req_i[i] ), - .snoop_aw_trs ( slv_aw_select[i] ), - .snoop_ar_trs ( slv_ar_select[i] ) - ); - - // demux - axi_demux #( - .AxiIdWidth ( Cfg.AxiIdWidthSlvPorts ), // ID Width - .AtopSupport ( ATOPs ), - .aw_chan_t ( slv_aw_chan_t ), // AW Channel Type - .w_chan_t ( w_chan_t ), // W Channel Type - .b_chan_t ( slv_b_chan_t ), // B Channel Type - .ar_chan_t ( slv_ar_chan_t ), // AR Channel Type - .r_chan_t ( slv_r_chan_t ), // R Channel Type - .axi_req_t ( slv_req_t ), - .axi_resp_t ( slv_resp_t ), - .NoMstPorts ( 2 ), // one for CCU module and one for mux - .MaxTrans ( Cfg.MaxMstTrans ), - .AxiLookBits ( Cfg.AxiIdUsedSlvPorts ), - .UniqueIds ( Cfg.UniqueIds ), - //.FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[9] ), - .SpillW ( Cfg.LatencyMode[8] ), - .SpillB ( Cfg.LatencyMode[7] ), - .SpillAr ( Cfg.LatencyMode[6] ), - .SpillR ( Cfg.LatencyMode[5] ) - ) i_axi_demux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Testmode enable - .slv_req_i ( slv_ports_req_i[i] ), - 
.slv_aw_select_i ( slv_aw_select[i] ), - .slv_ar_select_i ( slv_ar_select[i] ), - .slv_resp_o ( slv_ports_resp_o[i] ), - .mst_reqs_o ( slv_reqs[i] ), - .mst_resps_i ( slv_resps[i] ) - ); -end - -axi_mux #( - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts+$clog2(Cfg.NoSlvPorts) ), // ID width of the slave ports - .slv_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, slave ports - .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port - .w_chan_t ( w_chan_t ), // W Channel Type, all ports - .slv_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, slave ports - .mst_b_chan_t ( mst_b_chan_t ), // B Channel Type, master port - .slv_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, slave ports - .mst_ar_chan_t ( mst_ar_chan_t ), // AR Channel Type, master port - .slv_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, slave ports - .mst_r_chan_t ( mst_r_chan_t ), // R Channel Type, master port - .slv_req_t ( mst_stg_req_t ), - .slv_resp_t ( mst_stg_resp_t ), - .mst_req_t ( mst_req_t ), - .mst_resp_t ( mst_resp_t ), - .NoSlvPorts ( Cfg.NoSlvPorts + 1 ), // Number of Masters for the modules - .MaxWTrans ( Cfg.MaxMstTrans ), - .FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[4] ), - .SpillW ( Cfg.LatencyMode[3] ), - .SpillB ( Cfg.LatencyMode[2] ), - .SpillAr ( Cfg.LatencyMode[1] ), - .SpillR ( Cfg.LatencyMode[0] ) -) i_axi_mux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Test Mode enable - .slv_reqs_i ( mst_reqs ), - .slv_resps_o ( mst_resps ), - .mst_req_o ( mst_ports_req_o ), - .mst_resp_i ( mst_ports_resp_i ) -); - - -// connection reqs and resps for non-shareable transactions with axi_mux -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_non_shared_conn - `ACE_ASSIGN_REQ_STRUCT(mst_reqs_tmp[i], slv_reqs[i][0]) - `ACE_ASSIGN_RESP_STRUCT(slv_resps[i][0], mst_resps[i]) - - always_comb begin - mst_reqs[i] = mst_reqs_tmp[i]; - mst_reqs[i].aw.user[$clog2(Cfg.NoSlvPorts)-1:0] = i[$clog2(Cfg.NoSlvPorts)-1:0]; - 
mst_reqs[i].ar.user[$clog2(Cfg.NoSlvPorts)-1:0] = i[$clog2(Cfg.NoSlvPorts)-1:0]; - - end -end - -// connect CCU reqs and resps to mux -always_comb begin - mst_reqs[Cfg.NoSlvPorts] = mst_reqs_tmp[Cfg.NoSlvPorts]; - mst_reqs[Cfg.NoSlvPorts].aw.user[$clog2(Cfg.NoSlvPorts)-1:0] = mst_reqs_tmp[Cfg.NoSlvPorts].aw.id[Cfg.AxiIdWidthSlvPorts +: $clog2(Cfg.NoSlvPorts)]; - mst_reqs[Cfg.NoSlvPorts].ar.user[$clog2(Cfg.NoSlvPorts)-1:0] = mst_reqs_tmp[Cfg.NoSlvPorts].ar.id[Cfg.AxiIdWidthSlvPorts +: $clog2(Cfg.NoSlvPorts)]; -end -`ACE_ASSIGN_REQ_STRUCT(mst_reqs_tmp[Cfg.NoSlvPorts], ccu_reqs_o) -`ACE_ASSIGN_RESP_STRUCT(ccu_resps_i, mst_resps[Cfg.NoSlvPorts]) - -// connection reqs and resps for shareable transactions with CCU -for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_shared_conn - `ACE_ASSIGN_REQ_STRUCT(ccu_reqs_i[i], slv_reqs[i][1]) - `ACE_ASSIGN_RESP_STRUCT(slv_resps[i][1], ccu_resps_o[i]) -end - - -axi_mux #( - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports - .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports - .mst_aw_chan_t ( mst_stg_aw_chan_t ), // AW Channel Type, master port - .w_chan_t ( w_chan_t ), // W Channel Type, all ports - .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports - .mst_b_chan_t ( mst_stg_b_chan_t ), // B Channel Type, master port - .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports - .mst_ar_chan_t ( mst_stg_ar_chan_t ), // AR Channel Type, master port - .slv_r_chan_t ( slv_r_chan_t ), // R Channel Type, slave ports - .mst_r_chan_t ( mst_stg_r_chan_t ), // R Channel Type, master port - .slv_req_t ( slv_req_t ), - .slv_resp_t ( slv_resp_t ), - .mst_req_t ( mst_stg_req_t ), - .mst_resp_t ( mst_stg_resp_t ), - .NoSlvPorts ( Cfg.NoSlvPorts ), // Number of Masters for the modules - .MaxWTrans ( Cfg.MaxMstTrans ), - .FallThrough ( Cfg.FallThrough ), - .SpillAw ( Cfg.LatencyMode[4] ), - .SpillW ( Cfg.LatencyMode[3] ), - .SpillB ( Cfg.LatencyMode[2] ), - .SpillAr ( 
Cfg.LatencyMode[1] ), - .SpillR ( Cfg.LatencyMode[0] ) -) i_ace_mux ( - .clk_i, // Clock - .rst_ni, // Asynchronous reset active low - .test_i, // Test Mode enable - .slv_reqs_i ( ccu_reqs_i ), - .slv_resps_o ( ccu_resps_o ), - .mst_req_o ( ccu_reqs_mux_o ), - .mst_resp_i ( ccu_resps_mux_i ) -); - -ccu_fsm #( - .DcacheLineWidth ( Cfg.DcacheLineWidth ), - .AxiDataWidth ( Cfg.AxiDataWidth ), - .NoMstPorts ( Cfg.NoSlvPorts ), - .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports - .mst_req_t ( mst_stg_req_t ), - .mst_resp_t ( mst_stg_resp_t ), - .snoop_req_t ( snoop_req_t ), - .snoop_resp_t ( snoop_resp_t ) - -) fsm ( - .clk_i, - .rst_ni, - .ccu_req_i ( ccu_reqs_mux_o ), - .ccu_resp_o ( ccu_resps_mux_i ), - .ccu_req_o ( ccu_reqs_o ), - .ccu_resp_i ( ccu_resps_i ), - .s2m_req_o ( slv_snp_req_o ), - .m2s_resp_i ( slv_snp_resp_i ) -); - -endmodule - - - -module ace_ccu_top_intf - import cf_math_pkg::idx_width; -#( - parameter ace_pkg::ccu_cfg_t Cfg = '0, - parameter bit ATOPS = 1'b1 -) ( - input logic clk_i, - input logic rst_ni, - input logic test_i, - SNOOP_BUS.Slave snoop_ports [Cfg.NoSlvPorts-1:0], - ACE_BUS.Slave slv_ports [Cfg.NoSlvPorts-1:0], - AXI_BUS.Master mst_ports -); - - localparam int unsigned AxiIdWidthMstPortsStage = Cfg.AxiIdWidthSlvPorts +$clog2(Cfg.NoSlvPorts); - localparam int unsigned AxiIdWidthMstPorts = AxiIdWidthMstPortsStage + $clog2(Cfg.NoSlvPorts+1); - - typedef logic [AxiIdWidthMstPortsStage-1:0] id_mst_stg_t; - typedef logic [AxiIdWidthMstPorts -1:0] id_mst_t; - typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t; - typedef logic [Cfg.AxiAddrWidth -1:0] addr_t; - typedef logic [Cfg.AxiDataWidth -1:0] data_t; - typedef logic [Cfg.AxiDataWidth/8 -1:0] strb_t; - typedef logic [Cfg.AxiUserWidth -1:0] user_t; - - // snoop channel conversion - `ACE_TYPEDEF_AW_CHAN_T(mst_ace_stg_aw_chan_t, addr_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_AW_CHAN_T(mst_ace_aw_chan_t, addr_t, id_mst_t, user_t) - 
`ACE_TYPEDEF_AW_CHAN_T(slv_ace_aw_chan_t, addr_t, id_slv_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(mst_ace_stg_ar_chan_t, addr_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(mst_ace_ar_chan_t, addr_t, id_mst_t, user_t) - `ACE_TYPEDEF_AR_CHAN_T(slv_ace_ar_chan_t, addr_t, id_slv_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_stg_b_chan_t, id_mst_stg_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, id_mst_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, id_slv_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(mst_ace_stg_r_chan_t, data_t, id_mst_stg_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(mst_ace_r_chan_t, data_t, id_mst_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(slv_ace_r_chan_t, data_t, id_slv_t, user_t) - `ACE_TYPEDEF_REQ_T(mst_ace_stg_req_t, mst_ace_stg_aw_chan_t, w_chan_t, mst_ace_stg_ar_chan_t) - `ACE_TYPEDEF_REQ_T(mst_ace_req_t, mst_ace_aw_chan_t, w_chan_t, mst_ace_ar_chan_t) - `ACE_TYPEDEF_REQ_T(slv_ace_req_t, slv_ace_aw_chan_t, w_chan_t, slv_ace_ar_chan_t) - `ACE_TYPEDEF_RESP_T(mst_ace_stg_resp_t, mst_stg_b_chan_t, mst_ace_stg_r_chan_t) - `ACE_TYPEDEF_RESP_T(mst_ace_resp_t, mst_b_chan_t, mst_ace_r_chan_t) - `ACE_TYPEDEF_RESP_T(slv_ace_resp_t, slv_b_chan_t, slv_ace_r_chan_t) - `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) - `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) - `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) - `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) - `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) - - - mst_ace_req_t mst_ace_reqs; - mst_ace_resp_t mst_ace_resps; - slv_ace_req_t [Cfg.NoSlvPorts-1:0] slv_ace_reqs; - slv_ace_resp_t [Cfg.NoSlvPorts-1:0] slv_ace_resps; - snoop_req_t [Cfg.NoSlvPorts-1:0] snoop_reqs; - snoop_resp_t [Cfg.NoSlvPorts-1:0] snoop_resps; - - - /// Assigning ACE request from CCU Mux to slave(RAM) - `AXI_ASSIGN_FROM_REQ(mst_ports, mst_ace_reqs) - /// Assigning AXI response from slave (RAM) to CCU mux which accepts only ACE type response - `ACE_ASSIGN_TO_RESP(mst_ace_resps, mst_ports) - - - for (genvar i = 
0; i < Cfg.NoSlvPorts; i++) begin : gen_assign_slv - `ACE_ASSIGN_TO_REQ(slv_ace_reqs[i], slv_ports[i]) - `ACE_ASSIGN_FROM_RESP(slv_ports[i], slv_ace_resps[i]) - /// Assigning SNOOP request from CCU logic to master - `SNOOP_ASSIGN_FROM_REQ(snoop_ports[i], snoop_reqs[i]) - /// Assigning SNOOP response from master to CCU logic - `SNOOP_ASSIGN_TO_RESP(snoop_resps[i], snoop_ports[i]) - end - - - ace_ccu_top #( - .Cfg ( Cfg ), - .ATOPs ( ATOPS ), - .slv_aw_chan_t ( slv_ace_aw_chan_t ), - .mst_stg_aw_chan_t ( mst_ace_stg_aw_chan_t ), - .mst_aw_chan_t ( mst_ace_aw_chan_t ), - .w_chan_t ( w_chan_t ), - .slv_b_chan_t ( slv_b_chan_t ), - .mst_b_chan_t ( mst_b_chan_t ), - .mst_stg_b_chan_t ( mst_stg_b_chan_t ), - .slv_ar_chan_t ( slv_ace_ar_chan_t ), - .mst_ar_chan_t ( mst_ace_ar_chan_t ), - .mst_stg_ar_chan_t ( mst_ace_stg_ar_chan_t ), - .slv_r_chan_t ( slv_ace_r_chan_t ), - .mst_r_chan_t ( mst_ace_r_chan_t ), - .mst_stg_r_chan_t ( mst_ace_stg_r_chan_t ), - .slv_req_t ( slv_ace_req_t ), - .slv_resp_t ( slv_ace_resp_t ), - .mst_req_t ( mst_ace_req_t ), - .mst_resp_t ( mst_ace_resp_t ), - .mst_stg_req_t ( mst_ace_stg_req_t ), - .mst_stg_resp_t ( mst_ace_stg_resp_t ), - .snoop_req_t ( snoop_req_t ), - .snoop_resp_t ( snoop_resp_t ) - ) i_ccu_top ( - .clk_i, - .rst_ni, - .test_i, - .slv_ports_req_i ( slv_ace_reqs ), - .slv_ports_resp_o ( slv_ace_resps ), - .slv_snp_req_o ( snoop_reqs ), - .slv_snp_resp_i ( snoop_resps ), - .mst_ports_req_o ( mst_ace_reqs ), - .mst_ports_resp_i ( mst_ace_resps ) - ); - -endmodule diff --git a/src/ace_intf.sv b/src/ace_intf.sv index 55abbaa..2b27407 100644 --- a/src/ace_intf.sv +++ b/src/ace_intf.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -10,266 +11,290 @@ // 
CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +interface CLK_IF ( + input clk_i +); +endinterface // ACE bus interafces interface ACE_BUS #( - parameter int unsigned AXI_ADDR_WIDTH = 0, - parameter int unsigned AXI_DATA_WIDTH = 0, - parameter int unsigned AXI_ID_WIDTH = 0, - parameter int unsigned AXI_USER_WIDTH = 0 + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0 ); - localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; - typedef logic [AXI_ID_WIDTH-1:0] id_t; - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_STRB_WIDTH-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; + id_t aw_id; + addr_t aw_addr; + axi_pkg::len_t aw_len; + axi_pkg::size_t aw_size; + axi_pkg::burst_t aw_burst; + logic aw_lock; + axi_pkg::cache_t aw_cache; + axi_pkg::prot_t aw_prot; + axi_pkg::qos_t aw_qos; + axi_pkg::region_t aw_region; + axi_pkg::atop_t aw_atop; + user_t aw_user; + logic aw_valid; + logic aw_ready; + ace_pkg::awsnoop_t aw_snoop; + ace_pkg::axbar_t aw_bar; + ace_pkg::axdomain_t aw_domain; + ace_pkg::awunique_t aw_awunique; - id_t aw_id; - addr_t aw_addr; - axi_pkg::len_t aw_len; - axi_pkg::size_t aw_size; - axi_pkg::burst_t aw_burst; - logic aw_lock; - axi_pkg::cache_t aw_cache; - axi_pkg::prot_t aw_prot; - axi_pkg::qos_t aw_qos; - axi_pkg::region_t aw_region; - axi_pkg::atop_t aw_atop; - user_t aw_user; - logic aw_valid; - logic aw_ready; - ace_pkg::awsnoop_t aw_snoop; - ace_pkg::bar_t 
aw_bar; - ace_pkg::domain_t aw_domain; - ace_pkg::awunique_t aw_awunique; + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; - data_t w_data; - strb_t w_strb; - logic w_last; - user_t w_user; - logic w_valid; - logic w_ready; + id_t b_id; + axi_pkg::resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; - id_t b_id; - axi_pkg::resp_t b_resp; - user_t b_user; - logic b_valid; - logic b_ready; + id_t ar_id; + addr_t ar_addr; + axi_pkg::len_t ar_len; + axi_pkg::size_t ar_size; + axi_pkg::burst_t ar_burst; + logic ar_lock; + axi_pkg::cache_t ar_cache; + axi_pkg::prot_t ar_prot; + axi_pkg::qos_t ar_qos; + axi_pkg::region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + ace_pkg::arsnoop_t ar_snoop; + ace_pkg::axbar_t ar_bar; + ace_pkg::axdomain_t ar_domain; - id_t ar_id; - addr_t ar_addr; - axi_pkg::len_t ar_len; - axi_pkg::size_t ar_size; - axi_pkg::burst_t ar_burst; - logic ar_lock; - axi_pkg::cache_t ar_cache; - axi_pkg::prot_t ar_prot; - axi_pkg::qos_t ar_qos; - axi_pkg::region_t ar_region; - user_t ar_user; - logic ar_valid; - logic ar_ready; - ace_pkg::arsnoop_t ar_snoop; - ace_pkg::bar_t ar_bar; - ace_pkg::domain_t ar_domain; - - id_t r_id; - data_t r_data; - ace_pkg::rresp_t r_resp; - logic r_last; - user_t r_user; - logic r_valid; - logic r_ready; + id_t r_id; + data_t r_data; + ace_pkg::rresp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; - logic wack; - logic rack; + logic wack; + logic rack; - modport Master ( - output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, input aw_ready, - output w_data, w_strb, w_last, w_user, w_valid, input w_ready, - input b_id, b_resp, b_user, b_valid, output b_ready, - output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, 
ar_domain, input ar_ready, - input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready, - output wack, rack - ); + modport Master( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, + input w_ready, + input b_id, b_resp, b_user, b_valid, + output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, + output r_ready, + output wack, rack + ); - modport Slave ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, output aw_ready, - input w_data, w_strb, w_last, w_user, w_valid, output w_ready, - output b_id, b_resp, b_user, b_valid, input b_ready, - input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, output ar_ready, - output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready, - input wack, rack - ); + modport Slave( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, + output w_ready, + output b_id, b_resp, b_user, b_valid, + input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, + input r_ready, + input wack, rack + ); - modport Monitor ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, 
aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, + modport Monitor( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, w_data, w_strb, w_last, w_user, w_valid, w_ready, b_id, b_resp, b_user, b_valid, b_ready, ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_ready, ar_snoop, ar_bar, ar_domain, r_id, r_data, r_resp, r_last, r_user, r_valid, r_ready, wack, rack - ); + ); endinterface /// A clocked ACE interface for use in design verification. interface ACE_BUS_DV #( - parameter int unsigned AXI_ADDR_WIDTH = 0, - parameter int unsigned AXI_DATA_WIDTH = 0, - parameter int unsigned AXI_ID_WIDTH = 0, - parameter int unsigned AXI_USER_WIDTH = 0 -)( - input logic clk_i + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0 +) ( + input logic clk_i ); - localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; - typedef logic [AXI_ID_WIDTH-1:0] id_t; - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_STRB_WIDTH-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; - id_t aw_id; - addr_t aw_addr; - axi_pkg::len_t aw_len; - axi_pkg::size_t aw_size; - axi_pkg::burst_t aw_burst; - logic aw_lock; - axi_pkg::cache_t aw_cache; - axi_pkg::prot_t aw_prot; - axi_pkg::qos_t aw_qos; - axi_pkg::region_t aw_region; - axi_pkg::atop_t aw_atop; - user_t aw_user; - logic aw_valid; - logic aw_ready; - 
ace_pkg::awsnoop_t aw_snoop; - ace_pkg::bar_t aw_bar; - ace_pkg::domain_t aw_domain; - ace_pkg::awunique_t aw_awunique; + id_t aw_id; + addr_t aw_addr; + axi_pkg::len_t aw_len; + axi_pkg::size_t aw_size; + axi_pkg::burst_t aw_burst; + logic aw_lock; + axi_pkg::cache_t aw_cache; + axi_pkg::prot_t aw_prot; + axi_pkg::qos_t aw_qos; + axi_pkg::region_t aw_region; + axi_pkg::atop_t aw_atop; + user_t aw_user; + logic aw_valid; + logic aw_ready; + ace_pkg::awsnoop_t aw_snoop; + ace_pkg::axbar_t aw_bar; + ace_pkg::axdomain_t aw_domain; + ace_pkg::awunique_t aw_awunique; - data_t w_data; - strb_t w_strb; - logic w_last; - user_t w_user; - logic w_valid; - logic w_ready; + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; - id_t b_id; - axi_pkg::resp_t b_resp; - user_t b_user; - logic b_valid; - logic b_ready; + id_t b_id; + axi_pkg::resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; - id_t ar_id; - addr_t ar_addr; - axi_pkg::len_t ar_len; - axi_pkg::size_t ar_size; - axi_pkg::burst_t ar_burst; - logic ar_lock; - axi_pkg::cache_t ar_cache; - axi_pkg::prot_t ar_prot; - axi_pkg::qos_t ar_qos; - axi_pkg::region_t ar_region; - user_t ar_user; - logic ar_valid; - logic ar_ready; - ace_pkg::arsnoop_t ar_snoop; - ace_pkg::bar_t ar_bar; - ace_pkg::domain_t ar_domain; + id_t ar_id; + addr_t ar_addr; + axi_pkg::len_t ar_len; + axi_pkg::size_t ar_size; + axi_pkg::burst_t ar_burst; + logic ar_lock; + axi_pkg::cache_t ar_cache; + axi_pkg::prot_t ar_prot; + axi_pkg::qos_t ar_qos; + axi_pkg::region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + ace_pkg::arsnoop_t ar_snoop; + ace_pkg::axbar_t ar_bar; + ace_pkg::axdomain_t ar_domain; - id_t r_id; - data_t r_data; - ace_pkg::rresp_t r_resp; - logic r_last; - user_t r_user; - logic r_valid; - logic r_ready; + id_t r_id; + data_t r_data; + ace_pkg::rresp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; - logic wack; - logic rack; + 
logic wack; + logic rack; - modport Master ( - output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, input aw_ready, - output w_data, w_strb, w_last, w_user, w_valid, input w_ready, - input b_id, b_resp, b_user, b_valid, output b_ready, - output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar,ar_domain, input ar_ready, - input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready, - output wack, rack - ); + modport Master( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, + input w_ready, + input b_id, b_resp, b_user, b_valid, + output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar,ar_domain, + input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, + output r_ready, + output wack, rack + ); - modport Slave ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, output aw_ready, - input w_data, w_strb, w_last, w_user, w_valid, output w_ready, - output b_id, b_resp, b_user, b_valid, input b_ready, - input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, output ar_ready, - output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready, - input wack, rack - ); + modport Slave( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_snoop, aw_bar, aw_domain, aw_awunique, + output aw_ready, + input w_data, 
w_strb, w_last, w_user, w_valid, + output w_ready, + output b_id, b_resp, b_user, b_valid, + input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_snoop, ar_bar, ar_domain, + output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, + input r_ready, + input wack, rack + ); - modport Monitor ( - input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, + modport Monitor( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_atop, aw_user, aw_valid, aw_ready, aw_snoop, aw_bar, aw_domain, aw_awunique, w_data, w_strb, w_last, w_user, w_valid, w_ready, b_id, b_resp, b_user, b_valid, b_ready, ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, ar_ready, ar_snoop, ar_bar, ar_domain, r_id, r_data, r_resp, r_last, r_user, r_valid, r_ready, wack, rack - ); + ); - // pragma translate_off - `ifndef VERILATOR - // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
- // AW - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_id))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_addr))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_len))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_size))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_burst))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_lock))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_cache))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_prot))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_qos))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_region))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_atop))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_user))); - assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> aw_valid)); - // W - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_data))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_strb))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_last))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> $stable(w_user))); - assert property (@(posedge clk_i) ( w_valid && ! w_ready |=> w_valid)); - // B - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_id))); - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_resp))); - assert property (@(posedge clk_i) ( b_valid && ! b_ready |=> $stable(b_user))); - assert property (@(posedge clk_i) ( b_valid && ! 
b_ready |=> b_valid)); - // AR - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_id))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_addr))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_len))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_size))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_burst))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_lock))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_cache))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_prot))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_qos))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_region))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_user))); - assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> ar_valid)); - // R - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_id))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_data))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_resp))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_last))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> $stable(r_user))); - assert property (@(posedge clk_i) ( r_valid && ! r_ready |=> r_valid)); - `endif - // pragma translate_on + // pragma translate_off +`ifndef VERILATOR + // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
+ // AW + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_id))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_addr))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_len))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_size))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_burst))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_lock))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_cache))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_prot))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_qos))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_region))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_atop))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> $stable(aw_user))); + assert property (@(posedge clk_i) (aw_valid && !aw_ready |=> aw_valid)); + // W + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_data))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_strb))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_last))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> $stable(w_user))); + assert property (@(posedge clk_i) (w_valid && !w_ready |=> w_valid)); + // B + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_id))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_resp))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> $stable(b_user))); + assert property (@(posedge clk_i) (b_valid && !b_ready |=> b_valid)); + // AR + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_id))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_addr))); + assert property (@(posedge 
clk_i) (ar_valid && !ar_ready |=> $stable(ar_len))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_size))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_burst))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_lock))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_cache))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_prot))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_qos))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_region))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> $stable(ar_user))); + assert property (@(posedge clk_i) (ar_valid && !ar_ready |=> ar_valid)); + // R + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_id))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_data))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_resp))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_last))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> $stable(r_user))); + assert property (@(posedge clk_i) (r_valid && !r_ready |=> r_valid)); +`endif + // pragma translate_on endinterface diff --git a/src/ace_pkg.sv b/src/ace_pkg.sv index cbd0462..6dd8cb0 100644 --- a/src/ace_pkg.sv +++ b/src/ace_pkg.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -13,73 +14,346 @@ //! ACE Package /// Contains all necessary type definitions, constants, and generally useful functions. +/// Naming conventions are chosen to align with the ACE specification. 
package ace_pkg; - // Support for snoop channels - typedef logic [3:0] arsnoop_t; - typedef logic [2:0] awsnoop_t; - typedef logic [1:0] bar_t; - typedef logic [1:0] domain_t; - typedef logic [0:0] awunique_t; - typedef logic [3:0] rresp_t; - - /// Slice on Demux AW channel. - localparam logic [9:0] DemuxAw = (1 << 9); - /// Slice on Demux W channel. - localparam logic [9:0] DemuxW = (1 << 8); - /// Slice on Demux B channel. - localparam logic [9:0] DemuxB = (1 << 7); - /// Slice on Demux AR channel. - localparam logic [9:0] DemuxAr = (1 << 6); - /// Slice on Demux R channel. - localparam logic [9:0] DemuxR = (1 << 5); - /// Slice on Mux AW channel. - localparam logic [9:0] MuxAw = (1 << 4); - /// Slice on Mux W channel. - localparam logic [9:0] MuxW = (1 << 3); - /// Slice on Mux B channel. - localparam logic [9:0] MuxB = (1 << 2); - /// Slice on Mux AR channel. - localparam logic [9:0] MuxAr = (1 << 1); - /// Slice on Mux R channel. - localparam logic [9:0] MuxR = (1 << 0); - /// Latency configuration for `ace_xbar`. - typedef enum logic [9:0] { - NO_LATENCY = 10'b000_00_000_00, - CUT_SLV_AX = DemuxAw | DemuxAr, - CUT_MST_AX = MuxAw | MuxAr, - CUT_ALL_AX = DemuxAw | DemuxAr | MuxAw | MuxAr, - CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR, - CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR, - CUT_ALL_PORTS = 10'b111_11_111_11 - } ccu_latency_e; - - /// Configuration for `ace_ccu`.
- typedef struct packed { - int unsigned NoSlvPorts; - int unsigned MaxMstTrans; - int unsigned MaxSlvTrans; - bit FallThrough; - ccu_latency_e LatencyMode; - int unsigned AxiIdWidthSlvPorts; - int unsigned AxiIdUsedSlvPorts; - bit UniqueIds; - int unsigned AxiAddrWidth; - int unsigned AxiDataWidth; - int unsigned AxiUserWidth; - int unsigned DcacheLineWidth; - } ccu_cfg_t; - - // transaction type - typedef enum logic[2:0] { - READ_NO_SNOOP, - READ_ONCE, - READ_SHARED, - READ_UNIQUE, - CLEAN_UNIQUE, - WRITE_NO_SNOOP, - WRITE_BACK, - WRITE_UNIQUE - } ace_trs_t; + ////////////// + // Typedefs // + ////////////// + + // Additional types for already existing AXI channels + typedef logic [3:0] arsnoop_t; + typedef logic [2:0] awsnoop_t; + typedef logic [1:0] axbar_t; + typedef logic [1:0] axdomain_t; + typedef logic [3:0] rresp_t; + typedef logic [0:0] awunique_t; + + // Snoop related types + typedef logic [3:0] acsnoop_t; + typedef logic [2:0] acprot_t; + + typedef struct packed { // packed struct: WasUnique is the MSB (bit 4), DataTransfer the LSB (bit 0) + logic WasUnique; + logic IsShared; + logic PassDirty; + logic Error; + logic DataTransfer; + } crresp_t; + + /////////////// + // Encodings // + /////////////// + + // AxDOMAIN + localparam axdomain_t NonShareable = 2'b00; + localparam axdomain_t InnerShareable = 2'b01; + localparam axdomain_t OuterShareable = 2'b10; + localparam axdomain_t System = 2'b11; + + + // AxBAR + localparam axbar_t NormalAccessRespectingBarriers = 2'b00; + localparam axbar_t MemoryBarrier = 2'b01; + localparam axbar_t NormalAccessIgnoringBarriers = 2'b10; + localparam axbar_t SynchronizationBarrier = 2'b11; + + // Barrier is defined once here (value 0) and shared by both ARSNOOP and AWSNOOP + localparam int unsigned Barrier = 0; + + // ARSNOOP (ReadNoSnoop and ReadOnce share 4'b0000; ace_is_read_no_snoop and ace_is_read_once separate them by ardomain) + localparam arsnoop_t ReadNoSnoop = 4'b0000; + localparam arsnoop_t ReadOnce = 4'b0000; + localparam arsnoop_t ReadShared = 4'b0001; + localparam arsnoop_t ReadClean = 4'b0010; + localparam arsnoop_t ReadNotSharedDirty = 4'b0011; + localparam arsnoop_t ReadUnique = 4'b0111; + localparam arsnoop_t
CleanUnique = 4'b1011; + localparam arsnoop_t MakeUnique = 4'b1100; + localparam arsnoop_t CleanShared = 4'b1000; + localparam arsnoop_t CleanInvalid = 4'b1001; + localparam arsnoop_t MakeInvalid = 4'b1101; + localparam arsnoop_t DVMComplete = 4'b1110; + localparam arsnoop_t DVMMessage = 4'b1111; + /* Barrier is already defined */ + + // AWSNOOP (WriteNoSnoop and WriteUnique share 3'b000; ace_is_write_no_snoop and ace_is_write_unique separate them by awdomain) + localparam awsnoop_t WriteNoSnoop = 3'b000; + localparam awsnoop_t WriteUnique = 3'b000; + localparam awsnoop_t WriteLineUnique = 3'b001; + localparam awsnoop_t WriteClean = 3'b010; + localparam awsnoop_t WriteBack = 3'b011; + localparam awsnoop_t Evict = 3'b100; + localparam awsnoop_t WriteEvict = 3'b101; + /* Barrier is already defined */ + + // ACSNOOP + // + // The encoding is shared with ARSNOOP transactions for the following cases: + // - ReadOnce + // - ReadShared + // - ReadClean + // - ReadNotSharedDirty + // - ReadUnique + // - CleanShared + // - CleanInvalid + // - MakeInvalid + // - DVMComplete + // - DVMMessage + // No separate ACSNOOP parameters are declared: cast the ARSNOOP ones to acsnoop_t for consistency (arsnoop_t and acsnoop_t are both 4 bits wide, so the value is unchanged) + + // RRESP + // Bit positions of the additional ACE-related fields within RRESP (rresp_t is 4 bits wide) + localparam int unsigned RESP_IS_DIRTY = 2; + localparam int unsigned RESP_IS_SHARED = 3; + + /////////////// + // Functions // + /////////////// + + // AWSNOOP decoding: each ace_is_* function returns 1 only if awbar0 is 0, awdomain is one of the listed domains and awsnoop equals the transaction's encoding + function automatic logic ace_is_write_no_snoop(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, System} && + awsnoop == WriteNoSnoop + ); + endfunction + + function automatic logic ace_is_write_unique(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == WriteUnique + ); + endfunction + + function automatic logic ace_is_write_line_unique(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == WriteLineUnique + ); + endfunction + +
function automatic logic ace_is_write_clean(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteClean + ); + endfunction + + function automatic logic ace_is_write_back(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteBack + ); + endfunction + + function automatic logic ace_is_evict(logic awbar0, axdomain_t awdomain, awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {InnerShareable, OuterShareable} && + awsnoop == Evict + ); + endfunction + + function automatic logic ace_is_write_evict(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ( + awbar0 == 1'b0 && + awdomain inside {NonShareable, InnerShareable, OuterShareable} && + awsnoop == WriteEvict + ); + endfunction + + // ARSNOOP decoding: each ace_is_* function returns 1 only if arbar0 is 0, ardomain is one of the listed domains and arsnoop equals the transaction's encoding + function automatic logic ace_is_read_no_snoop(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return (arbar0 == 1'b0 && ardomain inside {NonShareable, System} && arsnoop == ReadNoSnoop); + endfunction + + function automatic logic ace_is_read_once(logic arbar0, axdomain_t ardomain, arsnoop_t arsnoop); + return (arbar0 == 1'b0 && ardomain inside {InnerShareable, OuterShareable} && arsnoop == ReadOnce); + endfunction + + function automatic logic ace_is_read_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadShared + ); + endfunction + + function automatic logic ace_is_read_clean(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadClean + ); + endfunction + + function automatic logic ace_is_read_not_shared_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return (
+ arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadNotSharedDirty + ); + endfunction + + function automatic logic ace_is_read_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == ReadUnique + ); + endfunction + + function automatic logic ace_is_clean_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == CleanUnique + ); + endfunction + + function automatic logic ace_is_make_unique(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {InnerShareable, OuterShareable} && + arsnoop == MakeUnique + ); + endfunction + + function automatic logic ace_is_clean_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == CleanShared + ); + endfunction + + function automatic logic ace_is_clean_invalid(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == CleanInvalid + ); + endfunction + + function automatic logic ace_is_make_invalid(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ( + arbar0 == 1'b0 && + ardomain inside {NonShareable, InnerShareable, OuterShareable} && + arsnoop == MakeInvalid + ); + endfunction + + // Transaction groups: each function below ORs together a fixed set of the single-transaction decoders above + + function automatic logic ace_aw_is_shareable(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ace_is_write_unique(awbar0, awdomain, awsnoop) || + ace_is_write_line_unique(awbar0, awdomain, awsnoop); + endfunction + + function automatic logic ace_aw_is_memory_update(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ace_is_write_clean(awbar0, awdomain, awsnoop)
|| + ace_is_write_back(awbar0, awdomain, awsnoop) || + ace_is_evict(awbar0, awdomain, awsnoop) || + ace_is_write_evict(awbar0, awdomain, awsnoop); + endfunction + + function automatic logic ace_aw_is_non_blocking(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ace_aw_is_memory_update(awbar0, awdomain, awsnoop) || + ace_is_write_no_snoop(awbar0, awdomain, awsnoop); + endfunction + + function automatic logic ace_ar_is_shareable(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_read_once(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop) || + ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_unique(arbar0, ardomain, arsnoop) || + ace_is_clean_unique(arbar0, ardomain, arsnoop) || + ace_is_make_unique(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_is_clean(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_clean_unique(arbar0, ardomain, arsnoop) || + ace_is_clean_shared(arbar0, ardomain, arsnoop) || + ace_is_clean_invalid(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_is_make(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_make_invalid(arbar0, ardomain, arsnoop) || + ace_is_make_unique(arbar0, ardomain, arsnoop); + endfunction + + // Snoop transaction from initiating master transaction. AR: CleanUnique maps to CleanInvalid, MakeUnique to MakeInvalid, anything else forwards arsnoop unchanged. AW: WriteLineUnique maps to MakeInvalid, everything else to CleanInvalid + function automatic acsnoop_t ace_ar_acsnoop_map(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_clean_unique(arbar0, ardomain, arsnoop) ? acsnoop_t'(CleanInvalid) : + ace_is_make_unique(arbar0, ardomain, arsnoop) ? acsnoop_t'(MakeInvalid) : + acsnoop_t'(arsnoop); + endfunction + + function automatic acsnoop_t ace_aw_acsnoop_map(logic awbar0, axdomain_t awdomain, + awsnoop_t awsnoop); + return ace_is_write_unique(awbar0, awdomain, awsnoop) ?
acsnoop_t'(CleanInvalid) : + ace_is_write_line_unique(awbar0, awdomain, awsnoop) ? acsnoop_t'(MakeInvalid) : + acsnoop_t'(CleanInvalid); + endfunction + + function automatic logic ace_ar_accepts_dirty(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_unique(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_accepts_dirty_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_read_shared(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_accepts_shared(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop); + return ace_is_read_not_shared_dirty(arbar0, ardomain, arsnoop) || + ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop); + endfunction + + function automatic logic ace_ar_is_exclusive_load(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); + return (ace_is_read_shared(arbar0, ardomain, arsnoop) || + ace_is_read_clean(arbar0, ardomain, arsnoop)) && arlock; + endfunction + + function automatic logic ace_ar_is_exclusive_store(logic arbar0, axdomain_t ardomain, + arsnoop_t arsnoop, logic arlock); + return (ace_is_clean_unique(arbar0, ardomain, arsnoop)) && arlock; + endfunction endpackage diff --git a/src/ace_test.sv b/src/ace_test.sv deleted file mode 100644 index 6c8d667..0000000 --- a/src/ace_test.sv +++ /dev/null @@ -1,1733 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - - -/// A set of testbench utilities for ACE interfaces. -package ace_test; - - import axi_pkg::*; - import ace_pkg::*; - - /// The data transferred on a beat on the AW/AR channels. - class ace_ax_beat #( - parameter AW = 32, - parameter IW = 8 , - parameter UW = 1 - ); - rand logic [IW-1:0] ax_id = '0; - rand logic [AW-1:0] ax_addr = '0; - logic [7:0] ax_len = '0; - logic [2:0] ax_size = '0; - logic [1:0] ax_burst = '0; - logic ax_lock = '0; - logic [3:0] ax_cache = '0; - logic [2:0] ax_prot = '0; - rand logic [3:0] ax_qos = '0; - logic [3:0] ax_region = '0; - logic [5:0] ax_atop = '0; // Only defined on the AW channel. - rand logic [UW-1:0] ax_user = '0; - rand logic [3:0] ax_snoop = '0; // AW channel requires 3 bits, AR channel requires 4 bits - rand logic [1:0] ax_bar = '0; - rand logic [1:0] ax_domain = '0; - rand logic ax_awunique = '0; // Only for AW - endclass - - /// The data transferred on a beat on the R channel. - class ace_r_beat #( - parameter DW = 32, - parameter IW = 8 , - parameter UW = 1 - ); - rand logic [IW-1:0] r_id = '0; - rand logic [DW-1:0] r_data = '0; - ace_pkg::rresp_t r_resp = '0; - logic r_last = '0; - rand logic [UW-1:0] r_user = '0; - endclass - - /// The data transferred on a beat on the W channel. - class axi_w_beat #( - parameter DW = 32, - parameter UW = 1 - ); - rand logic [DW-1:0] w_data = '0; - rand logic [DW/8-1:0] w_strb = '0; - logic w_last = '0; - rand logic [UW-1:0] w_user = '0; -endclass - - /// The data transferred on a beat on the B channel. 
-class axi_b_beat #( - parameter IW = 8, - parameter UW = 1 - ); - rand logic [IW-1:0] b_id = '0; - axi_pkg::resp_t b_resp = '0; - rand logic [UW-1:0] b_user = '0; -endclass - - - /// A driver for AXI4 interface. - class ace_driver #( - parameter int AW = 32 , - parameter int DW = 32 , - parameter int IW = 8 , - parameter int UW = 1 , - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace; - -// typedef axi_test::axi_driver #( -// .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) -// ) axi_driver_t; - typedef ace_ax_beat #(.AW(AW), .IW(IW), .UW(UW)) ax_ace_beat_t; - typedef axi_w_beat #(.DW(DW), .UW(UW)) w_beat_t; - typedef axi_b_beat #(.IW(IW), .UW(UW)) b_beat_t; -// typedef axi_driver_t::w_beat_t w_beat_t; -// typedef axi_driver_t::b_beat_t b_beat_t; - typedef ace_r_beat #(.DW(DW), .IW(IW), .UW(UW)) r_ace_beat_t; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - this.ace = ace; - endfunction - - function void reset_master(); - ace.aw_id <= '0; - ace.aw_addr <= '0; - ace.aw_len <= '0; - ace.aw_size <= '0; - ace.aw_burst <= '0; - ace.aw_lock <= '0; - ace.aw_cache <= '0; - ace.aw_prot <= '0; - ace.aw_qos <= '0; - ace.aw_region <= '0; - ace.aw_atop <= '0; - ace.aw_user <= '0; - ace.aw_valid <= '0; - ace.aw_snoop <= '0; - ace.aw_bar <= '0; - ace.aw_domain <= '0; - ace.aw_awunique <= '0; - ace.w_data <= '0; - ace.w_strb <= '0; - ace.w_last <= '0; - ace.w_user <= '0; - ace.w_valid <= '0; - ace.b_ready <= '0; - ace.ar_id <= '0; - ace.ar_addr <= '0; - ace.ar_len <= '0; - ace.ar_size <= '0; - ace.ar_burst <= '0; - ace.ar_lock <= '0; - ace.ar_cache <= '0; - ace.ar_prot <= '0; - ace.ar_qos <= '0; - ace.ar_region <= '0; - ace.ar_user <= '0; - ace.ar_snoop <= '0; - ace.ar_bar <= '0; - ace.ar_domain <= '0; - 
ace.ar_valid <= '0; - ace.r_ready <= '0; - ace.wack <= '0; - ace.rack <= '0; - endfunction - - function void reset_slave(); - ace.aw_ready <= '0; - ace.w_ready <= '0; - ace.b_id <= '0; - ace.b_resp <= '0; - ace.b_user <= '0; - ace.b_valid <= '0; - ace.ar_ready <= '0; - ace.r_id <= '0; - ace.r_data <= '0; - ace.r_resp <= '0; - ace.r_last <= '0; - ace.r_user <= '0; - ace.r_valid <= '0; - endfunction - - task cycle_start; - #TT; - endtask - - task cycle_end; - @(posedge ace.clk_i); - endtask - - /// Issue a beat on the AW channel. - task send_aw ( - input ax_ace_beat_t beat - ); - ace.aw_id <= #TA beat.ax_id; - ace.aw_addr <= #TA beat.ax_addr; - ace.aw_len <= #TA beat.ax_len; - ace.aw_size <= #TA beat.ax_size; - ace.aw_burst <= #TA beat.ax_burst; - ace.aw_lock <= #TA beat.ax_lock; - ace.aw_cache <= #TA beat.ax_cache; - ace.aw_prot <= #TA beat.ax_prot; - ace.aw_qos <= #TA beat.ax_qos; - ace.aw_region <= #TA beat.ax_region; - ace.aw_atop <= #TA beat.ax_atop; - ace.aw_user <= #TA beat.ax_user; - ace.aw_valid <= #TA 1; - ace.aw_snoop <= #TA beat.ax_snoop; - ace.aw_bar <= #TA beat.ax_bar; - ace.aw_domain <= #TA beat.ax_domain; - ace.aw_awunique <= #TA beat.ax_awunique; - cycle_start(); - while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.aw_id <= #TA '0; - ace.aw_addr <= #TA '0; - ace.aw_len <= #TA '0; - ace.aw_size <= #TA '0; - ace.aw_burst <= #TA '0; - ace.aw_lock <= #TA '0; - ace.aw_cache <= #TA '0; - ace.aw_prot <= #TA '0; - ace.aw_qos <= #TA '0; - ace.aw_region <= #TA '0; - ace.aw_atop <= #TA '0; - ace.aw_user <= #TA '0; - ace.aw_valid <= #TA 0; - ace.aw_snoop <= #TA '0; - ace.aw_bar <= #TA '0; - ace.aw_domain <= #TA '0; - ace.aw_awunique <= #TA 0; - endtask - - /// Issue a beat on the W channel. 
- task send_w ( - input w_beat_t beat - ); - ace.w_data <= #TA beat.w_data; - ace.w_strb <= #TA beat.w_strb; - ace.w_last <= #TA beat.w_last; - ace.w_user <= #TA beat.w_user; - ace.w_valid <= #TA 1; - cycle_start(); - while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.w_data <= #TA '0; - ace.w_strb <= #TA '0; - ace.w_last <= #TA '0; - ace.w_user <= #TA '0; - ace.w_valid <= #TA 0; - endtask - - /// Issue a beat on the B channel. - task send_b ( - input b_beat_t beat - ); - ace.b_id <= #TA beat.b_id; - ace.b_resp <= #TA beat.b_resp; - ace.b_user <= #TA beat.b_user; - ace.b_valid <= #TA 1; - cycle_start(); - while (ace.b_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.b_id <= #TA '0; - ace.b_resp <= #TA '0; - ace.b_user <= #TA '0; - ace.b_valid <= #TA 0; - cycle_start(); - while (ace.wack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Issue a beat on the AR channel. - task send_ar ( - input ax_ace_beat_t beat - ); - ace.ar_id <= #TA beat.ax_id; - ace.ar_addr <= #TA beat.ax_addr; - ace.ar_len <= #TA beat.ax_len; - ace.ar_size <= #TA beat.ax_size; - ace.ar_burst <= #TA beat.ax_burst; - ace.ar_lock <= #TA beat.ax_lock; - ace.ar_cache <= #TA beat.ax_cache; - ace.ar_prot <= #TA beat.ax_prot; - ace.ar_qos <= #TA beat.ax_qos; - ace.ar_region <= #TA beat.ax_region; - ace.ar_user <= #TA beat.ax_user; - ace.ar_valid <= #TA 1; - ace.ar_snoop <= #TA beat.ax_snoop; - ace.ar_bar <= #TA beat.ax_bar; - ace.ar_domain <= #TA beat.ax_domain; - cycle_start(); - while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.ar_id <= #TA '0; - ace.ar_addr <= #TA '0; - ace.ar_len <= #TA '0; - ace.ar_size <= #TA '0; - ace.ar_burst <= #TA '0; - ace.ar_lock <= #TA '0; - ace.ar_cache <= #TA '0; - ace.ar_prot <= #TA '0; - ace.ar_qos <= #TA '0; - ace.ar_region <= #TA '0; - ace.ar_user <= #TA '0; - ace.ar_valid <= #TA 0; - ace.ar_snoop <= #TA '0; - ace.ar_bar <= #TA '0; - ace.ar_domain <= 
#TA '0; - endtask - - /// Issue a beat on the R channel. - task send_r ( - input r_ace_beat_t beat - ); - ace.r_id <= #TA beat.r_id; - ace.r_data <= #TA beat.r_data; - ace.r_resp <= #TA beat.r_resp; - ace.r_last <= #TA beat.r_last; - ace.r_user <= #TA beat.r_user; - ace.r_valid <= #TA 1; - cycle_start(); - while (ace.r_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - ace.r_id <= #TA '0; - ace.r_data <= #TA '0; - ace.r_resp <= #TA '0; - ace.r_last <= #TA '0; - ace.r_user <= #TA '0; - ace.r_valid <= #TA 0; - cycle_start(); - while (ace.rack != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - endtask - - /// Wait for a beat on the AW channel. - task recv_aw ( - output ax_ace_beat_t beat - ); - ace.aw_ready <= #TA 1; - cycle_start(); - while (ace.aw_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - ace.aw_ready <= #TA 0; - endtask - - /// Wait for a beat on the W channel. - task recv_w ( - output w_beat_t beat - ); - ace.w_ready <= #TA 1; - cycle_start(); - while (ace.w_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - ace.w_ready <= #TA 0; - endtask - - /// Wait for a beat on the B channel. 
- task recv_b ( - output b_beat_t beat - ); - ace.b_ready <= #TA 1; - cycle_start(); - while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - ace.b_ready <= #TA 0; - ace.wack <= #TA 1; - cycle_start(); - ace.wack <= #TA 0; - endtask - - /// Wait for a beat on the AR channel. - task recv_ar ( - output ax_ace_beat_t beat - ); - ace.ar_ready <= #TA 1; - cycle_start(); - while (ace.ar_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. - beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - ace.ar_ready <= #TA 0; - endtask - - /// Wait for a beat on the R channel. - task recv_r ( - output r_ace_beat_t beat - ); - ace.r_ready <= #TA 1; - cycle_start(); - while (ace.r_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - ace.r_ready <= #TA 0; - ace.rack <= #TA 1; - cycle_start(); - ace.rack <= #TA 0; - endtask - - /// Monitor the AW channel and return the next beat. 
- task mon_aw ( - output ax_ace_beat_t beat - ); - cycle_start(); - while (!(ace.aw_valid && ace.aw_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.aw_id; - beat.ax_addr = ace.aw_addr; - beat.ax_len = ace.aw_len; - beat.ax_size = ace.aw_size; - beat.ax_burst = ace.aw_burst; - beat.ax_lock = ace.aw_lock; - beat.ax_cache = ace.aw_cache; - beat.ax_prot = ace.aw_prot; - beat.ax_qos = ace.aw_qos; - beat.ax_region = ace.aw_region; - beat.ax_atop = ace.aw_atop; - beat.ax_user = ace.aw_user; - beat.ax_snoop = ace.aw_snoop; - beat.ax_bar = ace.aw_bar; - beat.ax_domain = ace.aw_domain; - beat.ax_awunique = ace.aw_awunique; - cycle_end(); - endtask - - /// Monitor the W channel and return the next beat. - task mon_w ( - output w_beat_t beat - ); - cycle_start(); - while (!(ace.w_valid && ace.w_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.w_data = ace.w_data; - beat.w_strb = ace.w_strb; - beat.w_last = ace.w_last; - beat.w_user = ace.w_user; - cycle_end(); - endtask - - /// Monitor the B channel and return the next beat. - task mon_b ( - output b_beat_t beat - ); - cycle_start(); - while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.b_id = ace.b_id; - beat.b_resp = ace.b_resp; - beat.b_user = ace.b_user; - cycle_end(); - endtask - - /// Monitor the AR channel and return the next beat. - task mon_ar ( - output ax_ace_beat_t beat - ); - cycle_start(); - while (!(ace.ar_valid && ace.ar_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ax_id = ace.ar_id; - beat.ax_addr = ace.ar_addr; - beat.ax_len = ace.ar_len; - beat.ax_size = ace.ar_size; - beat.ax_burst = ace.ar_burst; - beat.ax_lock = ace.ar_lock; - beat.ax_cache = ace.ar_cache; - beat.ax_prot = ace.ar_prot; - beat.ax_qos = ace.ar_qos; - beat.ax_region = ace.ar_region; - beat.ax_atop = 'X; // Not defined on the AR channel. 
- beat.ax_user = ace.ar_user; - beat.ax_snoop = ace.ar_snoop; - beat.ax_bar = ace.ar_bar; - beat.ax_domain = ace.ar_domain; - cycle_end(); - endtask - - /// Monitor the R channel and return the next beat. - task mon_r ( - output r_ace_beat_t beat - ); - cycle_start(); - while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.r_id = ace.r_id; - beat.r_data = ace.r_data; - beat.r_resp = ace.r_resp; - beat.r_last = ace.r_last; - beat.r_user = ace.r_user; - cycle_end(); - endtask - - endclass - - class ace_rand_master #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - parameter int IW = 8, - parameter int UW = 1, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - // Maximum number of read and write transactions in flight - parameter int MAX_READ_TXNS = 1, - parameter int MAX_WRITE_TXNS = 1, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AX_MIN_WAIT_CYCLES = 0, - parameter int AX_MAX_WAIT_CYCLES = 100, - parameter int W_MIN_WAIT_CYCLES = 0, - parameter int W_MAX_WAIT_CYCLES = 5, - parameter int RESP_MIN_WAIT_CYCLES = 0, - parameter int RESP_MAX_WAIT_CYCLES = 20, - // AXI feature usage - parameter int AXI_MAX_BURST_LEN = 0, // maximum number of beats in burst; 0 = AXI max (256) - parameter int TRAFFIC_SHAPING = 0, - parameter bit AXI_EXCLS = 1'b0, - parameter bit AXI_ATOPS = 1'b0, - parameter bit AXI_BURST_FIXED = 1'b1, - parameter bit AXI_BURST_INCR = 1'b1, - parameter bit AXI_BURST_WRAP = 1'b0, - parameter bit UNIQUE_IDS = 1'b0, // guarantee that the ID of each transaction is - // unique among all in-flight transactions in the - // same direction - // Dependent parameters, do not override. 
- parameter int AXI_STRB_WIDTH = DW/8, - parameter int N_AXI_IDS = 2**IW - ); - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) - ) ace_driver_t; - typedef logic [AW-1:0] addr_t; - typedef axi_pkg::burst_t burst_t; - typedef axi_pkg::cache_t cache_t; - typedef logic [DW-1:0] data_t; - typedef logic [IW-1:0] id_t; - typedef axi_pkg::len_t len_t; - typedef axi_pkg::size_t size_t; - typedef ace_pkg::arsnoop_t snoop_t; // use only arsnoop_t, which is bigger than awsnoop_t - typedef ace_pkg::bar_t bar_t; - typedef ace_pkg::domain_t domain_t; - typedef ace_pkg::awunique_t awunique_t; - - - typedef logic [UW-1:0] user_t; - typedef axi_pkg::mem_type_t mem_type_t; - - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - - static addr_t PFN_MASK = '{11: 1'b0, 10: 1'b0, 9: 1'b0, 8: 1'b0, 7: 1'b0, 6: 1'b0, 5: 1'b0, - 4: 1'b0, 3: 1'b0, 2: 1'b0, 1: 1'b0, 0: 1'b0, default: '1}; - - ace_driver_t drv; - - int unsigned r_flight_cnt[N_AXI_IDS-1:0], - w_flight_cnt[N_AXI_IDS-1:0], - tot_r_flight_cnt, - tot_w_flight_cnt; - logic [N_AXI_IDS-1:0] atop_resp_b, - atop_resp_r; - - len_t max_len; - burst_t allowed_bursts[$]; - - semaphore cnt_sem; - - ax_ace_beat_t aw_ace_queue[$], - w_queue[$], - excl_queue[$]; - - typedef struct packed { - addr_t addr_begin; - addr_t addr_end; - mem_type_t mem_type; - } mem_region_t; - mem_region_t mem_map[$]; - - struct packed { - int unsigned len ; - int unsigned cprob; - } traffic_shape[$]; - int unsigned max_cprob; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - if (AXI_MAX_BURST_LEN <= 0 || AXI_MAX_BURST_LEN > 256) begin - this.max_len = 255; - end else begin - this.max_len = AXI_MAX_BURST_LEN - 1; - end - this.drv = new(ace); - this.cnt_sem = new(1); - this.reset(); - if 
(AXI_BURST_FIXED) begin - this.allowed_bursts.push_back(BURST_FIXED); - end - if (AXI_BURST_INCR) begin - this.allowed_bursts.push_back(BURST_INCR); - end - if (AXI_BURST_WRAP) begin - this.allowed_bursts.push_back(BURST_WRAP); - end - assert(allowed_bursts.size()) else $fatal(1, "At least one burst type has to be specified!"); - endfunction - - function void reset(); - drv.reset_master(); - r_flight_cnt = '{default: 0}; - w_flight_cnt = '{default: 0}; - tot_r_flight_cnt = 0; - tot_w_flight_cnt = 0; - atop_resp_b = '0; - atop_resp_r = '0; - endfunction - - function void add_memory_region(input addr_t addr_begin, input addr_t addr_end, input mem_type_t mem_type); - mem_map.push_back({addr_begin, addr_end, mem_type}); - endfunction - - function void add_traffic_shaping(input int unsigned len, input int unsigned freq); - if (traffic_shape.size() == 0) - traffic_shape.push_back({len, freq}); - else - traffic_shape.push_back({len, traffic_shape[$].cprob + freq}); - - max_cprob = traffic_shape[$].cprob; - endfunction : add_traffic_shaping - - function ax_ace_beat_t new_rand_burst(input logic is_read); - automatic logic rand_success; - automatic ax_ace_beat_t ax_ace_beat = new; - automatic addr_t addr; - automatic burst_t burst; - automatic cache_t cache; - automatic id_t id; - automatic qos_t qos; - automatic len_t len; - automatic size_t size; - automatic bar_t bar; - automatic domain_t domain; - automatic snoop_t snoop; - automatic awunique_t awunique; - automatic int unsigned mem_region_idx; - automatic mem_region_t mem_region; - automatic int cprob; - automatic logic [2:0] trs; - - // No memory regions defined - if (mem_map.size() == 0) begin - // Return a dummy region - mem_region = '{ - addr_begin: '0, - addr_end: '1, - mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE - }; - end else begin - // Randomly pick a memory region - mem_region_idx = $urandom_range(0,mem_map.size()-1); - // std::randomize(mem_region_idx) with { - // mem_region_idx < mem_map.size(); - // 
}; assert(rand_success); - mem_region = mem_map[mem_region_idx]; - end - - // Randomly pick burst type. - burst = BURST_FIXED; - // rand_success = std::randomize(burst) with { - // burst inside {this.allowed_bursts}; - // }; assert(rand_success); - ax_ace_beat.ax_burst = burst; - // Determine memory type. - ax_ace_beat.ax_cache = is_read ? axi_pkg::get_arcache(mem_region.mem_type) : axi_pkg::get_awcache(mem_region.mem_type); - // Randomize beat size. - if (TRAFFIC_SHAPING) begin - cprob = $urandom_range(0,max_cprob-1); - // rand_success = std::randomize(cprob) with { - // cprob >= 0; cprob < max_cprob; - // }; assert(rand_success); - - for (int i = 0; i < traffic_shape.size(); i++) - if (traffic_shape[i].cprob > cprob) begin - len = traffic_shape[i].len; - if (ax_ace_beat.ax_burst == BURST_WRAP) begin - assert (len inside {len_t'(1), len_t'(3), len_t'(7), len_t'(15)}); - end - break; - end - - // Randomize address. Make sure that the burst does not cross a 4KiB boundary. - forever begin - size = $clog2(AXI_STRB_WIDTH)-1; - // rand_success = std::randomize(size) with { - // 2**size <= AXI_STRB_WIDTH; - // 2**size <= len; - // }; assert(rand_success); - ax_ace_beat.ax_size = size; - ax_ace_beat.ax_len = ((len + (1 << size) - 1) >> size) - 1; - - addr = mem_region.addr_begin; - // rand_success = std::randomize(addr) with { - // addr >= mem_region.addr_begin; - // addr <= mem_region.addr_end; - // addr + len <= mem_region.addr_end; - // }; assert(rand_success); - - if (ax_ace_beat.ax_burst == axi_pkg::BURST_FIXED) begin - if (((addr + 2**ax_ace_beat.ax_size) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end else begin // BURST_INCR - if (((addr + 2**ax_ace_beat.ax_size * (ax_ace_beat.ax_len + 1)) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end - end - end else begin - // Randomize address. Make sure that the burst does not cross a 4KiB boundary. 
- forever begin - // Randomize address - addr = $urandom_range(mem_region.addr_begin, mem_region.addr_end); - - if (ax_ace_beat.ax_burst == axi_pkg::BURST_FIXED) begin - if (((addr + 2**ax_ace_beat.ax_size) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end else begin // BURST_INCR, BURST_WRAP - if (((addr + 2**ax_ace_beat.ax_size * (ax_ace_beat.ax_len + 1)) & PFN_MASK) == (addr & PFN_MASK)) begin - break; - end - end - end - end - - id = $urandom(); - qos = $urandom(); - awunique = 0; - trs = $urandom_range(0,7); - size = $clog2(AXI_STRB_WIDTH)-1; - case(trs ) - ace_pkg::READ_NO_SNOOP: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - ace_pkg::READ_ONCE: begin - snoop = 'b0000; - domain = 'b01; - bar = 'b00; - len = 1; - end - ace_pkg::READ_SHARED: begin - snoop = 'b0001; - domain = 'b01; - bar = 'b00; - len = 1; - end - ace_pkg::READ_UNIQUE: begin - snoop = 'b0111; - domain = 'b01; - bar = 'b00; - len = 1; - end - - ace_pkg::CLEAN_UNIQUE: begin - snoop = 'b1011; - domain = 'b01; - bar = 'b00; - len = 0; - end - - ace_pkg::WRITE_NO_SNOOP: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - ace_pkg::WRITE_BACK: begin - snoop = 'b0011; - domain = 'b00; - bar = 'b00; - len = 1; - end - ace_pkg::WRITE_UNIQUE: begin - snoop = 'b0000; - domain = 'b10; - bar = 'b00; - len = 1; - end - - - default: begin - snoop = 'b0000; - domain = 'b00; - bar = 'b00; - len = $urandom(); - end - endcase - - ax_ace_beat.ax_addr = addr; - ax_ace_beat.ax_size = size; - ax_ace_beat.ax_len = len; - ax_ace_beat.ax_id = id; - ax_ace_beat.ax_qos = qos; - ax_ace_beat.ax_snoop = snoop; - ax_ace_beat.ax_bar = bar; - ax_ace_beat.ax_domain = domain; - ax_ace_beat.ax_awunique = awunique; - - return ax_ace_beat; - endfunction - - task rand_atop_burst(inout ax_ace_beat_t beat); - automatic logic rand_success; - beat.ax_atop[5:4] = $random(); - if (beat.ax_atop[5:4] != 2'b00 && !AXI_BURST_INCR) begin - // We can emit ATOPs only if 
INCR bursts are allowed. - $warning("ATOP suppressed because INCR bursts are disabled!"); - beat.ax_atop[5:4] = 2'b00; - end - if (beat.ax_atop[5:4] != 2'b00) begin // ATOP - // Determine `ax_atop`. - if (beat.ax_atop[5:4] == axi_pkg::ATOP_ATOMICSTORE || - beat.ax_atop[5:4] == axi_pkg::ATOP_ATOMICLOAD) begin - // Endianness - beat.ax_atop[3] = $random(); - // Atomic operation - beat.ax_atop[2:0] = $random(); - end else begin // Atomic{Swap,Compare} - beat.ax_atop[3:1] = '0; - beat.ax_atop[0] = $random(); - end - // Determine `ax_size` and `ax_len`. - if (2**beat.ax_size < AXI_STRB_WIDTH) begin - // Transaction does *not* occupy full data bus, so we must send just one beat. [E1.1.3] - beat.ax_len = '0; - end else begin - automatic int unsigned bytes; - if (beat.ax_atop == axi_pkg::ATOP_ATOMICCMP) begin - // Total data transferred in burst can be 2, 4, 8, 16, or 32 B. - automatic int unsigned log_bytes; - log_bytes = 3; - // rand_success = std::randomize(log_bytes) with { - // log_bytes > 0; 2**log_bytes <= 32; - // }; assert(rand_success); - bytes = 2**log_bytes; - end else begin - // Total data transferred in burst can be 1, 2, 4, or 8 B. - if (AXI_STRB_WIDTH >= 8) begin - bytes = AXI_STRB_WIDTH; - end else begin - automatic int unsigned log_bytes; - log_bytes = 5; - // rand_success = std::randomize(log_bytes); assert(rand_success); - log_bytes = log_bytes % (4 - $clog2(AXI_STRB_WIDTH)) - $clog2(AXI_STRB_WIDTH); - bytes = 2**log_bytes; - end - end - beat.ax_len = bytes / AXI_STRB_WIDTH - 1; - end - // Determine `ax_addr` and `ax_burst`. - if (beat.ax_atop == axi_pkg::ATOP_ATOMICCMP) begin - // The address must be aligned to half the outbound data size. [E1.1.3] - beat.ax_addr = beat.ax_addr & ~((1'b1 << beat.ax_size) - 1); - // If the address is aligned to the total size of outgoing data, the burst type must be - // INCR. Otherwise, it must be WRAP. [E1.1.3] - beat.ax_burst = (beat.ax_addr % ((beat.ax_len+1) * 2**beat.ax_size) == 0) ? 
- axi_pkg::BURST_INCR : axi_pkg::BURST_WRAP; - // If we are not allowed to emit WRAP bursts, align the address to the total size of - // outgoing data and fall back to INCR. - if (beat.ax_burst == axi_pkg::BURST_WRAP && !AXI_BURST_WRAP) begin - beat.ax_addr -= (beat.ax_addr % ((beat.ax_len+1) * 2**beat.ax_size)); - beat.ax_burst = axi_pkg::BURST_INCR; - end - end else begin - // The address must be aligned to the data size. [E1.1.3] - beat.ax_addr = beat.ax_addr & ~((1'b1 << (beat.ax_size+1)) - 1); - // Only INCR allowed. - beat.ax_burst = axi_pkg::BURST_INCR; - end - end - endtask - - function void rand_excl_ar(inout ax_ace_beat_t ar_ace_beat); - ar_ace_beat.ax_lock = $random(); - if (ar_ace_beat.ax_lock) begin - automatic logic rand_success; - automatic int unsigned n_bytes; - automatic size_t size; - automatic addr_t addr_mask; - ar_ace_beat.ax_size = $clog2(AXI_STRB_WIDTH)-1; - - // The address must be aligned to the total number of bytes in the burst. - ar_ace_beat.ax_addr = ar_ace_beat.ax_addr & ~(2); - ar_ace_beat.ax_snoop = $urandom(); - if( ar_ace_beat.ax_snoop == 4'b1001 || ar_ace_beat.ax_snoop == 4'b1011) begin - ar_ace_beat.ax_len = 0; - end else begin - ar_ace_beat.ax_len = 1; - end - ar_ace_beat.ax_bar = $urandom(); - ar_ace_beat.ax_domain = $urandom(); - - end - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax for getting an assignable - // reference? - task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - //assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.ace.clk_i); - endtask - - // Determine if the ID of an AXI Ax beat is currently legal. 
This function may only be called - // while holding the `cnt_sem` semaphore. - function bit id_is_legal(input bit is_read, input ax_ace_beat_t beat); - if (AXI_ATOPS) begin - // The ID must not be the same as that of any in-flight ATOP. - if (atop_resp_b[beat.ax_id] || atop_resp_r[beat.ax_id]) return 1'b0; - // If this beat starts an ATOP, its ID must not be the same as that of any other in-flight - // AXI transaction. - if (!is_read && beat.ax_atop[5:4] != 2'b00 && ( - r_flight_cnt[beat.ax_id] != 0 || w_flight_cnt[beat.ax_id] !=0 - )) return 1'b0; - end - if (UNIQUE_IDS) begin - // This master may only emit transactions with an ID that is unique among all in-flight - // transactions in the same direction. - if (is_read && r_flight_cnt[beat.ax_id] != 0) return 1'b0; - if (!is_read && w_flight_cnt[beat.ax_id] != 0) return 1'b0; - end - // There is no reason why this ID would be illegal, so it is legal. - return 1'b1; - endfunction - - // Legalize the ID of an AXI Ax beat (drawing a new ID at random if the existing ID is currently - // not legal) and add it to the in-flight transactions. - task legalize_id(input bit is_read, inout ax_ace_beat_t beat); - automatic logic rand_success; - automatic id_t id = beat.ax_id; - // Loop until a legal ID is found. - forever begin - // Acquire semaphore on in-flight counters. - cnt_sem.get(); - // Exit loop if the current ID is legal. - if (id_is_legal(is_read, beat)) begin - break; - end else begin - // The current ID is currently not legal, so try another ID in the next cycle and - // release the semaphore until then. - cnt_sem.put(); - rand_wait(1, 1); - if (!beat.ax_lock) begin // The ID of an exclusive transfer must not be changed. - //rand_success = std::randomize(id); assert(rand_success); - id = 1; - beat.ax_id = id; - end - end - end - // Mark transfer for decided ID as in flight. 
- if (!is_read) begin - w_flight_cnt[beat.ax_id]++; - tot_w_flight_cnt++; - if (beat.ax_atop != 2'b00) begin - // This is an ATOP, so it gives rise to a write response. - atop_resp_b[beat.ax_id] = 1'b1; - if (beat.ax_atop[axi_pkg::ATOP_R_RESP]) begin - // This ATOP type additionally gives rise to a read response. - atop_resp_r[beat.ax_id] = 1'b1; - end - end - end else begin - r_flight_cnt[beat.ax_id]++; - tot_r_flight_cnt++; - end - // Release semaphore on in-flight counters. - cnt_sem.put(); - endtask - - task send_ars(input int n_reads); - automatic logic rand_success; - repeat (n_reads) begin - automatic id_t id; - automatic ax_ace_beat_t ar_ace_beat = new_rand_burst(1'b1); - while (tot_r_flight_cnt >= MAX_READ_TXNS) begin - rand_wait(1, 1); - end - if (AXI_EXCLS) begin - rand_excl_ar(ar_ace_beat); - end - legalize_id(1'b1, ar_ace_beat); - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.send_ar(ar_ace_beat); - if (ar_ace_beat.ax_lock) excl_queue.push_back(ar_ace_beat); - end - endtask - - task recv_rs(ref logic ar_done, aw_done); - while (!(ar_done && tot_r_flight_cnt == 0 && - (!AXI_ATOPS || (AXI_ATOPS && aw_done && atop_resp_r == '0)) - )) begin - automatic r_ace_beat_t r_ace_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - if (tot_r_flight_cnt > 0 || atop_resp_r > 0) begin - drv.recv_r(r_ace_beat); - if (r_ace_beat.r_last) begin - cnt_sem.get(); - if (atop_resp_r[r_ace_beat.r_id]) begin - atop_resp_r[r_ace_beat.r_id] = 1'b0; - end else begin - r_flight_cnt[r_ace_beat.r_id]--; - tot_r_flight_cnt--; - end - cnt_sem.put(); - end - end - end - endtask - - task create_aws(input int n_writes); - automatic logic rand_success; - repeat (n_writes) begin - automatic bit excl = 1'b0; - automatic ax_ace_beat_t aw_ace_beat; - if (AXI_EXCLS && excl_queue.size() > 0) excl = $random(); - if (excl) begin - aw_ace_beat = excl_queue.pop_front(); - end else begin - aw_ace_beat = new_rand_burst(1'b0); - if (AXI_ATOPS) rand_atop_burst(aw_ace_beat); - end 
- while (tot_w_flight_cnt >= MAX_WRITE_TXNS) begin - rand_wait(1, 1); - end - legalize_id(1'b0, aw_ace_beat); - aw_ace_queue.push_back(aw_ace_beat); - w_queue.push_back(aw_ace_beat); - end - endtask - - task send_aws(ref logic aw_done); - while (!(aw_done && aw_ace_queue.size() == 0)) begin - automatic ax_ace_beat_t aw_ace_beat; - wait (aw_ace_queue.size() > 0 || (aw_done && aw_ace_queue.size() == 0)); - aw_ace_beat = aw_ace_queue.pop_front(); - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.send_aw(aw_ace_beat); - end - endtask - - task send_ws(ref logic aw_done); - while (!(aw_done && w_queue.size() == 0)) begin - automatic ax_ace_beat_t aw_ace_beat; - automatic addr_t addr; - static logic rand_success; - wait (w_queue.size() > 0 || (aw_done && w_queue.size() == 0)); - aw_ace_beat = w_queue.pop_front(); - for (int unsigned i = 0; i < aw_ace_beat.ax_len + 1; i++) begin - automatic w_beat_t w_beat = new; - automatic int unsigned begin_byte, end_byte, n_bytes; - automatic logic [AXI_STRB_WIDTH-1:0] rand_strb, strb_mask; - addr = axi_pkg::beat_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size, aw_ace_beat.ax_len, - aw_ace_beat.ax_burst, i); - //rand_success = w_beat.randomize(); assert (rand_success); - // Determine strobe. - w_beat.w_strb = '0; - n_bytes = 2**aw_ace_beat.ax_size; - begin_byte = addr % AXI_STRB_WIDTH; - end_byte = ((begin_byte + n_bytes) >> aw_ace_beat.ax_size) << aw_ace_beat.ax_size; - strb_mask = '0; - for (int unsigned b = begin_byte; b < end_byte; b++) - strb_mask[b] = 1'b1; - rand_strb = $urandom(); - //rand_success = std::randomize(rand_strb); assert (rand_success); - w_beat.w_strb |= (rand_strb & strb_mask); - // Determine last. 
- w_beat.w_last = (i == aw_ace_beat.ax_len); - rand_wait(W_MIN_WAIT_CYCLES, W_MAX_WAIT_CYCLES); - drv.send_w(w_beat); - end - end - endtask - - task recv_bs(ref logic aw_done); - while (!(aw_done && tot_w_flight_cnt == 0)) begin - automatic b_beat_t b_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - drv.recv_b(b_beat); - cnt_sem.get(); - if (atop_resp_b[b_beat.b_id]) begin - atop_resp_b[b_beat.b_id] = 1'b0; - end - w_flight_cnt[b_beat.b_id]--; - tot_w_flight_cnt--; - cnt_sem.put(); - end - endtask - - // Issue n_reads random read and n_writes random write transactions to an address range. - task run(input int n_reads, input int n_writes); - automatic logic ar_done = 1'b0, - aw_done = 1'b0; - fork - begin - send_ars(n_reads); - ar_done = 1'b1; - end - recv_rs(ar_done, aw_done); - begin - create_aws(n_writes); - aw_done = 1'b1; - end - send_aws(aw_done); - send_ws(aw_done); - recv_bs(aw_done); - join - endtask - - endclass - - class ace_rand_slave #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - parameter int IW = 8, - parameter int UW = 1, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - parameter bit RAND_RESP = 0, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AX_MIN_WAIT_CYCLES = 0, - parameter int AX_MAX_WAIT_CYCLES = 100, - parameter int R_MIN_WAIT_CYCLES = 0, - parameter int R_MAX_WAIT_CYCLES = 5, - parameter int RESP_MIN_WAIT_CYCLES = 0, - parameter int RESP_MAX_WAIT_CYCLES = 20, - /// This parameter eneables an internal memory, which gets randomly initialized, if it is read - /// and retains written data. This mode does currently not support `axi_pkg::BURST_WRAP`! - /// All responses are `axi_pkg::RESP_OKAY` when in this mode. 
- parameter bit MAPPED = 1'b0 - ); - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TA), .TT(TT) - ) ace_driver_t; - typedef rand_id_queue_pkg::rand_id_queue #( - .data_t (ace_driver_t::ax_ace_beat_t), - .ID_WIDTH (IW) - ) rand_ax_ace_beat_queue_t; - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - - typedef logic [AW-1:0] addr_t; - typedef logic [7:0] byte_t; - - ace_driver_t drv; - rand_ax_ace_beat_queue_t ar_ace_queue; - ax_ace_beat_t aw_ace_queue[$]; - int unsigned b_wait_cnt; - - // Memory array for when the `MAPPED` parameter is set. - byte_t memory_q[addr_t]; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) ace - ); - this.drv = new(ace); - this.ar_ace_queue = new; - this.b_wait_cnt = 0; - this.reset(); - endfunction - - function void reset(); - this.drv.reset_slave(); - this.memory_q.delete(); - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? 
- task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - // assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.ace.clk_i); - endtask - - task recv_ars(); - forever begin - automatic ax_ace_beat_t ar_ace_beat; - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.recv_ar(ar_ace_beat); - if (MAPPED) begin - assert (ar_ace_beat.ax_burst != axi_pkg::BURST_WRAP) else - $error("axi_pkg::BURST_WRAP not supported in MAPPED mode."); - end - ar_ace_queue.push(ar_ace_beat.ax_id, ar_ace_beat); - end - endtask - - task send_rs(); - forever begin - automatic logic rand_success; - automatic ax_ace_beat_t ar_ace_beat; - automatic r_ace_beat_t r_ace_beat = new; - automatic addr_t byte_addr; - wait (ar_ace_queue.size > 0); - ar_ace_beat = ar_ace_queue.peek(); - byte_addr = axi_pkg::aligned_addr(ar_ace_beat.ax_addr, axi_pkg::size_t'($clog2(DW/8))); - //rand_success = std::randomize(r_beat); assert(rand_success); - //rand_success = r_beat.randomize(); assert(rand_success); - if (MAPPED) begin - // Either use the actual data, or save the random generated. 
- for (int unsigned i = 0; i < (DW/8); i++) begin - if (this.memory_q.exists(byte_addr)) begin - r_ace_beat.r_data[i*8+:8] = this.memory_q[byte_addr]; - end else begin - this.memory_q[byte_addr] = r_ace_beat.r_data[i*8+:8]; - end - byte_addr++; - end - r_ace_beat.r_resp = axi_pkg::RESP_OKAY; - end - r_ace_beat.r_id = ar_ace_beat.ax_id; - if (RAND_RESP && !ar_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) - r_ace_beat.r_resp[1] = $random(); - if (ar_ace_beat.ax_lock) - r_ace_beat.r_resp[0]= $random(); - r_ace_beat.r_resp[2] = $random(); - r_ace_beat.r_resp[3] = $random(); - rand_wait(R_MIN_WAIT_CYCLES, R_MAX_WAIT_CYCLES); - if (ar_ace_beat.ax_len == '0) begin - r_ace_beat.r_last = 1'b1; - void'(ar_ace_queue.pop_id(ar_ace_beat.ax_id)); - end else begin - if ((ar_ace_beat.ax_burst == axi_pkg::BURST_INCR) && MAPPED) begin - ar_ace_beat.ax_addr = axi_pkg::aligned_addr(ar_ace_beat.ax_addr, ar_ace_beat.ax_size) + - 2**ar_ace_beat.ax_size; - end - ar_ace_beat.ax_len--; - ar_ace_queue.set(ar_ace_beat.ax_id, ar_ace_beat); - end - drv.send_r(r_ace_beat); - end - endtask - - task recv_aws(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - rand_wait(AX_MIN_WAIT_CYCLES, AX_MAX_WAIT_CYCLES); - drv.recv_aw(aw_ace_beat); - if (MAPPED) begin - assert (aw_ace_beat.ax_atop == '0) else - $error("ATOP not supported in MAPPED mode."); - assert (aw_ace_beat.ax_burst != axi_pkg::BURST_WRAP) else - $error("axi_pkg::BURST_WRAP not supported in MAPPED mode."); - end - aw_ace_queue.push_back(aw_ace_beat); - // Atomic{Load,Swap,Compare}s require an R response. 
- if (aw_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) begin - ar_ace_queue.push(aw_ace_beat.ax_id, aw_ace_beat); - end - end - endtask - - task recv_ws(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - automatic addr_t byte_addr; - forever begin - automatic w_beat_t w_beat; - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - drv.recv_w(w_beat); - if (MAPPED) begin - wait (aw_ace_queue.size() > 0); - aw_ace_beat = aw_ace_queue[0]; - byte_addr = axi_pkg::aligned_addr(aw_ace_beat.ax_addr, $clog2(DW/8)); - - // Write Data if the strobe is defined - for (int unsigned i = 0; i < (DW/8); i++) begin - if (w_beat.w_strb[i]) begin - this.memory_q[byte_addr] = w_beat.w_data[i*8+:8]; - end - byte_addr++; - end - // Update address in beat - if (aw_ace_beat.ax_burst == axi_pkg::BURST_INCR) begin - aw_ace_beat.ax_addr = axi_pkg::aligned_addr(aw_ace_beat.ax_addr, aw_ace_beat.ax_size) + - 2**aw_ace_beat.ax_size; - end - aw_ace_queue[0] = aw_ace_beat; - end - if (w_beat.w_last) - break; - end - b_wait_cnt++; - end - endtask - - task send_bs(); - forever begin - automatic ax_ace_beat_t aw_ace_beat; - automatic b_beat_t b_beat = new; - automatic logic rand_success; - wait (b_wait_cnt > 0 && (aw_ace_queue.size() != 0)); - aw_ace_beat = aw_ace_queue.pop_front(); - //rand_success = b_beat.randomize(); assert(rand_success); - b_beat.b_id = aw_ace_beat.ax_id; - if (RAND_RESP && !aw_ace_beat.ax_atop[axi_pkg::ATOP_R_RESP]) - b_beat.b_resp[1] = $random(); - if (aw_ace_beat.ax_lock) begin - b_beat.b_resp[0]= $random(); - end - rand_wait(RESP_MIN_WAIT_CYCLES, RESP_MAX_WAIT_CYCLES); - if (MAPPED) begin - b_beat.b_resp = axi_pkg::RESP_OKAY; - end - drv.send_b(b_beat); - b_wait_cnt--; - end - endtask - - task run(); - fork - recv_ars(); - send_rs(); - recv_aws(); - recv_ws(); - send_bs(); - join - endtask - - endclass - - /// ACE Monitor. 
- class ace_monitor #( - /// AXI4+ATOP ID width - parameter int unsigned IW = 0, - /// AXI4+ATOP address width - parameter int unsigned AW = 0, - /// AXI4+ATOP data width - parameter int unsigned DW = 0, - /// AXI4+ATOP user width - parameter int unsigned UW = 0, - /// Stimuli test time - parameter time TT = 0ns - ); - - typedef ace_test::ace_driver #( - .AW(AW), .DW(DW), .IW(IW), .UW(UW), .TA(TT), .TT(TT) - ) ace_driver_t; - - typedef ace_driver_t::ax_ace_beat_t ax_ace_beat_t; - typedef ace_driver_t::w_beat_t w_beat_t; - typedef ace_driver_t::b_beat_t b_beat_t; - typedef ace_driver_t::r_ace_beat_t r_ace_beat_t; - - ace_driver_t drv; - mailbox aw_mbx = new, w_mbx = new, b_mbx = new, - ar_mbx = new, r_mbx = new; - - function new( - virtual ACE_BUS_DV #( - .AXI_ADDR_WIDTH(AW), - .AXI_DATA_WIDTH(DW), - .AXI_ID_WIDTH(IW), - .AXI_USER_WIDTH(UW) - ) axi - ); - this.drv = new(axi); - endfunction - - task monitor; - fork - // AW - forever begin - automatic ax_ace_beat_t ax; - this.drv.mon_aw(ax); - aw_mbx.put(ax); - end - // W - forever begin - automatic w_beat_t w; - this.drv.mon_w(w); - w_mbx.put(w); - end - // B - forever begin - automatic b_beat_t b; - this.drv.mon_b(b); - b_mbx.put(b); - end - // AR - forever begin - automatic ax_ace_beat_t ax; - this.drv.mon_ar(ax); - ar_mbx.put(ax); - end - // R - forever begin - automatic r_ace_beat_t r; - this.drv.mon_r(r); - r_mbx.put(r); - end - join - endtask - endclass - -endpackage - -// non synthesisable axi logger module -// this module logs the activity of the input axi channel -// the log files will be found in "./axi_log//" -// one log file for all writes -// a log file per id for the reads -// atomic transactions with read response are injected into the corresponding log file of the read -module ace_chan_logger #( - parameter time TestTime = 8ns, // Time after clock, where sampling happens - parameter string LoggerName = "ace_logger", // name of the logger - parameter type aw_chan_t = logic, // axi AW type - parameter 
type w_chan_t = logic, // axi W type - parameter type b_chan_t = logic, // axi B type - parameter type ar_chan_t = logic, // axi AR type - parameter type r_chan_t = logic // axi R type -) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low, when `1'b0` no sampling - input logic end_sim_i, // end of simulation - // AW channel - input aw_chan_t aw_chan_i, - input logic aw_valid_i, - input logic aw_ready_i, - // W channel - input w_chan_t w_chan_i, - input logic w_valid_i, - input logic w_ready_i, - // B channel - input b_chan_t b_chan_i, - input logic b_valid_i, - input logic b_ready_i, - // AR channel - input ar_chan_t ar_chan_i, - input logic ar_valid_i, - input logic ar_ready_i, - // R channel - input r_chan_t r_chan_i, - input logic r_valid_i, - input logic r_ready_i -); - // id width from channel - localparam int unsigned IdWidth = $bits(aw_chan_i.id); - localparam int unsigned NoIds = 2**IdWidth; - - // queues for writes and reads - aw_chan_t aw_queue[$]; - w_chan_t w_queue[$]; - b_chan_t b_queue[$]; - aw_chan_t ar_queues[NoIds-1:0][$]; - r_chan_t r_queues[NoIds-1:0][$]; - - // channel sampling into queues - always @(posedge clk_i) #TestTime begin : proc_channel_sample - automatic aw_chan_t ar_beat; - automatic int fd; - automatic string log_file; - automatic string log_str; - // only execute when reset is high - if (rst_ni) begin - // AW channel - if (aw_valid_i && aw_ready_i) begin - aw_queue.push_back(aw_chan_i); - log_file = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AW on channel: LEN: %d, ATOP: %b", - $time, aw_chan_i.id, aw_chan_i.len, aw_chan_i.atop); - $fdisplay(fd, log_str); - $fclose(fd); - end - - // inject AR into queue, if there is an atomic - if (aw_chan_i.atop[axi_pkg::ATOP_R_RESP]) begin - $display("Atomic detected with response"); - ar_beat.id = aw_chan_i.id; - ar_beat.addr = aw_chan_i.addr; - if (aw_chan_i.len > 1) begin - 
ar_beat.len = aw_chan_i.len / 2; - end else begin - ar_beat.len = aw_chan_i.len; - end - ar_beat.size = aw_chan_i.size; - ar_beat.burst = aw_chan_i.burst; - ar_beat.lock = aw_chan_i.lock; - ar_beat.cache = aw_chan_i.cache; - ar_beat.prot = aw_chan_i.prot; - ar_beat.qos = aw_chan_i.qos; - ar_beat.region = aw_chan_i.region; - ar_beat.atop = aw_chan_i.atop; - ar_beat.user = aw_chan_i.user; - ar_queues[aw_chan_i.id].push_back(ar_beat); - log_file = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, aw_chan_i.id); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AR on channel: LEN: %d injected ATOP: %b", - $time, ar_beat.id, ar_beat.len, ar_beat.atop); - $fdisplay(fd, log_str); - $fclose(fd); - end - end - end - // W channel - if (w_valid_i && w_ready_i) begin - w_queue.push_back(w_chan_i); - end - // B channel - if (b_valid_i && b_ready_i) begin - b_queue.push_back(b_chan_i); - end - // AR channel - if (ar_valid_i && ar_ready_i) begin - log_file = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, ar_chan_i.id); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> ID: %h AR on channel: LEN: %d", - $time, ar_chan_i.id, ar_chan_i.len); - $fdisplay(fd, log_str); - $fclose(fd); - end - ar_beat.id = ar_chan_i.id; - ar_beat.addr = ar_chan_i.addr; - ar_beat.len = ar_chan_i.len; - ar_beat.size = ar_chan_i.size; - ar_beat.burst = ar_chan_i.burst; - ar_beat.lock = ar_chan_i.lock; - ar_beat.cache = ar_chan_i.cache; - ar_beat.prot = ar_chan_i.prot; - ar_beat.qos = ar_chan_i.qos; - ar_beat.region = ar_chan_i.region; - ar_beat.atop = '0; - ar_beat.user = ar_chan_i.user; - ar_beat.snoop=ar_chan_i.snoop; - ar_beat.bar=ar_chan_i.bar; - ar_beat.domain=ar_chan_i.domain; - ar_queues[ar_chan_i.id].push_back(ar_beat); - end - // R channel - if (r_valid_i && r_ready_i) begin - r_queues[r_chan_i.id].push_back(r_chan_i); - end - end - end - - initial begin : proc_log - automatic string log_name; - automatic string log_string; - automatic 
aw_chan_t aw_beat; - automatic w_chan_t w_beat; - automatic int unsigned no_w_beat = 0; - automatic b_chan_t b_beat; - automatic aw_chan_t ar_beat; - automatic r_chan_t r_beat; - automatic int unsigned no_r_beat[NoIds]; - automatic int fd; - - // init r counter - for (int unsigned i = 0; i < NoIds; i++) begin - no_r_beat[i] = 0; - end - - // make the log dirs - log_name = $sformatf("mkdir -p ./axi_log/%s/", LoggerName); - $system(log_name); - - // open log files - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fdisplay(fd, "This is the write log file"); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - for (int unsigned i = 0; i < NoIds; i++) begin - log_name = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, i); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fdisplay(fd, "This is the read log file for ID: %0h", i); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - end - - // on each clock cycle update the logs if there is something in the queues - wait (rst_ni); - while (!end_sim_i) begin - @(posedge clk_i); - - // update the write log file - while (aw_queue.size() != 0 && w_queue.size() != 0) begin - aw_beat = aw_queue[0]; - w_beat = w_queue.pop_front(); - - log_string = $sformatf("%0t> ID: %h W %d of %d, LAST: %b ATOP: %b", - $time, aw_beat.id, no_w_beat, aw_beat.len, w_beat.last, aw_beat.atop); - - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - $fdisplay(fd, log_string); - // write out error if last beat does not match! 
- if (w_beat.last && !(aw_beat.len == no_w_beat)) begin - $fdisplay(fd, "ERROR> Last flag was not expected!!!!!!!!!!!!!"); - end - $fclose(fd); - end - // pop the AW if the last flag is set - no_w_beat++; - if (w_beat.last) begin - aw_beat = aw_queue.pop_front(); - no_w_beat = 0; - end - end - - // check b queue - if (b_queue.size() != 0) begin - b_beat = b_queue.pop_front(); - log_string = $sformatf("%0t> ID: %h B recieved", - $time, b_beat.id); - log_name = $sformatf("./axi_log/%s/write.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - $fdisplay(fd, log_string); - $fclose(fd); - end - end - - // update the read log files - for (int unsigned i = 0; i < NoIds; i++) begin - while (ar_queues[i].size() != 0 && r_queues[i].size() != 0) begin - ar_beat = ar_queues[i][0]; - r_beat = r_queues[i].pop_front(); - - log_name = $sformatf("./axi_log/%s/read_%0h.log", LoggerName, i); - fd = $fopen(log_name, "a"); - if (fd) begin - log_string = $sformatf("%0t> ID: %h R %d of %d, LAST: %b ATOP: %b", - $time, r_beat.id, no_r_beat[i], ar_beat.len, r_beat.last, ar_beat.atop); - - $fdisplay(fd, log_string); - // write out error if last beat does not match! - if (r_beat.last && !(ar_beat.len == no_r_beat[i])) begin - $fdisplay(fd, "ERROR> Last flag was not expected!!!!!!!!!!!!!"); - end - $fclose(fd); - end - no_r_beat[i]++; - // pop the queue if it is the last flag - if (r_beat.last) begin - ar_beat = ar_queues[i].pop_front(); - no_r_beat[i] = 0; - end - end - end - end - $fclose(fd); - end -endmodule diff --git a/src/ace_trs_dec.sv b/src/ace_trs_dec.sv deleted file mode 100644 index ecb2442..0000000 --- a/src/ace_trs_dec.sv +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2019 ETH Zurich and University of Bologna. -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. 
You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - -module ace_trs_dec -#( - parameter type slv_ace_req_t = logic -) ( - // incoming request from master - input slv_ace_req_t slv_reqs_i, - // Write transaction shareable - output logic snoop_aw_trs, - // Read transaction shareable - output logic snoop_ar_trs -); - -/// Types of transactions bypassing CCU -logic write_back, write_no_snoop, read_no_snoop; - -assign write_back = (slv_reqs_i.aw.snoop == 'b011) && (slv_reqs_i.aw.bar[0] == 'b0) && - ((slv_reqs_i.aw.domain == 'b00) || (slv_reqs_i.aw.domain == 'b01) || - (slv_reqs_i.aw.domain == 'b10)); - -assign write_no_snoop = (slv_reqs_i.aw.snoop == 'b000) && (slv_reqs_i.aw.bar[0] == 'b0) && - ((slv_reqs_i.aw.domain == 'b00) || (slv_reqs_i.aw.domain == 'b11) ); -assign read_no_snoop = (slv_reqs_i.ar.snoop == 'b0000) && (slv_reqs_i.ar.bar[0] =='b0) && - ((slv_reqs_i.ar.domain == 'b00) || (slv_reqs_i.ar.domain == 'b11) ); - -assign snoop_aw_trs = ~(write_back | write_no_snoop); -assign snoop_ar_trs = ~(read_no_snoop); - -endmodule diff --git a/src/ccu/ccu_csr_wrap.sv b/src/ccu/ccu_csr_wrap.sv new file mode 100644 index 0000000..46eefd4 --- /dev/null +++ b/src/ccu/ccu_csr_wrap.sv @@ -0,0 +1,71 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi + +module ccu_csr_wrap + import ccu_pkg::*; + import ccu_csr_pkg::*; +#( + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type apb_req_t = logic, + parameter type apb_resp_t = logic, + parameter int unsigned numEvents = 16 +) ( + input logic clk_i, + input logic rst_ni, + // APB interface + input apb_req_t apb_req_i, + output apb_resp_t apb_resp_o, + // Performance events + input logic [numEvents-1:0] events_i +); + + ccu_csr__in_t hwif_in; + ccu_csr__out_t hwif_out; + + for (genvar i = 0; i < numPerfCounters; i++) begin : gen_hwif + assign hwif_in.perf_counter[i].val.incr = &{ + hwif_out.perf_eventsel[i].event_id.value < numEvents, + !hwif_out.perf_countinhibit.inh.value[i], + events_i[hwif_out.perf_eventsel[i].event_id.value] + }; + end + + ccu_csr u_csr_regs ( + .clk (clk_i), + .arst_n (rst_ni), + .s_apb_psel (apb_req_i.psel), + .s_apb_penable (apb_req_i.penable), + .s_apb_pwrite (apb_req_i.pwrite), + .s_apb_pprot (apb_req_i.pprot), + .s_apb_paddr (apb_req_i.paddr[CCU_CSR_MIN_ADDR_WIDTH-1:0]), + .s_apb_pwdata (apb_req_i.pwdata), + .s_apb_pstrb (apb_req_i.pstrb), + .s_apb_pready (apb_resp_o.pready), + .s_apb_prdata (apb_resp_o.prdata), + .s_apb_pslverr (apb_resp_o.pslverr), + + .hwif_in (hwif_in), + .hwif_out (hwif_out) + ); + + // pragma translate_off + `ifndef VERILATOR + initial begin + tooManyPerformanceEvents: assert (numEvents <= 256) + else $fatal("Number of events exceeds 256!"); + end + `endif + // pragma translate_on + +endmodule diff --git a/src/ccu/ccu_exclusive_monitor.sv b/src/ccu/ccu_exclusive_monitor.sv new file mode 100644 index 0000000..e79c8a4 --- 
/dev/null
+++ b/src/ccu/ccu_exclusive_monitor.sv
@@ -0,0 +1,220 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+`include "ace/assign.svh"
+// Per-subordinate exclusive monitor on the AR/R path: tracks reservations and a lock, and answers exclusive stores without a reservation locally on R.
+module ccu_exclusive_monitor
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+
+  parameter type ccu_ace_ar_t = logic,
+  parameter type ccu_ace_r_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] dealloc_i, // clears lock_q[s] (highest priority in the update chain)
+  output logic [ccuCfg.u.numSubordinates-1:0] lock_o, // registered lock bits
+  output logic [ccuCfg.u.numSubordinates-1:0]
+               [ccuCfg.u.axiSubordinateIdWidth-1:0] entry_id_o, // ID recorded with each subordinate's reservation
+  output logic sc_fail_o, // index output of the R arbiter: 1 when the locally generated response is selected
+
+  input ccu_ace_ar_t ar_i,
+  input logic ar_valid_i,
+  output logic ar_ready_o,
+
+  output ccu_ace_ar_t ar_o,
+  output logic ar_valid_o,
+  input logic ar_ready_i,
+
+  input ccu_ace_r_t r_i,
+  input logic r_valid_i,
+  output logic r_ready_o,
+
+  output ccu_ace_r_t r_o,
+  output logic r_valid_o,
+  input logic r_ready_i
+);
+
+typedef struct packed {
+  logic [ccuCfg.u.axiSubordinateIdWidth-1:0] id;
+} exclusive_monitor_entry_t;
+
+typedef struct packed {
+  logic [ccuCfg.axiCcuIdWidth-1:0] id;
+  logic [ccuCfg.u.axiUserWidth-1:0] user;
+} r_register_entry_t;
+
+exclusive_monitor_entry_t [ccuCfg.u.numSubordinates-1:0] entry_q, entry_d;
+logic [ccuCfg.u.numSubordinates-1:0] valid_q, valid_d;
+logic [ccuCfg.u.numSubordinates-1:0] lock_q, lock_d;
+// The issuing subordinate's index is taken from the top subordinateIndexWidth bits of the CCU-internal ID
+logic [ccuCfg.subordinateIndexWidth-1:0] ar_sub_idx;
+assign ar_sub_idx = ar_i.id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth];
+
+logic is_exclusive_load;
+logic is_exclusive_store;
+logic is_exclusive_sequence;
+logic exclusive_store_will_fail;
+
+assign is_exclusive_load = ace_ar_is_exclusive_load(
+  ar_i.bar[0], ar_i.domain, ar_i.snoop, ar_i.lock
+);
+
+assign is_exclusive_store = ace_ar_is_exclusive_store(
+  ar_i.bar[0], ar_i.domain, ar_i.snoop, ar_i.lock
+);
+
+assign is_exclusive_sequence = is_exclusive_load || is_exclusive_store;
+assign exclusive_store_will_fail = is_exclusive_store && !valid_q[ar_sub_idx]; // exclusive store without a reservation
+
+logic ar_handshake;
+assign ar_handshake = ar_valid_i && ar_ready_o;
+
+logic exclusive_store_pass;
+assign exclusive_store_pass = ar_handshake && is_exclusive_store && valid_q[ar_sub_idx];
+
+logic reservation_set;
+assign reservation_set = ar_handshake && is_exclusive_sequence;
+
+always_comb begin
+  entry_d = entry_q;
+  valid_d = valid_q;
+  lock_d = lock_q;
+  // Priority per subordinate: dealloc > passing store (own lock set / others' reservations cleared) > new reservation
+  for (int s = 0; s < ccuCfg.u.numSubordinates; s++) begin
+    if (dealloc_i[s]) begin
+      lock_d[s] = 1'b0;
+    end else if (exclusive_store_pass &&
+                 ccuCfg.subordinateIndexWidth'(s) == ar_sub_idx) begin
+      lock_d[s] = 1'b1;
+    end else if (exclusive_store_pass &&
+                 ccuCfg.subordinateIndexWidth'(s) != ar_sub_idx) begin
+      valid_d[s] = 1'b0;
+    end else if (reservation_set &&
+                 ccuCfg.subordinateIndexWidth'(s) == ar_sub_idx) begin
+      valid_d[s] = 1'b1; // also reached by an exclusive store that had no reservation
+      entry_d[s].id = ar_i.id[ccuCfg.u.axiSubordinateIdWidth-1:0];
+    end
+  end
+end
+
+always_ff @(posedge clk_i or negedge rst_ni) begin
+  if (!rst_ni) begin
+    entry_q <= '0;
+    valid_q <= '0;
+    lock_q <= '0;
+  end else begin
+    entry_q <= entry_d;
+    valid_q <= valid_d;
+    lock_q <= lock_d;
+  end
+end
+
+assign lock_o = lock_q;
+
+for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_entry_id
+  assign entry_id_o[s] = entry_q[s].id;
+end
+
+logic r_register_valid, r_register_ready;
+// AR steering: select 0 forwards the request on ar_o, select 1 (store will fail) diverts it to the local response register
+stream_demux #(
+  .N_OUP (2)
+) u_ar_demux (
+  .inp_valid_i (ar_valid_i),
+  .inp_ready_o (ar_ready_o),
+  .oup_sel_i (exclusive_store_will_fail),
+  .oup_valid_o ({r_register_valid, ar_valid_o}),
+  .oup_ready_i ({r_register_ready, ar_ready_i})
+);
+
+`ACE_ASSIGN_AR_STRUCT(ar_o, ar_i)
+
+r_register_entry_t r_register_wdata, r_register_rdata;
+logic r_ack_valid, r_ack_ready;
+ccu_ace_r_t r_ack;
+
+assign r_register_wdata = '{
+  id: ar_i.id,
+  user: ar_i.user
+};
+
+stream_register #(
+  .T (r_register_entry_t)
+) u_r_register (
+  .clk_i,
+  .rst_ni,
+  .clr_i (1'b0),
+  .testmode_i (1'b0),
+  .valid_i (r_register_valid),
+  .ready_o (r_register_ready),
+  .data_i (r_register_wdata),
+  .valid_o (r_ack_valid),
+  .ready_i (r_ack_ready),
+  .data_o (r_register_rdata)
+);
+// Locally generated single-beat R response: zero data, OKAY in the low resp bits, upper two resp bits cleared
+assign r_ack = '{
+  id: r_register_rdata.id,
+  data: '0,
+  resp: {2'b00, axi_pkg::RESP_OKAY},
+  last: 1'b1,
+  user: r_register_rdata.user
+};
+
+logic [1:0] r_arbiter_valid;
+logic [1:0] r_arbiter_ready;
+logic [1:0] mask_d, mask_q;
+
+assign r_arbiter_valid = {r_ack_valid, r_valid_i} & ~mask_q;
+assign {r_ack_ready, r_ready_o} = r_arbiter_ready & ~mask_q;
+// After a non-last beat is accepted, mask the input that was not granted until the last beat of that burst goes out
+always_comb begin : mask_comb
+  mask_d = mask_q;
+  if (r_valid_o && r_ready_i && r_o.last)
+    mask_d = '0;
+  else if (r_valid_o && r_ready_i)
+    mask_d = ~(r_arbiter_valid & r_arbiter_ready);
+end
+
+always_ff @(posedge clk_i or negedge rst_ni) begin
+  if (!rst_ni) mask_q <= '0;
+  else mask_q <= mask_d;
+end
+
+rr_arb_tree #(
+  .NumIn (2),
+  .DataType (ccu_ace_r_t),
+  .ExtPrio (1'b0),
+  .AxiVldRdy (1'b1),
+  .LockIn (1'b1),
+  .FairArb (1'b1)
+) u_r_arbiter (
+  .clk_i,
+  .rst_ni,
+  .flush_i (1'b0),
+  .rr_i ('0),
+  .req_i (r_arbiter_valid),
+  .gnt_o (r_arbiter_ready),
+  .data_i ({r_ack, r_i}), // input 1 = local response, input 0 = downstream R
+  .req_o (r_valid_o),
+  .gnt_i (r_ready_i),
+  .data_o (r_o),
+  .idx_o (sc_fail_o)
+);
+
+endmodule
diff --git a/src/ccu/ccu_frontend.sv b/src/ccu/ccu_frontend.sv
new file mode 100644
index 0000000..b137950
--- /dev/null
+++ b/src/ccu/ccu_frontend.sv
@@ -0,0 +1,346 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+`include "ace/assign.svh"
+// CCU frontend: per-subordinate AR/R gating with RACK bookkeeping, arbitration onto one manager port, optional spill registers, exclusive monitor.
+module ccu_frontend
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+
+  parameter type ccu_ace_manager_ar_t = logic,
+  parameter type ccu_ace_manager_aw_t = logic,
+  parameter type ccu_w_t = logic,
+  parameter type ccu_ace_manager_r_t = logic,
+  parameter type ccu_ace_manager_b_t = logic,
+  parameter type ccu_ace_manager_req_t = logic,
+  parameter type ccu_ace_manager_resp_t = logic,
+
+  parameter type ccu_ace_subordinate_ar_t = logic,
+  parameter type ccu_ace_subordinate_aw_t = logic,
+  parameter type ccu_ace_subordinate_r_t = logic,
+  parameter type ccu_ace_subordinate_b_t = logic,
+  parameter type ccu_ace_subordinate_req_t = logic,
+  parameter type ccu_ace_subordinate_resp_t = logic,
+
+  localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth
+
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req_i,
+  output ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] subordinate_rack_i, // pops the per-subordinate RACK FIFO
+  input logic [ccuCfg.u.numSubordinates-1:0] subordinate_wack_i, // not referenced in this module body
+
+  output ccu_ace_manager_req_t manager_req_o,
+  input ccu_ace_manager_resp_t manager_resp_i,
+
+  output logic scoreboard_dealloc_check_o, // asserted on the last post-spill R beat handshake
+  output logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id_o, // ID of the post-spill R beat
+  input logic scoreboard_dealloc_hit_i,
+  input logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] scoreboard_dealloc_o,
+  output logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry_o
+);
+
+  typedef struct packed {
+    logic [ccuCfg.transactionIndexWidth-1:0] tid;
+    logic dealloc;
+    logic exclusive;
+  } rack_fifo_entry_t;
+
+  typedef struct packed {
+    ccu_ace_manager_r_t r;
+    logic sc_fail;
+  } r_spill_entry_t;
+
+  ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req;
+  ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp;
+
+  ccu_ace_manager_req_t arbiter_req;
+  ccu_ace_manager_resp_t arbiter_resp;
+
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_lock;
+  logic [ccuCfg.u.numSubordinates-1:0][ccuCfg.u.axiSubordinateIdWidth-1:0] exclusive_monitor_entry_id;
+  logic exclusive_monitor_sc_fail;
+  logic [ccuCfg.u.numSubordinates-1:0] exclusive_monitor_dealloc;
+
+  // Exclusive monitor AR/R (post AR-spill, pre manager)
+  ccu_ace_manager_ar_t exclusive_monitor_ar_in;
+  logic exclusive_monitor_ar_valid_in;
+  logic exclusive_monitor_ar_ready_in;
+
+  // Exclusive monitor R output (pre R-spill)
+  ccu_ace_manager_r_t exclusive_monitor_r;
+  logic exclusive_monitor_r_valid;
+  logic exclusive_monitor_r_ready;
+
+  r_spill_entry_t r_spill_in;
+  r_spill_entry_t r_spill_out;
+  logic r_spill_valid_out;
+  logic r_spill_ready_out;
+
+  // Per-subordinate logic
+  // {{{
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_subordinate
+
+    logic is_exclusive_sequence;
+    logic lock_stall;
+
+    logic rack_fifo_full;
+    rack_fifo_entry_t rack_fifo_wdata;
+    rack_fifo_entry_t rack_fifo_rdata;
+    logic rack_fifo_push;
+    logic rack_fifo_pop;
+
+    logic r_id_hit;
+
+    assign is_exclusive_sequence =
+      ace_ar_is_exclusive_load (
+        subordinate_req_i[s].ar.bar[0],
+        subordinate_req_i[s].ar.domain,
+        subordinate_req_i[s].ar.snoop,
+        subordinate_req_i[s].ar.lock
+      ) ||
+      ace_ar_is_exclusive_store(
+        subordinate_req_i[s].ar.bar[0],
+        subordinate_req_i[s].ar.domain,
+        subordinate_req_i[s].ar.snoop,
+        subordinate_req_i[s].ar.lock
+      );
+    // Hold back an exclusive AR from subordinate s while a lock is set for some subordinate but not for s
+    assign lock_stall = is_exclusive_sequence &&
+      |exclusive_monitor_lock && !exclusive_monitor_lock[s];
+
+    `ACE_ASSIGN_AR_STRUCT(subordinate_req[s].ar, subordinate_req_i[s].ar)
+    assign subordinate_req[s].ar_valid = subordinate_req_i[s].ar_valid && !lock_stall;
+    assign subordinate_resp_o[s].ar_ready = subordinate_resp[s].ar_ready && !lock_stall;
+    // R is blocked in both directions while the RACK FIFO is full
+    `ACE_ASSIGN_R_STRUCT(subordinate_resp_o[s].r, subordinate_resp[s].r)
+    assign subordinate_resp_o[s].r_valid = subordinate_resp[s].r_valid && !rack_fifo_full;
+    assign subordinate_req[s].r_ready = subordinate_req_i[s].r_ready && !rack_fifo_full;
+    // AW, W and B pass through unmodified
+    `ACE_ASSIGN_AW_STRUCT(subordinate_req[s].aw, subordinate_req_i[s].aw)
+    assign subordinate_req[s].aw_valid = subordinate_req_i[s].aw_valid;
+    assign subordinate_resp_o[s].aw_ready = subordinate_resp[s].aw_ready;
+
+    `AXI_ASSIGN_W_STRUCT(subordinate_req[s].w, subordinate_req_i[s].w)
+    assign subordinate_req[s].w_valid = subordinate_req_i[s].w_valid;
+    assign subordinate_resp_o[s].w_ready = subordinate_resp[s].w_ready;
+
+    `AXI_ASSIGN_B_STRUCT(subordinate_resp_o[s].b, subordinate_resp[s].b)
+    assign subordinate_resp_o[s].b_valid = subordinate_resp[s].b_valid;
+    assign subordinate_req[s].b_ready = subordinate_req_i[s].b_ready;
+
+    // The xACK signal is used to extend the lifetime of
+    // a transaction beyond the last R handshake.
+    // Since ACE uses xACK signals to trigger many events
+    // and xACK signals enforce FIFO ordering, a plain FIFO
+    // can be used to push and pop relevant metadata between
+    // a channel response and the associated xACK
+    assign rack_fifo_push =
+      subordinate_resp_o[s].r_valid && subordinate_req_i[s].r_ready && subordinate_resp_o[s].r.last;
+
+    // RACK-related metadata are used to:
+    // - clear the corresponding scoreboard entry
+    // - clear the corresponding exclusive monitor entry
+    // SC failure responses are locally generated, thus no entry should be cleared
+    // once the RACK arrives
+    assign r_id_hit = exclusive_monitor_entry_id[s] == subordinate_resp_o[s].r.id;
+
+    assign rack_fifo_wdata = '{
+      tid: scoreboard_dealloc_entry_i,
+      dealloc: scoreboard_dealloc_hit_i && !r_spill_out.sc_fail,
+      exclusive: r_id_hit && exclusive_monitor_lock[s] && !r_spill_out.sc_fail
+    };
+
+    assign rack_fifo_pop = subordinate_rack_i[s];
+
+    fifo_v3 #(
+      .FALL_THROUGH (1'b0),
+      .DEPTH (2),
+      .dtype (rack_fifo_entry_t)
+    ) u_rack_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .empty_o (),
+      .full_o (rack_fifo_full),
+      .data_i (rack_fifo_wdata),
+      .push_i (rack_fifo_push),
+      .data_o (rack_fifo_rdata),
+      .pop_i (rack_fifo_pop)
+    );
+    // On RACK, replay the stored metadata as scoreboard / exclusive-monitor deallocation requests
+    assign scoreboard_dealloc_o[s] = subordinate_rack_i[s] && rack_fifo_rdata.dealloc;
+    assign scoreboard_dealloc_entry_o[s] = rack_fifo_rdata.tid;
+    assign exclusive_monitor_dealloc[s] = subordinate_rack_i[s] && rack_fifo_rdata.exclusive;
+  end
+  // }}}
+
+  // Point of Serialization (PoS)
+  // {{{
+  ccu_frontend_arbiter #(
+    .numSubordinates (ccuCfg.u.numSubordinates),
+    .aceSubordinateIdWidth(ccuCfg.u.axiSubordinateIdWidth),
+    .maxWTrans (ccuCfg.u.numWriteTransactions),
+    .fallThrough (1'b1),
+
+    .ccu_ace_manager_ar_t (ccu_ace_manager_ar_t),
+    .ccu_ace_manager_aw_t (ccu_ace_manager_aw_t),
+    .ccu_w_t (ccu_w_t),
+    .ccu_ace_manager_r_t (ccu_ace_manager_r_t),
+    .ccu_ace_manager_b_t (ccu_ace_manager_b_t),
+    .ccu_ace_manager_req_t (ccu_ace_manager_req_t),
+    .ccu_ace_manager_resp_t(ccu_ace_manager_resp_t),
+
+    .ccu_ace_subordinate_ar_t (ccu_ace_subordinate_ar_t),
+    .ccu_ace_subordinate_aw_t (ccu_ace_subordinate_aw_t),
+    .ccu_ace_subordinate_r_t (ccu_ace_subordinate_r_t),
+    .ccu_ace_subordinate_b_t (ccu_ace_subordinate_b_t),
+    .ccu_ace_subordinate_req_t (ccu_ace_subordinate_req_t),
+    .ccu_ace_subordinate_resp_t(ccu_ace_subordinate_resp_t)
+  ) u_subordinate_arbiter (
+    .clk_i,
+    .rst_ni,
+    .subordinate_req_i (subordinate_req),
+    .subordinate_resp_o (subordinate_resp),
+    .manager_req_o (arbiter_req),
+    .manager_resp_i (arbiter_resp)
+  );
+  // }}}
+
+  // Per-channel spill registers
+  // {{{
+  spill_register #(
+    .T (ccu_ace_manager_ar_t),
+    .Bypass (!ccuCfg.u.frontendPipeAr)
+  ) u_ar_spill (
+    .clk_i,
+    .rst_ni,
+    .valid_i (arbiter_req.ar_valid),
+    .ready_o (arbiter_resp.ar_ready),
+    .data_i (arbiter_req.ar),
+    .valid_o (exclusive_monitor_ar_valid_in),
+    .ready_i (exclusive_monitor_ar_ready_in),
+    .data_o (exclusive_monitor_ar_in)
+  );
+
+  spill_register #(
+    .T (ccu_ace_manager_aw_t),
+    .Bypass (!ccuCfg.u.frontendPipeAw)
+  ) u_aw_spill (
+    .clk_i,
+    .rst_ni,
+    .valid_i (arbiter_req.aw_valid),
+    .ready_o (arbiter_resp.aw_ready),
+    .data_i (arbiter_req.aw),
+    .valid_o (manager_req_o.aw_valid),
+    .ready_i (manager_resp_i.aw_ready),
+    .data_o (manager_req_o.aw)
+  );
+
+  spill_register #(
+    .T (ccu_w_t),
+    .Bypass (!ccuCfg.u.frontendPipeW)
+  ) u_w_spill (
+    .clk_i,
+    .rst_ni,
+    .valid_i (arbiter_req.w_valid),
+    .ready_o (arbiter_resp.w_ready),
+    .data_i (arbiter_req.w),
+    .valid_o (manager_req_o.w_valid),
+    .ready_i (manager_resp_i.w_ready),
+    .data_o (manager_req_o.w)
+  );
+
+  spill_register #(
+    .T (ccu_ace_manager_b_t),
+    .Bypass (!ccuCfg.u.frontendPipeB)
+  ) u_b_spill (
+    .clk_i,
+    .rst_ni,
+    .valid_i (manager_resp_i.b_valid),
+    .ready_o (manager_req_o.b_ready),
+    .data_i (manager_resp_i.b),
+    .valid_o (arbiter_resp.b_valid),
+    .ready_i (arbiter_req.b_ready),
+    .data_o (arbiter_resp.b)
+  );
+
+  // R channel: wrap R data + sc_fail through the spill register
+  assign r_spill_in = '{
+    r: exclusive_monitor_r,
+    sc_fail: exclusive_monitor_sc_fail
+  };
+
+  spill_register #(
+    .T (r_spill_entry_t),
+    .Bypass (!ccuCfg.u.frontendPipeR)
+  ) u_r_spill (
+    .clk_i,
+    .rst_ni,
+    .valid_i (exclusive_monitor_r_valid),
+    .ready_o (exclusive_monitor_r_ready),
+    .data_i (r_spill_in),
+    .valid_o (r_spill_valid_out),
+    .ready_i (r_spill_ready_out),
+    .data_o (r_spill_out)
+  );
+
+  `ACE_ASSIGN_R_STRUCT(arbiter_resp.r, r_spill_out.r)
+  assign arbiter_resp.r_valid = r_spill_valid_out;
+  assign r_spill_ready_out = arbiter_req.r_ready;
+  // }}}
+
+  // ACE exclusive monitor
+  // {{{
+  ccu_exclusive_monitor #(
+    .ccuCfg (ccuCfg),
+    .ccu_ace_ar_t (ccu_ace_manager_ar_t),
+    .ccu_ace_r_t (ccu_ace_manager_r_t)
+  ) u_ccu_exclusive_monitor (
+    .clk_i,
+    .rst_ni,
+    .dealloc_i (exclusive_monitor_dealloc),
+    .lock_o (exclusive_monitor_lock),
+    .entry_id_o (exclusive_monitor_entry_id),
+    .sc_fail_o (exclusive_monitor_sc_fail),
+    .ar_i (exclusive_monitor_ar_in),
+    .ar_valid_i (exclusive_monitor_ar_valid_in),
+    .ar_ready_o (exclusive_monitor_ar_ready_in),
+    .ar_o (manager_req_o.ar),
+    .ar_valid_o (manager_req_o.ar_valid),
+    .ar_ready_i (manager_resp_i.ar_ready),
+    .r_i (manager_resp_i.r),
+    .r_valid_i (manager_resp_i.r_valid),
+    .r_ready_o (manager_req_o.r_ready),
+    .r_o (exclusive_monitor_r),
+    .r_valid_o (exclusive_monitor_r_valid),
+    .r_ready_i (exclusive_monitor_r_ready)
+  );
+  // }}}
+
+  // Scoreboard dealloc check on post-spill R (aligned with per-sub R demux)
+  // {{{
+  assign scoreboard_dealloc_check_o =
+    r_spill_valid_out && r_spill_ready_out && r_spill_out.r.last;
+  assign scoreboard_dealloc_id_o = r_spill_out.r.id;
+  // }}}
+endmodule
diff --git a/src/ccu/ccu_frontend_arbiter.sv b/src/ccu/ccu_frontend_arbiter.sv
new file mode 100644
index 0000000..de62f28
--- /dev/null
+++ b/src/ccu/ccu_frontend_arbiter.sv
@@ -0,0 +1,279 @@
+// Copyright (c) 2026 ETH
Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+// Merges numSubordinates ACE ports onto one manager port; the port index is prepended to the ID and used to route B and R back.
+module ccu_frontend_arbiter
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter int unsigned numSubordinates = 0,
+  parameter int unsigned aceSubordinateIdWidth = 0,
+  parameter int unsigned maxWTrans = 0, // depth of the W routing FIFO
+  parameter int unsigned fallThrough = 0, // FALL_THROUGH setting of the W routing FIFO
+
+  parameter type ccu_ace_manager_ar_t = logic,
+  parameter type ccu_ace_manager_aw_t = logic,
+  parameter type ccu_w_t = logic,
+  parameter type ccu_ace_manager_r_t = logic,
+  parameter type ccu_ace_manager_b_t = logic,
+  parameter type ccu_ace_manager_req_t = logic,
+  parameter type ccu_ace_manager_resp_t = logic,
+
+  parameter type ccu_ace_subordinate_ar_t = logic,
+  parameter type ccu_ace_subordinate_aw_t = logic,
+  parameter type ccu_ace_subordinate_r_t = logic,
+  parameter type ccu_ace_subordinate_b_t = logic,
+  parameter type ccu_ace_subordinate_req_t = logic,
+  parameter type ccu_ace_subordinate_resp_t = logic
+
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input ccu_ace_subordinate_req_t [numSubordinates-1:0] subordinate_req_i,
+  output ccu_ace_subordinate_resp_t [numSubordinates-1:0] subordinate_resp_o,
+  output ccu_ace_manager_req_t manager_req_o,
+  input ccu_ace_manager_resp_t manager_resp_i
+);
+// The index is at least one bit wide, so the manager-side ID is always wider than the subordinate-side ID
+localparam int unsigned subordinateIndexWidth = numSubordinates > 1 ? $clog2(numSubordinates) : 1;
+localparam int unsigned aceManagerIdWidth = subordinateIndexWidth + aceSubordinateIdWidth;
+
+ccu_ace_manager_aw_t [numSubordinates-1:0] subordinate_aw;
+logic [numSubordinates-1:0] subordinate_aw_valid;
+logic [numSubordinates-1:0] subordinate_aw_ready;
+ccu_w_t [numSubordinates-1:0] subordinate_w;
+logic [numSubordinates-1:0] subordinate_w_valid;
+logic [numSubordinates-1:0] subordinate_w_ready;
+ccu_ace_manager_b_t [numSubordinates-1:0] subordinate_b;
+logic [numSubordinates-1:0] subordinate_b_valid;
+logic [numSubordinates-1:0] subordinate_b_ready;
+ccu_ace_manager_ar_t [numSubordinates-1:0] subordinate_ar;
+logic [numSubordinates-1:0] subordinate_ar_valid;
+logic [numSubordinates-1:0] subordinate_ar_ready;
+ccu_ace_manager_r_t [numSubordinates-1:0] subordinate_r;
+logic [numSubordinates-1:0] subordinate_r_valid;
+logic [numSubordinates-1:0] subordinate_r_ready;
+
+logic aw_arbiter_valid;
+logic aw_arbiter_ready;
+
+for (genvar s = 0; s < numSubordinates; s++) begin : gen_id_prepend
+  // Widen every channel of port s to the manager ID width, using the port index s as the prepended ID
+  axi_id_prepend #(
+    .NoBus (32'd1),
+    .AxiIdWidthSlvPort (aceSubordinateIdWidth),
+    .AxiIdWidthMstPort (aceManagerIdWidth),
+    .slv_aw_chan_t (ccu_ace_subordinate_aw_t),
+    .slv_w_chan_t (ccu_w_t),
+    .slv_b_chan_t (ccu_ace_subordinate_b_t),
+    .slv_ar_chan_t (ccu_ace_subordinate_ar_t),
+    .slv_r_chan_t (ccu_ace_subordinate_r_t),
+    .mst_aw_chan_t (ccu_ace_manager_aw_t),
+    .mst_w_chan_t (ccu_w_t),
+    .mst_b_chan_t (ccu_ace_manager_b_t),
+    .mst_ar_chan_t (ccu_ace_manager_ar_t),
+    .mst_r_chan_t (ccu_ace_manager_r_t)
+  ) u_id_prepend (
+    .pre_id_i (subordinateIndexWidth'(s)),
+    .slv_aw_chans_i (subordinate_req_i[s].aw),
+    .slv_aw_valids_i (subordinate_req_i[s].aw_valid),
+    .slv_aw_readies_o (subordinate_resp_o[s].aw_ready),
+    .slv_w_chans_i (subordinate_req_i[s].w),
+    .slv_w_valids_i (subordinate_req_i[s].w_valid),
+    .slv_w_readies_o (subordinate_resp_o[s].w_ready),
+    .slv_b_chans_o (subordinate_resp_o[s].b),
+    .slv_b_valids_o (subordinate_resp_o[s].b_valid),
+    .slv_b_readies_i (subordinate_req_i[s].b_ready),
+    .slv_ar_chans_i (subordinate_req_i[s].ar),
+    .slv_ar_valids_i (subordinate_req_i[s].ar_valid),
+    .slv_ar_readies_o (subordinate_resp_o[s].ar_ready),
+    .slv_r_chans_o (subordinate_resp_o[s].r),
+    .slv_r_valids_o (subordinate_resp_o[s].r_valid),
+    .slv_r_readies_i (subordinate_req_i[s].r_ready),
+    .mst_aw_chans_o (subordinate_aw[s]),
+    .mst_aw_valids_o (subordinate_aw_valid[s]),
+    .mst_aw_readies_i (subordinate_aw_ready[s]),
+    .mst_w_chans_o (subordinate_w[s]),
+    .mst_w_valids_o (subordinate_w_valid[s]),
+    .mst_w_readies_i (subordinate_w_ready[s]),
+    .mst_b_chans_i (subordinate_b[s]),
+    .mst_b_valids_i (subordinate_b_valid[s]),
+    .mst_b_readies_o (subordinate_b_ready[s]),
+    .mst_ar_chans_o (subordinate_ar[s]),
+    .mst_ar_valids_o (subordinate_ar_valid[s]),
+    .mst_ar_readies_i (subordinate_ar_ready[s]),
+    .mst_r_chans_i (subordinate_r[s]),
+    .mst_r_valids_i (subordinate_r_valid[s]),
+    .mst_r_readies_o (subordinate_r_ready[s])
+  );
+end
+
+// AW
+// {{{
+  logic w_ctrl_fifo_valid_in;
+  logic w_ctrl_fifo_ready_in;
+  logic [subordinateIndexWidth-1:0] w_ctrl_fifo_wdata; // index of the granted AW port, later used to select the W source
+  logic aw_is_evict;
+
+  rr_arb_tree #(
+    .NumIn (numSubordinates),
+    .DataType (ccu_ace_manager_aw_t),
+    .ExtPrio (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn (1'b1),
+    .FairArb (1'b1)
+  ) u_aw_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i ('0),
+    .req_i (subordinate_aw_valid),
+    .gnt_o (subordinate_aw_ready),
+    .data_i (subordinate_aw),
+    .req_o (aw_arbiter_valid),
+    .gnt_i (aw_arbiter_ready),
+    .data_o (manager_req_o.aw),
+    .idx_o (w_ctrl_fifo_wdata)
+  );
+
+  assign aw_is_evict = ace_is_evict(manager_req_o.aw.bar[0], manager_req_o.aw.domain, manager_req_o.aw.snoop);
+  // Select bit 0 (manager AW) is always set; select bit 1 (W routing FIFO) is set only when the AW is not an evict
+  stream_fork_dynamic #(
+    .N_OUP(2)
+  ) u_aw_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (aw_arbiter_valid),
+    .ready_o (aw_arbiter_ready),
+    .sel_i ({!aw_is_evict, 1'b1}),
+    .sel_valid_i (1'b1),
+    .sel_ready_o (),
+    .valid_o ({w_ctrl_fifo_valid_in, manager_req_o.aw_valid}),
+    .ready_i ({w_ctrl_fifo_ready_in, manager_resp_i.aw_ready})
+  );
+// }}}
+
+// W
+// {{{
+  logic w_ctrl_fifo_valid_out;
+  logic w_ctrl_fifo_ready_out;
+  logic [subordinateIndexWidth-1:0] w_ctrl_fifo_rdata;
+  logic w_mux_valid_out;
+  logic w_mux_ready_out;
+
+  stream_fifo #(
+    .FALL_THROUGH(fallThrough),
+    .DATA_WIDTH (subordinateIndexWidth),
+    .DEPTH (maxWTrans)
+  ) u_w_ctrl_fifo (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i (1'b0),
+    .testmode_i(1'b0),
+    .usage_o (),
+    .data_i (w_ctrl_fifo_wdata),
+    .valid_i (w_ctrl_fifo_valid_in),
+    .ready_o (w_ctrl_fifo_ready_in),
+    .data_o (w_ctrl_fifo_rdata),
+    .valid_o (w_ctrl_fifo_valid_out),
+    .ready_i (w_ctrl_fifo_ready_out && manager_req_o.w.last) // entry is consumed only on the last W beat
+  );
+
+  stream_mux #(
+    .DATA_T(ccu_w_t),
+    .N_INP (numSubordinates)
+  ) u_w_mux (
+    .inp_data_i (subordinate_w),
+    .inp_valid_i(subordinate_w_valid),
+    .inp_ready_o(subordinate_w_ready),
+    .inp_sel_i (w_ctrl_fifo_rdata),
+    .oup_data_o (manager_req_o.w),
+    .oup_valid_o(w_mux_valid_out),
+    .oup_ready_i(w_mux_ready_out)
+  );
+  // W is forwarded only while both a routing entry and W data from the selected port are available
+  stream_join #(
+    .N_INP(2)
+  ) u_w_join (
+    .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}),
+    .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}),
+    .oup_valid_o(manager_req_o.w_valid),
+    .oup_ready_i(manager_resp_i.w_ready)
+  );
+// }}}
+
+// B
+// {{{
+logic [subordinateIndexWidth-1:0] b_demux_sel;
+// Route B by the ID bits above the subordinate ID width
+assign b_demux_sel = manager_resp_i.b.id[aceManagerIdWidth-1:aceSubordinateIdWidth];
+
+stream_demux #(
+  .N_OUP (numSubordinates)
+) u_b_demux (
+  .inp_valid_i (manager_resp_i.b_valid),
+  .inp_ready_o (manager_req_o.b_ready),
+  .oup_sel_i (b_demux_sel),
+  .oup_valid_o (subordinate_b_valid),
+  .oup_ready_i (subordinate_b_ready)
+);
+
+assign subordinate_b = {numSubordinates{manager_resp_i.b}};
+// }}}
+
+// AR
+// {{{
+  rr_arb_tree #(
+    .NumIn (numSubordinates),
+    .DataType (ccu_ace_manager_ar_t),
+    .ExtPrio (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn (1'b1),
+    .FairArb (1'b1)
+  ) u_ar_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i ('0),
+    .req_i (subordinate_ar_valid),
+    .gnt_o (subordinate_ar_ready),
+    .data_i (subordinate_ar),
+    .req_o (manager_req_o.ar_valid),
+    .gnt_i (manager_resp_i.ar_ready),
+    .data_o (manager_req_o.ar),
+    .idx_o ()
+  );
+// }}}
+
+// R
+// {{{
+logic [subordinateIndexWidth-1:0] r_demux_sel;
+// Route R by the ID bits above the subordinate ID width
+assign r_demux_sel = manager_resp_i.r.id[aceManagerIdWidth-1:aceSubordinateIdWidth];
+
+stream_demux #(
+  .N_OUP (numSubordinates)
+) u_r_demux (
+  .inp_valid_i (manager_resp_i.r_valid),
+  .inp_ready_o (manager_req_o.r_ready),
+  .oup_sel_i (r_demux_sel),
+  .oup_valid_o (subordinate_r_valid),
+  .oup_ready_i (subordinate_r_ready)
+);
+
+assign subordinate_r = {numSubordinates{manager_resp_i.r}};
+// }}}
+
+
+endmodule
diff --git a/src/ccu/ccu_pkg.sv b/src/ccu/ccu_pkg.sv
new file mode 100644
index 0000000..3420105
--- /dev/null
+++ b/src/ccu/ccu_pkg.sv
@@ -0,0 +1,138 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+package ccu_pkg;
+
+  // Available memory mapped IO interfaces
+  typedef enum {
+    CCU_MMIO_APB,
+    CCU_MMIO_REGBUS
+  } ccu_mmio_intf_e;
+
+  typedef struct packed {
+    // Number of subordinate ports (i.e.
coherent managers)
+    int unsigned numSubordinates;
+    // Number of shareable simultaneous inflight transactions
+    int unsigned numShareableTransactions;
+    // Number of simultaneous write transactions
+    int unsigned numWriteTransactions;
+    // Number of simultaneous inflight snoop transactions
+    int unsigned numSnoopTransactions;
+    // Enable replay of conflicting requests
+    bit enableReplay;
+    // Number of replay list entries
+    int unsigned numReplayEntries;
+    // AXI/ACE parameters
+    int unsigned axiAddressWidth;
+    int unsigned axiDataWidth;
+    int unsigned axiUserWidth;
+    int unsigned axiSubordinateIdWidth;
+    // Cache parameters
+    int unsigned cachelineWidth;
+    // LSB address bit used for hazard checks (inclusive)
+    int unsigned addressCheckLsb;
+    // MSB address bit used for hazard checks (inclusive)
+    int unsigned addressCheckMsb;
+    // Make snoop request FIFOs fall through
+    bit snoopReqFifoFallthrough;
+    // Make snoop response FIFOs fall through
+    bit snoopRespFifoFallthrough;
+    // Protocol used to access the memory mapped registers
+    ccu_mmio_intf_e mmioIntf;
+    // Instantiate CCU control and status registers
+    bit enableCSRs;
+    // Insert spill registers in the frontend
+    bit frontendPipeAw;
+    bit frontendPipeW;
+    bit frontendPipeB;
+    bit frontendPipeAr;
+    bit frontendPipeR;
+  } ccu_user_config_t;
+
+  typedef struct packed {
+    // User parameters
+    ccu_user_config_t u;
+    // Derived internal parameters
+    // Index width of a subordinate port (i.e. of a coherent manager); at least 1
+    int unsigned subordinateIndexWidth;
+    // ID width internal to the CCU
+    int unsigned axiCcuIdWidth;
+    // ID width of the manager interface of the CCU
+    int unsigned axiManagerIdWidth;
+    // Byte index in cacheline
+    int unsigned cachelineByteIndexWidth;
+    // Cacheline address minus offset
+    int unsigned numLineWidth;
+    // Write transaction index width
+    int unsigned writeTransactionIndexWidth;
+    // Number of transfers for a single cacheline
+    int unsigned cachelineAxiTransfers;
+    // Transaction index width
+    int unsigned transactionIndexWidth;
+    // Replay entry index width
+    int unsigned replayEntryIndexWidth;
+    // AXI data size
+    int unsigned axiDataSize;
+    // Address slice width used for hazard checks
+    int unsigned addressCheckWidth;
+  } ccu_config_t;
+  // Builds the full CCU configuration: copies the user parameters `u` and fills in every derived width/count.
+  function automatic ccu_config_t ccu_build_cfg(ccu_user_config_t u);
+    ccu_config_t p;
+
+    p.u = u;
+    // A single subordinate still gets a 1-bit index: $clog2(1) is 0, but ccu_frontend_arbiter always prepends >= 1 ID bit
+    p.subordinateIndexWidth = u.numSubordinates > 1 ? $clog2(u.numSubordinates) : 1;
+    p.axiCcuIdWidth = u.axiSubordinateIdWidth + p.subordinateIndexWidth;
+    p.axiManagerIdWidth = p.axiCcuIdWidth + 1;
+    p.cachelineByteIndexWidth = u.cachelineWidth > 8 ? $clog2(u.cachelineWidth / 8) : 1;
+    p.numLineWidth = u.axiAddressWidth - p.cachelineByteIndexWidth;
+    p.writeTransactionIndexWidth = u.numWriteTransactions > 1 ? $clog2(u.numWriteTransactions) : 1;
+    p.cachelineAxiTransfers = u.cachelineWidth / u.axiDataWidth; // assumes axiDataWidth is non-zero
+    p.transactionIndexWidth = u.numShareableTransactions > 1 ? $clog2(u.numShareableTransactions) : 1;
+    p.replayEntryIndexWidth = u.numReplayEntries > 1 ? $clog2(u.numReplayEntries) : 1;
+    p.axiDataSize = u.axiDataWidth > 8 ? $clog2(u.axiDataWidth / 8) : 1;
+    p.addressCheckWidth = u.addressCheckMsb - u.addressCheckLsb + 1; // both bounds are inclusive
+
+    return p;
+  endfunction
+
+  // Performance events
+  typedef struct packed {
+    logic snoop_hit; // 0x16
+    logic snoop_miss; // 0x15
+    logic stage1_read_no_snoop; // 0x14
+    logic stage1_read_once; // 0x13
+    logic stage1_read_shared; // 0x12
+    logic stage1_read_clean; // 0x11
+    logic stage1_read_not_shared_dirty; // 0x10
+    logic stage1_read_unique; // 0x0F
+    logic stage1_clean_unique; // 0x0E
+    logic stage1_make_unique; // 0x0D
+    logic stage1_clean_shared; // 0x0C
+    logic stage1_clean_invalid; // 0x0B
+    logic stage1_make_invalid; // 0x0A
+    logic stage0_stall_other; // 0x09
+    logic stage0_stall_scoreboard_hit; // 0x08
+    logic stage0_stall_scoreboard_full; // 0x07
+    logic stage0_stall_ac_fifo_full; // 0x06
+    logic stage0_stall_stage1_fifo_full; // 0x05
+    logic stage1_stall_other; // 0x04
+    logic stage1_stall_cr_not_valid; // 0x03
+    logic stage1_stall_write_engine_busy; // 0x02
+    logic stage1_stall_read_engine_busy; // 0x01
+    logic stage1_stall_cd_engine_busy; // 0x00
+  } ccu_snoop_pipeline_events_t;
+
+endpackage
diff --git a/src/ccu/ccu_read_engine.sv b/src/ccu/ccu_read_engine.sv
new file mode 100644
index 0000000..bcf3409
--- /dev/null
+++ b/src/ccu/ccu_read_engine.sv
@@ -0,0 +1,139 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+`include "axi/assign.svh"
+`include "ace/convert.svh"
+
+// Read engine of the CCU.
+//
+// AR path: requests from the snoop pipeline are buffered in a one-entry
+// fall-through FIFO and forwarded downstream only while the externally
+// computed address check (`ar_addr_hit_i`) reports no hit.
+//
+// R path: response beats coming from downstream (converted to the ACE R
+// struct) and R beats produced by the snoop pipeline are arbitrated
+// round-robin onto a single R output. Once the first beat of a response has
+// been transferred, the other source is masked until the beat flagged
+// `last` has been transferred, so the beats of two responses never
+// interleave.
+module ccu_read_engine
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  parameter type ccu_axi_ar_t = logic,
+  parameter type ccu_ace_r_t = logic,
+  parameter type ccu_axi_r_t = logic
+) (
+  input  logic                                clk_i,
+  input  logic                                rst_ni,
+
+  input  logic                                ar_valid_i,
+  output logic                                ar_ready_o,
+  input  ccu_axi_ar_t                         ar_i,
+
+  output logic                                ar_addr_check_o,
+  input  logic                                ar_addr_hit_i,
+  output logic [ccuCfg.addressCheckWidth-1:0] ar_addr_slice_o,
+
+  input  logic                                snoop_pipeline_r_valid_i,
+  output logic                                snoop_pipeline_r_ready_o,
+  input  ccu_ace_r_t                          snoop_pipeline_r_i,
+
+  output logic                                r_valid_o,
+  input  logic                                r_ready_i,
+  output ccu_ace_r_t                          r_o,
+
+  output logic                                ar_valid_o,
+  input  logic                                ar_ready_i,
+  output ccu_axi_ar_t                         ar_o,
+  input  logic                                r_valid_i,
+  output logic                                r_ready_o,
+  input  ccu_axi_r_t                          r_i
+);
+
+// AR channel
+// {{{
+  ccu_axi_ar_t ar_buf_in;
+  ccu_axi_ar_t ar_buf_out;
+  logic        ar_buf_valid;
+  logic        ar_buf_ready;
+  // High while the buffered request does not collide with the address check
+  logic        ar_pass;
+
+  `AXI_ASSIGN_AR_STRUCT(ar_buf_in, ar_i)
+
+  // One-entry fall-through buffer: keeps the snoop pipeline decoupled from
+  // this engine while a request is held back by an address hazard
+  // TODO: is one entry enough?
+  stream_fifo #(
+    .FALL_THROUGH (1'b1),
+    .DEPTH        (1),
+    .T            (ccu_axi_ar_t)
+  ) u_ar_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i    (1'b0),
+    .testmode_i (1'b0),
+    .usage_o    (),
+    .data_i     (ar_buf_in),
+    .valid_i    (ar_valid_i),
+    .ready_o    (ar_ready_o),
+    .data_o     (ar_buf_out),
+    .valid_o    (ar_buf_valid),
+    .ready_i    (ar_buf_ready)
+  );
+
+  // Hazard check request towards the external address checker
+  assign ar_addr_check_o = ar_buf_valid;
+  assign ar_addr_slice_o = ar_buf_out.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+
+  // The handshake is gated on both sides while the address check hits
+  assign ar_pass      = !ar_addr_hit_i;
+  assign ar_valid_o   = ar_pass && ar_buf_valid;
+  assign ar_buf_ready = ar_pass && ar_ready_i;
+  `AXI_ASSIGN_AR_STRUCT(ar_o, ar_buf_out)
+// }}}
+
+// R channel
+// {{{
+  // Index 1: downstream R channel, index 0: snoop pipeline R channel
+  ccu_ace_r_t r_downstream;
+  logic [1:0] r_req;
+  logic [1:0] r_gnt;
+  logic [1:0] r_mask_d;
+  logic [1:0] r_mask_q;
+  // An R beat is transferred on the output this cycle
+  logic       r_beat;
+
+  `AXI_TO_ACE_ASSIGN_R_STRUCT(r_downstream, r_i)
+
+  assign r_req  = {r_valid_i, snoop_pipeline_r_valid_i} & ~r_mask_q;
+  assign r_beat = r_valid_o && r_ready_i;
+
+  assign {r_ready_o, snoop_pipeline_r_ready_o} = r_gnt & ~r_mask_q;
+
+  always_comb begin : mask_comb
+    r_mask_d = r_mask_q;
+    if (r_beat) begin
+      if (r_o.last) begin
+        // Response complete: both sources may compete again
+        r_mask_d = '0;
+      end else begin
+        // Response in progress: mask every source except the one that
+        // just transferred a beat
+        r_mask_d = ~(r_req & r_gnt);
+      end
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      r_mask_q <= '0;
+    end else begin
+      r_mask_q <= r_mask_d;
+    end
+  end
+
+  rr_arb_tree #(
+    .NumIn     (2),
+    .DataType  (ccu_ace_r_t),
+    .ExtPrio   (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn    (1'b1),
+    .FairArb   (1'b1)
+  ) u_r_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i    ('0),
+    .req_i   (r_req),
+    .gnt_o   (r_gnt),
+    .data_i  ({r_downstream, snoop_pipeline_r_i}),
+    .req_o   (r_valid_o),
+    .gnt_i   (r_ready_i),
+    .data_o  (r_o),
+    .idx_o   ()
+  );
+// }}}
+endmodule
diff --git a/src/ccu/ccu_replay.sv b/src/ccu/ccu_replay.sv
new file mode 100644
index 0000000..6c42981
--- /dev/null
+++ b/src/ccu/ccu_replay.sv
@@ -0,0 +1,197 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+// Replay table of the CCU.
+//
+// Stores AR requests together with the scoreboard entry they depend on.
+// Entries whose address slice matches are chained in a linked list
+// (`head`/`tail`/`next`). Only list heads whose hazard flag is cleared take
+// part in the replay arbitration. The hazard flag of a head is cleared when
+// the scoreboard entry it depends on is deallocated. When a head is
+// replayed, its successor (if any) is promoted to head and inherits
+// `replay_scoreboard_entry_i` as its new dependency.
+//
+// Ports:
+// - alloc_i / alloc_ar_i / alloc_scoreboard_entry_i: store a new request
+//   and the scoreboard entry it depends on in the first free entry.
+// - replay_ar_o / replay_ar_valid_o / replay_ar_ready_i: replayed request
+//   stream.
+// - replay_scoreboard_entry_i: dependency assigned to the node promoted to
+//   head when a replay is granted.
+// - scoreboard_dealloc_i: one bit per scoreboard entry, high when that
+//   entry is being deallocated.
+// - full_o: every replay entry is valid.
+module ccu_replay
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  parameter type ccu_ace_ar_t = logic,
+  localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions,
+  localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth
+
+) (
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+  input  logic                                 alloc_i,
+  input  ccu_ace_ar_t                          alloc_ar_i,
+  input  logic [scoreboardEntryIndexWidth-1:0] alloc_scoreboard_entry_i,
+  input  logic [scoreboardEntryIndexWidth-1:0] replay_scoreboard_entry_i,
+  output ccu_ace_ar_t                          replay_ar_o,
+  output logic                                 replay_ar_valid_o,
+  input  logic                                 replay_ar_ready_i,
+  input  logic [numScoreboardEntries-1:0]      scoreboard_dealloc_i,
+  output logic                                 full_o
+);
+
+// Shared signals
+// {{{
+  typedef struct packed {
+    ccu_ace_ar_t                          ar;
+    logic [scoreboardEntryIndexWidth-1:0] dependency;
+  } replay_entry_t;
+
+  typedef struct packed {
+    logic                                     head;
+    logic                                     tail;
+    logic [ccuCfg.replayEntryIndexWidth-1:0]  next;
+  } replay_linked_list_t;
+
+  replay_entry_t       [ccuCfg.u.numReplayEntries-1:0] entry_q;
+  replay_entry_t       [ccuCfg.u.numReplayEntries-1:0] entry_d;
+  replay_linked_list_t [ccuCfg.u.numReplayEntries-1:0] list_q;
+  replay_linked_list_t [ccuCfg.u.numReplayEntries-1:0] list_d;
+  logic                [ccuCfg.u.numReplayEntries-1:0] valid_q;
+  logic                [ccuCfg.u.numReplayEntries-1:0] valid_d;
+  logic                [ccuCfg.u.numReplayEntries-1:0] hazard_q;
+  logic                [ccuCfg.u.numReplayEntries-1:0] hazard_d;
+
+  logic [ccuCfg.replayEntryIndexWidth-1:0] alloc_entry;
+  logic                                    alloc_hazard;
+  logic                                    alloc_head;
+  logic [ccuCfg.addressCheckWidth-1:0]     alloc_addr_slice;
+
+  logic        [ccuCfg.u.numReplayEntries-1:0] address_hit;
+  logic        [ccuCfg.u.numReplayEntries-1:0] replay_req;
+  logic        [ccuCfg.u.numReplayEntries-1:0] replay_gnt;
+  ccu_ace_ar_t [ccuCfg.u.numReplayEntries-1:0] replay_ar;
+  logic                                        replay_is_tail;
+  logic [ccuCfg.replayEntryIndexWidth-1:0]     replay_entry;
+  logic [ccuCfg.replayEntryIndexWidth-1:0]     replay_next_entry;
+
+  assign full_o = &valid_q;
+
+  // Lowest-index free entry (0 when the table is full)
+  always_comb begin : alloc_entry_comb
+    alloc_entry = '0;
+    for (int unsigned e = 0; e < ccuCfg.u.numReplayEntries; e++) begin
+      if (!valid_q[e]) begin
+        alloc_entry = e;
+        break;
+      end
+    end
+  end
+
+  // Entries which are being allocated the same cycle the corresponding
+  // scoreboard entry is being deallocated AND are list heads can replay
+  // from the next cycle
+  assign alloc_hazard     = !alloc_head || !scoreboard_dealloc_i[alloc_scoreboard_entry_i];
+  // A new entry starts its own list unless a valid entry with the same
+  // address slice remains in the table after this cycle
+  assign alloc_head       = ~|(address_hit & valid_q & ~replay_gnt);
+  assign alloc_addr_slice = alloc_ar_i.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+// }}}
+
+// Per-entry logic
+// {{{
+  for (genvar e = 0; e < ccuCfg.u.numReplayEntries; e++) begin : gen_entry
+    logic                                alloc;
+    logic                                clear_hazard;
+    logic                                link;
+    logic                                make_head;
+    logic [ccuCfg.addressCheckWidth-1:0] addr_slice;
+
+    assign addr_slice     = entry_q[e].ar.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+    assign address_hit[e] = addr_slice == alloc_addr_slice;
+    assign replay_ar[e]   = entry_q[e].ar;
+
+    // This entry is the one being written by the current allocation
+    assign alloc        = alloc_entry == e && alloc_i;
+    // This entry follows the head being replayed and takes over as head
+    assign make_head    = replay_next_entry == e && |replay_gnt && !replay_is_tail;
+    // This entry is the tail of the list the new allocation is appended to
+    assign link         = valid_q[e] && list_q[e].tail && address_hit[e] && !replay_gnt[e] && alloc_i;
+    assign clear_hazard = valid_q[e] && list_q[e].head && scoreboard_dealloc_i[entry_q[e].dependency];
+    assign replay_req[e] = valid_q[e] && list_q[e].head && !hazard_q[e];
+
+    always_comb begin : entry_comb
+      list_d  [e] = list_q  [e];
+      entry_d [e] = entry_q [e];
+      valid_d [e] = valid_q [e];
+      hazard_d[e] = hazard_q[e];
+
+      unique case (1'b1)
+        // The scoreboard dependency for a first allocation
+        // is given by the scoreboard entry with a colliding
+        // address
+        alloc: begin
+          valid_d [e]            = 1'b1;
+          entry_d [e].ar         = alloc_ar_i;
+          entry_d [e].dependency = alloc_scoreboard_entry_i;
+          list_d  [e].tail       = 1'b1;
+          list_d  [e].head       = alloc_head;
+          hazard_d[e]            = alloc_hazard;
+        end
+        // The scoreboard dependency for a node being promoted
+        // to list head is the scoreboard entry being allocated
+        // to the previous head of the list
+        make_head: begin
+          entry_d [e].dependency = replay_scoreboard_entry_i;
+          list_d  [e].head       = 1'b1;
+        end
+        replay_gnt[e]: begin
+          valid_d [e] = 1'b0;
+        end
+        default: ;
+      endcase
+
+      // Linking is handled outside the unique case: an entry that is both
+      // the successor of the head being replayed and the tail of its list
+      // can be promoted (make_head) and extended (link) in the same cycle.
+      // The two events write disjoint fields (head/dependency vs.
+      // tail/next). If only make_head were applied, the newly allocated
+      // node would be stored as a non-head without any predecessor
+      // pointing to it and could never be replayed. An allocation into
+      // this very entry keeps priority, as before.
+      if (link && !alloc) begin
+        list_d [e].tail = 1'b0;
+        list_d [e].next = alloc_entry;
+      end
+
+      // Hazard clearing can happen concurrently
+      // to other events, thus it cannot be in the
+      // unique case statement above
+      if (clear_hazard) begin
+        hazard_d[e] = 1'b0;
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        list_q  [e] <= '0;
+        entry_q [e] <= '0;
+        valid_q [e] <= '0;
+        hazard_q[e] <= '0;
+      end else begin
+        list_q  [e] <= list_d  [e];
+        entry_q [e] <= entry_d [e];
+        valid_q [e] <= valid_d [e];
+        hazard_q[e] <= hazard_d[e];
+      end
+    end
+  end
+// }}}
+
+// Replay arbitration to snoop pipeline
+// {{{
+  rr_arb_tree #(
+    .NumIn     (ccuCfg.u.numReplayEntries),
+    .DataType  (ccu_ace_ar_t),
+    .ExtPrio   (1'b0),
+    .AxiVldRdy (1'b1),
+    .LockIn    (1'b1),
+    .FairArb   (1'b1)
+  ) u_replay_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .rr_i    ('0),
+    .req_i   (replay_req),
+    .gnt_o   (replay_gnt),
+    .data_i  (replay_ar),
+    .req_o   (replay_ar_valid_o),
+    .gnt_i   (replay_ar_ready_i),
+    .data_o  (replay_ar_o),
+    .idx_o   (replay_entry)
+  );
+
+  // Linked-list information of the entry selected by the arbiter
+  assign replay_next_entry = list_q[replay_entry].next;
+  assign replay_is_tail    = list_q[replay_entry].tail;
+// }}}
+endmodule
diff --git a/src/ccu/ccu_scoreboard.sv b/src/ccu/ccu_scoreboard.sv
new file mode 100644
index 0000000..a5125dd
--- /dev/null
+++ b/src/ccu/ccu_scoreboard.sv
@@ -0,0 +1,134 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+// Scoreboard of the CCU.
+//
+// Each entry stores the address slice and the ID of a tracked transaction.
+// - Allocation writes the lowest-index free entry; `alloc_hit_o` and
+//   `alloc_hit_entry_o` report a valid entry holding the same address slice
+//   as `alloc_addr_i` while `alloc_check_i` is high.
+// - `dealloc_hit_o` and `dealloc_hit_entry_o` report a valid entry holding
+//   `dealloc_id_i` while `dealloc_check_i` is high.
+// - Every subordinate has its own deallocation port (`dealloc_i`,
+//   `dealloc_entry_i`); an entry listens to the port selected by the upper
+//   `subordinateIndexWidth` bits of its stored ID. `dealloc_o` flags the
+//   entries being released in the current cycle.
+module ccu_scoreboard
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions,
+  localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth
+
+) (
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+
+  output logic                                 full_o,
+
+  input  logic                                 alloc_check_i,
+  input  logic                                 alloc_i,
+  input  logic [ccuCfg.u.axiAddressWidth-1:0]  alloc_addr_i,
+  input  logic [ccuCfg.axiCcuIdWidth-1:0]      alloc_id_i,
+  output logic                                 alloc_hit_o,
+  output logic [scoreboardEntryIndexWidth-1:0] alloc_hit_entry_o,
+  output logic [scoreboardEntryIndexWidth-1:0] alloc_entry_o,
+
+  input  logic                                 dealloc_check_i,
+  input  logic [ccuCfg.axiCcuIdWidth-1:0]      dealloc_id_i,
+  output logic                                 dealloc_hit_o,
+  output logic [scoreboardEntryIndexWidth-1:0] dealloc_hit_entry_o,
+
+  input  logic [ccuCfg.u.numSubordinates-1:0] dealloc_i,
+  input  logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] dealloc_entry_i,
+  output logic [numScoreboardEntries-1:0]      dealloc_o
+);
+
+// Shared signals
+// {{{
+  typedef struct packed {
+    logic [ccuCfg.addressCheckWidth-1:0] addr;
+    logic [ccuCfg.axiCcuIdWidth-1:0]     id;
+  } scoreboard_entry_t;
+
+  // Storage
+  scoreboard_entry_t [numScoreboardEntries-1:0] entry_q;
+  scoreboard_entry_t [numScoreboardEntries-1:0] entry_d;
+  logic              [numScoreboardEntries-1:0] valid_q;
+  logic              [numScoreboardEntries-1:0] valid_d;
+
+  // Per-entry comparator outputs, not yet qualified with the valid bits
+  logic [numScoreboardEntries-1:0] slice_match;
+  logic [numScoreboardEntries-1:0] id_match;
+
+  // Address bits taking part in the hazard check
+  logic [ccuCfg.addressCheckWidth-1:0] lookup_slice;
+
+  assign lookup_slice = alloc_addr_i[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb];
+
+  assign full_o        = &valid_q;
+  assign alloc_hit_o   = alloc_check_i && |(valid_q & slice_match);
+  assign dealloc_hit_o = dealloc_check_i && |(valid_q & id_match);
+
+  // Lowest-index free entry (0 when the scoreboard is full): scanning from
+  // the top makes the last assignment, i.e. the lowest index, win
+  always_comb begin : alloc_entry_comb
+    alloc_entry_o = '0;
+    for (int idx = numScoreboardEntries - 1; idx >= 0; idx--) begin
+      if (!valid_q[idx]) begin
+        alloc_entry_o = idx;
+      end
+    end
+  end
+
+  onehot_to_bin #(
+    .ONEHOT_WIDTH (numScoreboardEntries)
+  ) u_alloc_onehot_to_bin (
+    .onehot (slice_match & valid_q),
+    .bin    (alloc_hit_entry_o)
+  );
+
+  onehot_to_bin #(
+    .ONEHOT_WIDTH (numScoreboardEntries)
+  ) u_dealloc_onehot_to_bin (
+    .onehot (id_match & valid_q),
+    .bin    (dealloc_hit_entry_o)
+  );
+// }}}
+
+// Per-entry logic
+// {{{
+  for (genvar e = 0; e < numScoreboardEntries; e++) begin : gen_entry
+    // Deallocation port this entry listens to, taken from the stored ID
+    logic [ccuCfg.subordinateIndexWidth-1:0] owner;
+    logic                                    take;
+
+    assign owner = entry_q[e].id[ccuCfg.axiCcuIdWidth-1-:ccuCfg.subordinateIndexWidth];
+    assign take  = alloc_i && alloc_entry_o == e && !full_o;
+
+    assign dealloc_o[e]   = dealloc_i[owner] && dealloc_entry_i[owner] == e;
+    assign slice_match[e] = lookup_slice == entry_q[e].addr;
+    assign id_match[e]    = dealloc_id_i == entry_q[e].id;
+
+    always_comb begin : entry_comb
+      valid_d[e] = valid_q[e];
+      entry_d[e] = entry_q[e];
+
+      // Allocation and deallocation of the same entry are expected to be
+      // mutually exclusive
+      unique if (take) begin
+        valid_d[e]      = 1'b1;
+        entry_d[e].addr = lookup_slice;
+        entry_d[e].id   = alloc_id_i;
+      end else if (dealloc_o[e]) begin
+        valid_d[e] = 1'b0;
+      end else begin
+        // Hold the current state
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        valid_q[e] <= 1'b0;
+        entry_q[e] <= '0;
+      end else begin
+        valid_q[e] <= valid_d[e];
+        entry_q[e] <= entry_d[e];
+      end
+    end
+  end
+// }}}
+endmodule
diff --git a/src/ccu/ccu_snoop_pipeline.sv b/src/ccu/ccu_snoop_pipeline.sv
new file mode 100644
index 0000000..cfc9167
--- /dev/null
+++ b/src/ccu/ccu_snoop_pipeline.sv
@@ -0,0 +1,695 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Riccardo Tedeschi
+
+`include "axi/assign.svh"
+
+// Snoop pipeline of the CCU.
+//
+// Stage 0 turns an incoming ACE AR request into a snoop (AC) request,
+// selects the subordinates to snoop from the shareability domain of the
+// request and forks the request to the AC FIFO and to the stage 1 FIFO.
+// Requests hitting the scoreboard are not forwarded; they are either
+// acknowledged (replay enabled) or stalled.
+//
+// Stage 1 waits for the snoop responses (CR) of all selected subordinates,
+// merges them and dispatches the request to the read engine (AR), the write
+// engine (AW) and/or the CD engine.
+//
+// The CD engine collects the snoop data (CD) beats and forwards them to the
+// read engine (R) and/or the write engine (W); it also produces the
+// data-less R beat for transactions that only need an acknowledgement.
+module ccu_snoop_pipeline
+  import ace_pkg::*;
+  import ccu_pkg::*;
+#(
+  parameter ccu_config_t ccuCfg = '{default: '0},
+  parameter type ccu_ace_ar_t = logic,
+  parameter type ccu_ace_r_t = logic,
+  parameter type ccu_snoop_ac_t = logic,
+  parameter type ccu_snoop_cr_t = logic,
+  parameter type ccu_snoop_cd_t = logic,
+  parameter type ccu_w_t = logic,
+  parameter type ccu_axi_ar_t = logic,
+  parameter type ccu_axi_aw_t = logic,
+  parameter type domain_map_t = logic
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input domain_map_t [ccuCfg.u.numSubordinates-1:0] domain_map_i,
+
+  input ccu_ace_ar_t ar_i,
+  input logic ar_valid_i,
+  output logic ar_ready_o,
+
+  output logic scoreboard_alloc_check_o,
+  output logic scoreboard_alloc_o,
+  input logic scoreboard_alloc_hit_i,
+  input logic scoreboard_full_i,
+
+  output logic replay_alloc_o,
+
+  output logic [ccuCfg.u.numSubordinates-1:0] ac_valid_o,
+  input logic [ccuCfg.u.numSubordinates-1:0] ac_ready_i,
+  output ccu_snoop_ac_t [ccuCfg.u.numSubordinates-1:0] ac_o,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] cr_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] cr_ready_o,
+  input ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] cr_i,
+
+  input logic [ccuCfg.u.numSubordinates-1:0] cd_valid_i,
+  output logic [ccuCfg.u.numSubordinates-1:0] cd_ready_o,
+  input ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] cd_i,
+
+  output logic write_engine_aw_valid_o,
+  input logic write_engine_aw_ready_i,
+  output ccu_axi_aw_t write_engine_aw_o,
+  output logic write_engine_w_valid_o,
+  input logic write_engine_w_ready_i,
+  output ccu_w_t write_engine_w_o,
+
+  output logic read_engine_ar_valid_o,
+  input logic read_engine_ar_ready_i,
+  output ccu_axi_ar_t read_engine_ar_o,
+  output logic read_engine_r_valid_o,
+  input logic read_engine_r_ready_i,
+  output ccu_ace_r_t read_engine_r_o,
+
+  output ccu_snoop_pipeline_events_t events_o
+);
+
+// AC channel
+// {{{
+  // Snoop request stored together with the set of subordinates to snoop
+  typedef struct packed {
+    ccu_snoop_ac_t ac;
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+  } ac_fifo_entry_t;
+
+  ccu_snoop_ac_t ac;
+  logic [ccuCfg.u.numSubordinates-1:0] ac_sel;
+  logic ac_valid;
+  logic ac_ready;
+  ac_fifo_entry_t ac_fifo_wdata;
+  ac_fifo_entry_t ac_fifo_rdata;
+  logic ac_fifo_valid;
+  logic ac_fifo_ready;
+
+  assign ac_fifo_wdata = '{
+    ac: ac,
+    sel: ac_sel
+  };
+
+  stream_fifo #(
+    .FALL_THROUGH (ccuCfg.u.snoopReqFifoFallthrough),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (ac_fifo_entry_t)
+  ) u_ac_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (ac_fifo_wdata),
+    .valid_i (ac_valid),
+    .ready_o (ac_ready),
+    .data_o (ac_fifo_rdata),
+    .valid_o (ac_fifo_valid),
+    .ready_i (ac_fifo_ready)
+  );
+
+  // Every subordinate sees the same AC payload; only the valid bits of the
+  // selected subordinates are driven by the fork below
+  assign ac_o = {ccuCfg.u.numSubordinates{ac_fifo_rdata.ac}};
+
+  stream_fork_dynamic #(
+    .N_OUP (ccuCfg.u.numSubordinates)
+  ) u_ac_fifo_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (ac_fifo_valid),
+    .ready_o (ac_fifo_ready),
+    .sel_i (ac_fifo_rdata.sel),
+    .sel_valid_i (1'b1),
+    .sel_ready_o (),
+    .valid_o (ac_valid_o),
+    .ready_i (ac_ready_i)
+  );
+// }}}
+
+// CR channel
+// {{{
+  logic [ccuCfg.u.numSubordinates-1:0] cr_fifo_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cr_fifo_ready;
+  ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] cr_fifo_rdata;
+
+  // One snoop response FIFO per subordinate
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cr_fifo
+    stream_fifo #(
+      .FALL_THROUGH (ccuCfg.u.snoopRespFifoFallthrough),
+      .DEPTH (ccuCfg.u.numSnoopTransactions),
+      .T (ccu_snoop_cr_t)
+    ) u_cr_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .data_i (cr_i[s]),
+      .valid_i (cr_valid_i[s]),
+      .ready_o (cr_ready_o[s]),
+      .data_o (cr_fifo_rdata[s]),
+      .valid_o (cr_fifo_valid[s]),
+      .ready_i (cr_fifo_ready[s])
+    );
+  end
+// }}}
+
+// CD channel
+// {{{
+  logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_fifo_ready;
+  ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] cd_fifo_rdata;
+
+  // One snoop data FIFO per subordinate
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cd_fifo
+    stream_fifo #(
+      .FALL_THROUGH (ccuCfg.u.snoopRespFifoFallthrough),
+      .DEPTH (ccuCfg.u.numSnoopTransactions),
+      .T (ccu_snoop_cd_t)
+    ) u_cd_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i (1'b0),
+      .testmode_i (1'b0),
+      .usage_o (),
+      .data_i (cd_i[s]),
+      .valid_i (cd_valid_i[s]),
+      .ready_o (cd_ready_o[s]),
+      .data_o (cd_fifo_rdata[s]),
+      .valid_o (cd_fifo_valid[s]),
+      .ready_i (cd_fifo_ready[s])
+    );
+  end
+// }}}
+
+
+// Stage 0
+// {{{
+  logic [ccuCfg.subordinateIndexWidth-1:0] subordinate_ar_index;
+  logic ar_fork_valid;
+  logic ar_fork_ready;
+  logic ar_is_read_no_snoop;
+  acsnoop_t ac_snoop;
+  logic stage0_valid;
+  logic stage0_ready;
+
+  assign ac_snoop = ace_ar_acsnoop_map(
+    ar_i.bar[0],
+    ar_i.domain,
+    ar_i.snoop
+  );
+
+  assign ar_is_read_no_snoop = ace_is_read_no_snoop(
+    ar_i.bar[0],
+    ar_i.domain,
+    ar_i.snoop
+  );
+
+  // The index of the issuing subordinate is stored in the ID bits directly
+  // above the subordinate-side ID
+  assign subordinate_ar_index = ar_i.id[ccuCfg.u.axiSubordinateIdWidth+:ccuCfg.subordinateIndexWidth];
+
+  // Select the subordinates to snoop from the shareability domain of the
+  // request, using the domain map entry of the issuing subordinate
+  always_comb begin : ace_sel_comb
+    unique case (ar_i.domain)
+      NonShareable : ac_sel = '0;
+      InnerShareable: ac_sel = domain_map_i[subordinate_ar_index].inner;
+      OuterShareable: ac_sel = domain_map_i[subordinate_ar_index].outer;
+      default: ac_sel = ~domain_map_i[subordinate_ar_index].initiator;
+    endcase
+  end
+
+  // Snoop requests carry the cacheline-aligned address
+  assign ac = '{
+    addr: axi_pkg::aligned_addr(ar_i.addr, ccuCfg.cachelineByteIndexWidth),
+    snoop: ac_snoop,
+    prot: ar_i.prot
+  };
+
+  assign scoreboard_alloc_check_o = !ar_is_read_no_snoop && ar_valid_i;
+  assign replay_alloc_o = scoreboard_alloc_hit_i;
+
+  always_comb begin : ar_stall_comb
+    ar_fork_valid = ar_valid_i;
+    ar_ready_o = ar_fork_ready;
+
+    // If a request arrives here, it means
+    // the replay table is not full since the check
+    // was performed upstream.
+    // We can safely allocate one entry as long
+    // as the replay table is actually instantiated.
+    if (scoreboard_alloc_hit_i) begin
+      ar_fork_valid = 1'b0;
+      ar_ready_o = ccuCfg.u.enableReplay;
+    end
+  end
+
+  // A scoreboard entry is allocated when a snooping request is accepted by
+  // the fork below
+  assign scoreboard_alloc_o = !ar_is_read_no_snoop && ar_fork_valid && ar_fork_ready;
+
+  // Output 0 (stage 1 FIFO) is always selected, output 1 (AC FIFO) only for
+  // snooping requests; snooping requests are held back while the scoreboard
+  // is full
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_ac_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (ar_fork_valid),
+    .ready_o (ar_fork_ready),
+    .sel_i ({!ar_is_read_no_snoop, 1'b1}),
+    .sel_valid_i (ar_is_read_no_snoop || !scoreboard_full_i),
+    .sel_ready_o (),
+    .valid_o ({ac_valid, stage0_valid}),
+    .ready_i ({ac_ready, stage0_ready})
+  );
+// }}}
+
+// Stage 1
+// {{{
+  // Original request stored together with the set of snooped subordinates
+  typedef struct packed {
+    ccu_ace_ar_t ar;
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+  } stage1_fifo_entry_t;
+
+  logic stage1_fifo_valid;
+  logic stage1_fifo_ready;
+  stage1_fifo_entry_t stage1_fifo_wdata;
+  stage1_fifo_entry_t stage1_fifo_rdata;
+  logic accepts_dirty;
+  logic accepts_shared;
+  logic is_clean_or_make;
+  ccu_snoop_cr_t cr;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_data_transfer;
+  logic engine_fork_valid;
+  logic engine_fork_ready;
+  logic read_engine_sel;
+  logic write_engine_sel;
+  logic cd_engine_sel;
+  logic cd_engine_forward_to_read;
+  logic cd_engine_forward_to_write;
+  logic cd_engine_ack_to_read;
+  logic cd_engine_valid;
+  logic cd_engine_ready;
+  logic cd_engine_resp_shared;
+  logic cd_engine_resp_dirty;
+
+  assign stage1_fifo_wdata = '{
+    ar : ar_i,
+    sel: ac_sel
+  };
+
+  stream_fifo #(
+    .FALL_THROUGH (1'b0),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (stage1_fifo_entry_t)
+  ) u_stage1_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (stage1_fifo_wdata),
+    .valid_i (stage0_valid),
+    .ready_o (stage0_ready),
+    .data_o (stage1_fifo_rdata),
+    .valid_o (stage1_fifo_valid),
+    .ready_i (stage1_fifo_ready)
+  );
+
+  // Join the pending request (input 0, always selected) with the snoop
+  // responses of the subordinates it was sent to
+  stream_join_dynamic #(
+    .N_INP (ccuCfg.u.numSubordinates+1)
+  ) u_cr_join (
+    .inp_valid_i ({cr_fifo_valid, stage1_fifo_valid}),
+    .inp_ready_o ({cr_fifo_ready, stage1_fifo_ready}),
+    .sel_i ({stage1_fifo_rdata.sel, 1'b1}),
+    .oup_valid_o (engine_fork_valid),
+    .oup_ready_i (engine_fork_ready)
+  );
+
+  // Merge (bitwise OR) the responses of the snooped subordinates and record
+  // which of them announce a data transfer
+  always_comb begin : cr_comb
+    cr = '0;
+    cd_data_transfer = '0;
+    for (int unsigned s = 0; s < ccuCfg.u.numSubordinates; s++) begin
+      if (stage1_fifo_rdata.sel[s]) begin
+        cr = cr | cr_fifo_rdata[s];
+        cd_data_transfer[s] = cr_fifo_rdata[s].resp.DataTransfer;
+      end
+    end
+  end
+
+  assign accepts_dirty = ace_ar_accepts_dirty(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  assign accepts_shared = ace_ar_accepts_shared(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  assign is_clean_or_make = ace_ar_is_clean(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  ) || ace_ar_is_make(
+    stage1_fifo_rdata.ar.bar[0],
+    stage1_fifo_rdata.ar.domain,
+    stage1_fifo_rdata.ar.snoop
+  );
+
+  // Decide which engines take part in serving the request
+  always_comb begin : engine_sel_comb
+    read_engine_sel = 1'b0;
+    write_engine_sel = 1'b0;
+    cd_engine_sel = 1'b0;
+    cd_engine_forward_to_read = 1'b0;
+    cd_engine_forward_to_write = 1'b0;
+    cd_engine_ack_to_read = 1'b0;
+
+    case ({cr.resp.DataTransfer, is_clean_or_make})
+      // Forward the request to memory
+      2'b00: read_engine_sel = 1'b1;
+      // Send only the clean R response
+      2'b01: begin
+        cd_engine_sel = 1'b1;
+        cd_engine_ack_to_read = 1'b1;
+      end
+      // At least one snooped manager
+      // is providing data
+      default: begin
+        cd_engine_sel = 1'b1;
+        cd_engine_forward_to_read = !is_clean_or_make;
+        cd_engine_ack_to_read = is_clean_or_make;
+        if (cr.resp.PassDirty && !accepts_dirty) begin
+          // The initiator cannot accept dirty data,
+          // thus we need a writeback
+          cd_engine_forward_to_write = 1'b1;
+          write_engine_sel = 1'b1;
+        end
+      end
+    endcase
+  end
+
+  stream_fork_dynamic #(
+    .N_OUP(3)
+  ) u_engine_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (engine_fork_valid),
+    .ready_o (engine_fork_ready),
+    .sel_i ({read_engine_sel, write_engine_sel, cd_engine_sel}),
+    .sel_valid_i(1'b1),
+    .sel_ready_o(),
+    .valid_o ({read_engine_ar_valid_o, write_engine_aw_valid_o, cd_engine_valid}),
+    .ready_i ({read_engine_ar_ready_i, write_engine_aw_ready_i, cd_engine_ready})
+  );
+
+  always_comb begin : read_engine_ar_comb
+    // ACE to AXI conversion can be done via the macro
+    `AXI_SET_AR_STRUCT(read_engine_ar_o, stage1_fifo_rdata.ar)
+  end
+
+  always_comb begin : write_engine_aw_comb
+    // Derive an AW struct to issue writebacks from the
+    // original AR request
+    // The additional ID bit is used to uniquely identify
+    // writeback operations
+    // TODO: this might be overkill?
+    write_engine_aw_o.id = {1'b1, stage1_fifo_rdata.ar.id};
+    write_engine_aw_o.addr = axi_pkg::aligned_addr(stage1_fifo_rdata.ar.addr, ccuCfg.cachelineByteIndexWidth);
+    write_engine_aw_o.len = ccuCfg.cachelineAxiTransfers - 1;
+    write_engine_aw_o.size = ccuCfg.axiDataSize;
+    write_engine_aw_o.burst = axi_pkg::BURST_WRAP;
+    write_engine_aw_o.lock = 1'b0;
+    // Enforce non-bufferable requirements
+    // This should fix premature B responses
+    write_engine_aw_o.cache = stage1_fifo_rdata.ar.cache & ~axi_pkg::CACHE_BUFFERABLE;
+    write_engine_aw_o.prot = stage1_fifo_rdata.ar.prot;
+    write_engine_aw_o.qos = stage1_fifo_rdata.ar.qos;
+    write_engine_aw_o.region = stage1_fifo_rdata.ar.region;
+    write_engine_aw_o.atop = '0;
+    write_engine_aw_o.user = stage1_fifo_rdata.ar.user;
+  end
+
+  // The shared/dirty flags are reported to the initiator only if the
+  // request type accepts them
+  assign cd_engine_resp_shared = cr.resp.IsShared && accepts_shared;
+  assign cd_engine_resp_dirty = cr.resp.PassDirty && accepts_dirty;
+// }}}
+
+// CD engine
+// {{{
+  localparam int unsigned cachelineTransferIndexWidth = ccuCfg.cachelineAxiTransfers > 1 ?
+                                                        $clog2(ccuCfg.cachelineAxiTransfers) : 1;
+
+  // Job descriptor handed from stage 1 to the CD engine
+  typedef struct packed {
+    logic [ccuCfg.u.numSubordinates-1:0] sel;
+    logic forward_to_read;
+    logic forward_to_write;
+    logic ack_to_read;
+    logic resp_shared;
+    logic resp_dirty;
+    logic [ccuCfg.axiCcuIdWidth-1:0] ar_id;
+    logic [ccuCfg.u.axiUserWidth-1:0] ar_user;
+    logic ar_lock;
+  } cd_engine_fifo_entry_t;
+
+  logic cd_engine_fifo_valid;
+  logic cd_engine_fifo_ready;
+  cd_engine_fifo_entry_t cd_engine_fifo_wdata;
+  cd_engine_fifo_entry_t cd_engine_fifo_rdata;
+
+  assign cd_engine_fifo_wdata = '{
+    sel: cd_data_transfer,
+    forward_to_read: cd_engine_forward_to_read,
+    forward_to_write: cd_engine_forward_to_write,
+    ack_to_read: cd_engine_ack_to_read,
+    resp_shared: cd_engine_resp_shared,
+    resp_dirty: cd_engine_resp_dirty,
+    ar_id: stage1_fifo_rdata.ar.id,
+    ar_user: stage1_fifo_rdata.ar.user,
+    ar_lock: stage1_fifo_rdata.ar.lock
+  };
+
+  logic cd_global_counter_en;
+  logic [cachelineTransferIndexWidth-1:0] cd_global_counter_q;
+  logic cd_fork_valid;
+  logic cd_fork_ready;
+  logic r_mux_ack_valid;
+  logic r_mux_ack_ready;
+
+  logic read_engine_r_last;
+  rresp_t read_engine_r_resp;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_up_to_date;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_valid;
+  logic [ccuCfg.u.numSubordinates-1:0] cd_ready;
+  ccu_snoop_cd_t cd;
+  logic cd_engine_data_valid;
+  logic cd_engine_data_ready;
+  logic cd_read_engine_r_valid;
+  logic cd_read_engine_r_ready;
+
+  stream_fifo #(
+    .FALL_THROUGH (1'b0),
+    .DEPTH (ccuCfg.u.numSnoopTransactions),
+    .T (cd_engine_fifo_entry_t)
+  ) u_cd_engine_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i (1'b0),
+    .testmode_i (1'b0),
+    .usage_o (),
+    .data_i (cd_engine_fifo_wdata),
+    .valid_i (cd_engine_valid),
+    .ready_o (cd_engine_ready),
+    .data_o (cd_engine_fifo_rdata),
+    .valid_o (cd_engine_fifo_valid),
+    .ready_i (cd_engine_fifo_ready)
+  );
+
+  // Output 1: data-less acknowledgement towards the R mux.
+  // Output 0: data path, selected when at least one subordinate transfers
+  // data; its ready is qualified with `cd.last`.
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_cd_engine_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (cd_engine_fifo_valid),
+    .ready_o (cd_engine_fifo_ready),
+    .sel_i ({cd_engine_fifo_rdata.ack_to_read, |cd_engine_fifo_rdata.sel}),
+    .sel_valid_i (1'b1),
+    .sel_ready_o (),
+    .valid_o ({r_mux_ack_valid, cd_fork_valid}),
+    .ready_i ({r_mux_ack_ready, cd_fork_ready} & {1'b1, cd.last})
+  );
+
+  // Only the CD FIFOs of subordinates that announced a data transfer take
+  // part in the current job
+  assign cd_valid = cd_fifo_valid & cd_engine_fifo_rdata.sel;
+  assign cd_fifo_ready = cd_ready & cd_engine_fifo_rdata.sel;
+
+  assign cd_engine_data_valid = |(cd_valid & cd_up_to_date);
+  assign cd_global_counter_en = cd_engine_data_valid && cd_engine_data_ready;
+
+  // Counts the beats forwarded downstream
+  // NOTE(review): the counters below are never cleared; presumably they
+  // rely on wrapping after one cacheline worth of beats - TODO confirm
+  // for `cachelineAxiTransfers` values that are not a power of two > 1
+  counter #(
+    .WIDTH (cachelineTransferIndexWidth)
+  ) u_cd_global_counter (
+    .clk_i,
+    .rst_ni,
+    .clear_i (1'b0),
+    .en_i (cd_global_counter_en),
+    .load_i (1'b0),
+    .down_i (1'b0),
+    .d_i ('0),
+    .q_o (cd_global_counter_q),
+    .overflow_o ()
+  );
+
+  for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_cd_local_counter
+
+    logic cd_local_counter_en;
+    logic [cachelineTransferIndexWidth-1:0] cd_local_counter_q;
+
+    // Counts the beats consumed from subordinate `s`
+    counter #(
+      .WIDTH (cachelineTransferIndexWidth)
+    ) u_cd_local_counter (
+      .clk_i,
+      .rst_ni,
+      .clear_i (1'b0),
+      .en_i (cd_local_counter_en),
+      .load_i (1'b0),
+      .down_i (1'b0),
+      .d_i ('0),
+      .q_o (cd_local_counter_q),
+      .overflow_o ()
+    );
+    assign cd_local_counter_en = cd_valid[s] && cd_ready[s];
+    // A subordinate whose count equals the global count holds the next
+    // beat to forward; one with a different count is drained without
+    // forwarding (its ready does not depend on the downstream ready)
+    assign cd_up_to_date[s] = cd_local_counter_q == cd_global_counter_q;
+    assign cd_ready[s] = !cd_up_to_date[s] || cd_engine_data_ready;
+  end
+
+  // Forward the beat of the lowest-index subordinate that is valid and up
+  // to date (subordinate 0 by default)
+  always_comb begin : cd_mux_comb
+    cd = cd_fifo_rdata[0];
+    for (int s = 0; s < ccuCfg.u.numSubordinates; s++) begin
+      if (cd_valid[s] && cd_up_to_date[s]) begin
+        cd = cd_fifo_rdata[s];
+        break;
+      end
+    end
+  end
+
+  // Route each data beat to the read engine (R) and/or the write engine (W)
+  stream_fork_dynamic #(
+    .N_OUP (2)
+  ) u_cd_data_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i (cd_engine_data_valid),
+    .ready_o (cd_engine_data_ready),
+    .sel_i ({cd_engine_fifo_rdata.forward_to_read, cd_engine_fifo_rdata.forward_to_write}),
+    .sel_valid_i (cd_fork_valid),
+    .sel_ready_o (cd_fork_ready),
+    .valid_o ({cd_read_engine_r_valid, write_engine_w_valid_o}),
+    .ready_i ({cd_read_engine_r_ready, write_engine_w_ready_i})
+  );
+
+  // Build the R response: ACE dirty/shared flags plus the AXI response
+  // code (EXOKAY when the original request had `lock` set, OKAY otherwise)
+  always_comb begin : rresp_comb
+    read_engine_r_resp[RESP_IS_DIRTY] = cd_engine_fifo_rdata.resp_dirty;
+    read_engine_r_resp[RESP_IS_SHARED] = cd_engine_fifo_rdata.resp_shared;
+    if (cd_engine_fifo_rdata.ar_lock)
+      read_engine_r_resp[axi_pkg::RespWidth-1:0] = axi_pkg::RESP_EXOKAY;
+    else
+      read_engine_r_resp[axi_pkg::RespWidth-1:0] = axi_pkg::RESP_OKAY;
+  end
+
+  // Only the `r_last` field has to be multiplexed
+  stream_mux #(
+    .N_INP (2),
+    .DATA_T(logic)
+  ) u_r_mux (
+    .inp_data_i ({1'b1, cd.last}),
+    .inp_valid_i ({r_mux_ack_valid, cd_read_engine_r_valid}),
+    .inp_ready_o ({r_mux_ack_ready, cd_read_engine_r_ready}),
+    .inp_sel_i (cd_engine_fifo_rdata.ack_to_read),
+    .oup_data_o (read_engine_r_last),
+    .oup_valid_o (read_engine_r_valid_o),
+    .oup_ready_i (read_engine_r_ready_i)
+  );
+
+  assign read_engine_r_o = '{
+    id: cd_engine_fifo_rdata.ar_id,
+    data: cd.data,
+    resp: read_engine_r_resp,
+    last: read_engine_r_last,
+    user: cd_engine_fifo_rdata.ar_user
+  };
+
+  // Writebacks always write full beats (all strobes set)
+  assign write_engine_w_o = '{
+    data: cd.data,
+    strb: '1,
+    last: cd.last,
+    user: cd_engine_fifo_rdata.ar_user
+  };
+// }}}
+
+// Performance events
+// {{{
+  ccu_snoop_pipeline_events_t events_d;
+
+  // Events are computed combinationally and registered once before being
+  // driven on `events_o`
+  always_comb begin : perf_events_comb
+    events_d = '0;
+    // Transaction occurrence
+    if (stage1_fifo_valid && stage1_fifo_ready) begin
+      events_d.stage1_read_no_snoop =
+        ace_is_read_no_snoop(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_read_once =
+        ace_is_read_once(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_read_shared =
+        ace_is_read_shared(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_read_clean =
+        ace_is_read_clean(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_read_not_shared_dirty =
+        ace_is_read_not_shared_dirty(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_read_unique =
+        ace_is_read_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_clean_unique =
+        ace_is_clean_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_make_unique =
+        ace_is_make_unique(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_clean_shared =
+        ace_is_clean_shared(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_clean_invalid =
+        ace_is_clean_invalid(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+      events_d.stage1_make_invalid =
+        ace_is_make_invalid(stage1_fifo_rdata.ar.bar[0], stage1_fifo_rdata.ar.domain, stage1_fifo_rdata.ar.snoop);
+    end
+    // Stage 0 stalls
+    if (ar_valid_i && !ar_ready_o) begin
+      events_d.stage0_stall_scoreboard_hit = scoreboard_alloc_hit_i;
+      events_d.stage0_stall_scoreboard_full = scoreboard_full_i;
+      events_d.stage0_stall_ac_fifo_full = ac_valid && !ac_ready;
+      events_d.stage0_stall_stage1_fifo_full = stage0_valid && !stage0_ready;
+      // Catch all event
+      events_d.stage0_stall_other = ~|{
+        events_d.stage0_stall_scoreboard_hit,
+        events_d.stage0_stall_scoreboard_full,
+        events_d.stage0_stall_ac_fifo_full,
+        events_d.stage0_stall_stage1_fifo_full
+      };
+    end
+    // Stage 1 stalls
+    if (stage1_fifo_valid && !stage1_fifo_ready) begin
+      events_d.stage1_stall_cr_not_valid = stage1_fifo_valid && |(~cr_fifo_valid & stage1_fifo_rdata.sel);
+      events_d.stage1_stall_write_engine_busy = write_engine_aw_valid_o && !write_engine_aw_ready_i;
+      events_d.stage1_stall_read_engine_busy = read_engine_ar_valid_o && !read_engine_ar_ready_i;
+      events_d.stage1_stall_cd_engine_busy = cd_engine_valid && !cd_engine_ready;
+      // Catch all event
+      events_d.stage1_stall_other = ~|{
+        events_d.stage1_stall_cr_not_valid,
+        events_d.stage1_stall_write_engine_busy,
+        events_d.stage1_stall_read_engine_busy,
+        events_d.stage1_stall_cd_engine_busy
+      };
+    end
+
+    // Hit/miss is only counted for requests that snooped at least one
+    // subordinate and are not Clean*/Make* transactions
+    if (stage1_fifo_valid && stage1_fifo_ready && |stage1_fifo_rdata.sel && !is_clean_or_make) begin
+      events_d.snoop_hit = cr.resp.DataTransfer;
+      events_d.snoop_miss = !cr.resp.DataTransfer;
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) events_o <= '0;
+    else events_o <= events_d;
+  end
+// }}}
+endmodule
diff --git a/src/ccu/ccu_top.sv b/src/ccu/ccu_top.sv
new file mode 100644
index 0000000..ffe8c51
--- /dev/null
+++ b/src/ccu/ccu_top.sv
@@ -0,0 +1,486 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Authors: +// - Riccardo Tedeschi + +`include "axi/typedef.svh" +`include "ace/typedef.svh" +`include "axi/assign.svh" +`include "ace/assign.svh" +`include "apb/typedef.svh" + +module ccu_top + import ace_pkg::*; + import ccu_pkg::*; +#( + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type domain_map_t = logic, + parameter type ccu_ace_subordinate_ar_t = logic, + parameter type ccu_ace_subordinate_aw_t = logic, + parameter type ccu_w_t = logic, + parameter type ccu_ace_subordinate_r_t = logic, + parameter type ccu_ace_subordinate_b_t = logic, + parameter type ccu_ace_subordinate_req_t = logic, + parameter type ccu_ace_subordinate_resp_t = logic, + parameter type ccu_axi_manager_ar_t = logic, + parameter type ccu_axi_manager_aw_t = logic, + parameter type ccu_axi_manager_r_t = logic, + parameter type ccu_axi_manager_b_t = logic, + parameter type ccu_axi_manager_req_t = logic, + parameter type ccu_axi_manager_resp_t = logic, + parameter type ccu_snoop_ac_t = logic, + parameter type ccu_snoop_cr_t = logic, + parameter type ccu_snoop_cd_t = logic, + parameter type ccu_snoop_req_t = logic, + parameter type ccu_snoop_resp_t = logic, + parameter type mmio_req_t = logic, + parameter type mmio_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input domain_map_t [ccuCfg.u.numSubordinates-1:0] domain_map_i, + input ccu_ace_subordinate_req_t [ccuCfg.u.numSubordinates-1:0] subordinate_req_i, + output ccu_ace_subordinate_resp_t [ccuCfg.u.numSubordinates-1:0] subordinate_resp_o, + input logic [ccuCfg.u.numSubordinates-1:0] subordinate_rack_i, + input logic [ccuCfg.u.numSubordinates-1:0] subordinate_wack_i, + output ccu_snoop_req_t [ccuCfg.u.numSubordinates-1:0] snoop_req_o, + input ccu_snoop_resp_t [ccuCfg.u.numSubordinates-1:0] snoop_resp_i, + output ccu_axi_manager_req_t manager_req_o, + input ccu_axi_manager_resp_t manager_resp_i, + + input mmio_req_t mmio_subordinate_req_i, + output mmio_resp_t mmio_subordinate_resp_o +); + +// AXI/ACE 
typedefs +// {{{ + typedef logic [ccuCfg.axiCcuIdWidth-1:0] ccu_id_t; + typedef logic [ccuCfg.u.axiAddressWidth-1:0] ccu_address_t; + typedef logic [ccuCfg.u.axiDataWidth-1:0] ccu_data_t; + typedef logic [ccuCfg.u.axiDataWidth/8-1:0] ccu_strb_t; + typedef logic [ccuCfg.u.axiUserWidth-1:0] ccu_user_t; + + `ACE_TYPEDEF_AW_CHAN_T(ccu_ace_aw_t, ccu_address_t, ccu_id_t, ccu_user_t) + `AXI_TYPEDEF_B_CHAN_T(ccu_ace_b_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_AR_CHAN_T(ccu_ace_ar_t, ccu_address_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_R_CHAN_T(ccu_ace_r_t, ccu_data_t, ccu_id_t, ccu_user_t) + `ACE_TYPEDEF_REQ_T(ccu_ace_req_t, ccu_ace_aw_t, ccu_w_t, ccu_ace_ar_t) + `ACE_TYPEDEF_RESP_T(ccu_ace_resp_t, ccu_ace_b_t, ccu_ace_r_t) +// }}} + +localparam int unsigned scoreboardEntryIndexWidth = ccuCfg.transactionIndexWidth; +localparam int unsigned numScoreboardEntries = ccuCfg.u.numShareableTransactions; + +logic scoreboard_full; +logic scoreboard_alloc_check; +logic scoreboard_alloc; +logic scoreboard_alloc_hit; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_alloc_hit_entry; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_alloc_entry; +logic scoreboard_dealloc_check; +logic [ccuCfg.axiCcuIdWidth-1:0] scoreboard_dealloc_id; +logic scoreboard_dealloc_hit; +logic [scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_hit_entry; +logic [ccuCfg.u.numSubordinates-1:0] scoreboard_dealloc; +logic [ccuCfg.u.numSubordinates-1:0][scoreboardEntryIndexWidth-1:0] scoreboard_dealloc_entry; +logic [numScoreboardEntries-1:0] scoreboard_dealloc_bitvector; + +logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_ac_ready; +ccu_snoop_ac_t [ccuCfg.u.numSubordinates-1:0] snoop_ac; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cr_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cr_ready; +ccu_snoop_cr_t [ccuCfg.u.numSubordinates-1:0] snoop_cr; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cd_valid; +logic [ccuCfg.u.numSubordinates-1:0] snoop_cd_ready; 
+ccu_snoop_cd_t [ccuCfg.u.numSubordinates-1:0] snoop_cd; + +logic snoop_write_engine_aw_valid; +logic snoop_write_engine_aw_ready; +ccu_axi_manager_aw_t snoop_write_engine_aw; +logic snoop_write_engine_w_valid; +logic snoop_write_engine_w_ready; +ccu_w_t snoop_write_engine_w; +logic snoop_read_engine_ar_valid; +logic snoop_read_engine_ar_ready; +ccu_axi_manager_ar_t snoop_read_engine_ar; +logic snoop_read_engine_r_valid; +logic snoop_read_engine_r_ready; +ccu_ace_r_t snoop_read_engine_r; + + +logic read_engine_addr_check; +logic read_engine_addr_hit; +logic [ccuCfg.addressCheckWidth-1:0] read_engine_addr_slice; + +logic replay_alloc; +logic ar_valid; +logic ar_ready; +ccu_ace_ar_t ar; + +ccu_axi_manager_req_t manager_cut_req; +ccu_axi_manager_resp_t manager_cut_resp; + +ccu_snoop_pipeline_events_t perf_events; + +// Frontend +// {{{ + // The frontend acts as the Point of Serialization (PoS) + ccu_ace_req_t frontend_req; + ccu_ace_resp_t frontend_resp; + + ccu_frontend #( + .ccuCfg (ccuCfg), + .ccu_ace_manager_ar_t (ccu_ace_ar_t), + .ccu_ace_manager_aw_t (ccu_ace_aw_t), + .ccu_w_t (ccu_w_t), + .ccu_ace_manager_r_t (ccu_ace_r_t), + .ccu_ace_manager_b_t (ccu_ace_b_t), + .ccu_ace_manager_req_t (ccu_ace_req_t), + .ccu_ace_manager_resp_t (ccu_ace_resp_t), + .ccu_ace_subordinate_ar_t (ccu_ace_subordinate_ar_t), + .ccu_ace_subordinate_aw_t (ccu_ace_subordinate_aw_t), + .ccu_ace_subordinate_r_t (ccu_ace_subordinate_r_t), + .ccu_ace_subordinate_b_t (ccu_ace_subordinate_b_t), + .ccu_ace_subordinate_req_t (ccu_ace_subordinate_req_t), + .ccu_ace_subordinate_resp_t (ccu_ace_subordinate_resp_t) + ) u_ccu_frontend ( + .clk_i, + .rst_ni, + .subordinate_req_i (subordinate_req_i), + .subordinate_resp_o (subordinate_resp_o), + .subordinate_rack_i (subordinate_rack_i), + .subordinate_wack_i (subordinate_wack_i), + .manager_req_o (frontend_req), + .manager_resp_i (frontend_resp), + .scoreboard_dealloc_check_o (scoreboard_dealloc_check), + .scoreboard_dealloc_id_o 
(scoreboard_dealloc_id), + .scoreboard_dealloc_hit_i (scoreboard_dealloc_hit), + .scoreboard_dealloc_entry_i (scoreboard_dealloc_hit_entry), + .scoreboard_dealloc_o (scoreboard_dealloc), + .scoreboard_dealloc_entry_o (scoreboard_dealloc_entry) + ); +// }}} + +// Replay list +// {{{ + // Index reported by the replay arbiter (request input 1 is the replay list, + // input 0 the frontend); tied to 0 when the replay list is disabled. + // Declared explicitly so that no implicit net is inferred for it. + logic replay; + + if (ccuCfg.u.enableReplay) begin : gen_replay + logic replay_ar_valid; + logic replay_ar_ready; + ccu_ace_ar_t replay_ar; + logic replay_full; + logic frontend_ar_valid; + logic frontend_ar_ready; + logic frontend_ar_is_read_no_snoop; + + ccu_replay #( + .ccuCfg (ccuCfg), + .ccu_ace_ar_t (ccu_ace_ar_t) + ) u_ccu_replay ( + .clk_i, + .rst_ni, + .alloc_i (replay_alloc), + .alloc_ar_i (frontend_req.ar), + .alloc_scoreboard_entry_i (scoreboard_alloc_hit_entry), + .replay_scoreboard_entry_i (scoreboard_alloc_entry), + .replay_ar_o (replay_ar), + .replay_ar_valid_o (replay_ar_valid), + .replay_ar_ready_i (replay_ar_ready), + .scoreboard_dealloc_i (scoreboard_dealloc_bitvector), + .full_o (replay_full) + ); + + // Fixed priority arbitration gives precedence + // to replayable requests + // Shareable frontend requests are stalled once the + // replay list is full + assign frontend_ar_is_read_no_snoop = ace_is_read_no_snoop( + frontend_req.ar.bar[0], + frontend_req.ar.domain, + frontend_req.ar.snoop + ); + + assign frontend_ar_valid = (!replay_full || frontend_ar_is_read_no_snoop) && frontend_req.ar_valid; + assign frontend_resp.ar_ready = (!replay_full || frontend_ar_is_read_no_snoop) && frontend_ar_ready; + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_ace_ar_t), + .ExtPrio (1'b1), + .AxiVldRdy (1'b1), + .LockIn (1'b0), + .FairArb (1'b1) + ) u_ccu_replay_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('1), + .req_i ({replay_ar_valid, frontend_ar_valid}), + .gnt_o ({replay_ar_ready, frontend_ar_ready}), + .data_i ({replay_ar , frontend_req.ar}), + .req_o (ar_valid), + .gnt_i (ar_ready), + .data_o (ar), + .idx_o (replay) + ); + end else begin : gen_no_replay + assign replay = 1'b0; +
assign ar_valid = frontend_req.ar_valid; + assign frontend_resp.ar_ready = ar_ready; + assign ar = frontend_req.ar; + end +// }}} + +// AR-related snoop pipeline +// {{{ + ccu_snoop_pipeline #( + .ccuCfg (ccuCfg), + .ccu_ace_ar_t (ccu_ace_ar_t), + .ccu_ace_r_t (ccu_ace_r_t), + .ccu_snoop_ac_t (ccu_snoop_ac_t), + .ccu_snoop_cr_t (ccu_snoop_cr_t), + .ccu_snoop_cd_t (ccu_snoop_cd_t), + .ccu_w_t (ccu_w_t), + .ccu_axi_ar_t (ccu_axi_manager_ar_t), + .ccu_axi_aw_t (ccu_axi_manager_aw_t), + .domain_map_t (domain_map_t) + ) u_ccu_snoop_pipeline ( + .clk_i, + .rst_ni, + .domain_map_i (domain_map_i), + .ar_i (ar), + .ar_valid_i (ar_valid), + .ar_ready_o (ar_ready), + .scoreboard_alloc_check_o (scoreboard_alloc_check), + .scoreboard_alloc_o (scoreboard_alloc), + .scoreboard_alloc_hit_i (scoreboard_alloc_hit), + .scoreboard_full_i (scoreboard_full), + .replay_alloc_o (replay_alloc), + .ac_valid_o (snoop_ac_valid), + .ac_ready_i (snoop_ac_ready), + .ac_o (snoop_ac), + .cr_valid_i (snoop_cr_valid), + .cr_ready_o (snoop_cr_ready), + .cr_i (snoop_cr), + .cd_valid_i (snoop_cd_valid), + .cd_ready_o (snoop_cd_ready), + .cd_i (snoop_cd), + .write_engine_aw_valid_o (snoop_write_engine_aw_valid), + .write_engine_aw_ready_i (snoop_write_engine_aw_ready), + .write_engine_aw_o (snoop_write_engine_aw), + .write_engine_w_valid_o (snoop_write_engine_w_valid), + .write_engine_w_ready_i (snoop_write_engine_w_ready), + .write_engine_w_o (snoop_write_engine_w), + .read_engine_ar_valid_o (snoop_read_engine_ar_valid), + .read_engine_ar_ready_i (snoop_read_engine_ar_ready), + .read_engine_ar_o (snoop_read_engine_ar), + .read_engine_r_valid_o (snoop_read_engine_r_valid), + .read_engine_r_ready_i (snoop_read_engine_r_ready), + .read_engine_r_o (snoop_read_engine_r), + .events_o (perf_events) + ); + + for (genvar s = 0; s < ccuCfg.u.numSubordinates; s++) begin : gen_snoop_assignments + `SNOOP_ASSIGN_AC_STRUCT(snoop_req_o[s].ac, snoop_ac[s]) + assign snoop_req_o[s].ac_valid = snoop_ac_valid[s]; + assign 
snoop_ac_ready[s] = snoop_resp_i[s].ac_ready; + `SNOOP_ASSIGN_CR_STRUCT(snoop_cr[s], snoop_resp_i[s].cr) + assign snoop_cr_valid[s] = snoop_resp_i[s].cr_valid; + assign snoop_req_o[s].cr_ready = snoop_cr_ready[s]; + `SNOOP_ASSIGN_CD_STRUCT(snoop_cd[s], snoop_resp_i[s].cd) + assign snoop_cd_valid[s] = snoop_resp_i[s].cd_valid; + assign snoop_req_o[s].cd_ready = snoop_cd_ready[s]; + end +// }}} + +// Scoreboard +// {{{ + ccu_scoreboard #( + .ccuCfg (ccuCfg) + ) u_ccu_scoreboard ( + .clk_i, + .rst_ni, + .full_o (scoreboard_full), + .alloc_check_i (scoreboard_alloc_check), + .alloc_i (scoreboard_alloc), + .alloc_addr_i (ar.addr), + .alloc_id_i (ar.id), + .alloc_hit_o (scoreboard_alloc_hit), + .alloc_hit_entry_o (scoreboard_alloc_hit_entry), + .alloc_entry_o (scoreboard_alloc_entry), + .dealloc_check_i (scoreboard_dealloc_check), + .dealloc_id_i (scoreboard_dealloc_id), + .dealloc_hit_o (scoreboard_dealloc_hit), + .dealloc_hit_entry_o (scoreboard_dealloc_hit_entry), + .dealloc_i (scoreboard_dealloc), + .dealloc_entry_i (scoreboard_dealloc_entry), + .dealloc_o (scoreboard_dealloc_bitvector) + ); +// }}} + +// Write engine +// {{{ + /* + NOTE: AW/W/B pipelining happens in the frontend + by adding spill registers on these channels + */ + + // Intermediate signals to zero-pad the ID + // and drop the coherence fields + ccu_axi_manager_aw_t frontend_req_aw; + ccu_axi_manager_b_t frontend_resp_b; + + `AXI_ASSIGN_AW_STRUCT(frontend_req_aw, frontend_req.aw) + `AXI_ASSIGN_B_STRUCT(frontend_resp.b, frontend_resp_b) + + ccu_write_engine #( + .ccuCfg (ccuCfg), + .ccu_axi_aw_t (ccu_axi_manager_aw_t), + .ccu_w_t (ccu_w_t), + .ccu_axi_b_t (ccu_axi_manager_b_t) + ) u_ccu_write_engine ( + .clk_i, + .rst_ni, + .aw_valid_i (frontend_req.aw_valid), + .aw_ready_o (frontend_resp.aw_ready), + .aw_i (frontend_req_aw), + .w_valid_i (frontend_req.w_valid), + .w_ready_o (frontend_resp.w_ready), + .w_i (frontend_req.w), + .b_valid_o (frontend_resp.b_valid), + .b_ready_i (frontend_req.b_ready), + 
.b_o (frontend_resp_b), + .writeback_aw_valid_i (snoop_write_engine_aw_valid), + .writeback_aw_ready_o (snoop_write_engine_aw_ready), + .writeback_aw_i (snoop_write_engine_aw), + .writeback_w_valid_i (snoop_write_engine_w_valid), + .writeback_w_ready_o (snoop_write_engine_w_ready), + .writeback_w_i (snoop_write_engine_w), + .aw_valid_o (manager_cut_req.aw_valid), + .aw_ready_i (manager_cut_resp.aw_ready), + .aw_o (manager_cut_req.aw), + .w_valid_o (manager_cut_req.w_valid), + .w_ready_i (manager_cut_resp.w_ready), + .w_o (manager_cut_req.w), + .b_valid_i (manager_cut_resp.b_valid), + .b_ready_o (manager_cut_req.b_ready), + .b_i (manager_cut_resp.b), + .read_engine_addr_check_i (read_engine_addr_check), + .read_engine_addr_hit_o (read_engine_addr_hit), + .read_engine_addr_slice_i (read_engine_addr_slice) + ); +// }}} + +// Read engine +// {{{ + ccu_read_engine #( + .ccuCfg (ccuCfg), + .ccu_axi_ar_t (ccu_axi_manager_ar_t), + .ccu_ace_r_t (ccu_ace_r_t), + .ccu_axi_r_t (ccu_axi_manager_r_t) + ) u_ccu_read_engine ( + .clk_i, + .rst_ni, + .ar_valid_i (snoop_read_engine_ar_valid), + .ar_ready_o (snoop_read_engine_ar_ready), + .ar_i (snoop_read_engine_ar), + .ar_addr_check_o (read_engine_addr_check), + .ar_addr_hit_i (read_engine_addr_hit), + .ar_addr_slice_o (read_engine_addr_slice), + .snoop_pipeline_r_valid_i (snoop_read_engine_r_valid), + .snoop_pipeline_r_ready_o (snoop_read_engine_r_ready), + .snoop_pipeline_r_i (snoop_read_engine_r), + .r_valid_o (frontend_resp.r_valid), + .r_ready_i (frontend_req.r_ready), + .r_o (frontend_resp.r), + .ar_valid_o (manager_cut_req.ar_valid), + .ar_ready_i (manager_cut_resp.ar_ready), + .ar_o (manager_cut_req.ar), + .r_valid_i (manager_cut_resp.r_valid), + .r_ready_o (manager_cut_req.r_ready), + .r_i (manager_cut_resp.r) + ); +// }}} + +// AXI cut +// {{{ + axi_cut #( + .Bypass (1'b0), + .aw_chan_t (ccu_axi_manager_aw_t), + .w_chan_t (ccu_w_t), + .b_chan_t (ccu_axi_manager_b_t), + .ar_chan_t (ccu_axi_manager_ar_t), + .r_chan_t 
(ccu_axi_manager_r_t), + .axi_req_t (ccu_axi_manager_req_t), + .axi_resp_t (ccu_axi_manager_resp_t) + ) u_ccu_axi_manager_cut ( + .clk_i, + .rst_ni, + .slv_req_i (manager_cut_req), + .slv_resp_o (manager_cut_resp), + .mst_req_o (manager_req_o), + .mst_resp_i (manager_resp_i) + ); +// }}} + +// Control and status registers +// {{{ + typedef logic [$bits(mmio_subordinate_req_i.addr)-1:0] addr_t; + typedef logic [31:0] data_t; + typedef logic [3:0] strb_t; + + `APB_TYPEDEF_REQ_T(apb_req_t, addr_t, data_t, strb_t) + `APB_TYPEDEF_RESP_T(apb_resp_t, data_t) + + apb_req_t apb_req; + apb_resp_t apb_resp; + + if (ccuCfg.u.mmioIntf == CCU_MMIO_REGBUS) begin : gen_reg_to_apb + reg_to_apb #( + .reg_req_t (mmio_req_t), + .reg_rsp_t (mmio_resp_t), + .apb_req_t (apb_req_t), + .apb_rsp_t (apb_resp_t) + ) u_reg_to_apb ( + .clk_i, + .rst_ni, + .reg_req_i (mmio_subordinate_req_i), + .reg_rsp_o (mmio_subordinate_resp_o), + .apb_req_o (apb_req), + .apb_rsp_i (apb_resp) + ); + end else if (ccuCfg.u.mmioIntf == CCU_MMIO_APB) begin : gen_apb_passthrough + assign apb_req = mmio_subordinate_req_i; + assign mmio_subordinate_resp_o = apb_resp; + end + + if (ccuCfg.u.enableCSRs) begin : gen_csr + ccu_csr_wrap #( + .ccuCfg (ccuCfg), + .apb_req_t (apb_req_t), + .apb_resp_t (apb_resp_t), + .numEvents ($bits(ccu_snoop_pipeline_events_t)) + ) u_ccu_csrs ( + .clk_i, + .rst_ni, + .apb_req_i (apb_req), + .apb_resp_o (apb_resp), + .events_i (perf_events) + ); + end else begin : gen_no_csr + // No CSR block instantiated: drive an all-zero APB response + // with pready held high + always_comb begin : apb_sink_tieoff + apb_resp = '0; + apb_resp.pready = 1'b1; + end + end +// }}} +endmodule diff --git a/src/ccu/ccu_write_engine.sv b/src/ccu/ccu_write_engine.sv new file mode 100644 index 0000000..3e773a3 --- /dev/null +++ b/src/ccu/ccu_write_engine.sv @@ -0,0 +1,213 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the
License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Riccardo Tedeschi + +`include "axi/assign.svh" + +module ccu_write_engine + import ace_pkg::*; + import ccu_pkg::*; +#( + parameter ccu_config_t ccuCfg = '{default: '0}, + parameter type ccu_axi_aw_t = logic, + parameter type ccu_w_t = logic, + parameter type ccu_axi_b_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + input logic aw_valid_i, + output logic aw_ready_o, + input ccu_axi_aw_t aw_i, + input logic w_valid_i, + output logic w_ready_o, + input ccu_w_t w_i, + output logic b_valid_o, + input logic b_ready_i, + output ccu_axi_b_t b_o, + + input logic writeback_aw_valid_i, + output logic writeback_aw_ready_o, + input ccu_axi_aw_t writeback_aw_i, + input logic writeback_w_valid_i, + output logic writeback_w_ready_o, + input ccu_w_t writeback_w_i, + + output logic aw_valid_o, + input logic aw_ready_i, + output ccu_axi_aw_t aw_o, + output logic w_valid_o, + input logic w_ready_i, + output ccu_w_t w_o, + input logic b_valid_i, + output logic b_ready_o, + input ccu_axi_b_t b_i, + + input logic read_engine_addr_check_i, + output logic read_engine_addr_hit_o, + input logic [ccuCfg.addressCheckWidth-1:0] read_engine_addr_slice_i +); + +// Inflight addresses associative map +// {{{ + logic [ccuCfg.addressCheckWidth-1:0] write_inflight_map_wdata; + logic write_inflight_map_push; + logic write_inflight_map_pop; + logic write_inflight_map_full; + + assign write_inflight_map_wdata = aw_o.addr[ccuCfg.u.addressCheckMsb:ccuCfg.u.addressCheckLsb]; + assign write_inflight_map_push = aw_valid_o && aw_ready_i; + 
assign write_inflight_map_pop = b_valid_i && b_ready_o; + + id_queue #( + .ID_WIDTH (ccuCfg.axiManagerIdWidth), + .CAPACITY (ccuCfg.u.numWriteTransactions), + .FULL_BW (1'b1), + .CUT_OUP_POP_INP_GNT (1'b1), + .NUM_CMP_PORTS (1), + .data_t (logic [ccuCfg.addressCheckWidth-1:0]) + ) u_write_inflight_map ( + .clk_i, + .rst_ni, + .inp_id_i (aw_o.id), + .inp_data_i (write_inflight_map_wdata), + .inp_req_i (write_inflight_map_push), + .inp_gnt_o (), + .exists_data_i (read_engine_addr_slice_i), + .exists_mask_i ('1), + .exists_req_i (read_engine_addr_check_i), + .exists_o (read_engine_addr_hit_o), + .exists_gnt_o (), + .oup_id_i (b_i.id), + .oup_pop_i (1'b1), + .oup_req_i (write_inflight_map_pop), + .oup_data_o (), + .oup_data_valid_o (), + .oup_gnt_o (), + .full_o (write_inflight_map_full), + .empty_o () + ); +// }}} + +// AW channel +// {{{ + + logic w_ctrl_fifo_valid_in; + logic w_ctrl_fifo_ready_in; + logic aw_is_writeback; + // Valid/ready pair between the AW arbiter output and the AW fork input; + // declared explicitly instead of relying on implicit nets + logic aw_valid; + logic aw_ready; + + rr_arb_tree #( + .NumIn (2), + .DataType (ccu_axi_aw_t), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1), + .FairArb (1'b1) + ) u_aw_arbiter ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .rr_i ('0), + .req_i ({writeback_aw_valid_i, aw_valid_i}), + .gnt_o ({writeback_aw_ready_o, aw_ready_o}), + .data_i ({writeback_aw_i , aw_i }), + .req_o (aw_valid), + .gnt_i (aw_ready), + .data_o (aw_o), + .idx_o (aw_is_writeback) + ); + + stream_fork_dynamic #( + .N_OUP(2) + ) u_aw_fork ( + .clk_i, + .rst_ni, + .valid_i (aw_valid), + .ready_o (aw_ready), + .sel_i ('1), + .sel_valid_i (!write_inflight_map_full), + .sel_ready_o (), + .valid_o ({aw_valid_o, w_ctrl_fifo_valid_in}), + .ready_i ({aw_ready_i, w_ctrl_fifo_ready_in}) + ); +// }}} + +// W muxing +// {{{ + logic w_ctrl_fifo_valid_out; + logic w_ctrl_fifo_ready_out; + logic w_is_writeback; + logic w_mux_valid_out; + logic w_mux_ready_out; + + stream_fifo #( + .FALL_THROUGH(1'b1), + .DATA_WIDTH (1), + .DEPTH (2) + ) u_w_ctrl_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), +
.testmode_i(1'b0), + .usage_o (), + .data_i (aw_is_writeback), + .valid_i (w_ctrl_fifo_valid_in), + .ready_o (w_ctrl_fifo_ready_in), + .data_o (w_is_writeback), + .valid_o (w_ctrl_fifo_valid_out), + .ready_i (w_ctrl_fifo_ready_out && w_o.last) + ); + + stream_mux #( + .DATA_T(ccu_w_t), + .N_INP (2) + ) u_w_mux ( + .inp_data_i ({writeback_w_i , w_i}), + .inp_valid_i({writeback_w_valid_i, w_valid_i}), + .inp_ready_o({writeback_w_ready_o, w_ready_o}), + .inp_sel_i (w_is_writeback), + .oup_data_o (w_o), + .oup_valid_o(w_mux_valid_out), + .oup_ready_i(w_mux_ready_out) + ); + + stream_join #( + .N_INP(2) + ) u_w_join ( + .inp_valid_i({w_ctrl_fifo_valid_out, w_mux_valid_out}), + .inp_ready_o({w_ctrl_fifo_ready_out, w_mux_ready_out}), + .oup_valid_o(w_valid_o), + .oup_ready_i(w_ready_i) + ); +// }}} + +// B channel filtering +// {{{ + logic b_is_write_back; + + // The additional ID bit is used to uniquely identify + // writeback operations + // TODO: this might be overkill? + assign b_is_write_back = b_i.id[ccuCfg.axiCcuIdWidth]; + + stream_filter u_b_filter ( + .valid_i(b_valid_i), + .ready_o(b_ready_o), + .drop_i (b_is_write_back), + .valid_o(b_valid_o), + .ready_i(b_ready_i) + ); + + `AXI_ASSIGN_B_STRUCT(b_o, b_i) +// }}} +endmodule diff --git a/src/ccu/regs/ccu_csr.rdl b/src/ccu/regs/ccu_csr.rdl new file mode 100644 index 0000000..592c363 --- /dev/null +++ b/src/ccu/regs/ccu_csr.rdl @@ -0,0 +1,57 @@ +addrmap ccu_csr #( + longint unsigned numPerfCounters = 32 +) { + name = "CCU control and status registers"; + + reg perf_eventsel_r { + name = "Performance Event Selector"; + desc = "Selects the microarchitectural event to monitor."; + regwidth = 32; + + field { + name = "Event Code"; + desc = "Hardware event ID."; + sw = rw; + hw = r; + fieldwidth = 8; + } event_id = 0; + }; + + reg perf_counter_r { + name = "Performance Counter"; + desc = "32-bit counter value incremented by the hardware."; + regwidth = 32; + + field { + name = "Count"; + sw = rw; + hw = r; + counter; + 
fieldwidth = 32; + } val = 0; + }; + + reg perf_countinhibit_r { + name = "Counter Inhibit"; + desc = "Bit [i] = 1 means counter [i] is STOPPED. Bit [i] = 0 lets it run."; + regwidth = 32; + + field { + name = "Inhibit Bitmask"; + sw = rw; + hw = r; + fieldwidth = numPerfCounters; + } inh = (1 << numPerfCounters) - 1; + }; + + // --- Memory Map Instantiation --- + + // Global Control @ 0x00 + perf_countinhibit_r perf_countinhibit @ 0x00; + + // Array of Event Selectors @ 0x40 + perf_eventsel_r perf_eventsel[numPerfCounters] @ 0x40; + + // Array of 32-bit Counters @ 0xC0 + perf_counter_r perf_counter[numPerfCounters] @ 0xC0; +}; diff --git a/src/ccu/regs/generated/ccu_csr.sv b/src/ccu/regs/generated/ccu_csr.sv new file mode 100644 index 0000000..577beaa --- /dev/null +++ b/src/ccu/regs/generated/ccu_csr.sv @@ -0,0 +1,293 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +module ccu_csr ( + input wire clk, + input wire arst_n, + + input wire s_apb_psel, + input wire s_apb_penable, + input wire s_apb_pwrite, + input wire [2:0] s_apb_pprot, + input wire [8:0] s_apb_paddr, + input wire [31:0] s_apb_pwdata, + input wire [3:0] s_apb_pstrb, + output logic s_apb_pready, + output logic [31:0] s_apb_prdata, + output logic s_apb_pslverr, + + input ccu_csr_pkg::ccu_csr__in_t hwif_in, + output ccu_csr_pkg::ccu_csr__out_t hwif_out + ); + + //-------------------------------------------------------------------------- + // CPU Bus interface logic + //-------------------------------------------------------------------------- + logic cpuif_req; + logic cpuif_req_is_wr; + logic [8:0] cpuif_addr; + logic [31:0] cpuif_wr_data; + logic [31:0] cpuif_wr_biten; + logic cpuif_req_stall_wr; + logic cpuif_req_stall_rd; + + logic cpuif_rd_ack; + logic cpuif_rd_err; + logic [31:0] cpuif_rd_data; + + logic cpuif_wr_ack; + logic cpuif_wr_err; + + // Request + logic is_active; + always_ff @(posedge clk or negedge 
arst_n) begin + if(~arst_n) begin + is_active <= '0; + cpuif_req <= '0; + cpuif_req_is_wr <= '0; + cpuif_addr <= '0; + cpuif_wr_data <= '0; + cpuif_wr_biten <= '0; + end else begin + if(~is_active) begin + if(s_apb_psel) begin + is_active <= '1; + cpuif_req <= '1; + cpuif_req_is_wr <= s_apb_pwrite; + cpuif_addr <= {s_apb_paddr[8:2], 2'b0}; + cpuif_wr_data <= s_apb_pwdata; + for(int i=0; i<4; i++) begin + cpuif_wr_biten[i*8 +: 8] <= {8{s_apb_pstrb[i]}}; + end + end + end else begin + cpuif_req <= '0; + if(cpuif_rd_ack || cpuif_wr_ack) begin + is_active <= '0; + end + end + end + end + + // Response + assign s_apb_pready = cpuif_rd_ack | cpuif_wr_ack; + assign s_apb_prdata = cpuif_rd_data; + assign s_apb_pslverr = cpuif_rd_err | cpuif_wr_err; + + logic cpuif_req_masked; + + // Read & write latencies are balanced. Stalls not required + assign cpuif_req_stall_rd = '0; + assign cpuif_req_stall_wr = '0; + assign cpuif_req_masked = cpuif_req + & !(!cpuif_req_is_wr & cpuif_req_stall_rd) + & !(cpuif_req_is_wr & cpuif_req_stall_wr); + + //-------------------------------------------------------------------------- + // Address Decode + //-------------------------------------------------------------------------- + typedef struct { + logic perf_countinhibit; + logic perf_eventsel[32]; + logic perf_counter[32]; + } decoded_reg_strb_t; + decoded_reg_strb_t decoded_reg_strb; + logic decoded_err; + logic [8:0] decoded_addr; + logic decoded_req; + logic decoded_req_is_wr; + logic [31:0] decoded_wr_data; + logic [31:0] decoded_wr_biten; + + always_comb begin + automatic logic is_valid_addr; + automatic logic is_valid_rw; + is_valid_addr = '1; // No valid address check + is_valid_rw = '1; // No valid RW check + decoded_reg_strb.perf_countinhibit = cpuif_req_masked & (cpuif_addr == 9'h0); + for(int i0=0; i0<32; i0++) begin + decoded_reg_strb.perf_eventsel[i0] = cpuif_req_masked & (cpuif_addr == 9'h40 + (9)'(i0) * 9'h4); + end + for(int i0=0; i0<32; i0++) begin + 
decoded_reg_strb.perf_counter[i0] = cpuif_req_masked & (cpuif_addr == 9'hc0 + (9)'(i0) * 9'h4); + end + decoded_err = '0; + end + + // Pass down signals to next stage + assign decoded_addr = cpuif_addr; + assign decoded_req = cpuif_req_masked; + assign decoded_req_is_wr = cpuif_req_is_wr; + assign decoded_wr_data = cpuif_wr_data; + assign decoded_wr_biten = cpuif_wr_biten; + + //-------------------------------------------------------------------------- + // Field logic + //-------------------------------------------------------------------------- + typedef struct { + struct { + struct { + logic [31:0] next; + logic load_next; + } inh; + } perf_countinhibit; + struct { + struct { + logic [7:0] next; + logic load_next; + } event_id; + } perf_eventsel[32]; + struct { + struct { + logic [31:0] next; + logic load_next; + logic incrthreshold; + logic overflow; + } val; + } perf_counter[32]; + } field_combo_t; + field_combo_t field_combo; + + typedef struct { + struct { + struct { + logic [31:0] value; + } inh; + } perf_countinhibit; + struct { + struct { + logic [7:0] value; + } event_id; + } perf_eventsel[32]; + struct { + struct { + logic [31:0] value; + } val; + } perf_counter[32]; + } field_storage_t; + field_storage_t field_storage; + + // Field: ccu_csr.perf_countinhibit.inh + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_countinhibit.inh.value; + load_next_c = '0; + if(decoded_reg_strb.perf_countinhibit && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_countinhibit.inh.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + field_combo.perf_countinhibit.inh.next = next_c; + field_combo.perf_countinhibit.inh.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_countinhibit.inh.value <= 32'hffffffff; + end else begin + 
if(field_combo.perf_countinhibit.inh.load_next) begin + field_storage.perf_countinhibit.inh.value <= field_combo.perf_countinhibit.inh.next; + end + end + end + assign hwif_out.perf_countinhibit.inh.value = field_storage.perf_countinhibit.inh.value; + for(genvar i0=0; i0<32; i0++) begin + // Field: ccu_csr.perf_eventsel[].event_id + always_comb begin + automatic logic [7:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_eventsel[i0].event_id.value; + load_next_c = '0; + if(decoded_reg_strb.perf_eventsel[i0] && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_eventsel[i0].event_id.value & ~decoded_wr_biten[7:0]) | (decoded_wr_data[7:0] & decoded_wr_biten[7:0]); + load_next_c = '1; + end + field_combo.perf_eventsel[i0].event_id.next = next_c; + field_combo.perf_eventsel[i0].event_id.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_eventsel[i0].event_id.value <= 8'h0; + end else begin + if(field_combo.perf_eventsel[i0].event_id.load_next) begin + field_storage.perf_eventsel[i0].event_id.value <= field_combo.perf_eventsel[i0].event_id.next; + end + end + end + assign hwif_out.perf_eventsel[i0].event_id.value = field_storage.perf_eventsel[i0].event_id.value; + end + for(genvar i0=0; i0<32; i0++) begin + // Field: ccu_csr.perf_counter[].val + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.perf_counter[i0].val.value; + load_next_c = '0; + if(decoded_reg_strb.perf_counter[i0] && decoded_req_is_wr) begin // SW write + next_c = (field_storage.perf_counter[i0].val.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + if(hwif_in.perf_counter[i0].val.incr) begin // increment + field_combo.perf_counter[i0].val.overflow = (((33)'(next_c) + 32'h1) > 33'hffffffff); + next_c = next_c + 32'h1; + load_next_c = '1; + end else begin + 
field_combo.perf_counter[i0].val.overflow = '0; + end + field_combo.perf_counter[i0].val.incrthreshold = (field_storage.perf_counter[i0].val.value >= 32'hffffffff); + field_combo.perf_counter[i0].val.next = next_c; + field_combo.perf_counter[i0].val.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.perf_counter[i0].val.value <= 32'h0; + end else begin + if(field_combo.perf_counter[i0].val.load_next) begin + field_storage.perf_counter[i0].val.value <= field_combo.perf_counter[i0].val.next; + end + end + end + assign hwif_out.perf_counter[i0].val.value = field_storage.perf_counter[i0].val.value; + end + + //-------------------------------------------------------------------------- + // Write response + //-------------------------------------------------------------------------- + assign cpuif_wr_ack = decoded_req & decoded_req_is_wr; + // Writes are always granted with no error response + assign cpuif_wr_err = '0; + + //-------------------------------------------------------------------------- + // Readback + //-------------------------------------------------------------------------- + + logic [8:0] rd_mux_addr; + assign rd_mux_addr = decoded_addr; + + logic readback_err; + logic readback_done; + logic [31:0] readback_data; + always_comb begin + automatic logic [31:0] readback_data_var; + readback_data_var = '0; + if(rd_mux_addr == 9'h0) begin + readback_data_var[31:0] = field_storage.perf_countinhibit.inh.value; + end + for(int i0=0; i0<32; i0++) begin + if(rd_mux_addr == 9'h40 + (9)'(i0) * 9'h4) begin + readback_data_var[7:0] = field_storage.perf_eventsel[i0].event_id.value; + end + end + for(int i0=0; i0<32; i0++) begin + if(rd_mux_addr == 9'hc0 + (9)'(i0) * 9'h4) begin + readback_data_var[31:0] = field_storage.perf_counter[i0].val.value; + end + end + readback_data = readback_data_var; + readback_done = decoded_req & ~decoded_req_is_wr; + readback_err = '0; + end + + assign cpuif_rd_ack = 
readback_done; + assign cpuif_rd_data = readback_data; + assign cpuif_rd_err = readback_err; +endmodule diff --git a/src/ccu/regs/generated/ccu_csr_pkg.sv b/src/ccu/regs/generated/ccu_csr_pkg.sv new file mode 100644 index 0000000..a807f05 --- /dev/null +++ b/src/ccu/regs/generated/ccu_csr_pkg.sv @@ -0,0 +1,52 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +package ccu_csr_pkg; + + localparam CCU_CSR_DATA_WIDTH = 32; + localparam CCU_CSR_MIN_ADDR_WIDTH = 9; + localparam CCU_CSR_SIZE = 'h140; + localparam numPerfCounters = 'h20; + + typedef struct { + logic incr; + } ccu_csr__perf_counter_r__val__in_t; + + typedef struct { + ccu_csr__perf_counter_r__val__in_t val; + } ccu_csr__perf_counter_r__in_t; + + typedef struct { + ccu_csr__perf_counter_r__in_t perf_counter[32]; + } ccu_csr__in_t; + + typedef struct { + logic [31:0] value; + } ccu_csr__perf_countinhibit_r__inh__out_t; + + typedef struct { + ccu_csr__perf_countinhibit_r__inh__out_t inh; + } ccu_csr__perf_countinhibit_r__out_t; + + typedef struct { + logic [7:0] value; + } ccu_csr__perf_eventsel_r__event_id__out_t; + + typedef struct { + ccu_csr__perf_eventsel_r__event_id__out_t event_id; + } ccu_csr__perf_eventsel_r__out_t; + + typedef struct { + logic [31:0] value; + } ccu_csr__perf_counter_r__val__out_t; + + typedef struct { + ccu_csr__perf_counter_r__val__out_t val; + } ccu_csr__perf_counter_r__out_t; + + typedef struct { + ccu_csr__perf_countinhibit_r__out_t perf_countinhibit; + ccu_csr__perf_eventsel_r__out_t perf_eventsel[32]; + ccu_csr__perf_counter_r__out_t perf_counter[32]; + } ccu_csr__out_t; +endpackage diff --git a/src/ccu_fsm.sv b/src/ccu_fsm.sv deleted file mode 100644 index f1cfe87..0000000 --- a/src/ccu_fsm.sv +++ /dev/null @@ -1,705 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
-// SPDX-License-Identifier: SHL-0.51 - -`include "ace/assign.svh" -`include "ace/typedef.svh" - -module ccu_fsm -#( - parameter int unsigned DcacheLineWidth = 0, - parameter int unsigned AxiDataWidth = 0, - parameter int unsigned NoMstPorts = 4, - parameter int unsigned SlvAxiIDWidth = 0, - parameter type mst_req_t = logic, - parameter type mst_resp_t = logic, - parameter type snoop_req_t = logic, - parameter type snoop_resp_t = logic -) ( - //clock and reset - input clk_i, - input rst_ni, - // CCU Request In and response out - input mst_req_t ccu_req_i, - output mst_resp_t ccu_resp_o, - //CCU Request Out and response in - output mst_req_t ccu_req_o, - input mst_resp_t ccu_resp_i, - // Snoop channel resuest and response - output snoop_req_t [NoMstPorts-1:0] s2m_req_o, - input snoop_resp_t [NoMstPorts-1:0] m2s_resp_i -); - - localparam int unsigned DcacheLineWords = DcacheLineWidth / AxiDataWidth; - localparam int unsigned MstIdxBits = $clog2(NoMstPorts); - - enum logic [5:0] { - IDLE, // 0 - DECODE_R, // 1 - SEND_INVALID_R, // 2 - WAIT_INVALID_R, // 3 - SEND_AXI_REQ_WRITE_BACK_R, // 4 - WRITE_BACK_MEM_R, // 5 - SEND_READ, // 6 - WAIT_RESP_R, // 7 - READ_SNP_DATA, // 8 - SEND_AXI_REQ_R, // 9 - READ_MEM, // 10 - DECODE_W, // 11 - SEND_INVALID_W, // 12 - WAIT_INVALID_W, // 13 - SEND_AXI_REQ_WRITE_BACK_W, // 14 - WRITE_BACK_MEM_W, // 15 - SEND_AXI_REQ_W, // 16 - WRITE_MEM // 17 - } state_d, state_q; - - - // snoop resoponse valid - logic [NoMstPorts-1:0] cr_valid; - // snoop channel ac valid - logic [NoMstPorts-1:0] ac_valid; - // snoop channel ac ready - logic [NoMstPorts-1:0] ac_ready; - // snoop channel cd last - logic [NoMstPorts-1:0] cd_last; - // check for availablilty of data - logic [NoMstPorts-1:0] data_available; - // check for response error - logic [NoMstPorts-1:0] response_error; - // check for data received - logic [NoMstPorts-1:0] data_received; - // check for shared in cr_resp - logic [NoMstPorts-1:0] shared; - // check for dirty in cr_resp - logic 
[NoMstPorts-1:0] dirty; - // request holder - mst_req_t ccu_req_holder; - // response holder - mst_resp_t ccu_resp_holder; - // snoop response holder - snoop_resp_t [NoMstPorts-1:0] m2s_resp_holder; - // initiating master port - logic [NoMstPorts-1:0] initiator_d, initiator_q; - logic [MstIdxBits-1:0] first_responder; - - logic [DcacheLineWords-1:0][AxiDataWidth-1:0] cd_data; - logic [$clog2(DcacheLineWords+1)-1:0] stored_cd_data; - - logic r_last; - logic w_last; - logic r_eot; - logic w_eot; - - typedef struct packed { - logic waiting_w; - logic waiting_r; - } prio_t; - - prio_t prio_d, prio_q; - - // ---------------------- - // Current State Block - // ---------------------- - always_ff @(posedge clk_i, negedge rst_ni) begin : ccu_present_state - if(!rst_ni) begin - state_q <= IDLE; - initiator_q <= '0; - prio_q <= '0; - end else begin - state_q <= state_d; - initiator_q <= initiator_d; - prio_q <= prio_d; - end - end - - // ---------------------- - // Next State Block - // ---------------------- - always_comb begin : ccu_state_ctrl - - state_d = state_q; - initiator_d = initiator_q; - prio_d = prio_q; - - case(state_q) - - IDLE: begin - initiator_d = '0; - prio_d = '0; - // wait for incoming valid request from master - if((ccu_req_i.ar_valid & !ccu_req_i.aw_valid) | - (ccu_req_i.ar_valid & prio_q.waiting_r) | - (ccu_req_i.ar_valid & !prio_q.waiting_w)) begin - state_d = DECODE_R; - initiator_d[ccu_req_i.ar.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - prio_d.waiting_w = ccu_req_i.aw_valid; - end else if((ccu_req_i.aw_valid & !ccu_req_i.ar_valid) | - (ccu_req_i.aw_valid & prio_q.waiting_w)) begin - state_d = DECODE_W; - initiator_d[ccu_req_i.aw.id[SlvAxiIDWidth+:MstIdxBits]] = 1'b1; - prio_d.waiting_r = ccu_req_i.ar_valid; - end else begin - state_d = IDLE; - end - end - - //--------------------- - //---- Read Branch ---- - //--------------------- - DECODE_R: begin - //check read transaction type - if (ccu_req_holder.ar.snoop == snoop_pkg::CLEAN_UNIQUE) begin // 
check if CleanUnique then send Invalidate - state_d = SEND_INVALID_R; - end else if (ccu_req_holder.ar.lock) begin // AMO LR, invalidate - state_d = SEND_INVALID_R; - end else begin - state_d = SEND_READ; - end - end - - SEND_INVALID_R: begin - // wait for all snoop masters to assert AC ready - if (ac_ready != '1) begin - state_d = SEND_INVALID_R; - end else begin - state_d = WAIT_INVALID_R; - end - end - - WAIT_INVALID_R: begin - // wait for all snoop masters to assert CR valid - if ((cr_valid == '1) && (ccu_req_i.r_ready || ccu_req_holder.ar.lock)) begin - if(|(data_available & ~response_error)) begin - state_d = SEND_AXI_REQ_WRITE_BACK_R; - end else begin - if (ccu_req_holder.ar.lock) begin // AMO LR, read memory - state_d = SEND_AXI_REQ_R; - end else begin - state_d = IDLE; - end - end - end else begin - state_d = WAIT_INVALID_R; - end - end - - SEND_AXI_REQ_WRITE_BACK_R: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_WRITE_BACK_R; - end else begin - state_d = WRITE_BACK_MEM_R; - end - end - - WRITE_BACK_MEM_R: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_o.b_ready)) begin - if (ccu_req_holder.ar.lock) begin // AMO LR, read memory - state_d = SEND_AXI_REQ_R; - end else begin - state_d = IDLE; - end - end else begin - state_d = WRITE_BACK_MEM_R; - end - end - - SEND_READ: begin - // wait for all snoop masters to de-assert AC ready - if (ac_ready != '1) begin - state_d = SEND_READ; - end else begin - state_d = WAIT_RESP_R; - end - end - - WAIT_RESP_R: begin - // wait for all snoop masters to assert CR valid - if (cr_valid != '1) begin - state_d = WAIT_RESP_R; - end else if(|(data_available & ~response_error)) begin - state_d = READ_SNP_DATA; - end else begin - state_d = SEND_AXI_REQ_R; - end - end - - READ_SNP_DATA: begin - if(cd_last == data_available && (r_eot == 1'b1 || (ccu_req_i.r_ready == 1'b1 && r_last == 1'b1))) begin - state_d = IDLE; - end 
else begin - state_d = READ_SNP_DATA; - end - end - - SEND_AXI_REQ_R: begin - // wait for responding slave to assert ar_ready - if(ccu_resp_i.ar_ready !='b1) begin - state_d = SEND_AXI_REQ_R; - end else begin - state_d = READ_MEM; - end - end - - READ_MEM: begin - // wait for responding slave to assert r_valid - if(ccu_resp_i.r_valid && ccu_req_i.r_ready) begin - if(ccu_resp_i.r.last) begin - state_d = IDLE; - end else begin - state_d = READ_MEM; - end - end else begin - state_d = READ_MEM; - end - end - - - //--------------------- - //---- Write Branch --- - //--------------------- - - DECODE_W: begin - state_d = SEND_INVALID_W; - end - - SEND_INVALID_W: begin - // wait for all snoop masters to assert AC ready - if (ac_ready != '1) begin - state_d = SEND_INVALID_W; - end else begin - state_d = WAIT_INVALID_W; - end - end - - WAIT_INVALID_W: begin - // wait for all snoop masters to assert CR valid - if (cr_valid != '1) begin - state_d = WAIT_INVALID_W; - end else if(|(data_available & ~response_error)) begin - state_d = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - state_d = SEND_AXI_REQ_W; - end - end - - SEND_AXI_REQ_WRITE_BACK_W: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_WRITE_BACK_W; - end else begin - state_d = WRITE_BACK_MEM_W; - end - end - - WRITE_BACK_MEM_W: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_o.b_ready)) begin - state_d = SEND_AXI_REQ_W; - end else begin - state_d = WRITE_BACK_MEM_W; - end - end - - SEND_AXI_REQ_W: begin - // wait for responding slave to assert aw_ready - if(ccu_resp_i.aw_ready !='b1) begin - state_d = SEND_AXI_REQ_W; - end else begin - state_d = WRITE_MEM; - end - end - - WRITE_MEM: begin - // wait for responding slave to send b_valid - if((ccu_resp_i.b_valid && ccu_req_i.b_ready)) begin - if(ccu_req_holder.aw.atop [5]) begin - state_d = READ_MEM; - end else begin - state_d = IDLE; - end - end else begin - 
state_d = WRITE_MEM; - end - end - - default: state_d = IDLE; - - - endcase - end - - // ---------------------- - // Output Block - // ---------------------- - always_comb begin : ccu_output_block - logic ar_addr_offset; - - ar_addr_offset = ccu_req_holder.ar.addr[3]; - - // Default Assignments - ccu_req_o = '0; - ccu_resp_o = '0; - s2m_req_o = '0; - - case(state_q) - IDLE: begin - - end - - //--------------------- - //---- Read Branch ---- - //--------------------- - DECODE_R:begin - ccu_resp_o.ar_ready = 'b1; - end - SEND_READ: begin - // send request to snooping masters - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.ar.prot; - s2m_req_o[n].ac.snoop = ccu_req_holder.ar.snoop; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - SEND_INVALID_R:begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - s2m_req_o[n].ac.addr = ccu_req_holder.ar.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.ar.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - WAIT_RESP_R, WAIT_INVALID_W: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_valid[n]; //'b1; - end - - WAIT_INVALID_R: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cr_ready = !cr_valid[n]; //'b1; - - if ((cr_valid == '1) && (!ccu_req_holder.ar.lock)) begin - ccu_resp_o.r = '0; - ccu_resp_o.r.id = ccu_req_holder.ar.id; - ccu_resp_o.r.last = 'b1; - ccu_resp_o.r_valid = 'b1; - end - end - - READ_SNP_DATA: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // response to intiating master - if (!r_eot) begin - if (ccu_req_holder.ar.len == 0) begin - // single data request - logic critical_word_valid; - critical_word_valid = (stored_cd_data == ar_addr_offset + 1); - ccu_resp_o.r.data = cd_data[ar_addr_offset]; - 
ccu_resp_o.r.last = critical_word_valid; - ccu_resp_o.r_valid = critical_word_valid; - end else begin - // cache line request - ccu_resp_o.r.data = cd_data[r_last]; - ccu_resp_o.r.last = r_last; - ccu_resp_o.r_valid = |stored_cd_data; - end - ccu_resp_o.r.id = ccu_req_holder.ar.id; - ccu_resp_o.r.resp[3] = |shared; // update if shared - ccu_resp_o.r.resp[2] = |dirty; // update if any line dirty - end - end - - SEND_AXI_REQ_WRITE_BACK_R: begin - // send writeback request - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = '0; //default - ccu_req_o.aw.addr = ccu_req_holder.ar.addr; - ccu_req_o.aw.addr[3:0] = 4'b0; // writeback is always full cache line - ccu_req_o.aw.size = 2'b11; - ccu_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - ccu_req_o.aw.id = {first_responder, ccu_req_holder.ar.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - ccu_req_o.aw.len = DcacheLineWords-1; - // WRITEBACK - ccu_req_o.aw.domain = 2'b00; - ccu_req_o.aw.snoop = 3'b011; - end - - WRITE_BACK_MEM_R: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // write data to slave (RAM) - ccu_req_o.w_valid = |stored_cd_data; - ccu_req_o.w.strb = '1; - ccu_req_o.w.data = cd_data[w_last]; - ccu_req_o.w.last = w_last; - ccu_req_o.b_ready = 'b1; - end - - SEND_AXI_REQ_R: begin - // forward request to slave (RAM) - ccu_req_o.ar_valid = 'b1; - ccu_req_o.ar = ccu_req_holder.ar; - ccu_req_o.r_ready = ccu_req_holder.r_ready ; - end - - READ_MEM: begin - // indicate slave to send data on r channel - ccu_req_o.r_ready = ccu_req_i.r_ready ; - ccu_resp_o.r = ccu_resp_i.r; - ccu_resp_o.r_valid = ccu_resp_i.r_valid; - end - - //--------------------- - //---- Write Branch --- - //--------------------- - DECODE_W: begin - ccu_resp_o.aw_ready = 'b1; - end - - SEND_INVALID_W:begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) begin - 
s2m_req_o[n].ac.addr = ccu_req_holder.aw.addr; - s2m_req_o[n].ac.prot = ccu_req_holder.aw.prot; - s2m_req_o[n].ac.snoop = snoop_pkg::CLEAN_INVALID; - s2m_req_o[n].ac_valid = !ac_ready[n]; - end - end - - SEND_AXI_REQ_WRITE_BACK_W: begin - // send writeback request - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = '0; //default - ccu_req_o.aw.addr = ccu_req_holder.aw.addr; - ccu_req_o.aw.addr[3:0] = 4'b0; // writeback is always full cache line - ccu_req_o.aw.size = 2'b11; - ccu_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction - ccu_req_o.aw.id = {first_responder, ccu_req_holder.aw.id[SlvAxiIDWidth-1:0]}; // It should be visible this data originates from the responder, important e.g. for AMO operations - ccu_req_o.aw.len = DcacheLineWords-1; - // WRITEBACK - ccu_req_o.aw.domain = 2'b00; - ccu_req_o.aw.snoop = 3'b011; - end - - WRITE_BACK_MEM_W: begin - for (int unsigned n = 0; n < NoMstPorts; n = n + 1) - s2m_req_o[n].cd_ready = !cd_last[n] & data_available[n]; - // response to intiating master - if (!r_eot) begin - ccu_req_o.w_valid = |stored_cd_data; - ccu_req_o.w.strb = '1; - ccu_req_o.w.data = cd_data[w_last]; - ccu_req_o.w.last = w_last; - ccu_req_o.b_ready = 'b1; - end - end - - SEND_AXI_REQ_W: begin - // forward request to slave (RAM) - ccu_req_o.aw_valid = 'b1; - ccu_req_o.aw = ccu_req_holder.aw; - end - - WRITE_MEM: begin - ccu_req_o.w = ccu_req_i.w; - ccu_req_o.w_valid = ccu_req_i.w_valid; - ccu_req_o.b_ready = ccu_req_i.b_ready; - - ccu_resp_o.b = ccu_resp_i.b; - ccu_resp_o.b_valid = ccu_resp_i.b_valid; - ccu_resp_o.w_ready = ccu_resp_i.w_ready; - end - - endcase - end // end output block - - // Hold incoming ACE request - always_ff @(posedge clk_i , negedge rst_ni) begin - if(!rst_ni) begin - ccu_req_holder <= '0; - end else if(state_q == IDLE && - ((ccu_req_i.ar_valid & !ccu_req_i.aw_valid) | - (ccu_req_i.ar_valid & prio_q.waiting_r) | - (ccu_req_i.ar_valid & !prio_q.waiting_w))) begin - ccu_req_holder.ar <= ccu_req_i.ar; - 
ccu_req_holder.ar_valid <= ccu_req_i.ar_valid; - ccu_req_holder.r_ready <= ccu_req_i.r_ready; - - end else if(state_q == IDLE && - ((ccu_req_i.aw_valid & !ccu_req_i.ar_valid) | - (ccu_req_i.aw_valid & prio_q.waiting_w))) begin - ccu_req_holder.aw <= ccu_req_i.aw; - ccu_req_holder.aw_valid <= ccu_req_i.aw_valid; - end - end - - // Hold snoop AC_ready - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - ac_ready <= '0; - ac_valid <= '0; - end else if(state_q == DECODE_R || state_q == DECODE_W) begin - ac_ready <= initiator_q; - end else if(state_q == SEND_READ || state_q == SEND_INVALID_R || state_q == SEND_INVALID_W) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - ac_ready[i] <= ac_ready[i] | (m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid); - ac_valid[i] <= ac_valid[i] | (m2s_resp_i[i].ac_ready & s2m_req_o[i].ac_valid); - end - end else begin - ac_ready <= '0; - ac_valid <= '0; - end - end - - // Hold snoop CR - always_ff @ (posedge clk_i, negedge rst_ni) begin - logic snoop_resp_found; - if(!rst_ni) begin - cr_valid <= '0; - data_available <= '0; - shared <= '0; - dirty <= '0; - response_error <= '0; - first_responder <= '0; - snoop_resp_found <= 1'b0; - end else if(state_q == IDLE) begin - cr_valid <= '0; - data_available <= '0; - shared <= '0; - dirty <= '0; - response_error <= '0; - first_responder <= '0; - snoop_resp_found <= 1'b0; - end else if(state_q == SEND_READ || state_q == SEND_INVALID_R || state_q == SEND_INVALID_W) begin - cr_valid <= initiator_q; - end else begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if(m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready) begin - cr_valid[i] <= cr_valid[i] | 1'b1; - data_available[i] <= m2s_resp_i[i].cr_resp.dataTransfer; - shared[i] <= m2s_resp_i[i].cr_resp.isShared; - dirty[i] <= m2s_resp_i[i].cr_resp.passDirty; - response_error[i] <= m2s_resp_i[i].cr_resp.error; - end - end - if (!snoop_resp_found) begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - 
if(m2s_resp_i[i].cr_valid & s2m_req_o[i].cr_ready & m2s_resp_i[i].cr_resp.dataTransfer & !m2s_resp_i[i].cr_resp.error) begin - first_responder <= i[MstIdxBits-1:0]; - snoop_resp_found <= 1'b1; - break; - end - end - end - end - end - - // Hold snoop CD - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - data_received <= '0; - cd_last <= '0; - m2s_resp_holder <= '0; - cd_data <= '0; - stored_cd_data <= '0; - end else begin - if(state_q == IDLE) begin - data_received <= '0; - m2s_resp_holder <= '0; - cd_last <= '0; - cd_data <= '0; - stored_cd_data <= '0; - end - else begin - for (int i = 0; i < NoMstPorts; i = i + 1) begin - if (state_q == READ_SNP_DATA) begin - if(m2s_resp_i[i].cd_valid) begin - data_received[i] <= m2s_resp_i[i].cd_valid; - cd_last[i] <= cd_last[i] | (m2s_resp_i[i].cd.last & data_available[i]); - m2s_resp_holder[i] <= m2s_resp_i[i]; - end - if (data_received[i] & ccu_resp_o.r_valid) begin - data_received[i] <= '0; - m2s_resp_holder <= '0; - end - if (m2s_resp_i[first_responder].cd_valid & s2m_req_o[first_responder].cd_ready) begin - cd_data[m2s_resp_i[first_responder].cd.last] <= m2s_resp_i[first_responder].cd.data; - end - if (s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid & !(ccu_resp_o.r_valid & ccu_req_i.r_ready)) begin - stored_cd_data <= stored_cd_data + 1; - end else if(ccu_resp_o.r_valid & ccu_req_i.r_ready & !(s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid)) begin - stored_cd_data <= stored_cd_data - 1; - end - end else if (state_q == WRITE_BACK_MEM_R || state_q == WRITE_BACK_MEM_W) begin - if(m2s_resp_i[i].cd_valid) begin - data_received[i] <= m2s_resp_i[i].cd_valid; - cd_last[i] <= cd_last[i] | (m2s_resp_i[i].cd.last & data_available[i]); - m2s_resp_holder[i] <= m2s_resp_i[i]; - end - if (data_received[i] & ccu_req_o.w_valid) begin - data_received[i] <= '0; - m2s_resp_holder <= '0; - end - if (m2s_resp_i[first_responder].cd_valid & 
s2m_req_o[first_responder].cd_ready) begin - cd_data[m2s_resp_i[first_responder].cd.last] <= m2s_resp_i[first_responder].cd.data; - end - if (s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid & !(ccu_req_o.w_valid & ccu_resp_i.w_ready)) begin - stored_cd_data <= stored_cd_data + 1; - end else if(ccu_req_o.w_valid & ccu_resp_i.w_ready & !(s2m_req_o[first_responder].cd_ready & m2s_resp_i[first_responder].cd_valid)) begin - stored_cd_data <= stored_cd_data - 1; - end - end - end - end - end - end - - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - r_last <= 1'b0; - r_eot <= 1'b0; - end else begin - if(state_q == IDLE) begin - r_last <= 1'b0; - r_eot <= 1'b0; - end else if (ccu_req_i.r_ready & ccu_resp_o.r_valid) begin - r_last <= !r_last; - if (ccu_resp_o.r.last) - r_eot <= 1'b1; - end - end - end - - always_ff @ (posedge clk_i, negedge rst_ni) begin - if(!rst_ni) begin - w_last <= 1'b0; - w_eot <= 1'b0; - end else begin - if(state_q == IDLE) begin - w_last <= 1'b0; - w_eot <= 1'b0; - end else if (ccu_resp_i.w_ready & ccu_req_o.w_valid) begin - w_last <= !w_last; - if (w_last) - w_eot <= 1'b1; - end - end - end - - `ifndef VERILATOR - // pragma translate_off - initial begin - a_dcache_line_words : assert (DcacheLineWords == 2) else - $error("The ccu_fsm module is currently hardcoded to only support DcacheLineWidth = 2 * AxiDataWidth"); - end - // pragma translate_on - `endif - - - -endmodule diff --git a/src/snoop_intf.sv b/src/snoop_intf.sv index 269a5ac..11cc5bc 100644 --- a/src/snoop_intf.sv +++ b/src/snoop_intf.sv @@ -1,5 +1,6 @@ // Copyright (c) 2014-2018 ETH Zurich, University of Bologna // Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -13,110 +14,122 @@ // Snoop bus interafces interface SNOOP_BUS #( - parameter int 
unsigned SNOOP_ADDR_WIDTH = 0, - parameter int unsigned SNOOP_DATA_WIDTH = 0 + parameter int unsigned SNOOP_ADDR_WIDTH = 0, + parameter int unsigned SNOOP_DATA_WIDTH = 0 ); - typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; - typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; - - addr_t ac_addr; - snoop_pkg::acprot_t ac_prot; - snoop_pkg::acsnoop_t ac_snoop; - logic ac_valid; - logic ac_ready; - - snoop_pkg::crresp_t cr_resp; - logic cr_valid; - logic cr_ready; - - data_t cd_data; - logic cd_last; - logic cd_valid; - logic cd_ready; - - modport Master ( - input ac_addr, ac_prot, ac_snoop, ac_valid, output ac_ready, - input cr_ready, output cr_valid, cr_resp, - input cd_ready, output cd_data, cd_last, cd_valid - ); - - modport Slave ( - output ac_addr, ac_prot, ac_snoop, ac_valid, input ac_ready, - output cr_ready, input cr_valid, cr_resp, - output cd_ready, input cd_data, cd_last, cd_valid - ); - - - modport Monitor ( - input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, + typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; + typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; + + addr_t ac_addr; + ace_pkg::acprot_t ac_prot; + ace_pkg::acsnoop_t ac_snoop; + logic ac_valid; + logic ac_ready; + + ace_pkg::crresp_t cr_resp; + logic cr_valid; + logic cr_ready; + + data_t cd_data; + logic cd_last; + logic cd_valid; + logic cd_ready; + + modport Master( + input ac_addr, ac_prot, ac_snoop, ac_valid, + output ac_ready, + input cr_ready, + output cr_valid, cr_resp, + input cd_ready, + output cd_data, cd_last, cd_valid + ); + + modport Slave( + output ac_addr, ac_prot, ac_snoop, ac_valid, + input ac_ready, + output cr_ready, + input cr_valid, cr_resp, + output cd_ready, + input cd_data, cd_last, cd_valid + ); + + + modport Monitor( + input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, cr_ready, cr_valid, cr_resp, cd_ready, cd_data, cd_last, cd_valid - ); + ); endinterface /// A clocked SNOOP interface for use in design verification. 
interface SNOOP_BUS_DV #( - parameter int unsigned SNOOP_ADDR_WIDTH = 0, - parameter int unsigned SNOOP_DATA_WIDTH = 0 -)( - input clk_i + parameter int unsigned SNOOP_ADDR_WIDTH = 0, + parameter int unsigned SNOOP_DATA_WIDTH = 0 +) ( + input clk_i ); - typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; - typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; - - addr_t ac_addr; - snoop_pkg::acprot_t ac_prot; - snoop_pkg::acsnoop_t ac_snoop; - logic ac_valid; - logic ac_ready; - - snoop_pkg::crresp_t cr_resp; - logic cr_valid; - logic cr_ready; - - data_t cd_data; - logic cd_last; - logic cd_valid; - logic cd_ready; - - modport Master ( - input ac_addr, ac_prot, ac_snoop, ac_valid, output ac_ready, - input cr_ready, output cr_valid, cr_resp, - input cd_ready, output cd_data, cd_last, cd_valid - ); - - modport Slave ( - output ac_addr, ac_prot, ac_snoop, ac_valid, input ac_ready, - output cr_ready, input cr_valid, cr_resp, - output cd_ready, input cd_data, cd_last, cd_valid - ); - - - modport Monitor ( - input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, + typedef logic [SNOOP_ADDR_WIDTH-1:0] addr_t; + typedef logic [SNOOP_DATA_WIDTH-1:0] data_t; + + addr_t ac_addr; + ace_pkg::acprot_t ac_prot; + ace_pkg::acsnoop_t ac_snoop; + logic ac_valid; + logic ac_ready; + + ace_pkg::crresp_t cr_resp; + logic cr_valid; + logic cr_ready; + + data_t cd_data; + logic cd_last; + logic cd_valid; + logic cd_ready; + + modport Master( + input ac_addr, ac_prot, ac_snoop, ac_valid, + output ac_ready, + input cr_ready, + output cr_valid, cr_resp, + input cd_ready, + output cd_data, cd_last, cd_valid + ); + + modport Slave( + output ac_addr, ac_prot, ac_snoop, ac_valid, + input ac_ready, + output cr_ready, + input cr_valid, cr_resp, + output cd_ready, + input cd_data, cd_last, cd_valid + ); + + + modport Monitor( + input ac_addr, ac_prot, ac_snoop, ac_valid, ac_ready, cr_ready, cr_valid, cr_resp, cd_ready, cd_data, cd_last, cd_valid - ); - - // pragma translate_off - `ifndef VERILATOR - // 
Single-Channel Assertions: Signals including valid must not change between valid and handshake. - // AC - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_addr))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_snoop))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_prot))); - assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> ac_valid)); - // CR - assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> $stable(cr_resp))); - assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> cr_valid)); - // CD - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_data))); - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_last))); - assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> cd_valid)); - `endif - // pragma translate_on + ); + + // pragma translate_off +`ifndef VERILATOR + // Single-Channel Assertions: Signals including valid must not change between valid and handshake. 
+ // AC + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_addr))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_snoop))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> $stable(ac_prot))); + assert property (@(posedge clk_i) (ac_valid && !ac_ready |=> ac_valid)); + // CR + assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> $stable(cr_resp))); + assert property (@(posedge clk_i) (cr_valid && !cr_ready |=> cr_valid)); + // CD + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_data))); + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> $stable(cd_last))); + assert property (@(posedge clk_i) (cd_valid && !cd_ready |=> cd_valid)); +`endif + // pragma translate_on endinterface diff --git a/src/snoop_pkg.sv b/src/snoop_pkg.sv deleted file mode 100644 index e1df838..0000000 --- a/src/snoop_pkg.sv +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - - -//! ACE Package -/// Contains all necessary type definitions, constants, and generally useful functions. 
-package snoop_pkg; - - // CRRESP - typedef struct packed { - logic wasUnique; - logic isShared; - logic passDirty; - logic error; - logic dataTransfer; - } crresp_t; - - /// Support for snoop channels - typedef logic [3:0] acsnoop_t; - typedef logic [2:0] acprot_t; - - // AC snoop encoding - localparam READ_ONCE = 4'b0000; - localparam READ_SHARED = 4'b0001; - localparam READ_CLEAN = 4'b0010; - localparam READ_NOT_SHARED_DIRTY = 4'b0011; - localparam READ_UNIQUE = 4'b0111; - localparam CLEAN_SHARED = 4'b1000; - localparam CLEAN_INVALID = 4'b1001; - localparam CLEAN_UNIQUE = 4'b1011; - localparam MAKE_INVALID = 4'b1101; - localparam DVM_COMPLETE = 4'b1110; - localparam DVM_MESSAGE = 4'b1111; - -endpackage diff --git a/src/snoop_test.sv b/src/snoop_test.sv deleted file mode 100644 index 270460c..0000000 --- a/src/snoop_test.sv +++ /dev/null @@ -1,675 +0,0 @@ -// Copyright (c) 2014-2018 ETH Zurich, University of Bologna -// Copyright (c) 2022 PlanV GmbH -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// - - -/// A set of testbench utilities for AXI interfaces. -package snoop_test; - - import axi_pkg::*; - import ace_pkg::*; - - /// The data transferred on a beat on the AC channel. - class ace_ac_beat #( - parameter AW = 32 - ); - rand logic [AW-1:0] ac_addr = '0; - logic [3:0] ac_snoop = '0; - logic [2:0] ac_prot = '0; - endclass - - /// The data transferred on a beat on the CR channel. 
- class ace_cr_beat; - snoop_pkg::crresp_t cr_resp = '0; - endclass - - /// The data transferred on a beat on the CD channel. - class ace_cd_beat #( - parameter DW = 32 - ); - rand logic [DW-1:0] cd_data = '0; - logic cd_last; - endclass - - class snoop_driver #( - parameter AW = 32, - parameter DW = 32, - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop; - - typedef ace_ac_beat #(.AW(AW)) ace_ac_beat_t; - typedef ace_cd_beat #(.DW(DW)) ace_cd_beat_t; - typedef ace_cr_beat ace_cr_beat_t; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.snoop = snoop; - endfunction - - function void reset_master(); - snoop.ac_valid <= '0; - snoop.ac_addr <= '0; - snoop.ac_snoop <= '0; - snoop.ac_prot <= '0; - snoop.cr_ready <= '0; - snoop.cd_ready <= '0; - endfunction - - function void reset_slave(); - snoop.ac_ready <= '0; - snoop.cr_valid <= '0; - snoop.cr_resp <= '0; - snoop.cd_valid <= '0; - snoop.cd_data <= '0; - snoop.cd_last <= '0; - endfunction - - task cycle_start; - #TT; - endtask - - task cycle_end; - @(posedge snoop.clk_i); - endtask - - /// Issue a beat on the AC channel. - task send_ac ( - input ace_ac_beat_t beat - ); - snoop.ac_valid <= #TA 1; - snoop.ac_addr <= #TA beat.ac_addr; - snoop.ac_snoop <= #TA beat.ac_snoop; - snoop.ac_prot <= #TA beat.ac_prot; - cycle_start(); - while (snoop.ac_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.ac_valid <= #TA '0; - snoop.ac_addr <= #TA '0; - snoop.ac_snoop <= #TA '0; - snoop.ac_prot <= #TA '0; - endtask - - /// Issue a beat on the CR channel. 
- task send_cr ( - input ace_cr_beat_t beat - ); - snoop.cr_valid <= #TA 1; - snoop.cr_resp <= #TA beat.cr_resp; - cycle_start(); - while (snoop.cr_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cr_valid <= #TA '0; - snoop.cr_resp <= #TA '0; - endtask - - /// Issue a beat on the CD channel. - task send_cd ( - input ace_cd_beat_t beat - ); - snoop.cd_valid <= #TA 1; - snoop.cd_data <= #TA beat.cd_data; - snoop.cd_last <= #TA beat.cd_last; - cycle_start(); - while (snoop.cd_ready != 1) begin cycle_end(); cycle_start(); end - cycle_end(); - snoop.cd_valid <= #TA '0; - snoop.cd_data <= #TA '0; - snoop.cd_last <= #TA '0; - endtask - - /// Wait for a beat on the AC channel. - task recv_ac ( - output ace_ac_beat_t beat - ); - snoop.ac_ready <= #TA 1; - cycle_start(); - while (snoop.ac_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - snoop.ac_ready <= #TA 0; - endtask - - /// Wait for a beat on the CR channel. - task recv_cr ( - output ace_cr_beat_t beat - ); - snoop.cr_ready <= #TA 1; - cycle_start(); - while (snoop.cr_valid != 1) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - snoop.cr_ready <= #TA 0; - endtask - - /// Wait for a beat on the CD channel. - task recv_cd ( - output ace_cd_beat_t beat - ); - beat = new; - beat.cd_last = '0; - while (!beat.cd_last) begin - snoop.cd_ready <= #TA 1; - cycle_start(); - while (snoop.cd_valid != 1) begin cycle_end(); cycle_start(); end - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - snoop.cd_ready <= #TA 0; - end - endtask - - /// Monitor the AC channel and return the next beat. 
- task mon_ac ( - output ace_ac_beat_t beat - ); - cycle_start(); - while (!(snoop.ac_valid && snoop.ac_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.ac_addr = snoop.ac_addr; - beat.ac_snoop = snoop.ac_snoop; - beat.ac_prot = snoop.ac_prot; - cycle_end(); - endtask - - /// Monitor the CR channel and return the next beat. - task mon_cr ( - output ace_cr_beat_t beat - ); - cycle_start(); - while (!(snoop.cr_valid && snoop.cr_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cr_resp = snoop.cr_resp; - cycle_end(); - endtask - - /// Monitor the CD channel and return the next beat. - task mon_cd ( - output ace_cd_beat_t beat - ); - cycle_start(); - while (!(snoop.cd_valid && snoop.cd_ready)) begin cycle_end(); cycle_start(); end - beat = new; - beat.cd_data = snoop.cd_data; - beat.cd_last = snoop.cd_last; - cycle_end(); - endtask - - endclass - - class snoop_rand_master #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - // Upper and lower bounds on wait cycles on AC, CR, and CD channels - parameter int AC_MIN_WAIT_CYCLES = 0, - parameter int AC_MAX_WAIT_CYCLES = 100, - parameter int CR_MIN_WAIT_CYCLES = 0, - parameter int CR_MAX_WAIT_CYCLES = 5, - parameter int CD_MIN_WAIT_CYCLES = 0, - parameter int CD_MAX_WAIT_CYCLES = 20 - ); - typedef snoop_test::snoop_driver #( - .AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - typedef logic [AW-1:0] addr_t; - typedef logic [DW-1:0] data_t; - typedef snoop_pkg::acsnoop_t acsnoop_t; - typedef snoop_pkg::acprot_t acprot_t; - typedef snoop_pkg::crresp_t crresp_t; - - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - - snoop_driver_t drv; - - typedef struct packed { - addr_t addr_begin; - addr_t addr_end; - mem_type_t mem_type; - } mem_region_t; - 
mem_region_t mem_map[$]; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - this.reset(); - endfunction - - function void reset(); - drv.reset_master(); - endfunction - - function void add_memory_region(input addr_t addr_begin, input addr_t addr_end, input mem_type_t mem_type); - mem_map.push_back({addr_begin, addr_end, mem_type}); - endfunction - - function ace_ac_beat_t new_rand_burst(); - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat = new; - automatic addr_t addr; - automatic snoop_pkg::acsnoop_t snoop; - automatic snoop_pkg::acprot_t prot; - automatic int unsigned mem_region_idx; - automatic mem_region_t mem_region; - - // No memory regions defined - if (mem_map.size() == 0) begin - // Return a dummy region - mem_region = '{ - addr_begin: '0, - addr_end: '1, - mem_type: axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE - }; - end else begin - // Randomly pick a memory region - mem_region_idx = $urandom_range(0,mem_map.size()-1); - // std::randomize(mem_region_idx) with { - // mem_region_idx < mem_map.size(); - // }; assert(rand_success); - mem_region = mem_map[mem_region_idx]; - end - - // Randomize address - addr = mem_region.addr_begin + $urandom_range(mem_region.addr_end-mem_region.addr_begin+1); - - ace_ac_beat.ac_addr = addr; - snoop = $urandom(); - prot = $urandom(); - - // rand_success = std::randomize(id); assert(rand_success); - // rand_success = std::randomize(qos); assert(rand_success); - // The random ID *must* be legalized with `legalize_id()` before the beat is sent! This is - // currently done in the functions `create_aws()` and `send_ars()`. - ace_ac_beat.ac_snoop = snoop; - ace_ac_beat.ac_prot = prot; - - return ace_ac_beat; - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax for getting an assignable - // reference? 
- task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - //assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.snoop.clk_i); - endtask - - task send_acs(input int n_reads); - automatic logic rand_success; - repeat (n_reads) begin - automatic ace_ac_beat_t ace_ac_beat = new_rand_burst(); - rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); - drv.send_ac(ace_ac_beat); - end - endtask - - task recv_crs(ref logic ac_done); - while (!ac_done) begin - automatic ace_cr_beat_t ace_cr_beat; - automatic ace_cd_beat_t ace_cd_beat; - rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - drv.recv_cr(ace_cr_beat); - if (!ace_cr_beat.cr_resp.error & ace_cr_beat.cr_resp.dataTransfer) - drv.recv_cd(ace_cd_beat); - end - endtask - - task recv_cds(ref logic ac_done); - while (!ac_done) begin - automatic ace_cd_beat_t ace_cd_beat; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.recv_cd(ace_cd_beat); - end - endtask - - // Issue n_reads random read transactions to an address range - task run(input int n_reads); - automatic logic ac_done = 1'b0; - fork - begin - send_acs(n_reads); - ac_done = 1'b1; - end - recv_crs(ac_done); - join - endtask - - endclass - - class snoop_rand_slave #( - // AXI interface parameters - parameter int AW = 32, - parameter int DW = 32, - // Stimuli application and test time - parameter time TA = 0ps, - parameter time TT = 0ps, - parameter bit RAND_RESP = 0, - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - parameter int AC_MIN_WAIT_CYCLES = 0, - parameter int AC_MAX_WAIT_CYCLES = 100, - parameter int CR_MIN_WAIT_CYCLES = 0, - parameter int CR_MAX_WAIT_CYCLES = 5, - parameter int CD_MIN_WAIT_CYCLES = 0, - parameter int CD_MAX_WAIT_CYCLES = 20 - ); - typedef snoop_test::snoop_driver #( - 
.AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - - typedef logic [AW-1:0] addr_t; - - snoop_driver_t drv; - ace_ac_beat_t ace_ac_queue[$]; - int unsigned cd_wait_cnt; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - this.cd_wait_cnt = 0; - this.reset(); - endfunction - - function void reset(); - this.drv.reset_slave(); - endfunction - - // TODO: The `rand_wait` task exists in `rand_verif_pkg`, but that task cannot be called with - // `this.drv.ace.clk_i` as `clk` argument. What is the syntax getting an assignable reference? - task automatic rand_wait(input int unsigned min, max); - int unsigned rand_success, cycles; - cycles = $urandom_range(min,max); - // rand_success = std::randomize(cycles) with { - // cycles >= min; - // cycles <= max; - // }; - // assert (rand_success) else $error("Failed to randomize wait cycles!"); - repeat (cycles) @(posedge this.drv.snoop.clk_i); - endtask - - task recv_acs(); - forever begin - automatic ace_ac_beat_t ace_ac_beat; - rand_wait(AC_MIN_WAIT_CYCLES, AC_MAX_WAIT_CYCLES); - drv.recv_ac(ace_ac_beat); - ace_ac_queue.push_back(ace_ac_beat); - end - endtask - - task send_crs(); - forever begin - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat; - automatic ace_cr_beat_t ace_cr_beat = new; - wait (ace_ac_queue.size() > 0); - ace_ac_beat = ace_ac_queue.pop_front(); - if(ace_ac_beat.ac_snoop == snoop_pkg::CLEAN_INVALID) begin - ace_cr_beat.cr_resp = 0; - end else begin - ace_cr_beat.cr_resp[4:2] = $urandom_range(0,3'b111);//$urandom_range(0,5'b11111); - ace_cr_beat.cr_resp[1] = 'b0; - ace_cr_beat.cr_resp[0] = $urandom_range(0,1); - end - rand_wait(CR_MIN_WAIT_CYCLES, CR_MAX_WAIT_CYCLES); - drv.send_cr(ace_cr_beat); - if (ace_cr_beat.cr_resp.dataTransfer && 
!ace_cr_beat.cr_resp.error) begin - cd_wait_cnt++; - end - end - endtask - - task send_cds(); - forever begin - automatic logic rand_success; - automatic ace_ac_beat_t ace_ac_beat; - automatic ace_cd_beat_t ace_cd_beat = new; - automatic addr_t byte_addr; - wait (cd_wait_cnt > 0); - // random response - ace_cd_beat.cd_data = $urandom(); - ace_cd_beat.cd_last = 1'b0; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.send_cd(ace_cd_beat); - ace_cd_beat.cd_data = $urandom(); - ace_cd_beat.cd_last = 1'b1; - rand_wait(CD_MIN_WAIT_CYCLES, CD_MAX_WAIT_CYCLES); - drv.send_cd(ace_cd_beat); - cd_wait_cnt--; - end - endtask - - task run(); - fork - recv_acs(); - send_crs(); - send_cds(); - join - endtask - - endclass - - /// Snoop Monitor. - class snoop_monitor #( - parameter AW = 32, - parameter DW = 32, - parameter time TA = 0ns , // stimuli application time - parameter time TT = 0ns // stimuli test time - ); - - typedef snoop_test::snoop_driver #( - .AW(AW), .DW(DW), .TA(TA), .TT(TT) - ) snoop_driver_t; - - typedef snoop_driver_t::ace_ac_beat_t ace_ac_beat_t; - typedef snoop_driver_t::ace_cd_beat_t ace_cd_beat_t; - typedef snoop_driver_t::ace_cr_beat_t ace_cr_beat_t; - - snoop_driver_t drv; - mailbox ac_mbx = new, cd_mbx = new, cr_mbx = new; - - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop; - - function new( - virtual SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH(AW), - .SNOOP_DATA_WIDTH(DW) - ) snoop - ); - this.drv = new(snoop); - endfunction - - task monitor; - fork - // AC - forever begin - automatic ace_ac_beat_t beat; - this.drv.mon_ac(beat); - ac_mbx.put(beat); - end - // CR - forever begin - automatic ace_cr_beat_t beat; - this.drv.mon_cr(beat); - cr_mbx.put(beat); - end - // CD - forever begin - automatic ace_cd_beat_t beat; - this.drv.mon_cd(beat); - cd_mbx.put(beat); - end - join - endtask - endclass - -endpackage - - -// non synthesisable ace snoop logger module -// this module logs the activity of the input snoop channel 
-// the log files will be found in "./ace_log//" -// one log file for all writes -// a log file per id for the reads -// atomic transactions with read response are injected into the corresponding log file of the read -module snoop_chan_logger #( - parameter time TestTime = 8ns, // Time after clock, where sampling happens - parameter string LoggerName = "snoop_logger", // name of the logger - parameter type ac_chan_t = logic, // ACE AC type - parameter type cr_chan_t = logic, // ACE CR type - parameter type cd_chan_t = logic // ACE CD type -) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low, when `1'b0` no sampling - input logic end_sim_i, // end of simulation - // AC channel - input ac_chan_t ac_chan_i, - input logic ac_valid_i, - input logic ac_ready_i, - // CR channel - input cr_chan_t cr_chan_i, - input logic cr_valid_i, - input logic cr_ready_i, - // CD channel - input cd_chan_t cd_chan_i, - input logic cd_valid_i, - input logic cd_ready_i -); - - // queues for writes and reads - ac_chan_t ac_queues[$]; - cr_chan_t cr_queues[$]; - cd_chan_t cd_queues[$]; - - // channel sampling into queues - always @(posedge clk_i) #TestTime begin : proc_channel_sample - automatic ac_chan_t ac_beat; - automatic int fd; - automatic string log_file; - automatic string log_str; - // only execute when reset is high - if (rst_ni) begin - // AC channel - if (ac_valid_i && ac_ready_i) begin - log_file = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_file, "a"); - if (fd) begin - log_str = $sformatf("%0t> AC, ADDR: 0x%h SNOOP %b, PROT %b", $time, ac_chan_i.addr, ac_chan_i.snoop, ac_chan_i.prot); - $fdisplay(fd, log_str); - $fclose(fd); - end - ac_beat.addr = ac_chan_i.addr; - ac_beat.snoop = ac_chan_i.snoop; - ac_beat.prot = ac_chan_i.prot; - ac_queues.push_back(ac_beat); - end - // CR channel - if (cr_valid_i && cr_ready_i) begin - cr_queues.push_back(cr_chan_i); - end - // CD channel - if (cd_valid_i && cd_ready_i) begin 
- cd_queues.push_back(cd_chan_i); - end - end - end - - initial begin : proc_log - automatic string log_name; - automatic string log_string; - automatic ac_chan_t ac_beat; - automatic cr_chan_t cr_beat; - automatic cd_chan_t cd_beat; - automatic int unsigned no_r_beat; - automatic int fd; - - no_r_beat = 0; - - // make the log dirs - log_name = $sformatf("mkdir -p ./ace_log/%s/", LoggerName); - $system(log_name); - - // open log files - log_name = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_name, "w"); - if (fd) begin - $display("File was opened successfully : %s", log_name); - $fclose(fd); - end else - $display("File was NOT opened successfully : %s", log_name); - - // on each clock cycle update the logs if there is something in the queues - wait (rst_ni); - while (!end_sim_i) begin - @(posedge clk_i); - - // update the read log files - while (ac_queues.size() != 0 && cr_queues.size() != 0) begin - ac_beat = ac_queues.pop_front(); - cr_beat = cr_queues.pop_front(); - log_name = $sformatf("./ace_log/%s/snoop_read.log", LoggerName); - fd = $fopen(log_name, "a"); - if (fd) begin - log_string = $sformatf("%0t ns> CR %d RESP: %b, ", - $time, no_r_beat, cr_beat); - $fdisplay(fd, log_string); - if (cr_beat.dataTransfer && !cr_beat.error) begin - while(cd_queues.size() != 0) begin - cd_beat = cd_queues.pop_front(); - log_string = $sformatf("%0t ns> CD %d DATA: %h, ", - $time, no_r_beat, cd_beat.data); - $fdisplay(fd, log_string); - end - end - $fclose(fd); - end - no_r_beat++; - end - end - $fclose(fd); - end -endmodule diff --git a/test/tb_ace_ccu_snoop_interconnect.sv b/test/tb_ace_ccu_snoop_interconnect.sv new file mode 100644 index 0000000..946f61f --- /dev/null +++ b/test/tb_ace_ccu_snoop_interconnect.sv @@ -0,0 +1,229 @@ +`include "ace/typedef.svh" +`include "ace/assign.svh" + +`timescale 1ns/1ps + +module tb_ace_ccu_snoop_interconnect import ace_pkg::*; ( + +); + + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + 
localparam time TestTime = 8ns; + + localparam int unsigned AxiAddrWidth = 64; + localparam int unsigned AxiDataWidth = 64; + + localparam int unsigned TbNumMst = 4; + + typedef snoop_test::snoop_rand_slave #( + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .TA ( ApplTime), + .TT ( TestTime), + .RAND_RESP ( '0), + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + ) snoop_rand_slave_t; + + typedef snoop_test::snoop_rand_master #( + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .TA ( ApplTime), + .TT ( TestTime), + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + ) snoop_rand_master_t; + + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + + logic clk; + logic rst_n; + + task cycle_start; + #(ApplTime); + endtask + + task cycle_end; + @(posedge clk); + endtask + + // snoop structs + snoop_req_t [TbNumMst-1:0] inp_snoop_req; + snoop_resp_t [TbNumMst-1:0] inp_snoop_resp; + snoop_req_t [TbNumMst-1:0] oup_snoop_req; + snoop_resp_t [TbNumMst-1:0] oup_snoop_resp; + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) inp_snoop [TbNumMst-1:0] (); + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) oup_snoop [TbNumMst-1:0] (); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) inp_snoop_dv [TbNumMst-1:0](clk); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) oup_snoop_dv 
[TbNumMst-1:0](clk); + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(inp_snoop[i], inp_snoop_dv[i]) + `SNOOP_ASSIGN(oup_snoop_dv[i], oup_snoop[i]) + `SNOOP_ASSIGN_TO_REQ(inp_snoop_req[i], inp_snoop[i]) + `SNOOP_ASSIGN_FROM_RESP(inp_snoop[i], inp_snoop_resp[i]) + `SNOOP_ASSIGN_FROM_REQ(oup_snoop[i], oup_snoop_req[i]) + `SNOOP_ASSIGN_TO_RESP(oup_snoop_resp[i], oup_snoop[i]) + end + + snoop_rand_master_t snoop_rand_master [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop_mst + initial begin + snoop_rand_master[i] = new( inp_snoop_dv[i] ); + snoop_rand_master[i].reset(); + @(posedge rst_n); + snoop_rand_master[i].run(1024); + end + end + + snoop_rand_slave_t snoop_rand_slave [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop_slv + initial begin + snoop_rand_slave[i] = new( oup_snoop_dv[i] ); + snoop_rand_slave[i].reset(); + @(posedge rst_n); + snoop_rand_slave[i].run(); + end + end + + initial begin : rst_gen + rst_n = 1'b0; + + repeat (5) @(negedge clk); + + rst_n = 1'b1; + end + + initial begin : clk_gen + clk = 1'b0; + forever #(CyclTime/2) clk = !clk; + end + + logic [TbNumMst-1:0][TbNumMst-1:0] inp_sel; + + logic [TbNumMst-1:0] sel_done; + + initial begin + @(posedge rst_n); + cycle_start(); + while (sel_done != '1) begin + cycle_end(); + cycle_start(); + end + cycle_end(); + $finish; + end + + logic [TbNumMst-1:0] sel_done; + + initial begin + @(posedge rst_n); + cycle_start(); + while (sel_done != '1) begin + cycle_end(); + cycle_start(); + end + cycle_end(); + $finish; + end + + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_sel + + localparam int unsigned idx = i; + logic [TbNumMst-1:0] temp_inp_sel; + + initial begin + + sel_done[i] = 1'b0; + + @(posedge rst_n); + + + repeat (64) begin + // Randomize the temp variable with the constraint + std::randomize(temp_inp_sel) with { + temp_inp_sel != '0; + temp_inp_sel[idx] == 1'b0; + }; + // Assign the randomized value to 
inp_sel[i] + inp_sel[i] <= #(ApplTime) temp_inp_sel; + + cycle_start(); + while (!(inp_snoop_req[i].ac_valid && inp_snoop_resp[i].ac_ready)) begin + + cycle_end(); + cycle_start(); + end + cycle_end(); + + end + sel_done[i] = 1'b1; + end + + end + + logic lup_valid, lup_ready; + + ace_ccu_snoop_interconnect #( + .NumInp (TbNumMst), + .NumOup (TbNumMst), + .ConfCheck (1), + .NumLup (1), + .AddrBase (4), + .AddrLength (16), + .ac_chan_t (snoop_ac_t), + .cr_chan_t (snoop_cr_t), + .cd_chan_t (snoop_cd_t), + .snoop_req_t (snoop_req_t), + .snoop_resp_t (snoop_resp_t) + ) i_dut ( + .clk_i (clk), + .rst_ni (rst_n), + .inp_sel_i (inp_sel), + .inp_req_i (inp_snoop_req), + .inp_resp_o (inp_snoop_resp), + .oup_req_o (oup_snoop_req), + .oup_resp_i (oup_snoop_resp), + .lup_valid_o (lup_valid), + .lup_ready_i (lup_ready), + .lup_addr_o (), + .lup_valid_i (lup_valid), + .lup_ready_o (lup_ready), + .lup_clr_o () + ); + +endmodule diff --git a/test/tb_ace_ccu_top.sv b/test/tb_ace_ccu_top.sv index ea2c27d..f89a9cd 100644 --- a/test/tb_ace_ccu_top.sv +++ b/test/tb_ace_ccu_top.sv @@ -1,5 +1,4 @@ -// Copyright (c) 2019 ETH Zurich and University of Bologna. -// Copyright (c) 2022 PlanV GmbH +// Copyright (c) 2025 ETH Zurich, University of Bologna // // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in @@ -9,528 +8,338 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -// -// Authors: -// - Florian Zaruba -// - Andreas Kurth -// Directed Random Verification Testbench for `axi_xbar`: The crossbar is instantiated with -// a number of random axi master and slave modules. Each random master executes a fixed number of -// writes and reads over the whole addess map. 
All masters simultaneously issue transactions -// through the crossbar, thereby saturating it. A monitor, which snoops the transactions of each -// master and slave port and models the crossbar with a network of FIFOs, checks whether each -// transaction follows the expected route. +// Directed random verification testbench for `ace_ccu_top`. `include "ace/typedef.svh" `include "ace/assign.svh" +`include "ace/domain.svh" module tb_ace_ccu_top #( - parameter bit TbEnAtop = 1'b1, // enable atomic operations (ATOPs) - parameter bit TbEnExcl = 1'b0, // enable exclusive accesses - parameter bit TbUniqueIds = 1'b0, // restrict to only unique IDs - parameter int unsigned TbNumMst = 32'd4, // how many AXI masters there are - parameter int unsigned TbNumSlv = 32'd1 // how many AXI slaves there are + /// Address space + parameter int unsigned AddrWidth = 0, + /// Memory bus data width + parameter int unsigned DataWidth = 0, + /// Cache word width + parameter int unsigned WordWidth = 0, + /// Words per cache line + parameter int unsigned CachelineWords = 0, + /// Cache ways + parameter int unsigned Ways = 0, + /// Cache sets + parameter int unsigned Sets = 0, + /// Number of cached masters + parameter int unsigned TbNumMst = 0, + /// Number of master groups (a group share the snooping FSM) + parameter int unsigned NoMstGroups = 1, + /// Directory for files + parameter string MemDir = "" ); - // Random master no Transactions - localparam int unsigned NoWrites = 80; // How many writes per master - localparam int unsigned NoReads = 80; // How many reads per master - // timing parameters - localparam time CyclTime = 10ns; - localparam time ApplTime = 2ns; - localparam time TestTime = 8ns; - - // axi configuration - localparam int unsigned AxiIdWidthMasters = 4; - localparam int unsigned AxiIdUsed = 3; // Has to be <= AxiIdWidthMasters - localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); - localparam int unsigned AxiAddrWidth = 32; 
// Axi Address Width - localparam int unsigned AxiDataWidth = 64; // Axi Data Width - localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; - localparam int unsigned AxiUserWidth = 5; - - // in the bench can change this variables which are set here freely - localparam ace_pkg::ccu_cfg_t ccu_cfg = '{ - NoSlvPorts: TbNumMst, - MaxMstTrans: 10, - MaxSlvTrans: 6, - FallThrough: 1'b1, - LatencyMode: ace_pkg::NO_LATENCY, - AxiIdWidthSlvPorts: AxiIdWidthMasters, - AxiIdUsedSlvPorts: AxiIdUsed, - UniqueIds: TbUniqueIds, - AxiAddrWidth: AxiAddrWidth, - AxiDataWidth: AxiDataWidth - }; - - - typedef logic [AxiIdWidthMasters-1:0] id_mst_t; - typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; - typedef logic [AxiAddrWidth-1:0] addr_t; - typedef logic [AxiDataWidth-1:0] data_t; - typedef logic [AxiStrbWidth-1:0] strb_t; - typedef logic [AxiUserWidth-1:0] user_t; - - `ACE_TYPEDEF_AW_CHAN_T(aw_chan_mst_t, addr_t, id_mst_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(aw_chan_slv_t, addr_t, id_slv_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(b_chan_mst_t, id_mst_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(b_chan_slv_t, id_slv_t, user_t) - - `ACE_TYPEDEF_AR_CHAN_T(ar_chan_mst_t, addr_t, id_mst_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(ar_chan_slv_t, addr_t, id_slv_t, user_t) - `ACE_TYPEDEF_R_CHAN_T(r_chan_mst_t, data_t, id_mst_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(r_chan_slv_t, data_t, id_slv_t, user_t) - - `ACE_TYPEDEF_REQ_T(mst_req_t, aw_chan_mst_t, w_chan_t, ar_chan_mst_t) - `ACE_TYPEDEF_RESP_T(mst_resp_t, b_chan_mst_t, r_chan_mst_t) - `AXI_TYPEDEF_REQ_T(slv_req_t, aw_chan_slv_t, w_chan_t, ar_chan_slv_t) - `AXI_TYPEDEF_RESP_T(slv_resp_t, b_chan_slv_t, r_chan_slv_t) - - `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) - `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) - `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) - `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) - `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) - - - typedef ace_test::ace_rand_master #( - // AXI 
interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - .IW ( AxiIdWidthMasters ), - .UW ( AxiUserWidth ), - // Stimuli application and test time - .TA ( ApplTime ), - .TT ( TestTime ), - // Maximum number of read and write transactions in flight - .MAX_READ_TXNS ( 20 ), - .MAX_WRITE_TXNS ( 20 ), - .AXI_EXCLS ( TbEnExcl ), - .AXI_ATOPS ( TbEnAtop ), - .UNIQUE_IDS ( TbUniqueIds ) - ) ace_rand_master_t; - typedef axi_test::axi_rand_slave #( - // AXI interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - .IW ( AxiIdWidthSlaves ), - .UW ( AxiUserWidth ), - // Stimuli application and test time - .TA ( ApplTime ), - .TT ( TestTime ) - ) axi_rand_slave_t; - - typedef snoop_test::snoop_rand_slave #( - // ADDR and Data interface parameters - .AW ( AxiAddrWidth ), - .DW ( AxiDataWidth ), - // Stimuli application and test time - .TA ( ApplTime), - .TT ( TestTime), - .RAND_RESP ( '0), - // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels - .AC_MIN_WAIT_CYCLES ( 2), - .AC_MAX_WAIT_CYCLES ( 15), - .CR_MIN_WAIT_CYCLES ( 2), - .CR_MAX_WAIT_CYCLES ( 15), - .CD_MIN_WAIT_CYCLES ( 2), - .CD_MAX_WAIT_CYCLES ( 15) - )snoop_rand_slave_t; - // ------------- - // DUT signals - // ------------- - logic clk; - // DUT signals - logic rst_n; - logic [TbNumMst-1:0] end_of_sim; - - // master structs - mst_req_t [TbNumMst-1:0] masters_req; - mst_resp_t [TbNumMst-1:0] masters_resp; - - // slave structs - slv_req_t [TbNumSlv-1:0] slaves_req; - slv_resp_t [TbNumSlv-1:0] slaves_resp; - - // snoop structs - snoop_req_t [TbNumMst-1:0] snoop_req; - snoop_resp_t [TbNumMst-1:0] snoop_resp; - - - // ------------------------------- - // AXI Interfaces - // ------------------------------- - ACE_BUS #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master [TbNumMst-1:0] (); - ACE_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( 
AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master_dv [TbNumMst-1:0] (clk); - ACE_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthMasters ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) master_monitor_dv [TbNumMst-1:0] (clk); - for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters - `ACE_ASSIGN (master[i], master_dv[i]) - `ACE_ASSIGN_TO_REQ(masters_req[i], master[i]) - `ACE_ASSIGN_TO_RESP(masters_resp[i], master[i]) - end - - AXI_BUS #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave [TbNumSlv-1:0] (); - AXI_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave_dv [TbNumSlv-1:0](clk); - AXI_BUS_DV #( - .AXI_ADDR_WIDTH ( AxiAddrWidth ), - .AXI_DATA_WIDTH ( AxiDataWidth ), - .AXI_ID_WIDTH ( AxiIdWidthSlaves ), - .AXI_USER_WIDTH ( AxiUserWidth ) - ) slave_monitor_dv [TbNumSlv-1:0](clk); - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_conn_dv_slaves - `AXI_ASSIGN(slave_dv[i], slave[i]) - `AXI_ASSIGN_TO_REQ(slaves_req[i], slave[i]) - `AXI_ASSIGN_TO_RESP(slaves_resp[i], slave[i]) - end - - SNOOP_BUS #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop [TbNumMst-1:0] (); - SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop_dv [TbNumMst-1:0](clk); - SNOOP_BUS_DV #( - .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), - .SNOOP_DATA_WIDTH ( AxiDataWidth ) - ) snoop_monitor_dv [TbNumMst-1:0](clk); - for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop - `SNOOP_ASSIGN(snoop_dv[i], snoop[i]) - `SNOOP_ASSIGN_TO_REQ(snoop_req[i], snoop[i]) - `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i]) - end - - // ------------------------------- - // AXI and SNOOP Rand Masters and 
Slaves - // ------------------------------- - // Masters control simulation run time - ace_rand_master_t ace_rand_master [TbNumMst]; - for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + localparam CachelineBits = CachelineWords * WordWidth; + + // How many cached masters per group + localparam MstPerGroup = TbNumMst / NoMstGroups; + localparam NoGroups = NoMstGroups; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; + localparam int unsigned AxiAddrWidth = AddrWidth; + localparam int unsigned AxiDataWidth = DataWidth; + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + localparam int unsigned WriteBackLen = CachelineWords - 1; + localparam int unsigned WriteBackSize = $clog2(DataWidth / 8); + + localparam ace_ccu_pkg::ace_ccu_user_cfg_t CcuUserCfg = '{ + SlvPorts : TbNumMst, + MaxTransactions : 8, + ShareableWFifoDepth : 4, + ReplayEn : 0, + NLineWidth : AxiAddrWidth - $clog2(CachelineBits / 8), + AxiUniqueIds : 0, + AxiIdLookupBits : 3, + AxiAddrWidth : AxiAddrWidth, + AxiDataWidth : AxiDataWidth, + AxiUserWidth : AxiUserWidth, + AxiSlvIdWidth : AxiIdWidthMasters, + CachelineWidth : CachelineBits, + CutSlvReq : 1, + CutSlvResp : 1, + CutMstReq : 1, + CutMstResp : 1, + CutSnoopReq : 1, + CutSnoopResp : 1 + }; + + localparam ace_ccu_pkg::ace_ccu_cfg_t CcuCfg = ace_ccu_pkg::ace_ccu_build_cfg(CcuUserCfg); + localparam int unsigned AxiIdWidthSlave = CcuCfg.AxiMstIdWidth; + + typedef logic [AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiIdWidthSlave-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(ace_aw_chan_t, addr_t, id_t, user_t) + 
`AXI_TYPEDEF_W_CHAN_T (ace_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T (ace_b_chan_t, id_t, user_t ) + `ACE_TYPEDEF_AR_CHAN_T(ace_ar_chan_t, addr_t, id_t, user_t ) + `ACE_TYPEDEF_R_CHAN_T (ace_r_chan_t, data_t, id_t, user_t ) + `ACE_TYPEDEF_REQ_T (ace_req_t, ace_aw_chan_t, ace_w_chan_t, ace_ar_chan_t) + `ACE_TYPEDEF_RESP_T (ace_resp_t, ace_b_chan_t, ace_r_chan_t) + + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_W_CHAN_T (axi_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T (axi_b_chan_t, id_slv_t, user_t ) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_R_CHAN_T (axi_r_chan_t, data_t, id_slv_t, user_t) + `AXI_TYPEDEF_REQ_T (axi_req_t, axi_aw_chan_t, axi_w_chan_t, axi_ar_chan_t) + `AXI_TYPEDEF_RESP_T (axi_resp_t, axi_b_chan_t, axi_r_chan_t) + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + logic clk, rst_n; + logic [TbNumMst-1:0] end_of_sim = '0; + + // Defines domain_mask_t and domain_rule_t + `DOMAIN_TYPEDEF_ALL(TbNumMst, mst_bv_t, domain_rule_t) + + domain_rule_t [TbNumMst-1:0] domain_rule; initial begin - ace_rand_master[i] = new( master_dv[i] ); - end_of_sim[i] <= 1'b0; - ace_rand_master[i].add_memory_region(32'h0000_0000, 32'h0000_3000, - axi_pkg::DEVICE_NONBUFFERABLE); - ace_rand_master[i].reset(); - @(posedge rst_n); - ace_rand_master[i].run(NoReads, NoWrites); - end_of_sim[i] <= 1'b1; + for (int i = 0; i < TbNumMst; i++) begin + domain_rule[i].initiator = 1 << i; + domain_rule[i].inner = ~(1 << i); + domain_rule[i].outer = ~(1 << i); + end end - end - snoop_rand_slave_t snoop_rand_slave [TbNumMst]; - for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop - initial begin - snoop_rand_slave[i] = new( snoop_dv[i] ); - snoop_rand_slave[i].reset(); - @(posedge rst_n); - 
snoop_rand_slave[i].run(); + + // Cache data memory initial state + string data_mem_file_template = {MemDir, "/data_mem_%0d.mem"}; + // Cache tag memory initial state + string tag_mem_file_template = {MemDir, "/tag_mem_%0d.mem"}; + // Cache line status initial state + string status_file_template = {MemDir, "/state_%0d.mem"}; + // Cache transactions + string txn_file_template = {MemDir, "/txns_%0d.txt"}; + // Initial main memory state + string init_main_mem = {MemDir, "/main_mem.mem"}; + // Logged cache state changes + string diff_file_template = {MemDir, "/cache_diff_%0d.txt"}; + string diff_main_mem = {MemDir, "/main_mem_diff.txt"}; + + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiIdWidthMasters ) + ) ace_dv_intf [TbNumMst-1:0] (clk); + + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiIdWidthMasters ) + ) ace_intf [TbNumMst-1:0](); + + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv_intf [TbNumMst-1:0](clk); + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_intf [TbNumMst-1:0](); + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_dv_intf (clk); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) axi_intf(); + + MONITOR_BUS_DV #( + .ADDR_WIDTH (AxiAddrWidth), + .DATA_WIDTH ( AxiDataWidth), + .ID_WIDTH ( AxiIdWidthSlave), + .USER_WIDTH (AxiUserWidth) + ) sim_mem_mon_intf (clk); + + // Interface with clock for generating delays + CLK_IF clk_if (clk); + + typedef virtual ACE_BUS_DV #( + .AXI_ADDR_WIDTH (AxiAddrWidth), + .AXI_DATA_WIDTH 
(AxiDataWidth), + .AXI_ID_WIDTH (AxiIdWidthMasters), + .AXI_USER_WIDTH (AxiIdWidthMasters) + ) ace_bus_v_t; + + typedef virtual SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH (AxiAddrWidth), + .SNOOP_DATA_WIDTH (AxiDataWidth) + ) snoop_bus_v_t; + + typedef virtual CLK_IF clk_if_v_t; + + typedef virtual MONITOR_BUS_DV #( + .ADDR_WIDTH (AxiAddrWidth), + .DATA_WIDTH ( AxiDataWidth), + .ID_WIDTH ( AxiIdWidthSlave), + .USER_WIDTH (AxiUserWidth) + ) mon_bus_t; + + + // Clock generator + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + + cache_test_pkg::mem_logger #( + .AW(AxiAddrWidth), + .DW(AxiDataWidth), + .IW(AxiIdWidthSlave), + .UW(AxiUserWidth), + .TA(ApplTime), + .TT(TestTime), + .mon_bus_t(mon_bus_t) + ) axi_mem_logger; + + cache_test_pkg::cache_top_agent #( + .AW (AxiAddrWidth), + .DW (AxiDataWidth), + .AC_AW (AxiAddrWidth), + .CD_DW (AxiDataWidth), + .IW (AxiIdWidthMasters), + .UW (AxiUserWidth), + .TA (ApplTime), + .TT (TestTime), + .CACHELINE_WORDS (CachelineWords), + .WORD_WIDTH (WordWidth), + .WAYS (Ways), + .SETS (Sets), + .ace_bus_t (ace_bus_v_t), + .snoop_bus_t (snoop_bus_v_t), + .clk_if_t (clk_if_v_t) + ) ace_master [TbNumMst-1:0]; + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_cache_agents + `ACE_ASSIGN(ace_intf[i], ace_dv_intf[i]); + end + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(snoop_dv_intf[i], snoop_intf[i]) end - end + for (genvar i = 0; i < TbNumMst; i++) begin : init_cache_agents + initial begin + string data_mem_file, tag_mem_file, status_file, txn_file; + string diff_file; + $sformat(data_mem_file, data_mem_file_template, i); + $sformat(tag_mem_file, tag_mem_file_template, i); + $sformat(status_file, status_file_template, i); + $sformat(txn_file, txn_file_template, i); + $sformat(diff_file, diff_file_template, i); + ace_master[i] = new( + ace_dv_intf[i], + snoop_dv_intf[i], + clk_if, + data_mem_file, + tag_mem_file, + status_file, 
+ txn_file, + diff_file, + i + ); + ace_master[i].reset(); + @(posedge rst_n); + ace_master[i].run(); + @(posedge clk); + @(posedge clk); + @(posedge clk); + end_of_sim[i] = '1; + end + end + + always @(*) begin + if (&end_of_sim) $finish(); + end - axi_rand_slave_t axi_rand_slave [1]; - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_rand_slave initial begin - axi_rand_slave[i] = new( slave_dv[i] ); - axi_rand_slave[i].reset(); - @(posedge rst_n); - axi_rand_slave[i].run(); + axi_mem_logger = new( + sim_mem_mon_intf, + diff_main_mem + ); + @(posedge rst_n); + axi_mem_logger.run(); end - end - - - - - initial begin : proc_monitor - static tb_ace_ccu_pkg::ace_ccu_monitor #( - .AxiAddrWidth ( AxiAddrWidth ), - .AxiDataWidth ( AxiDataWidth ), - .AxiIdWidthMasters ( AxiIdWidthMasters ), - .AxiIdWidthSlaves ( AxiIdWidthSlaves ), - .AxiUserWidth ( AxiUserWidth ), - .NoMasters ( TbNumMst ), - .NoSlaves ( TbNumSlv ), - .TimeTest ( TestTime ) - ) monitor = new( master_monitor_dv, slave_monitor_dv, snoop_monitor_dv ); - fork - monitor.run(); - do begin - #TestTime; - if(end_of_sim == '1) begin - monitor.print_result(); - $stop(); - end - @(posedge clk); - end while (1'b1); - join - end - - //----------------------------------- - // Clock generator - //----------------------------------- - clk_rst_gen #( - .ClkPeriod ( CyclTime ), - .RstClkCycles ( 5 ) - ) i_clk_gen ( - .clk_o (clk), - .rst_no(rst_n) - ); - - //----------------------------------- - // DUT - //----------------------------------- - ace_ccu_top_intf #( - .AXI_USER_WIDTH ( AxiUserWidth ), - .Cfg ( ccu_cfg ) - ) i_ccu_dut ( - .clk_i ( clk ), - .rst_ni ( rst_n ), - .test_i ( 1'b0 ), - .snoop_ports ( snoop ), - .slv_ports ( master ), - .mst_ports ( slave[0] ) - ); - - // logger for master modules - for (genvar i = 0; i < TbNumMst; i++) begin : gen_master_logger - ace_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_master_%0d", i)), - 
.aw_chan_t ( aw_chan_mst_t ), // axi AW type - .w_chan_t ( w_chan_t ), // axi W type - .b_chan_t ( b_chan_mst_t ), // axi B type - .ar_chan_t ( ar_chan_mst_t ), // axi AR type - .r_chan_t ( r_chan_mst_t ) // axi R type - ) i_mst_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - .end_sim_i ( &end_of_sim ), - // AW channel - .aw_chan_i ( masters_req[i].aw ), - .aw_valid_i ( masters_req[i].aw_valid ), - .aw_ready_i ( masters_resp[i].aw_ready ), - // W channel - .w_chan_i ( masters_req[i].w ), - .w_valid_i ( masters_req[i].w_valid ), - .w_ready_i ( masters_resp[i].w_ready ), - // B channel - .b_chan_i ( masters_resp[i].b ), - .b_valid_i ( masters_resp[i].b_valid ), - .b_ready_i ( masters_req[i].b_ready ), - // AR channel - .ar_chan_i ( masters_req[i].ar ), - .ar_valid_i ( masters_req[i].ar_valid ), - .ar_ready_i ( masters_resp[i].ar_ready ), - // R channel - .r_chan_i ( masters_resp[i].r ), - .r_valid_i ( masters_resp[i].r_valid ), - .r_ready_i ( masters_req[i].r_ready ) - ); - end - // logger for slave modules - for (genvar i = 0; i < 1; i++) begin : gen_slave_logger - axi_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_slave_%0d",i)), - .aw_chan_t ( aw_chan_slv_t ), // axi AW type - .w_chan_t ( w_chan_t ), // axi W type - .b_chan_t ( b_chan_slv_t ), // axi B type - .ar_chan_t ( ar_chan_slv_t ), // axi AR type - .r_chan_t ( r_chan_slv_t ) // axi R type - ) i_slv_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - .end_sim_i ( &end_of_sim ), - // AW channel - .aw_chan_i ( slaves_req[i].aw ), - .aw_valid_i ( slaves_req[i].aw_valid ), - .aw_ready_i ( slaves_resp[i].aw_ready ), - // W channel - .w_chan_i ( slaves_req[i].w ), - .w_valid_i ( slaves_req[i].w_valid ), - .w_ready_i ( slaves_resp[i].w_ready ), - // B channel - .b_chan_i ( slaves_resp[i].b ), - 
.b_valid_i ( slaves_resp[i].b_valid ), - .b_ready_i ( slaves_req[i].b_ready ), - // AR channel - .ar_chan_i ( slaves_req[i].ar ), - .ar_valid_i ( slaves_req[i].ar_valid ), - .ar_ready_i ( slaves_resp[i].ar_ready ), - // R channel - .r_chan_i ( slaves_resp[i].r ), - .r_valid_i ( slaves_resp[i].r_valid ), - .r_ready_i ( slaves_req[i].r_ready ) + + + // AXI Simulation Memory + axi_sim_mem_intf #( + // AXI interface parameters + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlave ), + .AXI_USER_WIDTH ( AxiUserWidth ), + .APPL_DELAY ( ApplTime ), + .ACQ_DELAY ( TestTime ) + ) axi_mem ( + .clk_i(clk), + .rst_ni(rst_n), + .axi_slv(axi_intf), + .mon_w_valid_o(sim_mem_mon_intf.w_valid), + .mon_w_addr_o(sim_mem_mon_intf.w_addr), + .mon_w_data_o(sim_mem_mon_intf.w_data), + .mon_w_id_o(sim_mem_mon_intf.w_id), + .mon_w_user_o(sim_mem_mon_intf.w_user), + .mon_w_beat_count_o(sim_mem_mon_intf.w_beat_count), + .mon_w_last_o(sim_mem_mon_intf.w_last), + .mon_r_valid_o(sim_mem_mon_intf.r_valid), + .mon_r_addr_o(sim_mem_mon_intf.r_addr), + .mon_r_data_o(sim_mem_mon_intf.r_data), + .mon_r_id_o(sim_mem_mon_intf.r_id), + .mon_r_user_o(sim_mem_mon_intf.r_user), + .mon_r_beat_count_o(sim_mem_mon_intf.r_beat_count), + .mon_r_last_o(sim_mem_mon_intf.r_last) ); - end - -// logger for snoop modules - for (genvar i = 0; i < TbNumMst; i++) begin : gen_snoop_logger - snoop_chan_logger #( - .TestTime ( TestTime ), // Time after clock, where sampling happens - .LoggerName( $sformatf("axi_logger_snoop_%0d",i)), - .ac_chan_t ( snoop_ac_t ), // AW type - .cr_chan_t ( snoop_cr_t ), // CR type - .cd_chan_t ( snoop_cd_t ) // CD type - ) i_snoop_channel_logger ( - .clk_i ( clk ), // Clock - .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling - .end_sim_i ( &end_of_sim ), - // AC channel - .ac_chan_i ( snoop_req[i].ac ), - .ac_valid_i ( snoop_req[i].ac_valid ), - .ac_ready_i ( snoop_resp[i].ac_ready ), - // CR channel - .cr_chan_i 
( snoop_resp[i].cr_resp ), - .cr_valid_i ( snoop_resp[i].cr_valid), - .cr_ready_i ( snoop_req[i].cr_ready ), - // CR channel - .cd_chan_i ( snoop_resp[i].cd ), - .cd_valid_i ( snoop_resp[i].cd_valid), - .cd_ready_i ( snoop_req[i].cd_ready ) + + initial begin + $readmemh(init_main_mem, axi_mem.i_sim_mem.mem); + end + + ace_ccu_top_intf #( + .CCU_CFG (CcuCfg) + ) ccu ( + .clk_i (clk), + .rst_ni (rst_n), + .domain_rule_i (domain_rule), + .slv (ace_intf), + .snoop (snoop_intf), + .mst (axi_intf) ); - end - - for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_master_monitor - assign master_monitor_dv[i].aw_id = master[i].aw_id ; - assign master_monitor_dv[i].aw_addr = master[i].aw_addr ; - assign master_monitor_dv[i].aw_len = master[i].aw_len ; - assign master_monitor_dv[i].aw_size = master[i].aw_size ; - assign master_monitor_dv[i].aw_burst = master[i].aw_burst ; - assign master_monitor_dv[i].aw_lock = master[i].aw_lock ; - assign master_monitor_dv[i].aw_cache = master[i].aw_cache ; - assign master_monitor_dv[i].aw_prot = master[i].aw_prot ; - assign master_monitor_dv[i].aw_qos = master[i].aw_qos ; - assign master_monitor_dv[i].aw_region = master[i].aw_region; - assign master_monitor_dv[i].aw_atop = master[i].aw_atop ; - assign master_monitor_dv[i].aw_user = master[i].aw_user ; - assign master_monitor_dv[i].aw_valid = master[i].aw_valid ; - assign master_monitor_dv[i].aw_ready = master[i].aw_ready ; - assign master_monitor_dv[i].aw_snoop = master[i].aw_snoop; - assign master_monitor_dv[i].aw_bar = master[i].aw_bar ; - assign master_monitor_dv[i].aw_domain = master[i].aw_domain ; - assign master_monitor_dv[i].aw_awunique = master[i].aw_awunique ; - assign master_monitor_dv[i].w_data = master[i].w_data ; - assign master_monitor_dv[i].w_strb = master[i].w_strb ; - assign master_monitor_dv[i].w_last = master[i].w_last ; - assign master_monitor_dv[i].w_user = master[i].w_user ; - assign master_monitor_dv[i].w_valid = master[i].w_valid ; - assign 
master_monitor_dv[i].w_ready = master[i].w_ready ; - assign master_monitor_dv[i].b_id = master[i].b_id ; - assign master_monitor_dv[i].b_resp = master[i].b_resp ; - assign master_monitor_dv[i].b_user = master[i].b_user ; - assign master_monitor_dv[i].b_valid = master[i].b_valid ; - assign master_monitor_dv[i].b_ready = master[i].b_ready ; - assign master_monitor_dv[i].ar_id = master[i].ar_id ; - assign master_monitor_dv[i].ar_addr = master[i].ar_addr ; - assign master_monitor_dv[i].ar_len = master[i].ar_len ; - assign master_monitor_dv[i].ar_size = master[i].ar_size ; - assign master_monitor_dv[i].ar_burst = master[i].ar_burst ; - assign master_monitor_dv[i].ar_lock = master[i].ar_lock ; - assign master_monitor_dv[i].ar_cache = master[i].ar_cache ; - assign master_monitor_dv[i].ar_prot = master[i].ar_prot ; - assign master_monitor_dv[i].ar_qos = master[i].ar_qos ; - assign master_monitor_dv[i].ar_region = master[i].ar_region; - assign master_monitor_dv[i].ar_user = master[i].ar_user ; - assign master_monitor_dv[i].ar_valid = master[i].ar_valid ; - assign master_monitor_dv[i].ar_ready = master[i].ar_ready ; - assign master_monitor_dv[i].ar_snoop = master[i].ar_snoop ; - assign master_monitor_dv[i].ar_bar = master[i].ar_bar ; - assign master_monitor_dv[i].ar_domain = master[i].ar_domain ; - assign master_monitor_dv[i].r_id = master[i].r_id ; - assign master_monitor_dv[i].r_data = master[i].r_data ; - assign master_monitor_dv[i].r_resp = master[i].r_resp ; - assign master_monitor_dv[i].r_last = master[i].r_last ; - assign master_monitor_dv[i].r_user = master[i].r_user ; - assign master_monitor_dv[i].r_valid = master[i].r_valid ; - assign master_monitor_dv[i].r_ready = master[i].r_ready ; - end - for (genvar i = 0; i < TbNumSlv; i++) begin : gen_connect_slave_monitor - assign slave_monitor_dv[i].aw_id = slave[i].aw_id ; - assign slave_monitor_dv[i].aw_addr = slave[i].aw_addr ; - assign slave_monitor_dv[i].aw_len = slave[i].aw_len ; - assign slave_monitor_dv[i].aw_size 
= slave[i].aw_size ; - assign slave_monitor_dv[i].aw_burst = slave[i].aw_burst ; - assign slave_monitor_dv[i].aw_lock = slave[i].aw_lock ; - assign slave_monitor_dv[i].aw_cache = slave[i].aw_cache ; - assign slave_monitor_dv[i].aw_prot = slave[i].aw_prot ; - assign slave_monitor_dv[i].aw_qos = slave[i].aw_qos ; - assign slave_monitor_dv[i].aw_region = slave[i].aw_region; - assign slave_monitor_dv[i].aw_atop = slave[i].aw_atop ; - assign slave_monitor_dv[i].aw_user = slave[i].aw_user ; - assign slave_monitor_dv[i].aw_valid = slave[i].aw_valid ; - assign slave_monitor_dv[i].aw_ready = slave[i].aw_ready ; - assign slave_monitor_dv[i].w_data = slave[i].w_data ; - assign slave_monitor_dv[i].w_strb = slave[i].w_strb ; - assign slave_monitor_dv[i].w_last = slave[i].w_last ; - assign slave_monitor_dv[i].w_user = slave[i].w_user ; - assign slave_monitor_dv[i].w_valid = slave[i].w_valid ; - assign slave_monitor_dv[i].w_ready = slave[i].w_ready ; - assign slave_monitor_dv[i].b_id = slave[i].b_id ; - assign slave_monitor_dv[i].b_resp = slave[i].b_resp ; - assign slave_monitor_dv[i].b_user = slave[i].b_user ; - assign slave_monitor_dv[i].b_valid = slave[i].b_valid ; - assign slave_monitor_dv[i].b_ready = slave[i].b_ready ; - assign slave_monitor_dv[i].ar_id = slave[i].ar_id ; - assign slave_monitor_dv[i].ar_addr = slave[i].ar_addr ; - assign slave_monitor_dv[i].ar_len = slave[i].ar_len ; - assign slave_monitor_dv[i].ar_size = slave[i].ar_size ; - assign slave_monitor_dv[i].ar_burst = slave[i].ar_burst ; - assign slave_monitor_dv[i].ar_lock = slave[i].ar_lock ; - assign slave_monitor_dv[i].ar_cache = slave[i].ar_cache ; - assign slave_monitor_dv[i].ar_prot = slave[i].ar_prot ; - assign slave_monitor_dv[i].ar_qos = slave[i].ar_qos ; - assign slave_monitor_dv[i].ar_region = slave[i].ar_region; - assign slave_monitor_dv[i].ar_user = slave[i].ar_user ; - assign slave_monitor_dv[i].ar_valid = slave[i].ar_valid ; - assign slave_monitor_dv[i].ar_ready = slave[i].ar_ready ; - assign 
slave_monitor_dv[i].r_id = slave[i].r_id ; - assign slave_monitor_dv[i].r_data = slave[i].r_data ; - assign slave_monitor_dv[i].r_resp = slave[i].r_resp ; - assign slave_monitor_dv[i].r_last = slave[i].r_last ; - assign slave_monitor_dv[i].r_user = slave[i].r_user ; - assign slave_monitor_dv[i].r_valid = slave[i].r_valid ; - assign slave_monitor_dv[i].r_ready = slave[i].r_ready ; - end - for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_snoop_monitor - assign snoop_monitor_dv[i].ac_valid = snoop[i].ac_valid; - assign snoop_monitor_dv[i].ac_ready = snoop[i].ac_ready; - assign snoop_monitor_dv[i].ac_snoop = snoop[i].ac_snoop; - assign snoop_monitor_dv[i].ac_addr = snoop[i].ac_addr; - assign snoop_monitor_dv[i].ac_prot = snoop[i].ac_prot; - assign snoop_monitor_dv[i].cr_valid = snoop[i].cr_valid; - assign snoop_monitor_dv[i].cr_ready = snoop[i].cr_ready; - assign snoop_monitor_dv[i].cr_resp = snoop[i].cr_resp; - assign snoop_monitor_dv[i].cd_valid = snoop[i].cd_valid; - assign snoop_monitor_dv[i].cd_ready = snoop[i].cd_ready; - assign snoop_monitor_dv[i].cd_data = snoop[i].cd_data; - assign snoop_monitor_dv[i].cd_last = snoop[i].cd_last; - end -endmodule \ No newline at end of file +endmodule diff --git a/test/tb_ace_ccu_top_old.sv b/test/tb_ace_ccu_top_old.sv new file mode 100644 index 0000000..ea2c27d --- /dev/null +++ b/test/tb_ace_ccu_top_old.sv @@ -0,0 +1,536 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright (c) 2022 PlanV GmbH +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Florian Zaruba +// - Andreas Kurth + +// Directed Random Verification Testbench for `axi_xbar`: The crossbar is instantiated with +// a number of random axi master and slave modules. Each random master executes a fixed number of +// writes and reads over the whole address map. All masters simultaneously issue transactions +// through the crossbar, thereby saturating it. A monitor, which snoops the transactions of each +// master and slave port and models the crossbar with a network of FIFOs, checks whether each +// transaction follows the expected route. + +`include "ace/typedef.svh" +`include "ace/assign.svh" + +module tb_ace_ccu_top #( + parameter bit TbEnAtop = 1'b1, // enable atomic operations (ATOPs) + parameter bit TbEnExcl = 1'b0, // enable exclusive accesses + parameter bit TbUniqueIds = 1'b0, // restrict to only unique IDs + parameter int unsigned TbNumMst = 32'd4, // how many AXI masters there are + parameter int unsigned TbNumSlv = 32'd1 // how many AXI slaves there are +); + // Random master no Transactions + localparam int unsigned NoWrites = 80; // How many writes per master + localparam int unsigned NoReads = 80; // How many reads per master + // timing parameters + localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // axi configuration + localparam int unsigned AxiIdWidthMasters = 4; + localparam int unsigned AxiIdUsed = 3; // Has to be <= AxiIdWidthMasters + localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1); + localparam int unsigned AxiAddrWidth = 32; // Axi Address Width + localparam
int unsigned AxiDataWidth = 64; // Axi Data Width + localparam int unsigned AxiStrbWidth = AxiDataWidth / 8; + localparam int unsigned AxiUserWidth = 5; + + // in the bench can change this variables which are set here freely + localparam ace_pkg::ccu_cfg_t ccu_cfg = '{ + NoSlvPorts: TbNumMst, + MaxMstTrans: 10, + MaxSlvTrans: 6, + FallThrough: 1'b1, + LatencyMode: ace_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: AxiIdWidthMasters, + AxiIdUsedSlvPorts: AxiIdUsed, + UniqueIds: TbUniqueIds, + AxiAddrWidth: AxiAddrWidth, + AxiDataWidth: AxiDataWidth + }; + + + typedef logic [AxiIdWidthMasters-1:0] id_mst_t; + typedef logic [AxiIdWidthSlaves-1:0] id_slv_t; + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiStrbWidth-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + + `ACE_TYPEDEF_AW_CHAN_T(aw_chan_mst_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_slv_t, addr_t, id_slv_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_mst_t, id_mst_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_slv_t, id_slv_t, user_t) + + `ACE_TYPEDEF_AR_CHAN_T(ar_chan_mst_t, addr_t, id_mst_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_slv_t, addr_t, id_slv_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(r_chan_mst_t, data_t, id_mst_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_slv_t, data_t, id_slv_t, user_t) + + `ACE_TYPEDEF_REQ_T(mst_req_t, aw_chan_mst_t, w_chan_t, ar_chan_mst_t) + `ACE_TYPEDEF_RESP_T(mst_resp_t, b_chan_mst_t, r_chan_mst_t) + `AXI_TYPEDEF_REQ_T(slv_req_t, aw_chan_slv_t, w_chan_t, ar_chan_slv_t) + `AXI_TYPEDEF_RESP_T(slv_resp_t, b_chan_slv_t, r_chan_slv_t) + + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + + typedef ace_test::ace_rand_master #( + // AXI interface parameters + .AW ( 
AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthMasters ), + .UW ( AxiUserWidth ), + // Stimuli application and test time + .TA ( ApplTime ), + .TT ( TestTime ), + // Maximum number of read and write transactions in flight + .MAX_READ_TXNS ( 20 ), + .MAX_WRITE_TXNS ( 20 ), + .AXI_EXCLS ( TbEnExcl ), + .AXI_ATOPS ( TbEnAtop ), + .UNIQUE_IDS ( TbUniqueIds ) + ) ace_rand_master_t; + typedef axi_test::axi_rand_slave #( + // AXI interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthSlaves ), + .UW ( AxiUserWidth ), + // Stimuli application and test time + .TA ( ApplTime ), + .TT ( TestTime ) + ) axi_rand_slave_t; + + typedef snoop_test::snoop_rand_slave #( + // ADDR and Data interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + // Stimuli application and test time + .TA ( ApplTime), + .TT ( TestTime), + .RAND_RESP ( '0), + // Upper and lower bounds on wait cycles on Ax, W, and resp (R and B) channels + .AC_MIN_WAIT_CYCLES ( 2), + .AC_MAX_WAIT_CYCLES ( 15), + .CR_MIN_WAIT_CYCLES ( 2), + .CR_MAX_WAIT_CYCLES ( 15), + .CD_MIN_WAIT_CYCLES ( 2), + .CD_MAX_WAIT_CYCLES ( 15) + )snoop_rand_slave_t; + // ------------- + // DUT signals + // ------------- + logic clk; + // DUT signals + logic rst_n; + logic [TbNumMst-1:0] end_of_sim; + + // master structs + mst_req_t [TbNumMst-1:0] masters_req; + mst_resp_t [TbNumMst-1:0] masters_resp; + + // slave structs + slv_req_t [TbNumSlv-1:0] slaves_req; + slv_resp_t [TbNumSlv-1:0] slaves_resp; + + // snoop structs + snoop_req_t [TbNumMst-1:0] snoop_req; + snoop_resp_t [TbNumMst-1:0] snoop_resp; + + + // ------------------------------- + // AXI Interfaces + // ------------------------------- + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master [TbNumMst-1:0] (); + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( 
AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_dv [TbNumMst-1:0] (clk); + ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_monitor_dv [TbNumMst-1:0] (clk); + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters + `ACE_ASSIGN (master[i], master_dv[i]) + `ACE_ASSIGN_TO_REQ(masters_req[i], master[i]) + `ACE_ASSIGN_TO_RESP(masters_resp[i], master[i]) + end + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave [TbNumSlv-1:0] (); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_dv [TbNumSlv-1:0](clk); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_monitor_dv [TbNumSlv-1:0](clk); + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_conn_dv_slaves + `AXI_ASSIGN(slave_dv[i], slave[i]) + `AXI_ASSIGN_TO_REQ(slaves_req[i], slave[i]) + `AXI_ASSIGN_TO_RESP(slaves_resp[i], slave[i]) + end + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop [TbNumMst-1:0] (); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv [TbNumMst-1:0](clk); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_monitor_dv [TbNumMst-1:0](clk); + for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop + `SNOOP_ASSIGN(snoop_dv[i], snoop[i]) + `SNOOP_ASSIGN_TO_REQ(snoop_req[i], snoop[i]) + `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop[i]) + end + + // ------------------------------- + // AXI and SNOOP Rand Masters and Slaves + // 
------------------------------- + // Masters control simulation run time + ace_rand_master_t ace_rand_master [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master + initial begin + ace_rand_master[i] = new( master_dv[i] ); + end_of_sim[i] <= 1'b0; + ace_rand_master[i].add_memory_region(32'h0000_0000, 32'h0000_3000, + axi_pkg::DEVICE_NONBUFFERABLE); + ace_rand_master[i].reset(); + @(posedge rst_n); + ace_rand_master[i].run(NoReads, NoWrites); + end_of_sim[i] <= 1'b1; + end + end + + snoop_rand_slave_t snoop_rand_slave [TbNumMst]; + for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_snoop + initial begin + snoop_rand_slave[i] = new( snoop_dv[i] ); + snoop_rand_slave[i].reset(); + @(posedge rst_n); + snoop_rand_slave[i].run(); + end + end + + + axi_rand_slave_t axi_rand_slave [1]; + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_rand_slave + initial begin + axi_rand_slave[i] = new( slave_dv[i] ); + axi_rand_slave[i].reset(); + @(posedge rst_n); + axi_rand_slave[i].run(); + end + end + + + + + initial begin : proc_monitor + static tb_ace_ccu_pkg::ace_ccu_monitor #( + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiDataWidth ), + .AxiIdWidthMasters ( AxiIdWidthMasters ), + .AxiIdWidthSlaves ( AxiIdWidthSlaves ), + .AxiUserWidth ( AxiUserWidth ), + .NoMasters ( TbNumMst ), + .NoSlaves ( TbNumSlv ), + .TimeTest ( TestTime ) + ) monitor = new( master_monitor_dv, slave_monitor_dv, snoop_monitor_dv ); + fork + monitor.run(); + do begin + #TestTime; + if(end_of_sim == '1) begin + monitor.print_result(); + $stop(); + end + @(posedge clk); + end while (1'b1); + join + end + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no(rst_n) + ); + + //----------------------------------- + // DUT + //----------------------------------- + ace_ccu_top_intf #( + .AXI_USER_WIDTH ( AxiUserWidth ), + .Cfg 
( ccu_cfg ) + ) i_ccu_dut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_i ( 1'b0 ), + .snoop_ports ( snoop ), + .slv_ports ( master ), + .mst_ports ( slave[0] ) + ); + + // logger for master modules + for (genvar i = 0; i < TbNumMst; i++) begin : gen_master_logger + ace_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_master_%0d", i)), + .aw_chan_t ( aw_chan_mst_t ), // axi AW type + .w_chan_t ( w_chan_t ), // axi W type + .b_chan_t ( b_chan_mst_t ), // axi B type + .ar_chan_t ( ar_chan_mst_t ), // axi AR type + .r_chan_t ( r_chan_mst_t ) // axi R type + ) i_mst_channel_logger ( + .clk_i ( clk ), // Clock + .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AW channel + .aw_chan_i ( masters_req[i].aw ), + .aw_valid_i ( masters_req[i].aw_valid ), + .aw_ready_i ( masters_resp[i].aw_ready ), + // W channel + .w_chan_i ( masters_req[i].w ), + .w_valid_i ( masters_req[i].w_valid ), + .w_ready_i ( masters_resp[i].w_ready ), + // B channel + .b_chan_i ( masters_resp[i].b ), + .b_valid_i ( masters_resp[i].b_valid ), + .b_ready_i ( masters_req[i].b_ready ), + // AR channel + .ar_chan_i ( masters_req[i].ar ), + .ar_valid_i ( masters_req[i].ar_valid ), + .ar_ready_i ( masters_resp[i].ar_ready ), + // R channel + .r_chan_i ( masters_resp[i].r ), + .r_valid_i ( masters_resp[i].r_valid ), + .r_ready_i ( masters_req[i].r_ready ) + ); + end + // logger for slave modules + for (genvar i = 0; i < 1; i++) begin : gen_slave_logger + axi_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_slave_%0d",i)), + .aw_chan_t ( aw_chan_slv_t ), // axi AW type + .w_chan_t ( w_chan_t ), // axi W type + .b_chan_t ( b_chan_slv_t ), // axi B type + .ar_chan_t ( ar_chan_slv_t ), // axi AR type + .r_chan_t ( r_chan_slv_t ) // axi R type + ) i_slv_channel_logger ( + .clk_i ( clk ), // Clock + 
.rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AW channel + .aw_chan_i ( slaves_req[i].aw ), + .aw_valid_i ( slaves_req[i].aw_valid ), + .aw_ready_i ( slaves_resp[i].aw_ready ), + // W channel + .w_chan_i ( slaves_req[i].w ), + .w_valid_i ( slaves_req[i].w_valid ), + .w_ready_i ( slaves_resp[i].w_ready ), + // B channel + .b_chan_i ( slaves_resp[i].b ), + .b_valid_i ( slaves_resp[i].b_valid ), + .b_ready_i ( slaves_req[i].b_ready ), + // AR channel + .ar_chan_i ( slaves_req[i].ar ), + .ar_valid_i ( slaves_req[i].ar_valid ), + .ar_ready_i ( slaves_resp[i].ar_ready ), + // R channel + .r_chan_i ( slaves_resp[i].r ), + .r_valid_i ( slaves_resp[i].r_valid ), + .r_ready_i ( slaves_req[i].r_ready ) + ); + end + +// logger for snoop modules + for (genvar i = 0; i < TbNumMst; i++) begin : gen_snoop_logger + snoop_chan_logger #( + .TestTime ( TestTime ), // Time after clock, where sampling happens + .LoggerName( $sformatf("axi_logger_snoop_%0d",i)), + .ac_chan_t ( snoop_ac_t ), // AW type + .cr_chan_t ( snoop_cr_t ), // CR type + .cd_chan_t ( snoop_cd_t ) // CD type + ) i_snoop_channel_logger ( + .clk_i ( clk ), // Clock + .rst_ni ( rst_n ), // Asynchronous reset active low, when `1'b0` no sampling + .end_sim_i ( &end_of_sim ), + // AC channel + .ac_chan_i ( snoop_req[i].ac ), + .ac_valid_i ( snoop_req[i].ac_valid ), + .ac_ready_i ( snoop_resp[i].ac_ready ), + // CR channel + .cr_chan_i ( snoop_resp[i].cr_resp ), + .cr_valid_i ( snoop_resp[i].cr_valid), + .cr_ready_i ( snoop_req[i].cr_ready ), + // CR channel + .cd_chan_i ( snoop_resp[i].cd ), + .cd_valid_i ( snoop_resp[i].cd_valid), + .cd_ready_i ( snoop_req[i].cd_ready ) + ); + end + + for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_master_monitor + assign master_monitor_dv[i].aw_id = master[i].aw_id ; + assign master_monitor_dv[i].aw_addr = master[i].aw_addr ; + assign master_monitor_dv[i].aw_len = master[i].aw_len ; + assign 
master_monitor_dv[i].aw_size = master[i].aw_size ; + assign master_monitor_dv[i].aw_burst = master[i].aw_burst ; + assign master_monitor_dv[i].aw_lock = master[i].aw_lock ; + assign master_monitor_dv[i].aw_cache = master[i].aw_cache ; + assign master_monitor_dv[i].aw_prot = master[i].aw_prot ; + assign master_monitor_dv[i].aw_qos = master[i].aw_qos ; + assign master_monitor_dv[i].aw_region = master[i].aw_region; + assign master_monitor_dv[i].aw_atop = master[i].aw_atop ; + assign master_monitor_dv[i].aw_user = master[i].aw_user ; + assign master_monitor_dv[i].aw_valid = master[i].aw_valid ; + assign master_monitor_dv[i].aw_ready = master[i].aw_ready ; + assign master_monitor_dv[i].aw_snoop = master[i].aw_snoop; + assign master_monitor_dv[i].aw_bar = master[i].aw_bar ; + assign master_monitor_dv[i].aw_domain = master[i].aw_domain ; + assign master_monitor_dv[i].aw_awunique = master[i].aw_awunique ; + assign master_monitor_dv[i].w_data = master[i].w_data ; + assign master_monitor_dv[i].w_strb = master[i].w_strb ; + assign master_monitor_dv[i].w_last = master[i].w_last ; + assign master_monitor_dv[i].w_user = master[i].w_user ; + assign master_monitor_dv[i].w_valid = master[i].w_valid ; + assign master_monitor_dv[i].w_ready = master[i].w_ready ; + assign master_monitor_dv[i].b_id = master[i].b_id ; + assign master_monitor_dv[i].b_resp = master[i].b_resp ; + assign master_monitor_dv[i].b_user = master[i].b_user ; + assign master_monitor_dv[i].b_valid = master[i].b_valid ; + assign master_monitor_dv[i].b_ready = master[i].b_ready ; + assign master_monitor_dv[i].ar_id = master[i].ar_id ; + assign master_monitor_dv[i].ar_addr = master[i].ar_addr ; + assign master_monitor_dv[i].ar_len = master[i].ar_len ; + assign master_monitor_dv[i].ar_size = master[i].ar_size ; + assign master_monitor_dv[i].ar_burst = master[i].ar_burst ; + assign master_monitor_dv[i].ar_lock = master[i].ar_lock ; + assign master_monitor_dv[i].ar_cache = master[i].ar_cache ; + assign 
master_monitor_dv[i].ar_prot = master[i].ar_prot ; + assign master_monitor_dv[i].ar_qos = master[i].ar_qos ; + assign master_monitor_dv[i].ar_region = master[i].ar_region; + assign master_monitor_dv[i].ar_user = master[i].ar_user ; + assign master_monitor_dv[i].ar_valid = master[i].ar_valid ; + assign master_monitor_dv[i].ar_ready = master[i].ar_ready ; + assign master_monitor_dv[i].ar_snoop = master[i].ar_snoop ; + assign master_monitor_dv[i].ar_bar = master[i].ar_bar ; + assign master_monitor_dv[i].ar_domain = master[i].ar_domain ; + assign master_monitor_dv[i].r_id = master[i].r_id ; + assign master_monitor_dv[i].r_data = master[i].r_data ; + assign master_monitor_dv[i].r_resp = master[i].r_resp ; + assign master_monitor_dv[i].r_last = master[i].r_last ; + assign master_monitor_dv[i].r_user = master[i].r_user ; + assign master_monitor_dv[i].r_valid = master[i].r_valid ; + assign master_monitor_dv[i].r_ready = master[i].r_ready ; + end + for (genvar i = 0; i < TbNumSlv; i++) begin : gen_connect_slave_monitor + assign slave_monitor_dv[i].aw_id = slave[i].aw_id ; + assign slave_monitor_dv[i].aw_addr = slave[i].aw_addr ; + assign slave_monitor_dv[i].aw_len = slave[i].aw_len ; + assign slave_monitor_dv[i].aw_size = slave[i].aw_size ; + assign slave_monitor_dv[i].aw_burst = slave[i].aw_burst ; + assign slave_monitor_dv[i].aw_lock = slave[i].aw_lock ; + assign slave_monitor_dv[i].aw_cache = slave[i].aw_cache ; + assign slave_monitor_dv[i].aw_prot = slave[i].aw_prot ; + assign slave_monitor_dv[i].aw_qos = slave[i].aw_qos ; + assign slave_monitor_dv[i].aw_region = slave[i].aw_region; + assign slave_monitor_dv[i].aw_atop = slave[i].aw_atop ; + assign slave_monitor_dv[i].aw_user = slave[i].aw_user ; + assign slave_monitor_dv[i].aw_valid = slave[i].aw_valid ; + assign slave_monitor_dv[i].aw_ready = slave[i].aw_ready ; + assign slave_monitor_dv[i].w_data = slave[i].w_data ; + assign slave_monitor_dv[i].w_strb = slave[i].w_strb ; + assign slave_monitor_dv[i].w_last = 
slave[i].w_last ; + assign slave_monitor_dv[i].w_user = slave[i].w_user ; + assign slave_monitor_dv[i].w_valid = slave[i].w_valid ; + assign slave_monitor_dv[i].w_ready = slave[i].w_ready ; + assign slave_monitor_dv[i].b_id = slave[i].b_id ; + assign slave_monitor_dv[i].b_resp = slave[i].b_resp ; + assign slave_monitor_dv[i].b_user = slave[i].b_user ; + assign slave_monitor_dv[i].b_valid = slave[i].b_valid ; + assign slave_monitor_dv[i].b_ready = slave[i].b_ready ; + assign slave_monitor_dv[i].ar_id = slave[i].ar_id ; + assign slave_monitor_dv[i].ar_addr = slave[i].ar_addr ; + assign slave_monitor_dv[i].ar_len = slave[i].ar_len ; + assign slave_monitor_dv[i].ar_size = slave[i].ar_size ; + assign slave_monitor_dv[i].ar_burst = slave[i].ar_burst ; + assign slave_monitor_dv[i].ar_lock = slave[i].ar_lock ; + assign slave_monitor_dv[i].ar_cache = slave[i].ar_cache ; + assign slave_monitor_dv[i].ar_prot = slave[i].ar_prot ; + assign slave_monitor_dv[i].ar_qos = slave[i].ar_qos ; + assign slave_monitor_dv[i].ar_region = slave[i].ar_region; + assign slave_monitor_dv[i].ar_user = slave[i].ar_user ; + assign slave_monitor_dv[i].ar_valid = slave[i].ar_valid ; + assign slave_monitor_dv[i].ar_ready = slave[i].ar_ready ; + assign slave_monitor_dv[i].r_id = slave[i].r_id ; + assign slave_monitor_dv[i].r_data = slave[i].r_data ; + assign slave_monitor_dv[i].r_resp = slave[i].r_resp ; + assign slave_monitor_dv[i].r_last = slave[i].r_last ; + assign slave_monitor_dv[i].r_user = slave[i].r_user ; + assign slave_monitor_dv[i].r_valid = slave[i].r_valid ; + assign slave_monitor_dv[i].r_ready = slave[i].r_ready ; + end + for (genvar i = 0; i < TbNumMst; i++) begin : gen_connect_snoop_monitor + assign snoop_monitor_dv[i].ac_valid = snoop[i].ac_valid; + assign snoop_monitor_dv[i].ac_ready = snoop[i].ac_ready; + assign snoop_monitor_dv[i].ac_snoop = snoop[i].ac_snoop; + assign snoop_monitor_dv[i].ac_addr = snoop[i].ac_addr; + assign snoop_monitor_dv[i].ac_prot = snoop[i].ac_prot; + assign 
snoop_monitor_dv[i].cr_valid = snoop[i].cr_valid; + assign snoop_monitor_dv[i].cr_ready = snoop[i].cr_ready; + assign snoop_monitor_dv[i].cr_resp = snoop[i].cr_resp; + assign snoop_monitor_dv[i].cd_valid = snoop[i].cd_valid; + assign snoop_monitor_dv[i].cd_ready = snoop[i].cd_ready; + assign snoop_monitor_dv[i].cd_data = snoop[i].cd_data; + assign snoop_monitor_dv[i].cd_last = snoop[i].cd_last; + end +endmodule \ No newline at end of file diff --git a/test/tb_ccu_ctrl_r_snoop.sv b/test/tb_ccu_ctrl_r_snoop.sv new file mode 100644 index 0000000..e5481d4 --- /dev/null +++ b/test/tb_ccu_ctrl_r_snoop.sv @@ -0,0 +1,268 @@ +// Copyright (c) 2025 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+`include "ace/typedef.svh"
+`include "ace/assign.svh"
+// Testbench wiring cache_top_agent ACE masters, one `ccu_ctrl_r_snoop` DUT and an `axi_sim_mem`.
+module tb_ccu_ctrl_r_snoop #(
+  parameter int unsigned AddrWidth = 0,      // address width, forwarded to AxiAddrWidth
+  parameter int unsigned DataWidth = 0,      // data width, forwarded to AxiDataWidth
+  parameter int unsigned WordWidth = 0,      // forwarded to the agent as WORD_WIDTH
+  parameter int unsigned CachelineWords = 0, // forwarded to the agent as CACHELINE_WORDS
+  parameter int unsigned Ways = 0,           // forwarded to the agent as WAYS
+  parameter int unsigned Sets = 0,           // forwarded to the agent as SETS
+  parameter int unsigned TbNumMst = 0,       // number of agents / ACE and snoop interfaces
+  parameter string MemDir = ""               // directory holding the *.mem / txns_*.txt input files
+);
+  // Random ace_intf no Transactions (NOTE(review): NoWrites/NoReads are not referenced below)
+  localparam int unsigned NoWrites = 80; // How many writes per ace_intf
+  localparam int unsigned NoReads = 0; // How many reads per ace_intf
+  // timing parameters
+  localparam time CyclTime = 10ns;
+  localparam time ApplTime = 2ns;
+  localparam time TestTime = 8ns;
+
+  // axi configuration
+  localparam int unsigned AxiIdWidthMasters = 4;
+  localparam int unsigned AxiIdUsed = 3;
+  localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1);
+  localparam int unsigned AxiAddrWidth = AddrWidth;
+  localparam int unsigned AxiDataWidth = DataWidth;
+  localparam int unsigned AxiStrbWidth = AxiDataWidth / 8;
+  localparam int unsigned AxiUserWidth = 5;
+  localparam int unsigned WriteBackLen = CachelineWords - 1; // passed to the DUT as AXLEN
+  localparam int unsigned WriteBackSize = $clog2(DataWidth / 8); // passed to the DUT as AXSIZE
+
+  typedef logic [AxiIdWidthMasters-1:0] id_t;
+  typedef logic [AxiIdWidthSlaves-1:0] id_slv_t;
+  typedef logic [AxiAddrWidth-1:0] addr_t;
+  typedef logic [AxiDataWidth-1:0] data_t;
+  typedef logic [AxiStrbWidth-1:0] strb_t;
+  typedef logic [AxiUserWidth-1:0] user_t;
+
+  `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_AW_CHAN_T(master_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t)
+  `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(master_ar_chan_t, addr_t, id_t, user_t)
+  `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(master_r_chan_t, data_t, id_t, user_t)
+  `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t)
+  `AXI_TYPEDEF_REQ_T(mst_req_t, master_aw_chan_t, slave_w_chan_t, master_ar_chan_t)
+  `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, master_r_chan_t)
+  `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t)
+  `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t)
+  `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t)
+  `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t)
+  `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t)
+
+  logic clk, rst_n;
+
+  string data_mem_file_template = {MemDir, "/data_mem_%0d.mem"};
+  string tag_mem_file_template = {MemDir, "/tag_mem_%0d.mem"};
+  string status_file_template = {MemDir, "/state_%0d.mem"};
+  string txn_file_template = {MemDir, "/txns_%0d.txt"};
+
+  ACE_BUS_DV #(
+    .AXI_ADDR_WIDTH (AxiAddrWidth),
+    .AXI_DATA_WIDTH (AxiDataWidth),
+    .AXI_ID_WIDTH (AxiIdWidthMasters),
+    .AXI_USER_WIDTH (AxiUserWidth) // must match user_t and the agent's UW
+  ) ace_intf [TbNumMst] (clk);
+
+  SNOOP_BUS_DV #(
+    .SNOOP_ADDR_WIDTH (AxiAddrWidth),
+    .SNOOP_DATA_WIDTH (AxiDataWidth)
+  ) snoop_intf [TbNumMst](clk);
+
+  CLK_IF clk_if (clk);
+
+  typedef virtual ACE_BUS_DV #(
+    .AXI_ADDR_WIDTH (AxiAddrWidth),
+    .AXI_DATA_WIDTH (AxiDataWidth),
+    .AXI_ID_WIDTH (AxiIdWidthMasters),
+    .AXI_USER_WIDTH (AxiUserWidth) // keep identical to the ace_intf instance parameters
+  ) ace_bus_v_t;
+
+  typedef virtual SNOOP_BUS_DV #(
+    .SNOOP_ADDR_WIDTH (AxiAddrWidth),
+    .SNOOP_DATA_WIDTH (AxiDataWidth)
+  ) snoop_bus_v_t;
+
+  typedef virtual CLK_IF clk_if_v_t;
+
+  // Connections:
+  // cache_top_agent -> ACE -> DUT -> ACE -> AXI -> axi_sim_mem
+  // DUT outputs ACE, but it connects to an AXI interface
+  // This is fine because each subfield is connected separately
+  // ace.aw = axi.aw would not work because the structs have different widths
+
+  //-----------------------------------
+  // Clock generator
+  //-----------------------------------
+  clk_rst_gen #(
+    .ClkPeriod ( CyclTime ),
+    .RstClkCycles ( 5 )
+  ) i_clk_gen (
+    .clk_o (clk),
+    .rst_no (rst_n)
+  );
+
+
+  cache_test_pkg::cache_top_agent #(
+    .AW(AxiAddrWidth),
+    .DW(AxiDataWidth),
+    .AC_AW(AxiAddrWidth),
+    .CD_DW(AxiDataWidth),
+    .IW(AxiIdWidthMasters),
+    .UW(AxiUserWidth),
+    .TA(ApplTime),
+    .TT(TestTime),
+    .CACHELINE_WORDS(CachelineWords),
+    .WORD_WIDTH(WordWidth),
+    .WAYS(Ways),
+    .SETS(Sets),
+    .ace_bus_t(ace_bus_v_t),
+    .snoop_bus_t(snoop_bus_v_t),
+    .clk_if_t(clk_if_v_t)
+  ) ace_master [TbNumMst];
+
+  slv_req_t [TbNumMst] masters_req;
+  slv_resp_t [TbNumMst] masters_resp;
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters
+    `ACE_ASSIGN_TO_REQ(masters_req[i], ace_intf[i])
+    `ACE_ASSIGN_FROM_RESP(ace_intf[i], masters_resp[i])
+  end
+
+  AXI_BUS_DV #(
+    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
+    .AXI_DATA_WIDTH ( AxiDataWidth ),
+    .AXI_ID_WIDTH ( AxiIdWidthSlaves ), // NOTE(review): slaves_req/main_mem_req carry id_t (AxiIdWidthMasters bits) - confirm width
+    .AXI_USER_WIDTH ( AxiUserWidth )
+  ) axi_intf (clk);
+
+  slv_req_t slaves_req;
+  slv_resp_t slaves_resp;
+
+  mst_req_t main_mem_req;
+  mst_resp_t main_mem_resp;
+
+  `AXI_ASSIGN_FROM_REQ(axi_intf, slaves_req)
+  `AXI_ASSIGN_TO_RESP(slaves_resp, axi_intf)
+
+  `AXI_ASSIGN_TO_REQ(main_mem_req, axi_intf)
+  `AXI_ASSIGN_FROM_RESP(axi_intf, main_mem_resp)
+
+  snoop_req_t [TbNumMst] snoop_req;
+  snoop_resp_t [TbNumMst] snoop_resp;
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop
+    `SNOOP_ASSIGN_FROM_REQ(snoop_intf[i], snoop_req[i])
+    `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop_intf[i])
+  end
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master
+    initial begin
+      string data_mem_file, tag_mem_file, status_file, txn_file;
+      $sformat(data_mem_file, data_mem_file_template, i);
+      $sformat(tag_mem_file, tag_mem_file_template, i);
+      $sformat(status_file, status_file_template, i);
+      $sformat(txn_file, txn_file_template, i);
+      ace_master[i] = new(
+        ace_intf[i],
+        snoop_intf[i],
+        clk_if,
+        data_mem_file,
+        tag_mem_file,
+        status_file,
+        txn_file
+      );
+      ace_master[i].reset();
+      @(posedge rst_n);
+      ace_master[i].run();
+    end
+  end
+
+  axi_sim_mem #(
+    // AXI interface parameters
+    .AddrWidth ( AxiAddrWidth ),
+    .DataWidth ( AxiDataWidth ),
+    .IdWidth ( AxiIdWidthSlaves ),
+    .UserWidth ( AxiUserWidth ),
+    .NumPorts (1),
+    .axi_req_t(mst_req_t),
+    .axi_rsp_t(mst_resp_t),
+    .ApplDelay ( ApplTime ),
+    .AcqDelay (TestTime )
+  ) axi_mem (
+    .clk_i(clk),
+    .rst_ni(rst_n),
+    .axi_req_i(main_mem_req),
+    .axi_rsp_o(main_mem_resp),
+    .mon_w_valid_o(),
+    .mon_w_addr_o(),
+    .mon_w_data_o(),
+    .mon_w_id_o(),
+    .mon_w_user_o(),
+    .mon_w_beat_count_o(),
+    .mon_w_last_o(),
+    .mon_r_valid_o(),
+    .mon_r_addr_o(),
+    .mon_r_data_o(),
+    .mon_r_id_o(),
+    .mon_r_user_o(),
+    .mon_r_beat_count_o(),
+    .mon_r_last_o()
+  );
+
+  initial begin
+    $readmemh({MemDir, "/main_mem.mem"}, axi_mem.mem);
+  end
+
+  ace_pkg::snoop_info_t snoopy_trs;
+  logic illegal; // explicit declaration; was an implicit net (breaks under `default_nettype none)
+  // DUT
+
+  ace_ar_transaction_decoder #(
+    .ar_chan_t(slave_ar_chan_t)
+  ) aw_trs_decoder ( // NOTE(review): this instance decodes the AR channel despite its aw_ name
+    .ar_i(slaves_req.ar),
+    .snoop_info_o(snoopy_trs),
+    .illegal_trs_o(illegal)
+  );
+
+  ccu_ctrl_r_snoop #(
+    .slv_req_t(slv_req_t),
+    .slv_resp_t(slv_resp_t),
+    .mst_req_t(slv_req_t),
+    .mst_resp_t(slv_resp_t),
+    .slv_ar_chan_t(slave_ar_chan_t),
+    .mst_snoop_req_t(snoop_req_t),
+    .mst_snoop_resp_t(snoop_resp_t),
+    .AXLEN(WriteBackLen),
+    .AXSIZE(WriteBackSize)
+  ) DUT (
+    .clk_i(clk),
+    .rst_ni(rst_n),
+    .snoop_info_i(snoopy_trs),
+    .slv_req_i(masters_req[0]), // only agent 0 drives the DUT request port
+    .slv_resp_o(masters_resp[0]),
+    .mst_req_o(slaves_req),
+    .mst_resp_i(slaves_resp),
+    .snoop_resp_i(snoop_resp),
+    .snoop_req_o(snoop_req),
+    .ardomain_o()
+  );
+
+endmodule
diff --git a/test/tb_ccu_ctrl_wr_snoop.sv b/test/tb_ccu_ctrl_wr_snoop.sv
new file mode 100644
index 0000000..bcfc833
--- /dev/null
+++ b/test/tb_ccu_ctrl_wr_snoop.sv
@@ -0,0 +1,236 @@
+// Copyright (c) 2025 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the
License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`include "ace/typedef.svh"
+`include "ace/assign.svh"
+// Testbench wiring cache_top_agent ACE masters, one `ccu_ctrl_wr_snoop` DUT and an `axi_sim_mem`.
+module tb_ccu_ctrl_wr_snoop #(
+  parameter int unsigned TbNumMst = 32'd1 // how many AXI masters there are
+);
+  // Random ace_intf no Transactions (NOTE(review): NoWrites/NoReads are not referenced below)
+  localparam int unsigned NoWrites = 80; // How many writes per ace_intf
+  localparam int unsigned NoReads = 0; // How many reads per ace_intf
+  // timing parameters
+  localparam time CyclTime = 10ns;
+  localparam time ApplTime = 2ns;
+  localparam time TestTime = 8ns;
+
+  // axi configuration
+  localparam int unsigned AxiIdWidthMasters = 4;
+  localparam int unsigned AxiIdUsed = 3;
+  localparam int unsigned AxiIdWidthSlaves = AxiIdWidthMasters + $clog2(TbNumMst)+$clog2(TbNumMst+1);
+  localparam int unsigned AxiAddrWidth = 32;
+  localparam int unsigned AxiDataWidth = 64;
+  localparam int unsigned AxiStrbWidth = AxiDataWidth / 8;
+  localparam int unsigned AxiUserWidth = 5;
+
+  typedef logic [AxiIdWidthMasters-1:0] id_t;
+  typedef logic [AxiIdWidthSlaves-1:0] id_slv_t;
+  typedef logic [AxiAddrWidth-1:0] addr_t;
+  typedef logic [AxiDataWidth-1:0] data_t;
+  typedef logic [AxiStrbWidth-1:0] strb_t;
+  typedef logic [AxiUserWidth-1:0] user_t;
+
+  `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_AW_CHAN_T(master_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t)
+  `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(master_ar_chan_t, addr_t, id_t, user_t)
+  `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(master_r_chan_t, data_t, id_t, user_t)
+  `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t)
+  `AXI_TYPEDEF_REQ_T(mst_req_t, master_aw_chan_t, slave_w_chan_t, master_ar_chan_t)
+  `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, master_r_chan_t)
+  `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t)
+  `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t)
+  `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t)
+  `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t)
+  `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t)
+
+  logic clk, rst_n;
+
+  ACE_BUS_DV #(
+    .AXI_ADDR_WIDTH (AxiAddrWidth),
+    .AXI_DATA_WIDTH (AxiDataWidth),
+    .AXI_ID_WIDTH (AxiIdWidthMasters),
+    .AXI_USER_WIDTH (AxiUserWidth) // must match user_t and the agent's UW
+  ) ace_intf [TbNumMst] (clk);
+
+  SNOOP_BUS_DV #(
+    .SNOOP_ADDR_WIDTH (AxiAddrWidth),
+    .SNOOP_DATA_WIDTH (AxiDataWidth)
+  ) snoop_intf [TbNumMst](clk);
+
+  CLK_IF clk_if (clk);
+
+  typedef virtual ACE_BUS_DV #(
+    .AXI_ADDR_WIDTH (AxiAddrWidth),
+    .AXI_DATA_WIDTH (AxiDataWidth),
+    .AXI_ID_WIDTH (AxiIdWidthMasters),
+    .AXI_USER_WIDTH (AxiUserWidth) // keep identical to the ace_intf instance parameters
+  ) ace_bus_v_t;
+
+  typedef virtual SNOOP_BUS_DV #(
+    .SNOOP_ADDR_WIDTH (AxiAddrWidth),
+    .SNOOP_DATA_WIDTH (AxiDataWidth)
+  ) snoop_bus_v_t;
+
+  typedef virtual CLK_IF clk_if_v_t;
+
+  // Connections:
+  // cache_top_agent -> ACE -> DUT -> ACE -> AXI -> axi_sim_mem
+  // DUT outputs ACE, but it connects to an AXI interface
+  // This is fine because each subfield is connected separately
+  // ace.aw = axi.aw would not work because the structs have different widths
+
+  //-----------------------------------
+  // Clock generator
+  //-----------------------------------
+  clk_rst_gen #(
+    .ClkPeriod ( CyclTime ),
+    .RstClkCycles ( 5 )
+  ) i_clk_gen (
+    .clk_o (clk),
+    .rst_no (rst_n)
+  );
+
+  cache_test_pkg::cache_top_agent #(
+    .AW(AxiAddrWidth),
+    .DW(AxiDataWidth),
+    .AC_AW(AxiAddrWidth),
+    .CD_DW(AxiDataWidth),
+    .IW(AxiIdWidthMasters),
+    .UW(AxiUserWidth),
+    .TA(ApplTime),
+    .TT(TestTime),
+    .ace_bus_t(ace_bus_v_t),
+    .snoop_bus_t(snoop_bus_v_t),
+    .clk_if_t(clk_if_v_t),
+    .mem_file("/scratch2/akorsman/ace/src/test/cache/memory_m0.csv") // NOTE(review): absolute, user-specific path
+  ) ace_master [TbNumMst];
+
+  slv_req_t [TbNumMst] masters_req;
+  slv_resp_t [TbNumMst] masters_resp;
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_masters
+    `ACE_ASSIGN_TO_REQ(masters_req[i], ace_intf[i])
+    `ACE_ASSIGN_FROM_RESP(ace_intf[i], masters_resp[i])
+  end
+
+  AXI_BUS_DV #(
+    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
+    .AXI_DATA_WIDTH ( AxiDataWidth ),
+    .AXI_ID_WIDTH ( AxiIdWidthSlaves ), // NOTE(review): slaves_req/main_mem_req carry id_t (AxiIdWidthMasters bits) - confirm width
+    .AXI_USER_WIDTH ( AxiUserWidth )
+  ) axi_intf (clk);
+
+  slv_req_t slaves_req;
+  slv_resp_t slaves_resp;
+
+  mst_req_t main_mem_req;
+  mst_resp_t main_mem_resp;
+
+  `AXI_ASSIGN_FROM_REQ(axi_intf, slaves_req)
+  `AXI_ASSIGN_TO_RESP(slaves_resp, axi_intf)
+
+  `AXI_ASSIGN_TO_REQ(main_mem_req, axi_intf)
+  `AXI_ASSIGN_FROM_RESP(axi_intf, main_mem_resp)
+
+  snoop_req_t [TbNumMst] snoop_req;
+  snoop_resp_t [TbNumMst] snoop_resp;
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_conn_dv_snoop
+    `SNOOP_ASSIGN_FROM_REQ(snoop_intf[i], snoop_req[i])
+    `SNOOP_ASSIGN_TO_RESP(snoop_resp[i], snoop_intf[i])
+  end
+
+  for (genvar i = 0; i < TbNumMst; i++) begin : gen_rand_master
+    initial begin
+      ace_master[i] = new(ace_intf[i], snoop_intf[i], clk_if);
+      ace_master[i].reset();
+      @(posedge rst_n);
+      ace_master[i].run();
+    end
+  end
+
+  axi_sim_mem #(
+    // AXI interface parameters
+    .AddrWidth ( AxiAddrWidth ),
+    .DataWidth ( AxiDataWidth ),
+    .IdWidth ( AxiIdWidthSlaves ),
+    .UserWidth ( AxiUserWidth ),
+    .NumPorts (1),
+    .axi_req_t(mst_req_t),
+    .axi_rsp_t(mst_resp_t),
+    .ApplDelay ( ApplTime ),
+    .AcqDelay (TestTime )
+  ) axi_mem (
+    .clk_i(clk),
+    .rst_ni(rst_n),
+    .axi_req_i(main_mem_req),
+    .axi_rsp_o(main_mem_resp),
+    .mon_w_valid_o(),
+    .mon_w_addr_o(),
+    .mon_w_data_o(),
+    .mon_w_id_o(),
+    .mon_w_user_o(),
+    .mon_w_beat_count_o(),
+    .mon_w_last_o(),
+    .mon_r_valid_o(),
+    .mon_r_addr_o(),
+    .mon_r_data_o(),
+    .mon_r_id_o(),
+    .mon_r_user_o(),
+    .mon_r_beat_count_o(),
+    .mon_r_last_o()
+  );
+
+  initial begin
+    $readmemb("/scratch2/akorsman/ace/scripts/python/main_mem.mem", axi_mem.mem); // NOTE(review): absolute, user-specific path
+  end
+
+  ace_pkg::acsnoop_t snoopy_trs;
+  logic snoop_trs, illegal; // explicit declarations; were implicit nets (break under `default_nettype none)
+  // DUT
+
+  ace_aw_transaction_decoder #(
+    .aw_chan_t(slave_aw_chan_t)
+  ) aw_trs_decoder (
+    .aw_i(masters_req[0].aw),
+    .acsnoop_o(snoopy_trs),
+    .snoop_trs_o(snoop_trs),
+    .illegal_trs_o(illegal)
+  );
+
+  ccu_ctrl_wr_snoop #(
+    .slv_req_t(slv_req_t),
+    .slv_resp_t(slv_resp_t),
+    .mst_req_t(slv_req_t),
+    .mst_resp_t(slv_resp_t),
+    .slv_aw_chan_t(slave_aw_chan_t),
+    .mst_snoop_req_t(snoop_req_t),
+    .mst_snoop_resp_t(snoop_resp_t)
+  ) DUT (
+    .clk_i(clk),
+    .rst_ni(rst_n),
+    .snoop_trs_i(snoopy_trs), // NOTE(review): fed from the decoder's acsnoop_o, not from snoop_trs - confirm intended
+    .slv_req_i(masters_req[0]), // only agent 0 drives the DUT request port
+    .slv_resp_o(masters_resp[0]),
+    .mst_req_o(slaves_req),
+    .mst_resp_i(slaves_resp),
+    .snoop_resp_i(snoop_resp),
+    .snoop_req_o(snoop_req),
+    .awdomain_o()
+  );
+
+endmodule
diff --git a/test/tb_ccu_ctrl_wr_snoop_old.sv b/test/tb_ccu_ctrl_wr_snoop_old.sv
new file mode 100644
index 0000000..5b79aea
--- /dev/null
+++ b/test/tb_ccu_ctrl_wr_snoop_old.sv
@@ -0,0 +1,231 @@
+`include "ace/typedef.svh"
+`include "ace/assign.svh"
+
+module tb_ccu_ctrl_wr_snoop #(
+);
+
+
+  localparam int unsigned NoWrites = 8000; // How many writes per master
+  localparam int unsigned NoReads = 0; // How many reads per master
+
+  // axi configuration
+  localparam int unsigned AxiIdWidthMasters = 1;
+  localparam int unsigned AxiIdUsed = 1; // Has to be <= AxiIdWidthMasters
+  localparam int unsigned AxiIdWidthSlaves = 1;
+  localparam int unsigned AxiAddrWidth = 32; // Axi Address Width
+  localparam int unsigned AxiDataWidth = 64; // Axi Data Width
+  localparam int unsigned AxiStrbWidth = AxiDataWidth / 8;
+  localparam int unsigned AxiUserWidth = 5;
+
+  // Address space for memory which is initialized
+  localparam int mem_addr_space = 8;
+
+
localparam time CyclTime = 10ns; + localparam time ApplTime = 2ns; + localparam time TestTime = 8ns; + + // in the bench can change this variables which are set here freely + localparam ccu_pkg::ccu_cfg_t ccu_cfg = '{ + NoSlvPorts: 1, + MaxMstTrans: 10, + MaxSlvTrans: 6, + FallThrough: 1'b1, + LatencyMode: ccu_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: AxiIdWidthMasters, + AxiIdUsedSlvPorts: AxiIdUsed, + UniqueIds: 1, + AxiAddrWidth: AxiAddrWidth, + AxiDataWidth: AxiDataWidth + }; + + logic clk, rst_n; + logic end_of_sim; + + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiIdWidthMasters-1:0] id_t; + typedef logic [AxiUserWidth-1:0] user_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [AxiDataWidth/8 -1:0] strb_t; + + `ACE_TYPEDEF_AW_CHAN_T(slave_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(slave_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(slave_b_chan_t, id_t, user_t) + `ACE_TYPEDEF_AR_CHAN_T(slave_ar_chan_t, addr_t, id_t, user_t) + `ACE_TYPEDEF_R_CHAN_T(slave_r_chan_t, data_t, id_t, user_t) + `ACE_TYPEDEF_REQ_T(mst_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `ACE_TYPEDEF_REQ_T(slv_req_t, slave_aw_chan_t, slave_w_chan_t, slave_ar_chan_t) + `ACE_TYPEDEF_RESP_T(mst_resp_t, slave_b_chan_t, slave_r_chan_t) + `ACE_TYPEDEF_RESP_T(slv_resp_t, slave_b_chan_t, slave_r_chan_t) + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_t, snoop_cr_t) + + //----------------------------------- + // Clock generator + //----------------------------------- + clk_rst_gen #( + .ClkPeriod ( CyclTime ), + .RstClkCycles ( 5 ) + ) i_clk_gen ( + .clk_o (clk), + .rst_no (rst_n) + ); + + ACE_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master (); + 
ACE_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMasters ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) master_dv (clk); + + mst_req_t masters_req; + mst_resp_t masters_resp; + + `ACE_ASSIGN (master, master_dv) + `ACE_ASSIGN_TO_REQ(masters_req, master) + `ACE_ASSIGN_FROM_RESP(master, masters_resp) + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave (); + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) + ) slave_dv(clk); + + slv_req_t slaves_req; + slv_resp_t slaves_resp; + + `AXI_ASSIGN(slave_dv, slave) + `AXI_ASSIGN_FROM_REQ(slave, slaves_req) + `AXI_ASSIGN_TO_RESP(slaves_resp, slave) + + SNOOP_BUS #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop (); + SNOOP_BUS_DV #( + .SNOOP_ADDR_WIDTH ( AxiAddrWidth ), + .SNOOP_DATA_WIDTH ( AxiDataWidth ) + ) snoop_dv (clk); + + snoop_req_t snoop_req; + snoop_resp_t snoop_resp; + + `SNOOP_ASSIGN(snoop_dv, snoop) + `SNOOP_ASSIGN_FROM_REQ(snoop, snoop_req) + `SNOOP_ASSIGN_TO_RESP(snoop_resp, snoop) + + + ace_sim_master::ace_rand_master #( + .AW (AxiAddrWidth), + .DW (AxiDataWidth), + .IW (AxiIdWidthMasters), + .UW (AxiUserWidth), + .MAX_READ_TXNS (20), + .MAX_WRITE_TXNS (20), + .UNIQUE_IDS (1), + .TA ( ApplTime ), + .TT (TestTime ), + .CACHELINE_WIDTH (32), + .MEM_ADDR_SPACE (mem_addr_space) + ) ace_master; + + axi_test::axi_rand_slave #( + // AXI interface parameters + .AW ( AxiAddrWidth ), + .DW ( AxiDataWidth ), + .IW ( AxiIdWidthSlaves ), + .UW ( AxiUserWidth ), + .TA ( ApplTime ), + .TT (TestTime ) + ) axi_rand_slave; + + snoop_chan_logger #( + .TestTime (TestTime), + .LoggerName ( "snoop_logger" ), + .ac_chan_t (snoop_ac_t), + .cr_chan_t (snoop_cr_t), + .cd_chan_t (snoop_cd_t) + ) snoop_chan_logger ( + 
.clk_i (clk), + .rst_ni (rst_n), + .end_sim_i (end_of_sim), + .ac_chan_i (snoop_req.ac), + .ac_valid_i (snoop_req.ac_valid), + .ac_ready_i (snoop_resp.ac_ready), + .cr_chan_i (snoop_resp.cr_resp), + .cr_valid_i (snoop_resp.cr_valid), + .cr_ready_i (snoop_req.cr_ready), + .cd_chan_i (snoop_resp.cd), + .cd_valid_i (snoop_resp.cd_valid), + .cd_ready_i ( snoop_req.cd_ready) + ); + + initial begin + ace_master = new(master_dv, snoop_dv); + end_of_sim <= 1'b0; + ace_master.add_memory_region( + 32'h0000_0000, 32'h0000_3000, + axi_pkg::DEVICE_NONBUFFERABLE); + ace_master.init_cache_memory(); + ace_master.reset(); + @(posedge rst_n); + ace_master.run(NoReads, NoWrites); + end_of_sim <= 1'b1; + $finish; + end + + initial begin + axi_rand_slave = new(slave_dv); + axi_rand_slave.reset(); + @(posedge rst_n); + axi_rand_slave.run(); + end + + ace_pkg::acsnoop_t snoopy_trs; + logic snoop_trs, illegal; + + ace_aw_transaction_decoder #( + .aw_chan_t(slave_aw_chan_t) + ) aw_trs_decoder ( + .aw_i(slaves_req.aw), + .acsnoop_o(snoopy_trs), + .snoop_trs_o(snoop_trs), + .illegal_trs_o(illegal) + ); + + ccu_ctrl_wr_snoop #( + .slv_req_t(slv_req_t), + .slv_resp_t(slv_resp_t), + .mst_req_t(mst_req_t), + .mst_resp_t(mst_resp_t), + .slv_aw_chan_t(slave_aw_chan_t), + .mst_snoop_req_t(snoop_req_t), + .mst_snoop_resp_t(snoop_resp_t) + ) DUT ( + .clk_i(clk), + .rst_ni(rst_n), + .snoop_trs_i(snoopy_trs), + .slv_req_i(masters_req), + .slv_resp_o(masters_resp), + .mst_req_o(slaves_req), + .mst_resp_i(slaves_resp), + .snoop_resp_i(snoop_resp), + .snoop_req_o(snoop_req) + ); + +endmodule \ No newline at end of file diff --git a/test/vip/Python_README.md b/test/vip/Python_README.md new file mode 100644 index 0000000..ff0ab52 --- /dev/null +++ b/test/vip/Python_README.md @@ -0,0 +1,42 @@ +# Coherency framework + +The purpose of the Python framework is the following: +- Generate a coherent, randomized initial state for caches and main memory +- Generate randomized cache transaction +- Reconstruct the 
state of caches and main memory after simulation +- Run a coherency check + +The file `cache_coherency_test.py` contains a command line interface for generating the initial states and running the coherency check. The CLI is replicated in the Makefile of this repository. See the README of this repository for instructions on how to run it. + +## Components + +The main class is `CacheCoherencyTest`. The example tests included, `RandomTest` and `ConflictTest`, extend this class. It provides functions to either generate a randomized initial state and transactions, or define them manually. It also contains methods for running the coherency check. + +`CacheCoherencyTest` includes the following key components: +- `caches` - a list of `CacheState` elements +- `mem_state` - a `MemoryState` element +- `transactions` - a list of `CacheTransactionSequence` elements +- `mem_ranges` - a list of `MemoryRange` elements + +## Principle + +The operating principle of the Python flow is as follows: +1. Generate the initial state +1. Run a coherency check on the initial state +1. Generate the transactions +1. (run the RTL simulation) +1. Start to reconstruct state from the logfiles generated by SystemVerilog +1. After each change, run the coherency check +1. Check from logfile whether the check was successful + +## Coherency check +By default, the coherency check is run on two occasions: +- After generating the initial state +- During state reconstruction - here the check is run after each change in cache to detect the exact timestamp when coherency was lost. + +The check verifies the following conditions: +- A modified (i.e. different from main memory) cache line must not be in Exclusive state +- A modified cache line must be in either Owned or Modified state in one of the caches +- Cache line states must be compatible (e.g.
one cache line in both Modified and Shared states is not allowed) + +The check is implemented in an exhaustive way (all cache entries are checked at each timestamp), so, unless someone implements a more optimized algorithm, the check can take an unbearably long time for larger cache sizes. Thus, it is recommended to keep the memory and cache sizes around the same size as what is provided by default. It also makes sense because smaller cache and memory sizes generate more snoop traffic. diff --git a/test/vip/SV_README.md b/test/vip/SV_README.md new file mode 100644 index 0000000..40c2486 --- /dev/null +++ b/test/vip/SV_README.md @@ -0,0 +1,84 @@ +# Open-source ACE verification IP + +These IPs target the AXI4 ACE specification, specifically issue E. + +## Limitations +- Does not test exclusive access +- Does not test cache maintenance operations +- Does not test barriers +- Does not test DVMs +- Does not support snoop filtering +- Does not support ACE-Lite + +## Transaction coverage +The cache model currently generates the following ACE transactions: + - ReadUnique + - ReadShared + - CleanUnique + - WriteUnique + - WriteLineUnique + - WriteBack + +## ACE + +### ace_agent +Collects all ACE-related components. + +### ace_beat_types +Collects the datatypes for ACE transactions. + +### ace_driver +Component which receives ACE transactions in a mailbox (AW, W and AR channels) and drives the ACE interface signals accordingly. It also drives the signals to receive responses (R and B), but does not collect them. + +### ace_monitor +Component which monitors the ACE interface signals and detects received B or R responses. Puts the responses in a mailbox. + +### ace_sequencer +Contains the abstract class `ace_sequencer` and two classes `ace_rand_sequencer` and `ace_mbox_sequencer` which extend the abstract class. In this project, `ace_mbox_sequencer` is used, but the other one can also be used for a standalone testbench without coherency-checking.
+ +`ace_mbox_sequencer` receives ACE transactions in a mailbox and sends them to `ace_driver`. + +## Snoop + +### snoop_agent +Collects all snoop-channel related components. + +### snoop_beat_types +Collects the datatypes for snoop transactions, i.e. AC/CD/CR. + +### snoop_driver +Component which receives snoop responses in a mailbox and drives the snoop interface signals (CR and CD). It also drives the ready signal of the AC channel, toggling it randomly. + +### snoop_monitor +Component which monitors the snoop interface's AC channel and puts the received transaction into a mailbox. + +### snoop_sequencer +Receives AC transactions in a mailbox and converts them into cache requests. Requests data from the cache. Receives the response, and puts the CR and possible CD responses into a mailbox. + +This class also contains functions to generate a random response instead of requesting the cache. + +## Cache + +### cache_beat_types +Collects the datatypes for cache and memory transactions. These are in a custom format. + +### cache_scoreboard +Contains the behavioral model for an n-way set-associative cache. It has two sources of requests - cache requests from `cache_sequencer` and snoop requests from `snoop_sequencer`. These are not arbitrated in any way, as it is an interconnect requirement that a cache line must not see a snoop to the same cache line during an outstanding transaction. + +A large part of the cache model's job is to generate the correct ACE transaction for each situation. For example, a non-cached write generates a WriteUnique transaction, while a cached write would generate a ReadUnique transaction for the allocation of the cache line. + +The cache can be initialized from a file. The Python part of this framework handles the generation of a randomized, cache-coherent initial state. + +All cache modifications are logged in a file for both debugging purposes and to reconstruct the cache state later in the Python domain for the coherency check.
+ +### cache_sequencer +Generates cache requests from a transaction file which is generated by the Python scripts. + +### cache_top_agent +Collects all verification components. This is the module to instantiate in the testbench. + +### mem_logger +Connects to `axi_sim_mem`. Logs changes in memory to a file for both debugging purposes and to reconstruct the state in Python. + +### mem_sequencer +Receives memory requests in a mailbox and converts them into ACE requests. Puts them into a mailbox. diff --git a/test/vip/ace/ace_agent.svh b/test/vip/ace/ace_agent.svh new file mode 100644 index 0000000..933d8bc --- /dev/null +++ b/test/vip/ace/ace_agent.svh @@ -0,0 +1,112 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +class ace_agent #( + /// Address width + parameter AW = 32, + /// Data width + parameter DW = 32, + /// ID width + parameter IW = 8 , + /// User width + parameter UW = 1, + /// Stimuli application time + parameter time TA = 0ns, + /// Stimuli test time + parameter time TT = 0ns, + /// ACE bus interface type + parameter type ace_bus_t = logic, + /// Clock interface type + parameter type clk_if_t = logic, + parameter type aw_beat_t = logic, + parameter type w_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type r_beat_t = logic, + parameter type b_beat_t = logic +); + + mailbox #(aw_beat_t) i_aw_mbx = new; + mailbox #(w_beat_t) i_w_mbx = new; + mailbox #(ar_beat_t) i_ar_mbx = new; + + mailbox #(aw_beat_t) aw_mbx; + mailbox #(w_beat_t) w_mbx; + mailbox #(ar_beat_t) ar_mbx; + mailbox #(r_beat_t) r_mbx; + mailbox #(b_beat_t) b_mbx; + + ace_bus_t ace; + clk_if_t clk_if; + + ace_driver #( + .AW(AW), .DW(DW), .IW(IW), + .UW(UW), .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) + ) ace_drv; + + ace_mbox_sequencer #( + .AW(AW), .IW(IW), .UW(UW), .DW(DW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t), + .RAND_WAIT(0) + ) ace_seq; + + 
ace_monitor #( + .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .b_beat_t(b_beat_t) + ) ace_mon; + + function new( + ace_bus_t ace, + clk_if_t clk_if, + mailbox #(aw_beat_t) aw_mbx, + mailbox #(w_beat_t) w_mbx, + mailbox #(ar_beat_t) ar_mbx, + mailbox #(r_beat_t) r_mbx, + mailbox #(b_beat_t) b_mbx + ); + this.ace = ace; + this.clk_if = clk_if; + + this.aw_mbx = aw_mbx; + this.w_mbx = w_mbx; + this.ar_mbx = ar_mbx; + this.r_mbx = r_mbx; + this.b_mbx = b_mbx; + + this.ace_drv = new( + this.ace, this.i_aw_mbx, + this.i_w_mbx, this.i_ar_mbx + ); + this.ace_seq = new( + this.clk_if, this.i_aw_mbx, + this.i_w_mbx, this.i_ar_mbx, + this.aw_mbx, this.w_mbx, + this.ar_mbx + ); + this.ace_mon = new( + this.ace, this.ar_mbx, + this.r_mbx, this.b_mbx + ); + endfunction + + task reset; + this.ace_drv.reset(); + endtask + + task run; + fork + this.ace_drv.run(); + this.ace_seq.run(); + this.ace_mon.run(); + join + endtask + +endclass diff --git a/test/vip/ace/ace_beat_types.svh b/test/vip/ace/ace_beat_types.svh new file mode 100644 index 0000000..bf47e21 --- /dev/null +++ b/test/vip/ace/ace_beat_types.svh @@ -0,0 +1,87 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +/// The data transferred on a beat on the AW/AR channels. 
+class ace_ax_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1, + parameter SNP_W = 4 +); + rand logic [IW-1:0] id = '0; + rand logic [AW-1:0] addr = '0; + logic [7:0] len = '0; + logic [2:0] size = '0; + logic [1:0] burst = '0; + logic lock = '0; + logic [3:0] cache = '0; + logic [2:0] prot = '0; + rand logic [3:0] qos = '0; + logic [3:0] region = '0; + rand logic [UW-1:0] user = '0; + rand logic [1:0] bar = '0; + rand logic [1:0] domain = '0; + rand logic [SNP_W-1:0] snoop = '0; +endclass + +class ace_aw_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(3) +); + logic [5:0] atop = '0; + rand logic awunique = '0; +endclass + +class ace_ar_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(4) +); +endclass + +class ace_ax_comb_beat #( + parameter AW = 32, + parameter IW = 8 , + parameter UW = 1 +) extends ace_ax_beat #( + .AW(AW), .IW(IW), .UW(UW), .SNP_W(4) +); + logic [5:0] atop = '0; + rand logic awunique = '0; +endclass + +class ace_r_beat #( + parameter DW = 32, + parameter IW = 8 , + parameter UW = 1 +); + rand logic [IW-1:0] id = '0; + rand logic [DW-1:0] data = '0; + ace_pkg::rresp_t resp = '0; + logic last = '0; + rand logic [UW-1:0] user = '0; +endclass + +class ace_w_beat #( + parameter DW = 32, + parameter UW = 1 +); + rand logic [DW-1:0] data = '0; + rand logic [DW/8-1:0] strb = '0; + logic last = '0; + rand logic [UW-1:0] user = '0; +endclass + +class ace_b_beat #( + parameter IW = 8, + parameter UW = 1 +); + rand logic [IW-1:0] id = '0; + logic [1:0] resp = '0; + rand logic [UW-1:0] user = '0; +endclass \ No newline at end of file diff --git a/test/vip/ace/ace_driver.svh b/test/vip/ace/ace_driver.svh new file mode 100644 index 0000000..003ef90 --- /dev/null +++ b/test/vip/ace/ace_driver.svh @@ -0,0 +1,236 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif 
+class ace_driver #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8 , + parameter UW = 1, + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type ace_bus_t = logic, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +); + aw_beat_t aw_txn; + ar_beat_t ar_txn; + w_beat_t w_txn; + + ace_bus_t ace; + + mailbox #(aw_beat_t) aw_mbx; + mailbox #(w_beat_t) w_mbx; + mailbox #(ar_beat_t) ar_mbx; + + function new( + ace_bus_t ace, + mailbox #(aw_beat_t) aw_mbx, + mailbox #(w_beat_t) w_mbx, + mailbox #(ar_beat_t) ar_mbx + ); + this.ace = ace; + + this.aw_mbx = aw_mbx; + this.ar_mbx = ar_mbx; + this.w_mbx = w_mbx; + endfunction + + task cycle_start; + #TT; + endtask + + task cycle_end; + @(posedge ace.clk_i); + endtask + + task run(); + cycle_end(); + fork + forever begin + if (aw_mbx.try_get(aw_txn)) send_aw(aw_txn); + else cycle_end(); + end + forever begin + if (w_mbx.try_get(w_txn)) send_w(w_txn); + else cycle_end(); + end + forever begin + if (ar_mbx.try_get(ar_txn)) send_ar(ar_txn); + else cycle_end(); + end + forever recv_r(); + forever recv_b(); + join + endtask + + task reset(); + ace.aw_id <= '0; + ace.aw_addr <= '0; + ace.aw_len <= '0; + ace.aw_size <= '0; + ace.aw_burst <= '0; + ace.aw_lock <= '0; + ace.aw_cache <= '0; + ace.aw_prot <= '0; + ace.aw_qos <= '0; + ace.aw_region <= '0; + ace.aw_atop <= '0; + ace.aw_user <= '0; + ace.aw_valid <= '0; + ace.aw_snoop <= '0; + ace.aw_bar <= '0; + ace.aw_domain <= '0; + ace.aw_awunique <= '0; + ace.w_data <= '0; + ace.w_strb <= '0; + ace.w_last <= '0; + ace.w_user <= '0; + ace.w_valid <= '0; + ace.b_ready <= '0; + ace.ar_id <= '0; + ace.ar_addr <= '0; + ace.ar_len <= '0; + ace.ar_size <= '0; + ace.ar_burst <= '0; + ace.ar_lock <= '0; + ace.ar_cache <= '0; + ace.ar_prot <= '0; + ace.ar_qos <= '0; + ace.ar_region <= '0; + ace.ar_user <= '0; + ace.ar_snoop <= '0; + ace.ar_bar <= '0; + 
ace.ar_domain <= '0; + ace.ar_valid <= '0; + ace.r_ready <= '0; + ace.wack <= '0; + ace.rack <= '0; + endtask + + task send_aw ( + input aw_beat_t beat + ); + ace.aw_id <= #TA beat.id; + ace.aw_addr <= #TA beat.addr; + ace.aw_len <= #TA beat.len; + ace.aw_size <= #TA beat.size; + ace.aw_burst <= #TA beat.burst; + ace.aw_lock <= #TA beat.lock; + ace.aw_cache <= #TA beat.cache; + ace.aw_prot <= #TA beat.prot; + ace.aw_qos <= #TA beat.qos; + ace.aw_region <= #TA beat.region; + ace.aw_atop <= #TA beat.atop; + ace.aw_user <= #TA beat.user; + ace.aw_valid <= #TA 1; + ace.aw_snoop <= #TA beat.snoop; + ace.aw_bar <= #TA beat.bar; + ace.aw_domain <= #TA beat.domain; + ace.aw_awunique <= #TA beat.awunique; + cycle_start(); + while (ace.aw_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.aw_id <= #TA '0; + ace.aw_addr <= #TA '0; + ace.aw_len <= #TA '0; + ace.aw_size <= #TA '0; + ace.aw_burst <= #TA '0; + ace.aw_lock <= #TA '0; + ace.aw_cache <= #TA '0; + ace.aw_prot <= #TA '0; + ace.aw_qos <= #TA '0; + ace.aw_region <= #TA '0; + ace.aw_atop <= #TA '0; + ace.aw_user <= #TA '0; + ace.aw_valid <= #TA 0; + ace.aw_snoop <= #TA '0; + ace.aw_bar <= #TA '0; + ace.aw_domain <= #TA '0; + ace.aw_awunique <= #TA 0; + endtask + + /// Issue a beat on the AR channel. 
+ task send_ar ( + input ar_beat_t beat + ); + ace.ar_id <= #TA beat.id; + ace.ar_addr <= #TA beat.addr; + ace.ar_len <= #TA beat.len; + ace.ar_size <= #TA beat.size; + ace.ar_burst <= #TA beat.burst; + ace.ar_lock <= #TA beat.lock; + ace.ar_cache <= #TA beat.cache; + ace.ar_prot <= #TA beat.prot; + ace.ar_qos <= #TA beat.qos; + ace.ar_region <= #TA beat.region; + ace.ar_user <= #TA beat.user; + ace.ar_valid <= #TA 1; + ace.ar_snoop <= #TA beat.snoop; + ace.ar_bar <= #TA beat.bar; + ace.ar_domain <= #TA beat.domain; + cycle_start(); + while (ace.ar_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.ar_id <= #TA '0; + ace.ar_addr <= #TA '0; + ace.ar_len <= #TA '0; + ace.ar_size <= #TA '0; + ace.ar_burst <= #TA '0; + ace.ar_lock <= #TA '0; + ace.ar_cache <= #TA '0; + ace.ar_prot <= #TA '0; + ace.ar_qos <= #TA '0; + ace.ar_region <= #TA '0; + ace.ar_user <= #TA '0; + ace.ar_valid <= #TA '0; + ace.ar_snoop <= #TA '0; + ace.ar_bar <= #TA '0; + ace.ar_domain <= #TA '0; + endtask + + /// Issue a beat on the W channel. + task send_w ( + input w_beat_t beat + ); + ace.w_data <= #TA beat.data; + ace.w_strb <= #TA beat.strb; + ace.w_last <= #TA beat.last; + ace.w_user <= #TA beat.user; + ace.w_valid <= #TA 1; + cycle_start(); + while (ace.w_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.w_data <= #TA '0; + ace.w_strb <= #TA '0; + ace.w_last <= #TA '0; + ace.w_user <= #TA '0; + ace.w_valid <= #TA 0; + endtask + + task recv_r; + ace.r_ready <= #TA 1; + cycle_start(); + while (!(ace.r_valid && ace.r_last)) begin + cycle_end(); cycle_start(); + end + cycle_end(); + ace.r_ready <= #TA 0; + ace.rack <= #TA 1; + cycle_start(); cycle_end(); + ace.rack <= #TA 0; + endtask + + /// Wait for a beat on the B channel. 
+ task recv_b (); + ace.b_ready <= #TA 1; + cycle_start(); + while (ace.b_valid != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + ace.b_ready <= #TA 0; + ace.wack <= #TA 1; + cycle_start(); cycle_end(); + ace.wack <= #TA 0; + endtask + +endclass diff --git a/test/vip/ace/ace_monitor.svh b/test/vip/ace/ace_monitor.svh new file mode 100644 index 0000000..18e1bed --- /dev/null +++ b/test/vip/ace/ace_monitor.svh @@ -0,0 +1,85 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif +class ace_monitor #( + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type ace_bus_t = logic, // handle type of the monitored ACE bus + parameter type ar_beat_t = logic, // AR beat class (mailbox is stored but not read here) + parameter type r_beat_t = logic, // R beat class put into r_mbx + parameter type b_beat_t = logic // B beat class put into b_mbx; default added to match the sibling parameters +); + + ace_bus_t ace; + + mailbox #(ar_beat_t) ar_mbx; + mailbox #(r_beat_t) r_mbx; + mailbox #(b_beat_t) b_mbx; + + task cycle_start; // advance to the sampling point (TT after the clock edge) + #TT; + endtask + + task cycle_end; // wait for the next rising clock edge + @(posedge ace.clk_i); + endtask + + function new( + ace_bus_t ace, + mailbox #(ar_beat_t) ar_mbx, + mailbox #(r_beat_t) r_mbx, + mailbox #(b_beat_t) b_mbx + ); + this.ace = ace; + + this.ar_mbx = ar_mbx; + this.r_mbx = r_mbx; + this.b_mbx = b_mbx; + + endfunction + + task mon_r (output r_beat_t beat); // block until an R handshake (valid && ready), then capture the beat + cycle_start(); + while (!(ace.r_valid && ace.r_ready)) begin cycle_end(); cycle_start(); end + beat = new; + beat.id = ace.r_id; + beat.data = ace.r_data; + beat.resp = ace.r_resp; + beat.last = ace.r_last; + beat.user = ace.r_user; + cycle_end(); + endtask + + task mon_b (output b_beat_t beat); // block until a B handshake (valid && ready), then capture the beat + cycle_start(); + while (!(ace.b_valid && ace.b_ready)) begin cycle_end(); cycle_start(); end + beat = new; + beat.id = ace.b_id; + beat.resp = ace.b_resp; + beat.user = ace.b_user; + cycle_end(); + endtask + + task recv_rs; // forward every observed R beat to r_mbx + forever begin + r_beat_t beat; + mon_r(beat); + r_mbx.put(beat); + end + endtask + + task recv_bs; // forward every observed B beat to b_mbx + forever begin + b_beat_t beat; + mon_b(beat); + b_mbx.put(beat); + end + endtask + + task run; + fork + forever recv_rs(); +
forever recv_bs(); + join + endtask +endclass diff --git a/test/vip/ace/ace_sequencer.svh b/test/vip/ace/ace_sequencer.svh new file mode 100644 index 0000000..ba20c02 --- /dev/null +++ b/test/vip/ace/ace_sequencer.svh @@ -0,0 +1,216 @@ +`ifndef _ACE_TEST_PKG +*** INCLUDED IN ace_test_pkg *** +`endif + +virtual class ace_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +); + + // Input mailboxes + mailbox #(aw_beat_t) aw_mbx_i; + mailbox #(ar_beat_t) ar_mbx_i; + mailbox #(w_beat_t) w_mbx_i; + + // Output mailboxes + mailbox #(aw_beat_t) aw_mbx_o; + mailbox #(ar_beat_t) ar_mbx_o; + mailbox #(w_beat_t) w_mbx_o; + + // Clock interface needed for generating delays + // between sending transactions + virtual CLK_IF clk_if; + + function new( + virtual CLK_IF clk_if, + mailbox #(aw_beat_t) aw_mbx_o, + mailbox #(w_beat_t) w_mbx_o, + mailbox #(ar_beat_t) ar_mbx_o + ); + this.clk_if = clk_if; + + this.aw_mbx_o = aw_mbx_o; + this.ar_mbx_o = ar_mbx_o; + this.w_mbx_o = w_mbx_o; + + endfunction + + task automatic rand_wait(input int unsigned min, max); // wait a random number of clock cycles in [min, max] + int unsigned cycles; + cycles = $urandom_range(min, max); + repeat (cycles) begin + @(posedge this.clk_if.clk_i); + end + endtask + +endclass + +// Class which generates random sequences. NOTE(review): no constructor is declared here although the base class constructor takes required arguments - confirm before instantiating +class ace_rand_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic +) extends ace_sequencer #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) +); + + function aw_beat_t create_aw(); // build a 4-beat WriteUnique AW beat with a random address + aw_beat_t beat = new; + beat.addr = $urandom(); + beat.burst = axi_pkg::BURST_WRAP; + beat.size = $clog2(DW/8); // AxSIZE encodes log2(bytes per beat), not log2(bits) + beat.len = 3; // AxLEN is beats-1: matches the 4 W beats issued per burst in send_ws + beat.id = '0; + beat.qos = '0; + beat.snoop =
ace_pkg::WriteUnique; + beat.bar = '0; + beat.domain = 'b1; + beat.awunique = '0; + return beat; + endfunction + + function ar_beat_t create_ar(); // build a 4-beat ReadShared AR beat with a random address + ar_beat_t beat = new; + beat.addr = $urandom(); + beat.burst = axi_pkg::BURST_WRAP; + beat.size = $clog2(DW/8); // AxSIZE encodes log2(bytes per beat), not log2(bits) + beat.len = 3; // AxLEN is beats-1, i.e. a 4-beat burst + beat.id = '0; + beat.qos = '0; + beat.snoop = ace_pkg::ReadShared; + beat.bar = '0; + beat.domain = 'b1; + return beat; + endfunction + + function w_beat_t create_w(); // build a W beat with random data and all byte strobes set; caller sets last + w_beat_t beat = new; + beat.data = $urandom(); + beat.strb = '1; + beat.last = '0; + return beat; + endfunction + + task send_aws(); // queue 10 AW beats, each after a random delay + aw_beat_t aw_txn = new; + repeat (10) begin + rand_wait(2, 20); + aw_txn = create_aw(); + aw_mbx_o.put(aw_txn); + end + endtask + + task send_ws(); // queue 10 bursts of 4 W beats, marking the fourth beat as last + w_beat_t w_txn = new; + repeat (10) begin + for (int i = 0; i < 4; i++) begin + rand_wait(2, 20); + w_txn = create_w(); + if (i == 3) w_txn.last = '1; + w_mbx_o.put(w_txn); + end + end + endtask + + task send_ars(); // queue 10 AR beats, each after a random delay + ar_beat_t ar_txn = new; + repeat (10) begin + rand_wait(2, 20); + ar_txn = create_ar(); + ar_mbx_o.put(ar_txn); + end + endtask + + task run(); // runs the three generators back to back (AW, then W, then AR) + send_aws(); + send_ws(); + send_ars(); + endtask + +endclass + +// Class which generates sequences when detected in +// input mailboxes +class ace_mbox_sequencer #( + parameter AW = 32, + parameter DW = 32, + parameter IW = 8, + parameter UW = 1, + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type w_beat_t = logic, + parameter RAND_WAIT = 1 +) extends ace_sequencer #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .w_beat_t(w_beat_t) +); + + function new( + virtual CLK_IF clk_if, + mailbox #(aw_beat_t) aw_mbx_o, + mailbox #(w_beat_t) w_mbx_o, + mailbox #(ar_beat_t) ar_mbx_o, + mailbox #(aw_beat_t) aw_mbx_i, + mailbox #(w_beat_t) w_mbx_i, + mailbox #(ar_beat_t) ar_mbx_i + ); + super.new(clk_if, aw_mbx_o, w_mbx_o, ar_mbx_o); + this.aw_mbx_i = aw_mbx_i; + this.ar_mbx_i = ar_mbx_i; + this.w_mbx_i = w_mbx_i; +
endfunction + + task wait_for_aws; + aw_beat_t aw_beat; + forever begin + aw_mbx_i.get(aw_beat); + if (RAND_WAIT) rand_wait(2, 20); + aw_mbx_o.put(aw_beat); + end + endtask + + task wait_for_ars; + ar_beat_t ar_beat; + forever begin + ar_mbx_i.get(ar_beat); + if (RAND_WAIT) rand_wait(2, 20); + ar_mbx_o.put(ar_beat); + end + endtask + + task wait_for_ws; + w_beat_t w_beat; + forever begin + w_mbx_i.get(w_beat); + if (RAND_WAIT) rand_wait(2, 20); + w_mbx_o.put(w_beat); + end + endtask + + task gen_txns_from_mbox; + fork + wait_for_aws(); + wait_for_ws(); + wait_for_ars(); + join + endtask + + task run(); + gen_txns_from_mbox(); + endtask + +endclass \ No newline at end of file diff --git a/test/vip/ace_test_pkg.sv b/test/vip/ace_test_pkg.sv new file mode 100644 index 0000000..424c69b --- /dev/null +++ b/test/vip/ace_test_pkg.sv @@ -0,0 +1,8 @@ +package ace_test_pkg; +`define _ACE_TEST_PKG +`include "ace/ace_beat_types.svh" +`include "ace/ace_driver.svh" +`include "ace/ace_monitor.svh" +`include "ace/ace_sequencer.svh" +`include "ace/ace_agent.svh" +endpackage \ No newline at end of file diff --git a/test/vip/cache/cache_beat_types.svh b/test/vip/cache/cache_beat_types.svh new file mode 100644 index 0000000..791dc42 --- /dev/null +++ b/test/vip/cache/cache_beat_types.svh @@ -0,0 +1,47 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif + + +// Cache Requester operations +localparam int REQ_LOAD = 5'b00000; +localparam int REQ_STORE = 5'b00001; + +// Cache Memory operations +localparam int MEM_READ = 3'b000; +localparam int MEM_WRITE = 3'b001; +//localparam int MEM_ATOMIC = 3'b010; + +/// Datatype to orchestrate cache read and write requests +class cache_req; + int unsigned addr = 0; + logic [7:0] data_q[$]; + int unsigned op = REQ_LOAD; + bit cached = 0; + int unsigned shareability = 0; + int unsigned size = 0; + int unsigned timestamp = 0; +endclass + +/// Datatype to orchestrate cache lookups between +/// cache sequencer and cache scoreboard 
+class cache_resp; + logic [7:0] data_q[$]; +endclass + +class mem_req; + int unsigned addr = 0; + int unsigned len = 0; + int unsigned size = 0; + int unsigned op = MEM_READ; + logic [7:0] data_q[$]; + int unsigned cacheable = 0; + ace_pkg::arsnoop_t read_snoop_op = ace_pkg::ReadShared; + ace_pkg::awsnoop_t write_snoop_op = ace_pkg::WriteBack; +endclass + +class mem_resp; + logic [7:0] data_q[$]; + bit is_shared = 0; + bit pass_dirty = 0; +endclass diff --git a/test/vip/cache/cache_scoreboard.svh b/test/vip/cache/cache_scoreboard.svh new file mode 100644 index 0000000..8a8c42b --- /dev/null +++ b/test/vip/cache/cache_scoreboard.svh @@ -0,0 +1,575 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_scoreboard #( + /// Address space + parameter int AW = 32, + /// Width of the memory bus + parameter int DW = 32, + /// Width of one cache word + parameter int WORD_WIDTH = 0, + /// How many words per cache line + parameter int CACHELINE_WORDS = 0, + /// How many ways per set + parameter int WAYS = 0, + /// How many sets + parameter int SETS = 0, + /// Clock interface type + parameter type clk_if_t = logic +); + + localparam int BYTES_PER_WORD = DW / 8; + localparam int CACHELINE_BYTES = CACHELINE_WORDS * WORD_WIDTH / 8; + localparam int BLOCK_OFFSET_BITS = $clog2(CACHELINE_BYTES); + localparam int INDEX_BITS = $clog2(SETS); + localparam int TAG_BITS = AW - BLOCK_OFFSET_BITS - INDEX_BITS; + + localparam int VALID_IDX = 0; + localparam int SHARD_IDX = 1; + localparam int DIRTY_IDX = 2; + + int INDEX = -1; + + typedef logic [TAG_BITS-1:0] tag_t; + typedef logic [AW-1:0] addr_t; + typedef logic [7:0] byte_t; + typedef logic [2:0] status_t; + typedef logic [$clog2(WAYS)-1:0] lru_rank_t; + typedef logic [INDEX_BITS-1:0] idx_t; + + // Data structure for carrying cache request information + // It also monitors all cache modifications so that they can + // be executed at once and logged easily. 
+ typedef struct { + // Cache hit + logic hit; + // Status of the old cache line + status_t status; + // Way index for hit or replacement + int way; + // Set index of the old cache line + idx_t idx; + // Tag of the old cache line + tag_t tag; + // Cacheline-aligned address of the old cache line + addr_t addr; + // Cacheline-aligned address of the new cache line + addr_t new_addr; + // Byte index within the cache line + logic [BLOCK_OFFSET_BITS-1:0] byte_idx; + // New cache line to be stored + byte_t new_cline [CACHELINE_BYTES]; + // New status for the cache line + status_t new_status; + // New tag for the cache line + tag_t new_tag; + } tag_resp_t; + + byte_t data_q[SETS][WAYS][CACHELINE_BYTES]; // Cache data + status_t status_q[SETS][WAYS]; // Cache state + tag_t tag_q[SETS][WAYS]; // Cache tag + lru_rank_t lru_rank_q[SETS][WAYS]; // LRU ranks + + // Semaphore to ensure only one process accesses the cache at a time + // The two processes are cache requests and snoop requests + // TODO: figure the critical point where using this is necessary + // ATM it is not used + semaphore cache_lookup_sem; + + // Interface to provide simulation clock + clk_if_t clk_if; + + string state_file; + logic first_write = '1; + + // Mailboxes for cache requests + mailbox #(cache_req) cache_req_mbx; + mailbox #(cache_resp) cache_resp_mbx; + // Mailboxes for snoop requests + mailbox #(cache_snoop_req) snoop_req_mbx; + mailbox #(cache_snoop_resp) snoop_resp_mbx; + // Mailboxes for memory requests + mailbox #(mem_req) mem_req_mbx; + mailbox #(mem_resp) mem_resp_mbx; + + function new( + clk_if_t clk_if, + mailbox #(cache_req) cache_req_mbx, + mailbox #(cache_resp) cache_resp_mbx, + mailbox #(cache_snoop_req) snoop_req_mbx, + mailbox #(cache_snoop_resp) snoop_resp_mbx, + mailbox #(mem_req) mem_req_mbx, + mailbox #(mem_resp) mem_resp_mbx, + string state_file, + int index + ); + this.clk_if = clk_if; + this.cache_req_mbx = cache_req_mbx; + this.cache_resp_mbx = cache_resp_mbx; + 
this.snoop_req_mbx = snoop_req_mbx; + this.snoop_resp_mbx = snoop_resp_mbx; + this.mem_req_mbx = mem_req_mbx; + this.mem_resp_mbx = mem_resp_mbx; + this.state_file = state_file; + this.INDEX = index; + + this.cache_lookup_sem = new(1); + endfunction + + function void init_data_mem_from_file( + string fname + ); + $readmemh(fname, data_q); + endfunction + + function void init_tag_mem_from_file( + string fname + ); + $readmemh(fname, tag_q); + endfunction + + function void init_status_from_file( + string fname + ); + // Initialize all to zeros + for (int set = 0; set < SETS; set++) begin + for (int way = 0; way < WAYS; way++) begin + status_q[set][way] = '0; + lru_rank_q[set][way] = '0; + end + end + // Read initial values from file + $readmemb(fname, status_q); + endfunction + + function void init_mem_from_file( + string data_fname, + string tag_fname, + string status_fname + ); + init_data_mem_from_file(data_fname); + init_tag_mem_from_file(tag_fname); + init_status_from_file(status_fname); + endfunction + + function automatic void log_state_change( + bit initiator, + int unsigned addr, + int unsigned set, + int unsigned way, + tag_t new_tag, + status_t new_status, + byte_t new_data[CACHELINE_BYTES], + bit modify + ); + int fd; + if (first_write) fd = $fopen(this.state_file, "w"); + else fd = $fopen(this.state_file, "a"); + first_write = 0; + $fwrite(fd, "TIME:%0t ADDR:%x INITIATOR:%0d", $time, addr, initiator); + if (modify) begin + $fwrite(fd, " SET:%0d WAY:%0d TAG:%x STATUS:%b DATA:[", + set, way, new_tag, new_status); + for (int i = 0; i < CACHELINE_BYTES; i++) begin + if (i == 0) + $fwrite(fd, "%x", new_data[i]); + else + $fwrite(fd, ",%x", new_data[i]); + end + $fwrite(fd, "]"); + end + $fwrite(fd, "\n"); + $fclose(fd); + endfunction + + // Atomic function for all cache writes + // Cache state is saved optionally + // NO OTHER FUNCTION SHOULD MODIFY THE CACHE + // initiator = 1 when "core" modifies the cache + // initiator = 0 when cache is modified by 
snooping + function automatic void modify_cache( + tag_resp_t info, bit initiator, bit modify + ); + if (modify) begin + data_q[info.idx][info.way] = info.new_cline; + status_q[info.idx][info.way] = info.new_status; + tag_q[info.idx][info.way] = info.new_tag; + update_lru(info); + end + log_state_change( + initiator, + info.new_addr, + info.idx, + info.way, + tag_q[info.idx][info.way], + status_q[info.idx][info.way], + data_q[info.idx][info.way], + modify + ); + endfunction + + function automatic void update_lru(tag_resp_t info); + for (int way = 0; way < WAYS; way++) begin + if (way == info.way) begin + lru_rank_q[info.idx][way] = WAYS-1; + end else begin + if (lru_rank_q[info.idx][way] != '0) begin + lru_rank_q[info.idx][way]--; + end + end + end + endfunction + + function automatic tag_resp_t read_and_compare_tag(addr_t addr); + tag_resp_t resp; + tag_t lu_tag; + status_t status; + logic hit = '0; + logic invalid_found = '0; + int way; + int i; + idx_t idx = addr[BLOCK_OFFSET_BITS+INDEX_BITS-1:BLOCK_OFFSET_BITS]; + tag_t tag = addr[AW-1:AW-TAG_BITS]; + for (int i = 0; i < WAYS; i++) begin + lu_tag = tag_q[idx][i]; + if (!status_q[idx][i][VALID_IDX]) begin + way = i; + invalid_found = '1; + end else if (!invalid_found && lru_rank_q[idx][i] == 0) begin + // Least recently used + way = i; + end + if (tag == lu_tag && status_q[idx][i][VALID_IDX]) begin + way = i; + hit = 'b1; + break; + end + end + resp.hit = hit; + resp.idx = idx; + resp.way = way; + resp.status = status_q[idx][way]; + resp.tag = tag_q[idx][way]; + resp.addr = {tag_q[idx][way], idx, {BLOCK_OFFSET_BITS{1'b0}}}; + resp.byte_idx = addr[BLOCK_OFFSET_BITS-1:0]; + resp.new_addr = {addr[AW-1:BLOCK_OFFSET_BITS], {BLOCK_OFFSET_BITS{1'b0}}}; + resp.new_tag = tag; + resp.new_status = status_q[idx][way]; + resp.new_cline = data_q[idx][way]; + return resp; + endfunction + + function automatic cache_resp cache_read(tag_resp_t info, cache_req req); + int unsigned n_bytes = 1 << req.size; + cache_resp resp = new; 
+ logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + for (int i = 0; i < n_bytes; i++) begin + resp.data_q.push_back(data_q[info.idx][info.way][byte_idx]); + byte_idx++; + end + return resp; + endfunction + + function automatic void cache_write( + ref tag_resp_t info, ref byte_t data_q[$] + ); + logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + while (data_q.size() > 0) begin + info.new_cline[byte_idx] = data_q.pop_front(); + byte_idx++; + end + endfunction + + function automatic void cache_evict(ref tag_resp_t info); + info.new_status[VALID_IDX] = 1'b0; + endfunction + + + function automatic mem_req gen_write_back(tag_resp_t info); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.addr; + mem_req.op = MEM_WRITE; + mem_req.write_snoop_op = ace_pkg::WriteBack; + for (int i = 0; i < CACHELINE_WORDS; i++) begin + for (int j = 0; j < BYTES_PER_WORD; j++) begin + mem_req.data_q.push_back( + data_q[info.idx][info.way][i*BYTES_PER_WORD+j]); + end + end + return mem_req; + endfunction + + function automatic mem_req gen_read_allocate(tag_resp_t info, cache_req req); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_READ; + mem_req.cacheable = '1; + if (req.op == REQ_STORE) begin + mem_req.read_snoop_op = ace_pkg::ReadUnique; + end else begin + mem_req.read_snoop_op = ace_pkg::ReadShared; + end + return mem_req; + endfunction + + function automatic mem_req gen_clean_unique(tag_resp_t info); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_READ; + mem_req.cacheable = '1; + mem_req.read_snoop_op = ace_pkg::CleanUnique; + return mem_req; + endfunction + + function automatic mem_req gen_write_line_unique(tag_resp_t info, cache_req req); + // Merge with write word + mem_req mem_req = 
new; + logic [BLOCK_OFFSET_BITS-1:0] byte_idx = info.byte_idx; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = CACHELINE_WORDS - 1; + mem_req.addr = info.new_addr; + mem_req.op = MEM_WRITE; + mem_req.cacheable = '1; + mem_req.write_snoop_op = ace_pkg::WriteLineUnique; + for (int i = 0; i < CACHELINE_WORDS; i++) begin + for (int j = 0; j < BYTES_PER_WORD; j++) begin + mem_req.data_q.push_back( + data_q[info.idx][info.way][i*BYTES_PER_WORD+j]); + end + end + return mem_req; + endfunction + + function automatic mem_req gen_write_unique(cache_req req); + mem_req mem_req = new; + mem_req.size = $clog2(BYTES_PER_WORD); + mem_req.len = 0; + mem_req.addr = req.addr; + mem_req.op = MEM_WRITE; + mem_req.cacheable = '1; + mem_req.write_snoop_op = ace_pkg::WriteUnique; + for (int i = 0; i < BYTES_PER_WORD; i++) begin + mem_req.data_q.push_back(req.data_q.pop_front()); + end + return mem_req; + endfunction + + function automatic void allocate(mem_req req, mem_resp resp, ref tag_resp_t info); + info.new_status[DIRTY_IDX] = resp.pass_dirty; + info.new_status[SHARD_IDX] = resp.is_shared; + info.new_status[VALID_IDX] = 1'b1; + info.byte_idx = 0; // Cache line allocations are always cacheline-aligned + cache_write(info, resp.data_q); + endfunction; + + task automatic snoop(input cache_snoop_req req, output cache_snoop_resp resp); + tag_resp_t tag_lu; + cache_resp cache_resp; + resp = new; + tag_lu = read_and_compare_tag(req.addr); + resp.snoop_resp.Error = 1'b0; + if (tag_lu.hit) begin + cache_req cache_req = new; + cache_req.addr = req.addr; + cache_req.size = $clog2(CACHELINE_BYTES); + cache_resp = cache_read(tag_lu, cache_req); + resp.snoop_resp.WasUnique = !tag_lu.status[SHARD_IDX]; + while (cache_resp.data_q.size() > 0) begin + logic [7:0] data = cache_resp.data_q.pop_front(); + resp.data_q.push_back(data); + end + case (req.snoop_op) + ace_pkg::ReadOnce: begin + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = 1'b0; 
+ end + ace_pkg::ReadClean, ace_pkg::ReadNotSharedDirty: begin + // recommended to pass clean + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = 1'b0; + tag_lu.new_status[SHARD_IDX] = 1'b1; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::ReadShared: begin + // recommended to pass dirty + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b1; + tag_lu.new_status[SHARD_IDX] = 1'b1; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[DIRTY_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::ReadUnique: begin + // data transfer and invalidate + resp.snoop_resp.DataTransfer = 1'b1; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::CleanInvalid: begin + // data transfer dirty and invalidate + resp.snoop_resp.DataTransfer = tag_lu.status[DIRTY_IDX]; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::MakeInvalid: begin + // invalidate + resp.snoop_resp.DataTransfer = 1'b0; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = 1'b0; + tag_lu.new_status[VALID_IDX] = 1'b0; + modify_cache(tag_lu, 0, 1); + end + ace_pkg::CleanShared: begin + // pass dirty + resp.snoop_resp.DataTransfer = tag_lu.status[DIRTY_IDX]; + resp.snoop_resp.IsShared = 1'b1; + resp.snoop_resp.PassDirty = tag_lu.status[DIRTY_IDX]; + tag_lu.new_status[DIRTY_IDX] = 1'b0; + tag_lu.new_status[SHARD_IDX] = 1'b1; + modify_cache(tag_lu, 0, 1); + end + default: $fatal(1, "Unsupported snoop op!"); + endcase + end else begin + resp.snoop_resp.WasUnique = 1'b0; + resp.snoop_resp.DataTransfer = 1'b0; + resp.snoop_resp.IsShared = 1'b0; + resp.snoop_resp.PassDirty = 1'b0; + end + endtask + + task automatic cache_fsm(input cache_req req, output cache_resp resp); 
+ bit cache_modified = 1; + tag_resp_t tag_lu; + mem_req mem_req = new; + mem_resp mem_resp; + resp = new; + mem_req.cacheable = '1; + //cache_lookup_sem.get(1); + tag_lu = read_and_compare_tag(req.addr); + if (tag_lu.hit) begin + if (req.op == REQ_LOAD) begin + resp = cache_read(tag_lu, req); + end else if (req.op == REQ_STORE) begin + if (req.cached && tag_lu.status[SHARD_IDX]) begin + // Make unique + mem_req = gen_clean_unique(tag_lu); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + allocate(mem_req, mem_resp, tag_lu); + end + cache_write(tag_lu, req.data_q); + if (req.cached) begin + tag_lu.new_status[DIRTY_IDX] = 1'b1; + end else begin + mem_req = gen_write_line_unique(tag_lu, req); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + cache_evict(tag_lu); + end + end else begin + $fatal(1, "Unsupported op"); // finish_number (0-2) must precede the message + end + end else begin + if (req.cached) begin + if (tag_lu.status[DIRTY_IDX] && + tag_lu.status[VALID_IDX]) begin + // Generate write-back request + mem_req = gen_write_back(tag_lu); + // Send request and wait for response + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + end + // Generate read request for new cache line + mem_req = gen_read_allocate(tag_lu, req); + // Send request and wait for response + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + // Allocate cache line for the new entry + allocate(mem_req, mem_resp, tag_lu); + // Handle the initial cache request + if (req.op == REQ_LOAD) begin + resp = cache_read(tag_lu, req); + end else if (req.op == REQ_STORE) begin + cache_write(tag_lu, req.data_q); + tag_lu.new_status[DIRTY_IDX] = 1'b1; + end else begin + $fatal(1, "Unsupported op"); // finish_number (0-2) must precede the message + end + end else begin + cache_modified = 0; + mem_req = gen_write_unique(req); + mem_req_mbx.put(mem_req); + mem_resp_mbx.get(mem_resp); + end + end + modify_cache(tag_lu, 1, cache_modified); + //cache_resp_mbx.put(resp); + //cache_lookup_sem.put(1); + endtask + + task recv_cache_req; + cache_req req; + cache_resp resp = new; + 
cache_req_mbx.get(req); + @(posedge clk_if.clk_i); + cache_fsm(req, resp); + cache_resp_mbx.put(resp); + endtask + + task recv_snoop_req; + cache_snoop_req req; + cache_snoop_resp resp = new; + snoop_req_mbx.get(req); + snoop(req, resp); + snoop_resp_mbx.put(resp); + endtask + + // Handle one request per clock cycle + // Snooping gets priority + /* + task handle_reqs; + int snp_exists; + int c_req_exists; + cache_snoop_req snp_req; + cache_req c_req; + @(posedge clk_if.clk_i); + snp_exists = snoop_req_mbx.try_get(snp_req); + if (snp_exists != 0) begin + recv_snoop_req(snp_req); + end + c_req_exists = cache_req_mbx.try_get(c_req); + if (c_req_exists) begin + recv_cache_req(c_req); + end + endtask + */ + + task recv_cache_reqs; + forever recv_cache_req(); + endtask + + task recv_snoop_reqs; + forever recv_snoop_req(); + endtask + + task run; + fork + forever recv_cache_reqs(); + forever recv_snoop_reqs(); + join + endtask + +endclass diff --git a/test/vip/cache/cache_sequencer.svh b/test/vip/cache/cache_sequencer.svh new file mode 100644 index 0000000..c5bbf20 --- /dev/null +++ b/test/vip/cache/cache_sequencer.svh @@ -0,0 +1,151 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_sequencer #( + parameter int AW = 32, + parameter int DW = 32, + parameter type clk_if_t = logic +); + + mailbox #(cache_req) cache_req_mbx; + mailbox #(cache_resp) cache_resp_mbx; + + byte delimiter = " "; + string txn_file; + int unsigned txns_remaining; + int unsigned clk_cnt = 0; + + // Interface to provide simulation clock + clk_if_t clk_if; + + function new( + clk_if_t clk_if, + mailbox #(cache_req) cache_req_mbx, + mailbox #(cache_resp) cache_resp_mbx, + string txn_file + ); + this.clk_if = clk_if; + this.cache_req_mbx = cache_req_mbx; + this.cache_resp_mbx = cache_resp_mbx; + this.txn_file = txn_file; + endfunction + + function automatic int parse_op(string op); + if (op == "REQ_LOAD") return REQ_LOAD; + else if (op == "REQ_STORE") return REQ_STORE; + 
else $fatal(1, "Illegal operation type found"); + endfunction + + function automatic cache_req parse_txn(string line); + cache_req req = new; + logic [DW-1:0] word; + string op; + int size; + op = get_next_word(line); + req.op = parse_op(op); + req.addr = get_next_word(line).atohex(); + word = get_next_word(line).atohex(); + for (int i = 0; i < (DW / 8); i++) begin + req.data_q.push_back(word[i*8 +: 8]); + end + req.size = get_next_word(line).atoi(); + req.cached = get_next_word(line).atoi(); + req.shareability = get_next_word(line).atoi(); + req.timestamp = get_next_word(line).atoi(); + return req; + endfunction + + // Calculates the size of the next word until the delimiter + function automatic int get_next_word_size(string line); + byte char = ""; + int len, i; + len = line.len(); + for (i = 0; i < len; i++) begin + char = line[i]; + if (char == this.delimiter) break; + end + return i; + endfunction + + // Returns the next word and removes it from ``line`` + function automatic string get_next_word(ref string line); + int wsize; + string word; + int line_len = line.len(); + wsize = get_next_word_size(line); + word = line.substr(0, wsize - 1); + line = line.substr(wsize + 1, line_len - 1); + return word.substr(5, word.len()-1); + endfunction + + function automatic int get_n_transactions; + int fd, ret; + string line; + int rows = 0; + fd = $fopen(this.txn_file, "r"); + if (fd) begin + while (!$feof(fd)) begin + ret = $fgets(line, fd); + if (line != "") rows++; + end + end else begin + $fatal(1, "Could not open file %s", txn_file); // finish_number (0-2) must precede the format string + end + $fclose(fd); + return rows; + endfunction + + task gen_txns_from_file; + int fd, ret; + string line; + cache_req cache_req; + fd = $fopen(this.txn_file, "r"); + if (fd) begin + while (!$feof(fd)) begin + int mbx_size; + ret = $fgets(line, fd); + if (line != "") begin + cache_req = parse_txn(line); + send_req(cache_req); + end + end + end else begin + $fatal(1, "Could not open file %s", txn_file); // finish_number (0-2) must precede the format string + end + $fclose(fd); + endtask + + task 
send_req(input cache_req req); + while (req.timestamp > clk_cnt) begin + @(posedge clk_if.clk_i); + end + cache_req_mbx.put(req); + endtask + + task recv_resps; + cache_resp cache_resp; + cache_resp_mbx.get(cache_resp); + txns_remaining--; + endtask + + task count_clocks; + forever begin + @(posedge clk_if.clk_i); + clk_cnt++; + end + endtask + + task run; + txns_remaining = get_n_transactions(); + fork + count_clocks(); + fork + gen_txns_from_file(); + while (txns_remaining != 0) begin + recv_resps(); + end + join + join_any + endtask + +endclass diff --git a/test/vip/cache/cache_top_agent.svh b/test/vip/cache/cache_top_agent.svh new file mode 100644 index 0000000..dbd1c38 --- /dev/null +++ b/test/vip/cache/cache_top_agent.svh @@ -0,0 +1,177 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class cache_top_agent #( + /// Address width + parameter AW = 32, + /// Data width + parameter DW = 32, + /// Snoop address width + parameter AC_AW = 32, + /// Snoop data width + parameter CD_DW = 32, + /// ID width + parameter IW = 8 , + /// User width + parameter UW = 1, + /// Stimuli application time + parameter time TA = 0ns, + /// Stimuli test time + parameter time TT = 0ns, + /// How many words in a cache line + parameter CACHELINE_WORDS = 4, + /// Width of a cacheline word + parameter WORD_WIDTH = 32, + /// How many ways in the cache + parameter WAYS = 4, + /// How many sets in the cache + parameter SETS = 1024, + /// ACE bus interface type + parameter type ace_bus_t = logic, + /// Clock interface type + parameter type clk_if_t = logic, + /// Snoop bus interface type + parameter type snoop_bus_t = logic +); + ace_bus_t ace; + snoop_bus_t snoop; + clk_if_t clk_if; + + typedef ace_test_pkg::ace_aw_beat #( + .AW(AW), .IW(IW), .UW(UW) + ) aw_beat_t; + + typedef ace_test_pkg::ace_ar_beat #( + .AW(AW), .IW(IW), .UW(UW) + ) ar_beat_t; + + typedef ace_test_pkg::ace_r_beat #( + .DW(DW), .IW(IW), .UW(UW) + ) r_beat_t; + + typedef ace_test_pkg::ace_w_beat #( 
+ .DW(DW), .UW(UW) + ) w_beat_t; + + typedef ace_test_pkg::ace_b_beat #( + .IW(IW), .UW(UW) + ) b_beat_t; + + mailbox #(cache_req) cache_req_mbx = new(); + mailbox #(cache_resp) cache_resp_mbx = new(); + mailbox #(mem_req) mem_req_mbx = new(); + mailbox #(mem_resp) mem_resp_mbx = new(); + mailbox #(aw_beat_t) aw_mbx = new(); + mailbox #(w_beat_t) w_mbx = new(); + mailbox #(ar_beat_t) ar_mbx = new(); + mailbox #(r_beat_t) r_mbx = new(); + mailbox #(b_beat_t) b_mbx = new(); + mailbox #(cache_snoop_req) snoop_req_mbx = new(); + mailbox #(cache_snoop_resp) snoop_resp_mbx = new(); + + logic cache_seq_done = 1'b0; + + int unsigned os_cache_reqs = 0; + localparam int CachelineBytes = (CACHELINE_WORDS * WORD_WIDTH) / 8; + + ace_test_pkg::ace_agent #( + .AW(AW), .DW(DW), .IW(IW), .UW(UW), + .TA(TA), .TT(TT), + .ace_bus_t(ace_bus_t), + .clk_if_t(clk_if_t), + .aw_beat_t(aw_beat_t), + .w_beat_t(w_beat_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .b_beat_t(b_beat_t) + ) ace_agent; + + snoop_test_pkg::snoop_agent #( + .AW(AC_AW), .DW(CD_DW), + .TA(TA), .TT(TT), + .CACHELINE_BYTES(CachelineBytes), + .snoop_bus_t(snoop_bus_t), + .clk_if_t(clk_if_t) + ) snoop_agent; + + cache_scoreboard #( + .AW(AW), + .DW(DW), + .WORD_WIDTH(WORD_WIDTH), + .CACHELINE_WORDS(CACHELINE_WORDS), + .WAYS(WAYS), + .SETS(SETS), + .clk_if_t(clk_if_t) + ) cache_sb; + + cache_sequencer #( + .AW(AW), + .DW(DW), + .clk_if_t(clk_if_t) + ) cache_seq; + + mem_sequencer #( + .aw_beat_t(aw_beat_t), + .ar_beat_t(ar_beat_t), + .r_beat_t(r_beat_t), + .w_beat_t(w_beat_t), + .b_beat_t(b_beat_t) + ) mem_seq; + + function new( + ace_bus_t ace, + snoop_bus_t snoop, + clk_if_t clk_if, + string data_mem_file, + string tag_mem_file, + string status_file, + string txn_file, + string state_file, + int index + ); + this.ace = ace; + this.snoop = snoop; + this.clk_if = clk_if; + + this.ace_agent = new(this.ace, this.clk_if, this.aw_mbx, + this.w_mbx, this.ar_mbx, this.r_mbx, + this.b_mbx); + this.snoop_agent = 
new(this.snoop, this.clk_if, + this.snoop_req_mbx, + this.snoop_resp_mbx); + this.cache_sb = new(this.clk_if, + this.cache_req_mbx, this.cache_resp_mbx, + this.snoop_req_mbx, this.snoop_resp_mbx, + this.mem_req_mbx, this.mem_resp_mbx, + state_file, index); + this.cache_seq = new(this.clk_if, + this.cache_req_mbx, this.cache_resp_mbx, txn_file); + this.mem_seq = new(this.mem_req_mbx, this.mem_resp_mbx, + this.aw_mbx, this.ar_mbx, this.r_mbx, + this.w_mbx, this.b_mbx); + + this.cache_sb.init_mem_from_file( + data_mem_file, + tag_mem_file, + status_file + ); + endfunction + + task reset; + fork + this.ace_agent.reset(); + this.snoop_agent.reset(); + join + endtask + + task run; + fork + this.ace_agent.run(); + this.snoop_agent.run(); + this.cache_seq.run(); + this.cache_sb.run(); + this.mem_seq.run(); + join_any + endtask + +endclass diff --git a/test/vip/cache/mem_logger.svh b/test/vip/cache/mem_logger.svh new file mode 100644 index 0000000..47732c5 --- /dev/null +++ b/test/vip/cache/mem_logger.svh @@ -0,0 +1,74 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class mem_logger #( + parameter int AW = 0, + parameter int DW = 0, + parameter int IW = 0, + parameter int UW = 0, + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type mon_bus_t = logic +); + + typedef logic [AW-1:0] addr_t; + typedef logic [DW-1:0] data_t; + typedef logic [7:0] byte_t; + + mon_bus_t mem_mon_bus; + + string log_file; + bit first_write = 1; + + function new( + mon_bus_t mon, + string log_file + ); + this.mem_mon_bus = mon; + this.log_file = log_file; + endfunction + + function void log_word( + addr_t addr, + data_t data + ); + int fd; + if (first_write) fd = $fopen(log_file, "w"); + else fd = $fopen(log_file, "a"); + first_write = 0; + for (int i = 0; i < DW / 8; i++) begin + addr_t byte_addr = addr + i; + byte_t byte_data = data[i*8 +: 8]; + $fwrite(fd, "ADDR:%x DATA:%x\n", byte_addr, byte_data); + end + 
$fclose(fd); + endfunction + + function void log_time(); + int fd; + if (first_write) fd = $fopen(log_file, "w"); + else fd = $fopen(log_file, "a"); + first_write = 0; + $fwrite(fd, "TIME:%0t\n", $time); + $fclose(fd); + endfunction + + task recv_writes; + addr_t w_addr; + byte_t data[$]; + int unsigned beat_count = 0; + forever begin + @(posedge mem_mon_bus.clk_i); + if (mem_mon_bus.w_valid) begin + beat_count = mem_mon_bus.w_beat_count; + if (beat_count == 0) log_time(); + log_word(mem_mon_bus.w_addr, mem_mon_bus.w_data); + end + end + endtask + + task run; + recv_writes(); + endtask + +endclass diff --git a/test/vip/cache/mem_sequencer.svh b/test/vip/cache/mem_sequencer.svh new file mode 100644 index 0000000..35def96 --- /dev/null +++ b/test/vip/cache/mem_sequencer.svh @@ -0,0 +1,144 @@ +`ifndef _CACHE_TEST_PKG +*** INCLUDED IN cache_test_pkg *** +`endif +class mem_sequencer #( + parameter type aw_beat_t = logic, + parameter type ar_beat_t = logic, + parameter type r_beat_t = logic, + parameter type w_beat_t = logic, + parameter type b_beat_t = logic +); + mailbox #(mem_req) mem_req_mbx; + mailbox #(mem_resp) mem_resp_mbx; + mailbox #(aw_beat_t) aw_mbx_o; + mailbox #(ar_beat_t) ar_mbx_o; + mailbox #(r_beat_t) r_mbx_o; + mailbox #(w_beat_t) w_mbx_o; + mailbox #(b_beat_t) b_mbx_o; + + function new( + mailbox #(mem_req) mem_req_mbx, + mailbox #(mem_resp) mem_resp_mbx, + mailbox #(aw_beat_t) aw_mbx_o, + mailbox #(ar_beat_t) ar_mbx_o, + mailbox #(r_beat_t) r_mbx_o, + mailbox #(w_beat_t) w_mbx_o, + mailbox #(b_beat_t) b_mbx_o + ); + this.mem_req_mbx = mem_req_mbx; + this.mem_resp_mbx = mem_resp_mbx; + this.aw_mbx_o = aw_mbx_o; + this.ar_mbx_o = ar_mbx_o; + this.r_mbx_o = r_mbx_o; + this.w_mbx_o = w_mbx_o; + this.b_mbx_o = b_mbx_o; + endfunction + + function automatic axi_pkg::cache_t calc_cache(mem_req req); + if (!req.cacheable) begin + return '0; + end else begin + return axi_pkg::CACHE_BUFFERABLE | + axi_pkg::CACHE_MODIFIABLE; + end + endfunction + + function 
automatic ace_pkg::axdomain_t calc_domain(mem_req req); + if (!req.cacheable) begin + return ace_pkg::System; + end else begin + return ace_pkg::InnerShareable; + end + endfunction + + task recv_mem_req; + mem_req req; + mem_req_mbx.get(req); + if (req.op == MEM_WRITE) begin + send_aw_beat(req); + send_w_beats(req); + end else if (req.op == MEM_READ) begin + send_ar_beat(req); + end else begin + $fatal(1, "Unsupported op!"); // finish_number (0-2) must precede the message + end + endtask + + task send_aw_beat(input mem_req req); + aw_beat_t aw_beat = new; + aw_beat.addr = req.addr; + aw_beat.len = req.len; + aw_beat.size = req.size; + aw_beat.snoop = req.write_snoop_op; + aw_beat.burst = (req.len > 0) ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR; + aw_beat.domain = calc_domain(req); + aw_beat.cache = calc_cache(req); + aw_mbx_o.put(aw_beat); + endtask + + task send_w_beats(input mem_req req); + while (req.data_q.size() > 0) begin + w_beat_t w_beat = new; + for (int i = 0; i < (w_beat.DW / 8); i++) begin + w_beat.data[i*8 +: 8] = req.data_q.pop_front(); + end + w_beat.strb = '1; + w_beat.user = '0; + w_beat.last = (req.data_q.size() == 0); + w_mbx_o.put(w_beat); + end + endtask + + task send_ar_beat(input mem_req req); + ar_beat_t ar_beat = new; + ar_beat.addr = req.addr; + ar_beat.len = req.len; + ar_beat.size = req.size; + ar_beat.snoop = req.read_snoop_op; + ar_beat.burst = axi_pkg::BURST_WRAP; + ar_beat.domain = calc_domain(req); + ar_beat.cache = calc_cache(req); + ar_mbx_o.put(ar_beat); + endtask + + task recv_r_beats; + r_beat_t r_beat; + mem_resp resp = new; + do begin + r_mbx_o.get(r_beat); + for (int i = 0; i < (r_beat.DW / 8); i++) begin + resp.data_q.push_back(r_beat.data[i*8 +: 8]); + end + resp.is_shared = r_beat.resp[3]; + resp.pass_dirty = r_beat.resp[2]; + end while (!r_beat.last); + mem_resp_mbx.put(resp); + endtask + + task recv_b_beats; + b_beat_t b_beat; + mem_resp resp = new; + b_mbx_o.get(b_beat); + // Nothing to transfer in the response + mem_resp_mbx.put(resp); + endtask + + task 
recv_mem_reqs; + forever recv_mem_req(); + endtask + + task send_mem_resps; + fork + forever recv_r_beats(); + forever recv_b_beats();; + join + endtask + + task run; + fork + recv_mem_reqs(); + send_mem_resps(); + join + endtask + +endclass diff --git a/test/vip/cache_test_pkg.sv b/test/vip/cache_test_pkg.sv new file mode 100644 index 0000000..f7ac41d --- /dev/null +++ b/test/vip/cache_test_pkg.sv @@ -0,0 +1,10 @@ +package cache_test_pkg; +`define _CACHE_TEST_PKG +import snoop_test_pkg::*; +`include "cache/cache_beat_types.svh" +`include "cache/cache_sequencer.svh" +`include "cache/mem_sequencer.svh" +`include "cache/mem_logger.svh" +`include "cache/cache_scoreboard.svh" +`include "cache/cache_top_agent.svh" +endpackage diff --git a/test/vip/python/cache_coherency_test.py b/test/vip/python/cache_coherency_test.py new file mode 100644 index 0000000..6459e09 --- /dev/null +++ b/test/vip/python/cache_coherency_test.py @@ -0,0 +1,584 @@ +from cache_state import \ + CacheState, CachelineState, \ + CachelineStateEnum, CacheSetFullException, \ + StateBits +from math import log2 +from typing import List +from memory_state import MemoryState +from common import MemoryRange +from transactions import \ + CacheTransactionSequence, CacheTransaction, CacheReqOp +from random import random, randint, choice, sample +import os +import logging +import pdb +logger = logging.getLogger(__name__) + + +class CoherencyError(AssertionError): + pass + + +class CacheCoherencyTest: + def __init__( + self, + addr_width: int, + data_width: int, + word_width: int, + cacheline_words: int, + ways: int, + sets: int, + n_caches: int, + n_transactions: int, + target_dir: str, + check: bool, + debug: bool, + **kwargs + ): + + logging.basicConfig(filename='cache_python.log', filemode='w', level=logging.INFO) + + self.aw = addr_width + self.dw = data_width + self.word_width = word_width + self.cacheline_words = cacheline_words + self.ways = ways + self.sets = sets + self.n_caches = n_caches + 
self.n_transactions = n_transactions + self.target_dir = target_dir + self.check = check + self.debug = debug + + self.cacheline_bytes = \ + self.cacheline_words * self.word_width // 8 + + self.mem_ranges : list[MemoryRange] = [] + + @property + def caches(self) -> List[CacheState]: + if not hasattr(self, '_caches'): + self._caches = [] + for _ in range(0, self.n_caches): + cache = CacheState( + addr_width=self.aw, + data_width=self.dw, + word_width=self.word_width, + cacheline_words=self.cacheline_words, + ways=self.ways, + sets=self.sets + ) + cache.init_cache() + self._caches.append(cache) + return self._caches + @caches.setter + def caches(self, caches: List[CacheState]): + self._caches = caches + + @property + def mem_state(self) -> MemoryState: + if not hasattr(self, '_mem_state'): + if not self.mem_ranges: + raise Exception("Define self.mem_ranges!") + self._mem_state = MemoryState(self.mem_ranges) + return self._mem_state + @mem_state.setter + def mem_state(self, mem_state: MemoryState): + self._mem_state = mem_state + + @property + def transactions(self) -> List[CacheTransactionSequence]: + if not hasattr(self, '_transactions'): + if not self.mem_ranges: + raise Exception("Define self.mem_ranges!") + self._transactions = [] + for _ in range(self.n_caches): + self._transactions.append( + CacheTransactionSequence( + self.aw, self.dw, self.mem_ranges + ) + ) + return self._transactions + @transactions.setter + def transactions(self, txns: List[CacheTransactionSequence]): + self._transactions = txns + + def add_memory_range(self, memory_range: MemoryRange): + self.mem_ranges.append(memory_range) + + def set_cache_line( + self, + n_cache: int, + addr: int, + data: List[int], + state: List[bool] + ): + self.caches[n_cache].set_entry( + addr=addr, + data=data, + status=state + ) + + def create_transaction(self, n_cache: int, txn: CacheTransaction): + self.transactions[n_cache].add_transaction(txn) + + def generate_random_memory(self): + 
def rand_cache_index(self):
    """Return a random valid cache index in ``[0, n_caches - 1]``.

    ``rand_index(n)`` is inclusive of ``n`` (it wraps ``randint(0, n)``), so
    the upper bound passed here is ``n_caches - 1``. The previous version
    nested two ``rand_index`` calls, which skewed the distribution towards
    low indices and could return ``n_caches`` itself, one past the last
    valid cache.
    """
    return self.rand_index(self.n_caches - 1)


def rand_sharers(self, owner):
    """Return a per-cache list of booleans marking which caches share a line.

    Parameters
    ----------
    owner
        Index of the cache that must hold the line; its entry is always True.

    Returns
    -------
    list of bool
        One entry per cache (``n_caches`` entries). Every non-owner entry is
        drawn with ``rand_choice()``.

    The previous version built this list but never returned it, so callers
    always received ``None``.
    """
    # `or` short-circuits, so rand_choice() is only drawn for non-owners,
    # exactly as in the original loop.
    return [idx == owner or self.rand_choice() for idx in range(self.n_caches)]
def get_next_timestamp(self, files, cur_time):
    """Find the earliest retirement timestamp later than ``cur_time``.

    Each file in ``files`` is scanned from the top. Lines are made of
    whitespace-separated ``KEY:value`` tokens, of which ``TIME`` (decimal),
    ``INITIATOR`` (0/1) and ``ADDR`` (hex) are used here. A line counts as a
    retired transaction unless it carries ``INITIATOR:0``. For every file,
    only the first retired line with ``TIME > cur_time`` is considered.

    Parameters
    ----------
    files
        Paths of the per-cache diff files; the position in this list is the
        cache index reported back.
    cur_time
        Timestamps up to and including this value are ignored.

    Returns
    -------
    (finish, next_tstamp, addrs_w_tstamp)
        ``finish`` is True when no file has a later retirement; in that case
        ``next_tstamp`` is 0 and the list is empty. Otherwise ``next_tstamp``
        is the smallest timestamp found and ``addrs_w_tstamp`` lists
        ``(cache_idx, addr)`` for every cache retiring at that timestamp.
    """
    # The three lists are kept in lock-step: some masters may already have
    # run out of transactions, so the list position is not the cache index.
    timestamps = []
    addrs = []
    idxs = []
    for i, file in enumerate(files):
        with open(file, "r") as cache_file:
            for line in cache_file:
                time = None
                initiator = None
                addr = None
                for word in line.split():
                    key, sep, payload = word.partition(":")
                    if not sep:
                        # Token without "KEY:" (previously an IndexError)
                        continue
                    if key == "TIME":
                        time = int(payload)
                    elif key == "INITIATOR":
                        initiator = bool(int(payload))
                    elif key == "ADDR":
                        addr = int(payload, 16)
                # INITIATOR:0 marks an outstanding (not retired) transaction.
                # Deciding after the whole line is parsed makes the result
                # independent of the order of the keywords.
                if initiator is False:
                    continue
                if time and time > cur_time:
                    timestamps.append(time)
                    addrs.append(addr)
                    idxs.append(i)
                    break

    if not timestamps:
        return True, 0, []

    next_tstamp = min(timestamps)
    addrs_w_tstamp = [
        (idxs[pos], addrs[pos])
        for pos, tstamp in enumerate(timestamps)
        if tstamp == next_tstamp
    ]
    return False, next_tstamp, addrs_w_tstamp
def print_info(self, level=logging.INFO, addr=None, cache_idx=None, state=None,
               set=None, way=None):
    """Log the supplied details, one record per detail, at ``level``.

    Only arguments that are not ``None`` are logged, in the fixed order
    address (rendered with ``hex``), cache, state, set, way.
    """
    details = (
        ("Address", addr, hex),
        ("Cache", cache_idx, None),
        ("State", state, None),
        ("Set", set, None),
        ("Way", way, None),
    )
    for label, value, render in details:
        if value is None:
            continue
        shown = render(value) if render is not None else value
        logger.log(level, msg=f"{label}: {shown}")
Modified && Shared is not allowed) + """ + + logger.info("Starting coherency check") + error = False + debug = self.debug + + for mem_range in self.mem_ranges: + for addr in range( + mem_range.start_addr, + mem_range.end_addr, + self.cacheline_bytes): + cached, shared = mem_range.get_addr_properties(addr) + skip_addr = False + if not (shared and cached): + # Currently only checking shared and cached regions + continue + + # Check if there are addresses which have outstanding transactions + # This occurs when a snoop transaction has modified a cache line, but + # the transaction itself didnt finish yet + for cache in self.caches: + if addr in cache.outstanding: + skip_addr = True + logger.info("Skipping address due to an outstanding transaction") + self.print_info(logging.INFO, addr=addr) + break + if skip_addr: + continue + + cacheline = mem_range.get_data(addr, self.cacheline_bytes) + states: List[CachelineState] = [] + modified = False + owner_found = False + + # Check all caches whether they hold a copy + # Compute moesi state + # Check that modified copy is not in Exclusive state + # Monitor whether a modified copy exists + # Monitor whether an owner is found + for i, cache in enumerate(self.caches): + hit, data, state, set, way = cache.get_addr(addr) + moesi: CachelineState = state + if hit: + logger.info("Cacheline found") + self.print_info(logging.INFO, addr=addr, cache_idx=i, state=moesi.state.name, set=set, way=way) + if data != cacheline: + if moesi.state != CachelineStateEnum.INVALID: + modified = True + if moesi.state == CachelineStateEnum.EXCLUSIVE: + logger.error("A modified cache line in Exclusive state") + self.print_info(logging.ERROR, addr=addr, cache_idx=i, state=moesi.state.name, set=set, way=way) + error = True + if debug: import pdb; pdb.set_trace() + if moesi.state in \ + [CachelineStateEnum.OWNED, CachelineStateEnum.MODIFIED]: + owner_found = True + states.append(moesi) + + if modified and not owner_found: + error = True + logger.error("A 
def run(self):
    """Optionally run the post-simulation coherency check.

    When ``self.check`` is falsy nothing happens and ``False`` is returned.
    Otherwise the user is prompted to press enter, after which the result of
    ``self.reconstruct_state()`` is returned.
    """
    if not self.check:
        return False
    input("Press enter after simulation finishes to start coherency check")
    return self.reconstruct_state()
if __name__ == "__main__":
    # Command-line entry point: parse the simulation parameters, seed the
    # random generators if requested and run a RandomTest with them.
    import argparse
    from random import seed
    import numpy as np
    parser = argparse.ArgumentParser(
        # Trailing space added: the two literals are concatenated and used
        # to read "...to a filebased on address space."
        description=('Script to write data to a file '
                     'based on address space.')
    )
    parser.add_argument(
        '--addr_width',
        type=int,
        help='AXI address width'
    )
    parser.add_argument(
        '--data_width',
        type=int,
        help='AXI data width'
    )
    parser.add_argument(
        '--word_width',
        type=int,
        help='Width of a word in the cache'
    )
    parser.add_argument(
        '--cacheline_words',
        type=int,
        help='Number of words in a cacheline'
    )
    parser.add_argument(
        '--ways',
        type=int,
        help='Number of ways in the cache'
    )
    parser.add_argument(
        '--sets',
        type=int,
        help='Number of sets in the cache'
    )
    parser.add_argument(
        '--n_caches',
        type=int,
        help='Number of cached masters in the test'
    )
    parser.add_argument(
        '--n_transactions',
        type=int,
        help='Number of transactions generated per cached master'
    )
    parser.add_argument(
        '--target_dir',
        type=str,
        help='Target directory for generated files'
    )
    parser.add_argument(
        '--seed',
        type=int,
        help="Seed for the simulation",
        default=None,
        nargs='?'
    )
    parser.add_argument(
        '--check',
        action='store_true',
        help="Check for coherency once prompted"
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help="Debug mode. During coherency checking, will open pdb when error is encountered."
    )
    parsed_args = vars(parser.parse_args())
    # Compare against None rather than testing truthiness, so that an
    # explicit "--seed 0" is honoured instead of being silently ignored.
    if parsed_args.get("seed") is not None:
        # Both generators are seeded: the stdlib one via `seed`, numpy's
        # global one via `np.random.seed`.
        seed(parsed_args["seed"])
        np.random.seed(parsed_args["seed"])
    cct = RandomTest(**parsed_args)
def check_compatibility(self, other: CachelineStateEnum):
    """Tell whether a peer cache may hold the same line in state ``other``.

    Returns True when this line's state and ``other`` may coexist:
    MODIFIED and EXCLUSIVE tolerate only INVALID peers, OWNED tolerates
    INVALID and SHARED peers, SHARED tolerates everything except EXCLUSIVE
    and MODIFIED, and INVALID tolerates anything.

    Raises ``Exception("Unexpected state")`` if ``self.state`` is none of
    the five known states.
    """
    invalid = CachelineStateEnum.INVALID
    # States described by the set of peer states they accept
    accepted_peers = (
        (CachelineStateEnum.MODIFIED, (invalid,)),
        (CachelineStateEnum.OWNED, (invalid, CachelineStateEnum.SHARED)),
        (CachelineStateEnum.EXCLUSIVE, (invalid,)),
    )
    for own_state, peers in accepted_peers:
        if self.state == own_state:
            return other in peers

    if self.state == CachelineStateEnum.SHARED:
        # Described by the peer states it rejects
        rejected = (CachelineStateEnum.EXCLUSIVE, CachelineStateEnum.MODIFIED)
        return other not in rejected
    if self.state == invalid:
        return True
    raise Exception("Unexpected state")
def init_cache(self):
    """Allocate the status, tag and data arrays in their cleared state.

    After the call, for every set and way:
      * ``cache_status[set][way]`` is ``[False, False, False]``
      * ``cache_tag[set][way]`` is ``0``
      * ``cache_data[set][way]`` is ``cacheline_bytes`` zeros
    """
    n_sets, n_ways = self.sets, self.ways
    # Comprehensions build a fresh inner list on every iteration, so no two
    # entries alias the same list object.
    self.cache_status = [
        [[False, False, False] for _ in range(n_ways)]
        for _ in range(n_sets)
    ]
    self.cache_tag = [[0 for _ in range(n_ways)] for _ in range(n_sets)]
    self.cache_data = [
        [[0] * self.cacheline_bytes for _ in range(n_ways)]
        for _ in range(n_sets)
    ]
def status_arr_to_int(self, bool_arr):
    """Pack a sequence of flags into an integer, element 0 being bit 0.

    The flags are rendered as a binary string with the last element first
    and parsed with ``int(..., 2)``, so an empty sequence raises
    ``ValueError``.
    """
    digits = ""
    for flag in reversed(bool_arr):
        digits += "1" if flag else "0"
    return int(digits, 2)
def reconstruct_state(
    self,
    file,
    start_time,
    end_time
):
    """Apply the entries of a cache diff log that fall in (start_time, end_time].

    Every non-empty line of ``file`` consists of whitespace-separated
    ``KEY:value`` tokens. Recognised keys: ``TIME``, ``SET``, ``WAY``
    (decimal), ``ADDR``, ``TAG`` (hex), ``INITIATOR`` (0/1), ``STATUS``
    (string of 0/1 characters, stored reversed) and ``DATA``
    (``[b0,b1,...]`` with hex bytes).

    * A line carrying all of TIME, INITIATOR, SET, WAY, TAG, STATUS and DATA
      overwrites ``cache_data``, ``cache_tag`` and ``cache_status`` at
      ``[set][way]``.
    * A line with fewer fields but at least TIME and ADDR changes no cache
      entry.
    * In both cases, if the line is not marked ``INITIATOR:1`` its address
      is appended to ``self.outstanding``.

    Returns
    -------
    The first timestamp greater than ``end_time`` (processing stops there),
    or ``None`` if the end of the file is reached.

    Raises
    ------
    ValueError
        For a non-empty line that lacks TIME or ADDR and is not a complete
        entry. The previous version printed "Unexpected state", dropped into
        ``pdb`` and then failed on ``None > end_time``.
    """
    with open(file, "r") as state_file:
        for line in state_file:
            words = line.split()
            if not words:
                # Blank lines (e.g. a trailing newline) carry no information
                continue
            addr = None
            time = None
            initiator = None
            set_idx = None
            way = None
            tag = None
            status = None
            data = None
            for word in words:
                key, sep, payload = word.partition(":")
                if not sep:
                    # Token without "KEY:" (previously an IndexError)
                    continue
                if key == "TIME":
                    time = int(payload)
                elif key == "ADDR":
                    addr = int(payload, 16)
                elif key == "SET":
                    set_idx = int(payload)
                elif key == "INITIATOR":
                    initiator = bool(int(payload))
                elif key == "WAY":
                    way = int(payload)
                elif key == "TAG":
                    tag = int(payload, 16)
                elif key == "STATUS":
                    # Reverse so that list index 0 is the last character
                    status = [char == '1' for char in payload]
                    status.reverse()
                elif key == "DATA":
                    data = [int(x, 16) for x in payload.strip("[]").split(",")]

            modify = all(
                field is not None
                for field in (time, initiator, set_idx, way, tag, status, data)
            )
            if not modify and (time is None or addr is None):
                raise ValueError(
                    f"Unexpected state: malformed cache diff line {line!r}"
                )

            if time > end_time:
                return time
            if time <= start_time:
                continue
            if modify:
                self.cache_data[set_idx][way] = data
                self.cache_tag[set_idx][way] = tag
                self.cache_status[set_idx][way] = status
            if not initiator and addr is not None:
                self.outstanding.append(addr)
    return None
class MemoryRange:
    """A contiguous address range ``[start_addr, end_addr)`` with byte data.

    A range may carry two optional sub-ranges, ``cached_region`` and
    ``shared_region`` (themselves ``MemoryRange`` objects without data),
    which mark the addresses that are cached and/or shared.
    """

    def __init__(
        self,
        start_addr: int,
        end_addr: int,
        cached: bool = False,
        shared: bool = False,
    ):
        """
        Parameters
        ==========
        start_addr  Start address (inclusive).
        end_addr    End address (exclusive).
        cached      Mark the whole range as cached.
        shared      Mark the whole range as shared.
        """
        # Start address of the range (inclusive)
        self.start_addr = start_addr
        # End address of the range (non-inclusive)
        self.end_addr = end_addr
        # Data, one byte per address, index 0 corresponds to start_addr
        self.mem_data = []
        # Subrange that is cached (None if there is none)
        self.cached_region: MemoryRange = None
        # Subrange that is shared (None if there is none)
        self.shared_region: MemoryRange = None

        if cached:
            self.set_cached_region(start_addr, end_addr)
        if shared:
            self.set_shared_region(start_addr, end_addr)

    def init_random_mem(self):
        """Fill ``mem_data`` with one random byte per address of the range."""
        self.mem_data = np.random.randint(
            0, 256, size=(self.end_addr - self.start_addr),
            dtype=np.uint8)

    def init_zero_mem(self):
        """Fill ``mem_data`` with one zero byte per address of the range."""
        # np.zeros takes the length as `shape`; the previous `size=` keyword
        # does not exist and raised TypeError.
        self.mem_data = np.zeros(
            self.end_addr - self.start_addr,
            dtype=np.uint8)

    def set_cached_region(self, start_addr, end_addr):
        """Mark ``[start_addr, end_addr)`` as the cached sub-range."""
        self.cached_region = MemoryRange(
            start_addr=start_addr,
            end_addr=end_addr
        )

    def set_shared_region(self, start_addr, end_addr):
        """Mark ``[start_addr, end_addr)`` as the shared sub-range."""
        self.shared_region = MemoryRange(
            start_addr=start_addr,
            end_addr=end_addr
        )

    @staticmethod
    def _contains(region, addr):
        """True if ``region`` exists and ``addr`` lies in its half-open span."""
        return region is not None and region.start_addr <= addr < region.end_addr

    def get_addr_properties(self, addr):
        """Get whether address is cached and/or shared.

        Returns ``(cached, shared)``. ``end_addr`` is exclusive, so an
        address equal to a region's ``end_addr`` is outside that region
        (the previous ``<=`` comparison wrongly included it).
        """
        cached = self._contains(self.cached_region, addr)
        shared = self._contains(self.shared_region, addr)
        return cached, shared

    def get_rand_addr(self, step):
        """Random address ``start_addr + k * step`` inside the range."""
        return randrange(self.start_addr, self.end_addr, step)

    def get_rand_cached_addr(self, step):
        """Random address ``start + k * step`` inside the cached region."""
        return randrange(
            self.cached_region.start_addr,
            self.cached_region.end_addr,
            step)

    def get_rand_shared_addr(self, step):
        """Random address ``start + k * step`` inside the shared region."""
        return randrange(
            self.shared_region.start_addr,
            self.shared_region.end_addr,
            step)

    def get_rand_cached_shared_addr(self, step):
        """Random address inside the overlap of the cached and shared regions.

        Raises ``Exception`` if either region is missing or if the two
        regions do not overlap. Regions that merely touch
        (``end == start``) count as non-overlapping; previously that case
        slipped through to ``randrange`` and failed with an unrelated
        "empty range" error.
        """
        if (not self.cached_region) or (not self.shared_region):
            raise Exception("Either cached or shared region is missing")
        start_addr = max(self.cached_region.start_addr,
                         self.shared_region.start_addr)
        end_addr = min(self.cached_region.end_addr,
                       self.shared_region.end_addr)
        if end_addr <= start_addr:
            raise Exception("No overlapping shared and cached regions")
        return randrange(start_addr, end_addr, step)

    def get_data(self, addr, len):
        """Return a list of ``len`` bytes of ``mem_data`` starting at ``addr``."""
        start_idx = addr - self.start_addr
        end_idx = start_idx + len
        return [self.mem_data[i] for i in range(start_idx, end_idx)]
class ReadSnoopType(Enum):
    """Snoop-type codes for read transactions.

    NOTE(review): the values look like the ACE ARSNOOP encodings; confirm
    against the specification.

    Several operations share code 0. In a Python ``Enum`` the first name
    defined for a value is the canonical member and later names are aliases,
    so ``READONCE`` and ``BARRIER`` are aliases of ``READNOSNOOP``.
    """
    READNOSNOOP = 0
    READONCE = 0
    READSHARED = 1
    READCLEAN = 2
    READNOTSHAREDDIRTY = 3
    READUNIQUE = 7
    CLEANUNIQUE = 11
    MAKEUNIQUE = 12
    CLEANSHARED = 8
    CLEANINVALID = 9
    MAKEINVALID = 13
    BARRIER = 0
    DVMCOMPLETE = 14
    # Misspelled original name, kept as an alias so existing references to
    # ReadSnoopType.DMVCOMPLETE keep working.
    DMVCOMPLETE = 14
    DVMMESSAGE = 15
def get_rand_data(self):
    """Return a random unsigned integer that fits in ``self.dw`` bits.

    The result is uniform over ``[0, 2**dw - 1]``. ``randrange`` excludes
    its stop value, so the previous stop of ``(1 << dw) - 1`` could never
    produce the all-ones word.
    """
    return randrange(1 << self.dw)
if __name__ == "__main__":
    # Stand-alone use: write `n` random transactions to `file`.
    import argparse
    parser = argparse.ArgumentParser(
        description=('Script to generate random transactions')
    )
    parser.add_argument(
        'file',
        type=str,
        help='The filename where data will be written'
    )
    parser.add_argument(
        'n',
        type=int,
        help='Number of transactions'
    )
    parser.add_argument(
        '--addr_width',
        type=int,
        default=32,
        help='AXI address width (default: 32)'
    )
    parser.add_argument(
        '--data_width',
        type=int,
        default=64,
        help='AXI data width (default: 64)'
    )
    args = parser.parse_args()
    # CacheTransactionSequence requires the bus widths and at least one
    # memory range; the previous argument-less call raised TypeError.
    # Random addresses are drawn from the overlap of the cached and shared
    # regions, so the default range is marked as both.
    default_range = MemoryRange(
        start_addr=0, end_addr=0x0000_1000, cached=True, shared=True
    )
    cts = CacheTransactionSequence(
        args.addr_width, args.data_width, [default_range]
    )
    cts.generate_rand_sequence(args.n)
    cts.generate_file(args.file)
new; + mailbox #(cr_beat_t) cr_mbx = new; + + snoop_driver #( + .TA(TA), .TT(TT), + .snoop_bus_t(snoop_bus_t), + .ac_beat_t(ac_beat_t), + .cd_beat_t(cd_beat_t), + .cr_beat_t(cr_beat_t) + ) snoop_drv; + + snoop_monitor #( + .TA(TA), .TT(TT), + .snoop_bus_t(snoop_bus_t), + .ac_beat_t(ac_beat_t), + .cd_beat_t(cd_beat_t), + .cr_beat_t(cr_beat_t) + ) snoop_mon; + + snoop_sequencer #( + .TA(TA), .TT(TT), .CD_DW(DW), + .CACHELINE_BYTES(CACHELINE_BYTES), + .ac_beat_t(ac_beat_t), + .cd_beat_t(cd_beat_t), + .cr_beat_t(cr_beat_t) + ) snoop_seq; + + function new( + snoop_bus_t snoop, + clk_if_t clk_if, + mailbox #(cache_snoop_req) snoop_req_mbx, + mailbox #(cache_snoop_resp) snoop_resp_mbx + ); + this.snoop = snoop; + this.clk_if = clk_if; + + this.snoop_drv = new( + this.snoop, this.cr_mbx, + this.cd_mbx + ); + this.snoop_mon = new( + this.snoop, this.ac_mbx + ); + this.snoop_seq = new( + this.ac_mbx, this.cr_mbx, + this.cd_mbx, + snoop_req_mbx, + snoop_resp_mbx + ); + + endfunction + + task reset; + this.snoop_drv.reset(); + endtask + + task run; + fork + this.snoop_drv.run(); + this.snoop_mon.run(); + this.snoop_seq.run(); + join + endtask + +endclass diff --git a/test/vip/snoop/snoop_beat_types.svh b/test/vip/snoop/snoop_beat_types.svh new file mode 100644 index 0000000..b3c7a82 --- /dev/null +++ b/test/vip/snoop/snoop_beat_types.svh @@ -0,0 +1,36 @@ +`ifndef _SNOOP_TEST_PKG +*** INCLUDED IN snoop_test_pkg *** +`endif +/// The data transferred on a beat on the AC channel. +class ace_ac_beat #( + parameter AW = 32 +); + rand logic [AW-1:0] ac_addr = '0; + logic [3:0] ac_snoop = '0; + logic [2:0] ac_prot = '0; +endclass + +/// The data transferred on a beat on the CR channel. +class ace_cr_beat; + ace_pkg::crresp_t cr_resp = '0; +endclass + +/// The data transferred on a beat on the CD channel. 
+class ace_cd_beat #( + parameter DW = 32 +); + rand logic [DW-1:0] cd_data = '0; + logic cd_last = '0; +endclass + +// Snoop request to a cache +class cache_snoop_req; + int unsigned addr = 0; + ace_pkg::acsnoop_t snoop_op = '0; +endclass + +// Snoop response from a cache +class cache_snoop_resp; + logic [7:0] data_q[$]; + ace_pkg::crresp_t snoop_resp = '0; +endclass \ No newline at end of file diff --git a/test/vip/snoop/snoop_driver.svh b/test/vip/snoop/snoop_driver.svh new file mode 100644 index 0000000..104256e --- /dev/null +++ b/test/vip/snoop/snoop_driver.svh @@ -0,0 +1,116 @@ +`ifndef _SNOOP_TEST_PKG +*** INCLUDED IN snoop_test_pkg *** +`endif +class snoop_driver #( + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type snoop_bus_t = logic, + parameter type ac_beat_t = logic, + parameter type cd_beat_t = logic, + parameter type cr_beat_t = logic +); + + snoop_bus_t snoop; + + cd_beat_t cd_txn; + cr_beat_t cr_txn; + + // Mailboxes for CD and CR transcations + // Should be created and connected outside + mailbox #(cd_beat_t) cd_mbx; + mailbox #(cr_beat_t) cr_mbx; + + function new ( + snoop_bus_t snoop, + mailbox #(cr_beat_t) cr_mbx, + mailbox #(cd_beat_t) cd_mbx + ); + this.snoop = snoop; + + this.cr_mbx = cr_mbx; + this.cd_mbx = cd_mbx; + endfunction + + task cycle_start; + #TT; + endtask + + task cycle_end; + @(posedge snoop.clk_i); + endtask + + task reset; + snoop.ac_ready <= '0; + snoop.cr_valid <= '0; + snoop.cr_resp <= '0; + snoop.cd_valid <= '0; + snoop.cd_data <= '0; + snoop.cd_last <= '0; + endtask + + task rec_cd_txns; + // Ensure that mailbox is read only + // at cycle_end + forever begin + cd_beat_t beat; + if (cd_mbx.try_get(beat)) begin + send_cd(beat); + end else begin + cycle_end(); + end + end + endtask + + task rec_cr_txns; + // Ensure that mailbox is read only + // at cycle_end + forever begin + cr_beat_t beat; + if (cr_mbx.try_get(beat)) begin + send_cr(beat); + end else 
begin + cycle_end(); + end + end + endtask + + /// Issue a beat on the CR channel. + task send_cr(cr_beat_t beat); + snoop.cr_valid <= #TA 1; + snoop.cr_resp <= #TA beat.cr_resp; + cycle_start(); + while (snoop.cr_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + snoop.cr_valid <= #TA '0; + snoop.cr_resp <= #TA '0; + endtask + + /// Issue a beat on the CD channel. + task send_cd(cd_beat_t beat); + snoop.cd_valid <= #TA 1; + snoop.cd_data <= #TA beat.cd_data; + snoop.cd_last <= #TA beat.cd_last; + cycle_start(); + while (snoop.cd_ready != 1) begin cycle_end(); cycle_start(); end + cycle_end(); + snoop.cd_valid <= #TA '0; + snoop.cd_data <= #TA '0; + snoop.cd_last <= #TA '0; + endtask + + /// Randomly toggle ACREADY. + /// Address is read in snoop_monitor. + task recv_ac (); + snoop.ac_ready <= #TA $urandom_range(0,1); + cycle_start(); + cycle_end(); + endtask + + task run(); + fork + rec_cd_txns(); + rec_cr_txns(); + forever recv_ac(); + join + endtask +endclass diff --git a/test/vip/snoop/snoop_monitor.svh b/test/vip/snoop/snoop_monitor.svh new file mode 100644 index 0000000..1fec76c --- /dev/null +++ b/test/vip/snoop/snoop_monitor.svh @@ -0,0 +1,50 @@ +`ifndef _SNOOP_TEST_PKG +*** INCLUDED IN snoop_test_pkg *** +`endif +class snoop_monitor #( + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter type snoop_bus_t = logic, + parameter type ac_beat_t = logic, + parameter type cd_beat_t = logic, + parameter type cr_beat_t = logic +); + + snoop_bus_t snoop; + + // Mailbox for AC transactions + // Should be created and connected outside + mailbox #(ac_beat_t) ac_mbx; + + task cycle_start; + #TT; + endtask + + task cycle_end; + @(posedge snoop.clk_i); + endtask + + function new ( + snoop_bus_t snoop, + mailbox #(ac_beat_t) ac_mbx + ); + this.snoop = snoop; + this.ac_mbx = ac_mbx; + endfunction + + task mon_ac; + ac_beat_t ac_txn = new; + cycle_start(); + while (!(snoop.ac_valid && 
snoop.ac_ready)) begin cycle_end(); cycle_start(); end + ac_txn.ac_addr = snoop.ac_addr; + ac_txn.ac_snoop = snoop.ac_snoop; + ac_txn.ac_prot = snoop.ac_prot; + ac_mbx.put(ac_txn); + cycle_end(); + endtask + + task run; + forever mon_ac(); + endtask + +endclass diff --git a/test/vip/snoop/snoop_sequencer.svh b/test/vip/snoop/snoop_sequencer.svh new file mode 100644 index 0000000..0cf4cd7 --- /dev/null +++ b/test/vip/snoop/snoop_sequencer.svh @@ -0,0 +1,90 @@ +`ifndef _SNOOP_TEST_PKG +*** INCLUDED IN snoop_test_pkg *** +`endif +class snoop_sequencer #( + parameter time TA = 0ns, // stimuli application time + parameter time TT = 0ns, // stimuli test time + parameter int CD_DW = 0, + parameter int CACHELINE_BYTES = 0, + parameter type ac_beat_t = logic, + parameter type cd_beat_t = logic, + parameter type cr_beat_t = logic +); + + cd_beat_t cd_txn; + + localparam int BYTES_PER_CD_DW = CD_DW / 8; + + // Mailboxes for snoop transactions + // Should be created and connected outside + mailbox #(ac_beat_t) ac_mbx; + mailbox #(cr_beat_t) cr_mbx; + mailbox #(cd_beat_t) cd_mbx; + + mailbox #(cache_snoop_req) snoop_req_mbx; + mailbox #(cache_snoop_resp) snoop_resp_mbx; + + function new( + mailbox #(ac_beat_t) ac_mbx, + mailbox #(cr_beat_t) cr_mbx, + mailbox #(cd_beat_t) cd_mbx, + mailbox #(cache_snoop_req) snoop_req_mbx, + mailbox #(cache_snoop_resp) snoop_resp_mbx + ); + this.ac_mbx = ac_mbx; + this.cr_mbx = cr_mbx; + this.cd_mbx = cd_mbx; + + this.snoop_req_mbx = snoop_req_mbx; + this.snoop_resp_mbx = snoop_resp_mbx; + endfunction + + function cd_beat_t gen_rand_cd; + cd_beat_t beat = new; + beat.cd_data = $urandom(); + beat.cd_last = '0; + return beat; + endfunction + + function cr_beat_t gen_rand_cr; + cr_beat_t beat = new; + beat.cr_resp[4:2] = $urandom_range(0, 3'b111); + beat.cr_resp[1] = 1'b0; + beat.cr_resp[0] = $urandom_range(0, 1); + return beat; + endfunction + + task gen_snoop_resp; + ac_beat_t ac_beat; + cd_beat_t cd_beat = new; + cr_beat_t cr_beat = new; + 
cache_snoop_req cache_req = new; + cache_snoop_resp cache_resp; + int byte_count = 0; + ac_mbx.get(ac_beat); + cache_req.addr = ac_beat.ac_addr; + cache_req.snoop_op = ac_beat.ac_snoop; + snoop_req_mbx.put(cache_req); + snoop_resp_mbx.get(cache_resp); + cr_beat.cr_resp = cache_resp.snoop_resp; + cr_mbx.put(cr_beat); + if (cache_resp.snoop_resp.DataTransfer) begin + for (int i = 0; i < CACHELINE_BYTES; i++) begin + cd_beat.cd_data[byte_count*8 +: 8] = cache_resp.data_q.pop_front(); + cd_beat.cd_last = 1'b0; + byte_count++; + if (byte_count == BYTES_PER_CD_DW) begin + if (i == (CACHELINE_BYTES - 1)) cd_beat.cd_last = 1'b1; + cd_mbx.put(cd_beat); + cd_beat = new; + byte_count = 0; + end + end + end + endtask + + task run; + forever gen_snoop_resp(); + endtask + +endclass diff --git a/test/vip/snoop_test_pkg.sv b/test/vip/snoop_test_pkg.sv new file mode 100644 index 0000000..cb00bd6 --- /dev/null +++ b/test/vip/snoop_test_pkg.sv @@ -0,0 +1,24 @@ +package snoop_test_pkg; + `define _SNOOP_TEST_PKG + + typedef enum logic [3:0] { + AC_READ_ONCE = 0, + AC_READ_SHARED = 1, + AC_READ_CLEAN = 2, + AC_READ_NOT_SHARED_DIRTY = 3, + AC_READ_UNIQUE = 4, + AC_CLEAN_SHARED = 5, + AC_CLEAN_INVALID = 6, + AC_MAKE_INVALID = 7, + AC_DVM_COMPLETE = 8, + AC_DVM_MESSAGE = 9 + } ac_snoop_e; + + `include "snoop/snoop_beat_types.svh" + `include "snoop/snoop_driver.svh" + `include "snoop/snoop_monitor.svh" + `include "snoop/snoop_sequencer.svh" + `include "snoop/snoop_agent.svh" + + +endpackage \ No newline at end of file